Defining Spells Based on Multiple Criteria with Missing Data

Question 1

Much less elegant than @BrodieG's data.table solution (which convinces me I really must familiarise myself with data.table!), but since I've coded it I may as well provide it here.

# Example panel: one row per person (Caseid) and Year. In years where a person
# was not observed, every job-related column (and Wage) is NA.
d <- read.table(text='Caseid     Year        Unemployed  EmployerID  occID   indID  Wage           
1          1999         0          1           1       1      5.00       
1          2000         NA         NA          NA      NA     NA       
1          2001         NA         NA          NA      NA     NA       
1          2002         0          1           1       2      6.00       
2          1999         0          1           1       1      4.00
2          2000         0          1           1       1      5.00
2          2001         0          1           1       1      6.00
2          2002         1          1           1       1      6.00
3          1999         0          1           1       1      4.00
3          2000         0          3           1       1      5.00
3          2001         0          1           4       1      5.00
3          2002         NA         NA          NA      NA     NA
4          1999         0          1           1       1      5.00
4          2000         0          1           1       1      5.00
4          2001         0          1           1       1      7.00
4          2002         0          1           1       1      7.00', header=TRUE)

# Number the spells within one person's NA-free, chronologically ordered rows.
#
# x: data frame holding only the columns that define a spell.
# Returns an integer vector with one spell number per row of x, starting at 1.
#
# A new spell starts on the first row and whenever any column differs from the
# previous row. Comparing with the previous row (rather than using
# duplicated()) matters when a person returns to an earlier job: that row must
# open a new spell instead of being folded into the preceding one.
spell_index <- function(x) {
  n <- nrow(x)
  if (n < 2L) {
    return(rep_len(1L, n))
  }
  m <- as.matrix(x)
  changed <- rowSums(m[-1L, , drop = FALSE] != m[-n, , drop = FALSE]) > 0
  cumsum(c(TRUE, changed))
}

# Rows with any missing value cannot be assigned to a spell. Set them aside and
# order the rest by year within person so that "previous row" means "previous
# observed year".
observed <- na.omit(d)
observed <- observed[order(observed$Caseid, observed$Year), ]
observed$Spell <- ave(
  seq_len(nrow(observed)), observed$Caseid,
  FUN = function(i) {
    spell_index(observed[i, c("Unemployed", "EmployerID", "occID", "indID")])
  }
)

# Merge back on all shared columns; the rows that were set aside return with
# Spell = NA.
d <- merge(observed, d, all = TRUE)

# Average wage per person and spell. aggregate() drops the NA-Spell rows, so
# after the left join they carry avgWage = NA.
spell_means <- aggregate(
  list(avgWage = d$Wage),
  list(Caseid = d$Caseid, Spell = d$Spell),
  mean,
  na.rm = TRUE
)
d <- merge(d, spell_means, all.x = TRUE)

d[order(d$Caseid, d$Year), ]

Note, though, that rows containing NA are left out of the spell calculation, so they end up with NA for Spell and avgWage (in addition to their already-missing Wage).

Question 2

Here is a data.table solution (the input `df` is defined under "Data, for debugging" below):

library(data.table)
dt <- data.table(df)

# Flag, within each person, the observed rows (Unemployed not NA) where any of
# the spell-defining columns differs from the previous observed row. Rows
# excluded by the `i` filter keep change = NA, so the comparison bridges gaps
# of missing years.
#
# Reduce(`|`, ...) ORs the per-column change flags. Unlike apply() over a
# vapply() result, it also works when a person has exactly one observed row:
# vapply() then returns a plain vector rather than a matrix and apply() errors.
dt[
  !is.na(Unemployed),
  change := as.numeric(
    Reduce(`|`, lapply(.SD, function(x) c(FALSE, diff(x) != 0)))
  ),
  by = Caseid,
  .SDcols = c("Unemployed", "EmployerID", "occID", "indID")
]

# Running count of changes per person, treating NA (unobserved) rows as "no
# change" so they inherit the spell of the preceding observed row.
dt[, spell := cumsum(ifelse(is.na(change), 0, change)) + 1, by = Caseid]

# Mean wage within each person-spell, ignoring missing wages.
dt[, avgWage := mean(Wage, na.rm = TRUE), by = list(Caseid, spell)]
dt
#     Caseid Year Unemployed EmployerID occID indID Wage change spell avgWage
#  1:      1 1999          0          1     1     1    5      0     1       5
#  2:      1 2000         NA         NA    NA    NA   NA     NA     1       5
#  3:      1 2001         NA         NA    NA    NA   NA     NA     1       5
#  4:      1 2002          0          1     1     2    6      1     2       6
#  5:      2 1999          0          1     1     1    4      0     1       5
#  6:      2 2000          0          1     1     1    5      0     1       5
#  7:      2 2001          0          1     1     1    6      0     1       5
#  8:      2 2002          1          1     1     1    6      1     2       6
#  9:      3 1999          0          1     1     1    4      0     1       4
# 10:      3 2000          0          3     1     1    5      1     2       5
# 11:      3 2001          0          1     4     1    5      1     3       5
# 12:      3 2002         NA         NA    NA    NA   NA     NA     3       5
# 13:      4 1999          0          1     1     1    5      0     1       6
# 14:      4 2000          0          1     1     1    5      0     1       6
# 15:      4 2001          0          1     1     1    7      0     1       6
# 16:      4 2002          0          1     1     1    7      0     1       6

Data, for debugging:

# Example panel used by the data.table solution: four people (Caseid 1-4), each
# with one row per year from 1999 to 2002. Values are laid out four per person;
# NA marks years in which the person's job information and wage are missing.
df <- data.frame(
  Caseid = rep(1:4, each = 4L),
  Year = rep(1999:2002, times = 4L),
  Unemployed = c(
    0L, NA, NA, 0L,
    0L, 0L, 0L, 1L,
    0L, 0L, 0L, NA,
    0L, 0L, 0L, 0L
  ),
  EmployerID = c(
    1L, NA, NA, 1L,
    1L, 1L, 1L, 1L,
    1L, 3L, 1L, NA,
    1L, 1L, 1L, 1L
  ),
  occID = c(
    1L, NA, NA, 1L,
    1L, 1L, 1L, 1L,
    1L, 1L, 4L, NA,
    1L, 1L, 1L, 1L
  ),
  indID = c(
    1L, NA, NA, 2L,
    1L, 1L, 1L, 1L,
    1L, 1L, 1L, NA,
    1L, 1L, 1L, 1L
  ),
  Wage = c(
    5, NA, NA, 6,
    4, 5, 6, 6,
    4, 5, 5, NA,
    5, 5, 7, 7
  )
)

EDIT: updated to run with new data:

library(data.table)
dt <- data.table(df)

# Replace newemp by its running total within each person, on the rows where it
# is observed. NOTE(review): presumably newemp is a 0/1 "new employer" flag, so
# the running total acts as an employer counter; confirm against the new data.
dt[!is.na(newemp), newemp := cumsum(newemp), by = caseid]

# Flag, within each person, the rows with observed unemp where any of columns
# 3:6 differs from the previous such row. Rows excluded by the `i` filter keep
# change = NA.
#
# Reduce(`|`, ...) ORs the per-column change flags. Unlike apply() over a
# vapply() result, it also works when a person has exactly one qualifying row:
# vapply() then returns a plain vector rather than a matrix and apply() errors.
dt[
  !is.na(unemp),
  change := as.numeric(
    Reduce(`|`, lapply(.SD, function(x) c(FALSE, diff(x) != 0)))
  ),
  by = caseid,
  .SDcols = 3:6
]

# Running count of changes per person, treating NA change as "no change" so
# those rows inherit the spell of the preceding row.
dt[, spell := cumsum(ifelse(is.na(change), 0, change)) + 1, by = caseid]

# Mean of lwage within each person-spell, ignoring missing values.
dt[, avgWage := mean(lwage, na.rm = TRUE), by = list(caseid, spell)]
dt

Note the new data has some additional issues that aren't fully dealt with (i.e. some rows are partially NA, instead of fully NA as in the original). You'll have to tinker with the logic to get it to do exactly what you want.