Populating a column of 0-1 with cumulative amounts between 1s

I have data that looks like this:

id <- c(1,1,1,1,1,1,2,2,2,2,3,3,3,3,3,3,3,3,3,4,4,4)
start <- c(NA, NA, NA, 1, NA, NA, NA, NA, 1, NA, NA, NA, 1, NA, NA, NA, NA, NA, 1, NA, NA, NA)
e <- as.data.frame(cbind(id, start))

      

I would like the full NAs with the grand total analogy to start each time, either when running == 1, or there was a new ID there. I did a for-loop, but my actual data is too long for the loop to complete over the next few days. Is there a way to speed up the solution? My target variable can be reproduced like this:

e$target <- NA
for (i in 2:length(e$id)){
  if (e$id[i]!=e$id[i-1]){
    e$target[i] <- NA
  } else {
    e$target[i] <- e$target[i-1]+1
    if (!is.na(e$start[i]==1)){
      e$target[i] <- 0
    }
  }
}

      

+3


source to share


3 answers


We can do this with data.table



library(data.table)
setDT(e)[,  target1 := seq_len(.N)-1,.(grp = cumsum(!is.na(start)), id)]
e[e[, c(.I[all(is.na(start))], .I[seq_len(which.max(!is.na(start))-1)]),
                  id]$V1, target1 := NA]
e
#    id start target target1
# 1:  1    NA     NA      NA
# 2:  1    NA     NA      NA
# 3:  1    NA     NA      NA
# 4:  1     1      0       0
# 5:  1    NA      1       1
# 6:  1    NA      2       2
# 7:  2    NA     NA      NA
# 8:  2    NA     NA      NA
# 9:  2     1      0       0
#10:  2    NA      1       1
#11:  3    NA     NA      NA
#12:  3    NA     NA      NA
#13:  3     1      0       0
#14:  3    NA      1       1
#15:  3    NA      2       2
#16:  3    NA      3       3
#17:  3    NA      4       4
#18:  3    NA      5       5
#19:  3     1      0       0
#20:  4    NA     NA      NA
#21:  4    NA     NA      NA
#22:  4    NA     NA      NA

      

+2


source


You can try tidyverse

. Use fill

to drag the last non-NA entry and then replace these values ​​with a sequence of their length (-1 - to get a sequence starting at 0)



library(tidyverse)

e %>% 
 group_by(id) %>% 
 mutate(target = start) %>% 
 fill(target) %>% 
 mutate(target = replace(target, !is.na(target), seq(length(target[!is.na(target)]))-1), 
        target = replace(target, start == 1, 0))

      

+2


source


Another parameter data.table

:

library(data.table)
setDT(e)[, subgroup := cumsum(start==1 &  !is.na(start)), by = id]
e[ , target2 := cumsum(is.na(start)), by = .(id, subgroup)][subgroup == 0, target2 := NA_integer_]

 #   id start target subgroup target2
 #1:  1    NA     NA        0      NA
 #2:  1    NA     NA        0      NA
 #3:  1    NA     NA        0      NA
 #4:  1     1      0        1       0
 #5:  1    NA      1        1       1
 #6:  1    NA      2        1       2
 #7:  2    NA     NA        0      NA
 #8:  2    NA     NA        0      NA
 #9:  2     1      0        1       0
#10:  2    NA      1        1       1
#11:  3    NA     NA        0      NA
#12:  3    NA     NA        0      NA
#13:  3     1      0        1       0
#14:  3    NA      1        1       1
#15:  3    NA      2        1       2
#16:  3    NA      3        1       3
#17:  3    NA      4        1       4
#18:  3    NA      5        1       5
#19:  3     1      0        2       0
#20:  4    NA     NA        0      NA
#21:  4    NA     NA        0      NA
#22:  4    NA     NA        0      NA

      

+2


source







All Articles