Populating a column of 0-1 with cumulative amounts between 1s
I have data that looks like this:
id <- c(1,1,1,1,1,1,2,2,2,2,3,3,3,3,3,3,3,3,3,4,4,4)
start <- c(NA, NA, NA, 1, NA, NA, NA, NA, 1, NA, NA, NA, 1, NA, NA, NA, NA, NA, 1, NA, NA, NA)
e <- as.data.frame(cbind(id, start))
I would like the full NAs with the grand total analogy to start each time, either when running == 1, or there was a new ID there. I did a for-loop, but my actual data is too long for the loop to complete over the next few days. Is there a way to speed up the solution? My target variable can be reproduced like this:
e$target <- NA
for (i in 2:length(e$id)){
if (e$id[i]!=e$id[i-1]){
e$target[i] <- NA
} else {
e$target[i] <- e$target[i-1]+1
if (!is.na(e$start[i]==1)){
e$target[i] <- 0
}
}
}
source to share
We can do this with data.table
library(data.table)
setDT(e)[, target1 := seq_len(.N)-1,.(grp = cumsum(!is.na(start)), id)]
e[e[, c(.I[all(is.na(start))], .I[seq_len(which.max(!is.na(start))-1)]),
id]$V1, target1 := NA]
e
# id start target target1
# 1: 1 NA NA NA
# 2: 1 NA NA NA
# 3: 1 NA NA NA
# 4: 1 1 0 0
# 5: 1 NA 1 1
# 6: 1 NA 2 2
# 7: 2 NA NA NA
# 8: 2 NA NA NA
# 9: 2 1 0 0
#10: 2 NA 1 1
#11: 3 NA NA NA
#12: 3 NA NA NA
#13: 3 1 0 0
#14: 3 NA 1 1
#15: 3 NA 2 2
#16: 3 NA 3 3
#17: 3 NA 4 4
#18: 3 NA 5 5
#19: 3 1 0 0
#20: 4 NA NA NA
#21: 4 NA NA NA
#22: 4 NA NA NA
source to share
You can try tidyverse
. Use fill
to drag the last non-NA entry and then replace these values ββwith a sequence of their length (-1 - to get a sequence starting at 0)
library(tidyverse)
e %>%
group_by(id) %>%
mutate(target = start) %>%
fill(target) %>%
mutate(target = replace(target, !is.na(target), seq(length(target[!is.na(target)]))-1),
target = replace(target, start == 1, 0))
source to share
Another parameter data.table
:
library(data.table)
setDT(e)[, subgroup := cumsum(start==1 & !is.na(start)), by = id]
e[ , target2 := cumsum(is.na(start)), by = .(id, subgroup)][subgroup == 0, target2 := NA_integer_]
# id start target subgroup target2
#1: 1 NA NA 0 NA
#2: 1 NA NA 0 NA
#3: 1 NA NA 0 NA
#4: 1 1 0 1 0
#5: 1 NA 1 1 1
#6: 1 NA 2 1 2
#7: 2 NA NA 0 NA
#8: 2 NA NA 0 NA
#9: 2 1 0 1 0
#10: 2 NA 1 1 1
#11: 3 NA NA 0 NA
#12: 3 NA NA 0 NA
#13: 3 1 0 1 0
#14: 3 NA 1 1 1
#15: 3 NA 2 1 2
#16: 3 NA 3 1 3
#17: 3 NA 4 1 4
#18: 3 NA 5 1 5
#19: 3 1 0 2 0
#20: 4 NA NA 0 NA
#21: 4 NA NA 0 NA
#22: 4 NA NA 0 NA
source to share