Create a list that counts from the start to the current value
I currently have a data table that looks like this:
Name Person Date
A 1 1/1/2004
A 2 1/3/2004
A 3 1/9/2004
B 4 1/7/2004
B 5 1/10/2004
B 6 1/17/2004
I am trying to create a new data table that looks like this:
Name Person Date
A 1 1/1/2004
A 2 1/1/2004
A 2 1/3/2004
A 3 1/1/2004
A 3 1/3/2004
A 3 1/9/2004
B 4 1/7/2004
B 5 1/7/2004
B 5 1/10/2004
B 6 1/7/2004
B 6 1/10/2004
B 6 1/17/2004
where the date starts with the minimum name group date.
So far I have done something like this:
data$D = ave(data$Date, data$Name, FUN=min)
diff = data$Date - data$D
sequence(diff) + rep(data$D,diff)
However, this only results in repetition, which also takes into account the dates in between. Is there an easy way to make an extension from a minimum date? Thank!
+3
source to share
3 answers
It seems that you are looking for a solution data.table
, so here goes
library(data.table)
setDT(data)[, list(Person = rep(Person, seq_len(.N)),
Date = Date[sequence(seq_len(.N))]), by = Name]
# Name Person Date
# 1: A 1 1/1/2004
# 2: A 2 1/1/2004
# 3: A 2 1/3/2004
# 4: A 3 1/1/2004
# 5: A 3 1/3/2004
# 6: A 3 1/9/2004
# 7: B 4 1/7/2004
# 8: B 5 1/7/2004
# 9: B 5 1/10/2004
# 10: B 6 1/7/2004
# 11: B 6 1/10/2004
# 12: B 6 1/17/2004
Edit
This is the dataset that was used for this answer
data <- structure(list(Name = structure(c(1L, 1L, 1L, 2L, 2L, 2L), .Label = c("A",
"B"), class = "factor"), Person = 1:6, Date = structure(c(1L,
4L, 6L, 5L, 2L, 3L), .Label = c("1/1/2004", "1/10/2004", "1/17/2004",
"1/3/2004", "1/7/2004", "1/9/2004"), class = "factor")), .Names = c("Name",
"Person", "Date"), class = "data.frame", row.names = c(NA, -6L
))
+4
source to share
If dat
is a dataset
res <- do.call(rbind,lapply(split(dat, dat$Name),
function(x) {
Date1 <- as.Date(x$Date, "%m/%d/%Y")
x <- x[order(Date1),]
indx <- seq_len(nrow(x))
cbind(x[rep(indx,indx), 1:2], Date=x[sequence(indx),3])}))
row.names(res) <- 1:nrow(res)
res
# Name Person Date
#1 A 1 1/1/2004
#2 A 2 1/1/2004
#3 A 2 1/3/2004
#4 A 3 1/1/2004
#5 A 3 1/3/2004
#6 A 3 1/9/2004
#7 B 4 1/7/2004
#8 B 5 1/7/2004
#9 B 5 1/10/2004
#10 B 6 1/7/2004
#11 B 6 1/10/2004
#12 B 6 1/17/2004
Using dplyr
dat %>%
group_by(Name) %>%
mutate(Date1= as.Date(Date, "%m/%d/%Y")) %>%
arrange(Name, Date1) %>%
mutate(N= row_number()) %>%
do(data.frame(.[rep(.$N, .$N),1:2], Date=.[sequence(.$N),3],stringsAsFactors=F))
# Name Person Date
#1 A 1 1/1/2004
#2 A 2 1/1/2004
#3 A 2 1/3/2004
#4 A 3 1/1/2004
#5 A 3 1/3/2004
#6 A 3 1/9/2004
#7 B 4 1/7/2004
#8 B 5 1/7/2004
#9 B 5 1/10/2004
#10 B 6 1/7/2004
#11 B 6 1/10/2004
#12 B 6 1/17/2004
+3
source to share
Nested lapply
:
#dummy data
df <- read.table(text="Name Person Date
A 1 1/1/2004
A 2 1/3/2004
A 3 1/9/2004
B 4 1/7/2004
B 5 1/10/2004
B 6 1/17/2004",header=TRUE)
#convert to date, to be used for min()
df$Date <- as.Date(df$Date,"%m/%d/%Y")
#result
res <-
do.call(rbind,
lapply(split(df,df$Name),
function(i){
do.call(rbind,
lapply(unique(i$Person),
function(j){
d <- i[ i$Date<=min(i[ i$Person==j,"Date"]),]
d$Person <- j
return(d)}))
})
)
# Name Person Date
# A.1 A 1 2004-01-01
# A.2 A 2 2004-01-01
# A.3 A 2 2004-01-03
# A.4 A 3 2004-01-01
# A.5 A 3 2004-01-03
# A.6 A 3 2004-01-09
# B.4 B 4 2004-01-07
# B.41 B 5 2004-01-07
# B.5 B 5 2004-01-10
# B.42 B 6 2004-01-07
# B.51 B 6 2004-01-10
# B.6 B 6 2004-01-17
+2
source to share