Create a list that counts from the start to the current value

Question

Create a list that counts from the start to the current value

I currently have a data table that looks like this:

Name     Person     Date
A        1          1/1/2004
A        2          1/3/2004
A        3          1/9/2004 
B        4          1/7/2004
B        5          1/10/2004 
B        6          1/17/2004

I am trying to create a new data table that looks like this:

Name     Person     Date
A        1          1/1/2004
A        2          1/1/2004
A        2          1/3/2004
A        3          1/1/2004
A        3          1/3/2004
A        3          1/9/2004
B        4          1/7/2004
B        5          1/7/2004
B        5          1/10/2004
B        6          1/7/2004
B        6          1/10/2004
B        6          1/17/2004

where the date starts with the minimum name group date.

So far I have done something like this:

data$D = ave(data$Date, data$Name, FUN=min)
diff =  data$Date - data$D
sequence(diff) + rep(data$D,diff)

However, this only results in repetition, which also takes into account the dates in between. Is there an easy way to make an extension from a minimum date? Thank!

+3

r

James schwartz 13 Aug 14 at 8:54

source to share

3 answers

If dat

is a dataset

res <- do.call(rbind,lapply(split(dat, dat$Name),
     function(x) {
     Date1 <- as.Date(x$Date, "%m/%d/%Y")
     x <- x[order(Date1),]
     indx <- seq_len(nrow(x))
     cbind(x[rep(indx,indx), 1:2], Date=x[sequence(indx),3])}))

 row.names(res) <- 1:nrow(res)
 res
#  Name Person     Date
#1     A      1    1/1/2004
#2     A      2    1/1/2004
#3     A      2    1/3/2004
#4     A      3    1/1/2004
#5     A      3    1/3/2004
#6     A      3    1/9/2004
#7     B      4    1/7/2004
#8     B      5    1/7/2004
#9     B      5   1/10/2004
#10    B      6    1/7/2004
#11    B      6   1/10/2004
#12    B      6   1/17/2004

Using dplyr

dat %>% 
group_by(Name) %>%
mutate(Date1= as.Date(Date, "%m/%d/%Y")) %>% 
arrange(Name, Date1) %>% 
mutate(N= row_number()) %>% 
do(data.frame(.[rep(.$N, .$N),1:2], Date=.[sequence(.$N),3],stringsAsFactors=F))

#   Name Person      Date
#1     A      1  1/1/2004
#2     A      2  1/1/2004
#3     A      2  1/3/2004
#4     A      3  1/1/2004
#5     A      3  1/3/2004
#6     A      3  1/9/2004
#7     B      4  1/7/2004
#8     B      5  1/7/2004
#9     B      5 1/10/2004
#10    B      6  1/7/2004
#11    B      6 1/10/2004
#12    B      6 1/17/2004

+3

akrun 13 Aug '14 at 9:15

source to share

Nested lapply

:

#dummy data
df <- read.table(text="Name     Person     Date
A        1          1/1/2004
A        2          1/3/2004
A        3          1/9/2004 
B        4          1/7/2004
B        5          1/10/2004 
B        6          1/17/2004",header=TRUE)

#convert to date, to be used for min()
df$Date <- as.Date(df$Date,"%m/%d/%Y")

#result
res <- 
  do.call(rbind,
          lapply(split(df,df$Name),
                 function(i){
                   do.call(rbind,
                           lapply(unique(i$Person),
                                  function(j){
                                    d <- i[ i$Date<=min(i[ i$Person==j,"Date"]),]
                                    d$Person <- j
                                    return(d)}))
                 })
  )

# Name Person       Date
# A.1     A      1 2004-01-01
# A.2     A      2 2004-01-01
# A.3     A      2 2004-01-03
# A.4     A      3 2004-01-01
# A.5     A      3 2004-01-03
# A.6     A      3 2004-01-09
# B.4     B      4 2004-01-07
# B.41    B      5 2004-01-07
# B.5     B      5 2004-01-10
# B.42    B      6 2004-01-07
# B.51    B      6 2004-01-10
# B.6     B      6 2004-01-17

+2

zx8754 13 Aug At 10:11

source to share

David Arenburg · Accepted Answer · 2014-08-13T09:32:27+0000

It seems that you are looking for a solution data.table

, so here goes

library(data.table)
setDT(data)[, list(Person = rep(Person, seq_len(.N)),
                   Date = Date[sequence(seq_len(.N))]), by = Name]

#     Name Person      Date
#  1:    A      1  1/1/2004
#  2:    A      2  1/1/2004
#  3:    A      2  1/3/2004
#  4:    A      3  1/1/2004
#  5:    A      3  1/3/2004
#  6:    A      3  1/9/2004
#  7:    B      4  1/7/2004
#  8:    B      5  1/7/2004
#  9:    B      5 1/10/2004
# 10:    B      6  1/7/2004
# 11:    B      6 1/10/2004
# 12:    B      6 1/17/2004

Edit

This is the dataset that was used for this answer

data <- structure(list(Name = structure(c(1L, 1L, 1L, 2L, 2L, 2L), .Label = c("A", 
        "B"), class = "factor"), Person = 1:6, Date = structure(c(1L, 
        4L, 6L, 5L, 2L, 3L), .Label = c("1/1/2004", "1/10/2004", "1/17/2004", 
        "1/3/2004", "1/7/2004", "1/9/2004"), class = "factor")), .Names = c("Name", 
         "Person", "Date"), class = "data.frame", row.names = c(NA, -6L
         ))

Create a list that counts from the start to the current value

More articles: