Flagging event types that occurred in the last 21 days, for each row, by name

This is what my data frame looks like. The two right-most columns are my desired columns. They check, for each row, whether there is an "Email" activity type in the last 21 days and whether there is a "Webinar" activity type in the last 21 days.

 Name      ActivityType     ActivityDate  Email(last21days) Webinar(last21day)**             
John       Email            1/1/2014        TRUE                  NA   
John       Webinar          1/5/2014        TRUE                 TRUE
John       Sale             1/20/2014       TRUE                 TRUE
John       Webinar          3/25/2014       NA                   TRUE
John       Sale             4/1/2014        NA                   TRUE
John       Sale             7/1/2014        NA                   NA
Tom        Email            1/1/2015        TRUE                   NA   
Tom        Webinar          1/5/2015        TRUE                 TRUE
Tom        Sale             1/20/2015      TRUE                 TRUE
Tom        Webinar          3/25/2015       NA                   TRUE
Tom        Sale              4/1/2015        NA                   TRUE
Tom        Sale              7/1/2015       NA                   NA

      

Building on the help I got here (Retrieving event types from the last 21 days window), I've tried:

# Asker's attempt at a rolling self-join; the question text below reports that
# it does not produce the desired Email/Webinar flags.
# NOTE(review): as.Date() is called without a `format`; the sample dates look
# like m/d/Y, so this presumably needs format = "%m/%d/%Y" -- confirm.
df$ActivityDate <- as.Date(df$ActivityDate)
library(data.table)
setDT(df)
# Key on Name, then ActivityDate; the join below relies on this key.
setkey(df, Name,ActivityDate)
# Copy of the three raw columns, used as the `i` table of the self-join.
Elsetemp <- df[, .(Name, ActivityDate, ActivityType)]
# Self-join with roll = -21, evaluated once per row of Elsetemp (by = .EACHI).
# NOTE(review): i.ActivityType is the type of the Elsetemp row itself, so the
# flags appear to depend on the current row's own type rather than on the rows
# found inside the 21-day window -- likely the cause of the wrong output; confirm.
df[Elsetemp, `:=`(Email21 = as.logical(which(i.ActivityType == "Email")), 
                        Webinar21 = as.logical(which(i.ActivityType == "Webinar"))), 
         roll = -21, by = .EACHI]

      

to no avail, as I am getting TRUE for rows with "Sale". For example, in the second row, where ActivityType = Webinar, both Email21 and Webinar21 should be TRUE. When I say the last 21 days, I include the very day the event happened.

+3


source to share


2 answers


How about this?

Using rolling joins from data.table:



library(data.table)

# Assumes `dt` is a data.table with columns Name, ActivityType and
# ActivityDate, the latter still stored as m/d/Y text -- TODO confirm.
dt[, ActivityDate := as.Date(ActivityDate, format = "%m/%d/%Y")]
# The rolling joins in roll_index() rely on this key: rows are matched on Name
# and the roll is applied to ActivityDate.
setkey(dt, Name, ActivityDate)

# For each activity type, flag the rows of `x` that have an event of that type
# for the same Name on the row's date or within the preceding `roll` days.
#   x:     keyed data.table (Name, ActivityDate) with an ActivityType column
#   types: character vector of activity types to look for
#   roll:  window length in days, passed to the rolling join
# Returns a list with one logical vector per element of `types`; entries are
# TRUE where the join found a match and NA where it did not.
roll_index <- function(x, types, roll = 21) {
  lapply(types, function(type) {
    # which = TRUE returns the matched row number in the filtered table, or NA
    # when no row of this type falls inside the window.
    idx <- x[ActivityType == type][x, roll = roll, which = TRUE]
    # Any row number becomes TRUE; NA stays NA.
    as.logical(idx)
  })
}
dt[, c("Email_21", "Webinar_21") := roll_index(dt, c("Email", "Webinar"))]

#     Name ActivityType ActivityDate Email_21 Webinar_21
#  1: John        Email   2014-01-01     TRUE         NA
#  2: John      Webinar   2014-01-05     TRUE       TRUE
#  3: John         Sale   2014-01-20     TRUE       TRUE
#  4: John      Webinar   2014-03-25       NA       TRUE
#  5: John         Sale   2014-04-01       NA       TRUE
#  6: John         Sale   2014-07-01       NA         NA
#  7:  Tom        Email   2015-01-01     TRUE         NA
#  8:  Tom      Webinar   2015-01-05     TRUE       TRUE
#  9:  Tom         Sale   2015-01-20     TRUE       TRUE
# 10:  Tom      Webinar   2015-03-25       NA       TRUE
# 11:  Tom         Sale   2015-04-01       NA       TRUE
# 12:  Tom         Sale   2015-07-01       NA         NA

      

+4


source


Base R solution:

# Vectorised date-window generator: for every date in `v1`, produce that date
# and each of the following `days` days.
#   v1:   vector of class Date (may be empty)
#   days: window length in days after each start date (default 21)
# Returns a Date vector of length length(v1) * (days + 1), ordered window by
# window in the order of `v1`; a zero-length Date vector for empty input.
seq2 <- function(v1, days = 21L) {
  stopifnot(inherits(v1, "Date"), length(days) == 1L, !is.na(days), days >= 0)
  offsets <- seq.int(0L, days)
  # Repeat each start date once per offset, then add the recycled offsets.
  # This replaces the per-element seq() loop and is well defined for empty v1.
  rep(v1, each = length(offsets)) + rep(offsets, times = length(v1))
}

# Keep only the three raw columns; the flag columns are rebuilt below.
df <- df[, 1:3]
df$ActivityDate <- as.Date(df$ActivityDate, format = "%m/%d/%Y")

# For every row, report whether an event of `type` was logged for the SAME
# Name on that row's date or during the 21 days before it. Matching dates
# without regard to Name would let one person's events flag another person's
# rows whenever their dates are close together.
#   data: data frame with columns Name, ActivityType and ActivityDate (Date)
#   type: activity type to look for, e.g. "Email"
# Returns a logical vector with one element per row of `data`.
flag_recent <- function(data, type) {
  people <- as.character(data$Name)
  flagged <- logical(nrow(data))
  for (person in unique(people)) {
    rows <- which(people == person)
    events <- data$ActivityDate[
      which(people == person & data$ActivityType == type)
    ]
    events <- events[!is.na(events)]
    # A person with no event of this type keeps FALSE on all rows; the guard
    # also avoids calling seq2() on an empty vector.
    if (length(events) > 0) {
      flagged[rows] <- data$ActivityDate[rows] %in% seq2(events)
    }
  }
  flagged
}

#Email column
df$Email <- flag_recent(df, "Email")

#Webinar column
df$Webinar <- flag_recent(df, "Webinar")

      

First, we subset the first three columns, dropping the example output columns. Then we convert the date factors to Date with as.Date. The dates of the Email events are found by matching ActivityType against the string Email. The function seq2 was created to produce each such date and the 21 days after it. It creates a sequence that the activity dates can be checked against.



# Print the data frame with the added Email and Webinar flag columns.
df
#    Name ActivityType ActivityDate Email Webinar
# 1  John        Email   2014-01-01  TRUE   FALSE
# 2  John      Webinar   2014-01-05  TRUE    TRUE
# 3  John         Sale   2014-01-20  TRUE    TRUE
# 4  John      Webinar   2014-03-25 FALSE    TRUE
# 5  John         Sale   2014-04-01 FALSE    TRUE
# 6  John         Sale   2014-07-01 FALSE   FALSE
# 7   Tom        Email   2015-01-01  TRUE   FALSE
# 8   Tom      Webinar   2015-01-05  TRUE    TRUE
# 9   Tom         Sale   2015-01-20  TRUE    TRUE
# 10  Tom      Webinar   2015-03-25 FALSE    TRUE
# 11  Tom         Sale   2015-04-01 FALSE    TRUE
# 12  Tom         Sale   2015-07-01 FALSE   FALSE

      

Data

# Sample data from the question, including the two expected-output columns.
# `header = TRUE` is spelled out because `T` is an ordinary variable that can
# be reassigned.
df <- read.table(text=' Name      ActivityType     ActivityDate  Email(last21days) Webinar(last21day)**             
John       Email            1/1/2014        TRUE                  NA   
John       Webinar          1/5/2014        TRUE                 TRUE
John       Sale             1/20/2014       TRUE                 TRUE
John       Webinar          3/25/2014       NA                   TRUE
John       Sale             4/1/2014        NA                   TRUE
John       Sale             7/1/2014        NA                   NA
Tom        Email            1/1/2015        TRUE                   NA   
Tom        Webinar          1/5/2015        TRUE                 TRUE
Tom        Sale             1/20/2015      TRUE                 TRUE
Tom        Webinar          3/25/2015       NA                   TRUE
Tom        Sale              4/1/2015        NA                   TRUE
Tom        Sale              7/1/2015       NA                   NA', header = TRUE)

      

0


source







All Articles