R: Subset to last date

Question

R: Subset to last date

I have:

Keyword   Date   Pos   Bid
a       4/11/14   1   5.00
a       4/13/14   1   5.00
a       4/14/14   1   5.00
b        6/2/14   3   9.00
b        7/2/14   4   9.00  
b        8/2/14   4   9.00
c       8/29/14   2   3.00
c       8/30/14   2   3.00
c       8/31/14   2   3.00

I need a subset so that, for each Keyword, only the row with the latest date remains:

Keyword   Date   Pos   Bid
a       4/14/14   1   5.00
b        8/2/14   4   9.00
c       8/31/14   2   3.00

I tried:

Latest = ddply( df, 
                'Keyword', 
                function(x) c (
                    Date = max(as.Date(x$Date, '%m/%d/%y')), 
                    Pos = x$Pos[which(x$Date == max(as.Date(x$Date, '%m/%d/%y')))], 
                    Bid = x$Bid[which(x$Date == max(as.Date(x$Date, '%m/%d/%y')))]
                )
         )

and

Latest = subset( x, 
                 Date = max(as.Date(Date, '%m/%d/%y')), 
                 select = c('Identity', 'Date', 'Round.Avg.Pos.', 'Search.Bid')
         )

But they either give me an error or not the result I want. What am I missing?

Thanks.

+3

r subset

Cinji18 30 oct. 14 at 8:04

source to share

3 answers

akrun · Answer 1 · 2014-10-30T08:06:13+0000

You may try

 library(dplyr)
 library(tidyr)

  # Parse Date, sort newest-first, then keep the first row of each Keyword group.
  df %>%
    mutate(Date = as.Date(Date, format = "%m/%d/%y")) %>%
    group_by(Keyword) %>%
    arrange(desc(Date)) %>%
    slice(1L)

  #   Keyword       Date Pos Bid
  #1       a 2014-04-14   1   5
  #2       b 2014-08-02   4   9
  #3       c 2014-08-31   2   3

or

   # Within each Keyword group, keep the row(s) whose parsed Date equals the
   # group's maximum; if several rows share that date, all of them are kept.
   df %>%
     group_by(Keyword) %>%
     mutate(Date = as.Date(Date, format = "%m/%d/%y")) %>%
     filter(Date == max(Date))

Or using base R

  # indx holds, for every row, the latest parsed date within that row's Keyword.
  indx <- ave(as.Date(df$Date, format = "%m/%d/%y"), df$Keyword, FUN = max)
  # Keep the rows whose own parsed date equals their group's latest date.
  df[as.Date(df$Date, format = "%m/%d/%y") == indx, ]
  #  Keyword    Date Pos Bid
  #3       a 4/14/14   1   5
  #6       b  8/2/14   4   9
  #9       c 8/31/14   2   3

Or using ddply

  # Split df by Keyword; for each piece return the row(s) carrying its latest date.
  ddply(df, "Keyword", function(grp) {
    parsed <- as.Date(grp$Date, format = "%m/%d/%y")
    grp[parsed == max(parsed), ]
  })

  #  Keyword    Date Pos Bid
  #1       a 4/14/14   1   5
  #2       b  8/2/14   4   9
  #3       c 8/31/14   2   3

data

# Sample data from the question: three keywords with three dated rows each.
# Date is kept as m/d/yy text, so every solution parses it itself.
df <- data.frame(
  Keyword = rep(c("a", "b", "c"), each = 3L),
  Date = c(
    "4/11/14", "4/13/14", "4/14/14",
    "6/2/14", "7/2/14", "8/2/14",
    "8/29/14", "8/30/14", "8/31/14"
  ),
  Pos = c(1L, 1L, 1L, 3L, 4L, 4L, 2L, 2L, 2L),
  Bid = rep(c(5, 9, 3), each = 3L),
  stringsAsFactors = FALSE
)

David Arenburg · Answer 2 · 2014-10-30T08:22:21+0000

Or using data.table

library(data.table)
# For each Keyword, .SD is that group's rows; which.max() gives the position of
# the latest parsed date, so a single row per group is returned.
# NOTE(review): setDT() is documented to convert df to a data.table by
# reference, so df remains a data.table afterwards -- confirm that is acceptable.
setDT(df)[ ,.SD[which.max(as.Date(Date, format= "%m/%d/%y"))], by = Keyword]
#    Keyword    Date Pos Bid
# 1:       a 4/14/14   1   5
# 2:       b  8/2/14   4   9
# 3:       c 8/31/14   2   3

Here's an additional base R solution using the "split-apply-combine" methodology

# Split df by Keyword, pick the row with the latest date from each piece,
# then stack the per-keyword rows back into one data frame.
do.call(
  rbind,
  lapply(split(df, df$Keyword), function(grp) {
    newest <- which.max(as.Date(grp$Date, format = "%m/%d/%y"))
    grp[newest, ]
  })
)
#   Keyword    Date Pos Bid
# a       a 4/14/14   1   5
# b       b  8/2/14   4   9
# c       c 8/31/14   2   3

Note: your desired output left the Date column in the same format as before, so I apply as.Date on each iteration in both solutions. The better approach is to convert the column to class Date once and then use the already-converted column in the aggregation step.

rnso · Answer 3 · 2014-10-30T08:28:54+0000

Try:

# ddf is the input data frame (columns Keyword, Date, Pos, Bid).
# Parse the m/d/yy strings so rows can be ordered chronologically.
# `format` is passed as a named argument; the earlier format("...") call only
# worked because format() returns a character string unchanged.
ddf$Date <- as.Date(ddf$Date, format = "%m/%d/%y")
# Newest dates first, so the first row met for each Keyword is its latest one.
ddf <- ddf[rev(order(ddf$Date)), ]
# duplicated() flags every repeat of a Keyword; dropping them keeps one row each.
ddf <- ddf[!duplicated(ddf$Keyword), ]
# Show the result in alphabetical Keyword order.
ddf[order(ddf$Keyword), ]
  Keyword       Date Pos Bid
3       a 2014-04-14   1   5
6       b 2014-08-02   4   9
9       c 2014-08-31   2   3

R: Subset to last date

data

More articles: