Apply table to list of vectors and aggregate it

I have it:

require(data.table)
items = list(c(1,1,3), c(2,2,4), c(3,4,5,6))
multiplier = c(10,20,30)
dt = data.table(items, multiplier)
#     items multiplier
#1:   1,1,3         10
#2:   2,2,4         20
#3: 3,4,5,6         30

      

I want it:

table(unlist(rep(items, multiplier)))
# 1  2  3  4  5  6 
#20 40 40 50 30 30 

      

This is very poor performance when the element vectors are large.
Can this be done without using it rep

?

+3


source to share


4 answers


If you don't mind that with a data.table instead of a table object, you could do:

library(tidyr)
library(data.table)

unnest(dt, items)[, .(sum(multiplier)), items]
#   items V1
#1:     1 20
#2:     3 40
#3:     2 40
#4:     4 50
#5:     5 30
#6:     6 30

      



You can of course go ahead and modify the result in the format you want, for example using dcast.data.table

.

Note: for tiny sample data, the original approach is with table

and rep

is faster on my machine, but perhaps this approach scales better (?).

+2


source


Using tidyr

anddplyr

library(dplyr)
library(tidyr)

dt %>% 
  unnest(items) %>% 
  group_by(items) %>% 
  summarise(sum = sum(multiplier)) %>% 
  arrange (items)

      



You get:

Source: local data table [6 x 2]

  items sum
1     1  20
2     2  40
3     3  40
4     4  50
5     5  30
6     6  30

      

+1


source


This ended up being somewhat roundabout but not replicating itemsso would probably be faster:

stbls <- lapply( apply(dt,1,I),function(rw) table(rw[['items']])*rw[['multiplier']])
> 
> str(stbls)
List of 3
 $ : table [1:2(1d)] 20 10
  ..- attr(*, "dimnames")=List of 1
  .. ..$ : chr [1:2] "1" "3"
 $ : table [1:2(1d)] 40 20
  ..- attr(*, "dimnames")=List of 1
  .. ..$ : chr [1:2] "2" "4"
 $ : table [1:4(1d)] 30 30 30 30
  ..- attr(*, "dimnames")=List of 1
  .. ..$ : chr [1:4] "3" "4" "5" "6"

> xtabs(Freq ~ Var1, data=do.call(rbind, lapply(stbls,as.data.frame))) 
Var1
 1  3  2  4  5  6 
20 40 40 50 30 30 

      

The starting point was:

> apply(dt,1,I)
[[1]]
$items
[1] 1 1 3

$multiplier
[1] 10


[[2]]
$items
[1] 2 2 4

$multiplier
[1] 20


[[3]]
$items
[1] 3 4 5 6

$multiplier
[1] 30

      

0


source


data.tables are columnar data structures (like data.frames) and work best when you have data (including grouping variables) in the correct format.

require(data.table)
dt[, .(items = unlist(items), 
       mult  = rep.int(multiplier, vapply(items, 
                 length, 0L)))][, sum(mult), by=items]
#    items V1
# 1:     1 20
# 2:     3 40
# 3:     2 40
# 4:     4 50
# 5:     5 30
# 6:     6 30

      

0


source







All Articles