Apply table to list of vectors and aggregate it
I have it:
require(data.table)
items = list(c(1,1,3), c(2,2,4), c(3,4,5,6))
multiplier = c(10,20,30)
dt = data.table(items, multiplier)
# items multiplier
#1: 1,1,3 10
#2: 2,2,4 20
#3: 3,4,5,6 30
I want it:
table(unlist(rep(items, multiplier)))
# 1 2 3 4 5 6
#20 40 40 50 30 30
This is very poor performance when the element vectors are large.
Can this be done without using it rep
?
source to share
If you don't mind that with a data.table instead of a table object, you could do:
library(tidyr)
library(data.table)
unnest(dt, items)[, .(sum(multiplier)), items]
# items V1
#1: 1 20
#2: 3 40
#3: 2 40
#4: 4 50
#5: 5 30
#6: 6 30
You can of course go ahead and modify the result in the format you want, for example using dcast.data.table
.
Note: for tiny sample data, the original approach is with table
and rep
is faster on my machine, but perhaps this approach scales better (?).
source to share
This ended up being somewhat roundabout but not replicating itemsso would probably be faster:
stbls <- lapply( apply(dt,1,I),function(rw) table(rw[['items']])*rw[['multiplier']])
>
> str(stbls)
List of 3
$ : table [1:2(1d)] 20 10
..- attr(*, "dimnames")=List of 1
.. ..$ : chr [1:2] "1" "3"
$ : table [1:2(1d)] 40 20
..- attr(*, "dimnames")=List of 1
.. ..$ : chr [1:2] "2" "4"
$ : table [1:4(1d)] 30 30 30 30
..- attr(*, "dimnames")=List of 1
.. ..$ : chr [1:4] "3" "4" "5" "6"
> xtabs(Freq ~ Var1, data=do.call(rbind, lapply(stbls,as.data.frame)))
Var1
1 3 2 4 5 6
20 40 40 50 30 30
The starting point was:
> apply(dt,1,I)
[[1]]
$items
[1] 1 1 3
$multiplier
[1] 10
[[2]]
$items
[1] 2 2 4
$multiplier
[1] 20
[[3]]
$items
[1] 3 4 5 6
$multiplier
[1] 30
source to share
data.tables are columnar data structures (like data.frames) and work best when you have data (including grouping variables) in the correct format.
require(data.table)
dt[, .(items = unlist(items),
mult = rep.int(multiplier, vapply(items,
length, 0L)))][, sum(mult), by=items]
# items V1
# 1: 1 20
# 2: 3 40
# 3: 2 40
# 4: 4 50
# 5: 5 30
# 6: 6 30
source to share