Replicate each row and change one column to binary values
df <- data.frame(n = c(3, 2, 2),
survive = c(2, 1, 2),
a = c(1,1,0),
b = c(0,0,1))
How can I expand the last two columns of the data.frame above so that each row displays the number of times given in column "n". The second column "survives" is changed to binary values 0/1 according to the value "survive"
In other words:
n survive a b
3 2 1 0
2 1 1 0
2 2 0 1
For this
survive a b
1 1 0
1 1 0
0 1 0
1 1 0
0 1 0
1 0 1
1 0 1
+3
source to share
4 answers
Several alternative solutions:
1) Using base R:
rn <- rep(1:nrow(df), df$n)
df2 <- df[rn,]
df2$survive <- as.integer(df2$survive >= ave(rn, rn, FUN = seq_along))
which gives:
> df2[,-1]
survive a b
1: 1 1 0
2: 1 1 0
3: 0 1 0
4: 1 1 0
5: 0 1 0
6: 1 0 1
7: 1 0 1
2) Using the data.table package:
library(data.table)
df2 <- setDT(df)[, rid := .I
][, .(survive = c(rep(1, survive), rep(0, n - survive)), a, b)
, by = rid
][, rid := NULL][]
which gives:
> df2
survive a b
1: 1 1 0
2: 1 1 0
3: 0 1 0
4: 1 1 0
5: 0 1 0
6: 1 0 1
7: 1 0 1
Or a little shorter:
df2 <- setDT(df)[, .(survive = c(rep(1, survive), rep(0, n - survive)), a, b), by = 1:nrow(df)
][, nrow := NULL]
3) Using dplyr package:
library(dplyr)
df %>%
mutate(rid = row_number()) %>%
.[rep(1:nrow(df), df$n),] %>%
group_by(rid) %>%
mutate(survive = c(rep(1, unique(survive)), rep(0, unique(n) - unique(survive))) ) %>%
ungroup() %>%
select(-n, -rid)
which gives:
# A tibble: 7 × 3
survive a b
<dbl> <dbl> <dbl>
1 1 1 0
2 1 1 0
3 0 1 0
4 1 1 0
5 0 1 0
6 1 0 1
7 1 0 1
Data used:
df <- data.frame(n = c(3, 2, 2),
survive = c(2, 1, 2),
a = c(1,1,0),
b = c(0,0,1))
+3
source to share
One solution using splitstackshape
to expand strings and dplyr
,
library(splitstackshape)
library(dplyr)
df %>%
mutate(new = 1) %>%
expandRows('n') %>%
group_by(grp = cumsum(c(1, diff(survive) != 0))) %>%
mutate(survive = replace(new, tail(new, n() - survive[1]), 0)) %>%
arrange(grp, desc(survive)) %>%
ungroup() %>%
select(-c(new, grp))
# A tibble: 7 × 3
# survive a b
# <dbl> <dbl> <dbl>
#1 1 1 0
#2 1 1 0
#3 0 1 0
#4 1 1 0
#5 0 1 0
#6 1 0 1
#7 1 0 1
+3
source to share
We can do with base R
df2 <- df1[rep(1:nrow(df1), df1$n),-(1:2)]
row.names(df2) <- NULL
df2 <- cbind(Survive = unlist(Map(function(x, y) rep(c(1,0),
c(y, x-y)), df1$n, df1$survive)), df2)
df2
# Survive a b
#1 1 1 0
#2 1 1 0
#3 0 1 0
#4 1 1 0
#5 0 1 0
#6 1 0 1
#7 1 0 1
Or a more vectorized approach
df1 <- df[rep(seq_len(nrow(df)), df$n),-(1:2)]
df1$survive <- with(df, rep(rep(c(1,0), nrow(df)), rbind(survive, n - survive)))
+3
source to share
Here's a solution using split / apply / comb approach in R base:
df2 <- do.call(rbind, lapply(split(df, seq_along(df$n)), function(i) {
survive = c(rep(1, i$survive), rep(0, i$n - i$survive))
cbind(survive, i[rep(1, i$n), c("a", "b")])
}))
Result:
survive a b
1.1 1 1 0
1.1.1 1 1 0
1.1.2 0 1 0
2.2 1 1 0
2.2.1 0 1 0
3.3 1 0 1
3.3.1 1 0 1
+2
source to share