Replicate each row and change one column to binary values

Question

Replicate each row and change one column to binary values

df <- data.frame(n = c(3, 2, 2), 
                 survive = c(2, 1, 2), 
                 a = c(1,1,0), 
                 b = c(0,0,1))

How can I expand the last two columns of the data.frame above so that each row displays the number of times given in column "n". The second column "survives" is changed to binary values 0/1 according to the value "survive"

In other words:

n  survive a  b
3  2       1  0
2  1       1  0
2  2       0  1

For this

survive a  b
1       1  0
1       1  0
0       1  0
1       1  0
0       1  0
1       0  1
1       0  1

+3

r

Cheng 23 Apr 17 at 9:08

source to share

4 answers

One solution using splitstackshape

to expand strings and dplyr

,

library(splitstackshape)
library(dplyr)

df %>% 
  mutate(new = 1) %>% 
  expandRows('n') %>% 
  group_by(grp = cumsum(c(1, diff(survive) != 0))) %>% 
  mutate(survive = replace(new, tail(new, n() - survive[1]), 0)) %>% 
  arrange(grp, desc(survive)) %>% 
  ungroup() %>% 
  select(-c(new, grp))

# A tibble: 7 × 3
#  survive     a     b
#    <dbl> <dbl> <dbl>
#1       1     1     0
#2       1     1     0
#3       0     1     0
#4       1     1     0
#5       0     1     0
#6       1     0     1
#7       1     0     1

+3

Sotos 23 Apr 17 at 9:56

source to share

We can do with base R

df2 <- df1[rep(1:nrow(df1), df1$n),-(1:2)]
row.names(df2) <- NULL
df2 <- cbind(Survive = unlist(Map(function(x, y) rep(c(1,0),
             c(y, x-y)),  df1$n, df1$survive)), df2)
df2
#  Survive a b
#1       1 1 0
#2       1 1 0
#3       0 1 0
#4       1 1 0
#5       0 1 0
#6       1 0 1
#7       1 0 1

Or a more vectorized approach

df1 <- df[rep(seq_len(nrow(df)), df$n),-(1:2)]
df1$survive <- with(df, rep(rep(c(1,0), nrow(df)), rbind(survive, n - survive)))

+3

akrun 23 Apr 17 at 9:56

source to share

Here's a solution using split / apply / comb approach in R base:

df2 <- do.call(rbind, lapply(split(df, seq_along(df$n)), function(i) {

  survive = c(rep(1, i$survive), rep(0, i$n - i$survive))

  cbind(survive, i[rep(1, i$n), c("a", "b")])

}))

Result:

      survive a b
1.1         1 1 0
1.1.1       1 1 0
1.1.2       0 1 0
2.2         1 1 0
2.2.1       0 1 0
3.3         1 0 1
3.3.1       1 0 1

+2

ulfelder 23 Apr 17 at 9:57

source to share

Jaap · Accepted Answer · 2017-04-23T09:59:29+0000

Several alternative solutions:

1) Using base R:

rn <- rep(1:nrow(df), df$n)
df2 <- df[rn,]
df2$survive <- as.integer(df2$survive >= ave(rn, rn, FUN = seq_along))

which gives:

> df2[,-1]
   survive a b
1:       1 1 0
2:       1 1 0
3:       0 1 0
4:       1 1 0
5:       0 1 0
6:       1 0 1
7:       1 0 1

2) Using the data.table package:

library(data.table)
df2 <- setDT(df)[, rid := .I
                 ][, .(survive = c(rep(1, survive), rep(0, n - survive)), a, b)
                   , by = rid
                   ][, rid := NULL][]

which gives:

> df2
   survive a b
1:       1 1 0
2:       1 1 0
3:       0 1 0
4:       1 1 0
5:       0 1 0
6:       1 0 1
7:       1 0 1

Or a little shorter:

df2 <- setDT(df)[, .(survive = c(rep(1, survive), rep(0, n - survive)), a, b), by = 1:nrow(df)
                 ][, nrow := NULL]

3) Using dplyr package:

library(dplyr)
df %>% 
  mutate(rid = row_number()) %>% 
  .[rep(1:nrow(df), df$n),] %>% 
  group_by(rid) %>% 
  mutate(survive = c(rep(1, unique(survive)), rep(0, unique(n) - unique(survive))) ) %>% 
  ungroup() %>% 
  select(-n, -rid)

which gives:

# A tibble: 7 × 3
  survive     a     b
    <dbl> <dbl> <dbl>
1       1     1     0
2       1     1     0
3       0     1     0
4       1     1     0
5       0     1     0
6       1     0     1
7       1     0     1

Data used:

df <- data.frame(n = c(3, 2, 2), 
                 survive = c(2, 1, 2), 
                 a = c(1,1,0), 
                 b = c(0,0,1))

Replicate each row and change one column to binary values

More articles: