Replicate each row and change one column to binary values

df <- data.frame(n = c(3, 2, 2), 
                 survive = c(2, 1, 2), 
                 a = c(1,1,0), 
                 b = c(0,0,1))

      

How can I expand the last two columns of the data.frame above so that each row displays the number of times given in column "n". The second column "survives" is changed to binary values ​​0/1 according to the value "survive"

In other words:

n  survive a  b
3  2       1  0
2  1       1  0
2  2       0  1

      

For this

survive a  b
1       1  0
1       1  0
0       1  0
1       1  0
0       1  0
1       0  1
1       0  1

      

+3


source to share


4 answers


Several alternative solutions:

1) Using base R:

rn <- rep(1:nrow(df), df$n)
df2 <- df[rn,]
df2$survive <- as.integer(df2$survive >= ave(rn, rn, FUN = seq_along))

      

which gives:

> df2[,-1]
   survive a b
1:       1 1 0
2:       1 1 0
3:       0 1 0
4:       1 1 0
5:       0 1 0
6:       1 0 1
7:       1 0 1

      


2) Using the data.table package:

library(data.table)
df2 <- setDT(df)[, rid := .I
                 ][, .(survive = c(rep(1, survive), rep(0, n - survive)), a, b)
                   , by = rid
                   ][, rid := NULL][]

      

which gives:

> df2
   survive a b
1:       1 1 0
2:       1 1 0
3:       0 1 0
4:       1 1 0
5:       0 1 0
6:       1 0 1
7:       1 0 1

      



Or a little shorter:

df2 <- setDT(df)[, .(survive = c(rep(1, survive), rep(0, n - survive)), a, b), by = 1:nrow(df)
                 ][, nrow := NULL]

      


3) Using dplyr package:

library(dplyr)
df %>% 
  mutate(rid = row_number()) %>% 
  .[rep(1:nrow(df), df$n),] %>% 
  group_by(rid) %>% 
  mutate(survive = c(rep(1, unique(survive)), rep(0, unique(n) - unique(survive))) ) %>% 
  ungroup() %>% 
  select(-n, -rid)

      

which gives:

# A tibble: 7 × 3
  survive     a     b
    <dbl> <dbl> <dbl>
1       1     1     0
2       1     1     0
3       0     1     0
4       1     1     0
5       0     1     0
6       1     0     1
7       1     0     1

      


Data used:

df <- data.frame(n = c(3, 2, 2), 
                 survive = c(2, 1, 2), 
                 a = c(1,1,0), 
                 b = c(0,0,1))

      

+3


source


One solution using splitstackshape

to expand strings and dplyr

,



library(splitstackshape)
library(dplyr)

df %>% 
  mutate(new = 1) %>% 
  expandRows('n') %>% 
  group_by(grp = cumsum(c(1, diff(survive) != 0))) %>% 
  mutate(survive = replace(new, tail(new, n() - survive[1]), 0)) %>% 
  arrange(grp, desc(survive)) %>% 
  ungroup() %>% 
  select(-c(new, grp))

# A tibble: 7 × 3
#  survive     a     b
#    <dbl> <dbl> <dbl>
#1       1     1     0
#2       1     1     0
#3       0     1     0
#4       1     1     0
#5       0     1     0
#6       1     0     1
#7       1     0     1

      

+3


source


We can do with base R

df2 <- df1[rep(1:nrow(df1), df1$n),-(1:2)]
row.names(df2) <- NULL
df2 <- cbind(Survive = unlist(Map(function(x, y) rep(c(1,0),
             c(y, x-y)),  df1$n, df1$survive)), df2)
df2
#  Survive a b
#1       1 1 0
#2       1 1 0
#3       0 1 0
#4       1 1 0
#5       0 1 0
#6       1 0 1
#7       1 0 1

      


Or a more vectorized approach

df1 <- df[rep(seq_len(nrow(df)), df$n),-(1:2)]
df1$survive <- with(df, rep(rep(c(1,0), nrow(df)), rbind(survive, n - survive)))

      

+3


source


Here's a solution using split / apply / comb approach in R base:

df2 <- do.call(rbind, lapply(split(df, seq_along(df$n)), function(i) {

  survive = c(rep(1, i$survive), rep(0, i$n - i$survive))

  cbind(survive, i[rep(1, i$n), c("a", "b")])

}))

      

Result:

      survive a b
1.1         1 1 0
1.1.1       1 1 0
1.1.2       0 1 0
2.2         1 1 0
2.2.1       0 1 0
3.3         1 0 1
3.3.1       1 0 1

      

+2


source







All Articles