Convert nested list items to a data frame and bind the result to a single data frame

I have nested lists:

x <- list(x = list(a = 1, 
                   b = 2), 
          y = list(a = 3, 
                   b = 4))

      

And I would like to convert the nested list to data.frames and then bind all data frames to one.

For this nesting level, I can do it with this line:

do.call(rbind.data.frame, lapply(x, as.data.frame, stringsAsFactors = FALSE))

      

So the result is:

  a b
x 1 2
y 3 4

      

My problem is that I would like to achieve this regardless of the nesting level. Another example with this list:

x <- list(X = list(x = list(a = 1, 
                       b = 2), 
              y = list(a = 3, 
                       b = 4)),
     Y = list(x = list(a = 1, 
                       b = 2), 
              y = list(a = 3, 
                       b = 4)))

do.call(rbind.data.frame, lapply(x, function(x) do.call(rbind.data.frame, lapply(x, as.data.frame, stringsAsFactors = FALSE))))

    a b
X.x 1 2
X.y 3 4
Y.x 1 2
Y.y 3 4

      

Does anyone have an idea to generate this for any nesting level? Thanks for any help

+3


source to share


5 answers


Borrowing from Spacedman and flodel here, we can define the following pair of recursive functions:

library(tidyverse)  # I use dplyr and purrr here, plus tidyr further down below

# Nesting depth of `this`.
#
# this: any R object.
# Returns 0L when `this` is not a list; otherwise 1L plus the depth of its
# deepest element. An empty list has depth 1L. Data frames are lists, so a
# data frame made of atomic columns has depth 1L.
depth <- function(this) {
  if (!is.list(this)) {
    return(0L)
  }
  # Seeding max() with 0L keeps an empty list from raising an error, and
  # vapply() always yields an integer vector (sapply() returns list() for
  # zero-length input, which max() rejects).
  1L + max(0L, vapply(this, depth, integer(1)))
}

# Collapse a nested list into a single data frame, whatever its nesting depth.
#
# l: a nested list whose innermost lists hold the column values of one row.
# Each call binds the lists sitting two levels above the leaves with
# bind_rows() and then recurses on the shallower result, until only one
# level of data frames / row-lists is left to bind.
bind_at_any_depth <- function(l) {
  levels_left <- depth(l)
  if (levels_left != 2) {
    collapsed <- at_depth(l, levels_left - 2, bind_rows)
    return(bind_at_any_depth(collapsed))
  }
  bind_rows(l)
}

      

Now we can bind a list of arbitrary depth into one data.frame:

bind_at_any_depth(x)

      

# A tibble: 2 × 2
      a     b
  <dbl> <dbl>
1     1     2
2     3     4

      

bind_at_any_depth(x_ext) # From P Lapointe

      

# A tibble: 5 × 2
      a     b
  <dbl> <dbl>
1     1     2
2     5     6
3     7     8
4     1     2
5     3     4

      



If you want to track the origin of each row, you can use this version:

# Like bind_at_any_depth(), but also records where each row came from.
#
# Every round of binding stores the names of the level being collapsed in an
# id column called "source<depth>". At the last level the names go into
# "source", and unite() then merges every column whose name contains "source"
# into one `source` column.
#
# l: a nested list, as accepted by bind_at_any_depth().
# Returns the bound data frame with an added `source` column.
bind_at_any_depth2 <- function(l) {
  if (depth(l) == 2) {
    l <- bind_rows(l, .id = 'source')
    l <- unite(l, 'source', contains('source'))
    return(l)
  } else {
    l <- at_depth(l, depth(l) - 2, bind_rows, .id = paste0('source', depth(l)))
    # Recurse into this function, not bind_at_any_depth(): the plain version
    # never adds or unites the id columns, so deeper inputs would come back
    # without a `source` column.
    bind_at_any_depth2(l)
  }
}

      

This will add a source column:

bind_at_any_depth2(x_ext)

      

# A tibble: 5 × 3
  source     a     b
*  <chr> <dbl> <dbl>
1  X_x_1     1     2
2  X_y_z     5     6
3 X_y_zz     7     8
4  Y_x_1     1     2
5  Y_y_1     3     4

      

Note: at some point you can use purrr::depth and change at_depth to modify_depth, once the new version of purrr is released on CRAN (thanks @ManuelS).

+7


source


UPDATE

Here you can flatten more deeply nested lists simply with unlist. Since the structure is now uneven, the result will not be a data.frame.

x_ext <- list(X = list(x = list(a = 1,
                       b = 2),
              y = list(z=list(a = 5,
                       b = 6),
                       zz=list(a = 7,
                       b = 8))),
     Y = list(x = list(a = 1,
                       b = 2),
              y = list(a = 3,
                       b = 4)))

unlist(x_ext)

   X.x.a    X.x.b  X.y.z.a  X.y.z.b X.y.zz.a X.y.zz.b    Y.x.a    Y.x.b    Y.y.a    Y.y.b 
       1        2        5        6        7        8        1        2        3        4 

      



My initial answer was to unlist first and rbind afterwards. However, it only works with the example in the question.

x_unlist <- unlist(x, recursive = FALSE)
do.call("rbind", x_unlist)
    a b
X.x 1 2
X.y 3 4
Y.x 1 2
Y.y 3 4

      

+2


source


You can flatten the list and coerce it to a data.frame, collecting the names along the way, with purrr::flatten_df from the development version:

library(purrr)    # or library(tidyverse)

x <- list(X = list(x = list(a = 1, 
                       b = 2), 
              y = list(a = 3, 
                       b = 4)),
     Y = list(x = list(a = 1, 
                       b = 2), 
              y = list(a = 3, 
                       b = 4)))

x %>% flatten_df(.id = 'var')
#> # A tibble: 4 × 3
#>     var     a     b
#>   <chr> <dbl> <dbl>
#> 1     x     1     2
#> 2     y     3     4
#> 3     x     1     2
#> 4     y     3     4

      

or, if you want to keep both sets of names, use map_df:

library(tidyverse)

x %>% map_df(~bind_rows(.x, .id = 'var2'), .id = 'var1')
#> # A tibble: 4 × 4
#>    var1  var2     a     b
#>   <chr> <chr> <dbl> <dbl>
#> 1     X     x     1     2
#> 2     X     y     3     4
#> 3     Y     x     1     2
#> 4     Y     y     3     4

      

+2


source


We can do this with tidyverse

library(tidyverse)
x %>% 
   map(bind_rows) %>%
   bind_rows(.id = 'grp')
# A tibble: 4 × 3
#     grp     a     b    
#   <chr> <dbl> <dbl>
#1     X     1     2
#2     X     3     4
#3     Y     1     2
#4     Y     3     4

      


Or using base R

do.call(rbind, do.call(c, x))
#    a b
#X.x 1 2
#X.y 3 4
#Y.x 1 2
#Y.y 3 4

      

0


source


This is based on P. Lapointe's answer and uses the idea here and here to extract the final names in the list.

 # Flatten a nested list into a data frame with one row per innermost list.
 #
 # unlist() yields one value per leaf, named by its dot-separated path. The
 # part of the path before the last dot becomes the row label ("major") and
 # the part after it the column label ("minor"); xtabs() then spreads the
 # values into a major-by-minor table, which is returned as a data frame.
 bind <- function(x) {
     long <- stack(unlist(x))
     leaf_path <- long$ind
     long$major <- tools::file_path_sans_ext(leaf_path)
     long$minor <- tools::file_ext(leaf_path)
     wide <- xtabs(values ~ major + minor, data = long)
     as.data.frame.matrix(wide)
 }

 bind(x)
    a b
X.x 1 2
X.y 3 4
Y.x 1 2
Y.y 3 4

 bind(x_ext)
       a b
X.x    1 2
X.y.z  5 6
X.y.zz 7 8
Y.x    1 2
Y.y    3 4

      

0


source







All Articles