2

What is the best way to build nested lists from large datasets? Currently I am adding the elements with a `for` loop, but I don't know if that is the most efficient way to do it.

In the example below I want to fill a nested list with the taxonomic levels Domain > Kingdom > Phylum > Class > Order > Family > Genus > Species.

The code currently goes through each taxonomic level, collecting the information and filling in the list. Because of the amount of data, the process takes too long, and I would like to know if there is a way to optimize it.

The code is attached below. I appreciate any suggestions, comments, etc.

Thank you


# Example taxonomy table: one row per species, one column per rank, ordered
# from the broadest rank (Reino) to the narrowest (Especie).
data <- data.frame(Reino   = c("reinoa","reinoa","reinob","reinoc"),
                   Filo    = c("Filoa1","Filoa2","Filob","Filoc"),
                   Clase   = c("Clasea1","Clase2","Claseb","Clasec"),
                   Orden   = c("Ordena1","Ordena2","Ordenb","Ordenc"),
                   Familia = c("Familiaa1","Familiaa2","Familiab","Familiac"),
                   Genero  = c("Generoa1","Generoa2","Generob","Generoc"),
                   Especie = c("Especiea1","Especiea2","Especieb","Especiec"))

# Nested named list mirroring the taxonomy:
#   dftaxonomica[[Reino]][[Filo]][[Clase]][[Orden]][[Familia]][[Genero]][[Especie]]
# Every species leaf is an empty list().
#
# The container must exist before the first `[[<-`; otherwise R stops with
# "object 'dftaxonomica' not found".
dftaxonomica <- list()

# Each level filters only the rows already selected by its parent instead of
# re-scanning the whole of `data` with an ever-growing logical mask, and the
# children of a node are assembled first and attached to the parent once.
for (kingdom in unique(data$Reino)) {
  print(kingdom)  # progress output, one line per kingdom
  in_kingdom <- data[data$Reino == kingdom, , drop = FALSE]
  phyla <- list()

  for (phylum in unique(in_kingdom$Filo)) {
    in_phylum <- in_kingdom[in_kingdom$Filo == phylum, , drop = FALSE]
    classes <- list()

    for (class_name in unique(in_phylum$Clase)) {
      in_class <- in_phylum[in_phylum$Clase == class_name, , drop = FALSE]
      orders <- list()

      for (order_name in unique(in_class$Orden)) {
        in_order <- in_class[in_class$Orden == order_name, , drop = FALSE]
        families <- list()

        for (family in unique(in_order$Familia)) {
          in_family <- in_order[in_order$Familia == family, , drop = FALSE]
          genera <- list()

          for (genus in unique(in_family$Genero)) {
            # Filter on the Genero column against the current genus; the
            # species of that genus become the leaves.
            in_genus <- in_family[in_family$Genero == genus, , drop = FALSE]
            species <- as.character(unique(in_genus$Especie))
            genera[[genus]] <- setNames(
              rep(list(list()), length(species)),
              species
            )
          }

          families[[family]] <- genera
        }

        orders[[order_name]] <- families
      }

      classes[[class_name]] <- orders
    }

    phyla[[phylum]] <- classes
  }

  dftaxonomica[[kingdom]] <- phyla
}


2 Answers 2

2

A recursive function might help:

# Convert a data frame of hierarchy columns into a nested named list.
#
# The columns of `x` are read left to right as levels of a hierarchy. Rows are
# grouped by the first column, and each group is processed recursively on the
# remaining columns.
#
# x: a data frame with at least one column.
#
# Returns a named list. Names at each depth are the values of the
# corresponding column; the elements at the deepest level (last column) are
# NULL placeholders.
recurse <- function(x) {
    nms <- names(x)
    if (length(nms) > 1L) {
        # drop = TRUE discards factor levels that do not occur in this subset;
        # without it, factor columns would yield an empty branch for every
        # unused level at every depth.
        groups <- split(x[nms[-1L]], x[[nms[1L]]], drop = TRUE)
        lapply(groups, recurse)
    } else {
        # Last column: one NULL-valued entry per distinct value. as.character()
        # makes sure factor values are used as labels for the names.
        leaves <- as.character(unique(x[[1L]]))
        setNames(vector("list", length(leaves)), leaves)
    }
}

recurse(data)
$reinoa
$reinoa$Filoa1
$reinoa$Filoa1$Clasea1
$reinoa$Filoa1$Clasea1$Ordena1
$reinoa$Filoa1$Clasea1$Ordena1$Familiaa1
$reinoa$Filoa1$Clasea1$Ordena1$Familiaa1$Generoa1
$reinoa$Filoa1$Clasea1$Ordena1$Familiaa1$Generoa1$Especiea1
NULL

$reinoa$Filoa2
$reinoa$Filoa2$Clase2
$reinoa$Filoa2$Clase2$Ordena2
$reinoa$Filoa2$Clase2$Ordena2$Familiaa2
$reinoa$Filoa2$Clase2$Ordena2$Familiaa2$Generoa2
$reinoa$Filoa2$Clase2$Ordena2$Familiaa2$Generoa2$Especiea2
NULL


$reinob
$reinob$Filob
$reinob$Filob$Claseb
$reinob$Filob$Claseb$Ordenb
$reinob$Filob$Claseb$Ordenb$Familiab
$reinob$Filob$Claseb$Ordenb$Familiab$Generob
$reinob$Filob$Claseb$Ordenb$Familiab$Generob$Especieb
NULL


$reinoc
$reinoc$Filoc
$reinoc$Filoc$Clasec
$reinoc$Filoc$Clasec$Ordenc
$reinoc$Filoc$Clasec$Ordenc$Familiac
$reinoc$Filoc$Clasec$Ordenc$Familiac$Generoc
$reinoc$Filoc$Clasec$Ordenc$Familiac$Generoc$Especiec
NULL


Sign up to request clarification or add additional context in comments.

Comments

1

The rrapply() function in the rrapply-package has an (efficient) option how = "unmelt" exactly for this purpose:

library(rrapply)

## The last column of the melted data frame holds the leaf values:
## attach one empty list per row as a list column named "Value".
empty_leaves <- lapply(seq_len(nrow(data)), function(row) list())
data[["Value"]] <- empty_leaves

## Rebuild the nested list from the melted representation.
out <- rrapply(data, how = "unmelt")

## Inspect the resulting structure.
str(out)
#> List of 3
#>  $ reinoa:List of 2
#>   ..$ Filoa1:List of 1
#>   .. ..$ Clasea1:List of 1
#>   .. .. ..$ Ordena1:List of 1
#>   .. .. .. ..$ Familiaa1:List of 1
#>   .. .. .. .. ..$ Generoa1:List of 1
#>   .. .. .. .. .. ..$ Especiea1: list()
#>   ..$ Filoa2:List of 1
#>   .. ..$ Clase2:List of 1
#>   .. .. ..$ Ordena2:List of 1
#>   .. .. .. ..$ Familiaa2:List of 1
#>   .. .. .. .. ..$ Generoa2:List of 1
#>   .. .. .. .. .. ..$ Especiea2: list()
#>  $ reinob:List of 1
#>   ..$ Filob:List of 1
#>   .. ..$ Claseb:List of 1
#>   .. .. ..$ Ordenb:List of 1
#>   .. .. .. ..$ Familiab:List of 1
#>   .. .. .. .. ..$ Generob:List of 1
#>   .. .. .. .. .. ..$ Especieb: list()
#>  $ reinoc:List of 1
#>   ..$ Filoc:List of 1
#>   .. ..$ Clasec:List of 1
#>   .. .. ..$ Ordenc:List of 1
#>   .. .. .. ..$ Familiac:List of 1
#>   .. .. .. .. ..$ Generoc:List of 1
#>   .. .. .. .. .. ..$ Especiec: list()

Reference link

https://jorischau.github.io/rrapply/articles/articles/2-efficient-melting-unnesting.html#efficient-unmelting-of-melted-data-frames

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.