View comma separated data in another table

I have 2 tables where the data frame contains data with commas.

It is necessary to find out which category is present in the search dataframe, that is, in Option 1 from the category "Cat A, Cat B, Cat C" in which there is a category. if found, you must show a score against it.

If you find a multiple instance, we need to show the first occurrence score, so in the first case, Cat C and Cat Z are both in Option 1, however, we need to show the score for Cat C, which is 2.

Table

Option           | Cat 
-----             ------   
Option 1         | Cat A, Cat B, Cat C, Cat Z
Option 2         | Cat X, Cat Y, Cat Z, Cat B
Option 3         | Cat P, Cat Q, Cat R, Cat S, Cat T 
Option 1         | Cat T 
Option 3         | Cat E, Cat F 

      

Search

Cat   |   Option    |   Score

Cat A |   Option 2   |   8
Cat B |   Option 3   |   3
Cat C |   Option 1   |   2
Cat X |   Option 3   |   9
Cat Y |   Option 1   |   1
Cat Z |   Option 2   |   1
Cat P |   Option 3   |   2
Cat Q |   Option 2   |   9
Cat R |   Option 1   |   4
Cat S |   Option 4   |   0
Cat T |   Option 1   |   5
Cat E |   Option 4   |   1
Cat F |   Option 3   |   10

      

Output

 Option      |     Cat                                |     Found_cat    |     Score
 Option 1    |     Cat A, Cat B, Cat C                |     Cat C        |     2
 Option 2    |     Cat X, Cat Y, Cat Z, Cat B          |     Cat Z        |     1
 Option 3    |     Cat P, Cat Q, Cat R, Cat S, Cat T  |     Cat P        |     2
 Option 1    |     Cat T                              |     Cat T        |     5
 Option 3    |     Cat E, Cat F                       |     Cat F        |     10

      

R Code to create data frame

Table <- data.frame(Option = c("Option 1", "Option 2", "Option 3", "Option 1", "Option 3"),
                 Cat = c("Cat A, Cat B, Cat C", "Cat X, Cat Y,Cat Z, Cat B", "Cat P, Cat Q, Cat R, Cat S, Cat T", "Cat T", "Cat E, Cat F"))

lookup <-data.frame(Cat = c("Cat A", "Cat B", "Cat C", "Cat X", "Cat Y", "Cat Z", "Cat P", "Cat Q", "Cat R", "Cat S", "Cat T", "Cat E", "Cat F"),
                    Options = c("Option 2", "Option 3", "Option 1", "Option 3", "Option 1", "Option 2", "Option 3", "Option 2", "Option 1", "Option 4", "Option 1", "Option 4", "Option 3"),
                    Score = c(8, 3, 2, 9, 1, 1, 2, 9, 4, 0, 5, 1, 10)) 



output <- data.frame(Option = c("Option 1", "Option 2", "Option 3", "Option 1", "Option 3"),
                     Cat = c("Cat A, Cat B, Cat C", "Cat X, Cat Y, Cat Z, Cat B,", "Cat P, Cat Q, Cat R, Cat S, Cat T", "Cat T", "Cat E, Cat F"),
                     Found_cat = c("Cat C", "Cat Z", "Cat P", "Cat T", "Cat F"),
                     Score = c(2, 1, 2, 5, 10))

      

+3


source to share


2 answers


I'll just try using a loop:



Table <- data.frame(Option = c("Option 1", "Option 2", "Option 3", "Option 1", "Option 3"),
                Cat = c("Cat A, Cat B, Cat C", "Cat X, Cat Y, Cat Z, Cat B", "Cat P, Cat Q, Cat R, Cat S, Cat T", "Cat T", "Cat E, Cat F"), 
                stringsAsFactors = F)

lookup <-data.frame(Cat = c("Cat A", "Cat B", "Cat C", "Cat X", "Cat Y", "Cat Z", "Cat P", "Cat Q", "Cat R", "Cat S", "Cat T", "Cat E", "Cat F"),
                Options = c("Option 2", "Option 3", "Option 1", "Option 3", "Option 1", "Option 2", "Option 3", "Option 2", "Option 1", "Option 4", "Option 1", "Option 4", "Option 3"),
                Score = c(8, 3, 2, 9, 1, 1, 2, 9, 4, 0, 5, 1, 10), 
                stringsAsFactors = F)
app = matrix(nrow = nrow(Table), ncol = 2)
for (i in 1:nrow(Table)) {
lookup.cats = lookup$Cat[lookup$Options == Table$Option[i]]
Table.cats = unlist(strsplit(Table$Cat[i], split = ', '))
found.cat = intersect(lookup.cats, Table.cats)
score = lookup$Score[which(lookup$Cat == found.cat & 
                               lookup$Options == Table$Option[i])]
app[i, 1] = found.cat
app[i, 2] = score
}
app = as.data.frame(app)
names(app) = c('Found_cat', 'Score')
cbind(Table, app)

      

+2


source


Here is an option tidyverse

,



library(tidyverse)

Table %>% 
  unnest(Cat = strsplit(as.character(Cat), ', ')) %>% 
  inner_join(lookup, by = c('Option', 'Cat')) %>%
  select(Cat, Score) %>% 
  rename(Cat_Found = Cat) %>% 
  bind_cols(Table, .)

#    Option                               Cat Cat_Found Score
#1 Option 1               Cat A, Cat B, Cat C     Cat C     2
#2 Option 2        Cat X, Cat Y, Cat Z, Cat B     Cat Z     1
#3 Option 3 Cat P, Cat Q, Cat R, Cat S, Cat T     Cat P     2
#4 Option 1                             Cat T     Cat T     5
#5 Option 3                      Cat E, Cat F     Cat F    10

      

+1


source







All Articles