View comma separated data in another table
I have 2 tables where the data frame contains data with commas.
It is necessary to find out which category is present in the search dataframe, that is, in Option 1 from the category "Cat A, Cat B, Cat C" in which there is a category. if found, you must show a score against it.
If you find a multiple instance, we need to show the first occurrence score, so in the first case, Cat C and Cat Z are both in Option 1, however, we need to show the score for Cat C, which is 2.
Table
Option | Cat
----- ------
Option 1 | Cat A, Cat B, Cat C, Cat Z
Option 2 | Cat X, Cat Y, Cat Z, Cat B
Option 3 | Cat P, Cat Q, Cat R, Cat S, Cat T
Option 1 | Cat T
Option 3 | Cat E, Cat F
Search
Cat | Option | Score
Cat A | Option 2 | 8
Cat B | Option 3 | 3
Cat C | Option 1 | 2
Cat X | Option 3 | 9
Cat Y | Option 1 | 1
Cat Z | Option 2 | 1
Cat P | Option 3 | 2
Cat Q | Option 2 | 9
Cat R | Option 1 | 4
Cat S | Option 4 | 0
Cat T | Option 1 | 5
Cat E | Option 4 | 1
Cat F | Option 3 | 10
Output
Option | Cat | Found_cat | Score
Option 1 | Cat A, Cat B, Cat C | Cat C | 2
Option 2 | Cat X, Cat Y, Cat Z, Cat B | Cat Z | 1
Option 3 | Cat P, Cat Q, Cat R, Cat S, Cat T | Cat P | 2
Option 1 | Cat T | Cat T | 5
Option 3 | Cat E, Cat F | Cat F | 10
R Code to create data frame
Table <- data.frame(Option = c("Option 1", "Option 2", "Option 3", "Option 1", "Option 3"),
Cat = c("Cat A, Cat B, Cat C", "Cat X, Cat Y,Cat Z, Cat B", "Cat P, Cat Q, Cat R, Cat S, Cat T", "Cat T", "Cat E, Cat F"))
lookup <-data.frame(Cat = c("Cat A", "Cat B", "Cat C", "Cat X", "Cat Y", "Cat Z", "Cat P", "Cat Q", "Cat R", "Cat S", "Cat T", "Cat E", "Cat F"),
Options = c("Option 2", "Option 3", "Option 1", "Option 3", "Option 1", "Option 2", "Option 3", "Option 2", "Option 1", "Option 4", "Option 1", "Option 4", "Option 3"),
Score = c(8, 3, 2, 9, 1, 1, 2, 9, 4, 0, 5, 1, 10))
output <- data.frame(Option = c("Option 1", "Option 2", "Option 3", "Option 1", "Option 3"),
Cat = c("Cat A, Cat B, Cat C", "Cat X, Cat Y, Cat Z, Cat B,", "Cat P, Cat Q, Cat R, Cat S, Cat T", "Cat T", "Cat E, Cat F"),
Found_cat = c("Cat C", "Cat Z", "Cat P", "Cat T", "Cat F"),
Score = c(2, 1, 2, 5, 10))
+3
source to share
2 answers
I'll just try using a loop:
Table <- data.frame(Option = c("Option 1", "Option 2", "Option 3", "Option 1", "Option 3"),
Cat = c("Cat A, Cat B, Cat C", "Cat X, Cat Y, Cat Z, Cat B", "Cat P, Cat Q, Cat R, Cat S, Cat T", "Cat T", "Cat E, Cat F"),
stringsAsFactors = F)
lookup <-data.frame(Cat = c("Cat A", "Cat B", "Cat C", "Cat X", "Cat Y", "Cat Z", "Cat P", "Cat Q", "Cat R", "Cat S", "Cat T", "Cat E", "Cat F"),
Options = c("Option 2", "Option 3", "Option 1", "Option 3", "Option 1", "Option 2", "Option 3", "Option 2", "Option 1", "Option 4", "Option 1", "Option 4", "Option 3"),
Score = c(8, 3, 2, 9, 1, 1, 2, 9, 4, 0, 5, 1, 10),
stringsAsFactors = F)
app = matrix(nrow = nrow(Table), ncol = 2)
for (i in 1:nrow(Table)) {
lookup.cats = lookup$Cat[lookup$Options == Table$Option[i]]
Table.cats = unlist(strsplit(Table$Cat[i], split = ', '))
found.cat = intersect(lookup.cats, Table.cats)
score = lookup$Score[which(lookup$Cat == found.cat &
lookup$Options == Table$Option[i])]
app[i, 1] = found.cat
app[i, 2] = score
}
app = as.data.frame(app)
names(app) = c('Found_cat', 'Score')
cbind(Table, app)
+2
source to share
Here is an option tidyverse
,
library(tidyverse)
Table %>%
unnest(Cat = strsplit(as.character(Cat), ', ')) %>%
inner_join(lookup, by = c('Option', 'Cat')) %>%
select(Cat, Score) %>%
rename(Cat_Found = Cat) %>%
bind_cols(Table, .)
# Option Cat Cat_Found Score
#1 Option 1 Cat A, Cat B, Cat C Cat C 2
#2 Option 2 Cat X, Cat Y, Cat Z, Cat B Cat Z 1
#3 Option 3 Cat P, Cat Q, Cat R, Cat S, Cat T Cat P 2
#4 Option 1 Cat T Cat T 5
#5 Option 3 Cat E, Cat F Cat F 10
+1
source to share