Filter data rows based on ordered character vector
I'm not sure whether this question is a duplicate, but searching Stack Overflow didn't turn up any solutions.
I have the following dataframe
num char
1 A
2 K
3 I
4 B
5 I
6 N
7 G
8 O
9 Z
10 Q
I would like to select only those rows whose values in the char column spell the word BINGO (in that order), resulting in the following data frame:
num char
4 B
5 I
6 N
7 G
8 O
Any help would be much appreciated.
+3
source to share
5 answers
One option is to use zoo::rollapply:
library(zoo)
bingo <- c("B", "I", "N", "G", "O") # the pattern you want to check
# use rollapply to check if the pattern exists in any window; each TRUE marks
# the first row of a window whose letters equal `bingo` element by element
index <- which(
  rollapply(df$char, length(bingo), function(x) all(x == bingo))
)
# expand every window start into the row numbers of that window;
# unlist(lapply()) gives NULL when nothing matched (selecting zero rows),
# whereas mapply() would return an empty list, which is not a valid subscript
rows <- unlist(
  lapply(index, function(i) seq(i, length.out = length(bingo)))
)
# extract the window(s) from the table
df[rows, ]
# num char
#4 4 B
#5 5 I
#6 6 N
#7 7 G
#8 8 O
+3
source to share
Here's a solution using a recursive function - the BINGO letters don't have to be in consecutive rows, but they do have to appear in order.
df <- data.frame(
  num = 1:10,
  char = c("A", "K", "I", "B", "I", "N", "G", "O", "Z", "Q"),
  stringsAsFactors = FALSE
)
word <- "BINGO"
chars <- strsplit(word, "")[[1]]

# Find the rows of `df` whose `char` values spell `chars` in order. The rows
# do not have to be adjacent: each letter is matched at its first occurrence
# after the row matched for the previous letter.
#
# chars: character vector of letters still to be found
# df:    data frame with a `char` column
# a:     values appended after the matched rows once the last letter is found
# m:     number of leading rows to skip (search starts at row m + 1)
#
# Returns the matched row numbers followed by `a`. If a letter cannot be
# found, the result ends in NA (and is just NA when the first letter is
# missing), so check the result with anyNA() before trusting it.
findword <- function(chars, df, a = integer(0), m = 0) {
  n_rows <- nrow(df)
  # No rows left to search. Without this guard `(m + 1):n_rows` would count
  # backwards and re-read the last row, yielding an out-of-range row number.
  if (m >= n_rows) {
    return(NA)
  }
  # next match of next letter, converted back to a row number of `df`
  z <- m + match(chars[1], df$char[(m + 1):n_rows])
  if (is.na(z)) {
    return(NA)
  }
  if (length(chars) == 1) {
    return(c(z, a))
  }
  # Recall() re-invokes this function for the remaining letters
  c(z, Recall(chars[-1], df, a, max(m, z)))
}

result <- df[findword(chars, df), ]
+1
source to share
d <- data.frame(
  num = 1:15,
  char = c("A", "K", "I", "B", "I", "N", "G", "O", "Z", "Q",
           "B", "I", "N", "G", "O")
)
w <- "BINGO"
N <- nchar(w)
# Character positions in the collapsed string only equal row numbers when
# every row holds exactly one character.
stopifnot(all(nchar(as.character(d$char)) == 1))
char_str <- paste(d$char, collapse = "")
# fixed = TRUE matches `w` literally instead of interpreting it as a regex
starts <- as.integer(gregexpr(w, char_str, fixed = TRUE)[[1]])
# gregexpr() reports "no match" as -1; drop it so that nothing is selected
# instead of building an invalid mixed negative/positive subscript
starts <- starts[starts > 0]
# expand each start position into the N consecutive rows of that occurrence
idx <- as.integer(
  unlist(lapply(starts, function(i) seq(i, length.out = N)))
)
d[idx, ]
num char
4 4 B
5 5 I
6 6 N
7 7 G
8 8 O
11 11 B
12 12 I
13 13 N
14 14 G
15 15 O
0
source to share
I guess nobody likes loops, but this is a possibility in base R:
char <- c("A", "K", "I", "B", "I", "N", "G", "O", "Z", "Q")
num <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
df <- data.frame(num, char)
word <- "BINGO"

# Walk the rows once, matching the letters of `word` in order (the rows need
# not be adjacent). `word` itself is left untouched; progress is tracked in
# `found`, and `index` is preallocated instead of grown with c().
targets <- strsplit(word, "")[[1]]
row_chars <- as.character(df[[2]])
index <- integer(length(targets))
found <- 0L
for (z in seq_len(nrow(df))) {
  # stop as soon as the whole word has been matched
  if (found == length(targets)) {
    break
  }
  # isTRUE() treats an NA cell as "no match" rather than failing in if()
  if (isTRUE(targets[found + 1L] == row_chars[z])) {
    found <- found + 1L
    index[found] <- z
  }
}
# an incomplete match selects no rows rather than a partial word
if (found < length(targets)) {
  index <- integer(0)
}
df[index, ]
0
source to share