Filter data rows based on ordered character vector

I'm not sure whether this question is a duplicate, but searching Stack Overflow didn't turn up any solutions.

I have the following dataframe

num   char  
1     A  
2     K  
3     I  
4     B  
5     I  
6     N  
7     G  
8     O  
9     Z  
10    Q 

      

I would like to select only those rows whose values in the char column spell the word BINGO (in that order), resulting in the following data frame:

num char  
4     B  
5     I  
6     N  
7     G  
8     O 

      

Any help would be much appreciated.

+3


source to share


5 answers


One option is to use zoo::rollapply:



library(zoo)

# The pattern to look for, one letter per element.
bingo <- c("B", "I", "N", "G", "O")

# Slide a window of length(bingo) over the column and record the first row of
# every window that equals the pattern element by element.
index <- which(rollapply(df$char, length(bingo), function(x) all(x == bingo)))

# Expand each starting row into the run of rows its window covers.
# unlist(lapply()) gives NULL when nothing matched, so the subset below is an
# empty data frame rather than a failure on the empty list that mapply()
# returns for zero-length input.
rows <- unlist(lapply(index, function(i) seq(i, length.out = length(bingo))))

# extract the matching windows from the table
df[rows, ]

#  num char
#4   4    B
#5   5    I
#6   6    N
#7   7    G
#8   8    O

      

+3


source


Here's a solution using a recursive function - the BINGO letters don't have to be consecutive, but they do have to appear in order.



df <- data.frame(
  num = 1:10,
  char = c("A", "K", "I", "B", "I", "N", "G", "O", "Z", "Q"),
  stringsAsFactors = FALSE
)

word <- "BINGO"

# Split the word into a vector of single letters.
chars <- strsplit(word, "")[[1]]

# Find the rows of `df` whose `char` values spell out `chars` in order.
# The letters need not sit in adjacent rows; each letter is matched at its
# first occurrence after the row where the previous letter was found.
#
# Arguments:
#   chars  character vector of the letters still to be found
#   df     data frame with a `char` column
#   a      row indices appended after the matches (carried through the
#          recursion; normally left at its default)
#   m      row after which the search starts (0 = search from the first row)
#
# Returns the matching row indices followed by `a`, or integer(0) when the
# letters cannot all be found in order, so that df[findword(chars, df), ]
# is an empty data frame instead of partial matches or NA rows.
findword <- function(chars, df, a = integer(0), m = 0) {
  n_rows <- nrow(df)

  # Nothing left to look for.
  if (length(chars) == 0L) {
    return(a)
  }

  # No rows left to search. Guarding here matters because (m + 1):n_rows
  # would count downwards and revisit the last row once m reaches n_rows.
  if (m >= n_rows) {
    return(integer(0))
  }

  # Row of the next occurrence of the first remaining letter.
  z <- m + match(chars[1], df$char[seq.int(m + 1, n_rows)])
  if (is.na(z)) {
    return(integer(0))
  }

  if (length(chars) == 1L) {
    return(c(z, a))
  }

  # Recall() invokes this function again for the remaining letters,
  # searching only the rows after z.
  rest <- Recall(chars[-1], df, a, z)
  if (length(rest) == 0L) {
    # A later letter was not found, so the word as a whole does not match.
    return(integer(0))
  }
  c(z, rest)
}

result <- df[findword(chars, df), ]

      

+1


source


d <- data.frame(
  num = 1:15,
  char = c("A", "K", "I", "B", "I", "N", "G", "O", "Z", "Q",
           "B", "I", "N", "G", "O")
)
w <- "BINGO"
N <- nchar(w)

# Glue the column into a single string so that position i in the string
# corresponds to row i. This relies on every entry of d$char being exactly
# one character long.
char_str <- paste0(d$char, collapse = "")

# Start position of every occurrence of w in the string. fixed = TRUE makes w
# a literal string rather than a regular expression. gregexpr() reports -1
# when there is no match, and that value must be dropped before it is used as
# a row index.
idx <- as.integer(gregexpr(w, char_str, fixed = TRUE)[[1]])
idx <- idx[idx > 0L]

# Expand each start position into the N consecutive rows it covers.
idx <- as.integer(unlist(lapply(idx, function(i) i + seq_len(N) - 1L)))
d[idx, ]

   num char
4    4    B
5    5    I
6    6    N
7    7    G
8    8    O
11  11    B
12  12    I
13  13    N
14  14    G
15  15    O

      

0


source


I guess nobody likes loops, but here is a possibility in base R:

char <- c("A", "K", "I", "B", "I", "N", "G", "O", "Z", "Q")
num <- c(1, 2, 3, 4, 5, 6, 7, 8, 9, 10)
df <- data.frame(num, char)

word <- "BINGO"

# Letters still to be found, in order. `word` itself is left untouched.
wanted <- strsplit(word, "")[[1]]

# One slot per letter, filled in as the letters are found.
index <- integer(length(wanted))
found <- 0L

# Walk down the rows once, taking the next wanted letter whenever it appears.
for (z in seq_len(nrow(df))) {
  if (found == length(wanted)) {
    break
  }
  # identical() is FALSE (not NA) for a missing letter, so an NA in the column
  # cannot break the if(). as.character() covers a factor column as well.
  if (identical(as.character(df$char[z]), wanted[found + 1L])) {
    found <- found + 1L
    index[found] <- z
  }
}

# Only a complete word counts; a partial match selects no rows.
if (found < length(wanted)) {
  index <- integer(0)
}

df[index, ]

      

0


source


I went too fast the first time, but based on the example you gave I think this might work:

# Keep the rows from the first "B" onwards whose letter is one of B, I, N, G, O.
# `df` is the data frame from the question (it must already exist).
# Written in base R: an unqualified filter() resolves to stats::filter unless
# another package that provides a data-frame filter() has been attached.
# NOTE: this tests set membership only; it does not check that the letters
# appear in the order B-I-N-G-O.
bingo <- c("B", "I", "N", "G", "O")

# match() gives the first "B" only, or NA when there is none.
first_b <- match("B", df$char)
if (is.na(first_b)) {
  # No "B" at all: nothing to select.
  df[0, , drop = FALSE]
} else {
  tail_rows <- df[first_b:nrow(df), , drop = FALSE]
  tail_rows[tail_rows$char %in% bingo, , drop = FALSE]
}

      

0


source







All Articles