Find consecutive subvectors of length k from a numeric vector that satisfy the given condition

I have a numeric vector in R, say

v= c(2,3,5,6,7,6,3,2,3,4,5,7,8,9,6,1,1,2,5,6,7,11,2,3,4)

      

Now I have to find all consecutive subvectors of size 4 in it, with the condition that each element of a subvector must be greater than 2, and the subvectors must be disjoint, in the sense that no two subvectors can contain the same index. So my output would be:

(3,5,6,7),(3,4,5,7),(5,6,7,11)

      

Edited: More examples to illustrate: for,

v=c(3,3,3,3,1,3,3,3,3,3,3,3,3) 

      

the output will be:

(3,3,3,3), (3,3,3,3),(3,3,3,3).

      

and for

v= c(2,3,5,5,7,6,3,2,3,4,5,7,8,9,6,1,1,2,5,6,7,11,2,3,4) 

      

the output will be

(3,5,5,7),(3,4,5,7),(5,6,7,11)

      

The second condition simply says that if we find any subvector, say (v[m],v[m+1],v[m+2],v[m+3]), with each element greater than 2, then it goes into my output, and the next subvector can only start at v[m+4] (if possible).

+3


source to share


3 answers


This solution uses embed() to create a lag matrix and then retrieves the required rows from that matrix:



v <- c(
  2, 3, 5, 6, 7, 6, 3, 2, 3, 4, 5, 7, 8,
  9, 6, 1, 1, 2, 5, 6, 7, 11, 2, 3, 4
)

# Lag matrix: row i is the length-4 window that starts at v[i], stored in
# reverse order (v[i + 3], v[i + 2], v[i + 1], v[i]).
e <- embed(v, 4)

# Start positions of the windows whose values all exceed 2 AND are all
# distinct. Note that windows containing repeated values are rejected here.
ret <- which(apply(e, 1, function(w) {
  all(w > 2) & length(unique(w)) == 4
}))

# Keep the first hit, plus every hit that lies more than 4 positions after
# the hit immediately before it.
rows <- ret[c(1, which(diff(ret) > 4) + 1)]

# Reverse the columns so each window reads in its original order.
e[rows, 4:1]

     [,1] [,2] [,3] [,4]
[1,]    3    5    6    7
[2,]    3    4    5    7
[3,]    5    6    7   11

      

+5


source


Try:



  #' Extract disjoint, non-decreasing windows whose values exceed a threshold
  #'
  #' Scans `vec` from left to right and returns consecutive windows of length
  #' `n` in which every value is greater than `cond1` and the values never
  #' decrease. Returned windows never share an index.
  #'
  #' @param vec A numeric vector.
  #' @param n Window length (a single number >= 1).
  #' @param cond1 Threshold; every value of a window must be strictly greater.
  #' @return An unnamed list of length-`n` vectors, in order of appearance;
  #'   an empty list when `vec` is shorter than `n` or no window qualifies.
  fun1 <- function(vec, n, cond1) {
    stopifnot(is.numeric(n), length(n) == 1L, n >= 1)

    n_starts <- length(vec) - n + 1
    if (n_starts < 1) {
      return(list())
    }

    # One flag per possible start position; windows containing NA come out
    # as NA and are dropped by which() below.
    is_valid <- vapply(seq_len(n_starts), function(i) {
      x1 <- vec[i:(i + n - 1)]
      all(diff(x1) >= 0) && all(x1 > cond1)
    }, logical(1))

    indx <- which(is_valid)
    if (length(indx) == 0L) {
      return(list())
    }

    # Group consecutive valid starts into runs, then step through each run
    # in strides of n so that the chosen windows cannot overlap.
    run_id <- cumsum(c(TRUE, diff(indx) != 1))
    indx2 <- unlist(
      lapply(split(indx, run_id), function(run) {
        run[seq(1, length(run), by = n)]
      }),
      use.names = FALSE
    )

    lapply(indx2, function(i) vec[i:(i + n - 1)])
  }


# Example inputs: v1 and v2 are the additional examples from the question,
# v3 is the question's original vector.
v1 <- c(3,3,3,3,1,3,3,3,3,3,3,3,3)
v2 <- c(2,3,5,5,7,6,3,2,3,4,5,7,8,9,6,1,1,2,5,6,7,11,2,3,4)
v3 <- c(2,3,5,6,7,6,3,2,3,4,5,7,8,9,6,1,1,2,5,6,7,11,2,3,4)

# Each call uses window length 4 and threshold 2; the commented lines that
# follow a call are its printed result.
fun1(v1,4,2)
#[[1]]
#[1] 3 3 3 3

#[[2]]
#[1] 3 3 3 3

#[[3]]
#[1] 3 3 3 3

 fun1(v2,4,2)
 #[[1]]
 #[1] 3 5 5 7

#[[2]]
#[1] 3 4 5 7

#[[3]]
#[1]  5  6  7 11

fun1(v3,4,2)
#[[1]]
#[1] 3 5 6 7

#[[2]]
#[1] 3 4 5 7

#[[3]]
#[1]  5  6  7 11

      

+1


source


Here's another idea based on rle:

#' Extract disjoint windows whose values all exceed a threshold
#'
#' Flags every start position whose length-`size` window lies entirely above
#' `thres`, run-length encodes those flags, and then walks each run of valid
#' starts in strides of `size` so the returned windows never share an index.
#'
#' @param x A numeric vector.
#' @param size Window length (a single number >= 1).
#' @param thres Threshold; every value of a window must be strictly greater.
#' @return An unnamed list of length-`size` vectors, in order of appearance;
#'   an empty list when `x` is shorter than `size` or no window qualifies.
ff <- function(x, size, thres) {
  stopifnot(is.numeric(size), length(size) == 1L, size >= 1)

  # Computed explicitly: head(seq_along(x), -(size - 1)) would yield no
  # start positions at all for size == 1, because -0 is 0.
  n_starts <- length(x) - size + 1
  if (n_starts < 1) {
    return(list())
  }

  valid_subsets <- vapply(
    seq_len(n_starts),
    function(i) all(x[i:(i + size - 1)] > thres),
    logical(1)
  )
  r <- rle(valid_subsets)

  # First start position and length of every run of valid starts.
  keep <- which(r$values)
  run_first <- (cumsum(r$lengths) - r$lengths + 1)[keep]
  run_len <- r$lengths[keep]

  # A run of b valid starts holds ceiling(b / size) disjoint windows.
  starts <- unlist(Map(
    function(a, b) a + (seq_len((b + size - 1) %/% size) - 1) * size,
    run_first,
    run_len
  ))

  lapply(starts, function(i) x[i:(i + size - 1)])
}

# Two of the question's example vectors, with window size 4 and threshold 2.
ff(c(3,3,3,3,1,3,3,3,3,3,3,3,3), 4, 2)
ff(c(2,3,5,6,7,6,3,2,3,4,5,7,8,9,6,1,1,2,5,6,7,11,2,3,4), 4, 2)

      

Testing on another vector (I assume this is the correct output):

# Random test vector: 20 draws with replacement from 1..10.
# NOTE(review): the values shown below are the ones recorded in the original
# post; R >= 3.6.0 changed the default sample() algorithm, so reproducing
# them on a current R may require RNGkind(sample.kind = "Rounding") -- confirm.
set.seed(4)
xx <- sample(1:10, 20, replace = TRUE)
xx
# [1]  6  1  3  3  9  3  8 10 10  1  8  3  2 10  5  5 10  6 10  8
ff(xx, 4, 2)
#[[1]]
#[1] 3 3 9 3
#
#[[2]]
#[1] 10  5  5 10

      

Unless I'm missing something, for "xx" (and also in other cases) the other posted answers don't work:

# fun1 only accepts non-decreasing windows, so it reports a single window for xx.
fun1(xx, 4, 2)
#[[1]]
#[1]  3  8 10 10

# Result of the embed()-based answer when it is run on xx:
#e[rows, 4:1]
#[1]  9  3  8 10

      

0


source







All Articles