Add a new column based on data between zeros

I have power (wattage) data collected every second (sample). So my data.frame is structured like this:

Test <- data.frame(Sample = c(1:20), 
                   Power = c(0,0,0,0,0,50,67,100,92,0,0,0,36,89,36,0,0,0,89,90))

      

The number of power recordings depends on the person performing the cycling effort and resting sporadically. Therefore, the power does not appear in an orderly manner. Since there are no markers to indicate when efforts start and stop, I want to include this detail. Efforts can be characterized when power> 0 and the start / stop of each effort can be estimated based on a group of data together.

Now I want to include a new column (Marker) that looks for cardinality data grouped together and separated by zeros. For example, my expected output is:

Test$Marker <- c("Rest","Rest","Rest","Rest","Rest","Effort 1","Effort 1","Effort 1","Effort 1",
                 "Rest","Rest","Rest","Effort 2","Effort 2","Effort 2","Rest","Rest","Rest",
                 "Effort 3","Effort 3")

      

Unfortunately my raw data is> 3000 rows, so doing it manually would be tedious! How do I do this in R?

+3


source to share


3 answers


Option with base R:

indx1 = with(rle(Test$Power>0),rep(values,lengths))
indx2 = with(rle(Test$Power>0),rep(cumsum(values),lengths))
Test$Effort[indx1] = paste0("Effort",indx2[indx1])
Test$Effort[!indx1]="Rest"

      

Output:



   Sample Power  Effort
1       1     0    Rest
2       2     0    Rest
3       3     0    Rest
4       4     0    Rest
5       5     0    Rest
6       6    50 Effort1
7       7    67 Effort1
8       8   100 Effort1
9       9    92 Effort1
10     10     0    Rest
11     11     0    Rest
12     12     0    Rest
13     13    36 Effort2
14     14    89 Effort2
15     15    36 Effort2
16     16     0    Rest
17     17     0    Rest
18     18     0    Rest
19     19    89 Effort3
20     20    90 Effort3

      

About 0.0038 seconds for 3000 lines;) Hope this helps!

+5


source


An alternate basic version of R using cumsum

:



mrk <- Test$Power==0
Test$New[!mrk] <- paste("effort", as.numeric(factor(cumsum(mrk)[!mrk])))
Test$New[mrk] <- "rest"

#   Sample Power   Marker      New
#1       1     0     Rest     rest
#2       2     0     Rest     rest
#3       3     0     Rest     rest
#4       4     0     Rest     rest
#5       5     0     Rest     rest
#6       6    50 Effort 1 effort 1
#7       7    67 Effort 1 effort 1
#8       8   100 Effort 1 effort 1
#9       9    92 Effort 1 effort 1
#10     10     0     Rest     rest
#11     11     0     Rest     rest
#12     12     0     Rest     rest
#13     13    36 Effort 2 effort 2
#14     14    89 Effort 2 effort 2
#15     15    36 Effort 2 effort 2
#16     16     0     Rest     rest
#17     17     0     Rest     rest
#18     18     0     Rest     rest
#19     19    89 Effort 3 effort 3
#20     20    90 Effort 3 effort 3

      

+3


source


Option from dplyr

from tidyverse

:

library(dplyr)
Test <- data.frame(Sample = c(1:20), 
                   Power = c(0,0,0,0,0,50,67,100,92,0,0,0,36,89,36,0,0,0,89,90))

Test_df <- Test %>%
  mutate(
    Marker = case_when(
      Power > 0 ~ "Effort",
      Power == 0 ~"Rest"),
    rleid = cumsum(Marker != lag(Marker, 1, default = "NA")), 
    Marker = case_when(
      Marker == "Effort" ~ paste0(Marker, rleid %/% 2),
      TRUE ~ "Rest"),
    rleid = NULL
  )

Test_df
#>    Sample Power  Marker
#> 1       1     0    Rest
#> 2       2     0    Rest
#> 3       3     0    Rest
#> 4       4     0    Rest
#> 5       5     0    Rest
#> 6       6    50 Effort1
#> 7       7    67 Effort1
#> 8       8   100 Effort1
#> 9       9    92 Effort1
#> 10     10     0    Rest
#> 11     11     0    Rest
#> 12     12     0    Rest
#> 13     13    36 Effort2
#> 14     14    89 Effort2
#> 15     15    36 Effort2
#> 16     16     0    Rest
#> 17     17     0    Rest
#> 18     18     0    Rest
#> 19     19    89 Effort3
#> 20     20    90 Effort3

      


Another use case for a one-liner data.table

:

library(data.table)
Test <- data.frame(Sample = c(1:20), 
                   Power = c(0,0,0,0,0,50,67,100,92,0,0,0,36,89,36,0,0,0,89,90))
setDT(Test)
Test[, Marker := ifelse(Power > 0, paste0("Effort", rleidv(Power > 0) %/% 2), "Rest")]
Test
#>     Sample Power  Marker
#>  1:      1     0    Rest
#>  2:      2     0    Rest
#>  3:      3     0    Rest
#>  4:      4     0    Rest
#>  5:      5     0    Rest
#>  6:      6    50 Effort1
#>  7:      7    67 Effort1
#>  8:      8   100 Effort1
#>  9:      9    92 Effort1
#> 10:     10     0    Rest
#> 11:     11     0    Rest
#> 12:     12     0    Rest
#> 13:     13    36 Effort2
#> 14:     14    89 Effort2
#> 15:     15    36 Effort2
#> 16:     16     0    Rest
#> 17:     17     0    Rest
#> 18:     18     0    Rest
#> 19:     19    89 Effort3
#> 20:     20    90 Effort3

      

+2


source







All Articles