Exclude Rows Exceeding Threshold

I have a dataset df

  field_pointpath                       assetclass_code value
C_THDxL_0154x6KSFS_A03x6KSF_11xaP_ACT      6KSFS_6KSF   50
C_THDxL_0154x6KSFS_A03x6KSF_11xaP_ACT      6KSFS_6KSF   60
C_THDxL_0154x6KSFS_A03x6KSF_11xaP_ACT      6KSFS_6KSF   65
C_THDxL_0154x6KSFS_A03x6KSF_11xaP_ACT      6KSFS_6KSF   43
C_THDxL_0154x6KSFS_A03xaP_ACT                  6KSFS    70
C_THDxL_0154x6KSFS_A03xaP_ACT                  6KSFS    55
C_THDxL_0154x6KSFS_A03xaP_ACT                  6KSFS    43
C_THDxL_0154x6KSFS_A03xaP_ACT                  6KSFS    11
C_THDxL_0154x6KSFS_B03x6KSF_11xaP_ACT     6KSFS_6KSF    67
C_THDxL_0154x6KSFS_B03x6KSF_11xaP_ACT     6KSFS_6KSF    36
C_THDxL_0154x6KSFS_B03x6KSF_11xaP_ACT     6KSFS_6KSF    39
C_THDxL_0154x6KSFS_B03x6KSF_11xaP_ACT     6KSFS_6KSF    33
C_THDxL_0154x6KSFS_B03x6KSF_11xaP_ACT     6KSFS_6KSF    20
C_THDxL_0154x6KSFS_B03xaP_ACT                  6KSFS    21
C_THDxL_0154x6KSFS_B03xaP_ACT                  6KSFS    22
C_THDxL_0154x6KSFS_B03xaP_ACT                  6KSFS    17
C_THDxL_0154x6KSFS_B03xaP_ACT                  6KSFS    30

      

I want to filter values

for specific ones field_pointpath

like:

  • C_THDxL_0154x6KSFS_ A03 x6KSF_11xaP_ACT
  • C_THDxL_0154x6KSFS_ A03 x6KSF_11xaP_ACT

    which has character A must not be value

    greater than 60:

Similarly ,

  • C_THDxL_0154x6KSFS_ B03 x6KSF_11xaP_ACT,

  • C_THDxL_0154x6KSFS_ B03 x6KSF_11xaP_ACT

    which has character B must not have value

    more than 30

So the output should be

                    field_pointpath    assetclass_code  value
C_THDxL_0154x6KSFS_A03x6KSF_11xaP_ACT   6KSFS_6KSF       50
C_THDxL_0154x6KSFS_A03x6KSF_11xaP_ACT   6KSFS_6KSF       60
C_THDxL_0154x6KSFS_A03x6KSF_11xaP_ACT   6KSFS_6KSF       43
C_THDxL_0154x6KSFS_A03xaP_ACT              6KSFS         55
C_THDxL_0154x6KSFS_A03xaP_ACT              6KSFS         43
C_THDxL_0154x6KSFS_A03xaP_ACT              6KSFS         11
C_THDxL_0154x6KSFS_B03x6KSF_11xaP_ACT   6KSFS_6KSF       20
C_THDxL_0154x6KSFS_B03xaP_ACT              6KSFS         21
C_THDxL_0154x6KSFS_B03xaP_ACT              6KSFS         22
C_THDxL_0154x6KSFS_B03xaP_ACT              6KSFS         17
C_THDxL_0154x6KSFS_B03xaP_ACT              6KSFS         30

      

+3


source to share


3 answers


You can use regular expression

to filter by field_pointpath

. I am assuming you are looking for records that have _A or _B followed by 2 digits. Then you combine this with a filter on value

:



ind <- (grepl("\\_A[0-9]{2}", df$field_pointpath) & df$value > 60) | 
       (grepl("\\_B[0-9]{2}", df$field_pointpath) & df$value > 30)
df[!ind, ]

#                          field_pointpath assetclass_code value
# 1  C_THDxL_0154x6KSFS_A03x6KSF_11xaP_ACT      6KSFS_6KSF    50
# 2  C_THDxL_0154x6KSFS_A03x6KSF_11xaP_ACT      6KSFS_6KSF    60
# 4  C_THDxL_0154x6KSFS_A03x6KSF_11xaP_ACT      6KSFS_6KSF    43
# 6          C_THDxL_0154x6KSFS_A03xaP_ACT           6KSFS    55
# 7          C_THDxL_0154x6KSFS_A03xaP_ACT           6KSFS    43
# 8          C_THDxL_0154x6KSFS_A03xaP_ACT           6KSFS    11
# 13 C_THDxL_0154x6KSFS_B03x6KSF_11xaP_ACT      6KSFS_6KSF    20
# 14         C_THDxL_0154x6KSFS_B03xaP_ACT           6KSFS    21
# 15         C_THDxL_0154x6KSFS_B03xaP_ACT           6KSFS    22
# 16         C_THDxL_0154x6KSFS_B03xaP_ACT           6KSFS    17
# 17         C_THDxL_0154x6KSFS_B03xaP_ACT           6KSFS    30  

      

+3


source


    df[(substr(df$field_pointpath, 20,20)=="A" & df$value <= 60) |
         (substr(df$field_pointpath, 20,20)=="B" & df$value <= 30),]

1  C_THDxL_0154x6KSFS_A03x6KSF_11xaP_ACT      6KSFS_6KSF    50
2  C_THDxL_0154x6KSFS_A03x6KSF_11xaP_ACT      6KSFS_6KSF    60
4  C_THDxL_0154x6KSFS_A03x6KSF_11xaP_ACT      6KSFS_6KSF    43
6          C_THDxL_0154x6KSFS_A03xaP_ACT           6KSFS    55
7          C_THDxL_0154x6KSFS_A03xaP_ACT           6KSFS    43
8          C_THDxL_0154x6KSFS_A03xaP_ACT           6KSFS    11
13 C_THDxL_0154x6KSFS_B03x6KSF_11xaP_ACT      6KSFS_6KSF    20
14         C_THDxL_0154x6KSFS_B03xaP_ACT           6KSFS    21
15         C_THDxL_0154x6KSFS_B03xaP_ACT           6KSFS    22
16         C_THDxL_0154x6KSFS_B03xaP_ACT           6KSFS    17
17         C_THDxL_0154x6KSFS_B03xaP_ACT           6KSFS    30

      



+2


source


You can use the following code:

# your data, with 2 lines appended that will not be selected
my_df  <- read.table(header = TRUE, 
text = "field_pointpath                       assetclass_code value
C_THDxL_0154x6KSFS_A03x6KSF_11xaP_ACT      6KSFS_6KSF   50
C_THDxL_0154x6KSFS_A03x6KSF_11xaP_ACT      6KSFS_6KSF   60
C_THDxL_0154x6KSFS_A03x6KSF_11xaP_ACT      6KSFS_6KSF   65
C_THDxL_0154x6KSFS_A03x6KSF_11xaP_ACT      6KSFS_6KSF   43
C_THDxL_0154x6KSFS_A03xaP_ACT                  6KSFS    70
C_THDxL_0154x6KSFS_A03xaP_ACT                  6KSFS    55
C_THDxL_0154x6KSFS_A03xaP_ACT                  6KSFS    43
C_THDxL_0154x6KSFS_A03xaP_ACT                  6KSFS    11
C_THDxL_0154x6KSFS_B03x6KSF_11xaP_ACT     6KSFS_6KSF    67
C_THDxL_0154x6KSFS_B03x6KSF_11xaP_ACT     6KSFS_6KSF    36
C_THDxL_0154x6KSFS_B03x6KSF_11xaP_ACT     6KSFS_6KSF    39
C_THDxL_0154x6KSFS_B03x6KSF_11xaP_ACT     6KSFS_6KSF    33
C_THDxL_0154x6KSFS_B03x6KSF_11xaP_ACT     6KSFS_6KSF    20
C_THDxL_0154x6KSFS_B03xaP_ACT                  6KSFS    21
C_THDxL_0154x6KSFS_B03xaP_ACT                  6KSFS    22
C_THDxL_0154x6KSFS_B03xaP_ACT                  6KSFS    17
C_THDxL_0154x6KSFS_B03xaP_ACT                  6KSFS    30
C_THDxL_0154x6KSFS_C03xaP_ACT                  6KSFS    30 ## added
C_THDxL_0154x6KSFS_D03xaP_ACT                  6KSFS    30 ## added
")

library(stringr)
    my_df2 <- my_df[ (str_detect( my_df$field_pointpath, "_A03") & my_df$value <= 30) 
| ( str_detect(my_df$field_pointpath, "_B03")  & my_df$value <= 60 ), ]

      

Please let me know if you want this.

+1


source







All Articles