Exclude Rows Exceeding Threshold
I have a dataset df
field_pointpath assetclass_code value
C_THDxL_0154x6KSFS_A03x6KSF_11xaP_ACT 6KSFS_6KSF 50
C_THDxL_0154x6KSFS_A03x6KSF_11xaP_ACT 6KSFS_6KSF 60
C_THDxL_0154x6KSFS_A03x6KSF_11xaP_ACT 6KSFS_6KSF 65
C_THDxL_0154x6KSFS_A03x6KSF_11xaP_ACT 6KSFS_6KSF 43
C_THDxL_0154x6KSFS_A03xaP_ACT 6KSFS 70
C_THDxL_0154x6KSFS_A03xaP_ACT 6KSFS 55
C_THDxL_0154x6KSFS_A03xaP_ACT 6KSFS 43
C_THDxL_0154x6KSFS_A03xaP_ACT 6KSFS 11
C_THDxL_0154x6KSFS_B03x6KSF_11xaP_ACT 6KSFS_6KSF 67
C_THDxL_0154x6KSFS_B03x6KSF_11xaP_ACT 6KSFS_6KSF 36
C_THDxL_0154x6KSFS_B03x6KSF_11xaP_ACT 6KSFS_6KSF 39
C_THDxL_0154x6KSFS_B03x6KSF_11xaP_ACT 6KSFS_6KSF 33
C_THDxL_0154x6KSFS_B03x6KSF_11xaP_ACT 6KSFS_6KSF 20
C_THDxL_0154x6KSFS_B03xaP_ACT 6KSFS 21
C_THDxL_0154x6KSFS_B03xaP_ACT 6KSFS 22
C_THDxL_0154x6KSFS_B03xaP_ACT 6KSFS 17
C_THDxL_0154x6KSFS_B03xaP_ACT 6KSFS 30
I want to filter values
for specific ones field_pointpath
like:
- C_THDxL_0154x6KSFS_ A03 x6KSF_11xaP_ACT
-
C_THDxL_0154x6KSFS_ A03 x6KSF_11xaP_ACT
which has character A must not be
value
greater than 60:
Similarly ,
-
C_THDxL_0154x6KSFS_ B03 x6KSF_11xaP_ACT,
-
C_THDxL_0154x6KSFS_ B03 x6KSF_11xaP_ACT
which has character B must not have
value
more than 30
So the output should be
field_pointpath assetclass_code value
C_THDxL_0154x6KSFS_A03x6KSF_11xaP_ACT 6KSFS_6KSF 50
C_THDxL_0154x6KSFS_A03x6KSF_11xaP_ACT 6KSFS_6KSF 60
C_THDxL_0154x6KSFS_A03x6KSF_11xaP_ACT 6KSFS_6KSF 43
C_THDxL_0154x6KSFS_A03xaP_ACT 6KSFS 55
C_THDxL_0154x6KSFS_A03xaP_ACT 6KSFS 43
C_THDxL_0154x6KSFS_A03xaP_ACT 6KSFS 11
C_THDxL_0154x6KSFS_B03x6KSF_11xaP_ACT 6KSFS_6KSF 20
C_THDxL_0154x6KSFS_B03xaP_ACT 6KSFS 21
C_THDxL_0154x6KSFS_B03xaP_ACT 6KSFS 22
C_THDxL_0154x6KSFS_B03xaP_ACT 6KSFS 17
C_THDxL_0154x6KSFS_B03xaP_ACT 6KSFS 30
source to share
You can use regular expression
to filter by field_pointpath
. I am assuming you are looking for records that have _A or _B followed by 2 digits. Then you combine this with a filter on value
:
ind <- (grepl("\\_A[0-9]{2}", df$field_pointpath) & df$value > 60) |
(grepl("\\_B[0-9]{2}", df$field_pointpath) & df$value > 30)
df[!ind, ]
# field_pointpath assetclass_code value
# 1 C_THDxL_0154x6KSFS_A03x6KSF_11xaP_ACT 6KSFS_6KSF 50
# 2 C_THDxL_0154x6KSFS_A03x6KSF_11xaP_ACT 6KSFS_6KSF 60
# 4 C_THDxL_0154x6KSFS_A03x6KSF_11xaP_ACT 6KSFS_6KSF 43
# 6 C_THDxL_0154x6KSFS_A03xaP_ACT 6KSFS 55
# 7 C_THDxL_0154x6KSFS_A03xaP_ACT 6KSFS 43
# 8 C_THDxL_0154x6KSFS_A03xaP_ACT 6KSFS 11
# 13 C_THDxL_0154x6KSFS_B03x6KSF_11xaP_ACT 6KSFS_6KSF 20
# 14 C_THDxL_0154x6KSFS_B03xaP_ACT 6KSFS 21
# 15 C_THDxL_0154x6KSFS_B03xaP_ACT 6KSFS 22
# 16 C_THDxL_0154x6KSFS_B03xaP_ACT 6KSFS 17
# 17 C_THDxL_0154x6KSFS_B03xaP_ACT 6KSFS 30
source to share
df[(substr(df$field_pointpath, 20,20)=="A" & df$value <= 60) |
(substr(df$field_pointpath, 20,20)=="B" & df$value <= 30),]
1 C_THDxL_0154x6KSFS_A03x6KSF_11xaP_ACT 6KSFS_6KSF 50
2 C_THDxL_0154x6KSFS_A03x6KSF_11xaP_ACT 6KSFS_6KSF 60
4 C_THDxL_0154x6KSFS_A03x6KSF_11xaP_ACT 6KSFS_6KSF 43
6 C_THDxL_0154x6KSFS_A03xaP_ACT 6KSFS 55
7 C_THDxL_0154x6KSFS_A03xaP_ACT 6KSFS 43
8 C_THDxL_0154x6KSFS_A03xaP_ACT 6KSFS 11
13 C_THDxL_0154x6KSFS_B03x6KSF_11xaP_ACT 6KSFS_6KSF 20
14 C_THDxL_0154x6KSFS_B03xaP_ACT 6KSFS 21
15 C_THDxL_0154x6KSFS_B03xaP_ACT 6KSFS 22
16 C_THDxL_0154x6KSFS_B03xaP_ACT 6KSFS 17
17 C_THDxL_0154x6KSFS_B03xaP_ACT 6KSFS 30
source to share
You can use the following code:
# your data, with 2 lines appended that will not be selected
my_df <- read.table(header = TRUE,
text = "field_pointpath assetclass_code value
C_THDxL_0154x6KSFS_A03x6KSF_11xaP_ACT 6KSFS_6KSF 50
C_THDxL_0154x6KSFS_A03x6KSF_11xaP_ACT 6KSFS_6KSF 60
C_THDxL_0154x6KSFS_A03x6KSF_11xaP_ACT 6KSFS_6KSF 65
C_THDxL_0154x6KSFS_A03x6KSF_11xaP_ACT 6KSFS_6KSF 43
C_THDxL_0154x6KSFS_A03xaP_ACT 6KSFS 70
C_THDxL_0154x6KSFS_A03xaP_ACT 6KSFS 55
C_THDxL_0154x6KSFS_A03xaP_ACT 6KSFS 43
C_THDxL_0154x6KSFS_A03xaP_ACT 6KSFS 11
C_THDxL_0154x6KSFS_B03x6KSF_11xaP_ACT 6KSFS_6KSF 67
C_THDxL_0154x6KSFS_B03x6KSF_11xaP_ACT 6KSFS_6KSF 36
C_THDxL_0154x6KSFS_B03x6KSF_11xaP_ACT 6KSFS_6KSF 39
C_THDxL_0154x6KSFS_B03x6KSF_11xaP_ACT 6KSFS_6KSF 33
C_THDxL_0154x6KSFS_B03x6KSF_11xaP_ACT 6KSFS_6KSF 20
C_THDxL_0154x6KSFS_B03xaP_ACT 6KSFS 21
C_THDxL_0154x6KSFS_B03xaP_ACT 6KSFS 22
C_THDxL_0154x6KSFS_B03xaP_ACT 6KSFS 17
C_THDxL_0154x6KSFS_B03xaP_ACT 6KSFS 30
C_THDxL_0154x6KSFS_C03xaP_ACT 6KSFS 30 ## added
C_THDxL_0154x6KSFS_D03xaP_ACT 6KSFS 30 ## added
")
library(stringr)
my_df2 <- my_df[ (str_detect( my_df$field_pointpath, "_A03") & my_df$value <= 30)
| ( str_detect(my_df$field_pointpath, "_B03") & my_df$value <= 60 ), ]
Please let me know if you want this.
source to share