Subset of data.frame and calculate frequency
I want to count the number of times the values 1 and -1 are found in each row of my data.frame
df <-
Chr start end value.1 value.2 value.3
1 68580000 68640000 0 1 1
1 115900000 116260000 NA -1 1
1 173500000 173680000 -1 -1 1
1 173500000 173680000 1 1 -1
expected output <-
Chr start end value.1 value.2 value.3 freq.1 freq.-1
1 68580000 68640000 0 1 1 2 0
1 115900000 116260000 0 -1 1 1 1
1 173500000 173680000 -1 -1 1 1 2
1 173500000 173680000 1 1 -1 2 1
+3
source to share
4 answers
Another option, using `table`:
# For every row, count how often 1 and -1 occur in the value columns
# (columns 4 to 6) and store the counts in freq.1 and freq.-1.
# The whole matrix is compared against each target at once, so no per-row
# factor()/table() call is needed; NA cells are skipped via na.rm = TRUE.
values <- as.matrix(df[, 4:6])
targets <- c(1L, -1L)
df[paste("freq", targets, sep = ".")] <- lapply(
  targets,
  # rowSums() returns doubles; convert back so the counts stay integer.
  function(target) as.integer(rowSums(values == target, na.rm = TRUE))
)
df
# Chr start end value.1 value.2 value.3 freq.1 freq.-1
#1 1 68580000 68640000 0 1 1 2 0
#2 1 115900000 116260000 NA -1 1 1 1
#3 1 173500000 173680000 -1 -1 1 1 2
#4 1 173500000 173680000 1 1 -1 2 1
Data:
# Sample data: four genomic intervals (Chr, start, end) with three value
# columns; value.1 of the second row is missing.
df <- data.frame(
  Chr = c(1L, 1L, 1L, 1L),
  start = c(68580000L, 115900000L, 173500000L, 173500000L),
  end = c(68640000L, 116260000L, 173680000L, 173680000L),
  value.1 = c(0L, NA, -1L, 1L),
  value.2 = c(1L, -1L, -1L, 1L),
  value.3 = c(1L, 1L, 1L, -1L)
)
+1
source to share
A more general solution, which can be extended to any number of `value.*` columns:
# Read the example table; the first line of the text supplies the column names.
ds <- read.table(header = TRUE, text = "Chr start end value.1 value.2 value.3
1 68580000 68640000 0 1 1
1 115900000 116260000 NA -1 1
1 173500000 173680000 -1 -1 1
1 173500000 173680000 1 1 -1")
ds
# Pick every value.* column by name so the counting works for any number of
# such columns, not only the three present in this example.
value_cols <- grep("^value\\.", names(ds))
values <- unname(as.matrix(ds[value_cols]))
# x is a 2 x nrow(ds) matrix: row 1 holds the per-row count of 1s, row 2 the
# per-row count of -1s. NA cells are ignored.
x <- rbind(
  rowSums(values == 1, na.rm = TRUE),
  rowSums(values == -1, na.rm = TRUE)
)
# rowSums() returns doubles; keep the counts as integers.
storage.mode(x) <- "integer"
cbind(ds, freq.1 = x[1, ], `freq.-1` = x[2, ])
Outputs:
Chr start end value.1 value.2 value.3 freq.1 freq.-1
1 1 68580000 68640000 0 1 1 2 0
2 1 115900000 116260000 NA -1 1 1 1
3 1 173500000 173680000 -1 -1 1 1 2
4 1 173500000 173680000 1 1 -1 2 1
+1
source to share