How can I calculate the odds ratio for many 2x2 tables?

I have a large table, for example:

n1   n2    freq1   freq2   
A    C      33      44
A    C      23      19
R    E      163     56
R    E      32      12
W    Q      111     54
W    Q      12      33

      

How can I calculate the odds ratio for each pair of rows (each pair sharing n1 and n2 forms one 2x2 table), so that the result looks like this?

n1   n2    freq1   freq2   odd_ratio
A    C      33      44       0.61
A    C      23      19       0.61
R    E      163     56       1.09
R    E      32      12       1.09
W    Q      111     54       5.65
W    Q      12      33       5.65

#0.61=(33*19)/(23*44)
#1.09=(163*12)/(32*56)
#5.65=(111*33)/(12*54)

      

0


source to share


4 answers


Or try the data.table approach:

library(data.table)

# Read in the example data.
# The table is supplied inline, so it must be passed through `text =`;
# the first positional argument of read.table() is `file`, which would be
# interpreted as a path to open rather than as the data itself.
dt <- read.table(text = "n1   n2    freq1   freq2
A    C      33      44
A    C      23      19
R    E      163     56
R    E      32      12
W    Q      111     54
W    Q      12      33", header = TRUE)

# Turn the data frame into a data.table.
setDT(dt)

# Within each (n1, n2) group, treat the first two rows as a 2x2 table and
# assign its odds ratio to every row of the group:
#   (row 1 freq1 * row 2 freq2) / (row 2 freq1 * row 1 freq2)
dt[,
  odds_ratio := (freq1[1] * freq2[2]) / (freq1[2] * freq2[1]),
  by = c("n1", "n2")
]

#    n1 n2 freq1 freq2 odds_ratio
# 1:  A  C    33    44  0.6195652
# 2:  A  C    23    19  0.6195652
# 3:  R  E   163    56  1.0915179
# 4:  R  E    32    12  1.0915179
# 5:  W  Q   111    54  5.6527778
# 6:  W  Q    12    33  5.6527778

      



This is fast too:

library(microbenchmark)

# Time the grouped odds-ratio assignment over 1000 evaluations.
microbenchmark(
  dt[,
    odds_ratio := freq1[1] * freq2[2] / (freq1[2] * freq2[1]),
    by = c("n1", "n2")
  ],
  times = 1000L
)

#    Unit: milliseconds
#    expr      min       lq          median       uq        max       neval
#     ##       2.367839  2.612129    2.691221     2.838895  16.24584  1000

      

+2


source


You can do it with split-apply-combine:



# Split `tab` into one piece per "n1 n2" key, add the odds ratio of the
# piece's first two rows as a new column, then stack the pieces back together.
do.call(
  rbind,
  lapply(
    split(tab, paste(tab$n1, tab$n2)),
    function(pair) {
      # Cross products of the 2x2 table formed by rows 1 and 2 of the piece.
      numerator <- pair$freq1[1] * pair$freq2[2]
      denominator <- pair$freq1[2] * pair$freq2[1]
      pair$odd_ratio <- numerator / denominator
      pair
    }
  )
)
#       n1 n2 freq1 freq2 odd_ratio
# A C.1  A  C    33    44 0.6195652
# A C.2  A  C    23    19 0.6195652
# R E.3  R  E   163    56 1.0915179
# R E.4  R  E    32    12 1.0915179
# W Q.5  W  Q   111    54 5.6527778
# W Q.6  W  Q    12    33 5.6527778

      

+2


source


If you don't necessarily need the odds ratio to be repeated on every row of a group, and if the two rows of each 2x2 table are right next to each other, this will work:

# Indices of the first row of each consecutive two-row table (1, 3, 5, ...).
step <- seq(from = 1, to = nrow(dd), by = 2)

# One output row per table: the first two columns of its first row plus the
# odds ratio computed from that row (step) and the row after it (step + 1).
cbind(
  dd[step, 1:2],
  OR = dd$freq1[step] * dd$freq2[step + 1] / dd$freq2[step] / dd$freq1[step + 1]
)

      

0


source


# Compute one odds ratio per n1 group from the first two rows of its
# freq1/freq2 columns, then join the named result back onto `dat` by matching
# n1 against the result's names.
merge(
  dat,
  sapply(
    split(dat[, c('freq1', 'freq2')], dat$n1),
    function(grp) grp$freq1[1] * grp$freq2[2] / (grp$freq2[1] * grp$freq1[2])
  ),
  by.x = "n1",
  by.y = "row.names"
)
#-----------
  n1 n2 freq1 freq2         y
1  A  C    33    44 0.6195652
2  A  C    23    19 0.6195652
3  R  E   163    56 1.0915179
4  R  E    32    12 1.0915179
5  W  Q   111    54 5.6527778
6  W  Q    12    33 5.6527778

      

0


source







All Articles