Using a loop for web links

I want to download data from the internet, but the code is too long and error prone. Is there a way to use a loop for web links? The only value that changes is a few weeks.

Small example from my code:

library(XML)

# import - week 1
data11=readHTMLTable(doc = "http://baseball.fantasysports.yahoo.com/b1/2276/matchup?week=1&mid1=1&mid2=2")
data12=readHTMLTable(doc = "http://baseball.fantasysports.yahoo.com/b1/2276/matchup?week=1&mid1=3&mid2=4")
data13=readHTMLTable(doc = "http://baseball.fantasysports.yahoo.com/b1/2276/matchup?week=1&mid1=5&mid2=6")
data14=readHTMLTable(doc = "http://baseball.fantasysports.yahoo.com/b1/2276/matchup?week=1&mid1=7&mid2=8")
data15=readHTMLTable(doc = "http://baseball.fantasysports.yahoo.com/b1/2276/matchup?week=1&mid1=9&mid2=10")
data16=readHTMLTable(doc = "http://baseball.fantasysports.yahoo.com/b1/2276/matchup?week=1&mid1=11&mid2=12")

data11 <- data11[[4]] 
data12 <- data12[[4]]
data13 <- data13[[4]]
data14 <- data14[[4]]
data15 <- data15[[4]]
data16 <- data16[[4]]

mlb.data1 <- rbind(data11, data12, data13, data14, data15, data16) 

# import - week 2
data11=readHTMLTable(doc = "http://baseball.fantasysports.yahoo.com/b1/2276/matchup?week=2&mid1=1&mid2=2")
data12=readHTMLTable(doc = "http://baseball.fantasysports.yahoo.com/b1/2276/matchup?week=2&mid1=3&mid2=4")
data13=readHTMLTable(doc = "http://baseball.fantasysports.yahoo.com/b1/2276/matchup?week=2&mid1=5&mid2=6")
data14=readHTMLTable(doc = "http://baseball.fantasysports.yahoo.com/b1/2276/matchup?week=2&mid1=7&mid2=8")
data15=readHTMLTable(doc = "http://baseball.fantasysports.yahoo.com/b1/2276/matchup?week=2&mid1=9&mid2=10")
data16=readHTMLTable(doc = "http://baseball.fantasysports.yahoo.com/b1/2276/matchup?week=2&mid1=11&mid2=12")

data11 <- data11[[4]] 
data12 <- data12[[4]]
data13 <- data13[[4]]
data14 <- data14[[4]]
data15 <- data15[[4]]
data16 <- data16[[4]]

mlb.data2 <- rbind(data11, data12, data13, data14, data15, data16) 

# import - week 3
data11=readHTMLTable(doc = "http://baseball.fantasysports.yahoo.com/b1/2276/matchup?week=3&mid1=1&mid2=2")
data12=readHTMLTable(doc = "http://baseball.fantasysports.yahoo.com/b1/2276/matchup?week=3&mid1=3&mid2=4")
data13=readHTMLTable(doc = "http://baseball.fantasysports.yahoo.com/b1/2276/matchup?week=3&mid1=5&mid2=6")
data14=readHTMLTable(doc = "http://baseball.fantasysports.yahoo.com/b1/2276/matchup?week=3&mid1=7&mid2=8")
data15=readHTMLTable(doc = "http://baseball.fantasysports.yahoo.com/b1/2276/matchup?week=3&mid1=9&mid2=10")
data16=readHTMLTable(doc = "http://baseball.fantasysports.yahoo.com/b1/2276/matchup?week=3&mid1=11&mid2=12")

data11 <- data11[[4]] 
data12 <- data12[[4]]
data13 <- data13[[4]]
data14 <- data14[[4]]
data15 <- data15[[4]]
data16 <- data16[[4]]

mlb.data3 <- rbind(data11, data12, data13, data14, data15, data16) 

# add number of week
mlb.data1$week      <- 1
mlb.data2$week      <- 2
mlb.data3$week      <- 3

# complete table
mlb.complet <- rbind(mlb.data1, mlb.data2, mlb.data3)

      

+3


source to share


1 answer


This should work, please note that the link returns a list of two tables, you need to clear it after the function readHTMLTable

.



output <- 
  do.call(rbind,
          lapply(1:2, function(week){
            do.call(rbind,
                    lapply(seq(2,12,2),function(id){
                      x <- readHTMLTable(paste0(doc = "http://baseball.fantasysports.yahoo.com/b1/2276/matchup?week=",week,"2&mid1=1&mid2=",id))
                      #choose which tables to keep
                      res <- x$statTable3
                      res$WEEK <- week
                      res$ID <- id
                      res
                    }))
          })
  )

      

+1


source







All Articles