Using a loop for web links
I want to download data from the internet, but my code is too long and error-prone. Is there a way to loop over the web links? The only values that change between the links are the week number and the matchup ids (mid1 and mid2).
Small example from my code:
# Load the XML package, which provides readHTMLTable().
library(XML)
# import - week 1
# Fetch the six matchup pages for week 1; the URLs differ only in the
# mid1/mid2 pair (1-2, 3-4, 5-6, 7-8, 9-10, 11-12).
data11=readHTMLTable(doc = "http://baseball.fantasysports.yahoo.com/b1/2276/matchup?week=1&mid1=1&mid2=2")
data12=readHTMLTable(doc = "http://baseball.fantasysports.yahoo.com/b1/2276/matchup?week=1&mid1=3&mid2=4")
data13=readHTMLTable(doc = "http://baseball.fantasysports.yahoo.com/b1/2276/matchup?week=1&mid1=5&mid2=6")
data14=readHTMLTable(doc = "http://baseball.fantasysports.yahoo.com/b1/2276/matchup?week=1&mid1=7&mid2=8")
data15=readHTMLTable(doc = "http://baseball.fantasysports.yahoo.com/b1/2276/matchup?week=1&mid1=9&mid2=10")
data16=readHTMLTable(doc = "http://baseball.fantasysports.yahoo.com/b1/2276/matchup?week=1&mid1=11&mid2=12")
# Keep only the fourth element of each result.
# NOTE(review): assumes the wanted table is always at position 4 on every page - confirm.
data11 <- data11[[4]]
data12 <- data12[[4]]
data13 <- data13[[4]]
data14 <- data14[[4]]
data15 <- data15[[4]]
data16 <- data16[[4]]
# Stack the six week-1 matchup tables into one table.
mlb.data1 <- rbind(data11, data12, data13, data14, data15, data16)
# import - week 2
# Same six matchups with week=2; data11..data16 are overwritten, which is safe
# only because mlb.data1 has already been built from them.
data11=readHTMLTable(doc = "http://baseball.fantasysports.yahoo.com/b1/2276/matchup?week=2&mid1=1&mid2=2")
data12=readHTMLTable(doc = "http://baseball.fantasysports.yahoo.com/b1/2276/matchup?week=2&mid1=3&mid2=4")
data13=readHTMLTable(doc = "http://baseball.fantasysports.yahoo.com/b1/2276/matchup?week=2&mid1=5&mid2=6")
data14=readHTMLTable(doc = "http://baseball.fantasysports.yahoo.com/b1/2276/matchup?week=2&mid1=7&mid2=8")
data15=readHTMLTable(doc = "http://baseball.fantasysports.yahoo.com/b1/2276/matchup?week=2&mid1=9&mid2=10")
data16=readHTMLTable(doc = "http://baseball.fantasysports.yahoo.com/b1/2276/matchup?week=2&mid1=11&mid2=12")
# Keep only the fourth element of each result (same assumption as for week 1).
data11 <- data11[[4]]
data12 <- data12[[4]]
data13 <- data13[[4]]
data14 <- data14[[4]]
data15 <- data15[[4]]
data16 <- data16[[4]]
# Stack the six week-2 matchup tables into one table.
mlb.data2 <- rbind(data11, data12, data13, data14, data15, data16)
# import - week 3
# Same six matchups with week=3; data11..data16 are overwritten again.
data11=readHTMLTable(doc = "http://baseball.fantasysports.yahoo.com/b1/2276/matchup?week=3&mid1=1&mid2=2")
data12=readHTMLTable(doc = "http://baseball.fantasysports.yahoo.com/b1/2276/matchup?week=3&mid1=3&mid2=4")
data13=readHTMLTable(doc = "http://baseball.fantasysports.yahoo.com/b1/2276/matchup?week=3&mid1=5&mid2=6")
data14=readHTMLTable(doc = "http://baseball.fantasysports.yahoo.com/b1/2276/matchup?week=3&mid1=7&mid2=8")
data15=readHTMLTable(doc = "http://baseball.fantasysports.yahoo.com/b1/2276/matchup?week=3&mid1=9&mid2=10")
data16=readHTMLTable(doc = "http://baseball.fantasysports.yahoo.com/b1/2276/matchup?week=3&mid1=11&mid2=12")
# Keep only the fourth element of each result (same assumption as for week 1).
data11 <- data11[[4]]
data12 <- data12[[4]]
data13 <- data13[[4]]
data14 <- data14[[4]]
data15 <- data15[[4]]
data16 <- data16[[4]]
# Stack the six week-3 matchup tables into one table.
mlb.data3 <- rbind(data11, data12, data13, data14, data15, data16)
# add number of week
# Tag every row with the week it was downloaded for, so the weeks remain
# distinguishable after they are combined below.
mlb.data1$week <- 1
mlb.data2$week <- 2
mlb.data3$week <- 3
# complete table
# Combine the three weekly tables into the final result.
mlb.complet <- rbind(mlb.data1, mlb.data2, mlb.data3)
+3
source to share
1 answer
This should work. Please note that each link returns a list of two tables, so you need to pick out the table you want after calling the function readHTMLTable.
# Download every matchup table for every week and stack them into a single
# data frame. Only the week number and the mid1/mid2 pair change between
# URLs, so both are generated in nested lapply() calls instead of being
# written out by hand.
#
# Result: `output`, one row-bound table with two extra columns:
#   WEEK - the week the rows were downloaded for
#   ID   - the mid2 value of the matchup (mid1 is always ID - 1)
output <- do.call(
  rbind,
  # Weeks 1 to 3, matching the three weeks in the hand-written version.
  lapply(1:3, function(week) {
    do.call(
      rbind,
      # mid2 takes the even ids 2, 4, ..., 12; mid1 is the odd id just below,
      # giving the pairs 1-2, 3-4, ..., 11-12.
      lapply(seq(2, 12, 2), function(id) {
        url <- paste0(
          "http://baseball.fantasysports.yahoo.com/b1/2276/matchup",
          "?week=", week,
          "&mid1=", id - 1,
          "&mid2=", id
        )
        x <- readHTMLTable(doc = url)
        # choose which tables to keep
        # NOTE(review): assumes the wanted table is named "statTable3" on
        # every page - confirm against the live pages.
        res <- x$statTable3
        if (is.null(res)) {
          # Fail loudly: assigning columns to NULL would silently build a
          # malformed list and corrupt the rbind() result.
          stop("No table named 'statTable3' found at ", url, call. = FALSE)
        }
        res$WEEK <- week
        res$ID <- id
        res
      })
    )
  })
)
+1
source to share