Automatic vlookup and multiply coefficients with R

I am trying to code a function in R (statistics programming language) that would allow me to automate the linear regression (lm) calculation

Problem: The regression is calculated using the step function, so the chosen coefficients cannot be known in advance. Two things are needed:

  • Automation of identification of coefficients selected by the step function.

  • Look up (VLOOKUP-style) and cross-multiply the second column of the results, e.g. "View(OpenCoefs)" (the coefficient estimates), with the last row (last day) of the corresponding columns of the original data frame "sp".

The desired solution would be a function that I could call by simply typing "run()" and that would return "y" for each regression, namely the forecast of the S&P 500 index for the next day (Open, Low, High, Close).

The code fetches data from the yahoo finance website, so it works if you run it.

Here is the code.

# Download the daily S&P 500 (^GSPC) price table as CSV.
# NOTE(review): this depends on the remote endpoint still serving the file --
# confirm it is reachable before relying on the script.
sp <- read.csv(
  "http://ichart.finance.yahoo.com/table.csv?s=%5EGSPC&a=03&b=1&c=1940&d=03&e=1&f=2014&g=d&ignore=.csv"
)

# Discard the adjusted-close column.
sp[["Adj.Close"]] <- NULL

# Reverse the row order (last downloaded row first).
# Presumably this puts the oldest trading day in row 1 -- confirm against the feed.
sp <- sp[seq(from = nrow(sp), to = 1), ]

# read.csv() already returns a data frame; kept as an explicit safeguard.
sp <- as.data.frame(sp)


# Day-over-day growth rates for every price/volume column:
#   Gr_<col>[i] = <col>[i] / <col>[i - 1] - 1
# Row 1 has no predecessor and is NA.
#
# Each column is computed in one vectorised step instead of one data-frame
# cell per loop iteration. This gives the same values, and it avoids the
# `2:nrow(sp)` trap, which counts backwards when sp has fewer than two rows.
#
# NOTE(review): a zero in the previous row produces Inf/NaN (unchanged from the
# element-wise version). Confirm the Volume column contains no zeros before
# feeding Gr_Volume to lm().
for (growth_col in c("Open", "High", "Low", "Close", "Volume")) {
  level <- sp[[growth_col]]
  sp[[paste0("Gr_", growth_col)]] <-
    c(NA, tail(level, -1) / head(level, -1) - 1)[seq_along(level)]
}

# 1-based row counter, kept both as a workspace vector and as a column of sp.
nRows_in_sp <- seq(from = 1, to = nrow(sp))
sp[["nRows_in_sp"]] <- nRows_in_sp


# Trailing moving averages of the price levels and of their growth rates.
# Every target column is filled the same way:
#   row 1           -> NA (never assigned)
#   rows 2 .. 1000  -> 0 (placeholder)
#   rows 1001 ..    -> mean of the source column over rows (i - 100):i,
#                      i.e. 101 consecutive observations ending at row i
#
# Fix: the placeholder for Close_Rollin used to be written to a column named
# " Close_Rollin" (leading space). That created a stray column and left
# Close_Rollin as NA for rows 2..1000. All eight columns now share one code
# path, so the names cannot drift apart.
#
# NOTE(review): the rolling-SD columns use NA and a 100-row warm-up rather than
# 0 and 1000 rows. Confirm which convention is intended; the zeros written here
# flow into the lagged *_GR_Rollin predictors.
roll_offset <- 100    # window is rows (i - roll_offset):i
roll_warmup <- 1000   # last row that receives the 0 placeholder
roll_sources <- c(
  Open_Rollin     = "Open",
  Close_Rollin    = "Close",
  Low_Rollin      = "Low",
  High_Rollin     = "High",
  Open_GR_Rollin  = "Gr_Open",
  Close_GR_Rollin = "Gr_Close",
  Low_GR_Rollin   = "Gr_Low",
  High_GR_Rollin  = "Gr_High"
)

roll_rows <- seq_len(nrow(sp))
roll_placeholder <- roll_rows >= 2 & roll_rows <= roll_warmup
roll_computed <- roll_rows[roll_rows > roll_warmup]

for (roll_target in names(roll_sources)) {
  roll_input <- sp[[roll_sources[[roll_target]]]]
  rolled <- rep(NA_real_, length(roll_input))
  rolled[roll_placeholder] <- 0
  rolled[roll_computed] <- vapply(
    roll_computed,
    function(i) mean(roll_input[(i - roll_offset):i]),
    numeric(1)
  )
  sp[[roll_target]] <- rolled
}


# Trailing standard deviations of the four price levels.
#   rows 1 .. 100 -> NA (window not yet available)
#   rows 101 ..   -> sd of the source column over rows (i - 100):i,
#                    i.e. 101 consecutive observations ending at row i
#
# Same values as the four copy-pasted loops this replaces. Each column is built
# as a plain vector and attached once, instead of writing one data-frame cell
# per iteration, and `seq_len()` replaces `2:nrow(sp)`, which counts backwards
# on very short input.
sd_offset <- 100   # window is rows (i - sd_offset):i
sd_sources <- c(
  Open_SD_Rollin  = "Open",
  Close_SD_Rollin = "Close",
  Low_SD_Rollin   = "Low",
  High_SD_Rollin  = "High"
)

sd_rows <- seq_len(nrow(sp))
sd_rows <- sd_rows[sd_rows > sd_offset]

for (sd_target in names(sd_sources)) {
  sd_input <- sp[[sd_sources[[sd_target]]]]
  rolled_sd <- rep(NA_real_, length(sd_input))
  rolled_sd[sd_rows] <- vapply(
    sd_rows,
    function(i) sd(sd_input[(i - sd_offset):i]),
    numeric(1)
  )
  sp[[sd_target]] <- rolled_sd
}


# One-row lags: <name>lag[i] = <name>[i - 1], with NA in row 1.
#
# Each lag is stored twice on purpose:
#   * as a column of sp, and
#   * as a free-standing workspace vector of the same name, because model
#     formulas written without a `data =` argument resolve these names from
#     the workspace.
#
# Replaces seventeen copy-pasted statement pairs. `head(x, -1)` is used instead
# of `x[1:(N - 1)]`, which silently returns the wrong rows when N is 1.
N <- nrow(sp)

lag_sources <- c(
  "Open", "High", "Low", "Close",
  "Gr_Open", "Gr_High", "Gr_Low", "Gr_Close", "Gr_Volume",
  "Open_GR_Rollin", "Low_GR_Rollin", "High_GR_Rollin", "Close_GR_Rollin",
  "Open_SD_Rollin", "Low_SD_Rollin", "High_SD_Rollin", "Close_SD_Rollin"
)

for (lag_source in lag_sources) {
  lag_name <- paste0(lag_source, "lag")
  lagged <- c(NA, head(sp[[lag_source]], -1))[seq_len(N)]
  assign(lag_name, lagged)   # workspace copy, e.g. Openlag
  sp[[lag_name]] <- lagged   # data-frame column of the same name
}




# Stepwise regressions of today's Open / Low / High / Close on yesterday's
# levels, growth rates, rolling growth means and rolling standard deviations.
#
# Changes from the previous version:
#   * The selected models are kept (OpenStep, LowStep, HighStep, CloseStep)
#     instead of being discarded after the coefficient table is extracted.
#     Fitted values and predictions can then be read from the model with
#     fitted() / predict(), so nothing needs cross-multiplying by hand.
#   * Variables are taken from `sp` via `data = sp`, not from loose workspace
#     vectors that merely share names with its columns.
#   * The predictor list is written once and reused for all four responses.
# The coefficient tables (OpenCoefs, LowCoefs, HighCoefs, CloseCoefs) are
# produced exactly as before.
full_formula <- Open ~ Openlag + Lowlag + Highlag + Closelag +
  Gr_Openlag + Gr_Lowlag + Gr_Highlag + Gr_Closelag + Gr_Volumelag +
  Open_GR_Rollinlag + Low_GR_Rollinlag + High_GR_Rollinlag +
  Close_GR_Rollinlag +
  Open_SD_Rollinlag + Low_SD_Rollinlag + High_SD_Rollinlag +
  Close_SD_Rollinlag

# `Low ~ .` etc. swap only the response; the right-hand side is unchanged.
OpenStep <- step(lm(full_formula, data = sp),
                 direction = "both", test = "F")
LowStep <- step(lm(update(full_formula, Low ~ .), data = sp),
                direction = "both", test = "F")
HighStep <- step(lm(update(full_formula, High ~ .), data = sp),
                 direction = "both", test = "F")
CloseStep <- step(lm(update(full_formula, Close ~ .), data = sp),
                  direction = "both", test = "F")

OpenCoefs <- coefficients(summary(OpenStep))
LowCoefs <- coefficients(summary(LowStep))
HighCoefs <- coefficients(summary(HighStep))
CloseCoefs <- coefficients(summary(CloseStep))


# Open the four coefficient tables in the interactive data viewer
# (one row per term kept by step()).
View(OpenCoefs)

View(LowCoefs)

View(HighCoefs)

View(CloseCoefs)

# Open the full working data set, including every derived column.
View(sp)

      

+3


source to share


1 answer


Your code is so bad that I should have felt sorry for you. :) Here's an edited version of your code:

library(quantmod)
# Fetch ^GSPC and keep it as a returned object instead of letting
# getSymbols() assign it into the workspace.
sp <- getSymbols("^GSPC", auto.assign = FALSE)
sp$GSPC.Adjusted <- NULL
# Strip the "GSPC." prefix so the columns are plain Open/High/Low/Close/Volume.
colnames(sp) <- sub("^GSPC\\.", "", colnames(sp))

# One-period discrete rate of change of each column.
sp$Gr_Open   <- ROC(Op(sp), type = "discrete")
sp$Gr_High   <- ROC(Hi(sp), type = "discrete")
sp$Gr_Low    <- ROC(Lo(sp), type = "discrete")
sp$Gr_Close  <- ROC(Cl(sp), type = "discrete")
sp$Gr_Volume <- ROC(Vo(sp), type = "discrete")

# Window length shared by every running statistic below.
N <- 100

# Running means of the price levels.
sp$Open_Rollin  <- runMean(sp$Open, N)
sp$High_Rollin  <- runMean(sp$High, N)
sp$Low_Rollin   <- runMean(sp$Low, N)
sp$Close_Rollin <- runMean(sp$Close, N)

# Running means of the growth rates.
sp$Open_GR_Rollin  <- runMean(sp$Gr_Open, N)
sp$High_GR_Rollin  <- runMean(sp$Gr_High, N)
sp$Low_GR_Rollin   <- runMean(sp$Gr_Low, N)
sp$Close_GR_Rollin <- runMean(sp$Gr_Close, N)

# Running standard deviations of the price levels.
sp$Open_SD_Rollin  <- runSD(sp$Open, N)
sp$High_SD_Rollin  <- runSD(sp$High, N)
sp$Low_SD_Rollin   <- runSD(sp$Low, N)
sp$Close_SD_Rollin <- runSD(sp$Close, N)

# Lag every column by one period, suffix the copies with "lag", join them to
# the originals and drop the rows that contain NA.
spLag <- lag(sp, k = 1)
colnames(spLag) <- paste0(colnames(sp), "lag")
sp <- merge(sp, spLag)
sp <- na.omit(sp)

      



You don't need to answer your first question to answer your second question. You don't need to cross-multiply coefficients with data manually. You can simply access the fitted values from the model. This requires you to save the model, though ...

# Full model: today's Open on every lagged predictor.
f <- Open ~ Openlag + Lowlag + Highlag + Closelag +
  Gr_Openlag + Gr_Lowlag + Gr_Highlag + Gr_Closelag + Gr_Volumelag +
  Open_GR_Rollinlag + Low_GR_Rollinlag + High_GR_Rollinlag + Close_GR_Rollinlag +
  Open_SD_Rollinlag + Low_SD_Rollinlag + High_SD_Rollinlag + Close_SD_Rollinlag

# Fit the full model for each response, then let step() add or drop terms.
# The other three responses reuse the Open model with only the left-hand side
# swapped.
OpenLM <- lm(f, data = sp)
OpenStep <- step(OpenLM, direction = "both", test = "F")

HighLM <- update(OpenLM, High ~ .)
HighStep <- step(HighLM, direction = "both", test = "F")

LowLM <- update(OpenLM, Low ~ .)
LowStep <- step(LowLM, direction = "both", test = "F")

CloseLM <- update(OpenLM, Close ~ .)
CloseStep <- step(CloseLM, direction = "both", test = "F")

# Last fitted value of each selected model. The commented figures are the
# output recorded when this was originally run.
# NOTE(review): fitted() is in-sample. The last value is the model's estimate
# for the final date already present in sp, built from the previous period's
# predictors. Confirm whether a true next-day forecast (predict() on a new row)
# is what is wanted.
tail(fitted(OpenStep), n = 1)
# 2013-02-01 
#    1497.91 
tail(fitted(HighStep), n = 1)
# 2013-02-01 
#    1504.02 
tail(fitted(LowStep), n = 1)
# 2013-02-01 
#   1491.934 
tail(fitted(CloseStep), n = 1)
# 2013-02-01 
#   1499.851

      

+6


source







All Articles