Piyush Sachdeva - Homework 5
Piyush Sachdeva - Homework 5
Part 1
Homework 5
#loading the data
bonddata = read.table("http://www.stat.cmu.edu/~cschafer/MSCF/bonddata.txt",
sep=",", header=T)
#creating the data frame
attach(bonddata)
newdata=as.data.frame(cbind(weight, current_coupon, time_to_maturity,
reporting_delay, trade_size,
curve_based_price, received_time_diff_last1,
trade_price_last1,
trade_size_last1,curve_based_price_last1,
is_callable, trade_type, trade_type_last1,
trade_price))
#removing influential observation # 1457
newdata=as.data.frame(newdata[-1457,])
#initial histograms of weight, time_to_maturity, trade_size, trade_size_last1
hist(newdata$weight, main="Weight")
#log transformation of the previous variables (to spread the data more evenly)
log_weight=log(newdata$weight)
hist(log_weight, main="log Weight")
log_time_to_maturity=log(newdata$time_to_maturity)
hist(log_time_to_maturity, main="log Time to maturity")
log_trade_size=log(newdata$trade_size)
hist(log_trade_size, main="log Trade size")
log_trade_size_last1=log(newdata$trade_size_last1)
hist(log_trade_size_last1, main="log Trade size last 1")
#transformation of reporting delay and received time diff last1 into a categorical va
riable
categorical_reporting_delay=as.factor(cut(newdata$reporting_delay,c(-Inf,2,10,100,Inf
)))
categorical_received_time_diff_last1=as.factor(cut(newdata$received_time_diff_last1,c
(-Inf,500,75000,4000000,Inf)))
#fitting GAM from mgcv
library(mgcv)
## Loading required package: nlme
## This is mgcv 1.8-3. For overview type 'help("mgcv-package")'.
Family: gaussian
Link function: identity
Formula:
trade_price ~ s(log_weight) + s(current_coupon) + s(log_time_to_maturity) +
categorical_reporting_delay + s(log_trade_size) + s(curve_based_price) +
categorical_received_time_diff_last1 + s(trade_price_last1) +
s(log_trade_size_last1) + s(curve_based_price_last1) + is_callable +
trade_type + trade_type_last1
Parametric coefficients:
Estimate Std. Error t value
(Intercept)
105.56558
0.22310 473.181
categorical_reporting_delay2
-0.23967
0.09514 -2.519
categorical_reporting_delay3
-0.29058
0.09462 -3.071
categorical_reporting_delay4
-0.42087
0.12867 -3.271
categorical_received_time_diff_last12 -0.33503
0.20980 -1.597
categorical_received_time_diff_last13 -0.55544
0.27039 -2.054
categorical_received_time_diff_last14 -1.47683
0.45408 -3.252
is_callable1
-0.18380
0.12913 -1.423
trade_type3
1.55252
0.09667 16.060
trade_type4
0.73781
0.09016
8.184
trade_type_last13
-0.94159
0.09679 -9.728
trade_type_last14
-0.42791
0.09327 -4.588
Pr(>|t|)
(Intercept)
< 2e-16 ***
categorical_reporting_delay2
0.01187 *
categorical_reporting_delay3
0.00217 **
categorical_reporting_delay4
0.00110 **
categorical_received_time_diff_last12 0.11048
categorical_received_time_diff_last13 0.04012 *
categorical_received_time_diff_last14 0.00117 **
is_callable1
0.15483
trade_type3
< 2e-16 ***
trade_type4
5.63e-16 ***
trade_type_last13
< 2e-16 ***
trade_type_last14
4.83e-06 ***
--Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Approximate significance of smooth terms:
edf Ref.df
F
s(log_weight)
1.000 1.000
4.562
s(current_coupon)
2.435 3.108
2.403
p-value
0.03284 *
0.06374 .
##
##
##
##
##
##
##
##
##
##
##
s(log_time_to_maturity)
5.231 6.444
1.751 0.09992 .
s(log_trade_size)
1.681 2.115 12.755 2.28e-06 ***
s(curve_based_price)
8.510 8.936 19.838 < 2e-16 ***
s(trade_price_last1)
4.846 6.217 278.604 < 2e-16 ***
s(log_trade_size_last1)
4.744 5.758
3.180 0.00497 **
s(curve_based_price_last1) 8.948 8.994
6.757 1.47e-09 ***
--Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
R-sq.(adj) =
GCV = 2.0643
0.985
Deviance explained = 98.5%
Scale est. = 2.0014
n = 1619
## null device
##
1
plot(as.numeric(holdgam$fitted.values),as.numeric(holdgam$residuals),
pch=16,xlab="Fitted Values", ylab="Residuals",main="Fitted values vs. Residuals"
,cex.axis=1.3,cex.lab=1.3)
abline(h = 0,lwd=2,col=4,lty=2)
print("Analyzing the fitted values vs. the residuals one can argue that there is no
strong evidence of uneven variance across the data. Also it seems reasonable to
assume that the data are spread around zero.")
## [1] "Analyzing the fitted values vs. the residuals one can argue that there is no
strong evidence of uneven variance across the data. Also it seems reasonable to assum
e that the data are spread around zero."
#Actual response vs fitted values
plot(as.numeric(holdgam$fitted.values),as.numeric(transformeddata$trade_price),
pch=16,xlab="Fitted Values", ylab="response",main="Fitted values vs. response"
,cex.axis=1.3,cex.lab=1.3)
abline(a=0,b=1,lwd=2,col=4,lty=1)
print("Analyzing the fitted values vs. the response one can argue that the model is r
easonably good in its predicting power given that the data are scattered around the p
erfect agreement line here plotted. This introduces some worries as the prediction is
too good which might indicate possible overffiting or spurious relationships between
the regressors and the response.")
## [1] "Analyzing the fitted values vs. the response one can argue that the model is
reasonably good in its predicting power given that the data are scattered around the
perfect agreement line here plotted. This introduces some worries as the prediction i
s too good which might indicate possible overffiting or spurious relationships betwee
n the regressors and the response."
#qq plot
qqnorm(as.numeric(holdgam$residuals),cex.axis=1.3,cex.lab=1.3,pch=16,main="QQ PLot")
qqline(as.numeric(holdgam$residuals))
print("There is clear evidence of heavy tails on both the lower and upper end of the
plot, perhaps modifying the normality assumption would be advisable.")
## [1] "There is clear evidence of heavy tails on both the lower and upper end of the
plot, perhaps modifying the normality assumption would be advisable."
#fitting the linear model
holdlinear=gam(trade_price~log_weight+current_coupon+log_time_to_maturity+
categorical_reporting_delay+log_trade_size+curve_based_price+
categorical_received_time_diff_last1+trade_price_last1+log_trade_siz
e_last1+
curve_based_price_last1+is_callable+trade_type+trade_type_last1,data
=transformeddata)
summary(holdlinear)
##
##
##
##
##
##
Family: gaussian
Link function: identity
Formula:
trade_price ~ log_weight + current_coupon + log_time_to_maturity +
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
##
(Intercept)
*
log_weight
*
current_coupon
log_time_to_maturity
categorical_reporting_delay2
.
categorical_reporting_delay3
**
categorical_reporting_delay4
**
log_trade_size
***
curve_based_price
***
categorical_received_time_diff_last12 .
categorical_received_time_diff_last13 *
categorical_received_time_diff_last14 ***
trade_price_last1
***
log_trade_size_last1
***
curve_based_price_last1
***
is_callable1
trade_type3
***
trade_type4
***
trade_type_last13
***
trade_type_last14
***
--Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
R-sq.(adj) =
GCV =
2.2
0.984
Deviance explained = 98.4%
Scale est. = 2.1728
n = 1619
plot(as.numeric(holdlinear$fitted.values),as.numeric(holdlinear$residuals),
pch=16,xlab="Fitted Values", ylab="Residuals",main="Fitted values vs. Residuals"
,cex.axis=1.3,cex.lab=1.3)
abline(h = 0,lwd=2,col=3,lty=2)
print("Analyzing the fitted values vs. the residuals one can argue that there is no s
trong evidence of uneven variance across the data. Also it seems reasonable to assume
that the data are spread around zero.")
## [1] "Analyzing the fitted values vs. the residuals one can argue that there is no
strong evidence of uneven variance across the data. Also it seems reasonable to assum
e that the data are spread around zero."
#Actual response vs fitted values
plot(as.numeric(holdgam$fitted.values),as.numeric(transformeddata$trade_price),
pch=16,xlab="Fitted Values", ylab="response",main="Fitted values vs. response"
,cex.axis=1.3,cex.lab=1.3)
abline(a=0,b=1,lwd=2,col=3,lty=1)
print("Analyzing the fitted values vs. the response one can argue that the model is r
easonably good in its predicting power given that the data are scattered around the p
erfect agreement line here plotted. This introduces some worries as the prediction is
too good which might indicate possible overffiting or spurious relationships between
the regressors and the response.")
## [1] "Analyzing the fitted values vs. the response one can argue that the model is
reasonably good in its predicting power given that the data are scattered around the
perfect agreement line here plotted. This introduces some worries as the prediction i
s too good which might indicate possible overffiting or spurious relationships betwee
n the regressors and the response."
#qq plot
qqnorm(as.numeric(holdgam$residuals),cex.axis=1.3,cex.lab=1.3,pch=4,main="QQ PLot")
qqline(as.numeric(holdgam$residuals))
print("There is clear evidence of heavy tails on both the lower and upper end of the
plot, perhaps modifying the normality assumption would be advisable.")
## [1] "There is clear evidence of heavy tails on both the lower and upper end of the
plot, perhaps modifying the normality assumption would be advisable."
#AIC comparison
holdgam$aic
## [1] 5768.452
holdlinear$aic
## [1] 5872.798
print("Analyzing diagnostic plots for both models and their AIC values (which are dis
tribution dependant and given that the qq plots are showing little reliability on the
normality assumption), one could argue that there is no justification in adding more
complexity in the model given that the simple linear relationships seem to explain as
much variability as in the complex model.")
## [1] "Analyzing diagnostic plots for both models and their AIC values (which are di
stribution dependent and given that the qq plots are showing little reliability on th
e normality assumption), one could argue that there is no justification in adding mor
e complexity in the model given that the simple linear relationships seem to explain
as much variability as in the complex model."
Lucas Triana
Part 2
Homework 5
market_symbols=read.table("http://www.stat.cmu.edu/~cschafer/MSCF/Project/ChallengeSy
mbols2015.txt")
#including the following predictors
#^GSPC S&P500
#^VIX CBOE Volatility Index
#^VXN CBOE NASDAQ Volatility Index
#^VXO CBOE S&P 100 Volatility Index
my_symbols=read.table("mySymbols.txt")
library(quantmod)
market_symbols=as.vector(market_symbols$V1)
my_symbols=as.vector(my_symbols$V1)
#vectors that will store the volatility for the two months
VOLMONTH1=seq(length(data))
VOLMONTH2=seq(length(data))
#vectors that will store the the price values and returns
returns=data.frame(matrix(ncol = length(my_symbols)*4, nrow =
length(market_symbols)))
pricePerasset=data.frame(matrix(ncol = length(my_symbols)*4, nrow =
length(market_symbols)))
#loop that renames the columns for the data frames needed in the regression
j=1
k=1
for (i in 1:length(my_symbols)*4)
{
colname=sprintf("%s_weekly return", my_symbols[k])
name=sprintf("%s%d", "X", j)
names(returns)[names(returns)==name] <- colname
colname=sprintf("%s_weekly price", my_symbols[k])
names(pricePerasset)[names(pricePerasset)==name] <- colname
j=j+1
colname=sprintf("%s_monthly return", my_symbols[k])
name=sprintf("%s%d", "X", j)
names(returns)[names(returns)==name] <- colname
colname=sprintf("%s_monthly price", my_symbols[k])
names(pricePerasset)[names(pricePerasset)==name] <- colname
j=j+1
colname=sprintf("%s_quarterly return", my_symbols[k])
name=sprintf("%s%d", "X", j)
names(returns)[names(returns)==name] <- colname
colname=sprintf("%s_quarterly price", my_symbols[k])
names(pricePerasset)[names(pricePerasset)==name] <- colname
j=j+1
name=sprintf("%s%d", "X", j)
colname=sprintf("%s_yearly return", my_symbols[k])
#weekly returns
returns[length(market_symbols)j,9]=weeklyReturn(asset,type="log")[length(weeklyReturn(asset,type="log"))-j]
#monthly returns
returns[length(market_symbols)j,10]=monthlyReturn(asset,type="log")[length(monthlyReturn(asset,type="log"))-j]
#quarterly returns
returns[length(market_symbols)j,11]=quarterlyReturn(asset,type="log")[length(quarterlyReturn(asset,type="log"))-j]
#yearly returns
returns[length(market_symbols)j,12]=quarterlyReturn(asset,type="log")[length(quarterlyReturn(asset,type="log"))-j]
}
#^VXO data
asset=getSymbols(my_symbols[4], from=(Sys.Date()-(7560)), to=(Sys.Date()-(30)),
auto.assign=F)
print(my_symbols[4])
for(j in 0:(length(market_symbols)-1))
{
#price last week close
pricePerasset[length(market_symbols)-j,13]=asset$VXO.Adjusted[nrow(asset)-5-j] #5
trading days
#price last month close
pricePerasset[length(market_symbols)-j,14]=asset$VXO.Adjusted[nrow(asset)-21-j] #
21 trading days
#price last quarter close
pricePerasset[length(market_symbols)-j,15]=asset$VXO.Adjusted[nrow(asset)-63-j]
#63 trading days
#price last year close
pricePerasset[length(market_symbols)-j,16]=asset$VXO.Adjusted[nrow(asset)-252-j]
#252 trading days
#weekly returns
returns[length(market_symbols)j,13]=weeklyReturn(asset,type="log")[length(weeklyReturn(asset,type="log"))-j]
#monthly returns
returns[length(market_symbols)j,14]=monthlyReturn(asset,type="log")[length(monthlyReturn(asset,type="log"))-j]
#quarterly returns
returns[length(market_symbols)j,15]=quarterlyReturn(asset,type="log")[length(quarterlyReturn(asset,type="log"))-j]
#yearly returns
returns[length(market_symbols)j,16]=quarterlyReturn(asset,type="log")[length(quarterlyReturn(asset,type="log"))-j]
}
#Computes the daily returns and volatility of the 70 initial stocks for two months
for (i in 1:length(market_symbols))
{
month1=getSymbols(market_symbols[i], from=(Sys.Date()-60), to=(Sys.Date()-30),
auto.assign=F)
DAILYRETURNS= dailyReturn(month1,type="log")
volatilitymonth1=sqrt(sum(DAILYRETURNS^2)/length(DAILYRETURNS))
VOLMONTH1[i]=volatilitymonth1
month2=getSymbols(market_symbols[i], from=(Sys.Date()-32), to=Sys.Date(),
auto.assign=F)
DAILYRETURNS2= dailyReturn(month2,type="log")
volatilitymonth2= sqrt(sum(DAILYRETURNS2^2)/length(DAILYRETURNS2))
VOLMONTH2[i]=volatilitymonth2
}
data=data.frame(cbind(returns,pricePerasset,as.data.frame(VOLMONTH1),as.data.frame(VO
LMONTH2)))
noNAdata=na.omit(data) #removing N/A data
#histogram drawing of the predictors and response to see if any transformations are
needed
for (i in 1:ncol(noNAdata)){
hist(as.matrix(noNAdata[i]),main=colnames(noNAdata)[i],xlab=i)
}
#noticing that several columns of the data might need transformations
coltotransform=c(3,4,17,19,21,22,23,24,25,26,27,29,30,31,32,33)
skew=seq(length(coltotransform))
#columns whose skewness is to be less seem to be largely positive or negative are to
be transformed:
#two transformations will take place, log(x) (if all data are positive), x^2 or e^x
depending on the skew
library(moments)
counter=0
for(i in 1:length(coltotransform)){
skew[i]=skewness(as.matrix(noNAdata[coltotransform[i]]),na.rm = FALSE)
-140 -60
log-Likelihood
95%
-2
-1
#full model
linearmod=step(fullmod, direction="both")
summary(linearmod)
Call:
lm(formula = VOLMONTH2 ~ X.GSPC_weekly.return + X.VXN_weekly.return +
X.VXN_monthly.return + X.VXO_monthly.return + X.GSPC_monthly.price +
X.VIX_quarterly.price + X.VXN_yearly.price + X.VXO_monthly.price +
VOLMONTH1, data = noNAdata)
Residuals:
Min
1Q
-0.007720 -0.002627
Median
0.000164
3Q
0.001879
Max
0.009041
Coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept)
1.550e-01 5.348e-02
2.898 0.00564 **
X.GSPC_weekly.return -1.538e-01 8.126e-02 -1.893 0.06439 .
X.VXN_weekly.return
-1.418e-02 9.224e-03 -1.538 0.13068
X.VXN_monthly.return -1.194e-02 7.371e-03 -1.620 0.11170
X.VXO_monthly.return
1.667e-02 6.542e-03
2.548 0.01409 *
X.GSPC_monthly.price -5.809e-05 2.196e-05 -2.645 0.01102 *
X.VIX_quarterly.price 7.347e-03 3.705e-03
1.983 0.05308 .
X.VXN_yearly.price
3.345e-03 7.823e-04
4.275 9.02e-05 ***
X.VXO_monthly.price
-1.412e-02 6.311e-03 -2.237 0.02997 *
VOLMONTH1
1.249e-02 9.805e-04 12.736 < 2e-16 ***
--Signif. codes: 0 *** 0.001 ** 0.01 * 0.05 . 0.1 1
Residual standard error: 0.003814 on 48 degrees of freedom
Multiple R-squared: 0.8028, Adjusted R-squared: 0.7658
F-statistic: 21.71 on 9 and 48 DF, p-value: 4.576e-14
#Influential observation search
cookd=as.numeric(cooks.distance(linearmod))
plot(cookd,xlab="Observation",ylab="Cook's Distance",main="Cook's Distance")
lines(c(1,length(cookd)),c(4/length(cookd),4/length(cookd)),lwd=2,col=3,lty=2)
0.20
0.00
Cook's Distance
Cook's Distance
10
20
30
40
50
60
Observation
#residuals vs fitted values
plot(as.numeric(linearmod$fitted.values),as.numeric(linearmod$residuals),
pch=16,xlab="Fitted Values", ylab="Residuals",main="Fitted values vs. Residuals"
,cex.axis=1.3,cex.lab=1.3)
abline(h = 0,lwd=2,col=3,lty=2)
Residuals
-0.005
0.005
0.020
Fitted Values
Residuals
-0.005
Evolution of residuals
20
40
Time
60
response
0.01
0.005
0.020
Fitted Values
#qq plot
qqnorm(as.numeric(finalmod$residuals),cex.axis=1.3,cex.lab=1.3,pch=16,main="QQ PLot")
qqline(as.numeric(finalmod$residuals))
Sample Quantiles
-0.005
QQ PLot
-2 -1 0 1 2
Theoretical Quantiles
#fit gam
library(mgcv)
attributes(noNAdata)
#noticing that there is a lack of data compared to the number of predictors and rows
in the
#data frame, those variables chosen by glm will be the ones included in gam
#(data frame noNAdata of size 58x34)
holdgam=gam(VOLMONTH2~s(X.GSPC_weekly.return)+(X.VXN_weekly.return)+s(X.VXN_monthly.r
eturn)+
(X.VXO_monthly.return)+s(X.GSPC_monthly.price)+s(X.VIX_quarterly.price)+
(X.VXN_yearly.price)+s(X.VXO_monthly.price)+s(VOLMONTH1)
,data=noNAdata)
summary(holdgam)
Family: gaussian
Link function: identity
Formula:
VOLMONTH2 ~ s(X.GSPC_weekly.return) + (X.VXN_weekly.return) +
s(X.VXN_monthly.return) + (X.VXO_monthly.return) + s(X.GSPC_monthly.price
) +
s(X.VIX_quarterly.price) + (X.VXN_yearly.price) + s(X.VXO_monthly.price)
+
s(VOLMONTH1)
Parametric coefficients:
Estimate Std. Error t value Pr(>|t|)
(Intercept)
-0.0255803 0.0110534 -2.314 0.025176 *
X.VXN_weekly.return -0.0076158 0.0091063 -0.836 0.407304
X.VXO_monthly.return 0.0147098 0.0061787
2.381 0.021479 *
X.VXN_yearly.price
0.0028573 0.0007523
3.798 0.000426 ***
--Signif. codes: 0 *** 0.001 ** 0.01 * 0.05 . 0.1 1
Approximate significance of smooth terms:
edf Ref.df
F p-value
s(X.GSPC_weekly.return) 1.000 1.000 1.922 0.1722
s(X.VXN_monthly.return) 1.000 1.000 1.691 0.1999
s(X.GSPC_monthly.price) 1.000 1.000 5.514 0.0231 *
.
*
***
0.1 1
R-sq.(adj) =
0.8
Deviance explained = 83.8%
GCV = 1.571e-05 Scale est. = 1.2451e-05 n = 58
-0.02
-0.005
s(X.VXN_monthly.return,1)
0.010
-0.010
s(X.GSPC_weekly.return,1)
-0.4
0.2
1900 2050
0.006
-0.002
s(X.VIX_quarterly.price,1)
0.010
-0.005
s(X.GSPC_monthly.price,1)
X.GSPC_weekly.returnX.VXN_monthly.return
2.6 3.0
X.GSPC_monthly.price X.VIX_quarterly.price
X.VXO_monthly.price
0.01
-0.02
3.0
s(VOLMONTH1,2.13)
-0.010
s(X.VXO_monthly.price,1.9)
2.4
-5.5
-3.5
VOLMONTH1
Residuals
-0.006
0.005
0.020
0.035
Fitted Values
response
0.01
0.005
0.020
0.035
Fitted Values
Sample Quantiles
-0.006
#qq plot
qqnorm(as.numeric(holdgam$residuals),cex.axis=1.3,cex.lab=1.3,pch=16,main="QQ PLot")
qqline(as.numeric(holdgam$residuals))
QQ PLot
-2 -1 0 1 2
Theoretical Quantiles
#fitting PPR
holdppr=ppr(VOLMONTH2~(X.GSPC_weekly.return)+(X.VXN_weekly.return)+(X.VXN_monthly.ret
urn)+
(X.VXO_monthly.return)+(X.GSPC_monthly.price)+(X.VIX_quarterly.price)+
(X.VXN_yearly.price)+(X.VXO_monthly.price)+(VOLMONTH1),nterms=2
,data=noNAdata,sm.method="gcvspline")
summary(holdppr)
Call:
ppr(formula = VOLMONTH2 ~ (X.GSPC_weekly.return) + (X.VXN_weekly.return) +
(X.VXN_monthly.return) + (X.VXO_monthly.return) + (X.GSPC_monthly.price)
+
(X.VIX_quarterly.price) + (X.VXN_yearly.price) + (X.VXO_monthly.price) +
(VOLMONTH1), data = noNAdata, nterms = 2, sm.method = "gcvspline")
Goodness of fit:
2 terms
0.0001070832
Projection direction vectors:
term 1
X.GSPC_weekly.return -0.339424629
X.VXN_weekly.return
0.186715643
X.VXN_monthly.return -0.468685910
X.VXO_monthly.return
0.515409817
X.GSPC_monthly.price -0.001276822
X.VIX_quarterly.price 0.263574043
X.VXN_yearly.price
0.035662202
X.VXO_monthly.price
-0.381815004
VOLMONTH1
0.384820779
term 2
-0.345525364
-0.489083752
0.539051462
-0.443739648
-0.001519311
-0.114543181
0.122195934
0.343320609
-0.089468527
Residuals
-0.003
0.01
0.03
Fitted Values
response
0.01
0.01
0.03
Fitted Values
Sample Quantiles
-0.003
#qq plot
qqnorm(as.numeric(holdppr$residuals),cex.axis=1.3,cex.lab=1.3,pch=16,main="QQ PLot")
qqline(as.numeric(holdppr$residuals))
QQ PLot
-2 -1 0 1 2
Theoretical Quantiles
#fit NNET
library(nnet)
holdnnet=nnet(VOLMONTH2~(X.GSPC_weekly.return)+(X.VXN_weekly.return)+(X.VXN_monthly.r
eturn)+
(X.VXO_monthly.return)+(X.GSPC_monthly.price)+(X.VIX_quarterly.price)+
(X.VXN_yearly.price)+(X.VXO_monthly.price)+(VOLMONTH1),
data=noNAdata,size=4, lineout=TRUE, decay=0.001, maxit=2000)
summary(holdnnet)
a 9-4-1 network with 45 weights
options were - decay=0.001
b->h1 i1->h1 i2->h1 i3->h1 i4->h1 i5->h1 i6->h1 i7->h1 i8->h1 i9->h1
0.00
0.00
0.00
0.00
0.00
0.01
0.00
0.00
0.00
0.00
b->h2 i1->h2 i2->h2 i3->h2 i4->h2 i5->h2 i6->h2 i7->h2 i8->h2 i9->h2
0.00
0.00
0.00
0.00
0.00
0.01
0.00
0.00
0.00
0.00
b->h3 i1->h3 i2->h3 i3->h3 i4->h3 i5->h3 i6->h3 i7->h3 i8->h3 i9->h3
0.00
0.00
0.00
0.00
0.00
0.01
0.00
0.00
0.00
0.00
b->h4 i1->h4 i2->h4 i3->h4 i4->h4 i5->h4 i6->h4 i7->h4 i8->h4 i9->h4
0.00
0.00
0.00
0.00
0.00
0.01
0.00
0.00
0.00
0.00
b->o h1->o h2->o h3->o h4->o
-0.81 -0.81 -0.81 -0.81 -0.81
#residuals vs fitted values
plot(as.numeric(holdnnet$fitted.values),as.numeric(holdnnet$residuals),
pch=16,xlab="Fitted Values", ylab="Residuals",main="Fitted values vs. Residuals"
,cex.axis=1.3,cex.lab=1.3)
abline(h = 0,lwd=2,col=2,lty=2)
Residuals
-0.01
0.01723805 0.01723830
Fitted Values
response
0.01
0.01723805 0.01723830
Fitted Values
#qq plot
qqnorm(as.numeric(holdppr$residuals),cex.axis=1.3,cex.lab=1.3,pch=16,main="QQ PLot")
qqline(as.numeric(holdppr$residuals))
Sample Quantiles
-0.003
QQ PLot
-2 -1 0 1 2
Theoretical Quantiles
save(finalmod, file="LTRIANAL_models.Robj")