0% found this document useful (0 votes)
73 views

Practical Assignment #2 tests your ability

Uploaded by

Kumar
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
0% found this document useful (0 votes)
73 views

Practical Assignment #2 tests your ability

Uploaded by

Kumar
Copyright
© © All Rights Reserved
We take content rights seriously. If you suspect this is your content, claim it here.
Available Formats
Download as DOCX, PDF, TXT or read online on Scribd
You are on page 1/ 31

Time Series and Forecasting

Vikasini Selvaraj

2024-12-14

#1 Check you working directory


# Check current working directory
getwd()

## [1] "C:/Users/Sys/Downloads"

#2 Set your working directory to “ANLY 565/RScript”.


# Upload "nlme" library
library(nlme)
# Set the working directory
#setwd("C:/Users/Sys/Downloads/ANLY 565 Practical Assignment")

# Confirm the change


getwd()

## [1] "C:/Users/Sys/Downloads"

#3 Download “trade.xls” data file and set the “date”


# variable to the date format and the "trade" variable to
# the numeric format. The "trade" variable represents
# the Ratio of Exports to Imports for China expressed in
percentages.

# Loading the required library for reading Excel files

library(readxl)

## Warning: package 'readxl' was built under R version 4.4.2

# Reading the trade.xls file


trade <- read_excel("trade.xls")

# Checking the structure of the dataset


str(trade)

## tibble [328 × 2] (S3: tbl_df/tbl/data.frame)


## $ date : POSIXct[1:328], format: "1992-01-01" "1992-02-01" ...
## $ trade: num [1:328] 131 106 102 105 107 ...

# Converting "date" column to date format (assuming the column name is


"date")
trade$date <- as.Date(trade$date, format = "%Y-%m-%d") # Adjust
format as needed

# Converting "trade" column to numeric format (assuming the column


name is "trade")
trade$trade <- as.numeric(trade$trade)

# Verifying the changes


str(trade)

## tibble [328 × 2] (S3: tbl_df/tbl/data.frame)


## $ date : Date[1:328], format: "1992-01-01" "1992-02-01" ...
## $ trade: num [1:328] 131 106 102 105 107 ...

#4 Create two stand alone variables: “datev” and “tradev”.


# "datev" variable should represent values of the "date" variable
# from the "trade" data set, while, "tradev" variable should
represent
# values of the "trade" variable from the "trade" data set.
# Create standalone variables
datev <- trade$date # Extract "date" column
tradev <- trade$trade # Extract "trade" column

# Verify the standalone variables


head(datev)

## [1] "1992-01-01" "1992-02-01" "1992-03-01" "1992-04-01" "1992-05-


01"
## [6] "1992-06-01"

head(tradev)

## [1] 130.7075 106.0696 101.6057 104.5100 106.5207 106.3442

#5 Use the “datev” variable and the range() function to check the time sample
# covered by the "trade" data set. What time period is covered?
# What is the frequency of the data?

# Checking the time range using the "datev" variable


date_range <- range(datev)

# Display the time period covered by the dataset


date_range

## [1] "1992-01-01" "2019-04-01"

# Calculating the frequency of the data


date_diff <- diff(datev) # Calculate the difference between
consecutive dates
frequency <- as.numeric(median(date_diff)) # Find the median
difference

# Displaying the frequency and the time period


cat("The time period covered by the dataset is from", date_range[1],
"to", date_range[2], "\n")

## The time period covered by the dataset is from 8035 to 17987

cat("The frequency of the data is approximately", frequency, "days\n")

## The frequency of the data is approximately 31 days

#6 Transform “tradev” variable from numeric format to the time series format
# by using ts() function. Label the new variable as "tradets".

# Convert the "tradev" variable to time series format using ts()


function
# Assuming the data is monthly, and it starts from the first date in
the "datev" variable

# Geting the starting year and month from the "datev" variable
start_year <- as.numeric(format(datev[1], "%Y"))
start_month <- as.numeric(format(datev[1], "%m"))

# Converting the "tradev" variable to a time series (monthly


frequency, starting from the start year and month)
tradets <- ts(tradev, start = c(start_year, start_month), frequency =
12)

# Checking the time series object


str(tradets)

## Time-Series [1:328] from 1992 to 2019: 131 106 102 105 107 ...

#7 Plot the time series graph of the “tradets”variable.


#7 Plot the time series graph of the "tradets"variable.
# Please label all axis correctly, and make sure to label the graph.

# Based on this graph does the Ratio of Exports to Imports for China
exhibit a trend?
# What about a regular seasonal fluctuation?

# Save the plot as a PNG file


png("trade_plot.png", width = 800, height = 600)
plot(tradets,
main = "Ratio of Exports to Imports for China",
xlab = "Time",
ylab = "Ratio of Exports to Imports (%)",
col = "blue",
lwd = 2)
grid()
dev.off() # Close the plotting device

## png
## 2

#8 Use “tradets” variable and window() function to create 2 new variables


# called "tradepre", "tradepost".
# The "tradepre" should include all observations for the period
# up until December 2018.(Last observation should be December 2018)
# The "tradepost" should include all observations starting from
January 2019.
# and up until the last month in the dataset.

# Define the cutoff dates


start_date_pre <- c(1992, 1) # Start of the time series (January
1992)
end_date_pre <- c(2018, 12) # End of tradepre (December 2018)

start_date_post <- c(2019, 1) # Start of tradepost (January 2019)


end_date_post <- c(2020, 12) # End of tradepost (December 2020),
adjust based on the last available date

# Creating tradepre (from the start to December 2018)


tradepre <- window(tradets, start = start_date_pre, end =
end_date_pre)

# Creating tradepost (from January 2019 to the end)


tradepost <- window(tradets, start = start_date_post)

# Checking the variables


head(tradepre)

## [1] 130.7075 106.0696 101.6057 104.5100 106.5207 106.3442

head(tradepost)

## [1] 120.3756 123.1806 130.2654 106.9088

#9 Estimate autocorrelation function and partial autocorrelation function for


# the "tradepre" variable. Does the trade ratio for China exhibit
autocorrelation?
# What process can explain this time series (white noise, random
walk, AR, etc..)?

# Estimate the autocorrelation function (ACF) for tradepre


acf(tradepre,
main = "Autocorrelation Function (ACF) for Trade Ratio (China) -
Pre 2019")

# Estimate the partial autocorrelation function (PACF) for tradepre


pacf(tradepre,
main = "Partial Autocorrelation Function (PACF) for Trade Ratio
(China) - Pre 2019")
#10 Estimate AR(q) model for the “tradepre” time series.
# Use ar() function (set aic=FALSE) and rely on the corellologram
# to determine q, the order of the model. Moreover, use maximum
likelihood method.
# After that, set aic=TRUE and estimate ar() again to see if you
have identified
# the order correctly.
# Save the estimates as "trade.ar".

# Estimate the AR model without AIC criterion


# Using the `ar()` function with aic = FALSE and allowing the model to
determine the order based on the correlogram
trade.ar_no_aic <- ar(tradepre, aic = FALSE)

# Display the model without AIC (this will give us the chosen order
'q')
trade.ar_no_aic

##
## Call:
## ar(x = tradepre, aic = FALSE)
##
## Coefficients:
## 1 2 3 4 5 6 7
8
## 0.3338 0.3504 0.1329 0.0559 -0.0398 0.0354 0.0550 -
0.0109
## 9 10 11 12 13 14 15
16
## 0.0180 -0.0143 0.0313 -0.1564 0.1071 -0.0476 -0.0137 -
0.0390
## 17 18 19 20 21 22 23
24
## 0.0123 0.1091 0.0107 -0.0438 0.0130 -0.0138 0.1398 -
0.1451
## 25
## -0.0369
##
## Order selected 25 sigma^2 estimated as 66.68

# Estimate the AR model with AIC criterion to confirm the correct


order
trade.ar_aic <- ar(tradepre, aic = TRUE)

# Display the model with AIC (this will give us the chosen order 'q'
based on AIC)
trade.ar_aic

##
## Call:
## ar(x = tradepre, aic = TRUE)
##
## Coefficients:
## 1 2 3
## 0.3195 0.3951 0.1500
##
## Order selected 3 sigma^2 estimated as 67.21

# Save the estimates of the AR model with the AIC criterion as


"trade.ar"
trade.ar <- trade.ar_aic

#11 For each of the AR coefficients estimate 95% confidence interval


# To find 95% confidence intervals you need to add and subtract 2
# standard deviations of the coefficient estimates.
# Hint you can obtain these standard deviations by applying sqrt()
# function to the diagonal elements of the asymptotic-theory
variance
# matrix of the coefficient estimates

# Assuming you have already estimated the AR model and saved it as


trade.ar

# Extracting the AR coefficients


ar_coeffs <- trade.ar$ar
print("AR Coefficients:")
## [1] "AR Coefficients:"

print(ar_coeffs)

## [1] 0.3194822 0.3950970 0.1500267

# Calculate residuals
residuals <- trade.ar$resid

# Estimate the standard error for the coefficients based on residuals


n <- length(residuals) # Number of observations
std_error <- sqrt(sum(residuals^2) / (n - length(ar_coeffs))) #
Standard error of the AR coefficients

# Calculate the 95% confidence intervals (CI = coefficient ± 2 *


standard error)
lower_bound <- ar_coeffs - 2 * std_error
upper_bound <- ar_coeffs + 2 * std_error

# Combine the coefficients and confidence intervals into a data frame


conf_intervals <- data.frame(
Coefficients = ar_coeffs,
Lower_95_CI = lower_bound,
Upper_95_CI = upper_bound
)

# Print the confidence intervals


print("95% Confidence Intervals for AR Coefficients:")

## [1] "95% Confidence Intervals for AR Coefficients:"

print(conf_intervals)

## Coefficients Lower_95_CI Upper_95_CI


## 1 0.3194822 NA NA
## 2 0.3950970 NA NA
## 3 0.1500267 NA NA

#12 Extract the residuals from the trade.ar model and estimate
# the autocorrelation function. Based on this correlogram would you
say
# trade.ar model does a good job of explaining the trade ratio in
China?

# Extract the residuals from the AR model


residuals <- trade.ar$resid

# Remove missing values from the residuals


residuals_clean <- na.omit(residuals)
# Plot the Autocorrelation Function (ACF) for the cleaned residuals
acf(residuals_clean, main = "ACF of Residuals", lag.max = 20)

#13 Use trade.ar model and predict() function to create a 4 period ahead forecast
# of the trade ratio in China. Save these predicted values as
"trade.ar.forc"

# Use the 'predict' function to create a 4-period ahead forecast


trade.ar.forc <- predict(trade.ar, n.ahead = 4)

# Print the forecasted values


print("4-period ahead forecast:")

## [1] "4-period ahead forecast:"

print(trade.ar.forc$pred)

## Jan Feb Mar Apr


## 2019 119.7888 120.6425 120.0604 119.7225

# Optionally, store the forecast values as a data frame (if you want
to view them more clearly)
forecast_values <- data.frame(
Period = 1:4,
Forecast = trade.ar.forc$pred
)
# Print the forecast data frame
print(forecast_values)

## Period Forecast
## 1 1 119.7888
## 2 2 120.6425
## 3 3 120.0604
## 4 4 119.7225

#14 Use ts.plot() function to plot side-by-side actual values of the trade ratio
# from January 2019-April 2019 period and their forecasted
counterparts.
# (tradepost and trade.ar.forc)
# Please designate red color to represent the actual observed
values,
# and blue doted lines to represent forecasted values.
# How does the ability to predict future trade ratio depends on the
# time horizon of the forecast?

# Extract actual values for the tradepost period (January 2019 - April
2019)
# Assuming tradepost was already created (from previous steps)
# Set the period range for January 2019 to April 2019 (4 periods)
actual_values <- tradepost # Already created earlier

# Extract forecasted values (trade.ar.forc from 4-period forecast)


forecast_values <- trade.ar.forc$pred # From the previous forecast
step

# Plot actual values and forecasted values side-by-side


# Plot the actual values in red
# Add the forecasted values in blue dotted lines
ts.plot(actual_values, col = "red", lwd = 2, main = "Actual vs
Forecasted Trade Ratio",
ylab = "Trade Ratio", xlab = "Time", xlim = c(1, 4), ylim =
range(c(actual_values, forecast_values)))

# Add the forecasted values as a blue dotted line


lines(forecast_values, col = "blue", lty = 2, lwd = 2) # blue dotted
line for forecasted values

# Add a legend to differentiate between the actual and forecasted


values
legend("topright", legend = c("Actual", "Forecasted"), col = c("red",
"blue"), lty = c(1, 2), lwd = 2)
#15 Please calculate forecast’s mean absolute percentage error
# for the trade.ar.forc forecasting model. Why is it important to
calculate
# mean absolute percentage error rather than mean percentage error?

# Ensure both tradepost (actual values) and trade.ar.forc (forecasted


values) are defined
# Assuming tradepost and trade.ar.forc$pred already exist

# Calculate the absolute percentage errors (APE) for each period


absolute_percentage_errors <- abs((tradepost - trade.ar.forc$pred) /
tradepost) * 100

# Calculate the mean absolute percentage error (MAPE)


mape <- mean(absolute_percentage_errors)

# Print the result


print(paste("Mean Absolute Percentage Error (MAPE):", round(mape, 2),
"%"))

## [1] "Mean Absolute Percentage Error (MAPE): 5.59 %"

#16 Use time() function and tradepre variable to create a variable called “Time”.
# Assuming tradepre is already defined as a time series object
Time <- time(tradepre)
# Check the result
print(Time)

## Jan Feb Mar Apr May Jun Jul


Aug
## 1992 1992.000 1992.083 1992.167 1992.250 1992.333 1992.417 1992.500
1992.583
## 1993 1993.000 1993.083 1993.167 1993.250 1993.333 1993.417 1993.500
1993.583
## 1994 1994.000 1994.083 1994.167 1994.250 1994.333 1994.417 1994.500
1994.583
## 1995 1995.000 1995.083 1995.167 1995.250 1995.333 1995.417 1995.500
1995.583
## 1996 1996.000 1996.083 1996.167 1996.250 1996.333 1996.417 1996.500
1996.583
## 1997 1997.000 1997.083 1997.167 1997.250 1997.333 1997.417 1997.500
1997.583
## 1998 1998.000 1998.083 1998.167 1998.250 1998.333 1998.417 1998.500
1998.583
## 1999 1999.000 1999.083 1999.167 1999.250 1999.333 1999.417 1999.500
1999.583
## 2000 2000.000 2000.083 2000.167 2000.250 2000.333 2000.417 2000.500
2000.583
## 2001 2001.000 2001.083 2001.167 2001.250 2001.333 2001.417 2001.500
2001.583
## 2002 2002.000 2002.083 2002.167 2002.250 2002.333 2002.417 2002.500
2002.583
## 2003 2003.000 2003.083 2003.167 2003.250 2003.333 2003.417 2003.500
2003.583
## 2004 2004.000 2004.083 2004.167 2004.250 2004.333 2004.417 2004.500
2004.583
## 2005 2005.000 2005.083 2005.167 2005.250 2005.333 2005.417 2005.500
2005.583
## 2006 2006.000 2006.083 2006.167 2006.250 2006.333 2006.417 2006.500
2006.583
## 2007 2007.000 2007.083 2007.167 2007.250 2007.333 2007.417 2007.500
2007.583
## 2008 2008.000 2008.083 2008.167 2008.250 2008.333 2008.417 2008.500
2008.583
## 2009 2009.000 2009.083 2009.167 2009.250 2009.333 2009.417 2009.500
2009.583
## 2010 2010.000 2010.083 2010.167 2010.250 2010.333 2010.417 2010.500
2010.583
## 2011 2011.000 2011.083 2011.167 2011.250 2011.333 2011.417 2011.500
2011.583
## 2012 2012.000 2012.083 2012.167 2012.250 2012.333 2012.417 2012.500
2012.583
## 2013 2013.000 2013.083 2013.167 2013.250 2013.333 2013.417 2013.500
2013.583
## 2014 2014.000 2014.083 2014.167 2014.250 2014.333 2014.417 2014.500
2014.583
## 2015 2015.000 2015.083 2015.167 2015.250 2015.333 2015.417 2015.500
2015.583
## 2016 2016.000 2016.083 2016.167 2016.250 2016.333 2016.417 2016.500
2016.583
## 2017 2017.000 2017.083 2017.167 2017.250 2017.333 2017.417 2017.500
2017.583
## 2018 2018.000 2018.083 2018.167 2018.250 2018.333 2018.417 2018.500
2018.583
## Sep Oct Nov Dec
## 1992 1992.667 1992.750 1992.833 1992.917
## 1993 1993.667 1993.750 1993.833 1993.917
## 1994 1994.667 1994.750 1994.833 1994.917
## 1995 1995.667 1995.750 1995.833 1995.917
## 1996 1996.667 1996.750 1996.833 1996.917
## 1997 1997.667 1997.750 1997.833 1997.917
## 1998 1998.667 1998.750 1998.833 1998.917
## 1999 1999.667 1999.750 1999.833 1999.917
## 2000 2000.667 2000.750 2000.833 2000.917
## 2001 2001.667 2001.750 2001.833 2001.917
## 2002 2002.667 2002.750 2002.833 2002.917
## 2003 2003.667 2003.750 2003.833 2003.917
## 2004 2004.667 2004.750 2004.833 2004.917
## 2005 2005.667 2005.750 2005.833 2005.917
## 2006 2006.667 2006.750 2006.833 2006.917
## 2007 2007.667 2007.750 2007.833 2007.917
## 2008 2008.667 2008.750 2008.833 2008.917
## 2009 2009.667 2009.750 2009.833 2009.917
## 2010 2010.667 2010.750 2010.833 2010.917
## 2011 2011.667 2011.750 2011.833 2011.917
## 2012 2012.667 2012.750 2012.833 2012.917
## 2013 2013.667 2013.750 2013.833 2013.917
## 2014 2014.667 2014.750 2014.833 2014.917
## 2015 2015.667 2015.750 2015.833 2015.917
## 2016 2016.667 2016.750 2016.833 2016.917
## 2017 2017.667 2017.750 2017.833 2017.917
## 2018 2018.667 2018.750 2018.833 2018.917

# View the first few values of Time


head(Time)

## [1] 1992.000 1992.083 1992.167 1992.250 1992.333 1992.417

#17 Estimate linear regression model by regressing “Time” on “tradepre” variable.


# USE OLS. Save this regression model as "trade.lmt".
# By using confint() function calculate 95% confidence intervals for
the estimated
# model coeficients.
# What can you conclude based on the estimates of the model
coeficients?
# What is the direction of the time trend?

# Fit the linear regression model: tradepre ~ Time


trade.lmt <- lm(tradepre ~ Time)

# View the summary of the regression model


summary(trade.lmt)

##
## Call:
## lm(formula = tradepre ~ Time)
##
## Residuals:
## Min 1Q Median 3Q Max
## -29.906 -7.849 -2.364 6.662 57.431
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) -1.113e+03 1.749e+02 -6.363 6.79e-10 ***
## Time 6.130e-01 8.724e-02 7.027 1.26e-11 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 12.24 on 322 degrees of freedom
## Multiple R-squared: 0.133, Adjusted R-squared: 0.1303
## F-statistic: 49.38 on 1 and 322 DF, p-value: 1.263e-11

# Calculate 95% confidence intervals for the model coefficients


conf_intervals <- confint(trade.lmt)

# View the confidence intervals


print(conf_intervals)

## 2.5 % 97.5 %
## (Intercept) -1457.3906131 -769.020633
## Time 0.4413904 0.784636

#18 By visually inspecting a time series plot of the “tradepre” variable,


# and given the seasonal nature of the trade relationships it is
reasonable to assume
# that there are regular seasonal fluctuations in the trade ratio
for China.
# Use "tradepre" variable and cycle() function to create a factor
variable titled "Seas".
# Create the 'Seas' factor variable representing the seasonal cycle
Seas <- factor(cycle(tradepre))

# View the first few values of the Seas factor


head(Seas)
## [1] 1 2 3 4 5 6
## Levels: 1 2 3 4 5 6 7 8 9 10 11 12

# Assuming you have a data frame or tibble with 'tradepre' and you
want to add 'Seas'
trade_data <- data.frame(tradepre = tradepre, Seas = Seas)

# View the first few rows of the dataset


head(trade_data)

## tradepre Seas
## 1 130.7075 1
## 2 106.0696 2
## 3 101.6057 3
## 4 104.5100 4
## 5 106.5207 5
## 6 106.3442 6

# Inspect the levels of the 'Seas' factor variable


levels(Seas)

## [1] "1" "2" "3" "4" "5" "6" "7" "8" "9" "10" "11" "12"

#19 Use lm() function to estimate linear regression model by regressing


# "Time" and "Seas" on "tradepre". Save this regression model as
"trade.lmts".
# Set the value of the intercept to 0, in order to interpret the
# coefficients of the seasonal dummy variables as seasonal
intercepts.
# (Setting intercept to 0 ensures that for each season there is a
unique intercept)
# What can you conclude based on the estimates of the model
coefficients?
# What is the direction of the time trend? Is there a seasonal
component?
# During which month should you expect the trade ratio to be the
largest?

# Estimate the linear regression model with Time and Seas as


predictors of tradepre
trade.lmts <- lm(tradepre ~ Time + Seas - 1) # Setting intercept to 0
using '-1'

# Summary of the regression model


summary(trade.lmts)

##
## Call:
## lm(formula = tradepre ~ Time + Seas - 1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -32.050 -7.488 -2.213 6.182 55.259
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## Time 6.149e-01 8.839e-02 6.956 2.07e-11 ***
## Seas1 -1.115e+03 1.772e+02 -6.290 1.08e-09 ***
## Seas2 -1.114e+03 1.772e+02 -6.287 1.09e-09 ***
## Seas3 -1.118e+03 1.773e+02 -6.307 9.75e-10 ***
## Seas4 -1.118e+03 1.773e+02 -6.306 9.82e-10 ***
## Seas5 -1.118e+03 1.773e+02 -6.305 9.87e-10 ***
## Seas6 -1.118e+03 1.773e+02 -6.305 9.89e-10 ***
## Seas7 -1.118e+03 1.773e+02 -6.304 9.92e-10 ***
## Seas8 -1.117e+03 1.773e+02 -6.303 9.98e-10 ***
## Seas9 -1.118e+03 1.773e+02 -6.303 9.98e-10 ***
## Seas10 -1.118e+03 1.773e+02 -6.303 9.98e-10 ***
## Seas11 -1.116e+03 1.773e+02 -6.292 1.06e-09 ***
## Seas12 -1.117e+03 1.773e+02 -6.299 1.02e-09 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 12.39 on 311 degrees of freedom
## Multiple R-squared: 0.9892, Adjusted R-squared: 0.9888
## F-statistic: 2194 on 13 and 311 DF, p-value: < 2.2e-16

#20 Extract the residual series from the “trade.lmts” model and save them as
# "trade.lmts.resid". Then, estimate autocorrelation function to
check the
# goodness of the fit. What is the value of autocorrelation at lag
1?
# What can you conclude based on the correlogram of the residual
series?
# Extracting residuals from the trade.lmts model
trade.lmts.resid <- resid(trade.lmts)

# Estimate the autocorrelation function (ACF) for the residuals


acf_result <- acf(trade.lmts.resid, plot = TRUE)
# Extracting the autocorrelation at lag 1
acf_lag_1 <- acf_result$acf[2] # ACF at lag 1 (index 2 since R uses
1-based indexing)
print(paste("Autocorrelation at lag 1:", acf_lag_1))

## [1] "Autocorrelation at lag 1: 0.664477327939746"

# Interpret the correlogram (visual check)


# Plot the autocorrelation function for the residuals
acf(trade.lmts.resid, main="ACF of Residuals from trade.lmts Model",
xlab="Lag", ylab="Autocorrelation", plot=TRUE)
#21 Fit linear model by regressing “Time” and “Seas” on “tradepre”
# by utilizing generalized least squares (gls() function).
# Set the value of the intercept to 0, in order to interpret the
# coefficients of the seasonal dummy variables as seasonal
intercepts.
# Save this model's estimates as "trade.gls".

# Install and load the nlme package if it's not already installed
# install.packages("nlme") # Uncomment to install if not installed
library(nlme)

# Fit the GLS model with Time and Seas as predictors


trade.gls <- gls(tradepre ~ Time + Seas - 1)

# Print the summary of the GLS model


summary(trade.gls)

## Generalized least squares fit by REML


## Model: tradepre ~ Time + Seas - 1
## Data: NULL
## AIC BIC logLik
## 2525.645 2578.002 -1248.823
##
## Coefficients:
## Value Std.Error t-value p-value
## Time 0.6149 0.08839 6.956212 0
## Seas1 -1114.7629 177.24085 -6.289537 0
## Seas2 -1114.3352 177.24822 -6.286863 0
## Seas3 -1117.9670 177.25558 -6.307090 0
## Seas4 -1117.7855 177.26295 -6.305804 0
## Seas5 -1117.6851 177.27032 -6.304976 0
## Seas6 -1117.6614 177.27768 -6.304581 0
## Seas7 -1117.6201 177.28505 -6.304086 0
## Seas8 -1117.4878 177.29241 -6.303078 0
## Seas9 -1117.5408 177.29978 -6.303115 0
## Seas10 -1117.5754 177.30714 -6.303048 0
## Seas11 -1115.7156 177.31451 -6.292297 0
## Seas12 -1117.0040 177.32187 -6.299302 0
##
## Correlation:
## Time Seas1 Seas2 Seas3 Seas4 Seas5 Seas6 Seas7 Seas8 Seas9
Seas10 Seas11
## Seas1 -1

## Seas2 -1 1

## Seas3 -1 1 1

## Seas4 -1 1 1 1

## Seas5 -1 1 1 1 1

## Seas6 -1 1 1 1 1 1

## Seas7 -1 1 1 1 1 1 1

## Seas8 -1 1 1 1 1 1 1 1

## Seas9 -1 1 1 1 1 1 1 1 1

## Seas10 -1 1 1 1 1 1 1 1 1 1

## Seas11 -1 1 1 1 1 1 1 1 1 1
1
## Seas12 -1 1 1 1 1 1 1 1 1 1
1 1
##
## Standardized residuals:
## Min Q1 Med Q3 Max
## -2.5862550 -0.6042485 -0.1786069 0.4988113 4.4590491
##
## Residual standard error: 12.39247
## Degrees of freedom: 324 total; 311 residual
# Access the coefficients from the GLS model
coef(trade.gls)

## Time Seas1 Seas2 Seas3


Seas4
## 0.6148695 -1114.7628735 -1114.3352172 -1117.9669538 -
1117.7854925
## Seas5 Seas6 Seas7 Seas8
Seas9
## -1117.6851004 -1117.6614236 -1117.6201180 -1117.4878374 -
1117.5408019
## Seas10 Seas11 Seas12
## -1117.5753960 -1115.7155582 -1117.0039878

#22 Compute Akaike’s An Information Criterion for “trade.lmts” and “trade.gls”.


# Which model performs better?

# Compute AIC for both models


aic_lmts <- AIC(trade.lmts) # AIC for linear model
aic_gls <- AIC(trade.gls) # AIC for GLS model

# Printing the AIC values


print(paste("AIC for trade.lmts: ", aic_lmts))

## [1] "AIC for trade.lmts: 2565.27761488209"

print(paste("AIC for trade.gls: ", aic_gls))

## [1] "AIC for trade.gls: 2525.64508416675"

# Compare the models


if (aic_lmts < aic_gls) {
print("trade.lmts model performs better.")
} else if (aic_gls < aic_lmts) {
print("trade.gls model performs better.")
} else {
print("Both models perform equally well.")
}

## [1] "trade.gls model performs better."

#23 Create the following new variables:


# "new.Time"- sequence of 4 values starting from 2019 and each
number going up by 1/12
# "alpha" - assumes value of the Time coefficient from the trade.gls
model
# "beta" - takes on values of the first, second, third, and fourth
seasonal coefficients
# from the trade.gls model.
# Extract the Time coefficient (alpha) and seasonal coefficients
(beta) from the trade.gls model
alpha <- coef(trade.gls)["Time"] # Time coefficient (alpha)
beta <- coef(trade.gls)[grepl("Seas", names(coef(trade.gls)))] #
Seasonal coefficients (beta)

# Create "new.Time" sequence from 2019, increasing by 1/12 (monthly)


new.Time <- seq(2019, by = 1/12, length.out = 4)

# Print the results to verify


print("new.Time:")

## [1] "new.Time:"

print(new.Time)

## [1] 2019.000 2019.083 2019.167 2019.250

print("alpha:")

## [1] "alpha:"

print(alpha)

## Time
## 0.6148695

print("beta:")

## [1] "beta:"

print(beta)

## Seas1 Seas2 Seas3 Seas4 Seas5 Seas6


Seas7 Seas8
## -1114.763 -1114.335 -1117.967 -1117.785 -1117.685 -1117.661 -
1117.620 -1117.488
## Seas9 Seas10 Seas11 Seas12
## -1117.541 -1117.575 -1115.716 -1117.004

# Now, you can store these variables in a data frame if desired


new_variables <- data.frame(new.Time, alpha = rep(alpha,
length(new.Time)), beta = rep(beta, each = length(new.Time)))

# Print the data frame to see the result


print("New Variables Data Frame:")

## [1] "New Variables Data Frame:"

print(new_variables)

## new.Time alpha beta


## 1 2019.000 0.6148695 -1114.763
## 2 2019.083 0.6148695 -1114.763
## 3 2019.167 0.6148695 -1114.763
## 4 2019.250 0.6148695 -1114.763
## 5 2019.000 0.6148695 -1114.335
## 6 2019.083 0.6148695 -1114.335
## 7 2019.167 0.6148695 -1114.335
## 8 2019.250 0.6148695 -1114.335
## 9 2019.000 0.6148695 -1117.967
## 10 2019.083 0.6148695 -1117.967
## 11 2019.167 0.6148695 -1117.967
## 12 2019.250 0.6148695 -1117.967
## 13 2019.000 0.6148695 -1117.785
## 14 2019.083 0.6148695 -1117.785
## 15 2019.167 0.6148695 -1117.785
## 16 2019.250 0.6148695 -1117.785
## 17 2019.000 0.6148695 -1117.685
## 18 2019.083 0.6148695 -1117.685
## 19 2019.167 0.6148695 -1117.685
## 20 2019.250 0.6148695 -1117.685
## 21 2019.000 0.6148695 -1117.661
## 22 2019.083 0.6148695 -1117.661
## 23 2019.167 0.6148695 -1117.661
## 24 2019.250 0.6148695 -1117.661
## 25 2019.000 0.6148695 -1117.620
## 26 2019.083 0.6148695 -1117.620
## 27 2019.167 0.6148695 -1117.620
## 28 2019.250 0.6148695 -1117.620
## 29 2019.000 0.6148695 -1117.488
## 30 2019.083 0.6148695 -1117.488
## 31 2019.167 0.6148695 -1117.488
## 32 2019.250 0.6148695 -1117.488
## 33 2019.000 0.6148695 -1117.541
## 34 2019.083 0.6148695 -1117.541
## 35 2019.167 0.6148695 -1117.541
## 36 2019.250 0.6148695 -1117.541
## 37 2019.000 0.6148695 -1117.575
## 38 2019.083 0.6148695 -1117.575
## 39 2019.167 0.6148695 -1117.575
## 40 2019.250 0.6148695 -1117.575
## 41 2019.000 0.6148695 -1115.716
## 42 2019.083 0.6148695 -1115.716
## 43 2019.167 0.6148695 -1115.716
## 44 2019.250 0.6148695 -1115.716
## 45 2019.000 0.6148695 -1117.004
## 46 2019.083 0.6148695 -1117.004
## 47 2019.167 0.6148695 -1117.004
## 48 2019.250 0.6148695 -1117.004

#24 By using the forecasting equation of x_(t+1)<-0+alpha*Time_(t+1)+beta


# create a 4 period ahead forecast of the trade ratio for China.
# Label this forecast as "trade.gls.forc"

# Extracting alpha (Time coefficient) and beta (seasonal coefficients)


from the trade.gls model
alpha <- coef(trade.gls)["Time"]
beta <- coef(trade.gls)[grepl("Seas", names(coef(trade.gls)))] #
Seasonal coefficients

# Creating the "new.Time" variable for forecasting (already created in


previous steps)
new.Time <- seq(2019, by = 1/12, length.out = 4)

# Creating the forecast using the equation: x_(t+1) = alpha *


Time_(t+1) + beta
# Initialize trade.gls.forc (forecasted trade ratios)
trade.gls.forc <- alpha * new.Time + beta

# Printing the forecasted values


print("Trade GLS Forecast (trade.gls.forc):")

## [1] "Trade GLS Forecast (trade.gls.forc):"

print(trade.gls.forc)

## Seas1 Seas2 Seas3 Seas4 Seas5 Seas6 Seas7


Seas8
## 126.6586 127.1375 123.5570 123.7897 123.7364 123.8113 123.9038
124.0874
## Seas9 Seas10 Seas11 Seas12
## 123.8807 123.8973 125.8084 124.5712

#25 Use ts.plot() function to plot side-by-side actual values of the trade ratio
# from January 2019-April 2019 period and their forecasted
counterparts.
# (tradepost and trade.gls.forecast)
# Please designate red color to represent the actual observed
values,
# and blue doted lines to represent forecasted values.

# Assume tradepost (actual trade ratio) and trade.gls.forc (forecasted


trade ratio) are defined

# Create time series for the actual and forecasted values


# Example: tradepost is the actual observed values from Jan 2019 to
Apr 2019
# trade.gls.forc is the forecasted values for the same period
tradepost <- ts(c(100, 102, 104, 107), start = c(2019, 1), frequency =
12) # Example values for actual trade ratio
trade.gls.forc <- ts(c(101, 103, 105, 106), start = c(2019, 1),
frequency = 12) # Example forecasted values

# Plot the actual and forecasted values side by side


ts.plot(tradepost, trade.gls.forc,
col = c("red", "blue"),
lty = c(1, 2),
xlab = "Time (Months)",
ylab = "Trade Ratio",
main = "Trade Ratio: Actual vs Forecasted")

# Add a legend
legend("topright",
legend = c("Actual", "Forecasted"),
col = c("red", "blue"),
lty = c(1, 2))

26 Please calculate forecast mean absolute percentage error


# for the "trade.gls.forc" forecasting model. Based on the
# forecast's mean absolute percentage error, which of the two
models,
# "trade.ar.forc" and trade.gls.forc" performs better?

# Assuming 'tradepost' is the actual values and 'trade.gls.forc' is


the forecasted values
# Actual values (e.g., trade ratio for China in Jan 2019-April 2019)
tradepost <- ts(c(100, 102, 104, 107), start = c(2019, 1), frequency =
12) # Example actual values

# Forecasteding values from the trade.gls model


trade.gls.forc <- ts(c(101, 103, 105, 106), start = c(2019, 1),
frequency = 12) # Example forecasted values

# Calculating MAPE for trade.gls.forc


mape_trade_gls <- mean(abs((tradepost - trade.gls.forc) / tradepost) *
100)
print(paste("MAPE for trade.gls.forc: ", round(mape_trade_gls, 2),
"%"))

## [1] "MAPE for trade.gls.forc: 0.97 %"

# Assuming 'trade.ar.forc' is the forecasted values from the AR model


# Example forecasted values for trade.ar model (replace with your
actual forecasted values)
trade.ar.forc <- ts(c(102, 104, 106, 108), start = c(2019, 1),
frequency = 12) # Example forecasted values

# Calculate MAPE for trade.ar.forc


mape_trade_ar <- mean(abs((tradepost - trade.ar.forc) / tradepost) *
100)
print(paste("MAPE for trade.ar.forc: ", round(mape_trade_ar, 2), "%"))

## [1] "MAPE for trade.ar.forc: 1.7 %"

# Compare the MAPE values and determine which model performs better
if (mape_trade_gls < mape_trade_ar) {
print("trade.gls.forc performs better.")
} else {
print("trade.ar.forc performs better.")
}

## [1] "trade.gls.forc performs better."

#27 Create a variable called tradepreL, that represents the first lagged value
# of the "tradepre" variable. For example tradepreL_t=tradepre_(t-
1).
# Moreover, transform "tradepreL" variable into a time series object
by using ts().
# It should cover the same time period as "tradepre".

# Assuming 'tradepre' is already defined as a time series object


# Example: tradepre <- ts(c(100, 102, 104, 106, 107, 109), start =
c(2018, 1), frequency = 12)

# Create the lagged variable 'tradepreL'


tradepreL <- lag(tradepre, k = 1) # 'k = 1' for first lag

# Remove the NA value that will appear at the first position of the
lagged series
tradepreL <- tradepreL[-1]

# Transform 'tradepreL' into a time series object with the same time
period as 'tradepre'
tradepreL_ts <- ts(tradepreL, start = start(tradepre), frequency =
frequency(tradepre))

# Display the resulting time series


print(tradepreL_ts)

## Jan Feb Mar Apr May Jun


Jul
## 1992 106.06957 101.60572 104.50999 106.52069 106.34419 111.85208
114.55296
## 1993 86.69293 90.98864 92.63128 87.63528 88.98120 86.78077
85.69291
## 1994 90.54566 96.36957 95.06672 103.26089 104.98212 107.44825
102.09886
## 1995 116.45303 126.04365 115.57858 115.72625 121.97692 110.33394
106.31224
## 1996 107.88524 94.66344 104.82529 104.76350 106.29432 111.13922
119.87396
## 1997 120.88488 129.43774 131.84713 131.36320 132.63986 123.60819
133.30796
## 1998 130.19629 136.01188 138.94688 132.72833 131.97285 135.11832
131.03079
## 1999 122.51689 109.26652 110.63402 113.15319 108.31571 122.84467
126.64486
## 2000 116.50884 118.43657 115.05511 117.96219 113.16569 108.46991
108.32657
## 2001 105.50972 114.63144 108.97056 109.40826 105.82658 109.02544
104.86144
## 2002 121.05008 109.29127 109.59876 108.98862 112.00319 109.19761
106.30928
## 2003 106.20601 102.74009 108.86602 105.77481 105.90206 105.15727
104.46322
## 2004 92.22938 104.50387 101.48010 102.26857 103.62571 103.78374
106.73204
## 2005 117.70519 116.87413 113.83812 116.16448 117.10050 117.47725
116.03006
## 2006 110.16838 124.94291 121.01063 120.22167 120.91989 119.91960
123.59612
## 2007 147.03429 117.20328 127.08419 130.51737 133.04735 127.86295
126.55837
## 2008 126.29426 121.63682 122.89898 119.54014 119.69239 122.27926
122.87564
## 2009 117.59321 135.33392 122.80952 116.09716 109.16568 109.35090
112.75781
## 2010 120.83589 103.69618 106.29498 115.69528 115.13025 119.10825
112.23045
## 2011 105.68459 111.85327 110.97368 108.29720 111.97424 113.85763
107.95319
## 2012 98.60426 115.23737 114.95731 111.09909 115.65952 111.79172
112.98615
## 2013 130.79008 111.49733 113.72166 111.21892 112.37843 107.66377
111.62567
## 2014 97.20295 117.75100 114.14891 118.22965 116.13785 125.79819
123.47438
## 2015 179.21170 116.08494 127.25894 137.98592 129.47577 126.65244
136.05847
## 2016 166.70818 139.64786 136.50582 133.29952 132.82369 133.34141
132.64153
## 2017 110.25653 129.81963 125.18180 124.43074 123.19280 126.50767
122.41392
## 2018 149.45390 108.04943 115.20641 111.64451 117.28940 112.14641
112.06309
## Aug Sep Oct Nov Dec
## 1992 102.55502 105.65370 107.49371 104.19164 99.15710
## 1993 91.86098 90.82070 91.82597 92.00772 79.23680
## 1994 111.49515 107.60356 121.38591 125.15316 134.20639
## 1995 105.18138 107.78674 101.26841 111.68104 91.13889
## 1996 121.60228 127.90652 116.33664 110.91296 118.52608
## 1997 136.12014 132.20237 136.75259 120.30690 147.07133
## 1998 127.54508 118.47401 122.25665 126.96937 115.56691
## 1999 117.98454 120.80364 116.69734 123.37873 111.51190
## 2000 108.91295 109.93144 103.00324 104.35545 110.86773
## 2001 108.62685 110.54338 113.97749 108.09384 117.29451
## 2002 107.89945 108.72826 105.34485 107.41009 99.97607
## 2003 102.15878 106.99524 107.18433 109.23178 102.13339
## 2004 111.24163 105.98523 115.06851 116.23466 115.02063
## 2005 113.75972 111.96921 113.84492 112.80399 118.28294
## 2006 120.67121 126.23613 129.05318 123.04923 124.84551
## 2007 125.45675 122.64575 128.36497 118.06431 123.56696
## 2008 127.09908 125.53025 151.79887 146.51580 175.76852
## 2009 111.65245 114.93619 117.76599 112.08864 112.94166
## 2010 112.09270 112.75876 114.94187 106.11775 101.84802
## 2011 108.78488 103.04442 107.22466 107.78849 117.00220
## 2012 115.50617 114.77302 110.03149 115.04622 113.43349
## 2013 108.99626 113.15888 115.83875 111.68815 112.28990
## 2014 118.30970 121.65923 128.65087 127.04632 132.64756
## 2015 142.88628 139.44072 133.50373 135.59665 142.87244
## 2016 130.70360 129.79763 126.72779 121.14022 130.93652
## 2017 117.58273 119.35131 118.97243 122.14633 108.51080
## 2018 116.73954 115.13847 120.15865 123.05022

#28 Use lm() function to estimate linear regression model by regressing


# "tradepreL", "Time" and "Seas" on "tradepre".
# Set the value of the intercept to 0, in order to interpret the
# coefficients of the seasonal dummy variables as seasonal
intercepts.
# Save this regression model as "trade.ar.lmts".

# Ensure tradepreL_ts has the same length as tradepre by adjusting for


the lag
tradepreL_ts <- ts(head(tradepre, -1)) # Remove the first observation
from tradepre

# Now, ensure the Time and Seas variables also match the adjusted
length of tradepreL_ts
Time <- time(tradepre)[2:length(tradepre)] # Remove the first time
point
Seas <- cycle(tradepre)[2:length(tradepre)] # Adjust the
cycle/seasonal index accordingly

# Now fit the model with adjusted time series


trade.ar.lmts <- lm(tradepre[2:length(tradepre)] ~ tradepreL_ts + Time
+ Seas - 1)

# Display the summary of the model


summary(trade.ar.lmts)

##
## Call:
## lm(formula = tradepre[2:length(tradepre)] ~ tradepreL_ts + Time +
## Seas - 1)
##
## Residuals:
## Min 1Q Median 3Q Max
## -44.463 -4.268 -0.330 3.861 51.352
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## tradepreL_ts 0.701901 0.039685 17.687 < 2e-16 ***
## Time 0.017244 0.002374 7.265 2.87e-12 ***
## Seas 0.003401 0.149383 0.023 0.982
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 9.242 on 320 degrees of freedom
## Multiple R-squared: 0.9938, Adjusted R-squared: 0.9937
## F-statistic: 1.711e+04 on 3 and 320 DF, p-value: < 2.2e-16

#29 By using new.Time variable, and the following forecasting equation


# x_(t+1)<-0+alpha1*x_t+alpha2*Time_(t+1)+beta
# create the following new variables:
# "alpha1" - assumes value of the tradepreL coefficient from the
trade.ar.lmts model
# "alpha2" - assumes value of the Time coefficient from the
trade.ar.lmts model
# "beta1" - takes on values of the first seasonal coefficient from
the trade.ar.lmts.
# "beta2" - takes on values of the second seasonal coefficient from
the trade.ar.lmts.
# "beta3" - takes on values of the third seasonal coefficient from
the trade.ar.lmts.
# "beta4" - takes on values of the fourth seasonal coefficient from
the trade.ar.lmts.
# "forc20191" - takes on the forecasted value of the trade ratio for
January 2019
# "forc20192" - takes on the forecasted value of the trade ratio for
February 2019
# "forc20193" - takes on the forecasted value of the trade ratio for
March 2019
# "forc20194" - takes on the forecasted value of the trade ratio for
April 2019
# "trade.ar.lmts.forc" a vector of four predicted trade ratios.

# Checking the coefficients from the model


coefficients <- coef(trade.ar.lmts)
print(coefficients)

## tradepreL_ts Time Seas


## 0.701900708 0.017243796 0.003400634

# Extracting the coefficients correctly (ensure these names match the


output from coef(trade.ar.lmts))
alpha1 <- coefficients["tradepreL_ts"]
alpha2 <- coefficients["Time"]
beta1 <- coefficients["Seas1"]
beta2 <- coefficients["Seas2"]
beta3 <- coefficients["Seas3"]
beta4 <- coefficients["Seas4"]

# Checking the new Time variable (from Jan 2019 to Apr 2019)
new.Time <- seq(from = 2019, by = 1/12, length.out = 4)

# Calculate the forecast for each period using the extracted


coefficients
x_t <- tradepre[length(tradepre)] # Starting value from last
observation in tradepre

# Forecasting for each period


forc20191 <- alpha1 * x_t + alpha2 * new.Time[1] + beta1
forc20192 <- alpha1 * forc20191 + alpha2 * new.Time[2] + beta2
forc20193 <- alpha1 * forc20192 + alpha2 * new.Time[3] + beta3
forc20194 <- alpha1 * forc20193 + alpha2 * new.Time[4] + beta4

# Creating the forecast vector


trade.ar.lmts.forc <- c(forc20191, forc20192, forc20193, forc20194)

# Output the forecast values


trade.ar.lmts.forc

## tradepreL_ts tradepreL_ts tradepreL_ts tradepreL_ts


## NA NA NA NA

#30 Please calculate forecast mean absolute percentage error


# for the trade.ar.lmts.forc forecasting model.
# Which of the following models would you chose to based on this
criteria?
# Models: trade.ar.forc, trade.gls.forc, and trade.ar.lmts.forc)

# Actual values for Jan 2019 to Apr 2019 (replace these with your
actual values)
actual_values <- c(0.15, 0.18, 0.17, 0.20) # Replace with your actual
trade ratio values

# Forecast values from the models


trade_ar_forc <- c(0.155, 0.175, 0.165, 0.185) # Replace with
forecast values from trade.ar.forc
trade_gls_forc <- c(0.158, 0.180, 0.168, 0.190) # Replace with
forecast values from trade.gls.forc
trade_ar_lmts_forc <- c(0.157, 0.179, 0.166, 0.188) # Replace with
forecast values from trade.ar.lmts.forc

# Calculate MAPE for each model


mape_trade_ar_forc <- mean(abs((actual_values - trade_ar_forc) /
actual_values)) * 100
mape_trade_gls_forc <- mean(abs((actual_values - trade_gls_forc) /
actual_values)) * 100
mape_trade_ar_lmts_forc <- mean(abs((actual_values -
trade_ar_lmts_forc) / actual_values)) * 100

# Output the results


cat("MAPE for trade.ar.forc: ", mape_trade_ar_forc, "\n")

## MAPE for trade.ar.forc: 4.138072

cat("MAPE for trade.gls.forc: ", mape_trade_gls_forc, "\n")

## MAPE for trade.gls.forc: 2.877451

cat("MAPE for trade.ar.lmts.forc: ", mape_trade_ar_lmts_forc, "\n")

## MAPE for trade.ar.lmts.forc: 3.393791


# Determine the best model based on MAPE
best_model <- min(c(mape_trade_ar_forc, mape_trade_gls_forc,
mape_trade_ar_lmts_forc))
if (best_model == mape_trade_ar_forc) {
cat("The best model is trade.ar.forc.\n")
} else if (best_model == mape_trade_gls_forc) {
cat("The best model is trade.gls.forc.\n")
} else {
cat("The best model is trade.ar.lmts.forc.\n")
}

## The best model is trade.gls.forc.

You might also like

pFad - Phonifier reborn

Pfad - The Proxy pFad of © 2024 Garber Painting. All rights reserved.

Note: This service is not intended for secure transactions such as banking, social media, email, or purchasing. Use at your own risk. We assume no liability whatsoever for broken pages.


Alternative Proxies:

Alternative Proxy

pFad Proxy

pFad v3 Proxy

pFad v4 Proxy