.Rhistory

## Train/test split and repeated small training samples ----
## `region_data` must already exist in the session; it is not created here.

# Seed the RNG before the first random draw so that the hold-out split, and
# not only the training sub-samples, is reproducible from run to run.
set.seed(2)

# Hold out 50 randomly chosen rows as test data; every remaining row goes to
# the training pool.
rand_data <- sample(seq_len(nrow(region_data)), 50)
test_data <- region_data[rand_data, ]
train_data <- region_data[-rand_data, ]

# Draw 4 samples of 20 rows each from the training pool. Rows are drawn
# without replacement inside a sample; the 4 samples are drawn independently
# of each other.
sample_data_list_20 <- lapply(
  1:4,
  function(i) train_data[sample(seq_len(nrow(train_data)), 20), ]
)

# The first sample is the one used for the fitted-curve plots.
# The RSS table, the model list and the complexity vector are built further
# down, after the models have been fitted; building them here would reference
# objects that do not exist yet.
sample_train_data <- sample_data_list_20[[1]]
## Fit polynomial regressions on the current sample and save one plot each ----

# Fit total_purchase_value ~ clicks + I(clicks^2) + ... + I(clicks^degree)
# by ordinary least squares on `data`.
fit_poly_model <- function(degree, data) {
  terms <- "clicks"
  if (degree > 1) {
    terms <- c(terms, paste0("I(clicks^", 2:degree, ")"))
  }
  lm(reformulate(terms, response = "total_purchase_value"), data = data)
}

# Polynomial orders to compare.
poly_orders <- c(1, 2, 7, 8, 9, 10)
lst_models <- lapply(poly_orders, fit_poly_model, data = sample_train_data)

# Keep the individual fits available under the names used by later code.
m1 <- lst_models[[1]]
m2 <- lst_models[[2]]
m7 <- lst_models[[3]]
m8 <- lst_models[[4]]
m9 <- lst_models[[5]]
m10 <- lst_models[[6]]

# Record the requested orders directly. Counting rows of
# summary(model)$coefficients under-reports the order whenever lm() drops an
# aliased term, which would also make two plots share one file name.
model_complexity <- poly_orders

# Labels and file names follow the actual number of rows in the sample.
sample_size <- nrow(sample_train_data)

# jpeg() cannot create the target folder itself.
if (!dir.exists("Plots")) {
  dir.create("Plots")
}

click_order <- order(sample_train_data$clicks)
for (k in seq_along(lst_models)) {
  order_num <- poly_orders[k]
  # e.g. "Plots/Order_7_Sample-20.jpg"
  plot_file <- file.path(
    "Plots",
    paste0("Order_", order_num, "_Sample-", sample_size, ".jpg")
  )
  main_title <- paste0(
    "Polynomial Regression of Order-", order_num,
    " (Sample Size = ", sample_size, ")"
  )

  jpeg(plot_file)
  # Close the file device even if plotting fails part-way through.
  tryCatch(
    {
      plot(
        sample_train_data$clicks, sample_train_data$total_purchase_value,
        pch = 19, cex = 0.5, xlab = "Number of Clicks",
        ylab = "Total Purchase value", main = main_title
      )
      # Fitted curve, drawn left to right in order of increasing clicks.
      lines(
        sort(sample_train_data$clicks), fitted(lst_models[[k]])[click_order],
        col = "magenta", type = "l", pch = 20
      )
    },
    finally = dev.off()
  )
}
# Scatter plot of the current sample with the fitted curve of every
# polynomial order overlaid in its own colour.
plot(
  sample_train_data$clicks, sample_train_data$total_purchase_value,
  pch = 19, cex = 0.5,
  xlab = "Number of Clicks", ylab = "Total Purchase value",
  main = "Polynomial Regression of Different Orders (Sample Size = 20)"
)

# Curves are drawn left to right, so x and the fitted values are both put in
# order of increasing clicks.
sorted_clicks <- sort(sample_train_data$clicks)
click_order <- order(sample_train_data$clicks)
curve_models <- list(m1, m2, m7, m8, m9, m10)
curve_colours <- c("magenta", "darkgreen", "red", "blue", "brown", "orange")
for (k in seq_along(curve_models)) {
  lines(
    sorted_clicks, fitted(curve_models[[k]])[click_order],
    col = curve_colours[k], type = "l", lwd = 2, pch = 20
  )
}

# Legend: one entry per polynomial order, in the same colour as its curve.
legend(
  "topright",
  legend = paste0("Order-", c(1, 2, 7, 8, 9, 10)),
  col = curve_colours, lty = 1, cex = 0.8, text.font = 4,
  bg = "lightblue", title = "Different orders"
)
## Train and test RSS for every polynomial order, on every sample ----

# Fit total_purchase_value ~ clicks + I(clicks^2) + ... + I(clicks^degree)
# by ordinary least squares on `data`.
fit_poly_model <- function(degree, data) {
  terms <- "clicks"
  if (degree > 1) {
    terms <- c(terms, paste0("I(clicks^", 2:degree, ")"))
  }
  lm(reformulate(terms, response = "total_purchase_value"), data = data)
}

# Polynomial orders to compare.
poly_orders <- c(1, 2, 7, 8, 9, 10)

# For one training sample, fit every order and return a two-column data frame:
# residual sum of squares on the sample itself (train) and sum of squared
# prediction errors on `holdout` (test). One row per polynomial order.
rss_for_sample <- function(sample_data, holdout) {
  models <- lapply(poly_orders, fit_poly_model, data = sample_data)
  train_rss <- vapply(models, function(m) sum(m$residuals^2), numeric(1))
  test_rss <- vapply(
    models,
    function(m) {
      pred <- predict(m, newdata = holdout)
      sum((pred - holdout$total_purchase_value)^2)
    },
    numeric(1)
  )
  data.frame(train = train_rss, test = test_rss)
}

rss_by_sample <- lapply(sample_data_list_20, rss_for_sample, holdout = test_data)

# First column is the polynomial order; then a train/test pair per sample.
# The orders are taken from `poly_orders` because those are exactly the
# models fitted above. The loop no longer overwrites m1..m10, lst_models or
# sample_train_data as a side effect.
df_rss_s20 <- do.call(
  cbind,
  c(list(data.frame(complexity = poly_orders)), rss_by_sample)
)
colnames(df_rss_s20) <- c(
  "com",
  paste0(
    rep(c("train", "test"), times = length(rss_by_sample)),
    "_s20_",
    rep(seq_along(rss_by_sample), each = 2)
  )
)
# ggplot2 has to be attached before the first ggplot() call.
library(ggplot2)

# Sample 1: test RSS against polynomial order.
# The y-axis is labelled RSS because the plotted column holds sums of squared
# prediction errors, not root-mean-square errors.
ggplot(df_rss_s20, aes(x = com, y = test_s20_1, color = "Sample-1")) +
  geom_point(size = 2) +
  geom_line(size = 1.2) +
  labs(color = "Sample") +
  xlab("Model Complexity (Order of Polynomial)") +
  ylab("Test Error (RSS)") +
  ggtitle("Model Error vs Model Complexity (Sample Size = 20)") +
  theme(
    axis.title.x = element_text(color = "Blue", size = 13),
    axis.title.y = element_text(color = "Blue", size = 13),
    axis.text.x = element_text(size = 10),
    axis.text.y = element_text(size = 10),
    plot.title = element_text(hjust = 0.5),
    legend.title = element_text(size = 15)
  )
# Plot the test RSS stored in column `test_column` of `rss_table` against the
# polynomial order (column `com`), drawn in `line_colour` and labelled
# `sample_label` in the legend. Requires ggplot2 to be attached.
plot_test_rss <- function(rss_table, test_column, sample_label, line_colour) {
  plot_data <- data.frame(
    com = rss_table$com,
    test_rss = rss_table[[test_column]]
  )
  ggplot(plot_data, aes(x = com, y = test_rss, color = sample_label)) +
    geom_point(size = 2) +
    geom_line(size = 1.2) +
    labs(color = "Sample") +
    xlab("Model Complexity (Order of Polynomial)") +
    # The plotted values are sums of squared errors, not RMSE.
    ylab("Test Error (RSS)") +
    ggtitle("Model Error vs Model Complexity (Sample Size = 20)") +
    theme(
      axis.title.x = element_text(color = "Blue", size = 13),
      axis.title.y = element_text(color = "Blue", size = 13),
      axis.text.x = element_text(size = 10),
      axis.text.y = element_text(size = 10),
      plot.title = element_text(hjust = 0.5),
      legend.title = element_text(size = 15)
    ) +
    scale_color_manual(values = line_colour)
}

# Each sample is plotted from its own test column. Samples 3 and 4 used to be
# drawn from sample 1's column.
# Sample 2, test RSS
plot_test_rss(df_rss_s20, "test_s20_2", "Sample-2", "#2E8B57")
# Sample 3, test RSS
plot_test_rss(df_rss_s20, "test_s20_3", "Sample-3", "#00008B")
# Sample 4, test RSS
plot_test_rss(df_rss_s20, "test_s20_4", "Sample-4", "#C71585")
## Re-draw the training samples with 30 rows each, refit, and save plots ----

# Draw 4 samples of 30 rows each from the training pool (without replacement
# inside a sample).
# NOTE(review): the list keeps its `_20` name because later code refers to it
# by that name, even though these samples hold 30 rows.
sample_data_list_20 <- lapply(
  1:4,
  function(i) train_data[sample(seq_len(nrow(train_data)), 30), ]
)

# The first sample is the one used for the fitted-curve plots.
sample_train_data <- sample_data_list_20[[1]]

# Fit total_purchase_value ~ clicks + I(clicks^2) + ... + I(clicks^degree)
# by ordinary least squares on `data`.
fit_poly_model <- function(degree, data) {
  terms <- "clicks"
  if (degree > 1) {
    terms <- c(terms, paste0("I(clicks^", 2:degree, ")"))
  }
  lm(reformulate(terms, response = "total_purchase_value"), data = data)
}

# Polynomial orders to compare.
poly_orders <- c(1, 2, 7, 8, 9, 10)
lst_models <- lapply(poly_orders, fit_poly_model, data = sample_train_data)

# Keep the individual fits available under the names used by later code.
m1 <- lst_models[[1]]
m2 <- lst_models[[2]]
m7 <- lst_models[[3]]
m8 <- lst_models[[4]]
m9 <- lst_models[[5]]
m10 <- lst_models[[6]]

# Record the requested orders directly. Counting rows of
# summary(model)$coefficients under-reports the order whenever lm() drops an
# aliased term.
model_complexity <- poly_orders

# Take the sample size from the data, so that titles and file names describe
# the sample actually plotted and do not overwrite plots of another size.
sample_size <- nrow(sample_train_data)

# jpeg() cannot create the target folder itself.
if (!dir.exists("Plots")) {
  dir.create("Plots")
}

click_order <- order(sample_train_data$clicks)
for (k in seq_along(lst_models)) {
  order_num <- poly_orders[k]
  # e.g. "Plots/Order_7_Sample-30.jpg"
  plot_file <- file.path(
    "Plots",
    paste0("Order_", order_num, "_Sample-", sample_size, ".jpg")
  )
  main_title <- paste0(
    "Polynomial Regression of Order-", order_num,
    " (Sample Size = ", sample_size, ")"
  )

  jpeg(plot_file)
  # Close the file device even if plotting fails part-way through.
  tryCatch(
    {
      plot(
        sample_train_data$clicks, sample_train_data$total_purchase_value,
        pch = 19, cex = 0.5, xlab = "Number of Clicks",
        ylab = "Total Purchase value", main = main_title
      )
      # Fitted curve, drawn left to right in order of increasing clicks.
      lines(
        sort(sample_train_data$clicks), fitted(lst_models[[k]])[click_order],
        col = "magenta", type = "l", pch = 20
      )
    },
    finally = dev.off()
  )
}
# Scatter plot of the current sample with the fitted curve of every
# polynomial order overlaid in its own colour.
# The plot is drawn once. There is no dev.off() here because no file device
# is open at this point, so dev.off() would close the device holding the plot
# that was just drawn.
plot(
  sample_train_data$clicks, sample_train_data$total_purchase_value,
  pch = 19, cex = 0.5,
  xlab = "Number of Clicks", ylab = "Total Purchase value",
  # The sample size in the title is read from the data being plotted.
  main = paste0(
    "Polynomial Regression of Different Orders (Sample Size = ",
    nrow(sample_train_data), ")"
  )
)

# Curves are drawn left to right, so x and the fitted values are both put in
# order of increasing clicks.
sorted_clicks <- sort(sample_train_data$clicks)
click_order <- order(sample_train_data$clicks)
curve_models <- list(m1, m2, m7, m8, m9, m10)
curve_colours <- c("magenta", "darkgreen", "red", "blue", "brown", "orange")
for (k in seq_along(curve_models)) {
  lines(
    sorted_clicks, fitted(curve_models[[k]])[click_order],
    col = curve_colours[k], type = "l", lwd = 2, pch = 20
  )
}

# Legend: one entry per polynomial order, in the same colour as its curve.
legend(
  "topright",
  legend = paste0("Order-", c(1, 2, 7, 8, 9, 10)),
  col = curve_colours, lty = 1, cex = 0.8, text.font = 4,
  bg = "lightblue", title = "Different orders"
)
## Train and test RSS for every polynomial order, on every sample ----

# Fit total_purchase_value ~ clicks + I(clicks^2) + ... + I(clicks^degree)
# by ordinary least squares on `data`.
fit_poly_model <- function(degree, data) {
  terms <- "clicks"
  if (degree > 1) {
    terms <- c(terms, paste0("I(clicks^", 2:degree, ")"))
  }
  lm(reformulate(terms, response = "total_purchase_value"), data = data)
}

# Polynomial orders to compare.
poly_orders <- c(1, 2, 7, 8, 9, 10)

# For one training sample, fit every order and return a two-column data frame:
# residual sum of squares on the sample itself (train) and sum of squared
# prediction errors on `holdout` (test). One row per polynomial order.
rss_for_sample <- function(sample_data, holdout) {
  models <- lapply(poly_orders, fit_poly_model, data = sample_data)
  train_rss <- vapply(models, function(m) sum(m$residuals^2), numeric(1))
  test_rss <- vapply(
    models,
    function(m) {
      pred <- predict(m, newdata = holdout)
      sum((pred - holdout$total_purchase_value)^2)
    },
    numeric(1)
  )
  data.frame(train = train_rss, test = test_rss)
}

rss_by_sample <- lapply(sample_data_list_20, rss_for_sample, holdout = test_data)

# First column is the polynomial order; then a train/test pair per sample.
# The `_s20_` column names are kept as they are because the plotting code
# refers to them by name. The loop no longer overwrites m1..m10, lst_models
# or sample_train_data as a side effect.
df_rss_s20 <- do.call(
  cbind,
  c(list(data.frame(complexity = poly_orders)), rss_by_sample)
)
colnames(df_rss_s20) <- c(
  "com",
  paste0(
    rep(c("train", "test"), times = length(rss_by_sample)),
    "_s20_",
    rep(seq_along(rss_by_sample), each = 2)
  )
)
# Number of rows in the samples the RSS table was built from; shown in the
# plot titles so they describe the data actually used.
rss_sample_size <- nrow(sample_data_list_20[[1]])

# Plot the test RSS stored in column `test_column` of `rss_table` against the
# polynomial order (column `com`), labelled `sample_label` in the legend.
# `line_colour = NULL` keeps ggplot2's default colour. Requires ggplot2 to be
# attached.
plot_test_rss <- function(rss_table, test_column, sample_label,
                          line_colour = NULL) {
  plot_data <- data.frame(
    com = rss_table$com,
    test_rss = rss_table[[test_column]]
  )
  p <- ggplot(plot_data, aes(x = com, y = test_rss, color = sample_label)) +
    geom_point(size = 2) +
    geom_line(size = 1.2) +
    labs(color = "Sample") +
    xlab("Model Complexity (Order of Polynomial)") +
    # The plotted values are sums of squared errors, not RMSE.
    ylab("Test Error (RSS)") +
    ggtitle(paste0(
      "Model Error vs Model Complexity (Sample Size = ", rss_sample_size, ")"
    )) +
    theme(
      axis.title.x = element_text(color = "Blue", size = 13),
      axis.title.y = element_text(color = "Blue", size = 13),
      axis.text.x = element_text(size = 10),
      axis.text.y = element_text(size = 10),
      plot.title = element_text(hjust = 0.5),
      legend.title = element_text(size = 15)
    )
  if (!is.null(line_colour)) {
    p <- p + scale_color_manual(values = line_colour)
  }
  p
}

# One plot per sample, each drawn from its own test column. Samples 3 and 4
# used to be drawn from sample 1's column, and a trailing plot labelled
# "Sample-4" repeated sample 2's data.
# Sample 1, test RSS
plot_test_rss(df_rss_s20, "test_s20_1", "Sample-1")
# Sample 2, test RSS
plot_test_rss(df_rss_s20, "test_s20_2", "Sample-2", "#2E8B57")
# Sample 3, test RSS
plot_test_rss(df_rss_s20, "test_s20_3", "Sample-3", "#00008B")
# Sample 4, test RSS
plot_test_rss(df_rss_s20, "test_s20_4", "Sample-4", "#C71585")
## Kids revenue: load the quarterly series and explore it ----

# Packages are attached before anything from them is called.
library(forecast)
library(tseries)

# The CSV is read through a relative path, so the working directory has to be
# the project folder.
setwd("~/Econometrics/FinalProject")

# Read the data and drop the first column, keeping the revenue values.
kids_data <- read.csv("Kids Revenue.csv")
kids_data <- kids_data[, -1]
str(kids_data)
summary(kids_data)
class(kids_data)

# Quarterly time series starting in the first quarter of 1992.
kids_ts <- ts(kids_data, start = 1992, frequency = 4)
kids_ts
start(kids_ts)
end(kids_ts)
frequency(kids_ts)

# The series with a least-squares trend line through it.
plot(
  kids_ts,
  xlab = "year", ylab = "Kids Revenue", main = "Kids Revenue per each year"
)
abline(reg = lm(kids_ts ~ time(kids_ts)))

# Yearly means: aggregate() collapses the 4 quarters of each year.
plot(aggregate(kids_ts, FUN = "mean"))

# Revenue grouped by position in the yearly cycle. cycle() returns the quarter
# (1-4) of each observation, so the boxes compare quarters and show the
# seasonal effect.
boxplot(
  kids_ts ~ cycle(kids_ts),
  xlab = "Quarter", ylab = "Kids Revenue",
  main = "Comparison of Revenue by Quarter"
)

# Multiplicative decomposition into trend, seasonal and random components.
kids_decompose <- decompose(kids_ts, "multiplicative")
plot(kids_decompose)

# Base-10 log of the series, used by the smoothing and differencing steps.
kids_log <- log10(kids_ts)
plot.ts(kids_log)
head(kids_log)

# Additive decomposition, for comparison.
plot(decompose(kids_ts, "additive"))
## Simple and exponential moving averages of the log series ----
library(TTR)

# Compute every smoothed series before plotting, so that no panel refers to
# an object that does not exist yet.
airlog_sma3 <- SMA(kids_log, 2)
airlog_sma6 <- SMA(kids_log, 6)
airlog_sma12 <- SMA(kids_log, 12)
airlog_ema3 <- EMA(kids_log, 2)
airlog_ema6 <- EMA(kids_log, 6)
airlog_ema12 <- EMA(kids_log, 12)

# 2 x 3 grid: top row SMA, bottom row EMA; windows of 2, 6 and 12 quarters.
par(mfrow = c(2, 3))
plot.ts(airlog_sma3, ylab = "Kids revenue lag2")
title("2 Quarters SMA")
plot.ts(airlog_sma6, ylab = "Kids revenue lag6")
title("6 Quarters SMA")
plot.ts(airlog_sma12, ylab = "Kids revenue lag12")
title("12 Quarters SMA")
plot.ts(airlog_ema3, ylab = "Kids revenue lag2")
title("2 Quarters EMA")
plot.ts(airlog_ema6, ylab = "Kids revenue lag6")
title("6 Quarters EMA")
plot.ts(airlog_ema12, ylab = "Kids revenue lag12")
title("12 Quarters EMA")
## RMSE of SMA and EMA against the original series, per window length ----

# Window lengths (in quarters) to compare.
period <- c(2, 3, 4, 5, 6, 7, 8, 9, 10)

# Root-mean-square difference between `series` and `smoother(series, window)`.
# Only positions from `window` to the end of the series are compared; the
# series length is read from the data rather than fixed at 16.
smoothing_rmse <- function(smoother, series, window) {
  smoothed <- smoother(series, window)
  valid <- window:length(series)
  sqrt(sum((series[valid] - smoothed[valid])^2) / length(valid))
}

SMA_error <- vapply(
  period,
  function(w) smoothing_rmse(SMA, kids_ts, w),
  numeric(1)
)
EMA_error <- vapply(
  period,
  function(w) smoothing_rmse(EMA, kids_ts, w),
  numeric(1)
)
# One row per window length, with the SMA and EMA error side by side.
df <- data.frame(
  period = period,
  SMA_error = SMA_error,
  EMA_error = EMA_error
)
library(ggplot2)

# Both error curves on one plot; the constant colour mapping on the base plot
# supplies the legend title, and each line overrides it with its own label.
g <- ggplot(df, aes(period, color = "Legend")) +
  geom_line(aes(y = SMA_error, color = "SMA Error"), size = 1.2) +
  geom_line(aes(y = EMA_error, color = "EMA Error"), size = 1.2) +
  labs(
    x = "Period",
    y = "Error",
    title = "SMA Vs EMA in terms of Error for different periods"
  ) +
  theme(
    axis.title.x = element_text(color = "Blue", size = 13),
    axis.title.y = element_text(color = "Blue", size = 13),
    axis.text.x = element_text(size = 10),
    axis.text.y = element_text(size = 10),
    plot.title = element_text(hjust = 0.5),
    legend.title = element_text(size = 15)
  )
g
## Seasonal plots, differencing and stationarity checks ----

# Back to a single plotting panel.
par(mfrow = c(1, 1))

# Seasonal plot of the series, base graphics version.
seasonplot(
  kids_ts,
  main = "Seasonal Plot for Kids Data",
  col = c("red", "blue", "green", "brown"),
  bty = "l",
  year.labels = TRUE
)

# Seasonal plot of the series, ggplot version.
ggseasonplot(
  kids_ts,
  main = "Seasonal Plot for Kids Data",
  col = c("red", "blue", "green", "brown"),
  year.labels = TRUE
)

# Original series, no transformation and no differencing.
plot(
  kids_ts,
  ylab = "Original Data", main = "Kids Revenue for each year"
)

# Log of the original series, still without differencing (drawn twice, as in
# the recorded session).
plot(
  log10(kids_ts),
  ylab = "Log of Original Data", main = "Kids Revenue(Log) for each year"
)
plot(
  log10(kids_ts),
  ylab = "Log of Original Data", main = "Kids Revenue(Log) for each year"
)

# First difference of the original series.
plot(
  diff(kids_ts),
  ylab = "Differenced Sales", main = "Kids Revenue for each year"
)

# First difference of the log series.
kids_log_diff <- diff(kids_log, lag = 1)
plot(kids_log_diff, ylab = "Log of Differenced Sales")

# Same plot on a single panel, with a least-squares trend line added.
par(mfrow = c(1, 1))
plot(kids_log_diff, ylab = "Log of Differenced Sales")
abline(reg = lm(kids_log_diff ~ time(kids_log_diff)))

# Augmented Dickey-Fuller test on the differenced log series.
adf.test(kids_log_diff)

# KPSS test on the differenced log series.
kpss.test(kids_log_diff)

# Series plot together with its ACF and PACF.
tsdisplay(kids_log)

# ARIMA(4, 1, 4) fitted to the full log series.
arima1 <- arima(kids_log, order = c(4, 1, 4))
## Train/test split of the revenue series and differencing diagnostics ----

# Read the data file again and build the quarterly series from the revenue
# column.
new <- read.csv("Kids Revenue.csv")
new_time <- ts(
  data = new$Revenue.in.million...,
  frequency = 4,
  start = 1992
)

# First 12 observations for training, observations 13 to 16 for testing.
# `train` stays on the original scale; `test` is stored as natural logs.
train <- new_time[1:12]
test <- log(new_time[13:16])

# Both parts as quarterly series of natural logs: training from 1992, test
# from 1995.
test_new <- ts(test, frequency = 4, start = 1995)
train_new <- ts(log(train), frequency = 4, start = 1992)

# Plots of the training and test series.
plot(train_new)
title("Train data")
plot(test_new)
title("Test Data")

# Augmented Dickey-Fuller test on the training series.
library(tseries)
adf.test(train_new)

# Series plot with ACF and PACF, used to pick p and q by eye.
tsdisplay(train_new)

# Training series without differencing, with a least-squares trend line.
plot(train_new)
abline(reg = lm(train_new ~ time(train_new)))
title("Train Data with no differencing")

# Differences at lag 1, with a trend line.
plot(diff(train_new, lag = 1))
train_diff_1 <- diff(train_new)
abline(reg = lm(train_diff_1 ~ time(train_diff_1)))
title("Train Data differencing with lag 1")

# Differences at lag 2, with a trend line.
plot(diff(train_new, lag = 2))
train_diff_2 <- diff(train_new, lag = 2)
abline(reg = lm(train_diff_2 ~ time(train_diff_2)))
title("Train Data differencing with lag 2")
## ARIMA(4, 1, 4) on the log training series, forecasts and test RMSE ----

# ARIMA model with p = 4, d = 1, q = 4.
arima_model <- arima(train_new, order = c(4, 1, 4))
summary(arima_model)

# Forecast 6 quarters ahead; the title states the horizon actually used.
f <- forecast(arima_model, h = 6)
plot(f, main = "Forecast from ARIMA for 6 time periods")

# Forecast 4 quarters ahead, the same length as the held-out test data.
q <- forecast(arima_model, h = 4)
plot(q, main = "Forecast from ARIMA for 4 time periods")

# Point forecasts and held-out values, both on the natural-log scale.
predicted <- q$mean
predicted
test

# Back-transform with exp(), the exact inverse of the log() applied to the
# data; a rounded constant such as 2.71 or 2.71828 biases every value.
predicted_revenue <- exp(predicted)
actual_revenue <- exp(test)
predicted_revenue
actual_revenue

# Test RMSE on the original revenue scale.
sqrt(sum((predicted_revenue - actual_revenue)^2) / length(test))

# Predicted and actual test values side by side.
pred_test_df <- data.frame(
  predicted = predicted_revenue,
  actual_test = actual_revenue
)
View(pred_test_df)
## Residual diagnostics for the ARIMA model ----

# Residual series, its ACF and its PACF side by side.
par(mfrow = c(1, 3))
plot.ts(arima_model$residuals)
acf(ts(arima_model$residuals), main = "ACF Residual")
pacf(ts(arima_model$residuals), main = "PACF Residual")

# Back to a single panel before the combined residual check. It is run once,
# on the fitted model; the earlier call on `xx` referred to an object that is
# never created.
par(mfrow = c(1, 1))
checkresiduals(arima_model)