#using packages library(ggplot2, quietly = TRUE) library(scales, quietly = TRUE) library(zoo, quietly = TRUE) library(readstata13, quietly = TRUE) library(haven, quietly = TRUE) library(forecast, quietly = TRUE) library(prophet, quietly = TRUE) #looking into ets, tbats, and auto arima blackMarket <- subset(wowgold, region=="Wow") #we have some missing values and need to do some imputation to fill them up for time series. I gathered the data and have no idea why #some of the values are missing therefore im going un the asumption they are randomly missing and not some bias library(mice, quietly = TRUE) md.pattern(blackMarket, plot = FALSE)

## region Month Year date time rtime releasedatecata releasedatemop ## 104 1 1 1 1 1 1 1 1 ## 16 1 1 1 1 1 1 1 1 ## 0 0 0 0 0 0 0 0 ## releasedatedra releasedatelegi releasedatebfa releasedategold usdtog ## 104 1 1 1 1 1 ## 16 1 1 1 1 0 ## 0 0 0 0 16 ## gtousd ## 104 1 0 ## 16 0 2 ## 16 32

#we are missing 16 values for our gold to usd ratios #m is the number of imputations, maxit is iterations, meth is the type of method you want to use to find the values #with pmm being predictive mean matching tempData <- mice(blackMarket,m=5,maxit=50,meth='pmm',seed=1337, print=FALSE)

## Warning: Number of logged events: 509

#summary(tempData) completedBlack<-complete(tempData,1) trainBlack <- head(completedBlack,-3) testBlack <- tail(completedBlack, 3)

Now that we have our testing and training sets determined we can no start comparing forecasting techniques. The first 3 techniques all come from the Forecasting package by Hyndman.

##forecast package arimaGold <- auto.arima(trainBlack$gtousd) fcastarimagold <- forecast(arimaGold, h= 3) #plot(fcastarimagold) etsGold <- ets(trainBlack$gtousd) fcastetsgold <- forecast(etsGold, h=3) #plot(fcastetsgold) tbatsGold <- tbats(trainBlack$gtousd) fcasttbatsgold <- forecast(tbatsGold, h=3) #plot(fcasttbatsgold)

This next technique comes from our boy Zuckerberg over at Facebook. They have developed a rather intresting time-series package called prophet. It is very simple to use, the only part that is annoying is having to rename the date and data columns to ds and y respectivley.

##prophet #prophet does its own imputations reverting back to orginal data prophetBlack <- completedBlack prophetBlack <- prophetBlack[c(7:8)] #have to change names for prophet package library(tidyverse, quietly = TRUE) prophetBlack <- rename(prophetBlack, ds = rtime, y = gtousd) pBlack <- prophet(prophetBlack) fprophet <- make_future_dataframe(pBlack, periods = 3, freq = "month") fcastprophet <- predict(pBlack, fprophet) plot(pBlack, fcastprophet)

Next, we are going to look at another one of Hyndman's packages called forecasthybrid which combines exponential smoothing, arima, tbats, STLM, neural net, theif, and a naive forecast. You can use equal weights for all those or make a vector that you set to determine the best weights. You can also use in-sample errors to do it as well.

##forecasthybrid library(forecastHybrid, quietly = TRUE) hybridblack <- hybridModel(trainBlack$gtousd)

## Warning in removeModels(y = y, models = expandedModels): The stlm model ## requires that the input data be a seasonal ts object. The stlm model will ## not be used.

blackfcast <- forecast(hybridblack, h=3) #plot(blackfcast) hybridblack1 <- hybridModel(completedBlack$gtousd, weights = "insample.errors")

## Warning in hybridModel(completedBlack$gtousd, weights = "insample.errors"): ## Using insample.error weights is not recommended for accuracy and may be ## deprecated in the future. ## Warning in hybridModel(completedBlack$gtousd, weights = "insample.errors"): ## The stlm model requires that the input data be a seasonal ts object. The ## stlm model will not be used.

blackfcast1 <- forecast(hybridblack1, h=3) #plot(blackfcast1)

Now that we have all of the forecasts we want to compare, we need to see the error metrics of each of them to deterime which one is the most accurate.

#now we can look at all the forecasts and compare metrics to them all actVsFcast <- cbind(ts(completedBlack$gtousd), ts(fcastetsgold$mean, start = 117), ts(fcastarimagold$mean, start = 117), ts(fcasttbatsgold$mean, start = 117), ts(tail(fcastprophet$yhat,3), start = 117), ts(blackfcast$mean, start = 117), ts(blackfcast1$mean, start = 117)) colnames(actVsFcast) <- c("Orignal", "ETS", "Arima", "Tbats", "Prophet", "HybridE", "HybridIn") autoplot(actVsFcast)

Suprisingly, facebooks package is off by a fair bit, around 4% with MAPE. It looks like their impution method is rather off as opposed to the mice packages. It will be intresting to see if this stays the same when looking at wowtoken prices.

#lets look at the metrics of our forecasted values library(Metrics, quietly = TRUE) ETS <- rbind(mape(testBlack$gtousd,fcastetsgold$mean), rmse(testBlack$gtousd, fcastetsgold$mean), mse(testBlack$gtousd, fcastetsgold$mean)) ARIMA <- rbind(mape(testBlack$gtousd,fcastarimagold$mean), rmse(testBlack$gtousd, fcastarimagold$mean), mse(testBlack$gtousd, fcastarimagold$mean)) TBATS <- rbind(mape(testBlack$gtousd,fcasttbatsgold$mean), rmse(testBlack$gtousd, fcasttbatsgold$mean), mse(testBlack$gtousd, fcasttbatsgold$mean)) PROPHET <- rbind(mape(testBlack$gtousd,tail(fcastprophet$yhat,3)), rmse(testBlack$gtousd, tail(fcastprophet$yhat,3)), mse(testBlack$gtousd, tail(fcastprophet$yhat,3))) HYBRIDE <- rbind(mape(testBlack$gtousd,blackfcast$mean), rmse(testBlack$gtousd, blackfcast$mean), mse(testBlack$gtousd, blackfcast$mean)) HYBRIDIN <- rbind(mape(testBlack$gtousd,blackfcast1$mean), rmse(testBlack$gtousd, blackfcast1$mean), mse(testBlack$gtousd, blackfcast1$mean)) error_metrics <- cbind(ETS, ARIMA, TBATS, PROPHET, HYBRIDE, HYBRIDIN) colnames(error_metrics) <-c("ETS", "Arima", "Tbats", "Prophet", "HybridE", "HybridIn") error_metrics

## ETS Arima Tbats Prophet HybridE ## [1,] 0.0043660597 0.0032301532 0.011666745 0.04408857 0.0056646069 ## [2,] 0.0211839256 0.0130772015 0.046330561 0.18052067 0.0242256639 ## [3,] 0.0004487587 0.0001710132 0.002146521 0.03258771 0.0005868828 ## HybridIn ## [1,] 0.0024083682 ## [2,] 0.0133860867 ## [3,] 0.0001791873

Looking at the tablee we see that hybridmodel() with in-sample errors choosing the weights is the most accurate base off all of our error metrics. Let's plot the original data vs the most precise forecast to see how close we got.

most_precise <- cbind(ts(completedBlack$gtousd), ts(blackfcast1$mean, start = 117)) colnames(most_precise) <- c("Original", "HybridIn") autoplot(most_precise)

We have an almost perfect forecast of what gold is going to be about 3 months out. This is only for the black market gold prices and one could argue that we imputed a fair amount of values. Next, we are going to look at each region and determine the exact same thing here but we will make it into a function for ease of use.