####2019####
library(tseries)
library(forecast)
library(TSA)
setwd('//Users//wangxinran//Desktop//4064')
beer <- ts(OzBeer, start = c(1956,1), frequency = 4) # OzBeer is assumed to be loaded with the course data
lbeer <- log(beer)
#(a)
plot(stl(beer, s.window = 20, t.window = 20))
plot(stl(lbeer, s.window = 20, t.window = 20))
#after the log transform the seasonal variation is smaller and more stable
#(b)
lbeer.d1 = diff(lbeer) # removing trend δ
lbeer.D1 = diff(lbeer, lag = 4) # removing seasonal δ^4
lbeer.d1D1 = diff(lbeer.d1, lag = 4) # removing trend and removing seasonal, then the series appears stationary
#combined operator δδ^4, i.e. (1-B)(1-B^4)
plot(lbeer.d1D1) # looks reasonably stationary
layout(mat= matrix(c(1,2,3,4,5,6), nrow =2, byrow=TRUE))
acf(lbeer.d1); acf(lbeer.D1); acf(lbeer.d1D1)
pacf(lbeer.d1); pacf(lbeer.D1); pacf(lbeer.d1D1)
#after removing only the trend, a clear seasonal pattern remains
#the doubly differenced series seems stationary; ready to fit a model
#(c)
## how to choose the orders (from the 3rd set of plots in (b)): SARIMA(2,1,1)x(0,1,1)[4]
## d,D: once the series looks stationary, count how many regular (d) and seasonal (D) differences were taken
## p,P: read from the PACF of the stationary series
## q,Q: read from the ACF of the stationary series
## lags inside the blue bands count as zero; take the last lag before the ACF/PACF drops inside the bands (lag counting starts at 1 unless labelled otherwise)
## p=2: in the PACF, 2 lags are outside the interval
## d=1: regular differencing once
## q=1: the first lag in the ACF is outside
## P=0: in the PACF, the 4th lag is inside, and likewise the 8th etc.
## D=1: seasonal differencing once
## Q=1: in the ACF, the 4th lag is outside but the other seasonal lags are inside
fit1=arima(lbeer,order=c(2,1,1),seasonal = c(0,1,1))
fit1
## AIC=-513.44
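## (optional sanity check, a minimal sketch: compare a few neighbouring orders by AIC;
##  the candidate list is illustrative, and the exact AIC values depend on the data)
cand = list(c(2,1,1), c(1,1,1), c(0,1,1), c(2,1,0))
sapply(cand, function(ord) AIC(arima(lbeer, order = ord, seasonal = c(0,1,1))))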
#(d)
tsdiag(fit1)
## the standardized residuals are plotted in the first panel
## their pattern looks irregular and they fluctuate around 0
## the second panel is the ACF of the model residuals
## apart from lag 0, no lag falls outside the interval, so the fitted model looks adequate
## the Ljung-Box tests have H0: the residuals are independent
## all p-values are well above 0.05, so we cannot reject H0; the residuals can be treated as independent
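## (a minimal numeric check of the Ljung-Box panel in tsdiag, assuming fit1 from above;
##  fitdf = 4 accounts for the 2+1+0+1 estimated ARMA coefficients)
Box.test(residuals(fit1), lag = 12, type = "Ljung-Box", fitdf = 4)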
#(e)
anew <- window(beer, start = c(1975,1))
plot(decompose(anew))
layout(mat= matrix(c(1,2,3), nrow =3, byrow=TRUE))
plot(anew);acf(anew);pacf(anew)
#Reduction to Stationarity
anew.d1 = diff(anew) # removing trend δ
anew.D1 = diff(anew, lag = 4) # removing seasonal δ^4
anew.d1D1 = diff(anew.d1, lag = 4) # removing trend and removing seasonal, then the series appears stationary
layout(mat= matrix(c(1,2,3,4,5,6), nrow =2, byrow=TRUE))
acf(anew.d1); acf(anew.D1); acf(anew.d1D1)
pacf(anew.d1); pacf(anew.D1); pacf(anew.d1D1)
plot(anew.d1D1)
mod1=arima(anew,order=c(2,1,1),seasonal = c(0,1,1)) # fitted to the original (untransformed) data
AIC(mod1) # 645.6816; smaller AIC/BIC is better
tsdiag(mod1)
cpgram(residuals(mod1), main= "SARIMA(2,1,1)x(0,1,1)[4]")
mod2=auto.arima(anew,trace=F)
#ARIMA(1,0,2)(0,1,1)[4]
AIC(mod2)#645.2691
tsdiag(mod2)
cpgram(residuals(mod2), main= "SARIMA(1,0,2)x(0,1,1)[4]")
plot(armasubsets(anew.d1D1,5,5))
mod3=arima(anew.d1D1, order=c(2,1,4))
AIC(mod3)#655.7698
tsdiag(mod3)
cpgram(residuals(mod3), main= "ARIMA(2,1,4)")
#Q2
Dubuque.ts=ts(Dubuque,start=c(1964,1),frequency=12) # Dubuque is assumed to be loaded with the course data
#(1)
auto.arima(Dubuque.ts)
#ARIMA(0,0,0)(2,1,0)[12]
mod1=arima(Dubuque.ts,order=c(0,0,0),seasonal = c(2,1,0))
tsdiag(mod1)
#(2)
Dubuque1975 <- c(Dubuque1975) # coerce to a plain vector
data.test=ts(Dubuque1975,start=c(1975,1),frequency=12)
# plot forecasts:
plot(Dubuque.ts,xlim=c(1964,1976),ylim=c(10,80))
preds = predict(mod1, n.ahead = length(data.test))
lines(preds$pred,col="red")
lines(preds$pred+2*preds$se,col="red")
lines(preds$pred-2*preds$se,col="red")
lines(data.test, col="blue")
legend("topleft", legend=c("predice","se","ture"),lty=c(1,3,6),
col=c("blue","red","green"))
####deciding which model is better####
#smaller AIC/BIC is better
#cpgram(residuals(mod)) gives the cumulative periodogram
#tsdiag(fit1)
## the standardized residuals are plotted in the first panel
## their pattern looks irregular and they fluctuate around 0
## the second panel is the ACF of the model residuals
## apart from lag 0, no lag falls outside the interval, so the fitted model looks adequate
## the Ljung-Box tests have H0: the residuals are independent
## all p-values are well above 0.05, so we cannot reject H0; the residuals can be treated as independent
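## (a minimal worked comparison using the built-in lh series; fitA/fitB are illustrative
##  candidates, not models from the questions above)
fitA = arima(lh, order = c(1,0,0))
fitB = arima(lh, order = c(3,0,0))
c(AIC(fitA), AIC(fitB)) # smaller is better
c(BIC(fitA), BIC(fitB))
par(mfrow = c(1,2)); cpgram(residuals(fitA)); cpgram(residuals(fitB))
tsdiag(fitA) # check the Ljung-Box p-values in the bottom panel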
####report####
#Monthly total value of house sales in Cork
library(tseries)
library(forecast)
setwd('//Users//wangxinran//Desktop//4064')
####Dataset####
data=read.csv("118102037.csv")
data.sub=subset(data,Statistic=='Value of Sales')
data.ts=ts(data.sub$VALUE,frequency = 12,start = c(2010,1))
#first look at the data
plot(decompose(data.ts))
tsdisplay(data.ts)
####Reduction to Stationarity####
#select the training data
#hold out the last 30 observations as test data
data.train = window(data.ts, start = c(2010,1), end = c(2018,12))
#train time series plot
plot(log(data.train))
plot(decompose(data.train))
acf(data.train)
pacf(data.train)
#Ljung-Box test
#check whether differencing is needed (is d = 0?)
#with m = 1 the statistic uses only the lag-1 autocorrelation
n = length(data.train)
r1 = cor(data.train[-1], data.train[-n]) # lag-1 sample autocorrelation
Q = n*(n+2)*r1^2/(n-1) # Ljung-Box statistic with m = 1
Q
Box.test(data.train, lag = 1, type = "Ljung-Box")
#Q far exceeds the chi-squared(1) critical value, so the p-value is essentially 0
#reject H0: the series is strongly autocorrelated, not white noise
#hence d = 0 is not appropriate; differencing is needed
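#(a minimal sketch of the same statistic for a general m; it should agree with
# Box.test(..., type = "Ljung-Box") up to rounding)
m = 12
n = length(data.train)
rk = acf(data.train, lag.max = m, plot = FALSE)$acf[-1] # r_1, ..., r_m
Q.m = n*(n+2)*sum(rk^2/(n - 1:m))
Q.m
Box.test(data.train, lag = m, type = "Ljung-Box")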
#Dickey-Fuller Test
adf.test(data.train)
#p-value = 0.2095>0.05
#the time series is not stationary
#Taking lag-1 difference to remove trend.
data.train.d1 = diff(data.train, lag = 1)
layout(mat=matrix(c(1,1,2,3),byrow=TRUE,ncol=2))
plot(data.train.d1)
acf(data.train.d1)
pacf(data.train.d1)
#candidate from the ACF/PACF: SARIMA(1,1,0)(1,1,0)[12]
#Dickey-Fuller Test again
adf.test(data.train.d1)
#p-value = 0.01<0.05
#the time series is stationary
####Model fitting and Model Criticism####
#method 1: choose the orders by eye from the ACF and PACF
fit1 <- arima(data.train, order = c(1,1,0), list(order = c(1,1,0), period = 12))
AIC(fit1)
tsdiag(fit1)
cpgram(residuals(fit1), main= "SARIMA(1,1,0)x(1,1,0)[12]")
#good
#method 2
#find arima(p,d,q)
auto.arima(data.train,trace=F)
#ARIMA(0,1,1)(0,1,1)[12]
fit2 <- arima(data.train, order = c(0,1,1), list(order = c(0,1,1), period = 12))
AIC(fit2)
tsdiag(fit2)
cpgram(residuals(fit2), main= "SARIMA(0,1,1)(0,1,1)[12]")
#better
#method 3
#install.packages('TSA')
library(TSA)
plot(armasubsets(data.train.d1,10,10))
#ar=8 ma=6
fit3 <- arima(data.train, order = c(8,1,6))
AIC(fit3)
tsdiag(fit3)
cpgram(residuals(fit3), main= "ARIMA(8,1,6)")
#good
####forecast and discussion####
data.test = window(data.ts, start = c(2019,1))
plot(data.train,xlim=c(2010,2022),ylim=c(0,180))
pred = predict(fit2, n.ahead = length(data.test))
lines(pred$pred,col="red")
lines(pred$pred+2*pred$se,col="green")
lines(pred$pred-2*pred$se,col="green")
lines(data.test, col="blue")
#test if residuals obey normal distribution
mean(residuals(fit2))
sd(residuals(fit2))
1/sqrt(length(data.train)) # rough 1/sqrt(n) benchmark for judging whether the residual mean is negligible
shapiro.test(residuals(fit2))
#p-value = 0.001969 < 0.05
#residuals not a normal distribution
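#(a quick visual check to go with the Shapiro-Wilk test: heavy tails or skewness
# show up as departures from the reference line)
qqnorm(residuals(fit2)); qqline(residuals(fit2))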
#prefer the fewest ARMA parameters: (4,6) is better than (5,5)
####condensed code####
#install.packages("tseries")
#install.packages("forecast")
#install.packages("TSA")
library(tseries)
library(forecast)
library(TSA)
####inspect basic properties of the data####
frequency(co2)
cycle(co2)
time(co2)
####extract subsets with window()####
window(co2, start = c(1959,1), end = c(1962,12))
window(co2, frequency = 4) #re-sample at frequency 4: keep the first month of each quarter
####smoothing method 1: sub-sample one value per year####
co2.yr = window(co2, frequency = 1)
co2.y = c(co2.yr)
co2.x = c(time(co2.yr))
lines(co2.x, co2.y, type = "o", lwd = 3, col = "red") # assumes plot(co2) has already been drawn
## smoothing method 2: lowess fit
co2.lowess = lowess(co2, f=1/3)
lines(co2.lowess$x, co2.lowess$y, lwd = 2, col = "blue")
## smoothing method 3: moving average via filter()
plot(co2)
lines(filter(co2, filter = rep(1,6)/6), lwd = 2, col = "red")
## smoothing method 4: kernel smoother
lines(ksmooth(time(x), x, "normal", bandwidth=5/52)) # x is your series; larger bandwidth = smoother
## smoothing method 5: supsmu
lines(supsmu(time(x), x, span=.01)) # larger span = smoother
## smoothing method 6: smoothing spline
lines(smooth.spline(time(x), x))
####decomposing a time series####
####method 1: decompose()####
plot(decompose(co2))
co2.decomp = decompose(co2)
names(co2.decomp)
plot(co2.decomp)
####method 2: stl() splits the series into seasonal, trend and remainder components using loess####
fit <- stl(log(co2), s.window = 20, t.window = 20)
plot(fit)
#larger s.window/t.window values give a more regular seasonal component and a smoother trend, at the cost of a larger remainder
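#(a minimal comparison, assuming the co2 series: larger windows give a more rigid
# seasonal and a smoother trend, typically leaving more variance in the remainder)
fit.small <- stl(log(co2), s.window = 21, t.window = 21)
fit.large <- stl(log(co2), s.window = 101, t.window = 101)
var(fit.small$time.series[, "remainder"])
var(fit.large$time.series[, "remainder"])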
####seasonal sub-series plots with monthplot()####
op <- par(mfrow = c(2,2))
fit <- stl(log(co2), s.window = 20, t.window = 20)
monthplot(co2, ylab = "data", cex.axis = 0.8)
monthplot(fit, choice = "seasonal", cex.axis = 0.8)
monthplot(fit, choice = "trend", cex.axis = 0.8)
monthplot(fit, choice = "remainder", type = "h", cex.axis = 0.8)
par(op)
####plotting tips####
abline(h = , col = "", lty=2) # dashed horizontal line (fill in h and col)
abline(v = , col = "", lty=1) # solid vertical line (fill in v and col)
text(12,12,"xxx",lwd = 2, cex = 1)
title(main = "")
####drawing solid and dashed parts of a series####
# method 1: plot the full series as a dashed line, fit the smoother, then overlay the first half as a solid line with lines()
# method 2: plot the first half as a solid line, add the second half with lines() as a dashed line, then fit the smoother on the full data
#### lag 1 correlation (start 1 end 48) ####
cor(lh[-1],lh[-48])
####lag 2 correlation ####
cor(lh[-(1:2)],lh[-(47:48)])
####set the layout for the following plots####
layout(mat= matrix(seq(6), ncol=3, byrow=TRUE)) # e.g. a 2 x 3 grid
####acf,pacf####
acf(lh, lag.max = 24)
pacf(lh, lag.max = 24)
####mark the first n points on the ACF plot####
# assumes acf(lh) has just been plotted and n is set, e.g. n = 6
for (i in 1:n) {
r = round(cor(lh[-(1:i)],lh[-((49-i):48)]), digits=3)
points(i, r, pch=16, col="red")
}
####removing trend and seasonality####
co2.d1 = diff(co2) # remove trend
co2.D1 = diff(co2, lag = 12) # remove seasonality
co2.d1D1 = diff(co2.d1, lag = 12) # remove both; the result appears stationary
####Gaussian Noise####
g = as.ts(rnorm(100))
#### White Noise ####
w = as.ts(rt(100, df=5))
####AIC####
num = length(x) # sample size of the data used to fit the model
AIC(fit)/num - log(2*pi)
BIC(fit)/num - log(2*pi)
####backward elimination with AIC as the dropping criterion (of the three directions, backward works best)####
mod <- lm( fmla , data = dat)
mod.aic = step(mod, direction = "backward", k = 2)
mod.bic = step(mod, direction = "backward", k = log(nrow(dat)))
plot(fitted(mod.bic), residuals(mod.bic), pch = ".")
####simulating from an ARIMA model; an empty model list gives ARIMA(0,0,0), i.e. white noise####
Yt = arima.sim(model = list(ar = 0.7), n = 101)
#simulates an AR(2) time series process Yt=0.7*Yt-1-0.4*Yt-2+et, with sigma(e)^2=1
Yt = arima.sim(list(ar = c(0.7, -0.4)), n=101)
####ARIMA(1,2,1) with specified ar and ma coefficients####
ts.sim = arima.sim(list(order = c(1,2,1), ar = 0.7, ma=0.5), n = 500)
?arima.sim
####theoretical ACF and PACF of an AR(1) with phi = 0.7####
AR1.acf = ARMAacf(ar = 0.7, lag.max = 25)
AR1.pacf = ARMAacf(ar = 0.7, lag.max = 25, pacf = TRUE)
####compare the simulated series with the theoretical ACF/PACF####
layout(mat= matrix(c(1,1,2:5), byrow=TRUE, ncol=2))
plot(Yt, main= "Simulated AR(1) Process")
acf(Yt, lag.max = 25, main= "Estimated ACF")
pacf(Yt, lag.max = 25, main= "Estimated PACF")
barplot(AR1.acf, col = "blue", main= "Theoretical ACF")
barplot(AR1.pacf, col = "red", main= "Theoretical PACF")
####fit an AR model: how many phi coefficients, and what are their values####
ar(lh)
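#(a small sketch of pulling the answer out of the fitted object)
lh.fit = ar(lh)
lh.fit$order # how many phi coefficients were selected (by AIC)
lh.fit$ar # their estimated values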
####finding the roots in B####
polyroot(z = c(1,-1.7,0.7)) # p(x) = z1 + z2*x + ... + z[n]*x^(n-1)
#coefficients must be supplied in ascending order of power
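#(a minimal stationarity check: the process is stationary when all roots of the AR
# polynomial lie outside the unit circle; here 1 - 1.7B + 0.7B^2 = (1-B)(1-0.7B)
# has a root at 1, i.e. a unit root)
Mod(polyroot(c(1,-1.7,0.7))) # moduli of the roots; compare with 1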
####full code####
#### lab class 1 ####
#1. check the data documentation
?co2
co2
#2. check the data class
class(co2) #time series
#3. plot the time series
plot(co2, ylab = expression("Atmospheric concentration of CO"[2]), las = 1) # time series plot
title(main = "co2 data set")
#4. inspect basic properties of the data
frequency(co2)
cycle(co2)
time(co2)
#5. extract subsets with window()
#window() extracts the subset of x observed between the given start and end times; if a frequency is specified, the series is re-sampled at that frequency
window(co2, start = c(1959,1), end = c(1962,12))
window(co2, frequency = 4) #re-sample at frequency 4: keep the first month of each quarter
#6. draw smoothed curves
## method (1): use a "typical" sub-sample to represent the whole
# take one representative observation per cycle and use it to trace the overall trend
# (e.g. to track heights of 0-12 year-olds, pick a child of median height and use that child's yearly heights to represent each year; this avoids extreme values and smooths the series)
co2.yr = window(co2, frequency = 1) # a TS of yearly values; window() returns a time series
#co2.yr = window(co2, frequency = 1, start=c(1959,4)) # start from April 1959 and keep one value per year
co2.y = c(co2.yr) # the nested c() call converts the y values to a regular vector
co2.x = c(time(co2.yr)) # and the time values likewise
lines(co2.x, co2.y, type = "o", lwd = 3, col = "red")
## method (2): lowess fit
co2.lowess = lowess(co2, f=1/3) # the classic LOWESS smoother
lines(co2.lowess$x, co2.lowess$y, lwd = 2, col = "blue")
# f is the smoother span (fraction of points used); larger values give a smoother curve
## method (3): moving average via filter()
plot(co2)
lines(filter(co2, filter = rep(1,6)/6), lwd = 2, col = "red") #MA(6)
lines(filter(co2, filter = c(1,rep(2,10),1)/22), lwd = 2, col = "blue") #MA(11) followed by MA(2), i.e. a 2x11 MA
# applying two moving averages in succession
# e.g. MA(4) after MA(2)
# generate 20 random numbers
x = rnorm(20)
# method 1: apply the combined filter directly: (1/8)[1,2,2,2,1]
xx = filter(x, filter = c(1,2,2,2,1)/8)
# method 2
# first MA(2)
y = filter(x, filter = c(1,1)/2)
# then MA(4)
yy = filter(c(NA,y), filter = c(1,1,1,1)/4) # note: pad with a leading NA so the two results align
# rounding the difference shows the two methods agree
round(xx - yy, 6) # 6 is just the display precision
#7. decomposing a time series
## method (1): decompose()
?decompose # splits the series into seasonal, trend and irregular components using moving averages; handles additive or multiplicative seasonality
plot(decompose(co2))
### saves and plots
co2.decomp = decompose(co2)
names(co2.decomp)
plot(co2.decomp)
## method (2): stl() splits the series into seasonal, trend and remainder components using loess
fit <- stl(log(co2), s.window = 20, t.window = 20)
plot(fit)
# larger s.window/t.window values give a more regular seasonal component and a smoother trend, at the cost of a larger remainder (shown in the fourth panel)
#8. linear filtering
# filter() applies a linear filter to a univariate time series, or separately to each series of a multivariate time series
plot(co2)
lines(filter(co2, filter = rep(1,6)/6), lwd = 2, col = "red") #MA(6)
lines(filter(co2, filter = c(1,rep(2,10),1)/22), lwd = 2, col = "blue") #MA(11) followed by MA(2), i.e. a 2x11 MA
#9. seasonal sub-series plots with monthplot()
op <- par(mfrow = c(2,2))
monthplot(co2, ylab = "data", cex.axis = 0.8)
monthplot(fit, choice = "seasonal", cex.axis = 0.8)
monthplot(fit, choice = "trend", cex.axis = 0.8)
monthplot(fit, choice = "remainder", type = "h", cex.axis = 0.8)
par(op)
#10. other datasets similar to co2 to try on your own
nottem
AirPassengers
#11. Multiple Time Series R Objects
EuStockMarkets
plot(EuStockMarkets)
plot(log(EuStockMarkets))
plot(diff(log(EuStockMarkets)))
UKDriverDeaths
plot(Seatbelts)
plot(UKDriverDeaths)
plot(stl(UKDriverDeaths, s.window=12))
#### exercise1 ####
# method 1: plot the full series as a dashed line, fit the smoother, then overlay the first half as a solid line with lines()
co2
plot(co2, ylab = expression("CO"[2]), las = 1, lty=2)
title(main = "Xinran Wang 118102037")
co2.lowess = lowess(co2, f=1/3) # the classic LOWESS smoother
lines(co2.lowess$x, co2.lowess$y, lwd = 2, col = "red")
abline(h = 350, col = "light grey", lty=1)
abline(v = 1988, col = "light grey", lty=1)
text(1961,352,"350 ppm",lwd = 2, cex = 1.3)
text(1990,314,"1988",lwd = 2, cex=1.3)
(co2.yr = window(co2, start = c(1959,1), end = c(1981,12))) # a TS of yearly totals
co2.y = c(co2.yr) # the nesting c( ) command returns a regular vector
co2.x = c(time(co2.yr))
lines(co2.x, co2.y, lwd = 3, col = "black")
# method 2: plot the first half as a solid line, add the second half with lines() as a dashed line, then fit the smoother on the full data
co2
(co2.1 = window(co2, start = c(1959,1), end = c(1981,12)))
plot(co2.1, ylab = expression("CO"[2]), las = 1, xlim=c(1959,1992), ylim=c(313,360))
title(main = "Xinran Wang 118102037")
(co2.yr = window(co2, start = c(1981,1), end = c(1997,12))) # a TS of yearly totals
co2.y = c(co2.yr) # the nesting c( ) command returns a regular vector
co2.x = c(time(co2.yr))
lines(co2.x, co2.y, lwd = 1, lty=2)
co2.lowess = lowess(co2, f=1/3) # the classic LOWESS smoother
lines(co2.lowess$x, co2.lowess$y, lwd = 2, col = "red")
abline(h = 350, col = "light grey", lty=1)
abline(v = 1988, col = "light grey", lty=1)
text(1961,352,"350 ppm",lwd = 2, cex = 1.3)
text(1990,314,"1988",lwd = 2, cex=1.3)
#### lab class 2 ####
###correlation###
lh
#First, plot the data.
plot(lh, type="o")
#Look at the correlation between current values and previous values (lh at lag 1).
plot( x= lag(lh,k=1), y= lh, xy.lines= FALSE, xy.labels= FALSE)
title(main= paste("r =", round(cor(lh[-1],lh[-48]), digits=3) ))
#lag 1 correlation (start 1 end 48)
cor(lh[-1],lh[-48])
#Look at the correlation between current values and values before last (lh at lag 2).
plot( x= lag(lh,k=2), y= lh, xy.lines= FALSE, xy.labels= FALSE)
title(main= paste("r =", round(cor(lh[-(1:2)],lh[-(47:48)]), digits=3) ))
#lag 2 correlation
cor(lh[-(1:2)],lh[-(47:48)])
#loop over several lags to see the correlation plots at different lags
layout(mat= matrix(seq(6), ncol=3, byrow=TRUE)) # sets the layout of the following plots
for (i in 1:6) {
plot( x= lag(lh,k=i), y= lh, xy.lines= FALSE, xy.labels= FALSE,
ylab= paste("lh Series at lag ", i))
title(main= paste("r =", round(cor(lh[-(1:i)],lh[-((49-i):48)]), digits=3) ))
}
###acf()###
acf(lh)
#The acf plot is consistent with the previous calculations, as can be verified as follows:
for (i in 1:6) {
r = round(cor(lh[-(1:i)],lh[-((49-i):48)]), digits=3)
points(i, r, pch=16, col="blue")
}
#comment: for this series the sample autocorrelation at lag 1 is large, but the dependence dies away immediately at higher lags
###Partial Autocorrelation ###
#partial autocorrelation: the correlation between Yt and Yt-k after removing the effect of Yt-k+1, ..., Yt-1 (see the small regression check after the pacf() call below)
acf(lh, lag.max = 24)
pacf(lh, lag.max = 24)
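#(a small check of the definition, assuming lh: the lag-2 partial autocorrelation is
# approximately the coefficient of Y[t-2] when regressing Y[t] on Y[t-1] and Y[t-2])
y = c(lh); n = length(y)
coef(lm(y[3:n] ~ y[2:(n-1)] + y[1:(n-2)]))[3]
pacf(lh, plot = FALSE)$acf[2]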
acf(BJsales)
pacf(BJsales)
ldax = log( EuStockMarkets[,"DAX"] )
acf(ldax)
pacf(ldax)
ldax.d1 = diff(ldax)
acf(ldax.d1)
pacf(ldax.d1)
###Differencing###
co2.d1 = diff(co2) # removing trend
co2.D1 = diff(co2, lag = 12) # removing seasonal
co2.d1D1 = diff(co2.d1, lag = 12) # removing trend and removing seasonal, then the series appears stationary
co2.D1d1 = diff(co2.D1,lag = 1)
co2.d1D1-co2.D1d1
par(mfrow=c(2,2))
plot(co2); plot(co2.d1); plot(co2.D1); plot(co2.d1D1)
acf(co2); acf(co2.d1); acf(co2.D1); acf(co2.d1D1)
###other things to try###
acf(BJsales)
pacf(BJsales)
#Gaussian Noise
g = as.ts(rnorm(100))
plot(g)
acf(g)
pacf(g)
# White Noise
w = as.ts(rt(100, df=5))
plot(w)
acf(w)
pacf(w)
####exercise2####
layout(mat= matrix(c(1,1,2,3), ncol=2, byrow=TRUE)) # c(1,1,2,3): positions 1 and 2 hold the first plot, position 3 the second, position 4 the third
plot(lh, type="b",main="Luteinizing Hormone 118102047",ylab="Hormone level",xlab="Time (10 minute intervals)")
acf(lh)
for (i in 1:6) {
r = round(cor(lh[-(1:i)],lh[-((49-i):48)]), digits=3)
points(i, r, pch=16, col="blue")
}
pacf(lh)
####lab class 3####
install.packages("astsa")
library(astsa)
? lap
# kill plot before moving on
par(mfrow=c(3,1))
plot(cmort, main="Cardiovascular Mortality", xlab="", ylab="")
plot(tempr, main="Temperature", xlab="", ylab="")
plot(part, main="Particulates", xlab="", ylab="")
dev.new()
pairs(cbind(Mortality=cmort, Temperature=tempr, Particulates=part)) # shows the pairwise relationships between the three series
# Regression
temp = tempr-mean(tempr) # center temperature
temp2 = temp^2 # square it
trend = time(cmort) # time
# fit the specified regression model
fit = lm(cmort~ trend + temp + temp2 + part, na.action=NULL)
summary(fit) # regression results
summary(aov(fit)) # ANOVA table (compare to next line)
summary(aov(lm(cmort~cbind(trend, temp, temp2, part)))) # Table 2.1
#AIC and BIC are both criteria for judging model quality
#AIC(fit) and BIC(fit) return the values directly, but here we compute them from the formulas below
num = length(cmort) # sample size
AIC(fit)/num - log(2*pi) # AIC
BIC(fit)/num - log(2*pi) # BIC
#alternative ways to compute them
#BIC(fit, k=log(num))/num - log(2*pi) # BIC (alt method)
#(AICc = log(sum(resid(fit)^2)/num) + (num+5)/(num-5-2)) # AICc
#moving average smoother
ma3 = filter(cmort, sides=2, rep(1,2)/2)#MA(2)
ma2.2 = filter(cmort, sides=2, c(1,2,1)/4)#MA(2), after MA(2)
plot(cmort, type="p", ylab="mortality", main="Cardiovascular Mortality")
lines(ma3, col="red")
lines(ma2.2, col="blue")
#Example 2.11
wk = time(cmort) - mean(time(cmort)) # wk is essentially t/52 centered at zero
wk2 = wk^2
wk3 = wk^3
cs = cos(2*pi*wk)
sn = sin(2*pi*wk)
reg1 = lm(cmort~wk + wk2 + wk3, na.action=NULL)
reg2 = lm(cmort~wk + wk2 + wk3 + cs + sn, na.action=NULL)
plot(cmort, type="p", ylab="mortality")
lines(fitted(reg1))
lines(fitted(reg2))
#Example 2.12
plot(cmort, type="p", ylab="mortality")
lines(ksmooth(time(cmort), cmort, "normal", bandwidth=5/52))
lines(ksmooth(time(cmort), cmort, "normal", bandwidth=2))
#Example 2.13
par(mfrow=c(2,1))
plot(cmort, type="p", ylab="mortality", main="nearest neighbor")
lines(supsmu(time(cmort), cmort, span=.5))
lines(supsmu(time(cmort), cmort, span=.01))
plot(cmort, type="p", ylab="mortality", main="lowess")
lines(lowess(cmort, f=.02))
lines(lowess(cmort, f=2/3))
#Example 2.14
plot(cmort, type="p", ylab="mortality")
lines(smooth.spline(time(cmort), cmort))
lines(smooth.spline(time(cmort), cmort, spar=1))
#Example 2.15
par(mfrow=c(2,1), mar=c(3,2,1,0)+.5, mgp=c(1.6,.6,0))
plot(tempr, cmort, main="lowess", xlab="Temperature", ylab="Mortality")
lines(lowess(tempr,cmort))
plot(tempr, cmort, main="smoothing splines", xlab="Temperature", ylab="Morta")
lines(smooth.spline(tempr, cmort))
####exercise3####
setwd('//Users//wangxinran//Desktop//4064')
dat = matrix( scan(file="nineXvars.txt") , ncol = 10 , byrow = TRUE )
dat = as.data.frame(dat)
names(dat) = c("y", paste("X",1:9, sep=""))
dim(dat)
names(dat)
pairs(dat)
## overview of linear model
fmla = as.formula( paste("y ~ .^2 +", paste("I(X",1:9,"^2)", sep="", collapse=" + ") ))
mod <- lm( fmla , data = dat)
summary(mod)
mod.aic = step(mod, direction = "backward", k = 2)
summary(mod.aic)
mod.bic = step(mod, direction = "backward", k = log(nrow(dat)))
summary(mod.bic)
plot(fitted(mod.bic), residuals(mod.bic), pch = ".")
####lab class 4####
#simulates an AR(1) time series process Yt=phi*Yt-1+et, where phi=0.7, sigma(e)^2=1, n=101
alpha = 0.7
Yt = arima.sim(model = list(ar = alpha), n = 101)
?arima.sim # simulate from an ARIMA model; by default an empty model list gives ARIMA(0,0,0), i.e. white noise
AR1.acf = ARMAacf(ar = alpha, lag.max = 25)
AR1.pacf = ARMAacf(ar = alpha, lag.max = 25, pacf = TRUE)
names(AR1.pacf) = names(AR1.acf)[-1]
layout(mat= matrix(c(1,1,2:5), byrow=TRUE, ncol=2))
plot(Yt, main= "Simulated AR(1) Process")
acf(Yt, lag.max = 25, main= "Estimated ACF")
pacf(Yt, lag.max = 25, main= "Estimated PACF")
barplot(AR1.acf, col = "blue", main= "Theoretical ACF")
barplot(AR1.pacf, col = "red", main= "Theoretical PACF")
#change phi and compare the plots for 0.7, -0.7, 0.99, -0.99, with n=101 or 1001
#simulates an AR(2) time series process Yt=0.7*Yt-1-0.4*Yt-2+et, with sigma(e)^2=1
Yt = arima.sim(list(ar = c(0.7, -0.4)), n=1001)
#Repeat the previous exercise using a shorter time series.
Yt = arima.sim(list(ar = c(0.7, 0.4)), n=1001)
#This is wrong, because 'ar' part of model is not stationary.
####stationarity-region plot: start####
#Stationarity of the AR(2) Process
n = 5000
a = runif(n, min= -1, max = 1)
b = runif(n, min= -1, max = 1)
Phi.RootsReal = cbind( a+b , -a*b )
x = runif(n, min= -1, max = 1)
y = numeric()
for(i in 1:n) y[i] = runif(1, -1*sqrt(1-(x[i])^2), sqrt(1-(x[i])^2))
Phi.RootsComplex = cbind( 2*x , -(x^2 +y^2) )
plot(Phi.RootsReal, col="blue", xlab= expression(phi[1]), ylab= expression(phi[2]))
points(Phi.RootsComplex, col="magenta")
curve(1-x, add = TRUE, lwd = 3) # condition phi1 + phi2 < 1
curve(1+x, add = TRUE, lwd = 3) # condition phi2 - phi1 < 1
curve(-x^2/4, add = TRUE, lwd = 3) # condition (phi1)^2 + 4phi2 < 0
CheckComplexRoots = (Phi.RootsComplex[,1])^2 + 4* Phi.RootsComplex[,2]
all(CheckComplexRoots < 0)
#ARMA(p,q) Process
Yt = arima.sim(n = 100, list(ar = c(0.9, -0.5), ma = c(-0.2, 0.25)), sd = sqrt(2))
# mildly long-tailed
Xt = arima.sim(n = 100, list(ar = c(0.9, -0.5), ma = c(-0.2, 0.25)),
sd = sqrt(2), rand.gen = function(n, ...) sqrt(0.2) * rt(n, df = 5))
Zt = arima.sim(list(ar = 0.7, ma = -0.7), n = 500)
####stationarity-region plot: end####
###
ar(lh)
lh.acf = acf(lh, lag.max=3)
lh.acf
names(lh.acf)
gamma.vec = matrix(data = lh.acf$acf[-1], ncol=1)
Gamma.mtx = diag(3)
diag(Gamma.mtx) = lh.acf$acf[1]
Gamma.mtx[abs(row(Gamma.mtx)-col(Gamma.mtx)) == 1] = lh.acf$acf[2]
Gamma.mtx[abs(row(Gamma.mtx)-col(Gamma.mtx)) == 2] = lh.acf$acf[3]
gamma.vec ; Gamma.mtx
solve(Gamma.mtx, gamma.vec)
(sunspot.ar <- ar(sunspot.year))
sunspot.lead = predict(sunspot.ar, n.ahead = 25)
ts.plot(sunspot.year, sunspot.lead$pred, col=c(5,2))
#ARIMA(p,d,q) Models
####exercise4####
layout(mat= matrix(c(1,1,2,3), nrow=2, byrow=TRUE))
set.seed(118102037)
ts.sim = arima.sim(list(order = c(1,2,1), ar = 0.7, ma=0.5), n = 500)
polyroot(z = c(1,-1.7,0.7)) # coefficients in ascending order of power
ts.plot(ts.sim)
acf(ts.sim)
pacf(ts.sim)
####lab class 5####
#Case study 1
options(digits = 5, show.signif.stars = FALSE) # print to 5 significant digits and suppress significance stars
?options() # options() lets the user set and examine global options affecting how R computes and displays results
######################################
# code 01: look at the data
lh
######################################
# code 02: plot the data. Think: AR or MA(1)? Why?
layout(mat=matrix(c(1,1,2,3),byrow=TRUE,ncol=2))
plot(lh, type = "o", xlab = "Time (10 minutes intervals)", ylab= "Hormone Level")
acf(lh) ; pacf(lh)
######################################
# code 03: fit an AR(1) model
lh.ar1 = ar(lh, aic = FALSE, order.max = 1)
lh.ar1
names(lh.ar1)
lh.ar1$method
######################################
# code 04: exploring, we see that for
# an AR(1) phi.hat is just:
acf(lh)$acf
round(acf(lh)$acf[2], 4)
######################################
# code 05: criticize fitted AR(1)
# Think: are you satisfied with the behavior of the residuals?
resids = c(lh.ar1$resid)
acf(resids) #doesn't work, why not?
par(mfrow=c(2,2))
acf(resids[-1])
pacf(resids[-1])
qqnorm(resids)
cpgram(resids)
######################################
# code 06: criticise fitted AR(1)
# Using the arima( ) function in R...
tsdiag(lh.ar1) # doesn't work! TRY...
lh.ar1 = arima(lh, order = c(1,0,0))
lh.ar1 # how does the output differ???
tsdiag(lh.ar1)
cpgram(residuals(lh.ar1))
######################################
# code 07: use arima function to investigate
# AR(2), AR(3), and AR(4) models. e.g.
lh.ar2 = arima(lh, order = c(2,0,0))
lh.ar2 # how does the output differ???
tsdiag(lh.ar2)
cpgram(residuals(lh.ar2))
######################################
# code 08: explore AR(p) p=1,...,8
# The "aic" option in ar( ) is a logical flag.
# If TRUE then the Akaike Information Criterion
# is used to choose the order of the AR model.
# If FALSE, the model of order "order.max" is fitted.
# For example, "order.max = 8" fits an AR(8) model.
# Also collects the AICs for AR(1),AR(2),...,AR(8) models.
# Think: which model? (think very carefully!)
lh.ARmods = ar(lh, order.max=8, aic=FALSE)
plot(seq(0,8), lh.ARmods$aic, type="b", pch=16, col="blue",
main = "AIC for LH", xlab="Order of AR model",
ylab="log[ AIC / AIC of AR(3) ]")
######################################
# code 09: Summary of AR(3) model..
# Compare the output...
lh.ar3 = ar(lh, order.max=3)
lh.ar3
# y[t] = 0.6534*y[t-1] -0.0636*y[t-2] -0.2269*y[t-3] + e[t]
# with var{ e[t] } = sigma^2 = 0.1959
lh.ar3 = arima(lh, order = c(3,0,0))
lh.ar3 # what has changed?
lh.ar3 = arima(lh, order = c(3,0,0), method = "CSS-ML")
lh.ar3 # what has changed?
lh.ar3 = arima(lh, order = c(3,0,0), method = "ML")
lh.ar3 # what has changed?
lh.ar3 = arima(lh, order = c(3,0,0), method = "CSS")
lh.ar3 # what has changed?
#
######################################
# code 10: Best ARMA subset regression
# Try this at home. You will need the TSA package.
# Think: which model? (think carefully!)
library(TSA)
lh.mods = armasubsets(y = lh, nar=4, nma=4)
plot(lh.mods)
# PREAMBLE
options(digits = 5, show.signif.stars = FALSE)
######################################
# code 01: look at the data
# Think: AR or MA? What order?
LakeHuron
layout(mat=matrix(c(1,1,2,3),byrow=TRUE,ncol=2))
plot(LakeHuron, type = "o", ylab = "level (feet)")
title(main= "Annual measurements of the level of Lake Huron 1875-1972.")
acf(LakeHuron); pacf(LakeHuron)
######################################
# code 02: fit some starting models!
# Think: Which model? Why??
lake1 = arima(LakeHuron, order = c(1,0,0))
tsdiag(lake1)
lake2 = arima(LakeHuron, order = c(2,0,0))
tsdiag(lake2)
lake3 = arima(LakeHuron, order = c(3,0,0))
tsdiag(lake3)
lake4 = arima(LakeHuron, order = c(1,0,1))
tsdiag(lake4)
lake1$aic; lake2$aic; lake3$aic; lake4$aic
par(mfrow=c(2,2))
cpgram(residuals(lake1)); cpgram(residuals(lake2))
cpgram(residuals(lake3)); cpgram(residuals(lake4))
######################################
# code 01: look at the data
layout(mat=matrix(c(1,1,2,3),byrow=TRUE,ncol=2))
plot(Nile, type = "o")
lines(lowess(Nile, f=1/3), col="red", lwd=2)
acf(Nile); pacf(Nile)
######################################
# code 02: look through possible AR models
ar.aic = ar(Nile)
ar.aic # selects order 2
#how did it decide on AR(2)? try...
plot(seq(ar.aic$aic), ar.aic$aic,
type = "b", ylab = "AIC", xlab = "order")
######################################
# code 03: fit a preliminary model
myfit = arima(Nile, c(2, 0, 0))
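# (the Ljung-Box p-values referred to below come from the standard diagnostics;
#  a quick look:)
tsdiag(myfit)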
# Can you explain why the pvalues of the Ljung-Box
# Portmanteau statistic are approaching statistical
# significance at higher lags?
# Examine the data further. Can you do better??
#
######################################
# code 99: Best ARMA subset regression
# Try this at home. You will need the TSA package.
# Think: which model? (think carefully!)
library(TSA)
lh.mods = armasubsets(y = Nile, nar=4, nma=4)
plot(lh.mods)
######################################
# code 03: Select a working model
lake.fit = lake4
######################################
# code 04: produce forecasts...
lake.pred = predict(lake.fit, n.ahead = 12)
names(lake.pred)
lake.pred
# Here we have fitted an ARIMA(1,0,1) model to the
# Lake Huron data. We have predicted the levels of
# Lake Huron for the next 12 years. In this case,
# "Lake.pred" is a list containing two entries, the
# predicted values "Lake.pred$pred" and the standard
# errors of the prediction "Lake.pred$se"}. Using a
# rule of thumb ``prediction +/- 2*standard error,"
# an approximate 95% prediction interval can be
# calculated for these future values.
######################################
# code 05: plot forecasts...
plot(LakeHuron, xlim = c(1875, 2000), ylim = c(575,584),
main="Lake Hurron levels and predicted values")
# First the levels of Lake Huron are plotted.
# To leave space for adding the predicted values,
# the x-axis is set from 1875 to 2000 with
# "xlim=c(1875,2000)"; the use of "ylim" is purely
# for cosmetic purposes here:
# Execute the following code line-by-line...
lake.pred = predict(lake.fit, n.ahead = 28)
lines(lake.pred$pred,col="red")
lines(lake.pred$pred+2*lake.pred$se,col="red",lty=3)
lines(lake.pred$pred-2*lake.pred$se,col="red",lty=3)
abline(h = mean(LakeHuron), col="grey", lty = 2)
# The final command draws a horizontal line at
# the mean of the Lake Huron values. What does
# this lead you to conclude you about the
# potential for long term forecasting?
######################################
# code 06: plot forecasts using one line of R code :) but less colour :(
plot(lake.fit, n.ahead = 12)
######################################
# code 01: read in the data
setwd("... wherever you keep your files")
recife = ts(data = scan("recifetemps.txt"), start = 1953, frequency = 12)
######################################
# code 02: look at the data
# Think: in words, describe the trend and seasonal patterns in the data.
recife
plot(recife, ylab="air temperature")
# and tag the months...
points(y= recife, x=time(recife), pch = as.vector(tag(recife)), cex=0.7) # tag() is assumed to be a course-provided helper returning month labels
######################################
# code 03: look at the data again
plot(stl(recife, s.window=11))
# Rethink: in words, describe the trend and seasonal patterns in the data.
######################################
# code 04: differencing
y = diff(recife)
y = diff(y)
y = diff(y, lag = 12)
layout(mat=matrix(c(1,1,2,3),byrow=TRUE,ncol=2))
plot(y, main= expression((1-B)^{2}*(1-B^{12})*X[t]))
points(y, x=time(y), pch = as.vector(tag(y)), cex=0.7)
acf(y, lag.max = 30); pacf(y, lag.max = 30)
######################################
# code 05: fit an ARIMA(p,d,q)(P,D,Q)
myfit = arima(recife, order=c(3,2,0), seasonal = c(2,1,0))
myfit
resids = residuals(myfit)
par(mfrow=c(1,2))
acf(resids, lag.max = 36)
pacf(resids, lag.max = 36)
# kill plot before proceeding
cpgram(resids)
######################################
# code 06: model criticism
# Think: are you satisfied with the behaviour of the residuals?
# Think: can you produce a better model?
#
######################################
# code 01: load the data
yield = ts(start=1950, frequency = 12, data = c(
2.22,2.23,2.22,2.20,2.09,1.97,2.03,1.98,1.94,1.79,1.74,1.86,
1.78,1.72,1.79,1.82,1.89,1.99,1.89,1.83,1.71,1.70,1.97,2.21,
2.36,2.41,2.92,3.15,3.26,3.51,3.48,3.16,3.01,2.97,2.88,2.91,
3.45,3.29,3.17,3.09,3.02,2.99,2.97,2.94,2.84,2.85,2.86,2.89,
2.93,2.93,2.87,2.82,2.63,2.33,2.22,2.15,2.28,2.28,2.06,2.54,
2.29,2.66,3.03,3.17,3.83,3.99,4.11,4.51,4.66,4.37,4.45,4.58,
4.58,4.76,4.89,4.65,4.51,4.65,4.52,4.52,4.57,4.65,4.74,5.10,
5.00,4.74,4.79,4.83,4.80,4.83,4.77,4.80,5.38,6.18,6.02,5.91,
5.66,5.42,5.06,4.70,4.73,4.64,4.62,4.48,4.43,4.33,4.32,4.30,
4.26,4.02,4.06,4.08,4.09,4.14,4.15,4.20,4.30,4.26,4.15,4.27,
4.69,4.72,4.92,5.10,5.20,5.56,6.08,6.13,6.09,5.99,5.58,5.59,
5.42,5.30,5.44,5.32,5.21,5.47,5.96,6.50,6.48,6.00,5.83,5.91,
5.98,5.91,5.64,5.49,5.43,5.33,5.22,5.03,4.74,4.55,4.68,4.53,
4.67,4.81,4.98,5.00,4.94,4.84,4.76,4.67,4.51,4.42,4.53,4.70,
4.75,4.90,5.06,4.99,4.96,5.03,5.22,5.47,5.45,5.48,5.57,6.33,
6.67,6.52,6.60,6.78,6.79,6.83,6.91,6.93,6.65,6.53,6.50,6.69,
6.58,6.42,6.79,6.82,6.76,6.88,7.22,7.41,7.27,7.03,7.09,7.18,
6.69,6.50,6.46,6.35,6.31,6.41,6.60,6.57,6.59,6.80,7.16,7.51,
7.52,7.40,7.48,7.42,7.53,7.75,7.80,7.63,7.51,7.49,7.64,7.92,
8.10,8.18,8.52,8.56,9.00,9.34,9.04,9.08,9.14,8.99,8.96,8.86,
8.79,8.62,8.29,8.05,8.00,7.89,7.48,7.31,7.42,7.51,7.71,7.99))
######################################
# code 02: plot the data
layout(mat=matrix(c(1,1,2,3),byrow=TRUE,ncol=2))
plot(yield, type = "o", ylab= "Yield")
acf(yield) ; pacf(yield)
# [1] There is no discernible seasonal variation.
# [2] There is a marked trend, from about 2% at
# the start of the series to about 7% at the end.
# [3] The trend is by no means regular, and to
# assume and to fit a straight line would be a gross
# over simplification. The use of a trend-and-season
# model, or linear regression seems inappropriate.
# [4] These observations are confirmed by the
# spectrum, ACF, and PACF. Kill the last plot. Then...
cpgram(yield)
# suggests a Random Walk Model.
# The non-stationarity indicates that
# some sort of differencing is required.
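# (an optional formal check, assuming the tseries package loaded earlier; a large
#  p-value here is consistent with non-stationarity and the need to difference)
adf.test(yield)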
# Kill the last plot. Then...
######################################
# code 03: difference the data, and plot...
yield.diff = diff(yield)
layout(mat=matrix(c(1,1,2,3),byrow=TRUE,ncol=2))
plot(yield.diff, type = "o", ylab = "Difference",
main= "Month-to-Month Changes in Yield")
acf(yield.diff) ; pacf(yield.diff)
# Kill the last plot. Then...
cpgram(yield.diff)
# These plots tell us quite a lot.
# First, the differenced series is
# now stationary, and no additional
# differencing is required. The fact
# that there is only one significant
# coefficient at lag 1 indicates an
# ARIMA(0,1,1) model.
######################################
# code 04: Fit and contrast some ARIMA fits...
fit1 = arima(yield,order=c(0,1,1))
tsdiag(fit1)
cpgram(residuals(fit1))
fit2 = arima(yield,order=c(0,1,2))
tsdiag(fit2)
cpgram(residuals(fit2))
fit3 = arima(yield,order=c(0,1,3))
tsdiag(fit3)
cpgram(residuals(fit3))
######################################
# code 05: plot forecasts...
plot(yield, xlim = c(1950, 1972), ylim = c(0,10),
main="Yield (%) on British short term government securities")
yield.pred = predict(fit3, n.ahead = 20)
lines(yield.pred$pred,col="red")
lines(yield.pred$pred+2* yield.pred$se,col="red",lty=3)
lines(yield.pred$pred-2* yield.pred$se,col="red",lty=3)
# What do you conclude about the potential for long term forecasting here?
######################################
# code 01: look at the data
WWWusage
######################################
# code 02: plot the data. Think: AR or MA(1)? Why?
layout(mat=matrix(c(1,1,2,3),byrow=TRUE,ncol=2))
plot(WWWusage, type = "o", main = "Internet Usage per Minute",
xlab = "Time (minute intervals)", ylab= "Number of Users")
acf(WWWusage) ; pacf(WWWusage)
######################################
# code 03: Dickey-Fuller Test
# If "tseries" package is available, then try:
library(tseries)
adf.test(WWWusage)
######################################
# code 04: take differences
work <- diff(WWWusage)
par(mfrow = c(2, 1)); plot(WWWusage); plot(work)
######################################
# code 05: Model Search
aics <- matrix(NA, 6, 6, dimnames = list(p = 0:5, q = 0:5))
for(q in 1:5) aics[1, 1+q] <- arima(WWWusage, c(0, 1, q),
optim.control = list(maxit = 500))$aic
for(p in 1:5)
for(q in 0:5) aics[1+p, 1+q] <- arima(WWWusage, c(p, 1, q),
optim.control = list(maxit = 500))$aic
round(aics - min(aics, na.rm = TRUE), 2)
#
######################################
# code 06: Investigate the following models.
# ARIMA(1,1,0), ARIMA(2,1,0), ARIMA(3,1,0), ARIMA(1,1,1),
## Code 1
## plot the data...
plot(nottem, main = "nottem data",ylab = "Average monthly temperature at Nottingham Castle (deg. F)")
## Code 2
## a few plots to visualise the seasonal components...
Temperature = matrix(c(nottem), nrow = 12, byrow = FALSE)
matplot(Temperature, type = "l")
Temperature = c(nottem)
Month = c(cycle(nottem))
boxplot(Temperature ~ Month)
cpgram(nottem)
plot(stl(nottem, s.window = 4))
## Code 3
## Differencing
nottem.D1 = diff(nottem, lag = 12)
cpgram(nottem.D1)
par(mfrow=c(1,2))
acf(nottem.D1, lag.max=40)
pacf(nottem.D1, lag.max=40)
# (kill plot before proceeding)
## Code 4
## Fit S-ARIMA(p,d,q)(P,D,Q)
fit <- arima(nottem, order = c(1,0,0), list(order = c(2,1,0), period = 12))
tsdiag(fit)
cpgram(residuals(fit))
## Code 5
## Forecasting...
nott.fore <- predict(fit, n.ahead = 36)
ts.plot(nottem, nott.fore$pred, nott.fore$pred+2*nott.fore$se,
nott.fore$pred-2*nott.fore$se, gpars = list(col = c(1,1,4,4)))
####lab class 6####
######################################
# code 01: look at the data
plot(AirPassengers, ylab="Air Passengers (1000's)")
segments(x0=1951, y0=100, x1=1961, y1=360, col = 2, lty = 2, lwd = 2 )
segments(x0=1950, y0=170, x1=1960.5, y1=620, col = 2, lty = 2, lwd = 2 )
######################################
# code 02: apply a transformation
APs = log(AirPassengers)
plot(APs)
######################################
# code 03: autocorrelation dominated by trend:
layout(matrix(c(1,1,2,3), 2, 2, byrow = TRUE))
plot(APs, ylab ="", main=expression(X[i]))
acf(APs, main = "")
pacf(APs, main = "")
######################################
# code 04: difference the data
AP1dif = diff(APs)
layout(matrix(c(1,1,2,3), 2, 2, byrow = TRUE))
plot(AP1dif, ylab = "", main=expression((1-B)*X[i]))
acf(AP1dif, lag.max = 48, main = "")
pacf(AP1dif, lag.max = 48, main = "")
######################################
# code 05: difference the data, at lag 12
AP12dif = diff(APs,lag=12)
layout(matrix(c(1,1,2,3), 2, 2, byrow = TRUE))
plot(AP12dif, ylab="", main=expression((1-B^{12})*X[i]))
acf(AP12dif, lag.max = 48, main = "")
pacf(AP12dif, lag.max = 48, main = "")
######################################
# code 06: difference the data at lag 12, and difference again at lag 1
Ys = diff(AP12dif)
layout(matrix(c(1,1,2,3), 2, 2, byrow = TRUE))
plot(Ys, ylab="",main=expression(Y[i]==(1-B)*(1-B^{12})*X[i]))
acf(Ys, lag.max = 48, main = "")
pacf(Ys, lag.max = 48, main = "")
# Kill graph before moving on
######################################
# code 07: select an ARIMA model
APfit = arima(APs, order=c(0, 1, 0), seasonal = list(order=c(0, 1 ,0), period=12))
tsdiag(APfit)
APfit = arima(APs, order=c(0, 1, 1), seasonal = list(order=c(0, 1 ,1), period=12))
tsdiag(APfit)
APfit
######################################
# code 08: produce forecasts...
AP.pred = predict(APfit, n.ahead = 30)
AP.pred
######################################
# code 09: plot forecasts...
plot(APs, xlim = c(1949, 1963), ylim = c(4.6,6.8),
main="log[ Air Passengers (1000's) ]")
lines(AP.pred$pred,col="red")
lines(AP.pred$pred+2*AP.pred$se,col="red",lty=3)
lines(AP.pred$pred-2*AP.pred$se,col="red",lty=3)
# Homework: transform the prediction back to the
# original scale and display forecasts with prediction
# bands where Y axis is in 1000's airline passengers
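# (a minimal sketch for this homework: back-transform with exp(); note that exp() of
#  the log-scale interval gives an asymmetric interval on the original scale)
plot(AirPassengers, xlim = c(1949, 1964), ylim = c(100, 900),
     main = "Air Passengers (1000's) with forecasts")
lines(exp(AP.pred$pred), col = "red")
lines(exp(AP.pred$pred + 2*AP.pred$se), col = "red", lty = 3)
lines(exp(AP.pred$pred - 2*AP.pred$se), col = "red", lty = 3)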
######################################
# code 10: Periodograms
cpgram(AirPassengers)
cpgram(APs)
cpgram(AP1dif)
cpgram(AP12dif)
cpgram(Ys)
cpgram(residuals(APfit))