本文使用几种较为简单的深度学习模型进行时间序列预测,数据集的格式如下:
image.png
用历史的20个周期每天的数量预测下一个周期的数量。
from numpy import array
from numpy import hstack
from keras.models import Sequential
from keras.layers import Dense
import pandas as pd
def split_sequence(sequence, n_steps):
    """Convert a series into the sample format expected by the models.

    Every window of ``n_steps`` consecutive values becomes one input row and
    the value immediately after the window becomes its target.

    Args:
        sequence: Indexable, sliceable series of observations (list or array).
        n_steps: Number of consecutive observations per input window; must
            be at least 1.

    Returns:
        A tuple ``(X, y)`` of arrays: ``X`` holds the windows and ``y`` the
        value following each window. Both are empty when the series has
        ``n_steps`` or fewer observations.

    Raises:
        ValueError: If ``n_steps`` is smaller than 1.
    """
    if n_steps < 1:
        # A negative window length would silently index from the end of the
        # series and pair unrelated values; reject it up front.
        raise ValueError("n_steps must be >= 1, got %r" % (n_steps,))
    # Only window starts that leave one value after the window yield a sample.
    n_samples = max(len(sequence) - n_steps, 0)
    X = [sequence[start:start + n_steps] for start in range(n_samples)]
    y = [sequence[start + n_steps] for start in range(n_samples)]
    return array(X), array(y)
def base_model(n_steps):
    """Build and compile a small fully connected regression network.

    Args:
        n_steps: Input dimension of the first ``Dense`` layer, i.e. the
            length of each input window.

    Returns:
        A compiled ``Sequential`` model: a 100-unit ReLU hidden layer
        followed by a single-unit output layer, compiled with the Adam
        optimizer and mean-squared-error loss.
    """
    network = Sequential([
        Dense(100, activation='relu', input_dim=n_steps),
        Dense(1),
    ])
    network.compile(optimizer='adam', loss='mse')
    return network
# Load the dataset; the first CSV column is used as the index and each
# remaining row of values is treated as one series.
file = '数据集.csv'
# `on_bad_lines='warn'` is the replacement for the removed
# `error_bad_lines=False`: malformed lines are dropped with a warning.
series = pd.read_csv(file, header=0, index_col=0, on_bad_lines='warn', encoding='gbk')
data = series.values
n_steps = 3
# Build the model from `n_steps` so the window length is defined in one place.
model = base_model(n_steps)
result = []
# NOTE(review): the same model instance keeps training across rows, so each
# forecast is also influenced by previously fitted rows -- confirm intended.
for row in data:
    X, y = split_sequence(row, n_steps)
    model.fit(X, y, epochs=2000, verbose=0)
    # Forecast the next period from the last `n_steps` observations.
    x_input = array(row[-n_steps:])
    x_input = x_input.reshape((1, n_steps))
    yhat = model.predict(x_input, verbose=0)
    # Round to a whole number and clamp negative forecasts to zero.
    res = max(0, round(yhat[0][0]))
    print(res)
    result.append(res)
n_steps=3 表示对每一条数据,用连续 3 个周期的值作为输入、紧接着的下一个周期的值作为标签进行训练学习。第一条样本处理后的格式如下。
image.png
image.png
打印前4条预测结果如下:
image.png
第一条数据预测的下一周期数量是20,第二、三、四条都是0。以上代码可以当作用深度学习进行时间序列预测的通用格式。
下面以此格式用CNN进行时间序列预测,直接上代码。
from numpy import array
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Flatten
import pandas as pd
from keras.layers.convolutional import Conv1D
from keras.layers.convolutional import MaxPooling1D
def split_sequence(sequence, n_steps):
    """Slice a series into (window, next value) training samples.

    Every window of ``n_steps`` consecutive values becomes one input row and
    the value immediately after the window becomes its target.

    Args:
        sequence: Indexable, sliceable series of observations (list or array).
        n_steps: Number of consecutive observations per input window; must
            be at least 1.

    Returns:
        A tuple ``(X, y)`` of arrays: ``X`` holds the windows and ``y`` the
        value following each window. Both are empty when the series has
        ``n_steps`` or fewer observations.

    Raises:
        ValueError: If ``n_steps`` is smaller than 1.
    """
    if n_steps < 1:
        # A negative window length would silently index from the end of the
        # series and pair unrelated values; reject it up front.
        raise ValueError("n_steps must be >= 1, got %r" % (n_steps,))
    # Only window starts that leave one value after the window yield a sample.
    n_samples = max(len(sequence) - n_steps, 0)
    X = [sequence[start:start + n_steps] for start in range(n_samples)]
    y = [sequence[start + n_steps] for start in range(n_samples)]
    return array(X), array(y)
def base_model(n_steps,n_features):
    """Build and compile a 1D convolutional regression network.

    Args:
        n_steps: Number of time steps in each input window.
        n_features: Number of features observed at each time step.

    Returns:
        A compiled ``Sequential`` model: a 64-filter ``Conv1D`` layer with
        kernel size 2, max pooling with pool size 2, flattening, a 50-unit
        ReLU dense layer and a single-unit output layer, compiled with the
        Adam optimizer and mean-squared-error loss.
    """
    window_shape = (n_steps, n_features)
    network = Sequential([
        Conv1D(filters=64, kernel_size=2, activation='relu', input_shape=window_shape),
        MaxPooling1D(pool_size=2),
        Flatten(),
        Dense(50, activation='relu'),
        Dense(1),
    ])
    network.compile(optimizer='adam', loss='mse')
    return network
# Load the dataset; the first CSV column is used as the index and each
# remaining row of values is treated as one series.
file = '数据集.csv'
# `on_bad_lines='warn'` is the replacement for the removed
# `error_bad_lines=False`: malformed lines are dropped with a warning.
series = pd.read_csv(file, header=0, index_col=0, on_bad_lines='warn', encoding='gbk')
data = series.values
n_steps = 3
n_features = 1
# Build the model from the same constants used to shape its inputs below.
model = base_model(n_steps, n_features)
result = []
# NOTE(review): the same model instance keeps training across rows, so each
# forecast is also influenced by previously fitted rows -- confirm intended.
for row in data:
    X, y = split_sequence(row, n_steps)
    # Conv1D expects input shaped (samples, time steps, features).
    X = X.reshape((X.shape[0], X.shape[1], n_features))
    model.fit(X, y, epochs=2000, verbose=0)
    # Forecast the next period from the last `n_steps` observations.
    x_input = array(row[-n_steps:])
    x_input = x_input.reshape((1, n_steps, n_features))
    yhat = model.predict(x_input, verbose=0)
    # Round to a whole number and clamp negative forecasts to zero.
    res = max(0, round(yhat[0][0]))
    print(res)
    result.append(res)
LSTM的时间序列预测效果要比大多数模型好。同样格式的代码如下:
from numpy import array
from keras.models import Sequential
from keras.layers import LSTM
from keras.layers import Dense
import pandas as pd
def split_sequence(sequence, n_steps):
    """Slice a series into (window, next value) training samples.

    Every window of ``n_steps`` consecutive values becomes one input row and
    the value immediately after the window becomes its target.

    Args:
        sequence: Indexable, sliceable series of observations (list or array).
        n_steps: Number of consecutive observations per input window; must
            be at least 1.

    Returns:
        A tuple ``(X, y)`` of arrays: ``X`` holds the windows and ``y`` the
        value following each window. Both are empty when the series has
        ``n_steps`` or fewer observations.

    Raises:
        ValueError: If ``n_steps`` is smaller than 1.
    """
    if n_steps < 1:
        # A negative window length would silently index from the end of the
        # series and pair unrelated values; reject it up front.
        raise ValueError("n_steps must be >= 1, got %r" % (n_steps,))
    # Only window starts that leave one value after the window yield a sample.
    n_samples = max(len(sequence) - n_steps, 0)
    X = [sequence[start:start + n_steps] for start in range(n_samples)]
    y = [sequence[start + n_steps] for start in range(n_samples)]
    return array(X), array(y)
def base_model(n_steps,n_features):
    """Build and compile a single-layer LSTM regression network.

    Args:
        n_steps: Number of time steps in each input window.
        n_features: Number of features observed at each time step.

    Returns:
        A compiled ``Sequential`` model: a 50-unit ``LSTM`` layer with ReLU
        activation followed by a single-unit output layer, compiled with the
        Adam optimizer and mean-squared-error loss.
    """
    window_shape = (n_steps, n_features)
    network = Sequential([
        LSTM(50, activation='relu', input_shape=window_shape),
        Dense(1),
    ])
    # Compilation only configures optimizer and loss; training happens later.
    network.compile(optimizer='adam', loss='mse')
    return network
# Load the dataset; the first CSV column is used as the index and each
# remaining row of values is treated as one series.
file = '数据集.csv'
# `on_bad_lines='warn'` is the replacement for the removed
# `error_bad_lines=False`: malformed lines are dropped with a warning.
series = pd.read_csv(file, header=0, index_col=0, on_bad_lines='warn', encoding='gbk')
data = series.values
n_steps = 3
n_features = 1
# Build the model from the same constants used to shape its inputs below.
model = base_model(n_steps, n_features)
result = []
# NOTE(review): the same model instance keeps training across rows, so each
# forecast is also influenced by previously fitted rows -- confirm intended.
for row in data:
    X, y = split_sequence(row, n_steps)
    # LSTM expects input shaped (samples, time steps, features).
    X = X.reshape((X.shape[0], X.shape[1], n_features))
    model.fit(X, y, epochs=2000, verbose=0)
    # Forecast the next period from the last `n_steps` observations.
    x_input = array(row[-n_steps:])
    x_input = x_input.reshape((1, n_steps, n_features))
    yhat = model.predict(x_input, verbose=0)
    # Round to a whole number and clamp negative forecasts to zero.
    res = max(0, round(yhat[0][0]))
    print(res)
    result.append(res)