#import required packagesimport pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline
#read the data
df = pd.read_csv("AirQualityUCI.csv", parse_dates=[['Date', 'Time']])
#check the dtypes
df.dtypes
#missing value treatment
cols = data.columns
for j in cols:
for i inrange(0,len(data)):
if data[j][i] == -200:
data[j][i] = data[j][i-1]
#checking stationarityfrom statsmodels.tsa.vector_ar.vecm import coint_johansen
#since the test works for only 12 variables, I have randomly dropped#in the next iteration, I would drop another and check the eigenvalues
johan_test_temp = data.drop([ 'CO(GT)'], axis=1)
coint_johansen(johan_test_temp,-1,1).eig
#creating the train and validation set
train = data[:int(0.8*(len(data)))]
valid = data[int(0.8*(len(data))):]
#fit the modelfrom statsmodels.tsa.vector_ar.var_model import VAR
model = VAR(endog=train)
model_fit = model.fit()
# make prediction on validation
prediction = model_fit.forecast(model_fit.y, steps=len(valid))
预测采用数组的形式,其中每个列表代表行的预测。 我们将把它转换成更易于呈现的格式。
#converting predictions to dataframe
pred = pd.DataFrame(index=range(0,len(prediction)),columns=[cols])
for j inrange(0,13):
for i inrange(0, len(prediction)):
pred.iloc[i][j] = prediction[i][j]
#check rmsefor i in cols:
print('rmse value for', i, 'is : ', sqrt(mean_squared_error(pred[i], valid[i])))
输出:
rmse value for CO(GT) is : 1.4200393103392812
rmse value for PT08.S1(CO) is : 303.3909208229375
rmse value for NMHC(GT) is : 204.0662895081472
rmse value for C6H6(GT) is : 28.153391799471244
rmse value for PT08.S2(NMHC) is : 6.538063846286176
rmse value for NOx(GT) is : 265.04913993413805
rmse value for PT08.S3(NOx) is : 250.7673347152554
rmse value for NO2(GT) is : 238.92642219826683
rmse value for PT08.S4(NO2) is : 247.50612831072633
rmse value for PT08.S5(O3) is : 392.3129907890131
rmse value for T is : 383.1344361254454
rmse value for RH is : 506.5847387424092
rmse value for AH is : 8.139735443605728
预测代码:
#make final predictions
model = VAR(endog=data)
model_fit = model.fit()
yhat = model_fit.forecast(model_fit.y, steps=1)
print(yhat)