import pandas as pd
import numpy as np
import xgboost as xgb
from xgboost.sklearn import XGBClassifier
from sklearn import model_selection, metrics
from sklearn.model_selection import train_test_split
from sklearn.metrics import f1_score, roc_auc_score, accuracy_score
import warnings
warnings.filterwarnings(module='sklearn*', action='ignore', category=DeprecationWarning)
from sklearn import datasets
from collections import Counter
import os
import pandas as pd
import pandas as pd
import numpy as np
import xgboost as xgb
import matplotlib.pylab as plt
# %matplotlib inline
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
# Select the SimHei font so the Chinese axis labels and title used further
# down can be drawn, and disable the Unicode minus glyph, which is commonly
# missing from CJK fonts.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

# Load the numerically encoded historical records.
# Original author's note: columns 1, 3 and 7 are the prediction columns
# (not verifiable from this script; only column '最低气温' is predicted below).
data = pd.read_csv("数字化处理历史真实数据.csv")
# Replace missing values with 0 so the feature matrix is fully numeric.
data = data.fillna(0)

# Columns listed by the original author:
#   ['日期', '星期', '最高气温', '最低气温', '天气', '风向', '级数']
#   (date, weekday, max temperature, min temperature, weather,
#    wind direction, wind level)
# Only the last five are used as model inputs.
feature_columns = ['最高气温', '最低气温', '天气', '风向', '级数']
# float32 rather than float16: half precision keeps only ~3 significant
# decimal digits and tops out at 65504, and the regression targets are later
# sliced from this same matrix, so reductions over them (e.g. the sums of
# squares inside the evaluation metrics) could lose accuracy or overflow.
data_x = np.asarray(data[feature_columns].values, dtype=np.float32)
print(data_x)
# Number of consecutive rows that form one input window.
int_sequence_len = 3
# Length of each row in a window (presumably the five feature columns of
# data_x; the value is not used in the visible part of the script).
int_a = 5

# Slide a window of int_sequence_len rows over data_x, one row at a time.
# Each window is one sample; its target is column 1 of the row that
# immediately follows the window.
window_starts = range(len(data_x) - int_sequence_len)
train_x = [data_x[start:start + int_sequence_len] for start in window_starts]
train_y = [data_x[start + int_sequence_len][1] for start in window_starts]
print(len(train_x), len(train_y))
# Split the windows into a training set and a test set (80% / 20%) with a
# fixed seed so the split is reproducible.
# NOTE(review): train_test_split shuffles by default and neighbouring windows
# share rows, so closely related samples can end up on both sides of the
# split -- confirm this is acceptable for the evaluation.
x_train, x_test, y_train, y_test = train_test_split(
    np.array(train_x),
    np.array(train_y),
    test_size=0.2,
    random_state=1,
)

# Flatten every (sequence length, features) window into one feature row:
# 3-D (samples, sequence length, features) -> 2-D (samples, flattened).
x_train = x_train.reshape(x_train.shape[0], -1)
x_test = x_test.reshape(x_test.shape[0], -1)

# The training target is turned into a column vector; y_test is left 1-D.
y_train = y_train.reshape(-1, 1)
print(x_train.shape, y_train.shape)
from sklearn.model_selection import GridSearchCV

# Open research items noted by the original author: n_epoch and max_depth.

# Base XGBoost regressor. It is created without arguments, so every
# hyper-parameter that is not part of the search grid keeps its default.
# Reference configuration left behind (commented out) by the author:
#   learning_rate=0.12, n_estimators=150, max_depth=5, min_child_weight=1,
#   gamma=0, subsample=0.8, colsample_bytree=0.8,
#   objective='reg:squarederror', reg_alpha=0, reg_lambda=1, nthread=4,
#   scale_pos_weight=1, seed=27
xgb1 = xgb.XGBRegressor()

# Candidate values for the exhaustive search: 7 * 8 * 5 * 3 = 840 combinations.
param_test1 = dict(
    min_child_weight=[1, 2, 3, 4, 5, 6, 8],
    max_depth=list(range(1, 9)),
    learning_rate=[0.09, 0.1, 0.12, 0.13, 0.14],
    n_estimators=[100, 120, 150],
)

# 5-fold cross-validated grid search over param_test1 using 4 parallel jobs.
search_options = {'n_jobs': 4, 'cv': 5}
sklearn_model_raw = GridSearchCV(
    estimator=xgb1,
    param_grid=param_test1,
    **search_options,
)
sklearn_model_raw.fit(x_train, y_train)

# Predict the held-out test windows with the fitted search object.
pred_test_raw = sklearn_model_raw.predict(x_test)
# mean_squared_error  -> mean squared error (MSE)
# mean_absolute_error -> mean absolute error (MAE)
# r2_score            -> coefficient of determination (R^2)
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# Score the test-set predictions against the true targets.
test_mse = mean_squared_error(y_test, pred_test_raw)
test_mae = mean_absolute_error(y_test, pred_test_raw)
test_R2 = r2_score(y_test, pred_test_raw)

# Report each metric with six decimal places, in the order MSE, MAE, R^2.
for report_template, score in (
    ('mse Score: %.6f mse', test_mse),
    ('mae Score: %.6f mae', test_mae),
    ('R2 Score: %.6f R2', test_R2),
):
    print(report_template % score)
# Plot the true test targets against the model predictions.
# The x positions are simply the sample indices 0 .. len(y_test) - 1.
len_ = list(range(len(y_test)))
plt.xlabel('标签', fontsize=8)
plt.ylabel('均值', fontsize=8)
# Label each curve where it is drawn and let legend() pick the labels up.
# Previously the per-line labels were silently overridden by a separate
# legend list that also contained a typo ('truh').
plt.plot(len_, y_test, label='truth', color="blue")
plt.plot(len_, pred_test_raw, label='predict', color="yellow")
plt.legend()
plt.title("最低气温预测走势图")
plt.show()
# Clear the current figure so later plotting starts from an empty canvas.
plt.clf()
# Predict for one hand-written sample: three consecutive rows of five values
# each (presumably in the same column order as data_x), flattened into a
# single row of 15 features to match the training layout.
# NOTE: this rebinds x_test and pred_test_raw, replacing the test-set arrays.
x_test = np.array([
    [22, 18, 4, 3, 1],
    [27, 17, 0, 5, 2],
    [28, 19, 1, 7, 2],
]).reshape(1, -1)
pred_test_raw = sklearn_model_raw.predict(x_test)
print("预测:", pred_test_raw)