```
Traceback (most recent call last):
  File "D:\python\负荷预测测试版.py", line 165, in <module>
    bagging_model = BaggingRegressor(base_estimator=clone(best_rf_model), n_estimators=10, random_state=42)
                    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
TypeError: BaggingRegressor.__init__() got an unexpected keyword argument 'base_estimator'
```
The error occurs because the scikit-learn version you have installed no longer accepts a `base_estimator` argument in `BaggingRegressor.__init__()`: the parameter was renamed to `estimator` in scikit-learn 1.2, and `base_estimator` was removed entirely in 1.4. Passing `estimator` instead fixes the crash; apart from that one keyword, the rest of the script (including the stacking ensemble) can stay as it is.
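If the same script also has to run on machines with an older scikit-learn, a small compatibility shim can pick the right keyword at runtime. This is only a minimal sketch, assuming the standard scikit-learn import paths; `rf_base` here is a hypothetical stand-in for your tuned `best_rf_model`:

```python
from sklearn.base import clone
from sklearn.ensemble import BaggingRegressor, RandomForestRegressor

rf_base = RandomForestRegressor(random_state=42)  # stand-in for the tuned best_rf_model

try:
    # scikit-learn >= 1.2: the keyword is `estimator`
    bagging_model = BaggingRegressor(estimator=clone(rf_base), n_estimators=10, random_state=42)
except TypeError:
    # scikit-learn < 1.2: fall back to the old `base_estimator` keyword
    bagging_model = BaggingRegressor(base_estimator=clone(rf_base), n_estimators=10, random_state=42)
```

If you only need to support a current scikit-learn, simply writing `estimator=...` (as in the full script below) is enough.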
Here is the corrected code:
### Corrected code
```python
import pandas as pd
from sklearn.model_selection import train_test_split, RandomizedSearchCV, GridSearchCV, cross_val_score
from sklearn.ensemble import RandomForestRegressor, BaggingRegressor
from sklearn.preprocessing import MinMaxScaler, PolynomialFeatures
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error, mean_absolute_percentage_error
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
from scipy.stats import randint, uniform
from sklearn.feature_selection import RFE, SelectFromModel
from sklearn.pipeline import Pipeline
from sklearn.base import clone
from mlxtend.regressor import StackingRegressor
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
# Set up a Chinese font so the Chinese plot labels render correctly
matplotlib.rcParams['font.sans-serif'] = ['SimHei']  # SimHei supports Chinese characters
matplotlib.rcParams['axes.unicode_minus'] = False  # keep the minus sign from rendering as a box
# Load the data
data = pd.read_csv('2016年电工数学建模竞赛负荷预测数据集.csv')
# Inspect the first few rows to confirm the file loaded correctly
print(data.head())
# Check for missing values and handle them
print(data.isnull().sum())
data.fillna(data.mean(numeric_only=True), inplace=True)  # fill missing values with the column means
# Create new features
data['日期'] = pd.to_datetime(data.index)  # assumes the date is stored in the index
data['星期几'] = data['日期'].dt.weekday
data['月份'] = data['日期'].dt.month
data['是否节假日'] = data['日期'].apply(lambda x: 1 if x.day_name() in ['Saturday', 'Sunday'] else 0)
# Build the feature matrix
X = data[['最高温度℃', '最低温度℃', '平均温度℃', '相对湿度(平均)', '降雨量(mm)', '星期几', '月份', '是否节假日']]
y = data['日需求负荷(KWh)']
# Feature selection: use recursive feature elimination (RFE) to keep the most important features
rf_initial = RandomForestRegressor(random_state=42)
selector = RFE(rf_initial, n_features_to_select=5, step=1)
X_selected = selector.fit_transform(X, y)
selected_features = X.columns[selector.support_]
print("Selected Features:", selected_features)
# Scale the features
scaler = MinMaxScaler()  # MinMaxScaler rescales each feature to [0, 1]
X_scaled = scaler.fit_transform(X_selected)
X_scaled_df = pd.DataFrame(X_scaled, columns=selected_features)
# Add polynomial (interaction) features
poly = PolynomialFeatures(degree=2, interaction_only=True, include_bias=False)
X_poly = poly.fit_transform(X_scaled_df)
X_poly_df = pd.DataFrame(X_poly, columns=poly.get_feature_names_out(selected_features))
# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X_poly_df, y, test_size=0.2, random_state=42)
# Define the random forest hyperparameter search space
param_dist = {
    'n_estimators': randint(100, 501),
    'max_depth': [None] + list(range(10, 51, 10)),
    'min_samples_split': randint(2, 11),
    'min_samples_leaf': randint(1, 5),
    'bootstrap': [True],
    'max_features': ['sqrt', 'log2'] + list(np.linspace(0.1, 1.0, 10)),  # floats are treated as fractions of the features
    'criterion': ['squared_error', 'friedman_mse', 'absolute_error', 'poisson']  # valid loss functions for RandomForestRegressor
}
rf_model = RandomForestRegressor(random_state=42)
# Coarse hyperparameter tuning with randomized search
random_search = RandomizedSearchCV(
    estimator=rf_model,
    param_distributions=param_dist,
    n_iter=50,  # keep the first pass relatively small
    cv=5,
    verbose=2,
    random_state=42,
    n_jobs=-1,
    error_score='raise'
)
random_search.fit(X_train, y_train)
# Print the best parameters from the coarse search
print("Initial Best Parameters:", random_search.best_params_)
# Fine-grained hyperparameter tuning with GridSearchCV
param_grid = {
    'n_estimators': [random_search.best_params_['n_estimators']],
    'max_depth': [random_search.best_params_['max_depth']],
    'min_samples_split': [random_search.best_params_['min_samples_split']],
    'min_samples_leaf': [random_search.best_params_['min_samples_leaf']],
    'bootstrap': [True],
    'max_features': [random_search.best_params_['max_features']],
    'criterion': [random_search.best_params_['criterion']]
}
grid_search = GridSearchCV(
    estimator=rf_model,
    param_grid=param_grid,
    cv=5,
    verbose=2,
    n_jobs=-1,
    scoring='neg_mean_squared_error'
)
grid_search.fit(X_train, y_train)
# Print the final best parameters
print("Final Best Parameters:", grid_search.best_params_)
# Train the model with the best parameters
best_rf_model = grid_search.best_estimator_
# Cross-validated evaluation
cv_scores_rmse = cross_val_score(best_rf_model, X_train, y_train, cv=5, scoring='neg_mean_squared_error')
cv_scores_mae = cross_val_score(best_rf_model, X_train, y_train, cv=5, scoring='neg_mean_absolute_error')
cv_scores_r2 = cross_val_score(best_rf_model, X_train, y_train, cv=5, scoring='r2')
print(f'Random Forest CV RMSE: {np.sqrt(-cv_scores_rmse.mean()):.2f}')  # scores are negative MSE, so negate before taking the square root
print(f'Random Forest CV MAE: {-cv_scores_mae.mean():.2f}')
print(f'Random Forest CV R^2: {cv_scores_r2.mean():.4f}')
# Predict on the test set
y_pred_rf = best_rf_model.predict(X_test)
# Compute several evaluation metrics
rmse_rf = np.sqrt(mean_squared_error(y_test, y_pred_rf))
mae_rf = mean_absolute_error(y_test, y_pred_rf)
mape_rf = mean_absolute_percentage_error(y_test, y_pred_rf)
r2_test = r2_score(y_test, y_pred_rf)
print(f'Random Forest Test RMSE: {rmse_rf:.2f}')
print(f'Random Forest Test MAE: {mae_rf:.2f}')
print(f'Random Forest Test MAPE: {mape_rf:.4f}')
print(f'Random Forest Test R^2: {r2_test:.4f}')
# Feature importance analysis
importances = best_rf_model.feature_importances_
feature_importances = pd.Series(importances, index=X_poly_df.columns)
feature_importances.sort_values(ascending=False).plot(kind='bar', figsize=(10, 6))
plt.title('特征重要性')
plt.show()
# Plot predicted vs. actual values
plt.figure(figsize=(10, 6))
plt.plot(y_test.values, label='实际值', color='blue', marker='o')
plt.plot(y_pred_rf, label='RF预测值', color='red', marker='x')
plt.title('实际值 vs 随机森林预测值')
plt.xlabel('样本索引')
plt.ylabel('日需求负荷 (KWh)')
plt.legend()
plt.grid(True)
plt.show()
# Evaluate with the out-of-bag score (only available if the model was trained with oob_score=True)
if hasattr(best_rf_model, 'oob_score_'):
    print(f'OOB Score: {best_rf_model.oob_score_:.4f}')
# Model ensembling: Bagging and Stacking
bagging_model = BaggingRegressor(estimator=best_rf_model, n_estimators=10, random_state=42)
bagging_model.fit(X_train, y_train)
xgb_model = XGBRegressor(objective='reg:squarederror', n_jobs=-1)
lgbm_model = LGBMRegressor(boosting_type='gbdt', objective='regression', n_jobs=-1)
stacking_model = StackingRegressor(
regressors=[best_rf_model, xgb_model, lgbm_model],
meta_regressor=RandomForestRegressor(random_state=42)
)
stacking_model.fit(X_train, y_train)
# Predict on the test set with the ensembles
y_pred_bagging = bagging_model.predict(X_test)
y_pred_stacking = stacking_model.predict(X_test)
# Compute the evaluation metrics for the ensemble models
rmse_bagging = np.sqrt(mean_squared_error(y_test, y_pred_bagging))
mae_bagging = mean_absolute_error(y_test, y_pred_bagging)
mape_bagging = mean_absolute_percentage_error(y_test, y_pred_bagging)
r2_bagging = r2_score(y_test, y_pred_bagging)
rmse_stacking = np.sqrt(mean_squared_error(y_test, y_pred_stacking))
mae_stacking = mean_absolute_error(y_test, y_pred_stacking)
mape_stacking = mean_absolute_percentage_error(y_test, y_pred_stacking)
r2_stacking = r2_score(y_test, y_pred_stacking)
print(f'Bagging Test RMSE: {rmse_bagging:.2f}, MAE: {mae_bagging:.2f}, MAPE: {mape_bagging:.4f}, R^2: {r2_bagging:.4f}')
print(f'Stacking Test RMSE: {rmse_stacking:.2f}, MAE: {mae_stacking:.2f}, MAPE: {mape_stacking:.4f}, R^2: {r2_stacking:.4f}')
# Plot the ensemble predictions against the actual values
plt.figure(figsize=(10, 6))
plt.plot(y_test.values, label='实际值', color='blue', marker='o')
plt.plot(y_pred_bagging, label='Bagging预测值', color='green', marker='^')
plt.plot(y_pred_stacking, label='Stacking预测值', color='purple', marker='s')
plt.title('实际值 vs 集成模型预测值')
plt.xlabel('样本索引')
plt.ylabel('日需求负荷 (KWh)')
plt.legend()
plt.grid(True)
plt.show()
```
### Key fixes
1. **Corrected the `BaggingRegressor` constructor argument**:
   - Pass `estimator` instead of `base_estimator`.
2. **Checked the remaining parameters**:
   - The other argument names were verified against the current scikit-learn and mlxtend APIs so the script runs as intended.
With these fixes, `BaggingRegressor` and `StackingRegressor` should work as expected. If anything else breaks or needs further tuning, let me know!
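If you are unsure which scikit-learn version you have installed, a quick check from Python (just a convenience snippet, nothing project-specific):

```python
import sklearn

# `base_estimator` was deprecated in scikit-learn 1.2 and removed in 1.4
print(sklearn.__version__)
```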
### Further optimization suggestions
1. **Shrink the search space**: if tuning takes too long, narrow the parameter ranges or reduce the number of search iterations.
2. **Use `GridSearchCV` for the key parameters**: a finer grid around the values found by the randomized search can squeeze out a bit more performance.
3. **Ensemble several models**: combining models (e.g. Bagging, Stacking) tends to improve robustness and generalization; see the sketch below for a scikit-learn-only alternative to mlxtend's `StackingRegressor`.
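For the third point, if installing `mlxtend` is inconvenient, scikit-learn has shipped its own `StackingRegressor` since version 0.22. Below is a rough sketch of the equivalent setup; it reuses `best_rf_model`, `xgb_model`, `lgbm_model`, `X_train`, `y_train`, and `X_test` from the script above, and the `estimators`/`final_estimator` argument names follow scikit-learn's API rather than mlxtend's:

```python
from sklearn.ensemble import RandomForestRegressor, StackingRegressor

# scikit-learn's native stacking: base estimators are passed as (name, estimator) pairs
stacking_model = StackingRegressor(
    estimators=[
        ('rf', best_rf_model),
        ('xgb', xgb_model),
        ('lgbm', lgbm_model),
    ],
    final_estimator=RandomForestRegressor(random_state=42),
    n_jobs=-1,
)
stacking_model.fit(X_train, y_train)
y_pred_stacking = stacking_model.predict(X_test)
```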