"""Fit a random forest regressor on hand-weighted user statistics.

Reads ``tr_user_tj.csv``, rescales a few feature columns by fixed factors,
trains a ``RandomForestRegressor`` to predict ``match_num`` and prints error
metrics plus per-feature importances.
"""

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.model_selection import train_test_split

# Input file, resolved relative to the current working directory.
DATA_PATH = 'tr_user_tj.csv'

# Column the model is trained to predict.
TARGET_COLUMN = 'match_num'

# Predictor columns, in the order they are fed to the model.
FEATURE_COLUMNS = [
    'star_num',
    'sign_num',
    'coll_num',
    'dna_num',
    'task_num',
    'word_num',
    'balance_amt',
    'earn_amt',
    'season_point',
    'point',
    'star_score',
    'term_amt',
]

# Multipliers applied to the features the author considers important.
FEATURE_WEIGHTS = {
    'dna_num': 2,
    'task_num': 1.5,
    'season_point': 1.5,
}

TEST_SIZE = 0.2
RANDOM_STATE = 42
N_ESTIMATORS = 100


def main():
    """Train the weighted-feature random forest and print its evaluation.

    Loads ``DATA_PATH``, keeps ``FEATURE_COLUMNS`` plus ``TARGET_COLUMN``,
    multiplies the columns named in ``FEATURE_WEIGHTS`` by their factors,
    holds out ``TEST_SIZE`` of the rows for testing, fits the forest, and
    prints MSE, RMSE, MAE, R² and a feature-importance table sorted from most
    to least important. Returns nothing; all results go to stdout.
    """
    data = pd.read_csv(DATA_PATH, header=0)
    selected = data[FEATURE_COLUMNS + [TARGET_COLUMN]]

    # Separate the features from the target variable.
    X = selected.drop(columns=TARGET_COLUMN)
    y = selected[TARGET_COLUMN]

    # Approach 1: emphasise important features by scaling their values up.
    # NOTE(review): tree splits depend on the ordering of a feature's values,
    # and multiplying by a positive constant keeps that ordering, so this
    # step likely leaves the fitted forest and its importances unchanged --
    # confirm whether the weighting has the intended effect.
    X_weighted = X.copy()
    for column, factor in FEATURE_WEIGHTS.items():
        X_weighted[column] = X_weighted[column] * factor

    # Split into training and test sets (fixed seed for reproducibility).
    X_train, X_test, y_train, y_test = train_test_split(
        X_weighted, y, test_size=TEST_SIZE, random_state=RANDOM_STATE
    )

    # Build and train the random forest regressor.
    rf_regressor = RandomForestRegressor(
        n_estimators=N_ESTIMATORS, random_state=RANDOM_STATE
    )
    rf_regressor.fit(X_train, y_train)

    # Predict on the held-out rows.
    y_pred = rf_regressor.predict(X_test)

    # Evaluation metrics; RMSE is derived from MSE.
    mse = mean_squared_error(y_test, y_pred)
    rmse = np.sqrt(mse)
    mae = mean_absolute_error(y_test, y_pred)
    r2 = r2_score(y_test, y_pred)

    print("Weighted Features Results:")
    print(f"Mean Squared Error: {mse}")
    print(f"Root Mean Squared Error: {rmse}")
    print(f"Mean Absolute Error: {mae}")
    print(f"R² Score: {r2}")

    # Feature importances, most important first.
    feature_importance = pd.DataFrame({
        'feature': X_weighted.columns,
        'importance': rf_regressor.feature_importances_,
    }).sort_values('importance', ascending=False)

    print("\nFeature Importance:")
    print(feature_importance)


if __name__ == '__main__':
    main()