1. 读取data数据
2. 对离散变量进行one-hot编码
3. 将独热编码后的变量转换为int类型
4. 对所有缺失值进行填充
import pandas as pd
# Raw string so the backslashes in the Windows path are never parsed as
# escape sequences (the path value itself is unchanged).
DATA_PATH = r'D:\Pycode\Python打卡\data.csv'


def _is_discrete(dtype):
    """Return True when *dtype* holds text labels (object or string dtype)."""
    # Besides classic object columns, also accept a dedicated string dtype,
    # which newer pandas versions may infer for text columns.
    return (pd.api.types.is_object_dtype(dtype)
            or pd.api.types.is_string_dtype(dtype))


def preprocess(frame):
    """One-hot encode the discrete columns of *frame* and fill missing values.

    Steps:
      1. Collect the discrete (text) columns.
      2. One-hot encode them with ``pd.get_dummies(..., drop_first=True)``.
      3. Cast the newly created dummy columns to ``int``.
      4. Fill the remaining missing values of every column with that
         column's mean.

    Args:
        frame: The raw ``DataFrame``. It is not modified.

    Returns:
        A tuple ``(encoded, discrete_columns, dummy_columns)`` where
        ``encoded`` is the processed ``DataFrame``, ``discrete_columns`` lists
        the original columns that were encoded, and ``dummy_columns`` lists
        the columns created by the encoding.
    """
    discrete_columns = [
        name for name in frame.columns if _is_discrete(frame[name].dtype)
    ]

    # Remember the original column names up front so the new dummy columns
    # can be identified without loading the source data a second time.
    original_columns = set(frame.columns)

    encoded = pd.get_dummies(frame, columns=discrete_columns, drop_first=True)
    dummy_columns = [
        name for name in encoded.columns if name not in original_columns
    ]

    for name in dummy_columns:
        encoded[name] = encoded[name].astype(int)

    for name in encoded.columns:
        if encoded[name].isnull().sum() > 0:
            # Assign the result back instead of calling fillna(inplace=True)
            # on a column selection: the chained in-place form is deprecated
            # and does not update the frame under pandas Copy-on-Write.
            encoded[name] = encoded[name].fillna(encoded[name].mean())

    return encoded, discrete_columns, dummy_columns


if __name__ == "__main__":
    data = pd.read_csv(DATA_PATH)
    data, discrete_lists, list_final = preprocess(data)
    print(f"""
{data.dtypes}
{data.isnull().sum()}
{list_final}
""")
结果:
Id int64
Annual Income float64
Tax Liens float64
Number of Open Accounts float64
Years of Credit History float64
Maximum Open Credit float64
Number of Credit Problems float64
Months since last delinquent float64
Bankruptcies float64
Current Loan Amount float64
Current Credit Balance float64
Monthly Debt float64
Credit Score float64
Credit Default int64
Home Ownership_Home Mortgage int32
Home Ownership_Own Home int32
Home Ownership_Rent int32
Years in current job_10+ years int32
Years in current job_2 years int32
Years in current job_3 years int32
Years in current job_4 years int32
Years in current job_5 years int32
Years in current job_6 years int32
Years in current job_7 years int32
Years in current job_8 years int32
Years in current job_9 years int32
Years in current job_< 1 year int32
Purpose_buy a car int32
Purpose_buy house int32
Purpose_debt consolidation int32
Purpose_educational expenses int32
Purpose_home improvements int32
Purpose_major purchase int32
Purpose_medical bills int32
Purpose_moving int32
Purpose_other int32
Purpose_renewable energy int32
Purpose_small business int32
Purpose_take a trip int32
Purpose_vacation int32
Purpose_wedding int32
Term_Short Term int32
dtype: object
Id 0
Annual Income 0
Tax Liens 0
Number of Open Accounts 0
Years of Credit History 0
Maximum Open Credit 0
Number of Credit Problems 0
Months since last delinquent 0
Bankruptcies 0
Current Loan Amount 0
Current Credit Balance 0
Monthly Debt 0
Credit Score 0
Credit Default 0
Home Ownership_Home Mortgage 0
Home Ownership_Own Home 0
Home Ownership_Rent 0
Years in current job_10+ years 0
Years in current job_2 years 0
Years in current job_3 years 0
Years in current job_4 years 0
Years in current job_5 years 0
Years in current job_6 years 0
Years in current job_7 years 0
Years in current job_8 years 0
Years in current job_9 years 0
Years in current job_< 1 year 0
Purpose_buy a car 0
Purpose_buy house 0
Purpose_debt consolidation 0
Purpose_educational expenses 0
Purpose_home improvements 0
Purpose_major purchase 0
Purpose_medical bills 0
Purpose_moving 0
Purpose_other 0
Purpose_renewable energy 0
Purpose_small business 0
Purpose_take a trip 0
Purpose_vacation 0
Purpose_wedding 0
Term_Short Term 0
dtype: int64
['Home Ownership_Home Mortgage', 'Home Ownership_Own Home', 'Home Ownership_Rent', 'Years in current job_10+ years', 'Years in current job_2 years', 'Years in current job_3 years', 'Years in current job_4 years', 'Years in current job_5 years', 'Years in current job_6 years', 'Years in current job_7 years', 'Years in current job_8 years', 'Years in current job_9 years', 'Years in current job_< 1 year', 'Purpose_buy a car', 'Purpose_buy house', 'Purpose_debt consolidation', 'Purpose_educational expenses', 'Purpose_home improvements', 'Purpose_major purchase', 'Purpose_medical bills', 'Purpose_moving', 'Purpose_other', 'Purpose_renewable energy', 'Purpose_small business', 'Purpose_take a trip', 'Purpose_vacation', 'Purpose_wedding', 'Term_Short Term']
@浙大疏锦行