import pandas as pd
import numpy as np
import datetime
from dateutil.relativedelta import relativedelta
today=datetime.date.today()
filepath='/Users/kangyongqing/Documents/kangyq/202303/分析模版/课消月度环比/'
file1='活跃学员课次20250226_093217.csv'
file2='活跃学员数20250226_093147.csv'
df1=pd.read_csv(filepath+file1)
df2=pd.read_csv(filepath+file2)
print(df1.columns,df2.columns)
df2.columns=['Time','一对一学生数','学生数','小班课学生数']
df=pd.merge(df1,df2,on='Time',how='left')
df['一对一生均']=df['一对一']/df['一对一学生数']
df['小班课生均']=df['小班课']/df['小班课学生数']
df['总生均']=df['课次']/df['学生数']
#保留小数位数
df=df.round({'一对一生均':2,'小班课生均':2,'总生均':2})
print(df.head())
benyue=datetime.date(today.year,today.month,1)
shangyue=benyue-relativedelta(months=1)
xiayue=benyue+relativedelta(months=1)
print(benyue,shangyue,xiayue)
qbenyue=benyue-relativedelta(years=1)
qshangyue=shangyue-relativedelta(years=1)
qxiayue=xiayue-relativedelta(years=1)
print(qbenyue,qshangyue,qxiayue)
qqbenyue=benyue-relativedelta(years=2)
qqshangyue=shangyue-relativedelta(years=2)
qqxiayue=xiayue-relativedelta(years=2)
print(qqbenyue,qqshangyue,qqxiayue)
L1=pd.DataFrame(data=[benyue,shangyue,xiayue,qbenyue,qshangyue,qxiayue,qqbenyue,qqshangyue,qqxiayue],columns=['dtt'])
# 在format的格式字符串中,各种时间元素都有特定的字符表示,例如:
# %Y表示四位数的年份,
# %m表示两位数的月份,
# %d表示两位数的日期,
# %H表示小时数(24小时格式),
# %M表示分钟数,
# %S表示秒数
L1['dtt']=pd.to_datetime(L1['dtt'],format='%Y-%m-%d').astype('str')
#先转换为日期格式,然后再转换为字符串,方便后续进行匹配
dff=pd.merge(L1,df,left_on='dtt',right_on='Time',how='left')
dff=dff.set_index('dtt').sort_index()
print(dff.columns)
dff1=dff[['学生数', '课次', '总生均', '一对一学生数', '一对一', '一对一生均']].copy()
for i in dff1.columns:
# print(i)
iname=i+'环比'
# print(iname)
#np.nan为数字类型,如果单独使用''则为object类型,最后做数据处理时候还需要转换为float64类型才可以计算保留小数位数
dff1[iname]=np.nan
#df.columns.get_loc('列名')获取列所在的索引
dff1.iloc[1, dff1.columns.get_loc(iname)]=dff1.iloc[1, dff1.columns.get_loc(i)]/dff1.iloc[0, dff1.columns.get_loc(i)]-1
dff1.iloc[2, dff1.columns.get_loc(iname)] = dff1.iloc[2, dff1.columns.get_loc(i)] / dff1.iloc[1, dff1.columns.get_loc(i)] - 1
dff1.iloc[4, dff1.columns.get_loc(iname)] = dff1.iloc[4, dff1.columns.get_loc(i)] / dff1.iloc[3, dff1.columns.get_loc(i)] - 1
dff1.iloc[5, dff1.columns.get_loc(iname)] = dff1.iloc[5, dff1.columns.get_loc(i)] / dff1.iloc[4, dff1.columns.get_loc(i)] - 1
dff1.iloc[7, dff1.columns.get_loc(iname)] = dff1.iloc[7, dff1.columns.get_loc(i)] / dff1.iloc[6, dff1.columns.get_loc(i)] - 1
print(dff1.columns)
# dff1=dff1.round({'学生数环比':4, '课次环比':4, '总生均环比':4,
# '一对一学生数环比':4, '一对一环比':4, '一对一生均环比':4})
for col in dff1.columns:
if col.endswith('环比'):
dff1[col]=dff1[col].round(4)
print(dff1.dtypes)
print(dff1)
dff1.to_excel(filepath+f'课消历史数据环比分析{str(today)}.xlsx')
统计结果如下:
看年度环比数据,进行月度预测及下标。