思路:
按L0、L1、L2...L12对应的流失率(取过去12个月平均值),及L13+的流失率,附加当月新进用户数,迭代产出当月预计流失的用户数;
口径详解参考:
数据源:
文件1
文件2代码如下:
import pandas as pd
import numpy as np
import datetime
today=datetime.date.today()
cur_month_day=datetime.date(today.year,today.month,1)
print(cur_month_day)
last_month_day=datetime.date((cur_month_day+datetime.timedelta(days=-1)).year,(cur_month_day+datetime.timedelta(days=-1)).month,1)
print(last_month_day)
before_day=cur_month_day+datetime.timedelta(days=-370)
before_Y_day=datetime.date(before_day.year,before_day.month,1)
print(before_Y_day)
filepath='/Users/kangyongqing/Documents/kangyq/202209/OKR预占/2024到期不续费/月度流失人数估计/'
file1='月度新增人数20241217_150750.csv'
file2='流失对应表20241217_150838.csv'
df1=pd.read_csv(filepath+file1).sort_values(by='firdate',ascending=True)
print(df1.head())
df2=pd.read_csv(filepath+file2)
print(df2.head())
df1['firdate']=pd.to_datetime(df1['firdate'])
thirteen=df1[df1['firdate']<pd.to_datetime(before_Y_day)]['新学员数'].sum()
print(thirteen)
df2['firdate']=pd.to_datetime(df2['firdate'])
df2['lastdate']=pd.to_datetime(df2['lastdate'])
thirteenlost=df2[(df2['firdate']<pd.to_datetime(before_Y_day))&(df2['lastdate']==pd.to_datetime(last_month_day))]['流失学生'].sum()
print(thirteenlost)
thirteenlv=np.round(thirteenlost/thirteen,4)
print(thirteenlv)
new_add=df1[df1['firdate']==pd.to_datetime(before_Y_day)]['新学员数'].sum()
print(new_add)
thirteen_new=thirteen+new_add
print(thirteen_new)
df2['LTV']=(df2['lastdate'].dt.year-df2['firdate'].dt.year)*12+df2['lastdate'].dt.month-df2['firdate'].dt.month
print(df2.head())
piv1=pd.pivot_table(df2,index='firdate',columns='LTV',values='流失学生',aggfunc='sum')
# piv1.to_excel(filepath+'ceshi.xlsx')
piv2=pd.pivot_table(df2,index='firdate',columns='lastdate',values='流失学生',aggfunc='sum')
# piv2.to_excel(filepath+'ceshi2.xlsx')
df3=pd.merge(df1[df1['firdate']>='2021-01-01'],piv1.iloc[:,:13].reset_index(),on='firdate',how='left')
# df3.to_excel(filepath+'ceshi3.xlsx')
i=0
list=[]
while i<=12:
s=df3.shape[0]
fenzi=df3.iloc[s-12-i:,i+2].sum()
print(fenzi)
fenmu = df3.iloc[s - 12-i:-i, 1].sum()
if i==0:
fenmu = df3.iloc[s - 12 - i:, 1].sum()
print(fenmu)
list.append(np.round(fenzi/fenmu,4))
i+=1
print(list)
df4=pd.DataFrame(data=list,columns=['L'])
print(df4)
before_set=df1[df1['firdate']<=pd.to_datetime(before_Y_day)]['新学员数'].sum()
print(before_set,before_set*thirteenlv)
df5=df1[df1['firdate']>pd.to_datetime(before_Y_day)]._append(pd.DataFrame({'firdate':pd.to_datetime([cur_month_day]),'新学员数':[1600]}),ignore_index=True) #为当前月赋值新生数量,作为下个月L0流失率基数
df5=df5.sort_values(by='firdate',ascending=False).reset_index(drop=True)
print(df5)
df6=pd.concat((df5,df4),axis=1)
print(df6)
new_data=pd.DataFrame({'firdate':['thirteen_new'],'新学员数':[thirteen_new],'L':[thirteenlv]})
print(new_data)
df7=df6._append(new_data,ignore_index=True)
print(df7)
df7.loc[:,'下月流失学生数估计']=np.round(df7['新学员数']*df7['L'],0)
print(df7)
print('下月预计流失学员数:',df7['下月流失学生数估计'].sum())
df7.rename(columns={'L':'L对应流失率'},inplace=True)
writer=pd.ExcelWriter(filepath+f'下月流失学生数预估{today}.xlsx')
df7.to_excel(writer,sheet_name='预测')
df3.to_excel(writer,sheet_name='L12过程',index=False)
piv2.to_excel(writer,sheet_name='L13+过程')
df1.to_excel(writer,sheet_name='新学员数量')
writer._save()
from reset_col import reset_col
reset_col(writer)
输出结果:
关键数据处理:
- 当前日期月初时间获取、上个月初时间获取、去年同期日期获取
- 根据日期做数据切分和迭代
- 不同的透视表方法获取不同的流失率统计
- 流失率集合构造统计
- 预估数据加入数据集
- 最终估计新的流失数据