月度流失人数估计

Darin2017

已于 2024-12-18 09:50:31 修改

阅读量226

点赞数 2

CC 4.0 BY-SA版权

文章标签： pandas

于 2024-12-18 09:48:34 首次发布

本文链接：https://blog.csdn.net/Darin2017/article/details/144551015

思路：

按L0、L1、L2...L12对应的流失率（取过去12个月平均值），及L13+的流失率，附加当月新进用户数，迭代产出当月预计流失的用户数；

口径详解参考：

数据源：

文件1

文件2代码如下：

import pandas as pd
import numpy as np
import datetime
today=datetime.date.today()
cur_month_day=datetime.date(today.year,today.month,1)
print(cur_month_day)
last_month_day=datetime.date((cur_month_day+datetime.timedelta(days=-1)).year,(cur_month_day+datetime.timedelta(days=-1)).month,1)
print(last_month_day)
before_day=cur_month_day+datetime.timedelta(days=-370)
before_Y_day=datetime.date(before_day.year,before_day.month,1)
print(before_Y_day)


filepath='/Users/kangyongqing/Documents/kangyq/202209/OKR预占/2024到期不续费/月度流失人数估计/'
file1='月度新增人数20241217_150750.csv'
file2='流失对应表20241217_150838.csv'

df1=pd.read_csv(filepath+file1).sort_values(by='firdate',ascending=True)
print(df1.head())
df2=pd.read_csv(filepath+file2)
print(df2.head())

df1['firdate']=pd.to_datetime(df1['firdate'])
thirteen=df1[df1['firdate']<pd.to_datetime(before_Y_day)]['新学员数'].sum()
print(thirteen)
df2['firdate']=pd.to_datetime(df2['firdate'])
df2['lastdate']=pd.to_datetime(df2['lastdate'])
thirteenlost=df2[(df2['firdate']<pd.to_datetime(before_Y_day))&(df2['lastdate']==pd.to_datetime(last_month_day))]['流失学生'].sum()
print(thirteenlost)
thirteenlv=np.round(thirteenlost/thirteen,4)
print(thirteenlv)
new_add=df1[df1['firdate']==pd.to_datetime(before_Y_day)]['新学员数'].sum()
print(new_add)
thirteen_new=thirteen+new_add
print(thirteen_new)

df2['LTV']=(df2['lastdate'].dt.year-df2['firdate'].dt.year)*12+df2['lastdate'].dt.month-df2['firdate'].dt.month
print(df2.head())
piv1=pd.pivot_table(df2,index='firdate',columns='LTV',values='流失学生',aggfunc='sum')
# piv1.to_excel(filepath+'ceshi.xlsx')
piv2=pd.pivot_table(df2,index='firdate',columns='lastdate',values='流失学生',aggfunc='sum')
# piv2.to_excel(filepath+'ceshi2.xlsx')

df3=pd.merge(df1[df1['firdate']>='2021-01-01'],piv1.iloc[:,:13].reset_index(),on='firdate',how='left')
# df3.to_excel(filepath+'ceshi3.xlsx')

i=0
list=[]
while i<=12:
    s=df3.shape[0]
    fenzi=df3.iloc[s-12-i:,i+2].sum()
    print(fenzi)
    fenmu = df3.iloc[s - 12-i:-i, 1].sum()
    if i==0:
        fenmu = df3.iloc[s - 12 - i:, 1].sum()
    print(fenmu)
    list.append(np.round(fenzi/fenmu,4))
    i+=1
print(list)

df4=pd.DataFrame(data=list,columns=['L'])
print(df4)

before_set=df1[df1['firdate']<=pd.to_datetime(before_Y_day)]['新学员数'].sum()
print(before_set,before_set*thirteenlv)
df5=df1[df1['firdate']>pd.to_datetime(before_Y_day)]._append(pd.DataFrame({'firdate':pd.to_datetime([cur_month_day]),'新学员数':[1600]}),ignore_index=True)   #为当前月赋值新生数量，作为下个月L0流失率基数
df5=df5.sort_values(by='firdate',ascending=False).reset_index(drop=True)
print(df5)

df6=pd.concat((df5,df4),axis=1)
print(df6)
new_data=pd.DataFrame({'firdate':['thirteen_new'],'新学员数':[thirteen_new],'L':[thirteenlv]})
print(new_data)
df7=df6._append(new_data,ignore_index=True)
print(df7)

df7.loc[:,'下月流失学生数估计']=np.round(df7['新学员数']*df7['L'],0)
print(df7)
print('下月预计流失学员数：',df7['下月流失学生数估计'].sum())
df7.rename(columns={'L':'L对应流失率'},inplace=True)

writer=pd.ExcelWriter(filepath+f'下月流失学生数预估{today}.xlsx')
df7.to_excel(writer,sheet_name='预测')
df3.to_excel(writer,sheet_name='L12过程',index=False)
piv2.to_excel(writer,sheet_name='L13+过程')
df1.to_excel(writer,sheet_name='新学员数量')

writer._save()

from reset_col import reset_col
reset_col(writer)

输出结果：