Python实现RFM模型

模型介绍

RFM模型即“R”——Recency(最近一次消费时间)、“F”——Frequency(一段时间内消费频次)、“M”——Monetary(一段时间内消费总额)。这三个指标可以将用户划分成不同的等级和层次,目的是衡量用户价值,从而更准确地把成本和精力投入到合适的用户层次上。一个典型的反例是:对一个明显已无购买意愿的流失用户继续push自己的核心产品,费时费力也费钱。(注:本文代码中的M取的是平均每次消费金额,即总支付金额除以F。)

数据清洗

import pandas as pd
import numpy as np
import os
# Load the raw order export into a DataFrame and preview its first rows.
df = pd.read_excel('PYTHON-RFM实战数据.xlsx')
df.head()
品牌名称买家昵称付款日期订单状态实付金额邮费省份城市购买数量
0数据不吹牛叫我李22019-01-01 00:17:59交易成功1866上海上海市1
1数据不吹牛0cyb19922019-01-01 00:59:54交易成功1450广东省广州市1
2数据不吹牛萝污萌莉2019-01-01 07:48:48交易成功1948山东省东营市1
3数据不吹牛atblovemyy2019-01-01 09:15:49付款以后用户退款成功,交易自动关闭840江苏省镇江市1
4数据不吹牛小星期鱼2019-01-01 09:59:33付款以后用户退款成功,交易自动关闭740上海上海市1
# List the distinct values that occur in the order-status column.
df['订单状态'].unique()
array(['交易成功', '付款以后用户退款成功,交易自动关闭'], dtype=object)
# Summarise each column's dtype and non-null count.
df.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 28833 entries, 0 to 28832
Data columns (total 9 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   品牌名称    28833 non-null  object        
 1   买家昵称    28833 non-null  object        
 2   付款日期    28833 non-null  datetime64[ns]
 3   订单状态    28833 non-null  object        
 4   实付金额    28833 non-null  int64         
 5   邮费      28833 non-null  int64         
 6   省份      28833 non-null  object        
 7   城市      28832 non-null  object        
 8   购买数量    28833 non-null  int64         
dtypes: datetime64[ns](1), int64(3), object(5)
memory usage: 2.0+ MB
# Keep only orders whose status is '交易成功' (completed), then print how many rows remain.
df = df.loc[df['订单状态'] == '交易成功',:]
print('剔除退款后还剩:%d行' % len(df))
剔除退款后还剩:27793行
# Keep only the three columns the RFM metrics are built from: buyer, payment time, amount paid.
df = df[['买家昵称','付款日期','实付金额']]
df.head()
买家昵称付款日期实付金额
0叫我李22019-01-01 00:17:59186
10cyb19922019-01-01 00:59:54145
2萝污萌莉2019-01-01 07:48:48194
5重碎叠2019-01-01 10:00:07197
6iho_jann2019-01-01 10:00:08168
R值构造
# For every buyer, take the timestamp of their most recent payment.
r = df.groupby('买家昵称')['付款日期'].max().reset_index()
r.head()
买家昵称付款日期
0.blue_ram2019-02-04 17:49:34.000
1.christiny2019-01-29 14:17:15.000
2.willn12019-01-11 03:46:18.000
3.托托m2019-01-11 02:26:33.000
40000妮2019-06-28 16:53:26.458
# R = whole days between the fixed reference date 2019-07-01 and the buyer's latest payment.
r['R'] = (pd.to_datetime('2019-7-1') - r['付款日期']).dt.days
# Drop the raw timestamp; only the buyer key and R are needed from here on.
r = r[['买家昵称','R']]
r.head()
买家昵称R
0.blue_ram146
1.christiny152
2.willn1170
3.托托m170
40000妮2

F值构造

# Add a helper column holding the calendar date (first 10 characters of the timestamp string).
df['日期标签'] = df['付款日期'].astype(str).str[:10]
# Collapse each buyer's orders from the same day into a single row.
dup_f = df.groupby(['买家昵称','日期标签'])['付款日期'].count().reset_index()
# Count the collapsed rows per buyer: F is the number of distinct purchase days.
f = dup_f.groupby('买家昵称')['付款日期'].count().reset_index()
f.columns = ['买家昵称','F']
f.head()
买家昵称F
0.blue_ram1
1.christiny1
2.willn11
3.托托m1
40000妮1

M值构造

# Total amount each buyer paid across all remaining orders.
sum_m = (
    df.groupby('买家昵称')['实付金额']
    .sum()
    .rename('总支付金额')
    .reset_index()
)
# Attach each buyer's purchase frequency F.
com_m = sum_m.merge(f, on='买家昵称', how='inner')
# M = average amount paid per purchase day (total paid divided by F).
com_m['M'] = com_m['总支付金额'].div(com_m['F'])
com_m.head()
买家昵称总支付金额FM
0.blue_ram49149.0
1.christiny1831183.0
2.willn134134.0
3.托托m37137.0
40000妮1641164.0
# Join recency onto the frequency/monetary table and keep just the buyer key plus the three metrics.
rfm = r.merge(com_m, on='买家昵称', how='inner')[['买家昵称','R','F','M']]
rfm.head()
买家昵称RFM
0.blue_ram146149.0
1.christiny1521183.0
2.willn1170134.0
3.托托m170137.0
40000妮21164.0

STEP 3.维度确认(不涉及代码故省略)
STEP 4.分值计算

# Score recency from 5 down to 1 using left-closed day bins:
# [0,30) -> 5, [30,60) -> 4, [60,90) -> 3, [90,120) -> 2, [120,1000000) -> 1.
rfm['R-SCORE'] = pd.cut(rfm['R'],bins = [0,30,60,90,120,1000000],labels = [5,4,3,2,1],right = False).astype(float)
rfm.head()
买家昵称RFMR-SCORE
0.blue_ram146149.01.0
1.christiny1521183.01.0
2.willn1170134.01.0
3.托托m170137.01.0
40000妮21164.05.0

F、M值计算

# Score frequency 1..5 with left-closed bins: F of 1, 2, 3, 4 maps to that score and 5 or more maps to 5.
rfm['F-SCORE'] = pd.cut(rfm['F'],bins = [1,2,3,4,5,1000000],labels = [1,2,3,4,5],right = False).astype(float)
# Score monetary value 1..5 in left-closed steps of 50: [0,50) -> 1 ... [200,1000000) -> 5.
rfm['M-SCORE'] = pd.cut(rfm['M'],bins = [0,50,100,150,200,1000000],labels = [1,2,3,4,5],right = False).astype(float)
rfm.head()
买家昵称RFMR-SCOREF-SCOREM-SCORE
0.blue_ram146149.01.01.01.0
1.christiny1521183.01.01.04.0
2.willn1170134.01.01.01.0
3.托托m170137.01.01.01.0
40000妮21164.05.01.04.0
# Compare each score with its column mean to get a 1/0 flag; this reduces the number of customer segments.
rfm['R是否大于均值'] = (rfm['R-SCORE'] > rfm['R-SCORE'].mean()) * 1
rfm['F是否大于均值'] = (rfm['F-SCORE'] > rfm['F-SCORE'].mean()) * 1
rfm['M是否大于均值'] = (rfm['M-SCORE'] > rfm['M-SCORE'].mean()) * 1
rfm.head()
买家昵称RFMR-SCOREF-SCOREM-SCORER是否大于均值F是否大于均值M是否大于均值
0.blue_ram146149.01.01.01.0000
1.christiny1521183.01.01.04.0001
2.willn1170134.01.01.01.0000
3.托托m170137.01.01.01.0000
40000妮21164.05.01.04.0101

STEP 5.客户分层

构建合并指标

# Pack the three 0/1 flags into one number: hundreds digit = R flag, tens = F flag, units = M flag.
rfm['人群数值'] = (rfm['R是否大于均值'] * 100) + (rfm['F是否大于均值'] * 10) + (rfm['M是否大于均值'] * 1)
rfm.head()
买家昵称RFMR-SCOREF-SCOREM-SCORER是否大于均值F是否大于均值M是否大于均值人群数值
0.blue_ram146149.01.01.01.00000
1.christiny1521183.01.01.04.00011
2.willn1170134.01.01.01.00000
3.托托m170137.01.01.01.00000
40000妮21164.05.01.04.0101101
def transform_label(x):
    """Map a combined R/F/M flag code to its customer-segment name.

    The code is read as three 0/1 digits: hundreds = R flag, tens = F flag,
    units = M flag (for example 101 means R and M are flagged, F is not).

    Args:
        x: One of 0, 1, 10, 11, 100, 101, 110, 111.

    Returns:
        The segment name for that code.

    Raises:
        ValueError: If x is not one of the eight recognised codes.
    """
    labels = {
        111: '重要价值客户',
        110: '消费潜力客户',
        101: '频次深耕客户',
        100: '新客户',
        11: '重要价值流失预警客户',
        10: '一般客户',
        1: '高消费唤回客户',
        0: '流失客户',
    }
    try:
        return labels[x]
    except (KeyError, TypeError):
        # Fail with a clear message instead of falling through with no label assigned.
        raise ValueError('unrecognised RFM segment code: %r' % (x,)) from None
# Translate every buyer's segment code into its segment name.
rfm['人群类型'] = rfm['人群数值'].apply(transform_label)
rfm.head()
买家昵称RFMR-SCOREF-SCOREM-SCORER是否大于均值F是否大于均值M是否大于均值人群数值人群类型
0.blue_ram146149.01.01.01.00000流失客户
1.christiny1521183.01.01.04.00011高消费唤回客户
2.willn1170134.01.01.01.00000流失客户
3.托托m170137.01.01.01.00000流失客户
40000妮21164.05.01.04.0101101频次深耕客户

人数统计

# Number of buyers in each segment, largest segment first.
count = (
    rfm['人群类型']
    .value_counts()
    .rename_axis('客户类型')
    .reset_index(name='人数')
)
# Each segment's share of all buyers.
count['人数占比'] = count['人数'].div(count['人数'].sum())
count
   客户类型              人数    人数占比
0  高消费唤回客户        7338  0.288670
1  流失客户              6680  0.262785
2  频次深耕客户          5427  0.213493
3  新客户                4224  0.166168
4  重要价值客户           756  0.029740
5  消费潜力客户           450  0.017703
6  重要价值流失预警客户   360  0.014162
7  一般客户               185  0.007278

金额统计

# Rebuild each buyer's total spend as frequency times average spend.
rfm['购买总金额'] = rfm['F'].mul(rfm['M'])
# Total spend per segment.
mon = (
    rfm.groupby('人群类型')['购买总金额']
    .sum()
    .rename_axis('客户类型')
    .reset_index(name='消费金额')
)
# Each segment's share of overall spend.
mon['金额占比'] = mon['消费金额'].div(mon['消费金额'].sum())
mon
   客户类型              消费金额    金额占比
0  一般客户                25803.0  0.007349
1  新客户                 270869.0  0.077142
2  流失客户               444617.0  0.126624
3  消费潜力客户            64075.0  0.018248
4  重要价值客户           269230.0  0.076675
5  重要价值流失预警客户   116665.0  0.033226
6  频次深耕客户           981893.0  0.279638
7  高消费唤回客户        1338153.0  0.381098
# Build the per-segment RFM summary from a source order file.
def get_rfm(name = 'PYTHON-RFM实战数据.xlsx', ref_date = '2019-7-1'):
    """Compute the customer-segment head-count and spend summary for an order export.

    Args:
        name: Path of the Excel file to read. It must contain the columns
            '订单状态', '买家昵称', '付款日期' and '实付金额'.
        ref_date: Date that recency is measured against; anything
            ``pd.to_datetime`` accepts. It should not be earlier than the
            latest payment, because a negative R falls outside the score bins.

    Returns:
        DataFrame with one row per customer segment and the columns
        '客户类型', '人数', '人数占比', '消费金额', '金额占比'.
    """
    df = pd.read_excel(name)
    # Keep completed orders only, and just the columns the metrics need.
    df = df.loc[df['订单状态'] == '交易成功', ['买家昵称','付款日期','实付金额']].copy()
    print('剔除退款后还剩:%d行' % len(df))

    # R: whole days between ref_date and each buyer's most recent payment.
    r = df.groupby('买家昵称')['付款日期'].max().reset_index()
    r['R'] = (pd.to_datetime(ref_date) - r['付款日期']).dt.days
    r = r[['买家昵称','R']]

    # F: number of distinct calendar days on which the buyer paid, so that
    # several orders placed on the same day count once.
    df['日期标签'] = df['付款日期'].astype(str).str[:10]
    f = df.groupby('买家昵称')['日期标签'].nunique().reset_index()
    f.columns = ['买家昵称','F']

    # M: average amount paid per purchase day (total paid / F).
    sum_m = df.groupby('买家昵称')['实付金额'].sum().reset_index()
    sum_m.columns = ['买家昵称','总支付金额']
    com_m = pd.merge(sum_m, f, on = '买家昵称', how = 'inner')
    com_m['M'] = com_m['总支付金额'] / com_m['F']

    rfm = pd.merge(r, com_m, on = '买家昵称', how = 'inner')
    rfm = rfm[['买家昵称','R','F','M','总支付金额']].copy()

    # Score each metric 1-5 with left-closed bins. The last bin is open-ended
    # so arbitrarily large values still receive a score instead of NaN.
    top = float('inf')
    rfm['R-SCORE'] = pd.cut(rfm['R'], bins = [0,30,60,90,120,top], labels = [5,4,3,2,1], right = False).astype(float)
    rfm['F-SCORE'] = pd.cut(rfm['F'], bins = [1,2,3,4,5,top], labels = [1,2,3,4,5], right = False).astype(float)
    rfm['M-SCORE'] = pd.cut(rfm['M'], bins = [0,50,100,150,200,top], labels = [1,2,3,4,5], right = False).astype(float)

    # 1/0 flag per metric: is the buyer's score above the mean score?
    rfm['R是否大于均值'] = (rfm['R-SCORE'] > rfm['R-SCORE'].mean()) * 1
    rfm['F是否大于均值'] = (rfm['F-SCORE'] > rfm['F-SCORE'].mean()) * 1
    rfm['M是否大于均值'] = (rfm['M-SCORE'] > rfm['M-SCORE'].mean()) * 1

    # Pack the flags into one code (hundreds = R, tens = F, units = M) and name the segment.
    rfm['人群数值'] = (rfm['R是否大于均值'] * 100) + (rfm['F是否大于均值'] * 10) + (rfm['M是否大于均值'] * 1)
    rfm['人群类型'] = rfm['人群数值'].apply(transform_label)

    # Head-count per segment and its share of all buyers.
    count = rfm['人群类型'].value_counts().reset_index()
    count.columns = ['客户类型','人数']
    count['人数占比'] = count['人数'] / count['人数'].sum()

    # Use the recorded totals directly rather than recomputing them as F * M,
    # which would round-trip through float division.
    rfm['购买总金额'] = rfm['总支付金额'].astype(float)
    mon = rfm.groupby('人群类型')['购买总金额'].sum().reset_index()
    mon.columns = ['客户类型','消费金额']
    mon['金额占比'] = mon['消费金额'] / mon['消费金额'].sum()

    result = pd.merge(count, mon, on = '客户类型')

    return result


# Map the combined R/F/M flag code to a customer-segment name.
def transform_label(x):
    """Return the customer-segment name for a combined R/F/M flag code.

    The code is read as three 0/1 digits: hundreds = R flag, tens = F flag,
    units = M flag (for example 101 means R and M are flagged, F is not).

    Args:
        x: One of 0, 1, 10, 11, 100, 101, 110, 111.

    Returns:
        The segment name for that code.

    Raises:
        ValueError: If x is not one of the eight recognised codes.
    """
    labels = {
        111: '重要价值客户',
        110: '消费潜力客户',
        101: '频次深耕客户',
        100: '新客户',
        11: '重要价值流失预警客户',
        10: '一般客户',
        1: '高消费唤回客户',
        0: '流失客户',
    }
    try:
        return labels[x]
    except (KeyError, TypeError):
        # Fail with a clear message instead of falling through with no label assigned.
        raise ValueError('unrecognised RFM segment code: %r' % (x,)) from None
# Run the whole pipeline on the sample workbook and display the segment summary.
res = get_rfm(name = 'PYTHON-RFM实战数据.xlsx')
res
剔除退款后还剩:27793行
   客户类型              人数    人数占比   消费金额    金额占比
0  高消费唤回客户        7338  0.288670  1338153.0  0.381098
1  流失客户              6680  0.262785   444617.0  0.126624
2  频次深耕客户          5427  0.213493   981893.0  0.279638
3  新客户                4224  0.166168   270869.0  0.077142
4  重要价值客户           756  0.029740   269230.0  0.076675
5  消费潜力客户           450  0.017703    64075.0  0.018248
6  重要价值流失预警客户   360  0.014162   116665.0  0.033226
7  一般客户               185  0.007278    25803.0  0.007349
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值