前言
关于亚马逊订单数据的探索!
此项目大家就仅当作学习使用好了
导入库
import pandas as pd
from pyecharts.charts import *
from pyecharts import options as opts
from pyecharts.commons.utils import JsCode
Python从零基础入门到实战系统教程、源码、视频,想要数据集的同学也可以点这里
数据处理
- 对时间字段进行处理,转为datetime;
- 对配送州字段进行处理,原始数据中既有州缩写也有全称,统一为全称;
# Lookup sheet read into df_c; later code reads its '州名简写' and
# '美国州名英文' columns.
df_c = pd.read_excel('C:/Users/Administrator/Desktop/市场占有率.xls')

# Order report; header=1 takes the column names from the second row of the
# sheet.
df = pd.read_excel('C:/Users/Administrator/Desktop/亚马逊入驻商订单报表.xls', header=1)

# Convert every timestamp column to pandas datetimes (utc=False: no UTC
# conversion is applied).
for time_column in (
    '支付时间',
    '下单时间',
    '最早配送时间',
    '最晚配送时间',
    '最早送达时间',
    '最晚送达时间',
):
    df[time_column] = pd.to_datetime(df[time_column], utc=False)
# Map each state abbreviation to its full English name, replacing
# non-breaking spaces (\xa0) in the names with ordinary spaces.
c_map = {
    record['州名简写']: record['美国州名英文'].replace('\xa0', ' ')
    for _, record in df_c.iterrows()
}

# Explicit entries for five two-word state names; these overwrite whatever
# the spreadsheet supplied for the same abbreviations.
# NOTE(review): presumably the sheet's values for these are malformed — confirm.
c_map.update({
    'SD': 'South Dakota',
    'NM': 'New Mexico',
    'SC': 'South Carolina',
    'NH': 'New Hampshire',
    'NJ': 'New Jersey',
})
def format_state(state):
    """Normalize a raw state value to the full state name stored in ``c_map``.

    The input is upper-cased and stripped of periods, then matched first
    against the abbreviation keys of the module-level ``c_map`` and, failing
    that, case-insensitively against its full-name values.

    Args:
        state: Raw cell value; either an abbreviation (e.g. ``'n.y.'``) or a
            full name in any letter case. Non-string values are accepted.

    Returns:
        The full state name exactly as stored in ``c_map``, or ``None`` when
        the value has no ``upper`` method (e.g. a float NaN) or matches
        nothing.
    """
    try:
        key = state.upper().replace('.', '')
    except AttributeError:
        # Non-string cells cannot be normalized.
        return None

    if key in c_map:
        return c_map[key]

    # Fall back to a case-insensitive comparison with the full names. A single
    # scan with early return replaces building the upper-cased list twice per
    # call; the first match in dict order wins.
    for full_name in c_map.values():
        if full_name.upper() == key:
            return full_name
    return None
# Replace each raw shipping-state value with the result of format_state
# (a full state name, or None when the value cannot be matched).
normalized_states = df['配送州'].map(format_state)
df['配送州'] = normalized_states

# Preview the first rows of the cleaned frame (displayed when run in a notebook).
df.head()
各时间段订单量
早上的订单最多,好像和国内用户习惯不太一样呢~
# Count order IDs per hour of the day in which the order was placed.
order_hour = df['下单时间'].dt.hour
hourly_orders = df.groupby([order_hour])['订单ID'].count()
data = hourly_orders.reset_index()

# X axis: hour labels; Y axis: number of orders in that hour.
data_x = [f'{int(hour)}点' for hour in data['下单时间']]
data_y = data['订单ID'].tolist()

# Translucent (alpha 0.2) vertical gradient, used below as the chart background.
bg_color_js = """
new echarts.graphic.LinearGradient(
0, 0, 0, 1,
[{offset: 0, color: 'rgba(128, 255, 165, 0.2)'},
{offset: 1, color: 'rgba(1, 191, 236, 0.2)'}],
false)
"""

# Gradient with the same two colour stops but no alpha component.
# NOTE(review): not referenced in the lines visible here — presumably used as
# the area fill further down; confirm.
area_color_js = """
new echarts.graphic.LinearGradient(
0, 0, 0, 1,
[{offset: 0, color: 'rgba(128, 255, 165)'},
{offset: 1, color: 'rgba(1, 191, 236)'}],
false)
"""

# Create the line chart: 1000x500 px, 'white' theme, gradient background.
chart_init_opts = opts.InitOpts(
    theme='white',
    width='1000px',
    height='500px',
    bg_color=JsCode(bg_color_js),
)
line = Line(init_opts=chart_init_opts)
line