import pandas as pd
import numpy as np
import datetime
today=str(datetime.date.today())
import matplotlib.pyplot as plt
from matplotlib.font_manager import FontProperties
font=FontProperties(fname='/System/Library/Fonts/Supplemental/Arial Unicode.ttf',size=10)
from openpyxl.drawing.image import Image
filepath='/Users/kangyongqing/Documents/kangyq/202207/课消管理看板/2023Q3课消/亚欧课消11月/课消定标方案新版/202401课消标牛浩/升级学员专项课排课/'
file1='01专项课排课情况附教师2024-01-24.xlsx'
file2='02升级学生明细导出.csv'
file3='在岗教师信息_副本.csv'
file4='教管架构.xlsx'
df1=pd.read_excel(filepath+file1,dtype='object')
df1['keci']=df1['keci'].astype('int')
print(df1.info())
print(df1.head())
df2=pd.read_csv(filepath+file2,usecols=['学员id','学员区域'],dtype='object')
df2['学员id']=df2['学员id'].str[0:16]
print(df2.columns)
print(df2.head())
df3=pd.read_csv(filepath+file3,usecols=['tutor_user_id', '老师姓名', '教管'],dtype='object')
print(df3.columns)
print(df3.head())
df4=pd.read_excel(filepath+file4,usecols=['教管', '教学经理'])
print(df4.columns)
df5=pd.merge(df1,df2,on='学员id',how='left')
df5['类别']=np.where(df5['学员区域'].isnull(),'非升级学员','升级学员')
print(df5.head())
df6=pd.merge(df5,df3,on='tutor_user_id',how='left')
df7=pd.merge(df6,df4,on='教管',how='left')
print(df7.head())
print(df7.columns)
df8=pd.pivot_table(df7,index='教学经理',columns='类别',values=('keci','学员id'),aggfunc={'keci':'sum','学员id':lambda x:len(x.unique())},margins=True)
print(df8)
df9=pd.pivot_table(df7,index='教管',columns='类别',values=('keci','学员id'),aggfunc={'keci':'sum','学员id':lambda x:len(x.unique())},margins=True)
df9.columns=['升级学员课次','非升级学员课次','总课次','升级学员数量','非升级学员数量','总学员数量']
df9.sort_values(by='升级学员课次',ascending=True,inplace=True)
print(df9.columns)
print(df9)
fig1=plt.figure(figsize=(10,6))
tp=plt.bar(df9.index[0:-1],df9['升级学员课次'][0:-1])
plt.xticks(rotation=90)
plt.xticks(font=font)
plt.bar_label(tp,df9['升级学员课次'][0:-1])
plt.title('教管升级学员课次分布',fontproperties=font)
plt.savefig(filepath+f'06教管升级学员课次分布{today}.png')
plt.close()
image1=Image(filepath+f'06教管升级学员课次分布{today}.png')
writer=pd.ExcelWriter(filepath+f'05专项课排课情况分布{today}.xlsx',engine='openpyxl')
df8.to_excel(writer,sheet_name='经理汇总')
df9.to_excel(writer,sheet_name='教管汇总')
writer.sheets['教管汇总'].add_image(image1,'I2')
writer._save()
from openpyxl import load_workbook
from openpyxl.utils import get_column_letter
def reset_col(filename):
wb=load_workbook(filename)
for sheet in wb.sheetnames[0:-1]:#最后一页有图片,不做列宽适配,会报错
ws=wb[sheet]
df=pd.read_excel(filename,sheet).fillna('-')
df.loc[len(df)]=list(df.columns) #把标题行附件到最后一行
for col in df.columns:
index=list(df.columns).index(col) #列序号
letter=get_column_letter(index+1) #列字母
collen=df[col].apply(lambda x:len(str(x).encode())).max() #获取这一列长度的最大值 当然也可以用min获取最小值 mean获取平均值
ws.column_dimensions[letter].width=collen*1.2+2 #也就是列宽为最大长度*1.2 可以自己调整
wb.save(filename)
reset_col(writer)
- 设置字体;
- 透视表;
- 柱状图;
- 保存excel;