文件1 data.py
'''
data:2021-08-21
Author: Ling Li
Description:使用Pandas拼接多个CSV文件到一个文件(即合并)
'''
import pandas as pd
import os
Folder_Path = r'C:UserselxxlngVideosTicketData' #要拼接的文件夹及其完整路径,注意不要包含中文
SaveFile_Path = r'C:UserselxxlngVideosTicketData' #拼接后要保存的文件路径
SaveFile_Name = r'all.csv' #合并后要保存的文件名
#修改当前工作目录
os.chdir(Folder_Path)
#将该文件夹下的所有文件名存入一个列表
file_list = os.listdir()
#读取第一个CSV文件并包含表头
df = pd.read_csv(Folder_Path +'\'+ file_list[0]) #编码默认UTF-8,若乱码自行更改
#将读取的第一个CSV文件写入合并后的文件保存
df.to_csv(SaveFile_Path+'\'+ SaveFile_Name,encoding="utf_8_sig",index=False)
#循环遍历列表中各个CSV文件名,并追加到合并后的文件
for i in range(1,len(file_list)):
df = pd.read_csv(Folder_Path + '\'+ file_list[i])
df.to_csv(SaveFile_Path+'\'+ SaveFile_Name,encoding="utf_8_sig",index=False, header=False, mode='a+')
文件2 create_solved.py
'''
data:2021-08-21
Author: Ling Li
Description:查看ticket的create和solved曲线
'''
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.dates as dates
from Ini_set import *
#time_conver:导入的日期格式为字符串形式,转换成日期的形式
def time_conver(data):
data['Resolved'] = pd.to_datetime(data['Resolved'])
data['Created'] = pd.to_datetime(data['Created'])
data['Resolved'] = data['Resolved'].dt.strftime('%Y-%m-%d')
data['Created'] = data['Created'].dt.strftime('%Y-%m-%d')
return data
#time_conver_month:导入的日期格式为字符串形式,转换成日期的形式
# %W 每年的第几周,把星期一做为第一天(值从0到53)
# %V 每年的第几周,使用基于周的年
# %h 简写的月份名
def time_conver_month(data):
data['Resolved'] = pd.to_datetime(data['Resolved'])
data['Created'] = pd.to_datetime(data['Created'])
data['Resolved'] = data['Resolved'].dt.strftime('%Y-%W')
data['Created'] = data['Created'].dt.strftime('%Y-%W')
return data
#Resolved, Created 曲线绘制准备
def plot_solve_create(data,average):
Number_resolve = data.loc[:,'Resolved'].value_counts()
Number_create = data.loc[:,'Created'].value_counts()
##时间变量处理,保证横坐标保持一致
for idx in (set(Number_create.index) - set(Number_resolve.index)):
Number_resolve[idx]=0
for idx in (set(Number_resolve.index) - set(Number_create.index)):
Number_create[idx]=0
#the type of temp_resolve&temp_create are 'pandas.core.series.Series'
temp_resolve = Number_resolve.sort_index(ascending=True)
temp_create = Number_create.sort_index(ascending=True)
#准备横纵坐标
x1_dates = temp_resolve.index
y1 = temp_resolve.tolist()
x2_dates = temp_create.index
y2 = temp_create.tolist()
fig,ax=plt.subplots(figsize=(6,6))
fig.suptitle('Created VS Resolved ticket number of every day') # 添加标题以便我们辨别
ax.plot_date(x2_dates,y2,'r-x',label='Created')
ax.plot_date(x1_dates, y1, 'g-o',label='Resolved')
t = []
for i in x1_dates:
t.append(average)
ax.plot_date(x1_dates, t, 'p-',label='Average ticket number every working day')
#增加注释
x0 = x1_dates[0]
y0 = max(y1)
plt.text(x0, y0, "Total ticket number solved: %d"%sum(y1),
fontsize=12, color = "royalblue",weight="bold")
plt.text(x0, y0-1, "Average ticket numbet solved by everyday is: %.4f"%average,
fontsize=12, color = "royalblue",weight="bold")
#如果坐标轴上面的tick过于密集
fig.autofmt_xdate()#自动调整xtick的间距
#网格
ax.xaxis.grid(True)
ax.yaxis.grid(True)
#添加文本 #x轴文本
plt.xlabel('Date')
#y轴文本
plt.ylabel('Ticket number')
#设置日期为每个月
#Location也就是以什么样的频率
ax.xaxis.set_major_locator(dates.WeekdayLocator())
plt.tight_layout()
plt.legend()
plt.show()
#.................................................................
#Resolved, Created 按照月度曲线绘制准备
def plot_solve_create_month(data):
Number_resolve = data.loc[:,'Resolved'].value_counts()
Number_create = data.loc[:,'Created'].value_counts()
##时间变量处理,保证横坐标保持一致
for idx in (set(Number_create.index) - set(Number_resolve.index)):
Number_resolve[idx]=0
for idx in (set(Number_resolve.index) - set(Number_create.index)):
Number_create[idx]=0
#the type of temp_resolve&temp_create are 'pandas.core.series.Series'
temp_resolve = Number_resolve.sort_index(ascending=True)
temp_create = Number_create.sort_index(ascending=True)
#准备横纵坐标
x1_dates = temp_resolve.index
y1 = temp_resolve.tolist()
x2_dates = temp_create.index
y2 = temp_create.tolist()
fig,ax=plt.subplots(figsize=(6,6))
fig.suptitle('Created VS Resolved ticket number of every month') # 添加标题以便我们辨别
ax.plot_date(x2_dates,y2,'r-x',label='Created')
ax.plot_date(x1_dates, y1, 'g-o',label='Resolved')
#增加注释
x0 = x1_dates[0]
y0 = max(y1)
plt.text(x0, y0, "Total ticket number solved: %d"%sum(y1),
fontsize=12, color = "royalblue",weight="bold")
# plt.text(x0, y0-1, "Average ticket numbet solved by everyday is: %.4f"%average,
# fontsize=12, color = "royalblue",weight="bold")
#如果坐标轴上面的tick过于密集
fig.autofmt_xdate()#自动调整xtick的间距
#网格
ax.xaxis.grid(True)
ax.yaxis.grid(True)
#添加文本 #x轴文本
plt.xlabel('Date')
#y轴文本
plt.ylabel('Ticket number')
#设置日期为每个月
#Location也就是以什么样的频率
ax.xaxis.set_major_locator(dates.WeekdayLocator())
plt.tight_layout()
plt.legend()
plt.show()
if __name__=="__main__":
data = DATA
# data = time_conver_month(data)
data = time_conver(data)
total,average = aver_ticket_num(START_DAY,END_DAY,data)
# plot_solve_create_month(data)
plot_solve_create(data,average)
文件 3 ticket_every_pyecharts.py
'''
data:2021-08-21
Author: Ling Li
Description:使用create查看每个人在每个function下解决的ticket情况
'''
from pyecharts import Bar
# from pyecharts.charts import Bar
# from pyecharts import options as ops
from Ini_set import *
def coord_func_num(name_support,Function):
name_support_data = name_support.loc[:,'Functional Area'].value_counts()
for idx in (Function - set(name_support_data.index)):
name_support_data[idx] = 0
name_support_data = name_support_data.sort_index(ascending=True)
# print(name_support_data)
columns = name_support_data.index.tolist()
name = name_support_data.tolist()
return columns,name
def plot_person_function(data):
Function = data.loc[:,'Functional Area'].value_counts()
Function = set(Function.index)
user = set(data.Assignee)
total_width, n = 1, len(user)
width = total_width / n
bar = Bar('Ticket solved by everyone')
for u in user:
name_support = data.loc[data['Assignee'] == u]
x,y = coord_func_num(name_support,Function)
bar.add(u,x,y,mark_line=['min','max'])
bar.render("type_ticket.html")
bar
if __name__=="__main__":
data = DATA
plot_person_function(data)
文件 4 ticket_function.py
'''
data:2021-08-21
Author: Ling Li
Description:查看每个Function下的ticket数量
'''
import matplotlib.pyplot as plt
from Ini_set import *
def autolabel(rects):
for rect in rects:
height = rect.get_height()
plt.text(rect.get_x()+rect.get_width()/2.-0.04, height+0.05, '%s' % int(height))
data = DATA
#Tickets are distributed by function
Function = data.loc[:,'Functional Area'].value_counts()
Function = Function.sort_index(ascending=True)
axis_x = Function.index.tolist()
print(axis_x)
x = list(range(len(axis_x)))
print(x)
y = Function.tolist()
print(y)
total_width, n = 1, 2
width = total_width / n
plt.title("Tickets are distributed by function")
P = plt.bar(x,y,width=width,tick_label = axis_x,color ='darkcyan')
autolabel(P)
#网格
plt.grid()
# 设置刻度字体大小
plt.xticks(fontsize=11)
plt.yticks(fontsize=12)
# 设置坐标标签字体大小
plt.ylabel('Ticket Number',fontsize=15)
plt.xlabel('Function Area',fontsize=15)
plt.tight_layout()
plt.show()
文件5 ticket_month.py
'''
data:2021-08-21
Author: Ling Li
Description:查看每个月解决的ticket数量
'''
import pandas as pd
import matplotlib.pyplot as plt
from Ini_set import *
data = DATA
#time_conver:导入的日期格式为字符串形式,转换成日期的形式
def time_conver(data):
data['Resolved'] = pd.to_datetime(data['Resolved'])
data['Created'] = pd.to_datetime(data['Created'])
data['Resolved'] = data['Resolved'].dt.strftime('%Y%m')
data['Created'] = data['Created'].dt.strftime('%Y%m')
return data
time_conver(data)
Function = data.loc[:,'Functional Area'].value_counts()
Function = set(Function.index)
#图像数表
def autolabel(rects):
for rect in rects:
height = rect.get_height()
plt.text(rect.get_x()+rect.get_width()/2.-0.04, height+0.05, '%s' % int(height))
#caculate the total ticket numbers and average number
Number_resolve = data.loc[:,'Resolved'].value_counts()
#parameters for (tickets by every month)
# plt.subplot(1,2,1)
plt.title("Resolved tickets by everymonth")
temp_resolve = Number_resolve.sort_index(ascending=True)
axis_x = temp_resolve.index.tolist()
x1 = list(range(len(axis_x)))
y1 = temp_resolve.tolist()
total_width, n = 1, len(axis_x)
# 调节柱形图宽度
width = total_width / 3
P1 = plt.bar(x1,y1,width=width,tick_label = axis_x,color='skyblue')
autolabel(P1)
plt.xlabel("Date by Month")
plt.ylabel("Ticket Number")
#增加注释
x0 = x1[0]
y0 = max(y1)
plt.text(x0, y0, "Total ticket number solved: %d"%sum(y1),
fontdict=dict(fontsize=12, color='r',family='monospace',),#字体属性字典
#添加文字背景色
bbox={'facecolor': '#74C476', #填充色
'edgecolor':'b',#外框色
'alpha': 0.5, #框透明度
'pad': 8,#本文与框周围距离
}
)
#网格
plt.grid()
plt.tight_layout()
plt.legend()
plt.show()
文件 6 ticket_month
import pandas as pd
import matplotlib.pyplot as plt
from Ini_set import *
data = DATA
#time_conver:导入的日期格式为字符串形式,转换成日期的形式
def time_conver(data):
data['Resolved'] = pd.to_datetime(data['Resolved'])
data['Created'] = pd.to_datetime(data['Created'])
data['Resolved'] = data['Resolved'].dt.strftime('%Y%m')
data['Created'] = data['Created'].dt.strftime('%Y%m')
return data
time_conver(data)
Function = data.loc[:,'Functional Area'].value_counts()
Function = set(Function.index)
#图像数表
def autolabel(rects):
for rect in rects:
height = rect.get_height()
plt.text(rect.get_x()+rect.get_width()/2.-0.04, height+0.05, '%s' % int(height))
#caculate the total ticket numbers and average number
Number_resolve = data.loc[:,'Resolved'].value_counts()
#parameters for (tickets by every month)
# plt.subplot(1,2,2)
plt.title("Resolved tickets by everybody")
Number_assignee = data.loc[:,'Assignee'].value_counts()
temp_assignee = Number_assignee.sort_index(ascending=True)
axis_x = temp_assignee.index.tolist()
x2 = list(range(len(axis_x)))
y2 = temp_assignee.tolist()
total_width, n = 1, len(axis_x)
width = total_width / n
P2 = plt.bar(x2,y2,width=width,tick_label = axis_x,color='darkcyan')
autolabel(P2)
plt.xlabel("Name")
plt.ylabel("Ticket Number")
#增加注释
x0 = x2[0]
y0 = max(y2)
plt.text(x0, y0, "Total ticket number solved: %d"%sum(y2),
fontdict=dict(fontsize=12, color='r',family='monospace',),#字体属性字典
#添加文字背景色
bbox={'facecolor': '#74C476', #填充色
'edgecolor':'b',#外框色
'alpha': 0.5, #框透明度
'pad': 8,#本文与框周围距离
}
)
plt.tight_layout()
plt.legend()
plt.show()
文件 7wordclounds.py
'''
data:2021-08-21
Author: Ling Li
Description:生成创建ticket的人的词云图
'''
import matplotlib.pyplot as plt
from wordcloud import WordCloud,ImageColorGenerator,STOPWORDS #词云,颜色生成器,停止词
import numpy as np #科学计算
from PIL import Image #处理图片
from Ini_set import *
data = DATA
word_counts = data.loc[:,'Reporter'].value_counts()
backgroud = np.array(Image.open(r'C:UserselxxlngdocumentsALINGduola.jpg'))
#定义词频背景
background_image = np.array(Image.open(r'C:UserselxxlngdocumentsALINGduola.jpg'))
font_path=r'C:\Windows\fonts\BASKVILL.TTF'
wd = WordCloud(
font_path=font_path, #设置字体格式,不然会乱码
background_color="white", #设置背景颜色
mask=background_image #设置背景图
).generate_from_frequencies(word_counts)
#保存词云图
wd.to_file('test.png')
#显示词云图
plt.imshow(wd,interpolation="bilinear")
plt.axis("off")
plt.show()
文件8 ticket_function.py
'''
data:2021-08-21
Author: Ling Li
Description:查看每个人在每个function下的ticket数量
'''
import pandas as pd
import math
import matplotlib.pyplot as plt
from Ini_set import *
#time_conver:导入的日期格式为字符串形式,转换成日期的形式
def time_conver(data):
data['Resolved'] = pd.to_datetime(data['Resolved'])
data['Created'] = pd.to_datetime(data['Created'])
data['Resolved'] = data['Resolved'].dt.strftime('%Y%m')
data['Created'] = data['Created'].dt.strftime('%Y%m')
return data
##定义函数来显示柱状上的数值
def autolabel(rects):
for rect in rects:
height = rect.get_height()
plt.text(rect.get_x()+rect.get_width()/2.-0.04, height+0.05, '%s' % int(height))
def coord_func_num(name_support,Function):
name_support_data = name_support.loc[:,'Functional Area'].value_counts()
for idx in (Function - set(name_support_data.index)):
name_support_data[idx] = 0
name_support_data = name_support_data.sort_index(ascending=True)
columns = name_support_data.index.tolist()
name = name_support_data.tolist()
return columns,name
def plot_person_function(data):
Function = data.loc[:,'Functional Area'].value_counts()
# tick_lable
axis_x = Function.index.tolist()
Function = set(Function.index)
user = set(data.Assignee)
total_width, n = 1, len(user)
width = total_width / n
x = list(range(len(Function)))
# print('Initial axis: ',x)
# 增加两个Function之间的间隙
x = [i*2 for i in x]
plt.title("Resolved tickets by everybody in Function")
count = 0
# 保证颜色唯一
colorlist = list()
for u in user:
count = count + 1
name_support = data.loc[data['Assignee'] == u]
x1,y = coord_func_num(name_support,Function)
temp = randomcolor()
while temp not in colorlist:
colorlist.append(temp)
else:
temp = randomcolor()
# 横坐标位置居中
if count == math.ceil( len(user)/2 ):
p = plt.bar(x,y,width=width,tick_label = axis_x, label=u,color=temp)
else:
p = plt.bar(x,y,width=width, label=u,color=temp)
x = [i + width for i in x]
# print('Distribution arrangement x-axis coordinate:', x)
plt.legend(fontsize=14)
autolabel(p)
#网格
plt.grid()
# plt.set_xticks(x +width/2)#将坐标设置在指定位置
# 设置刻度字体大小
plt.xticks(fontsize=11)
plt.yticks(fontsize=12)
# 设置坐标标签字体大小
plt.ylabel('Ticket Number',fontsize=15)
plt.xlabel('Function Area',fontsize=15)
plt.show()
if __name__=="__main__":
data = DATA
data = time_conver(data)
plot_person_function(data)



