目录
文章目录
前言
一、导入模块
二、Pandas数据处理
2.1 读取数据
2.2 是否有缺失值
2.3 查看中国每日数据
2.4 统计中国、美国、日本、澳大利亚4个国家数据
三、Pyecharts绘图
3.1 绘制基础折线图
3.2 加载样式
3.3 动态展示中国每日金牌数据
3.4 增加其他国家每日金牌数据
3.5 2020东京奥运会奖牌数世界分布
3.6 2020东京奥运会金牌世界分布
3.7 2020东京奥运会奖牌世界分布(动态)
总结
前言
前言
刚刚过去的东京奥运会,中国队取得了不错的成绩。本文将分别基于Pandas和Pyecharts进行数据处理和数据可视化,并利用可视化图表对奥运会相关信息进行展示。
一、导入模块
import pandas as pd
from pyecharts.charts import Timeline, Line, Tree
from pyecharts import options as opts
from pyecharts.commons.utils import JsCode
from pyecharts.globals import ThemeType
二、Pandas数据处理
2.1 读取数据
# Load the daily medal table: the first CSV column becomes the index and the
# file is decoded as GB18030.
csv_path = 'D:/天池数据/2020东京奥运会奖牌数据可视化/2020东京奥运会奖牌数据.csv'
df = pd.read_csv(csv_path, index_col=0, encoding='gb18030')
# Preview the first ten rows.
df.head(10)
二、Pandas数据处理
2.1 读取数据
# Load the daily medal table: the first CSV column becomes the index and the
# file is decoded as GB18030.
csv_path = 'D:/天池数据/2020东京奥运会奖牌数据可视化/2020东京奥运会奖牌数据.csv'
df = pd.read_csv(csv_path, index_col=0, encoding='gb18030')
# Preview the first ten rows.
df.head(10)
2.2 是否有缺失值
# One boolean per column: True when that column holds at least one missing
# value (isna is the same check as isnull under its other name).
df.isna().any()
各列数据均不存在确实情况。
2.3 查看中国每日数据
# Approach 1: build the row mask with bracket-style column access.
china_mask = df['国家'] == '中国'
df1 = df[china_mask]
df1
# Approach 2: build the same mask with attribute-style column access.
china_mask = df.国家 == '中国'
df1 = df[china_mask]
df1
2.4 统计中国、美国、日本、澳大利亚4个国家数据
# Build one row per country: [country, running gold total after row 1, row 2, ...].
all_country_data = []
# country -> last-column value of that country's first row; later cells prefix
# it with 'image://', so presumably a flag image URL (confirm against the CSV).
flg = {}
# Header for the table built from all_country_data: '国家' followed by the
# index labels of the country that has the most rows.
cols = ['国家']
countrys = ['中国', '美国', '日本', '澳大利亚']
for country in countrys:
    df1 = df[df['国家'] == country]
    labels = df1.index.tolist()
    # Replace the label part of the header (rather than appending to it) when
    # this country has more rows than any seen so far, so the header never
    # ends up with two runs of labels.
    if len(labels) > len(cols) - 1:
        cols[1:] = labels
    flg[country] = df1.iloc[0, -1]
    # Running total of gold medals, computed in one pass.
    running_gold = df1['金牌'].cumsum().tolist()
    all_country_data.append([country] + running_gold)
    # NOTE(review): rows are lined up by position, not by index label; a
    # country with fewer rows than the header ends up shorter than `cols`.
    # Confirm that positional alignment is what the charts should show.
all_country_data
生成新的Dataframe:
# Assemble the per-country rows into a table whose header is `cols`.
d2 = pd.DataFrame(data=all_country_data, columns=cols)
# Forward-fill along each row: a missing cell takes the value to its left.
# Same result as fillna(method='ffill', axis=1), whose `method` argument is
# deprecated in recent pandas releases.
d2 = d2.ffill(axis=1)
d2
method='ffill':用前一个非缺失值去填充缺失值。
method='bfill':用下一个非缺失值去填充缺失值。
这里采用前一个非缺失值对缺失值进行填充。同时,可根据需要获取多个国家数据,改变countrys列表即可。
三、Pyecharts绘图
3.1 绘制基础折线图
# China's value for every column of d2 after '国家', in header order.
x_data = cols[1:]
china_row = d2['国家'] == '中国'
CHN = [d2[d_time][china_row].values.tolist()[0] for d_time in x_data]

# Plain line chart: one series, point labels shown, legend hidden.
l1 = (
    Line()
    .add_xaxis(x_data)
    # China series
    .add_yaxis(
        '中国',
        CHN,
        label_opts=opts.LabelOpts(is_show=True),
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(
            title='中国金牌',
            pos_left='center',
        ),
        yaxis_opts=opts.AxisOpts(
            name='金牌/枚',
            is_scale=True,
            max_=40,
        ),
        legend_opts=opts.LegendOpts(is_show=False),
    )
)
l1.render_notebook()
# China's value for every column of d2 after '国家', in header order.
x_data = cols[1:]
china_row = d2['国家'] == '中国'
CHN = [d2[d_time][china_row].values.tolist()[0] for d_time in x_data]

# Plain line chart: one series, point labels shown, legend hidden.
l1 = (
    Line()
    .add_xaxis(x_data)
    # China series
    .add_yaxis(
        '中国',
        CHN,
        label_opts=opts.LabelOpts(is_show=True),
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(
            title='中国金牌',
            pos_left='center',
        ),
        yaxis_opts=opts.AxisOpts(
            name='金牌/枚',
            is_scale=True,
            max_=40,
        ),
        legend_opts=opts.LegendOpts(is_show=False),
    )
)
l1.render_notebook()
3.2 加载样式
# Background colour: a gradient from '#c86589' to '#06a7ff', handed to the
# chart as a JavaScript expression.
background_color_js = (
    "new echarts.graphic.LinearGradient(0, 0, 0, 1, "
    "[{offset: 0, color: '#c86589'}, {offset: 1, color: '#06a7ff'}], false)"
)
# Line style applied to the series through set_series_opts.
linestyle_dic = {
    'normal': {
        'width': 4,
        'shadowColor': '#696969',
        'shadowBlur': 10,
        'shadowOffsetY': 10,
        'shadowOffsetX': 10,
    }
}
# The timeline is configured here but the static chart below does not use it.
timeline = Timeline(init_opts=opts.InitOpts(bg_color=JsCode(background_color_js),
                                            width='980px', height='600px'))
timeline.add_schema(is_auto_play=True, is_loop_play=True,
                    is_timeline_show=True, play_interval=500)

# China's value for every column of d2 after '国家', in header order.
x_data = cols[1:]
china_row = d2['国家'] == '中国'
CHN = [d2[d_time][china_row].values.tolist()[0] for d_time in x_data]

line = (
    Line(init_opts=opts.InitOpts(bg_color=JsCode(background_color_js),
                                 width='980px', height='600px'))
    .add_xaxis(x_data)
    # China series
    .add_yaxis(
        '中国',
        CHN,
        symbol_size=10,
        is_smooth=True,
        label_opts=opts.LabelOpts(is_show=True),
        # Mark the series maximum with the image stored in flg['中国'].
        markpoint_opts=opts.MarkPointOpts(
            data=[
                opts.MarkPointItem(
                    name="",
                    type_='max',
                    value_index=0,
                    symbol='image://' + flg['中国'],
                    symbol_size=[40, 25],
                )
            ],
            label_opts=opts.LabelOpts(is_show=False),
        ),
    )
    .set_series_opts(
        linestyle_opts=linestyle_dic,
        label_opts=opts.LabelOpts(font_size=12, color='red'),
    )
    .set_global_opts(
        title_opts=opts.TitleOpts(
            title='中国金牌',
            pos_left='center',
            pos_top='2%',
            title_textstyle_opts=opts.TextStyleOpts(color='#DC143C', font_size=20),
        ),
        xaxis_opts=opts.AxisOpts(
            axislabel_opts=opts.LabelOpts(font_size=14, color='red'),
            axisline_opts=opts.AxisLineOpts(
                is_show=True,
                linestyle_opts=opts.LineStyleOpts(width=2, color='#DB7093'),
            ),
        ),
        yaxis_opts=opts.AxisOpts(
            name='金牌/枚',
            is_scale=True,
            max_=40,
            name_textstyle_opts=opts.TextStyleOpts(
                font_size=16, font_weight='bold', color='#FFD700'),
            axislabel_opts=opts.LabelOpts(font_size=13, color='red'),
            splitline_opts=opts.SplitLineOpts(
                is_show=True,
                linestyle_opts=opts.LineStyleOpts(type_='dashed'),
            ),
            axisline_opts=opts.AxisLineOpts(
                is_show=True,
                linestyle_opts=opts.LineStyleOpts(width=2, color='#DB7093'),
            ),
        ),
        legend_opts=opts.LegendOpts(is_show=False, pos_right='1.5%', pos_top='2%',
                                    legend_icon='roundRect', orient='horizontal'),
    )
)
line.render_notebook()
3.3 动态展示中国每日金牌数据
# Background colour: a gradient from '#c86589' to '#06a7ff', handed to the
# chart as a JavaScript expression.
background_color_js = (
    "new echarts.graphic.LinearGradient(0, 0, 0, 1, "
    "[{offset: 0, color: '#c86589'}, {offset: 1, color: '#06a7ff'}], false)"
)
# Line style applied to the series through set_series_opts.
linestyle_dic = {
    'normal': {
        'width': 4,
        'shadowColor': '#696969',
        'shadowBlur': 10,
        'shadowOffsetY': 10,
        'shadowOffsetX': 10,
    }
}
timeline = Timeline(init_opts=opts.InitOpts(bg_color=JsCode(background_color_js),
                                            width='980px', height='600px'))
timeline.add_schema(is_auto_play=True, is_loop_play=True,
                    is_timeline_show=True, play_interval=500)

CHN = []
x_data = cols[1:]
china_row = d2['国家'] == '中国'
for d_time in x_data:
    # Extend the series by one point, then build this frame's chart from the
    # points gathered so far; the x axis always carries every label.
    CHN.append(d2[d_time][china_row].values.tolist()[0])
    line = (
        Line(init_opts=opts.InitOpts(bg_color=JsCode(background_color_js),
                                     width='980px', height='600px'))
        .add_xaxis(x_data)
        # China series
        .add_yaxis(
            '中国',
            CHN,
            symbol_size=10,
            is_smooth=True,
            label_opts=opts.LabelOpts(is_show=True),
            # Mark the series maximum with the image stored in flg['中国'].
            markpoint_opts=opts.MarkPointOpts(
                data=[
                    opts.MarkPointItem(
                        name="",
                        type_='max',
                        value_index=0,
                        symbol='image://' + flg['中国'],
                        symbol_size=[40, 25],
                    )
                ],
                label_opts=opts.LabelOpts(is_show=False),
            ),
        )
        .set_series_opts(
            linestyle_opts=linestyle_dic,
            label_opts=opts.LabelOpts(font_size=12, color='red'),
        )
        .set_global_opts(
            title_opts=opts.TitleOpts(
                title='中国金牌',
                pos_left='center',
                pos_top='2%',
                title_textstyle_opts=opts.TextStyleOpts(color='#DC143C', font_size=20),
            ),
            xaxis_opts=opts.AxisOpts(
                axislabel_opts=opts.LabelOpts(font_size=14, color='red'),
                axisline_opts=opts.AxisLineOpts(
                    is_show=True,
                    linestyle_opts=opts.LineStyleOpts(width=2, color='#DB7093'),
                ),
            ),
            yaxis_opts=opts.AxisOpts(
                name='金牌/枚',
                is_scale=True,
                max_=40,
                name_textstyle_opts=opts.TextStyleOpts(
                    font_size=16, font_weight='bold', color='#FFD700'),
                axislabel_opts=opts.LabelOpts(
                    font_size=13, color='red', rotate=15),
                splitline_opts=opts.SplitLineOpts(
                    is_show=True,
                    linestyle_opts=opts.LineStyleOpts(type_='dashed'),
                ),
                axisline_opts=opts.AxisLineOpts(
                    is_show=True,
                    linestyle_opts=opts.LineStyleOpts(width=2, color='#DB7093'),
                ),
            ),
            legend_opts=opts.LegendOpts(is_show=True, pos_right='1%', pos_top='2%',
                                        legend_icon='roundRect', orient='vertical'),
        )
    )
    # One timeline frame per column label.
    timeline.add(line, '{}'.format(d_time))
timeline.render_notebook()
3.4 增加其他国家每日金牌数据
# Background colour: a gradient from '#c86589' to '#06a7ff', handed to the
# chart as a JavaScript expression.
background_color_js = (
    "new echarts.graphic.LinearGradient(0, 0, 0, 1, "
    "[{offset: 0, color: '#c86589'}, {offset: 1, color: '#06a7ff'}], false)"
)
# Line style applied to every series through set_series_opts.
linestyle_dic = {
    'normal': {
        'width': 4,
        'shadowColor': '#696969',
        'shadowBlur': 10,
        'shadowOffsetY': 10,
        'shadowOffsetX': 10,
    }
}
timeline = Timeline(init_opts=opts.InitOpts(bg_color=JsCode(background_color_js),
                                            width='980px', height='600px'))
timeline.add_schema(is_auto_play=True, is_loop_play=True,
                    is_timeline_show=True, play_interval=500)

CHN, USA, JPN, AUS = [], [], [], []
# (series name, growing value list, point size), in the order the series are
# added to each chart. China is drawn with larger points than the others.
series_spec = [
    ('中国', CHN, 10),
    ('美国', USA, 5),
    ('日本', JPN, 5),
    ('澳大利亚', AUS, 5),
]
x_data = cols[1:]
for d_time in x_data:
    # Extend every series by one point before building this frame's chart.
    for name, values, _ in series_spec:
        values.append(d2[d_time][d2['国家'] == name].values.tolist()[0])

    line = Line(init_opts=opts.InitOpts(bg_color=JsCode(background_color_js),
                                        width='980px', height='600px'))
    line.add_xaxis(x_data)
    for name, values, point_size in series_spec:
        line.add_yaxis(
            name,
            values,
            symbol_size=point_size,
            is_smooth=True,
            label_opts=opts.LabelOpts(is_show=True),
            # Mark the series maximum with the image stored in flg[name].
            markpoint_opts=opts.MarkPointOpts(
                data=[
                    opts.MarkPointItem(
                        name="",
                        type_='max',
                        value_index=0,
                        symbol='image://' + flg[name],
                        symbol_size=[40, 25],
                    )
                ],
                label_opts=opts.LabelOpts(is_show=False),
            ),
        )
    line.set_series_opts(linestyle_opts=linestyle_dic)
    line.set_global_opts(
        title_opts=opts.TitleOpts(
            title='中国 VS 美国 VS 日本 VS 澳大利亚',
            pos_left='center',
            pos_top='2%',
            title_textstyle_opts=opts.TextStyleOpts(color='#DC143C', font_size=20),
        ),
        xaxis_opts=opts.AxisOpts(
            axislabel_opts=opts.LabelOpts(font_size=14, color='red'),
            axisline_opts=opts.AxisLineOpts(
                is_show=True,
                linestyle_opts=opts.LineStyleOpts(width=2, color='#DB7093'),
            ),
        ),
        yaxis_opts=opts.AxisOpts(
            name='金牌/枚',
            is_scale=True,
            max_=40,
            name_textstyle_opts=opts.TextStyleOpts(
                font_size=16, font_weight='bold', color='#FFD700'),
            axislabel_opts=opts.LabelOpts(font_size=13, color='red', rotate=15),
            splitline_opts=opts.SplitLineOpts(
                is_show=True,
                linestyle_opts=opts.LineStyleOpts(type_='dashed'),
            ),
            axisline_opts=opts.AxisLineOpts(
                is_show=True,
                linestyle_opts=opts.LineStyleOpts(width=2, color='#DB7093'),
            ),
        ),
        legend_opts=opts.LegendOpts(is_show=True, pos_right='1%', pos_top='2%',
                                    legend_icon='roundRect', orient='vertical'),
    )
    # One timeline frame per column label.
    timeline.add(line, '{}'.format(d_time))
timeline.render_notebook()
3.5 2020东京奥运会奖牌数世界分布
import requests
from pyecharts.charts import Map

# Fetch the medal table as JSON; an HTTP error status raises here.
url = 'https://app-sc.miguvideo.com/vms-livedata/olympic-medal/total-table/15/110000004609'
r = requests.get(url, timeout=30)
r.raise_for_status()
r.encoding = r.apparent_encoding
datas = r.json()['body']['allMedalData']

# Collect every record first and build the frame once: DataFrame.append was
# removed in pandas 2.0, and building in one go also yields a clean 0..n-1
# index without a separate reset.
medal_rows = [
    [
        data['countryName'],
        data['goldMedalNum'],
        data['silverMedalNum'],
        data['bronzeMedalNum'],
        data['totalMedalNum'],
    ]
    for data in datas
]
df = pd.DataFrame(medal_rows, columns=['国家', '金牌', '银牌', '铜牌', '奖牌'])
# Assign the result back instead of replacing in place on the selected column,
# so the change is guaranteed to land in df.
df['国家'] = df['国家'].replace('俄奥委会', '俄罗斯')
# Lookup passed as `name_map` to Map().add(...) in the cells below; keys are
# presumably the map's built-in English region names and values the Chinese
# names used in df['国家'] (confirm against the map data).
# NOTE(review): the bare `...` line is a placeholder for entries omitted from
# this listing; the literal is not valid Python until they are restored.
name_map = {
'Singapore Rep.': '新加坡',
...
'Comoros': '科摩罗'
}
# World map of total medals: one [country, total] pair per row of df.
total_pairs = [[country, total] for country, total in zip(df['国家'].values, df['奖牌'].values)]
total_scale = opts.VisualMapOpts(
    is_show=True,
    max_=120,
    is_piecewise=True,
    split_number=8,
)

m0 = Map()
m0.add(
    "奖牌数",
    total_pairs,
    "world",
    is_map_symbol_show=False,
    is_roam=False,
    name_map=name_map,
)
# Region labels are hidden.
m0.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
m0.set_global_opts(
    title_opts=opts.TitleOpts(title="2020东京奥运会奖牌数世界分布"),
    legend_opts=opts.LegendOpts(is_show=False),
    visualmap_opts=total_scale,
)
m0.render_notebook()
首先,利用requests库爬取相关数据,并对数据进行相应的处理;之后,利用pyecharts绘制map地图对奖牌数的世界发布进行可视化。
3.6 2020东京奥运会金牌世界分布
# World map of gold medals: one [country, gold] pair per row of df.
gold_pairs = [[country, gold] for country, gold in zip(df['国家'].values, df['金牌'].values)]
gold_scale = opts.VisualMapOpts(
    is_show=True,
    max_=40,
    is_piecewise=True,
    split_number=8,
    range_color=['#FFFFE0', '#FFA07A', '#CD5C5C', '#8B0000'],
)

m1 = Map()
m1.add(
    "金牌",
    gold_pairs,
    "world",
    is_map_symbol_show=False,
    is_roam=False,
    name_map=name_map,
)
# Region labels are hidden.
m1.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
m1.set_global_opts(
    title_opts=opts.TitleOpts(title="2020东京奥运会金牌世界分布"),
    legend_opts=opts.LegendOpts(is_show=False),
    visualmap_opts=gold_scale,
)
m1.render_notebook()
3.7 2020东京奥运会奖牌世界分布(动态)
# Timeline with one world-map frame per medal column.
timeline = Timeline(init_opts=opts.InitOpts(theme=ThemeType.DARK))
timeline.add_schema(is_auto_play=True, is_loop_play=True,
                    is_timeline_show=False, play_interval=800)
# Column to plot in each frame, and the visual-map upper bound for that column.
colls = ['奖牌', '金牌', '银牌', '铜牌']
maxx = [120, 40, 40, 40]
for col, upper_bound in zip(colls, maxx):
    m = (
        Map()
        .add(
            col,
            [list(z) for z in zip(df['国家'].values, df[col].values)],
            "world",
            is_map_symbol_show=False,
            is_roam=False,
            name_map=name_map,
        )
        .set_series_opts(label_opts=opts.LabelOpts(is_show=False))
        .set_global_opts(
            legend_opts=opts.LegendOpts(is_show=False),
            visualmap_opts=opts.VisualMapOpts(
                is_show=False,
                max_=upper_bound,
                is_piecewise=True,
                split_number=20,
            ),
            # Rotated group holding a translucent rectangle with the frame
            # caption drawn on top of it.
            graphic_opts=[
                opts.GraphicGroup(
                    graphic_item=opts.GraphicItem(
                        rotation=JsCode("Math.PI / 4"),
                        bounding="raw",
                        right=110,
                        bottom=110,
                        z=100,
                    ),
                    children=[
                        opts.GraphicRect(
                            graphic_item=opts.GraphicItem(
                                left="center", top="center", z=100
                            ),
                            graphic_shape_opts=opts.GraphicShapeOpts(
                                width=400, height=50
                            ),
                            graphic_basicstyle_opts=opts.GraphicBasicStyleOpts(
                                fill="rgba(0,0,0,0.3)"
                            ),
                        ),
                        opts.GraphicText(
                            graphic_item=opts.GraphicItem(
                                left="center", top="center", z=100
                            ),
                            graphic_textstyle_opts=opts.GraphicTextStyleOpts(
                                text="2020奥运会{}分布".format(col),
                                font="bold 26px Microsoft YaHei",
                                graphic_basicstyle_opts=opts.GraphicBasicStyleOpts(
                                    fill="#fff"
                                ),
                            ),
                        ),
                    ],
                )
            ],
        )
    )
    timeline.add(m, "{}分布".format(col))
timeline.render_notebook()
总结
本文利用Pandas对数据进行处理,并利用Pyecharts绘制折线图(Line)和地图(Map),并通过添加时间轴组件(Timeline)对奥运会数据进行动态的可视化展示。Pyecharts相关的参数说明以及其他类型的图表制作可以参阅Pyecharts的官方文档简介 - pyecharts - A Python Echarts Plotting Library built with love.



