Pandas 学习笔记二
数据的读取与存储
csv格式
import pandas as pd
# 读取csv文件
data = pd.read_csv("stock_day.csv",usecols=['open','high','low','close'])
data.head()
| open | high | close | low |
|---|
| 2018-02-27 | 23.53 | 25.88 | 24.16 | 23.53 |
|---|
| 2018-02-26 | 22.80 | 23.78 | 23.53 | 22.80 |
|---|
| 2018-02-23 | 22.88 | 23.37 | 22.82 | 22.71 |
|---|
| 2018-02-22 | 22.25 | 22.76 | 22.28 | 22.02 |
|---|
| 2018-02-14 | 21.49 | 21.99 | 21.92 | 21.48 |
|---|
data = pd.read_csv("stock_day2.csv", names=["open", "high", "close", "low", "volume", "price_change", "p_change", "ma5", "ma10", "ma20", "v_ma5", "v_ma10", "v_ma20", "turnover"])
data.head()
| open | high | close | low | volume | price_change | p_change | ma5 | ma10 | ma20 | v_ma5 | v_ma10 | v_ma20 | turnover |
|---|
| 2018-02-27 | 23.53 | 25.88 | 24.16 | 23.53 | 95578.03 | 0.63 | 2.68 | 22.942 | 22.142 | 22.875 | 53782.64 | 46738.65 | 55576.11 | 2.39 |
|---|
| 2018-02-26 | 22.80 | 23.78 | 23.53 | 22.80 | 60985.11 | 0.69 | 3.02 | 22.406 | 21.955 | 22.942 | 40827.52 | 42736.34 | 56007.50 | 1.53 |
|---|
| 2018-02-23 | 22.88 | 23.37 | 22.82 | 22.71 | 52914.01 | 0.54 | 2.42 | 21.938 | 21.929 | 23.022 | 35119.58 | 41871.97 | 56372.85 | 1.32 |
|---|
| 2018-02-22 | 22.25 | 22.76 | 22.28 | 22.02 | 36105.01 | 0.36 | 1.64 | 21.446 | 21.909 | 23.137 | 35397.58 | 39904.78 | 60149.60 | 0.90 |
|---|
| 2018-02-14 | 21.49 | 21.99 | 21.92 | 21.48 | 23331.04 | 0.44 | 2.05 | 21.366 | 21.923 | 23.253 | 33590.21 | 42935.74 | 61716.11 | 0.58 |
|---|
# 写入csv文件
# data.to_csv("1.csv")
# 保存'open'列的数据
data[:10].to_csv("test.csv", columns=["open"])
pd.read_csv("test.csv")
| Unnamed: 0 | open |
|---|
| 0 | 2018-02-27 | 23.53 |
|---|
| 1 | 2018-02-26 | 22.80 |
|---|
| 2 | 2018-02-23 | 22.88 |
|---|
| 3 | 2018-02-22 | 22.25 |
|---|
| 4 | 2018-02-14 | 21.49 |
|---|
| 5 | 2018-02-13 | 21.40 |
|---|
| 6 | 2018-02-12 | 20.70 |
|---|
| 7 | 2018-02-09 | 21.20 |
|---|
| 8 | 2018-02-08 | 21.79 |
|---|
| 9 | 2018-02-07 | 22.69 |
|---|
# NOTE: test.csv was first written WITH the index column (the previous to_csv
# call used the default index=True), but this append passes index=False, so
# each appended row has one field fewer than the existing rows. When the file
# is read back, the appended values land in the first data column and "open"
# is NaN for those rows.
data[:10].to_csv("test.csv", columns=["open"], index=False, mode="a", header=False)
pd.read_csv("test.csv")
| Unnamed: 0 | open |
|---|
| 0 | 2018-02-27 | 23.53 |
|---|
| 1 | 2018-02-26 | 22.80 |
|---|
| 2 | 2018-02-23 | 22.88 |
|---|
| 3 | 2018-02-22 | 22.25 |
|---|
| 4 | 2018-02-14 | 21.49 |
|---|
| 5 | 2018-02-13 | 21.40 |
|---|
| 6 | 2018-02-12 | 20.70 |
|---|
| 7 | 2018-02-09 | 21.20 |
|---|
| 8 | 2018-02-08 | 21.79 |
|---|
| 9 | 2018-02-07 | 22.69 |
|---|
| 10 | 23.53 | NaN |
|---|
| 11 | 22.8 | NaN |
|---|
| 12 | 22.88 | NaN |
|---|
| 13 | 22.25 | NaN |
|---|
| 14 | 21.49 | NaN |
|---|
| 15 | 21.4 | NaN |
|---|
| 16 | 20.7 | NaN |
|---|
| 17 | 21.2 | NaN |
|---|
| 18 | 21.79 | NaN |
|---|
| 19 | 22.69 | NaN |
|---|
hdf5格式
# 读取hdf5文件
dayClose = pd.read_hdf("day_close.h5")
dayClose.head()
| 000001.SZ | 000002.SZ | 000004.SZ | 000005.SZ | 000006.SZ | 000007.SZ | 000008.SZ | 000009.SZ | 000010.SZ | 000011.SZ | ... | 001965.SZ | 603283.SH | 002920.SZ | 002921.SZ | 300684.SZ | 002922.SZ | 300735.SZ | 603329.SH | 603655.SH | 603080.SH |
|---|
| 0 | 16.30 | 17.71 | 4.58 | 2.88 | 14.60 | 2.62 | 4.96 | 4.66 | 5.37 | 6.02 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
|---|
| 1 | 17.02 | 19.20 | 4.65 | 3.02 | 15.97 | 2.65 | 4.95 | 4.70 | 5.37 | 6.27 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
|---|
| 2 | 17.02 | 17.28 | 4.56 | 3.06 | 14.37 | 2.63 | 4.82 | 4.47 | 5.37 | 5.96 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
|---|
| 3 | 16.18 | 16.97 | 4.49 | 2.95 | 13.10 | 2.73 | 4.89 | 4.33 | 5.37 | 5.77 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
|---|
| 4 | 16.95 | 17.19 | 4.55 | 2.99 | 13.18 | 2.77 | 4.97 | 4.42 | 5.37 | 5.92 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
|---|
5 rows × 3562 columns
# 写入hdf5文件
dayClose.to_hdf("test.h5",key="close")
pd.read_hdf("test.h5",key="close").head()
| 000001.SZ | 000002.SZ | 000004.SZ | 000005.SZ | 000006.SZ | 000007.SZ | 000008.SZ | 000009.SZ | 000010.SZ | 000011.SZ | ... | 001965.SZ | 603283.SH | 002920.SZ | 002921.SZ | 300684.SZ | 002922.SZ | 300735.SZ | 603329.SH | 603655.SH | 603080.SH |
|---|
| 0 | 16.30 | 17.71 | 4.58 | 2.88 | 14.60 | 2.62 | 4.96 | 4.66 | 5.37 | 6.02 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
|---|
| 1 | 17.02 | 19.20 | 4.65 | 3.02 | 15.97 | 2.65 | 4.95 | 4.70 | 5.37 | 6.27 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
|---|
| 2 | 17.02 | 17.28 | 4.56 | 3.06 | 14.37 | 2.63 | 4.82 | 4.47 | 5.37 | 5.96 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
|---|
| 3 | 16.18 | 16.97 | 4.49 | 2.95 | 13.10 | 2.73 | 4.89 | 4.33 | 5.37 | 5.77 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
|---|
| 4 | 16.95 | 17.19 | 4.55 | 2.99 | 13.18 | 2.77 | 4.97 | 4.42 | 5.37 | 5.92 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
|---|
5 rows × 3562 columns
dayOpen = pd.read_hdf("day_open.h5")
dayOpen.to_hdf("test.h5",key="open")
pd.read_hdf("test.h5",key="open").head()
| 000001.SZ | 000002.SZ | 000004.SZ | 000005.SZ | 000006.SZ | 000007.SZ | 000008.SZ | 000009.SZ | 000010.SZ | 000011.SZ | ... | 001965.SZ | 603283.SH | 002920.SZ | 002921.SZ | 300684.SZ | 002922.SZ | 300735.SZ | 603329.SH | 603655.SH | 603080.SH |
|---|
| 0 | 15.50 | 16.15 | 4.26 | 2.73 | 13.99 | 2.52 | 4.76 | 4.45 | 5.37 | 5.79 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
|---|
| 1 | 16.50 | 17.94 | 4.53 | 2.91 | 14.78 | 2.61 | 4.99 | 4.69 | 5.37 | 6.03 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
|---|
| 2 | 17.00 | 18.80 | 4.63 | 3.04 | 16.08 | 2.65 | 4.96 | 4.73 | 5.37 | 6.26 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
|---|
| 3 | 16.95 | 16.59 | 4.52 | 3.02 | 13.20 | 2.63 | 4.81 | 4.35 | 5.37 | 5.74 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
|---|
| 4 | 16.20 | 16.96 | 4.50 | 2.95 | 13.17 | 2.80 | 4.88 | 4.34 | 5.37 | 5.80 | ... | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
|---|
5 rows × 3562 columns
JSON格式
# 读取JSON格式文件
sa = pd.read_json("Sarcasm_Headlines_Dataset.json", orient="records", lines=True)
sa.head()
| article_link | headline | is_sarcastic |
|---|
| 0 | https://www.huffingtonpost.com/entry/versace-b... | former versace store clerk sues over secret 'b... | 0 |
|---|
| 1 | https://www.huffingtonpost.com/entry/roseanne-... | the 'roseanne' revival catches up to our thorn... | 0 |
|---|
| 2 | https://local.theonion.com/mom-starting-to-fea... | mom starting to fear son's web series closest ... | 1 |
|---|
| 3 | https://politics.theonion.com/boehner-just-wan... | boehner just wants wife to listen, not come up... | 1 |
|---|
| 4 | https://www.huffingtonpost.com/entry/jk-rowlin... | j.k. rowling wishes snape happy birthday in th... | 0 |
|---|
# 写入JSON格式文件
sa.to_json("test.json", orient="records", lines=True)
pd.read_json("test.json", orient="records", lines=True)
| article_link | headline | is_sarcastic |
|---|
| 0 | https://www.huffingtonpost.com/entry/versace-b... | former versace store clerk sues over secret 'b... | 0 |
|---|
| 1 | https://www.huffingtonpost.com/entry/roseanne-... | the 'roseanne' revival catches up to our thorn... | 0 |
|---|
| 2 | https://local.theonion.com/mom-starting-to-fea... | mom starting to fear son's web series closest ... | 1 |
|---|
| 3 | https://politics.theonion.com/boehner-just-wan... | boehner just wants wife to listen, not come up... | 1 |
|---|
| 4 | https://www.huffingtonpost.com/entry/jk-rowlin... | j.k. rowling wishes snape happy birthday in th... | 0 |
|---|
| ... | ... | ... | ... |
|---|
| 26704 | https://www.huffingtonpost.com/entry/american-... | american politics in moral free-fall | 0 |
|---|
| 26705 | https://www.huffingtonpost.com/entry/americas-... | america's best 20 hikes | 0 |
|---|
| 26706 | https://www.huffingtonpost.com/entry/reparatio... | reparations and obama | 0 |
|---|
| 26707 | https://www.huffingtonpost.com/entry/israeli-b... | israeli ban targeting boycott supporters raise... | 0 |
|---|
| 26708 | https://www.huffingtonpost.com/entry/gourmet-g... | gourmet gifts for the foodie 2014 | 0 |
|---|
26709 rows × 3 columns
Pandas高级处理
缺失值处理
movie = pd.read_csv("IMDB-Movie-Data.csv")
movie.head()
| Rank | Title | Genre | Description | Director | Actors | Year | Runtime (Minutes) | Rating | Votes | Revenue (Millions) | metascore |
|---|
| 0 | 1 | Guardians of the Galaxy | Action,Adventure,Sci-Fi | A group of intergalactic criminals are forced ... | James Gunn | Chris Pratt, Vin Diesel, Bradley Cooper, Zoe S... | 2014 | 121 | 8.1 | 757074 | 333.13 | 76.0 |
|---|
| 1 | 2 | Prometheus | Adventure,Mystery,Sci-Fi | Following clues to the origin of mankind, a te... | Ridley Scott | Noomi Rapace, Logan Marshall-Green, Michael Fa... | 2012 | 124 | 7.0 | 485820 | 126.46 | 65.0 |
|---|
| 2 | 3 | Split | Horror,Thriller | Three girls are kidnapped by a man with a diag... | M. Night Shyamalan | James McAvoy, Anya Taylor-Joy, Haley Lu Richar... | 2016 | 117 | 7.3 | 157606 | 138.12 | 62.0 |
|---|
| 3 | 4 | Sing | Animation,Comedy,Family | In a city of humanoid animals, a hustling thea... | Christophe Lourdelet | Matthew McConaughey,Reese Witherspoon, Seth Ma... | 2016 | 108 | 7.2 | 60545 | 270.32 | 59.0 |
|---|
| 4 | 5 | Suicide Squad | Action,Adventure,Fantasy | A secret government agency recruits some of th... | David Ayer | Will Smith, Jared Leto, Margot Robbie, Viola D... | 2016 | 123 | 6.2 | 393727 | 325.02 | 40.0 |
|---|
import numpy as np
# 判断是否存在缺失值
# 如果缺失值不是nan而是其他符号则先替换为nan再进行判断处理
# data_new = data.replace(to_replace="?", value=np.nan)
np.any(pd.isnull(movie)) # 返回True说明有缺失值
True
np.all(pd.notnull(movie)) # 返回False说明有缺失值
False
pd.notnull(movie).all() # 找到有缺失值的字段
Rank True
Title True
Genre True
Description True
Director True
Actors True
Year True
Runtime (Minutes) True
Rating True
Votes True
Revenue (Millions) False
metascore False
dtype: bool
# 删除有缺失值的样本
data1 = movie.dropna()
pd.notnull(data1).all()
Rank True
Title True
Genre True
Description True
Director True
Actors True
Year True
Runtime (Minutes) True
Rating True
Votes True
Revenue (Millions) True
metascore True
dtype: bool
# 替换有缺失值的样本
# Fill missing values with the column mean and assign the result back.
# Calling fillna(inplace=True) on a selected column is a chained in-place
# operation: it is deprecated in pandas 2.x and does not update `movie`
# when Copy-on-Write is enabled, so explicit assignment is used instead.
movie["Revenue (Millions)"] = movie["Revenue (Millions)"].fillna(movie["Revenue (Millions)"].mean())
movie["metascore"] = movie["metascore"].fillna(movie["metascore"].mean())
pd.notnull(movie).all() # 缺失值已经处理完毕
Rank True
Title True
Genre True
Description True
Director True
Actors True
Year True
Runtime (Minutes) True
Rating True
Votes True
Revenue (Millions) True
metascore True
dtype: bool
数据离散化
data = pd.read_csv("stock_day.csv")
p_change = data["p_change"]
# 自动分组
sr = pd.qcut(p_change,10)
sr.value_counts()
(-10.030999999999999, -4.836] 65
(-0.462, 0.26] 65
(0.26, 0.94] 65
(5.27, 10.03] 65
(-4.836, -2.444] 64
(-2.444, -1.352] 64
(-1.352, -0.462] 64
(1.738, 2.938] 64
(2.938, 5.27] 64
(0.94, 1.738] 63
Name: p_change, dtype: int64
pd.get_dummies(sr, prefix="涨跌幅").head()
| 涨跌幅_(-10.030999999999999, -4.836] | 涨跌幅_(-4.836, -2.444] | 涨跌幅_(-2.444, -1.352] | 涨跌幅_(-1.352, -0.462] | 涨跌幅_(-0.462, 0.26] | 涨跌幅_(0.26, 0.94] | 涨跌幅_(0.94, 1.738] | 涨跌幅_(1.738, 2.938] | 涨跌幅_(2.938, 5.27] | 涨跌幅_(5.27, 10.03] |
|---|
| 2018-02-27 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
|---|
| 2018-02-26 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 |
|---|
| 2018-02-23 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
|---|
| 2018-02-22 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
|---|
| 2018-02-14 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
|---|
# 自定义分组
bins = [-100, -7, -5, -3, 0, 3, 5, 7, 100]
sr = pd.cut(p_change, bins)
sr.value_counts()
(0, 3] 215
(-3, 0] 188
(3, 5] 57
(-5, -3] 51
(5, 7] 35
(7, 100] 35
(-100, -7] 34
(-7, -5] 28
Name: p_change, dtype: int64
data2 = pd.get_dummies(sr, prefix="rise")
data2.head()
| rise_(-100, -7] | rise_(-7, -5] | rise_(-5, -3] | rise_(-3, 0] | rise_(0, 3] | rise_(3, 5] | rise_(5, 7] | rise_(7, 100] |
|---|
| 2018-02-27 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
|---|
| 2018-02-26 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
|---|
| 2018-02-23 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
|---|
| 2018-02-22 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
|---|
| 2018-02-14 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
|---|
合并
# 按方向拼接 pd.concat([data1, data2], axis=0)
pd.concat([data, data2], axis=1).head()
| open | high | close | low | volume | price_change | p_change | ma5 | ma10 | ma20 | ... | v_ma20 | turnover | rise_(-100, -7] | rise_(-7, -5] | rise_(-5, -3] | rise_(-3, 0] | rise_(0, 3] | rise_(3, 5] | rise_(5, 7] | rise_(7, 100] |
|---|
| 2018-02-27 | 23.53 | 25.88 | 24.16 | 23.53 | 95578.03 | 0.63 | 2.68 | 22.942 | 22.142 | 22.875 | ... | 55576.11 | 2.39 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
|---|
| 2018-02-26 | 22.80 | 23.78 | 23.53 | 22.80 | 60985.11 | 0.69 | 3.02 | 22.406 | 21.955 | 22.942 | ... | 56007.50 | 1.53 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
|---|
| 2018-02-23 | 22.88 | 23.37 | 22.82 | 22.71 | 52914.01 | 0.54 | 2.42 | 21.938 | 21.929 | 23.022 | ... | 56372.85 | 1.32 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
|---|
| 2018-02-22 | 22.25 | 22.76 | 22.28 | 22.02 | 36105.01 | 0.36 | 1.64 | 21.446 | 21.909 | 23.137 | ... | 60149.60 | 0.90 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
|---|
| 2018-02-14 | 21.49 | 21.99 | 21.92 | 21.48 | 23331.04 | 0.44 | 2.05 | 21.366 | 21.923 | 23.253 | ... | 61716.11 | 0.58 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 |
|---|
5 rows × 22 columns
pd.concat([data, data2], axis=0).head()
| open | high | close | low | volume | price_change | p_change | ma5 | ma10 | ma20 | ... | v_ma20 | turnover | rise_(-100, -7] | rise_(-7, -5] | rise_(-5, -3] | rise_(-3, 0] | rise_(0, 3] | rise_(3, 5] | rise_(5, 7] | rise_(7, 100] |
|---|
| 2018-02-27 | 23.53 | 25.88 | 24.16 | 23.53 | 95578.03 | 0.63 | 2.68 | 22.942 | 22.142 | 22.875 | ... | 55576.11 | 2.39 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
|---|
| 2018-02-26 | 22.80 | 23.78 | 23.53 | 22.80 | 60985.11 | 0.69 | 3.02 | 22.406 | 21.955 | 22.942 | ... | 56007.50 | 1.53 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
|---|
| 2018-02-23 | 22.88 | 23.37 | 22.82 | 22.71 | 52914.01 | 0.54 | 2.42 | 21.938 | 21.929 | 23.022 | ... | 56372.85 | 1.32 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
|---|
| 2018-02-22 | 22.25 | 22.76 | 22.28 | 22.02 | 36105.01 | 0.36 | 1.64 | 21.446 | 21.909 | 23.137 | ... | 60149.60 | 0.90 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
|---|
| 2018-02-14 | 21.49 | 21.99 | 21.92 | 21.48 | 23331.04 | 0.44 | 2.05 | 21.366 | 21.923 | 23.253 | ... | 61716.11 | 0.58 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN |
|---|
5 rows × 22 columns
pd.concat([data, data2], axis=0).tail()
| open | high | close | low | volume | price_change | p_change | ma5 | ma10 | ma20 | ... | v_ma20 | turnover | rise_(-100, -7] | rise_(-7, -5] | rise_(-5, -3] | rise_(-3, 0] | rise_(0, 3] | rise_(3, 5] | rise_(5, 7] | rise_(7, 100] |
|---|
| 2015-03-06 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 |
|---|
| 2015-03-05 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 |
|---|
| 2015-03-04 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 |
|---|
| 2015-03-03 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 |
|---|
| 2015-03-02 | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | NaN | ... | NaN | NaN | 0.0 | 0.0 | 0.0 | 0.0 | 1.0 | 0.0 | 0.0 | 0.0 |
|---|
5 rows × 22 columns
# Two small example frames sharing the key columns "key1" and "key2",
# used below to demonstrate pd.merge.
# The constructor is pd.DataFrame (capital F); pd.Dataframe raises AttributeError.
left = pd.DataFrame({
    'key1': ['K0', 'K0', 'K1', 'K2'],
    'key2': ['K0', 'K1', 'K0', 'K1'],
    'A': ['A0', 'A1', 'A2', 'A3'],
    'B': ['B0', 'B1', 'B2', 'B3'],
})
right = pd.DataFrame({
    'key1': ['K0', 'K1', 'K1', 'K2'],
    'key2': ['K0', 'K0', 'K0', 'K0'],
    'C': ['C0', 'C1', 'C2', 'C3'],
    'D': ['D0', 'D1', 'D2', 'D3'],
})
left
| key1 | key2 | A | B |
|---|
| 0 | K0 | K0 | A0 | B0 |
|---|
| 1 | K0 | K1 | A1 | B1 |
|---|
| 2 | K1 | K0 | A2 | B2 |
|---|
| 3 | K2 | K1 | A3 | B3 |
|---|
right
| key1 | key2 | C | D |
|---|
| 0 | K0 | K0 | C0 | D0 |
|---|
| 1 | K1 | K0 | C1 | D1 |
|---|
| 2 | K1 | K0 | C2 | D2 |
|---|
| 3 | K2 | K0 | C3 | D3 |
|---|
pd.merge(left, right, how="inner", on=["key1", "key2"])
| key1 | key2 | A | B | C | D |
|---|
| 0 | K0 | K0 | A0 | B0 | C0 | D0 |
|---|
| 1 | K1 | K0 | A2 | B2 | C1 | D1 |
|---|
| 2 | K1 | K0 | A2 | B2 | C2 | D2 |
|---|
pd.merge(left, right, how="left", on=["key1", "key2"])
| key1 | key2 | A | B | C | D |
|---|
| 0 | K0 | K0 | A0 | B0 | C0 | D0 |
|---|
| 1 | K0 | K1 | A1 | B1 | NaN | NaN |
|---|
| 2 | K1 | K0 | A2 | B2 | C1 | D1 |
|---|
| 3 | K1 | K0 | A2 | B2 | C2 | D2 |
|---|
| 4 | K2 | K1 | A3 | B3 | NaN | NaN |
|---|
pd.merge(left, right, how="right", on=["key1", "key2"])
| key1 | key2 | A | B | C | D |
|---|
| 0 | K0 | K0 | A0 | B0 | C0 | D0 |
|---|
| 1 | K1 | K0 | A2 | B2 | C1 | D1 |
|---|
| 2 | K1 | K0 | A2 | B2 | C2 | D2 |
|---|
| 3 | K2 | K0 | NaN | NaN | C3 | D3 |
|---|
pd.merge(left, right, how="outer", on=["key1", "key2"])
| key1 | key2 | A | B | C | D |
|---|
| 0 | K0 | K0 | A0 | B0 | C0 | D0 |
|---|
| 1 | K0 | K1 | A1 | B1 | NaN | NaN |
|---|
| 2 | K1 | K0 | A2 | B2 | C1 | D1 |
|---|
| 3 | K1 | K0 | A2 | B2 | C2 | D2 |
|---|
| 4 | K2 | K1 | A3 | B3 | NaN | NaN |
|---|
| 5 | K2 | K0 | NaN | NaN | C3 | D3 |
|---|
# pd.crosstab(星期数据列, 涨跌幅数据列)
# 准备星期数据列
data.index
Index(['2018-02-27', '2018-02-26', '2018-02-23', '2018-02-22', '2018-02-14',
'2018-02-13', '2018-02-12', '2018-02-09', '2018-02-08', '2018-02-07',
...
'2015-03-13', '2015-03-12', '2015-03-11', '2015-03-10', '2015-03-09',
'2015-03-06', '2015-03-05', '2015-03-04', '2015-03-03', '2015-03-02'],
dtype='object', length=643)
# pandas日期类型
date = pd.to_datetime(data.index)
date
DatetimeIndex(['2018-02-27', '2018-02-26', '2018-02-23', '2018-02-22',
'2018-02-14', '2018-02-13', '2018-02-12', '2018-02-09',
'2018-02-08', '2018-02-07',
...
'2015-03-13', '2015-03-12', '2015-03-11', '2015-03-10',
'2015-03-09', '2015-03-06', '2015-03-05', '2015-03-04',
'2015-03-03', '2015-03-02'],
dtype='datetime64[ns]', length=643, freq=None)
data["week"] = date.weekday
date.weekday
Int64Index([1, 0, 4, 3, 2, 1, 0, 4, 3, 2,
...
4, 3, 2, 1, 0, 4, 3, 2, 1, 0],
dtype='int64', length=643)
data.head()
| open | high | close | low | volume | price_change | p_change | ma5 | ma10 | ma20 | v_ma5 | v_ma10 | v_ma20 | turnover | week |
|---|
| 2018-02-27 | 23.53 | 25.88 | 24.16 | 23.53 | 95578.03 | 0.63 | 2.68 | 22.942 | 22.142 | 22.875 | 53782.64 | 46738.65 | 55576.11 | 2.39 | 1 |
|---|
| 2018-02-26 | 22.80 | 23.78 | 23.53 | 22.80 | 60985.11 | 0.69 | 3.02 | 22.406 | 21.955 | 22.942 | 40827.52 | 42736.34 | 56007.50 | 1.53 | 0 |
|---|
| 2018-02-23 | 22.88 | 23.37 | 22.82 | 22.71 | 52914.01 | 0.54 | 2.42 | 21.938 | 21.929 | 23.022 | 35119.58 | 41871.97 | 56372.85 | 1.32 | 4 |
|---|
| 2018-02-22 | 22.25 | 22.76 | 22.28 | 22.02 | 36105.01 | 0.36 | 1.64 | 21.446 | 21.909 | 23.137 | 35397.58 | 39904.78 | 60149.60 | 0.90 | 3 |
|---|
| 2018-02-14 | 21.49 | 21.99 | 21.92 | 21.48 | 23331.04 | 0.44 | 2.05 | 21.366 | 21.923 | 23.253 | 33590.21 | 42935.74 | 61716.11 | 0.58 | 2 |
|---|
# 准备涨跌幅数据列
data["pona"] = np.where(data["p_change"] > 0, 1, 0)
data.head()
| open | high | close | low | volume | price_change | p_change | ma5 | ma10 | ma20 | v_ma5 | v_ma10 | v_ma20 | turnover | week | pona |
|---|
| 2018-02-27 | 23.53 | 25.88 | 24.16 | 23.53 | 95578.03 | 0.63 | 2.68 | 22.942 | 22.142 | 22.875 | 53782.64 | 46738.65 | 55576.11 | 2.39 | 1 | 1 |
|---|
| 2018-02-26 | 22.80 | 23.78 | 23.53 | 22.80 | 60985.11 | 0.69 | 3.02 | 22.406 | 21.955 | 22.942 | 40827.52 | 42736.34 | 56007.50 | 1.53 | 0 | 1 |
|---|
| 2018-02-23 | 22.88 | 23.37 | 22.82 | 22.71 | 52914.01 | 0.54 | 2.42 | 21.938 | 21.929 | 23.022 | 35119.58 | 41871.97 | 56372.85 | 1.32 | 4 | 1 |
|---|
| 2018-02-22 | 22.25 | 22.76 | 22.28 | 22.02 | 36105.01 | 0.36 | 1.64 | 21.446 | 21.909 | 23.137 | 35397.58 | 39904.78 | 60149.60 | 0.90 | 3 | 1 |
|---|
| 2018-02-14 | 21.49 | 21.99 | 21.92 | 21.48 | 23331.04 | 0.44 | 2.05 | 21.366 | 21.923 | 23.253 | 33590.21 | 42935.74 | 61716.11 | 0.58 | 2 | 1 |
|---|
交叉表和透视表
# 交叉表
dataTable = pd.crosstab(data["week"], data["pona"])
dataTable
| pona | 0 | 1 |
|---|
| week | | |
|---|
| 0 | 63 | 62 |
|---|
| 1 | 55 | 76 |
|---|
| 2 | 61 | 71 |
|---|
| 3 | 63 | 65 |
|---|
| 4 | 59 | 68 |
|---|
dataTable.sum(axis=1)
week
0 125
1 131
2 132
3 128
4 127
dtype: int64
dataTable.div(dataTable.sum(axis=1), axis=0).plot(kind="bar", stacked=True)
dataTable.div(dataTable.sum(axis=1), axis=0)
| pona | 0 | 1 |
|---|
| week | | |
|---|
| 0 | 0.504000 | 0.496000 |
|---|
| 1 | 0.419847 | 0.580153 |
|---|
| 2 | 0.462121 | 0.537879 |
|---|
| 3 | 0.492188 | 0.507812 |
|---|
| 4 | 0.464567 | 0.535433 |
|---|
# 透视表
data.pivot_table(["pona"], index=["week"])
| pona |
|---|
| week | |
|---|
| 0 | 0.496000 |
|---|
| 1 | 0.580153 |
|---|
| 2 | 0.537879 |
|---|
| 3 | 0.507812 |
|---|
| 4 | 0.535433 |
|---|
分组与聚合
# Example frame for the groupby/aggregation demo below.
# The constructor is pd.DataFrame (capital F); pd.Dataframe raises AttributeError.
col = pd.DataFrame({
    'color': ['white', 'red', 'green', 'red', 'green'],
    'object': ['pen', 'pencil', 'pencil', 'ashtray', 'pen'],
    'price1': [5.56, 4.20, 1.30, 0.56, 2.75],
    'price2': [4.75, 4.12, 1.60, 0.75, 3.15],
})
col
| color | object | price1 | price2 |
|---|
| 0 | white | pen | 5.56 | 4.75 |
|---|
| 1 | red | pencil | 4.20 | 4.12 |
|---|
| 2 | green | pencil | 1.30 | 1.60 |
|---|
| 3 | red | ashtray | 0.56 | 0.75 |
|---|
| 4 | green | pen | 2.75 | 3.15 |
|---|
# 进行分组,对颜色分组,price1进行聚合
# 用dataframe的方法进行分组
col.groupby(by="color")["price1"].max()
color
green 2.75
red 4.20
white 5.56
Name: price1, dtype: float64
col["price1"].groupby(col["color"]).max()
color
green 2.75
red 4.20
white 5.56
Name: price1, dtype: float64
实战案例 电影数据分析练习
数据文件:IMDB-Movie-Data.csv
问题1:我们想知道这些电影数据中评分的平均分,导演的人数等信息,我们应该怎么获取?
问题2:对于这一组电影数据,如果我们想查看rating、runtime的分布情况,应该如何呈现数据?
问题3:对于这一组电影数据,如果我们希望统计电影分类(genre)的情况,应该如何处理数据?
# 1、准备数据
movie = pd.read_csv("IMDB-Movie-Data.csv")
movie
| Rank | Title | Genre | Description | Director | Actors | Year | Runtime (Minutes) | Rating | Votes | Revenue (Millions) | metascore |
|---|
| 0 | 1 | Guardians of the Galaxy | Action,Adventure,Sci-Fi | A group of intergalactic criminals are forced ... | James Gunn | Chris Pratt, Vin Diesel, Bradley Cooper, Zoe S... | 2014 | 121 | 8.1 | 757074 | 333.13 | 76.0 |
|---|
| 1 | 2 | Prometheus | Adventure,Mystery,Sci-Fi | Following clues to the origin of mankind, a te... | Ridley Scott | Noomi Rapace, Logan Marshall-Green, Michael Fa... | 2012 | 124 | 7.0 | 485820 | 126.46 | 65.0 |
|---|
| 2 | 3 | Split | Horror,Thriller | Three girls are kidnapped by a man with a diag... | M. Night Shyamalan | James McAvoy, Anya Taylor-Joy, Haley Lu Richar... | 2016 | 117 | 7.3 | 157606 | 138.12 | 62.0 |
|---|
| 3 | 4 | Sing | Animation,Comedy,Family | In a city of humanoid animals, a hustling thea... | Christophe Lourdelet | Matthew McConaughey,Reese Witherspoon, Seth Ma... | 2016 | 108 | 7.2 | 60545 | 270.32 | 59.0 |
|---|
| 4 | 5 | Suicide Squad | Action,Adventure,Fantasy | A secret government agency recruits some of th... | David Ayer | Will Smith, Jared Leto, Margot Robbie, Viola D... | 2016 | 123 | 6.2 | 393727 | 325.02 | 40.0 |
|---|
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
|---|
| 995 | 996 | Secret in Their Eyes | Crime,Drama,Mystery | A tight-knit team of rising investigators, alo... | Billy Ray | Chiwetel Ejiofor, Nicole Kidman, Julia Roberts... | 2015 | 111 | 6.2 | 27585 | NaN | 45.0 |
|---|
| 996 | 997 | Hostel: Part II | Horror | Three American college students studying abroa... | Eli Roth | Lauren German, Heather Matarazzo, Bijou Philli... | 2007 | 94 | 5.5 | 73152 | 17.54 | 46.0 |
|---|
| 997 | 998 | Step Up 2: The Streets | Drama,Music,Romance | Romantic sparks occur between two dance studen... | Jon M. Chu | Robert Hoffman, Briana Evigan, Cassie Ventura,... | 2008 | 98 | 6.2 | 70699 | 58.01 | 50.0 |
|---|
| 998 | 999 | Search Party | Adventure,Comedy | A pair of friends embark on a mission to reuni... | Scot Armstrong | Adam Pally, T.J. Miller, Thomas Middleditch,Sh... | 2014 | 93 | 5.6 | 4881 | NaN | 22.0 |
|---|
| 999 | 1000 | Nine Lives | Comedy,Family,Fantasy | A stuffy businessman finds himself trapped ins... | Barry Sonnenfeld | Kevin Spacey, Jennifer Garner, Robbie Amell,Ch... | 2016 | 87 | 5.3 | 12435 | 19.64 | 11.0 |
|---|
1000 rows × 12 columns
# 问题1:我们想知道这些电影数据中评分的平均分,导演的人数等信息,我们应该怎么获取?
# 评分的平均分
movie["Rating"].mean()
6.723199999999999
# 导演的人数
np.unique(movie["Director"]).size
644
# 问题2:对于这一组电影数据,如果我们想查看rating、runtime的分布情况,应该如何呈现数据?
movie["Rating"].plot(kind="hist", figsize=(20, 8))
import matplotlib.pyplot as plt
# 1、创建画布
plt.figure(figsize=(20, 8), dpi=80)
# 2、绘制直方图
plt.hist(movie["Rating"], 20)
# 修改刻度
plt.xticks(np.linspace(movie["Rating"].min(), movie["Rating"].max(), 21))
# 添加网格
plt.grid(linestyle="--", alpha=0.5)
# 3、显示图像
plt.show()
# 问题3:对于这一组电影数据,如果我们希望统计电影分类(genre)的情况,应该如何处理数据?
# 先统计电影类别都有哪些
movie_genre = [i.split(",") for i in movie["Genre"]]
movie_genre
[['Action', 'Adventure', 'Sci-Fi'],
['Adventure', 'Mystery', 'Sci-Fi'],
['Horror', 'Thriller'],
['Animation', 'Comedy', 'Family'],
['Action', 'Adventure', 'Fantasy'],
['Action', 'Adventure', 'Fantasy'],
['Comedy', 'Drama', 'Music'],
['Comedy'],
['Action', 'Adventure', 'Biography'],
['Adventure', 'Drama', 'Romance'],
['Adventure', 'Family', 'Fantasy'],
['Biography', 'Drama', 'History'],
['Action', 'Adventure', 'Sci-Fi'],
['Animation', 'Adventure', 'Comedy'],
['Action', 'Comedy', 'Drama'],
['Animation', 'Adventure', 'Comedy'],
['Biography', 'Drama', 'History'],
['Action', 'Thriller'],
['Biography', 'Drama'],
['Drama', 'Mystery', 'Sci-Fi'],
['Adventure', 'Drama', 'Thriller'],
['Drama'],
['Crime', 'Drama', 'Horror'],
['Animation', 'Adventure', 'Comedy'],
['Action', 'Adventure', 'Sci-Fi'],
['Comedy'],
['Action', 'Adventure', 'Drama'],
['Horror', 'Thriller'],
['Comedy'],
['Action', 'Adventure', 'Drama'],
['Comedy'],
['Drama', 'Thriller'],
['Action', 'Adventure', 'Sci-Fi'],
['Action', 'Adventure', 'Comedy'],
['Action', 'Horror', 'Sci-Fi'],
['Action', 'Adventure', 'Sci-Fi'],
['Adventure', 'Drama', 'Sci-Fi'],
['Action', 'Adventure', 'Fantasy'],
['Action', 'Adventure', 'Western'],
['Comedy', 'Drama'],
['Animation', 'Adventure', 'Comedy'],
['Drama'],
['Horror'],
['Biography', 'Drama', 'History'],
['Drama'],
['Action', 'Adventure', 'Fantasy'],
['Drama', 'Thriller'],
['Adventure', 'Drama', 'Fantasy'],
['Action', 'Adventure', 'Sci-Fi'],
['Drama'],
['Action', 'Adventure', 'Fantasy'],
['Action', 'Adventure', 'Fantasy'],
['Comedy', 'Drama'],
['Action', 'Crime', 'Thriller'],
['Action', 'Crime', 'Drama'],
['Adventure', 'Drama', 'History'],
['Crime', 'Horror', 'Thriller'],
['Drama', 'Romance'],
['Comedy', 'Drama', 'Romance'],
['Biography', 'Drama'],
['Action', 'Adventure', 'Sci-Fi'],
['Horror', 'Mystery', 'Thriller'],
['Crime', 'Drama', 'Mystery'],
['Drama', 'Romance', 'Thriller'],
['Drama', 'Mystery', 'Sci-Fi'],
['Action', 'Adventure', 'Comedy'],
['Drama', 'History', 'Thriller'],
['Action', 'Adventure', 'Sci-Fi'],
['Drama'],
['Action', 'Drama', 'Thriller'],
['Drama', 'History'],
['Action', 'Drama', 'Romance'],
['Drama', 'Fantasy'],
['Drama', 'Romance'],
['Animation', 'Adventure', 'Comedy'],
['Action', 'Adventure', 'Fantasy'],
['Action', 'Sci-Fi'],
['Adventure', 'Drama', 'War'],
['Action', 'Adventure', 'Fantasy'],
['Action', 'Comedy', 'Fantasy'],
['Action', 'Adventure', 'Sci-Fi'],
['Comedy', 'Drama'],
['Biography', 'Comedy', 'Crime'],
['Crime', 'Drama', 'Mystery'],
['Action', 'Crime', 'Thriller'],
['Action', 'Adventure', 'Sci-Fi'],
['Crime', 'Drama'],
['Action', 'Adventure', 'Fantasy'],
['Crime', 'Drama', 'Mystery'],
['Action', 'Crime', 'Drama'],
['Crime', 'Drama', 'Mystery'],
['Action', 'Adventure', 'Fantasy'],
['Drama'],
['Comedy', 'Crime', 'Drama'],
['Action', 'Adventure', 'Sci-Fi'],
['Action', 'Comedy', 'Crime'],
['Animation', 'Drama', 'Fantasy'],
['Horror', 'Mystery', 'Sci-Fi'],
['Drama', 'Mystery', 'Thriller'],
['Crime', 'Drama', 'Thriller'],
['Biography', 'Crime', 'Drama'],
['Action', 'Adventure', 'Fantasy'],
['Adventure', 'Drama', 'Sci-Fi'],
['Crime', 'Mystery', 'Thriller'],
['Action', 'Adventure', 'Comedy'],
['Crime', 'Drama', 'Thriller'],
['Comedy'],
['Action', 'Adventure', 'Drama'],
['Drama'],
['Drama', 'Mystery', 'Sci-Fi'],
['Action', 'Horror', 'Thriller'],
['Biography', 'Drama', 'History'],
['Romance', 'Sci-Fi'],
['Action', 'Fantasy', 'War'],
['Adventure', 'Drama', 'Fantasy'],
['Comedy'],
['Horror', 'Thriller'],
['Action', 'Biography', 'Drama'],
['Drama', 'Horror', 'Mystery'],
['Animation', 'Adventure', 'Comedy'],
['Adventure', 'Drama', 'Family'],
['Adventure', 'Mystery', 'Sci-Fi'],
['Adventure', 'Comedy', 'Romance'],
['Action'],
['Action', 'Thriller'],
['Adventure', 'Drama', 'Family'],
['Action', 'Adventure', 'Sci-Fi'],
['Adventure', 'Crime', 'Mystery'],
['Comedy', 'Family', 'Musical'],
['Adventure', 'Drama', 'Thriller'],
['Drama'],
['Adventure', 'Comedy', 'Drama'],
['Drama', 'Horror', 'Thriller'],
['Drama', 'Music'],
['Action', 'Crime', 'Thriller'],
['Crime', 'Drama', 'Thriller'],
['Crime', 'Drama', 'Thriller'],
['Drama', 'Romance'],
['Mystery', 'Thriller'],
['Mystery', 'Thriller', 'Western'],
['Action', 'Adventure', 'Sci-Fi'],
['Comedy', 'Family'],
['Biography', 'Comedy', 'Drama'],
['Drama'],
['Drama', 'Western'],
['Drama', 'Mystery', 'Romance'],
['Comedy', 'Drama'],
['Action', 'Drama', 'Mystery'],
['Comedy'],
['Action', 'Adventure', 'Crime'],
['Adventure', 'Family', 'Fantasy'],
['Adventure', 'Sci-Fi', 'Thriller'],
['Drama'],
['Action', 'Crime', 'Drama'],
['Drama', 'Horror', 'Mystery'],
['Action', 'Horror', 'Sci-Fi'],
['Action', 'Adventure', 'Sci-Fi'],
['Comedy', 'Drama', 'Romance'],
['Action', 'Comedy', 'Fantasy'],
['Action', 'Comedy', 'Mystery'],
['Thriller', 'War'],
['Action', 'Comedy', 'Crime'],
['Action', 'Adventure', 'Sci-Fi'],
['Action', 'Adventure', 'Crime'],
['Action', 'Adventure', 'Thriller'],
['Drama', 'Fantasy', 'Romance'],
['Action', 'Adventure', 'Comedy'],
['Biography', 'Drama', 'History'],
['Action', 'Drama', 'History'],
['Action', 'Adventure', 'Thriller'],
['Crime', 'Drama', 'Thriller'],
['Animation', 'Adventure', 'Family'],
['Adventure', 'Horror'],
['Drama', 'Romance', 'Sci-Fi'],
['Animation', 'Adventure', 'Comedy'],
['Action', 'Adventure', 'Family'],
['Action', 'Adventure', 'Drama'],
['Action', 'Comedy'],
['Horror', 'Mystery', 'Thriller'],
['Action', 'Adventure', 'Comedy'],
['Comedy', 'Romance'],
['Horror', 'Mystery'],
['Drama', 'Family', 'Fantasy'],
['Sci-Fi'],
['Drama', 'Thriller'],
['Drama', 'Romance'],
['Drama', 'War'],
['Drama', 'Fantasy', 'Horror'],
['Crime', 'Drama'],
['Comedy', 'Drama', 'Romance'],
['Drama', 'Romance'],
['Drama'],
['Crime', 'Drama', 'History'],
['Horror', 'Sci-Fi', 'Thriller'],
['Action', 'Drama', 'Sport'],
['Action', 'Adventure', 'Sci-Fi'],
['Crime', 'Drama', 'Thriller'],
['Adventure', 'Biography', 'Drama'],
['Biography', 'Drama', 'Thriller'],
['Action', 'Comedy', 'Crime'],
['Action', 'Adventure', 'Sci-Fi'],
['Drama', 'Fantasy', 'Horror'],
['Biography', 'Drama', 'Thriller'],
['Action', 'Adventure', 'Sci-Fi'],
['Action', 'Adventure', 'Mystery'],
['Action', 'Adventure', 'Sci-Fi'],
['Drama', 'Horror'],
['Comedy', 'Drama', 'Romance'],
['Comedy', 'Romance'],
['Drama', 'Horror', 'Thriller'],
['Action', 'Adventure', 'Drama'],
['Drama'],
['Action', 'Adventure', 'Sci-Fi'],
['Action', 'Drama', 'Mystery'],
['Action', 'Adventure', 'Fantasy'],
['Action', 'Adventure', 'Fantasy'],
['Action', 'Adventure', 'Sci-Fi'],
['Action', 'Adventure', 'Comedy'],
['Drama', 'Horror'],
['Action', 'Comedy'],
['Action', 'Adventure', 'Sci-Fi'],
['Animation', 'Adventure', 'Comedy'],
['Horror', 'Mystery'],
['Crime', 'Drama', 'Mystery'],
['Comedy', 'Crime'],
['Drama'],
['Comedy', 'Drama', 'Romance'],
['Action', 'Adventure', 'Sci-Fi'],
['Action', 'Adventure', 'Family'],
['Horror', 'Sci-Fi', 'Thriller'],
['Drama', 'Fantasy', 'War'],
['Crime', 'Drama', 'Thriller'],
['Action', 'Adventure', 'Drama'],
['Action', 'Adventure', 'Thriller'],
['Action', 'Adventure', 'Drama'],
['Drama', 'Romance'],
['Biography', 'Drama', 'History'],
['Drama', 'Horror', 'Thriller'],
['Adventure', 'Comedy', 'Drama'],
['Action', 'Adventure', 'Romance'],
['Action', 'Drama', 'War'],
['Animation', 'Adventure', 'Comedy'],
['Animation', 'Adventure', 'Comedy'],
['Action', 'Adventure', 'Sci-Fi'],
['Adventure', 'Family', 'Fantasy'],
['Drama', 'Musical', 'Romance'],
['Drama', 'Sci-Fi', 'Thriller'],
['Comedy', 'Drama'],
['Action', 'Comedy', 'Crime'],
['Biography', 'Comedy', 'Drama'],
['Comedy', 'Drama', 'Romance'],
['Drama', 'Thriller'],
['Biography', 'Drama', 'History'],
['Action', 'Adventure', 'Sci-Fi'],
['Horror', 'Mystery', 'Thriller'],
['Comedy'],
['Action', 'Adventure', 'Sci-Fi'],
['Action', 'Drama', 'Sci-Fi'],
['Horror'],
['Drama', 'Thriller'],
['Comedy', 'Drama', 'Romance'],
['Drama', 'Thriller'],
['Comedy', 'Drama'],
['Drama'],
['Action', 'Adventure', 'Comedy'],
['Drama', 'Horror', 'Thriller'],
['Comedy'],
['Drama', 'Sci-Fi'],
['Action', 'Adventure', 'Sci-Fi'],
['Horror'],
['Action', 'Adventure', 'Thriller'],
['Adventure', 'Fantasy'],
['Action', 'Comedy', 'Crime'],
['Comedy', 'Drama', 'Music'],
['Animation', 'Adventure', 'Comedy'],
['Action', 'Adventure', 'Mystery'],
['Action', 'Comedy', 'Crime'],
['Crime', 'Drama', 'History'],
['Comedy'],
['Action', 'Adventure', 'Sci-Fi'],
['Crime', 'Mystery', 'Thriller'],
['Action', 'Adventure', 'Crime'],
['Thriller'],
['Biography', 'Drama', 'Romance'],
['Action', 'Adventure'],
['Action', 'Fantasy'],
['Action', 'Comedy'],
['Action', 'Adventure', 'Sci-Fi'],
['Action', 'Comedy', 'Crime'],
['Thriller'],
['Action', 'Drama', 'Horror'],
['Comedy', 'Music', 'Romance'],
['Comedy'],
['Drama'],
['Action', 'Adventure', 'Fantasy'],
['Drama', 'Romance'],
['Animation', 'Adventure', 'Comedy'],
['Comedy', 'Drama'],
['Biography', 'Crime', 'Drama'],
['Drama', 'History'],
['Action', 'Crime', 'Thriller'],
['Action', 'Biography', 'Drama'],
['Horror'],
['Comedy', 'Romance'],
['Comedy', 'Romance'],
['Comedy', 'Crime', 'Drama'],
['Adventure', 'Family', 'Fantasy'],
['Crime', 'Drama', 'Thriller'],
['Action', 'Crime', 'Thriller'],
['Comedy', 'Romance'],
['Biography', 'Drama', 'Sport'],
['Drama', 'Romance'],
['Drama', 'Horror'],
['Adventure', 'Fantasy'],
['Adventure', 'Family', 'Fantasy'],
['Action', 'Drama', 'Sci-Fi'],
['Action', 'Adventure', 'Sci-Fi'],
['Action', 'Horror'],
['Comedy', 'Horror', 'Thriller'],
['Action', 'Crime', 'Thriller'],
['Crime', 'Drama', 'Music'],
['Drama'],
['Action', 'Crime', 'Thriller'],
['Action', 'Sci-Fi', 'Thriller'],
['Biography', 'Drama'],
['Action', 'Adventure', 'Fantasy'],
['Drama', 'Horror', 'Sci-Fi'],
['Biography', 'Comedy', 'Drama'],
['Crime', 'Horror', 'Thriller'],
['Crime', 'Drama', 'Mystery'],
['Animation', 'Adventure', 'Comedy'],
['Action', 'Biography', 'Drama'],
['Biography', 'Drama'],
['Biography', 'Drama', 'History'],
['Action', 'Biography', 'Drama'],
['Drama', 'Fantasy', 'Horror'],
['Comedy', 'Drama', 'Romance'],
['Drama', 'Sport'],
['Drama', 'Romance'],
['Comedy', 'Romance'],
['Action', 'Crime', 'Thriller'],
['Action', 'Crime', 'Drama'],
['Action', 'Drama', 'Thriller'],
['Adventure', 'Family', 'Fantasy'],
['Action', 'Adventure'],
['Action', 'Adventure', 'Romance'],
['Adventure', 'Family', 'Fantasy'],
['Crime', 'Drama'],
['Comedy', 'Horror'],
['Comedy', 'Fantasy', 'Romance'],
['Drama'],
['Drama'],
['Comedy', 'Drama'],
['Comedy', 'Drama', 'Romance'],
['Adventure', 'Sci-Fi', 'Thriller'],
['Action', 'Adventure', 'Fantasy'],
['Comedy', 'Drama'],
['Biography', 'Drama', 'Romance'],
['Comedy', 'Fantasy'],
['Comedy', 'Drama', 'Fantasy'],
['Comedy'],
['Horror', 'Thriller'],
['Action', 'Adventure', 'Sci-Fi'],
['Adventure', 'Comedy', 'Horror'],
['Comedy', 'Mystery'],
['Drama'],
['Adventure', 'Drama', 'Fantasy'],
['Drama', 'Sport'],
['Action', 'Adventure'],
['Action', 'Adventure', 'Drama'],
['Action', 'Drama', 'Sci-Fi'],
['Action', 'Mystery', 'Sci-Fi'],
['Action', 'Crime', 'Drama'],
['Action', 'Crime', 'Fantasy'],
['Biography', 'Comedy', 'Drama'],
['Action', 'Crime', 'Thriller'],
['Biography', 'Crime', 'Drama'],
['Drama', 'Sport'],
['Adventure', 'Comedy', 'Drama'],
['Action', 'Adventure', 'Thriller'],
['Comedy', 'Fantasy', 'Horror'],
['Drama', 'Sport'],
['Horror', 'Thriller'],
['Drama', 'History', 'Thriller'],
['Animation', 'Action', 'Adventure'],
['Action', 'Adventure', 'Drama'],
['Action', 'Comedy', 'Family'],
['Action', 'Adventure', 'Drama'],
['Action', 'Adventure', 'Sci-Fi'],
['Action', 'Adventure', 'Sci-Fi'],
['Action', 'Comedy'],
['Action', 'Crime', 'Drama'],
['Biography', 'Drama'],
['Comedy', 'Romance'],
['Comedy'],
['Drama', 'Fantasy', 'Romance'],
['Action', 'Adventure', 'Sci-Fi'],
['Comedy'],
['Comedy', 'Sci-Fi'],
['Comedy', 'Drama'],
['Animation', 'Action', 'Adventure'],
['Horror'],
['Action', 'Biography', 'Crime'],
['Animation', 'Adventure', 'Comedy'],
['Drama', 'Romance'],
['Drama', 'Mystery', 'Thriller'],
['Drama', 'History', 'Thriller'],
['Animation', 'Adventure', 'Comedy'],
['Action', 'Adventure', 'Sci-Fi'],
['Adventure', 'Comedy'],
['Action', 'Thriller'],
['Comedy', 'Music'],
['Animation', 'Adventure', 'Comedy'],
['Crime', 'Drama', 'Thriller'],
['Action', 'Adventure', 'Crime'],
['Comedy', 'Drama', 'Horror'],
['Drama'],
['Drama', 'Mystery', 'Romance'],
['Adventure', 'Family', 'Fantasy'],
['Drama'],
['Action', 'Drama', 'Thriller'],
['Drama'],
['Action', 'Horror', 'Romance'],
['Action', 'Drama', 'Fantasy'],
['Action', 'Crime', 'Drama'],
['Drama', 'Fantasy', 'Romance'],
['Action', 'Crime', 'Thriller'],
['Action', 'Mystery', 'Thriller'],
['Horror', 'Mystery', 'Thriller'],
['Action', 'Horror', 'Sci-Fi'],
['Comedy', 'Drama'],
['Comedy'],
['Action', 'Adventure', 'Horror'],
['Action', 'Adventure', 'Thriller'],
['Action', 'Crime', 'Drama'],
['Comedy', 'Crime', 'Drama'],
['Drama', 'Romance'],
['Drama', 'Thriller'],
['Action', 'Comedy', 'Crime'],
['Comedy'],
['Adventure', 'Family', 'Fantasy'],
['Drama', 'Romance'],
['Animation', 'Family', 'Fantasy'],
['Drama', 'Romance'],
['Thriller'],
['Adventure', 'Horror', 'Mystery'],
['Action', 'Sci-Fi'],
['Adventure', 'Comedy', 'Drama'],
['Animation', 'Action', 'Adventure'],
['Drama', 'Horror'],
['Action', 'Adventure', 'Sci-Fi'],
['Comedy', 'Drama'],
['Action', 'Horror', 'Mystery'],
['Action', 'Thriller'],
['Action', 'Adventure', 'Sci-Fi'],
['Drama'],
['Comedy', 'Drama', 'Romance'],
['Comedy', 'Crime'],
['Comedy', 'Romance'],
['Drama', 'Romance'],
['Crime', 'Drama', 'Thriller'],
['Horror', 'Mystery', 'Thriller'],
['Biography', 'Drama'],
['Drama', 'Mystery', 'Sci-Fi'],
['Adventure', 'Comedy', 'Family'],
['Action', 'Adventure', 'Crime'],
['Action', 'Crime', 'Mystery'],
['Mystery', 'Thriller'],
['Action', 'Sci-Fi', 'Thriller'],
['Action', 'Comedy', 'Crime'],
['Biography', 'Crime', 'Drama'],
['Biography', 'Drama', 'History'],
['Action', 'Adventure', 'Sci-Fi'],
['Adventure', 'Family', 'Fantasy'],
['Biography', 'Drama', 'History'],
['Biography', 'Comedy', 'Drama'],
['Drama', 'Thriller'],
['Horror', 'Thriller'],
['Drama'],
['Drama', 'War'],
['Comedy', 'Drama', 'Romance'],
['Drama', 'Romance', 'Sci-Fi'],
['Action', 'Crime', 'Drama'],
['Comedy', 'Drama'],
['Animation', 'Action', 'Adventure'],
['Adventure', 'Comedy', 'Drama'],
['Comedy', 'Drama', 'Family'],
['Drama', 'Romance', 'Thriller'],
['Comedy', 'Crime', 'Drama'],
['Animation', 'Comedy', 'Family'],
['Drama', 'Horror', 'Sci-Fi'],
['Action', 'Adventure', 'Drama'],
['Action', 'Horror', 'Sci-Fi'],
['Action', 'Crime', 'Sport'],
['Drama', 'Horror', 'Sci-Fi'],
['Drama', 'Horror', 'Sci-Fi'],
['Action', 'Adventure', 'Comedy'],
['Mystery', 'Sci-Fi', 'Thriller'],
['Crime', 'Drama', 'Thriller'],
['Animation', 'Adventure', 'Comedy'],
['Action', 'Sci-Fi', 'Thriller'],
['Drama', 'Romance'],
['Crime', 'Drama', 'Thriller'],
['Comedy', 'Drama', 'Music'],
['Drama', 'Fantasy', 'Romance'],
['Crime', 'Drama', 'Thriller'],
['Crime', 'Drama', 'Thriller'],
['Comedy', 'Drama', 'Romance'],
['Comedy', 'Romance'],
['Drama', 'Sci-Fi', 'Thriller'],
['Drama', 'War'],
['Action', 'Crime', 'Drama'],
['Sci-Fi', 'Thriller'],
['Adventure', 'Drama', 'Horror'],
['Comedy', 'Drama', 'Music'],
['Comedy', 'Drama', 'Romance'],
['Action', 'Adventure', 'Drama'],
['Action', 'Crime', 'Drama'],
['Adventure', 'Fantasy'],
['Drama', 'Romance'],
['Biography', 'History', 'Thriller'],
['Crime', 'Drama', 'Thriller'],
['Action', 'Drama', 'History'],
['Biography', 'Comedy', 'Drama'],
['Crime', 'Drama', 'Thriller'],
['Action', 'Biography', 'Drama'],
['Action', 'Drama', 'Sci-Fi'],
['Adventure', 'Horror'],
['Action', 'Adventure', 'Sci-Fi'],
['Action', 'Adventure', 'Mystery'],
['Comedy', 'Drama', 'Romance'],
['Horror', 'Thriller'],
['Action', 'Sci-Fi', 'Thriller'],
['Action', 'Sci-Fi', 'Thriller'],
['Biography', 'Drama'],
['Action', 'Crime', 'Drama'],
['Action', 'Crime', 'Mystery'],
['Action', 'Adventure', 'Comedy'],
['Crime', 'Drama', 'Thriller'],
['Crime', 'Drama'],
['Mystery', 'Thriller'],
['Mystery', 'Sci-Fi', 'Thriller'],
['Action', 'Mystery', 'Sci-Fi'],
['Drama', 'Romance'],
['Drama', 'Thriller'],
['Drama', 'Mystery', 'Sci-Fi'],
['Comedy', 'Drama'],
['Adventure', 'Family', 'Fantasy'],
['Biography', 'Drama', 'Sport'],
['Drama'],
['Comedy', 'Drama', 'Romance'],
['Biography', 'Drama', 'Romance'],
['Action', 'Adventure', 'Sci-Fi'],
['Drama', 'Sci-Fi', 'Thriller'],
['Drama', 'Romance', 'Thriller'],
['Mystery', 'Thriller'],
['Mystery', 'Thriller'],
['Action', 'Drama', 'Fantasy'],
['Action', 'Adventure', 'Biography'],
['Adventure', 'Comedy', 'Sci-Fi'],
['Action', 'Adventure', 'Thriller'],
['Fantasy', 'Horror'],
['Horror', 'Mystery'],
['Animation', 'Adventure', 'Comedy'],
['Action', 'Adventure', 'Drama'],
['Adventure', 'Family', 'Fantasy'],
['Action', 'Adventure', 'Sci-Fi'],
['Comedy', 'Drama'],
['Comedy', 'Drama'],
['Crime', 'Drama', 'Thriller'],
['Comedy', 'Romance'],
['Animation', 'Comedy', 'Family'],
['Comedy', 'Drama'],
['Comedy', 'Drama'],
['Biography', 'Drama', 'Sport'],
['Action', 'Adventure', 'Fantasy'],
['Action', 'Drama', 'History'],
['Action', 'Adventure', 'Sci-Fi'],
['Action', 'Adventure', 'Mystery'],
['Crime', 'Drama', 'Mystery'],
['Action'],
['Action', 'Adventure', 'Family'],
['Comedy', 'Romance'],
['Comedy', 'Drama', 'Romance'],
['Biography', 'Drama', 'Sport'],
['Action', 'Fantasy', 'Thriller'],
['Biography', 'Drama', 'Sport'],
['Action', 'Drama', 'Fantasy'],
['Adventure', 'Sci-Fi', 'Thriller'],
['Animation', 'Adventure', 'Comedy'],
['Drama', 'Mystery', 'Thriller'],
['Drama', 'Romance'],
['Crime', 'Drama', 'Mystery'],
['Comedy', 'Romance', 'Sport'],
['Comedy', 'Family'],
['Drama', 'Horror', 'Mystery'],
['Action', 'Drama', 'Sport'],
['Action', 'Adventure', 'Comedy'],
['Drama', 'Mystery', 'Sci-Fi'],
['Animation', 'Action', 'Comedy'],
['Action', 'Crime', 'Drama'],
['Action', 'Crime', 'Drama'],
['Comedy', 'Drama', 'Romance'],
['Animation', 'Action', 'Adventure'],
['Crime', 'Drama'],
['Drama'],
['Drama'],
['Comedy', 'Crime'],
['Drama'],
['Action', 'Adventure', 'Fantasy'],
['Drama', 'Fantasy', 'Romance'],
['Comedy', 'Drama'],
['Drama', 'Fantasy', 'Thriller'],
['Biography', 'Crime', 'Drama'],
['Comedy', 'Drama', 'Romance'],
['Action', 'Crime', 'Drama'],
['Sci-Fi'],
['Action', 'Biography', 'Drama'],
['Action', 'Comedy', 'Romance'],
['Adventure', 'Comedy', 'Drama'],
['Comedy', 'Crime', 'Drama'],
['Action', 'Fantasy', 'Horror'],
['Drama', 'Horror'],
['Horror'],
['Action', 'Thriller'],
['Action', 'Adventure', 'Mystery'],
['Action', 'Adventure', 'Fantasy'],
['Comedy', 'Drama', 'Romance'],
['Crime', 'Drama', 'Mystery'],
['Adventure', 'Comedy', 'Family'],
['Comedy', 'Drama', 'Romance'],
['Comedy'],
['Comedy', 'Drama', 'Horror'],
['Drama', 'Horror', 'Thriller'],
['Animation', 'Adventure', 'Family'],
['Comedy', 'Romance'],
['Mystery', 'Romance', 'Sci-Fi'],
['Crime', 'Drama'],
['Drama', 'Horror', 'Mystery'],
['Comedy'],
['Biography', 'Drama'],
['Comedy', 'Drama', 'Thriller'],
['Comedy', 'Western'],
['Drama', 'History', 'War'],
['Drama', 'Horror', 'Sci-Fi'],
['Drama'],
['Comedy', 'Drama'],
['Fantasy', 'Horror', 'Thriller'],
['Drama', 'Romance'],
['Action', 'Comedy', 'Fantasy'],
['Drama', 'Horror', 'Musical'],
['Crime', 'Drama', 'Mystery'],
['Horror', 'Mystery', 'Thriller'],
['Comedy', 'Music'],
['Drama'],
['Biography', 'Crime', 'Drama'],
['Drama'],
['Action', 'Adventure', 'Comedy'],
['Crime', 'Drama', 'Mystery'],
['Drama'],
['Action', 'Comedy', 'Crime'],
['Comedy', 'Drama', 'Romance'],
['Crime', 'Drama', 'Mystery'],
['Action', 'Comedy', 'Crime'],
['Drama'],
['Drama', 'Romance'],
['Crime', 'Drama', 'Mystery'],
['Adventure', 'Comedy', 'Romance'],
['Comedy', 'Crime', 'Drama'],
['Adventure', 'Drama', 'Thriller'],
['Biography', 'Crime', 'Drama'],
['Crime', 'Drama', 'Thriller'],
['Drama', 'History', 'Thriller'],
['Action', 'Adventure', 'Sci-Fi'],
['Action', 'Comedy'],
['Horror'],
['Action', 'Crime', 'Mystery'],
['Comedy', 'Romance'],
['Comedy'],
['Action', 'Drama', 'Thriller'],
['Action', 'Adventure', 'Sci-Fi'],
['Drama', 'Mystery', 'Thriller'],
['Comedy', 'Drama', 'Romance'],
['Action', 'Fantasy', 'Horror'],
['Drama', 'Romance'],
['Biography', 'Drama'],
['Biography', 'Drama'],
['Action', 'Adventure', 'Sci-Fi'],
['Animation', 'Adventure', 'Comedy'],
['Drama', 'Mystery', 'Thriller'],
['Action', 'Horror', 'Sci-Fi'],
['Drama', 'Romance'],
['Biography', 'Drama'],
['Action', 'Adventure', 'Drama'],
['Adventure', 'Drama', 'Fantasy'],
['Drama', 'Family'],
['Comedy', 'Drama', 'Romance'],
['Drama', 'Romance', 'Sci-Fi'],
['Action', 'Adventure', 'Thriller'],
['Comedy', 'Romance'],
['Crime', 'Drama', 'Horror'],
['Comedy', 'Fantasy'],
['Action', 'Comedy', 'Crime'],
['Adventure', 'Drama', 'Romance'],
['Action', 'Crime', 'Drama'],
['Crime', 'Horror', 'Thriller'],
['Romance', 'Sci-Fi', 'Thriller'],
['Comedy', 'Drama', 'Romance'],
['Crime', 'Drama'],
['Crime', 'Drama', 'Mystery'],
['Action', 'Adventure', 'Sci-Fi'],
['Animation', 'Fantasy'],
['Animation', 'Adventure', 'Comedy'],
['Drama', 'Mystery', 'War'],
['Comedy', 'Romance'],
['Animation', 'Comedy', 'Family'],
['Comedy'],
['Horror', 'Mystery', 'Thriller'],
['Action', 'Adventure', 'Drama'],
['Comedy'],
['Drama'],
['Adventure', 'Biography', 'Drama'],
['Comedy'],
['Horror', 'Thriller'],
['Action', 'Drama', 'Family'],
['Comedy', 'Fantasy', 'Horror'],
['Comedy', 'Romance'],
['Drama', 'Mystery', 'Romance'],
['Action', 'Adventure', 'Comedy'],
['Thriller'],
['Comedy'],
['Adventure', 'Comedy', 'Sci-Fi'],
['Comedy', 'Drama', 'Fantasy'],
['Mystery', 'Thriller'],
['Comedy', 'Drama'],
['Adventure', 'Drama', 'Family'],
['Horror', 'Thriller'],
['Action', 'Drama', 'Romance'],
['Drama', 'Romance'],
['Action', 'Adventure', 'Fantasy'],
['Comedy'],
['Action', 'Biography', 'Drama'],
['Drama', 'Mystery', 'Romance'],
['Adventure', 'Drama', 'Western'],
['Drama', 'Music', 'Romance'],
['Comedy', 'Romance', 'Western'],
['Thriller'],
['Comedy', 'Drama', 'Romance'],
['Horror', 'Thriller'],
['Adventure', 'Family', 'Fantasy'],
['Crime', 'Drama', 'Mystery'],
['Horror', 'Mystery'],
['Comedy', 'Crime', 'Drama'],
['Action', 'Comedy', 'Romance'],
['Biography', 'Drama', 'History'],
['Adventure', 'Drama'],
['Drama', 'Thriller'],
['Drama'],
['Action', 'Adventure', 'Fantasy'],
['Action', 'Biography', 'Drama'],
['Drama', 'Music'],
['Comedy', 'Drama'],
['Drama', 'Thriller', 'War'],
['Action', 'Mystery', 'Thriller'],
['Horror', 'Sci-Fi', 'Thriller'],
['Comedy', 'Drama', 'Romance'],
['Action', 'Sci-Fi'],
['Action', 'Adventure', 'Fantasy'],
['Drama', 'Mystery', 'Romance'],
['Drama'],
['Action', 'Adventure', 'Thriller'],
['Action', 'Crime', 'Thriller'],
['Animation', 'Action', 'Adventure'],
['Drama', 'Fantasy', 'Mystery'],
['Drama', 'Sci-Fi'],
['Animation', 'Adventure', 'Comedy'],
['Horror', 'Thriller'],
['Action', 'Thriller'],
['Comedy'],
['Biography', 'Drama'],
['Action', 'Mystery', 'Thriller'],
['Action', 'Mystery', 'Sci-Fi'],
['Crime', 'Drama', 'Thriller'],
['Comedy', 'Romance'],
['Comedy', 'Drama', 'Romance'],
['Biography', 'Drama', 'Thriller'],
['Drama'],
['Action', 'Adventure', 'Family'],
['Animation', 'Comedy', 'Family'],
['Action', 'Crime', 'Drama'],
['Comedy'],
['Comedy', 'Crime', 'Thriller'],
['Comedy', 'Romance'],
['Animation', 'Comedy', 'Drama'],
['Action', 'Crime', 'Thriller'],
['Comedy', 'Romance'],
['Adventure', 'Biography', 'Drama'],
['Animation', 'Adventure', 'Comedy'],
['Crime', 'Drama', 'Mystery'],
['Action', 'Comedy', 'Sci-Fi'],
['Comedy', 'Fantasy', 'Horror'],
['Comedy', 'Crime'],
['Animation', 'Action', 'Adventure'],
['Action', 'Drama', 'Thriller'],
['Fantasy', 'Horror'],
['Crime', 'Drama', 'Thriller'],
['Action', 'Adventure', 'Fantasy'],
['Comedy', 'Drama', 'Romance'],
['Biography', 'Drama', 'Romance'],
['Action', 'Drama', 'History'],
['Action', 'Adventure', 'Comedy'],
['Horror', 'Thriller'],
['Horror', 'Mystery', 'Thriller'],
['Comedy', 'Romance'],
['Animation', 'Adventure', 'Comedy'],
['Crime', 'Drama', 'Mystery'],
['Crime', 'Drama', 'Mystery'],
['Adventure', 'Biography', 'Drama'],
['Horror', 'Mystery', 'Thriller'],
['Horror', 'Thriller'],
['Drama', 'Romance', 'War'],
['Adventure', 'Fantasy', 'Mystery'],
['Action', 'Adventure', 'Sci-Fi'],
['Biography', 'Drama'],
['Drama', 'Thriller'],
['Horror', 'Thriller'],
['Drama', 'Horror', 'Thriller'],
['Action', 'Adventure', 'Fantasy'],
['Action', 'Horror', 'Thriller'],
['Comedy'],
['Drama', 'Sport'],
['Comedy', 'Family'],
['Drama', 'Romance'],
['Action', 'Adventure', 'Comedy'],
['Comedy'],
['Mystery', 'Romance', 'Thriller'],
['Crime', 'Drama'],
['Action', 'Comedy'],
['Crime', 'Drama', 'Mystery'],
['Biography', 'Drama', 'Romance'],
['Comedy', 'Crime'],
['Drama', 'Thriller'],
['Drama'],
['Animation', 'Adventure', 'Comedy'],
['Action', 'Thriller'],
['Drama', 'Thriller'],
['Animation', 'Adventure', 'Comedy'],
['Crime', 'Drama', 'Mystery'],
['Thriller'],
['Biography', 'Drama', 'Sport'],
['Crime', 'Drama', 'Thriller'],
['Drama', 'Music'],
['Crime', 'Drama', 'Thriller'],
['Drama', 'Romance'],
['Animation', 'Action', 'Adventure'],
['Comedy', 'Drama'],
['Action', 'Adventure', 'Drama'],
['Biography', 'Crime', 'Drama'],
['Horror'],
['Biography', 'Drama', 'Mystery'],
['Drama', 'Romance'],
['Animation', 'Drama', 'Romance'],
['Comedy', 'Family'],
['Drama'],
['Mystery', 'Thriller'],
['Drama', 'Fantasy', 'Horror'],
['Drama', 'Romance'],
['Biography', 'Drama', 'History'],
['Comedy', 'Family'],
['Action', 'Adventure', 'Thriller'],
['Comedy', 'Drama'],
['Action', 'Adventure', 'Fantasy'],
['Action', 'Thriller'],
['Drama', 'Romance'],
['Comedy', 'Drama', 'Romance'],
['Drama', 'Horror', 'Sci-Fi'],
['Comedy', 'Horror', 'Romance'],
['Drama'],
['Action', 'Adventure', 'Sci-Fi'],
['Action', 'Adventure', 'Fantasy'],
['Action', 'Adventure', 'Drama'],
['Biography', 'Comedy', 'Drama'],
['Drama', 'Mystery', 'Romance'],
['Animation', 'Adventure', 'Comedy'],
['Drama', 'Romance', 'Sci-Fi'],
['Drama'],
['Drama', 'Fantasy'],
['Drama', 'Romance'],
['Comedy', 'Horror', 'Thriller'],
['Comedy', 'Drama', 'Romance'],
['Crime', 'Drama'],
['Comedy', 'Romance'],
['Action', 'Drama', 'Family'],
['Comedy', 'Drama', 'Romance'],
['Action', 'Thriller', 'War'],
['Action', 'Comedy', 'Horror'],
['Biography', 'Drama', 'Sport'],
['Adventure', 'Comedy', 'Drama'],
['Comedy', 'Romance'],
['Comedy', 'Romance'],
['Comedy', 'Drama', 'Romance'],
['Action', 'Adventure', 'Crime'],
['Comedy', 'Romance'],
['Animation', 'Action', 'Adventure'],
['Action', 'Crime', 'Sci-Fi'],
['Drama'],
['Comedy', 'Drama', 'Romance'],
['Crime', 'Thriller'],
['Comedy', 'Horror', 'Sci-Fi'],
['Drama', 'Thriller'],
['Drama', 'Fantasy', 'Horror'],
['Thriller'],
['Adventure', 'Drama', 'Family'],
['Mystery', 'Sci-Fi', 'Thriller'],
['Biography', 'Crime', 'Drama'],
['Drama', 'Fantasy', 'Horror'],
['Action', 'Adventure', 'Thriller'],
['Crime', 'Drama', 'Horror'],
['Crime', 'Drama', 'Fantasy'],
['Adventure', 'Family', 'Fantasy'],
['Action', 'Adventure', 'Drama'],
['Action', 'Comedy', 'Horror'],
['Comedy', 'Drama', 'Family'],
['Action', 'Thriller'],
['Action', 'Adventure', 'Sci-Fi'],
['Adventure', 'Drama', 'Fantasy'],
['Drama'],
['Drama'],
['Comedy'],
['Drama'],
['Comedy', 'Drama', 'Music'],
['Drama', 'Fantasy', 'Music'],
['Drama'],
['Thriller'],
['Comedy', 'Horror'],
['Action', 'Comedy', 'Sport'],
['Horror'],
['Comedy', 'Drama'],
['Action', 'Drama', 'Thriller'],
['Drama', 'Romance'],
['Horror', 'Mystery'],
['Adventure', 'Drama', 'Fantasy'],
['Thriller'],
['Comedy', 'Romance'],
['Action', 'Sci-Fi', 'Thriller'],
['Fantasy', 'Mystery', 'Thriller'],
['Biography', 'Drama'],
['Crime', 'Drama'],
['Action', 'Adventure', 'Sci-Fi'],
['Adventure'],
['Comedy', 'Drama'],
['Comedy', 'Drama'],
['Comedy', 'Drama', 'Romance'],
['Adventure', 'Comedy', 'Drama'],
['Action', 'Sci-Fi', 'Thriller'],
['Comedy', 'Romance'],
['Action', 'Fantasy', 'Horror'],
['Crime', 'Drama', 'Thriller'],
['Action', 'Drama', 'Thriller'],
['Crime', 'Drama', 'Mystery'],
['Crime', 'Drama', 'Mystery'],
['Drama', 'Sci-Fi', 'Thriller'],
['Biography', 'Drama', 'History'],
['Crime', 'Horror', 'Thriller'],
['Drama'],
['Drama', 'Mystery', 'Thriller'],
['Adventure', 'Biography'],
['Adventure', 'Biography', 'Crime'],
['Action', 'Horror', 'Thriller'],
['Action', 'Adventure', 'Western'],
['Horror', 'Thriller'],
['Drama', 'Mystery', 'Thriller'],
['Comedy', 'Drama', 'Musical'],
['Horror', 'Mystery'],
['Biography', 'Drama', 'Sport'],
['Comedy', 'Family', 'Romance'],
['Drama', 'Mystery', 'Thriller'],
['Comedy'],
['Drama'],
['Drama', 'Thriller'],
['Biography', 'Drama', 'Family'],
['Comedy', 'Drama', 'Family'],
['Drama', 'Fantasy', 'Musical'],
['Comedy'],
['Adventure', 'Family'],
['Adventure', 'Comedy', 'Fantasy'],
['Horror', 'Thriller'],
['Drama', 'Romance'],
['Horror'],
['Biography', 'Drama', 'History'],
['Action', 'Adventure', 'Fantasy'],
['Drama', 'Family', 'Music'],
['Comedy', 'Drama', 'Romance'],
['Action', 'Adventure', 'Horror'],
['Comedy'],
['Crime', 'Drama', 'Mystery'],
['Horror'],
['Drama', 'Music', 'Romance'],
['Adventure', 'Comedy'],
['Comedy', 'Family', 'Fantasy']]
# Flatten the per-movie genre lists into one sequence and keep each distinct
# genre name exactly once.
genres = np.unique(
    [genre for genre_list in movie_genre for genre in genre_list]
)
genres
array(['Action', 'Adventure', 'Animation', 'Biography', 'Comedy', 'Crime',
'Drama', 'Family', 'Fantasy', 'History', 'Horror', 'Music',
'Musical', 'Mystery', 'Romance', 'Sci-Fi', 'Sport', 'Thriller',
'War', 'Western'], dtype='<U9')
# How many movies fall into each genre: start from an all-zero indicator table
# with one row per movie and one column per genre.
# The class is spelled `pd.DataFrame` (capital F); `pd.Dataframe` raises
# AttributeError. The shape is derived from the data instead of the literal
# [1000, 20] so the table always matches `movie_genre` and `genres`.
count = pd.DataFrame(
    np.zeros(shape=[len(movie_genre), len(genres)], dtype="int32"),
    columns=genres,
)
count.head()
| Action | Adventure | Animation | Biography | Comedy | Crime | Drama | Family | Fantasy | History | Horror | Music | Musical | Mystery | Romance | Sci-Fi | Sport | Thriller | War | Western |
|---|
| 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
|---|
| 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
|---|
| 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
|---|
| 3 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
|---|
| 4 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
|---|
# Fill in the table: for every movie, set the columns of the genres it
# belongs to to 1.
# Pairing the table's row labels with the genre lists avoids the hard-coded
# row count; zip stops at the shorter of the two, so no row outside the
# existing table is ever written.
for row_label, movie_genres in zip(count.index, movie_genre):
    count.loc[row_label, movie_genres] = 1
count
| Action | Adventure | Animation | Biography | Comedy | Crime | Drama | Family | Fantasy | History | Horror | Music | Musical | Mystery | Romance | Sci-Fi | Sport | Thriller | War | Western |
|---|
| 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 |
|---|
| 1 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 0 | 0 |
|---|
| 2 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 |
|---|
| 3 | 0 | 0 | 1 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
|---|
| 4 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
|---|
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
|---|
| 995 | 0 | 0 | 0 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 |
|---|
| 996 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
|---|
| 997 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 |
|---|
| 998 | 0 | 1 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
|---|
| 999 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 1 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 |
|---|
1000 rows × 20 columns
# Pie chart of the number of movies per genre, largest share first.
count.sum(axis=0).sort_values(ascending=False).plot.pie(
    figsize=(10, 10),
    fontsize=20,
)
# Bar chart of the same per-genre totals, drawn with the "cool" colormap.
count.sum(axis=0).sort_values(ascending=False).plot.bar(
    figsize=(20, 9),
    fontsize=40,
    colormap="cool",
)