import numpy as np
import pandas as pd

# Demo data for the selection notes below.
# A Series needs exactly one index label per value (4 values -> 4 labels).
t2 = pd.Series([1, 2, 3, 4], index=list('abcd'))
# 3x4 frame holding 0..11; rows are labelled a-c, columns W-Z.
# `columns` must be a collection of labels, not a bare string.
t3 = pd.DataFrame(
    np.arange(12).reshape(3, 4),
    index=list('abc'),
    columns=list('WXYZ'),
)

# --- Label-based selection with .loc ---
type(t3.loc['a', 'Z'])  # a single cell is a NumPy scalar, e.g. numpy.int64
t3.loc['a', :]  # one row, every column
t3.loc[['a', 'c'], :]  # select several rows
t3.loc[['a', 'c'], ['W', 'Z']]  # several rows and several columns

# --- Position-based selection with .iloc (positions are 0-based) ---
t3.iloc[1, :]  # the row at position 1, i.e. the second row
t3.iloc[:, [2, 1]]  # the columns at positions 2 and 1, in that order
t3.iloc[[0, 2], [2, 1]]  # position 2 is the third row
# A .loc label slice includes its end label; an .iloc position slice
# excludes its end position.
t3.iloc[1:, :2]  # rows from position 1 onward, columns before position 2
import pandas as pd

# The frame has to be loaded before it can be filtered.
df = pd.read_csv('./dongNames.csv')

# Boolean indexing: keep only the rows whose count exceeds 800.
print(df[df['Count_AnimalName'] > 800])

# Combine conditions with & (and) / | (or). Each condition must sit in its
# own parentheses because & binds more tightly than the comparisons.
print(df[(df['Count_AnimalName'] > 800) & (df['Count_AnimalName'] < 1000)])

# NOTE(review): the .str accessor only works on string data, but this column
# is compared with numbers above, so it is cast to str here to keep the line
# runnable. The note presumably meant to test the length of a text column
# instead -- confirm the intended column against the CSV header.
print(
    df[
        (df['Count_AnimalName'] > 700)
        & (df['Count_AnimalName'].astype(str).str.len() > 4)
    ]
)
# Missing-data notes, using the t2 Series and t3 DataFrame built earlier.

# Element-wise NaN masks; both functions require the object to inspect.
pd.isnull(t3)
pd.notnull(t3)

# Drop every row that contains at least one NaN (returns a new frame).
t3.dropna(axis=0, how='any')
# Same operation, but t3 itself is modified and nothing is returned.
t3.dropna(axis=0, how='any', inplace=True)

t2.fillna(0)  # replace every NaN with the value passed in
t2.fillna(t2.mean())  # replace every NaN with the mean of the Series

# Fill a single column with its own mean by assigning the result back.
# t2 is a Series and has no 'age' label, so the column-wise example uses a
# column of the t3 frame instead.
t3['W'] = t3['W'].fillna(t3['W'].mean())
# coding=utf-8
import pandas as pd
import numpy as np

# Load the movie table and show its first record.
file_path = "IMDB-Movie-Data.csv"
df = pd.read_csv(file_path)
print(df.head(1))

# Average of the "Rating" column.
rating_column = df["Rating"]
print(rating_column.mean())

# Number of distinct values in the "Director" column.
distinct_directors = df["Director"].unique()
print(len(distinct_directors))

# Number of distinct actors: split every "Actors" cell on ", ", flatten the
# per-row lists into one list, then count the unique names.
temp_actors_list = df["Actors"].str.split(", ").tolist()
actors_list = []
for cast in temp_actors_list:
    actors_list.extend(cast)
actors_num = len(set(actors_list))
print(actors_num)
# coding=utf-8
import pandas as pd
from matplotlib import pyplot as plt

file_path = "./IMDB-Movie-Data.csv"
df = pd.read_csv(file_path)

# Plot the distribution of the "Rating" column as a histogram.
# NOTE(review): the variables below are named *runtime* but hold the
# "Rating" column; the names are kept so other code can still refer to them.
runtime_data = df["Rating"].values
max_runtime = runtime_data.max()
min_runtime = runtime_data.min()

# Number of bins for a bin width of roughly 0.5.
print(max_runtime - min_runtime)
# Floor division by a float yields a float, but the histogram needs an
# integer bin count; use at least one bin when the range is under 0.5.
num_bin = max(1, int((max_runtime - min_runtime) // 0.5))

# Figure size
plt.figure(figsize=(20, 8), dpi=80)
plt.hist(runtime_data, num_bin)

# x-axis ticks every 0.5, starting at the minimum and running slightly past
# the maximum.
_x = [min_runtime]
i = min_runtime
while i <= max_runtime + 0.5:
    i = i + 0.5
    _x.append(i)
plt.xticks(_x)

plt.show()