- pandas之Series创建
- pandas之Series切片和索引
- pandas之DataFrame
numpy能够帮助我们处理数值型数据;而pandas(基于numpy)除了处理数值之外,还能够帮助我们处理其他类型的数据。
pandas的常用数据类型:
1.Series 一维,带标签数组
2.DataFrame 二维,Series容器
# coding=utf-8
import numpy as np

# Paths to the per-country CSV files. They get their own names so that the
# path strings are not overwritten by the arrays loaded from them.
us_file_path = "./youtube_video_data/US_video_data_numbers.csv"
uk_file_path = "./youtube_video_data/GB_video_data_numbers.csv"

# Load each country's data as an integer array. ndmin=2 keeps the result
# two-dimensional even when a file holds a single row, so shape[0] is always
# the row count and hstack/vstack below receive 2-D inputs.
us_data = np.loadtxt(us_file_path, delimiter=",", dtype=int, ndmin=2)
uk_data = np.loadtxt(uk_file_path, delimiter=",", dtype=int, ndmin=2)

# Add the country information: build one label column per dataset,
# all zeros for the US rows and all ones for the UK rows.
zeros_data = np.zeros((us_data.shape[0], 1), dtype=int)
ones_data = np.ones((uk_data.shape[0], 1), dtype=int)

# Append the label column to the right-hand side of each dataset.
us_data = np.hstack((us_data, zeros_data))
uk_data = np.hstack((uk_data, ones_data))

# Stack the two labelled datasets vertically into a single array.
final_data = np.vstack((us_data, uk_data))
print(final_data)

# Section heading from the notes (was fused onto the print call above):
# "pandas之Dataframe" (pandas DataFrame)
from pymongo import MongoClient
import pandas as pd
# Open a MongoDB connection with the client's default settings and select
# the "tv1" collection of the "douban" database.
client = MongoClient()
collection = client["douban"]["tv1"]
data = collection.find()

# Flatten every document into a plain dict so that the DataFrame gets one
# column per field; the nested "rating" sub-document is split into two columns.
data_list = []
for doc in data:
    data_list.append(
        {
            "info": doc["info"],
            "rating_count": doc["rating"]["count"],
            "rating_value": doc["rating"]["value"],
            "title": doc["title"],
            "country": doc["tv_category"],
            "directors": doc["directors"],
            "actors": doc["actors"],
        }
    )

# A single record could also be inspected as a Series:
# t1 = data[0]
# t1 = pd.Series(t1)
# print(t1)

# The pandas class is spelled ``DataFrame``; ``pd.Dataframe`` does not exist
# and raises AttributeError.
df = pd.DataFrame(data_list)
# print(df)

# Show the first row(s).
print(df.head(1))
# print("*"*100)
# print(df.tail(2))

# Overview of the DataFrame:
# print(df.info())
# print(df.describe())

# Split every "info" string on "/" and print the result as a list of lists.
print(df["info"].str.split("/").tolist())
# coding=utf-8
import pandas as pd
import numpy as np
# Load the IMDB movie table from the CSV file.
file_path = "IMDB-Movie-Data.csv"
df = pd.read_csv(file_path)
# print(df.info())
first_row = df.head(1)
print(first_row)

# Mean of the "Rating" column.
ratings = df["Rating"]
print(ratings.mean())

# Number of distinct values in the "Director" column.
# print(len(set(df["Director"].tolist())))
distinct_directors = df["Director"].unique()
print(len(distinct_directors))

# Number of distinct actors: every "Actors" cell is split on ", " and the
# resulting names are pooled into one set, so each name is counted once.
unique_actors = set()
for cast in df["Actors"].str.split(", "):
    unique_actors.update(cast)
actors_num = len(unique_actors)
print(actors_num)
# coding=utf-8
import pandas as pd
# Load the dog-name counts from the CSV file.
dog_names = pd.read_csv("./dogNames2.csv")
# print(dog_names.head())
# print(dog_names.info())

# Sorting a DataFrame: order the rows by "Count_AnimalName", largest first.
df = dog_names.sort_values("Count_AnimalName", ascending=False)
# print(df.head(5))

# Notes on selecting rows vs. columns in pandas:
# - a slice inside the square brackets selects rows
# - a string inside the square brackets is a column label and selects
#   that column
top_rows = df[:20]
print(top_rows)

name_column = df["Row_Labels"]
print(name_column)
print(type(name_column))



