【星海随笔】pandas综合应用

import  pandas   as  pd
import  numpy  as  np

生成一个 Series ，并对其进行操作

# Build a Series of mixed-case strings that ends with a missing value.
s = pd.Series(
    ["A", "b", "B", "gear", "AGER", np.nan],
)
# Lower-case every string element; non-string values (e.g. ints) would
# come back as NaN.
s.str.lower()
# Element-wise check: does each string contain the character "A"?
# (This is evaluated per element of the Series, not per column.)
s.str.contains("A")
# Split every string on "_" and one-hot encode the resulting tokens,
# giving one indicator column per distinct token.
s.str.get_dummies(sep="_")

# An Index whose labels carry stray leading/trailing spaces; it prints as
#   Index(['  name  ', ' class  ', '  meta '], dtype='object')
index = pd.Index(["  name  ", " class  ", "  meta "])
# Trim whitespace from both ends of every label.  Only the ends are
# stripped; the result is returned as a new object and is not stored here.
index.str.strip()

# A mixed Series: four strings plus one integer.
t1 = pd.Series(["A_a", "B_b", "c_c", "absd", 1])
# Split every element on "_".  expand=True spreads the pieces across
# columns, so the result is a DataFrame:
#         0     1
#   0     A     a
#   1     B     b
#   2     c     c
#   3  absd  None
#   4   NaN   NaN
# "absd" has no separator, so its second piece is missing; the integer 1
# is not a string, so its whole row is NaN.
t1.str.split("_", expand=True)
# n caps how many splits are performed per element (here: at most one).
t2 = t1.str.split("_", n=1, expand=True)

生成一个DataFrame ，并对其进行操作

# A 3x2 table of standard-normal samples with default integer labels.
df = pd.DataFrame(np.random.randn(3, 2))
# The same shape again, this time with explicit column names and row labels.
df = pd.DataFrame(
    np.random.randn(3, 2),
    index=range(3),
    columns=["A", "B"],
)
# Rewrite the column names, turning every space into an underscore.
df.columns = df.columns.str.replace(" ", "_")

索引进阶

# Six values labelled 5, 4, ..., 0 (a reversed integer range as the index).
s = pd.Series(
    ["A_a", "B", "b", "gear", "AGER", np.nan],
    index=np.arange(6)[::-1],
)
# Boolean mask marking the entries whose value equals "gear".
s.isin(["gear"])
# Rebuild the Series on a two-level index: the product of [0, 1] and
# ['a', 'b', 'c'].  It is still a Series and prints as:
#   0  a     A_a
#      b       B
#      c       b
#   1  a    gear
#      b    AGER
#      c     NaN
#   dtype: object
s = pd.Series(
    ["A_a", "B", "b", "gear", "AGER", np.nan],
    index=pd.MultiIndex.from_product([[0, 1], ["a", "b", "c"]]),
)
# Select the entry whose index tuple is (0, 'a') via a boolean mask
# computed on the index.
s.iloc[s.index.isin([(0, "a")])]

生成时间索引

# Eight consecutive dates starting at 2022-05-01.
dates = pd.date_range("20220501", periods=8)
# An 8x4 frame of standard-normal samples indexed by those dates.
df = pd.DataFrame(
    np.random.randn(8, 4),
    columns=list("ABCD"),
    index=dates,
)
# Keep the negative cells; every other cell becomes NaN.
df.where(df < 0)
# Keep the negative cells; every other cell is replaced by 1.
df.where(df < 0, 1)
# Keep the negative cells; every other cell is replaced by its value + 1.
df.where(df < 0, df + 1)
# Rows in which column A is smaller than column B.
df.query("(A < B)")
# Rows satisfying both conditions: A < B and B < C.
df.query("(A < B) & (B < C)")

pandas绘图

%matplotlib inline
import pandas as pd
import numpy as np

生成一个 Series（一维数据）

# Ten standard-normal samples labelled 0, 10, ..., 90
# (np.arange stops before the end value 100).
s = pd.Series(
    np.random.randn(10),
    index=np.arange(0, 100, 10),
)
# Plot the Series: index on the x-axis, values on the y-axis.
s.plot()

生成一个DataFrame

# A 10x4 frame of standard-normal samples; rows are labelled 0, 10, ..., 90.
df = pd.DataFrame(
    np.random.randn(10, 4),
    columns=list("ABCD"),
    index=np.arange(0, 100, 10),
)
# One chart with four lines, one per column (A, B, C, D).
df.plot()

import matplotlib.pyplot as plt
# plt.subplots(2, 1) returns (figure, axes).  Despite its name, fig2 holds
# the two stacked Axes objects, while fig1 is the Figure.
fig1, fig2 = plt.subplots(2, 1)
# Seven samples labelled 'a'..'g'.  Note that np.random.rand yields
# non-negative numbers, whereas np.random.randn yields both signs.
data = pd.Series(np.random.rand(7), index=list("abcdefg"))
# Vertical bars on the first axes, horizontal bars on the second.
data.plot(kind="bar", ax=fig2[0])
data.plot(kind="barh", ax=fig2[1])

# A 5x4 frame of np.random.rand samples with named rows and columns.
df = pd.DataFrame(
    np.random.rand(5, 4),
    columns=list("ABCD"),
    index=["one", "two", "three", "four", "five"],
)

# Bar chart of the whole frame.
df.plot(kind="bar")

# Histogram of the total_bill column of the `tips` DataFrame, using 50 bins.
# NOTE(review): `tips` is not created in this section; it has to be loaded
# before this line runs.
tips.total_bill.plot(bins=50, kind="hist")

# Scatter-plot matrix over the numeric columns of `macro` (black markers,
# alpha 0.3).  Avoid feeding it too many columns: the grid becomes hard
# to read.
# NOTE(review): `macro` is not created in this section; it has to be loaded
# before this line runs.
pd.plotting.scatter_matrix(macro, color='k', alpha=0.3)
# Pull out only the columns of interest so they can be plotted on their own.
macro_map = macro[['age', 'high', 'power']]
# Scatter plot of a single pair of columns.  `plot.scatter` is a method and
# has to be called with x=/y=; subscripting it raises
# "TypeError: 'method' object is not subscriptable".
macro_map.plot.scatter(x='age', y='high')

【星海随笔】pandas综合应用

Python相关栏目本月热门文章