import pandas as pd
if __name__ == '__main__':
    # Build a Series from a plain list; pandas supplies the default
    # integer index (0, 1, 2, ...).
    courses = ['语文', '数学', '英语', '计算机']
    result = pd.Series(courses)
    print(result)
"""
0 语文
1 数学
2 英语
3 计算机
dtype: object
"""
2.Dict转Series
import pandas as pd
# Build a Series from a dict: the keys become the index labels and the
# values become the data.
grades = {
    '语文': 80,
    '数学': 90,
    '英语': 100,
}
result = pd.Series(data=grades)
print(result)
"""
语文 80
数学 90
英语 100
dtype: int64
"""
3.Series转List
import pandas as pd
if __name__ == '__main__':
    # Series.tolist() returns the values as a plain Python list; the
    # index labels are not included.
    grades = {'语文': 80, '数学': 90, '英语': 100}
    result = pd.Series(grades)
    print(result.tolist())
"""
[80, 90, 100]
"""
4.Series转DataFrame
import pandas as pd
if __name__ == '__main__':
    # Wrap a Series in a one-column DataFrame; the Series index is kept
    # as the row index.
    grades = {'语文': 80, '数学': 90, '英语': 100}
    tmp = pd.Series(grades)
    # The class is spelled ``DataFrame``; ``pd.Dataframe`` does not exist
    # and raises AttributeError.
    result = pd.DataFrame(tmp, columns=['grade'])
    print(result)
"""
grade
语文 80
数学 90
英语 100
"""
5.Numpy创建Series
import pandas as pd, numpy as np
if __name__ == '__main__':
    # Nine values (10, 20, ..., 90) stored as floats and labelled with
    # the integers 101..109.
    values = np.arange(10, 100, 10)
    labels = np.arange(101, 110)
    s = pd.Series(values, index=labels, dtype='float')
    print(s)
101    10.0
102    20.0
103    30.0
104    40.0
105    50.0
106    60.0
107    70.0
108    80.0
109    90.0
dtype: float64
6.转换Series的数据类型
import pandas as pd
if __name__ == '__main__':
    s = pd.Series(
        data=["001", "002", "003", "004"],
        index=list("abcd"),
    )
    print(s)
    # Two ways to turn the numeric strings into integers:
    print(s.astype(int))  # cast the whole Series to a target dtype
    print(s.map(int))     # apply a function to every element
a    001
b    002
c    003
d    004
dtype: object
a    1
b    2
c    3
d    4
dtype: int32
a    1
b    2
c    3
d    4
dtype: int64
7.添加新数据
import pandas as pd
if __name__ == '__main__':
    s = pd.Series(data={'语文': 99, '数学': 100})
    extra = pd.Series(data={'英语': 150})
    # Series.append was deprecated in pandas 1.4 and removed in 2.0;
    # pd.concat produces the same combined Series on old and new versions.
    s = pd.concat([s, extra])
    print(s)
语文     99
数学    100
英语    150
dtype: int64
8.reset index 转换为df
import pandas as pd
if __name__ == '__main__':
    s = pd.Series(data={'语文': 99, '数学': 100})
    # reset_index() moves the index labels into an ordinary column and
    # returns a DataFrame, which is rebound to ``s`` here.
    s = s.reset_index()
    s.columns = ['project', 'grade']
    print(s)
  project  grade
0      语文     99
1      数学    100
9.Dict创建df
import pandas as pd
if __name__ == '__main__':
    # Each dict key becomes a column; the lists supply the row values.
    # The class is spelled ``DataFrame`` (``pd.Dataframe`` raises
    # AttributeError).
    df = pd.DataFrame(
        data={
            '姓名': ['herio', 'xiaoo', 'gsda'],
            '性别': ['男', '女', '男'],
            '年龄': [18, 20, 19],
        }
    )
    print(df)
      姓名 性别  年龄
0  herio  男  18
1  xiaoo  女  20
2   gsda  男  19
10.df设置索引列
import pandas as pd
if __name__ == '__main__':
    # The class is spelled ``DataFrame`` (``pd.Dataframe`` raises
    # AttributeError).
    df = pd.DataFrame(
        data={
            '姓名': ['herio', 'xiaoo', 'gsda'],
            '性别': ['男', '女', '男'],
            '年龄': [18, 20, 19],
        }
    )
    # Promote the name column to the row index, modifying df in place.
    df.set_index('姓名', inplace=True)
    print(df)
      性别  年龄
姓名
herio  男  18
xiaoo  女  20
gsda   男  19
11.生成日期
import pandas as pd
if __name__ == '__main__':
    # Two ways to request the days of January 2022: by explicit end date,
    # or by a start date plus a number of periods.
    res = pd.date_range(start='2022-01-01', end='2022-01-31')
    res_1 = pd.date_range(start='2022-01-01', periods=31)
    # The separator must be a newline escape; a bare 'n' would join the
    # two results with the letter "n".
    print(res, res_1, sep='\n')
取每年的所有周一(freq)
import pandas as pd
if __name__ == '__main__':
    # 'W-MON' is the weekly frequency anchored on Mondays.
    res = pd.date_range(start='2022-01-01', end='2022-12-31', freq='W-MON')
    print(res)
生成某一天的二十四个小时的日期
import pandas as pd
if __name__ == '__main__':
    # Two ways to list the 24 hours of 2022-01-01.
    # The lowercase 'h' alias is used because uppercase 'H' is deprecated
    # since pandas 2.2.
    res = pd.date_range(start='2022-01-01', periods=24, freq='h')
    # ``closed=`` was removed from date_range in pandas 2.0; ``inclusive=``
    # (available since 1.4) replaces it. 'left' keeps the start point and
    # drops the end point (2022-01-02 00:00).
    res_1 = pd.date_range(
        start='2022-01-01',
        end='2022-01-02',
        inclusive='left',
        freq='h',
    )
    print(res)
    print(res_1)
日期生成DataFrame
import pandas as pd
if __name__ == '__main__':
    # 31 consecutive days starting on 2022-02-01.
    data = pd.date_range(start='2022-02-01', periods=31)
    # The class is spelled ``DataFrame`` (``pd.Dataframe`` raises
    # AttributeError).
    res = pd.DataFrame(data=data, columns=['day'])
    # The .dt accessor exposes datetime fields; day_of_year is the ordinal
    # day within the year.
    res['day_of_year'] = res['day'].dt.day_of_year
    print(res)
          day  day_of_year
0  2022-02-01           32
1  2022-02-02           33
2  2022-02-03           34
.....
29 2022-03-02           61
30 2022-03-03           62
生成随机数据列df
import pandas as pd
import numpy as np
if __name__ == '__main__':
    # One row per day, starting 2022-01-01.
    n_rows = 1000
    year = pd.date_range(start='2022-01-01', periods=n_rows)
    data = {
        'normal': np.random.normal(loc=0, scale=1, size=n_rows),
        'uniform': np.random.uniform(low=0, high=1, size=n_rows),
        # ``size`` is required: without it a single draw is returned and
        # broadcast to every row, so the whole column is one constant.
        'binomial': np.random.binomial(n=1, p=0.2, size=n_rows),
    }
    # The class is spelled ``DataFrame`` (``pd.Dataframe`` raises
    # AttributeError).
    df = pd.DataFrame(data=data, index=year)
    print(df)
              normal   uniform  binomial
2022-01-01 -1.212357  0.561198         0
2022-01-02  1.455127  0.671026         0
2022-01-03  1.458189  0.922212         0
2022-01-04 -0.164604  0.948922         0
2022-01-05 -0.292973  0.602961         0
...              ...       ...       ...
2024-09-22 -0.350369  0.788879         0
2024-09-23 -0.716147  0.671242         0
2024-09-24 -0.345326  0.282493         0
2024-09-25  0.000214  0.735941         0
2024-09-26  0.072581  0.719543         0
打印前10行和后5行
# Preview the first ten and the last five rows, separated by a blank line.
print(df.head(10), end='\n\n')
print(df.tail(5))
描述基本信息
# DataFrame.info() writes its summary to stdout itself and returns None,
# so printing the return value adds a trailing "None" line.
info_result = df.info()
print(info_result)
# Summary statistics for the numeric columns.
summary = df.describe()
print(summary)
DatetimeIndex: 1000 entries, 2022-01-01 to 2024-09-26
Freq: D
Data columns (total 3 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 normal 1000 non-null float64
1 uniform 1000 non-null float64
2 binomial 1000 non-null int64
dtypes: float64(2), int64(1)
memory usage: 31.2 KB
None
normal uniform binomial
count 1000.000000 1000.000000 1000.0
mean -0.038351 0.513840 0.0
std 1.000126 0.289779 0.0
min -3.250206 0.000009 0.0
25% -0.732684 0.263531 0.0
50% -0.091297 0.521737 0.0
75% 0.612340 0.773006 0.0
max 3.682969 0.997907 0.0
统计数据列的值出现的次数
print(df['binomial'].value_counts())
前50行数据存到csv文件中
# Persist only the first 50 rows; the index is written as the first column.
first_fifty = df.head(50)
first_fifty.to_csv("数据前50行.csv")
csv读取为DataFrame
import pandas as pd
import numpy as np
if __name__ == '__main__':
    # index_col=0 uses the first CSV column as the row index.
    df = pd.read_csv("数据前50行.csv", index_col=0)
    # info() prints its own report and returns None, so this also prints
    # a trailing "None" line.
    print(df.info())
    print(df.head(10))
12.股票数据
索引列设置为普通列
df.reset_index(inplace=True)
添加年份和月
import pandas as pd
if __name__ == '__main__':
    df = pd.read_csv("00700.HK.csv")
    # Parse the Date strings into datetimes so the .dt accessor works.
    df['Date'] = pd.to_datetime(df['Date'])
    # Derive calendar year and month as separate columns.
    df['Year'] = df['Date'].dt.year
    df['Month'] = df['Date'].dt.month
    print(df.head(10))
        Date   Open   High    Low  Close    Volume  Year  Month
0 2021-09-30  456.0  464.6  453.8  461.4  17335451  2021      9
1 2021-09-29  461.6  465.0  450.2  465.0  18250450  2021      9
2 2021-09-28  467.0  476.2  464.6  469.8  20947276  2021      9
3 2021-09-27  459.0  473.0  455.2  464.6  17966998  2021      9
4 2021-09-24  461.4  473.4  456.2  460.2  16656914  2021      9
5 2021-09-23  460.2  469.6  456.4  463.2  22210868  2021      9
6 2021-09-21  446.0  453.8  443.2  450.0  16556875  2021      9
7 2021-09-20  456.6  457.4  448.0  454.2  15513224  2021      9
8 2021-09-17  445.8  467.6  445.2  461.8  23982628  2021      9
9 2021-09-16  446.8  454.8  445.0  451.0  24519868  2021      9
按年份分组对Close字段求平均值
# Mean closing price for each year.
yearly_close_mean = df.groupby('Year')['Close'].mean()
print(yearly_close_mean)
求Close最小值和对应的索引行
import pandas as pd
if __name__ == '__main__':
    df = pd.read_csv("00700.HK.csv")
    # Parse the Date strings into datetimes so the .dt accessor works.
    df['Date'] = pd.to_datetime(df['Date'])
    df['Year'] = df['Date'].dt.year
    df['Month'] = df['Date'].dt.month
    print(df['Close'].min())
    # idxmin() returns the index *label* of the minimum, which is what
    # .loc expects. argmin() returns a position and only coincides with
    # the label while the frame keeps its default 0..n-1 index.
    min_label = df['Close'].idxmin()
    print(min_label)
    # The double brackets select the row as a one-row DataFrame.
    print(df.loc[[min_label]])
3.375
4240
Date Open High Low Close Volume Year Month
4240 2004-07-26 3.45 3.5 3.375 3.375 7439000 2004 7
只处理需要的列
print(df[['Year', 'Open', 'High']].head(5))
删除不需要的列
# Remove the two price columns in place, then preview the result.
unwanted = ['Low', 'High']
df.drop(columns=unwanted, inplace=True)
print(df.head(5))
        Date   Open  Close    Volume  Year  Month
0 2021-09-30  456.0  461.4  17335451  2021      9
1 2021-09-29  461.6  465.0  18250450  2021      9
2 2021-09-28  467.0  469.8  20947276  2021      9
3 2021-09-27  459.0  464.6  17966998  2021      9
4 2021-09-24  461.4  460.2  16656914  2021      9
对列重命名
# Rename selected columns through a mapping. (Assigning a complete list of
# names to df.columns would rename every column at once instead.)
column_map = {'Date': 'D'}
df.rename(columns=column_map, inplace=True)
print(df.head(5))
           D   Open   High    Low  Close    Volume  Year  Month
0 2021-09-30  456.0  464.6  453.8  461.4  17335451  2021      9
1 2021-09-29  461.6  465.0  450.2  465.0  18250450  2021      9
2 2021-09-28  467.0  476.2  464.6  469.8  20947276  2021      9
3 2021-09-27  459.0  473.0  455.2  464.6  17966998  2021      9
4 2021-09-24  461.4  473.4  456.2  460.2  16656914  2021      9



