@[目录]
numpy用于大型多维数组的数值计算
numpy的定义
#!/usr/bin/env python
# -*- coding:utf-8 -*-
"""
@author: lyf
@file: Task001.py
@time: 2021/10/28
@desc: creating NumPy arrays and working with their dtypes
"""
import numpy as np
import random

# Three ways to build an array: from a list, from a range, and with arange.
t1 = np.array([1, 2, 3])
print(t1)
print(t1.dtype)
t2 = np.array(range(10))
print(t2)
print(t2.dtype)
t3 = np.arange(10)
print(t3)
print(t3.dtype)

# Choose the dtype explicitly when the array is created.
t4 = np.array([1, 2, 3], dtype='i1')
print(t4)
print(t4.dtype)
t5 = np.array([1, 1, 0, 0, 1, 1, 0], dtype='bool')
print(t5)
print(t5.dtype)

# Convert an existing array to another dtype; astype returns a new array.
t6 = t5.astype(dtype='i1')
print(t6)
print(t6.dtype)

# Floating-point values: ten random numbers, then rounded to 3 decimals.
t7 = np.array([random.random() for _ in range(10)])
print(t7)
print(t7.dtype)
t8 = np.round(t7, 3)
print(t8)
numpy的形状与计算
import numpy as np
# Inspect an array's dimensions through its `shape` attribute.
t1 = np.array([[1, 2, 3], [4, 5, 6]])
print('t1')
print(t1)
print(t1.shape)
print('*' * 50)
# The nested lists below are ragged (the sub-lists differ in depth and
# length), so they cannot form a regular numeric array. NumPy >= 1.24 raises
# ValueError for such input unless dtype=object is requested explicitly.
t2 = np.array([[[1, 2], [3, 4]], [5, 6], [[7, 8, 9], [10, 11]]], dtype=object)
print('t2')
print(t2)
print(t2.shape)
# Changing the shape of a 2-D array:
#   reshape(1, n)         -> still 2-D, a single row of n items
#   reshape(rows * cols,) -> 1-D
#   flatten()             -> 1-D copy
print('*' * 50)
t3 = t1.reshape(1, 6)
print('t3')
print(t3)
print(t3.shape)
print('*' * 50)
t4 = t1.reshape(t1.shape[0] * t1.shape[1],)
print('t4')
print(t4)
print(t4.shape)
print('*' * 50)
t5 = t1.flatten()
print('t5')
print(t5)
# Array-with-scalar arithmetic: the scalar is broadcast to every element.
print('*' * 50)
t6 = t1 + 2
print('t6')
print(t6)
# Array-with-array arithmetic on equal shapes is applied element by element.
print('*' * 50)
t7 = np.arange(6).reshape(2, 3)
print('t7')
print(t7)
t8 = t7 + t1
print('t8')
print(t8)
# Different shapes: the (3,) array t9 is broadcast across each row of the
# (2, 3) array t1.
print('*' * 50)
t9 = np.arange(3)
print('t1')
print(t1)
print('t9')
print(t9)
t10 = t1 + t9
print('t10')
print(t10)
numpy读取本地CSV数据文件
np.loadtxt(fname,dtype=float,delimiter=None,skiprows=0,usecols=None,unpack=False)
import numpy as np
# Location of the CSV file to load.
uk_file_path = './gb_videos_data_numbers.csv'
print('*' * 50)
t1 = np.loadtxt(uk_file_path, delimiter=',')
print('t1', t1, sep='\n')
print('*' * 50)
# unpack=True makes loadtxt return the transposed data.
t2 = np.loadtxt(uk_file_path, delimiter=',', unpack=True)
print('t2', t2, sep='\n')
# Three ways to transpose in NumPy: transpose(), the .T attribute, and
# swapaxes(1, 0), which exchanges the two axes.
print('*' * 50)
t3 = t1.transpose()
print('t3', t3, sep='\n')
t4 = t1.T
print('t4', t4, sep='\n')
t5 = t1.swapaxes(1, 0)
print('t5', t5, sep='\n')
numpy中的索引和切片
import numpy as np
# Location of the CSV file to load.
uk_file_path = './gb_videos_data_numbers.csv'
t1 = np.loadtxt(uk_file_path, delimiter=',')
print('t1', t1, sep='\n')

# --- Selecting rows ---
# t1[2] is equivalent to t1[2, :].
print('t1中的第三行', t1[2], sep='\n')
# t1[2:] is equivalent to t1[2:, :].
print('t1中第三行及其以后的行', t1[2:], sep='\n')
# A list of indices picks non-adjacent rows; same as t1[[2, 4, 7], :].
print('t1中不连续的多行,第3,5,8行', t1[[2, 4, 7]], sep='\n')

# --- Selecting columns ---
print('取t1中第一列', t1[:, 0], sep='\n')
print('取连续多列', t1[:, 3:], sep='\n')
print('取不连续的多列', t1[:, [1, 3]], sep='\n')

# --- Selecting rows and columns together ---
print('取第三行第四列', t1[2, 3], sep='\n')
# Two slices give the sub-block where rows 2..5 cross columns 1..2.
print('取连续的多行和多列', t1[2:6, 1:3], sep='\n')
# Two index lists are paired element-wise, giving three single values:
# (row 1, col 0), (row 7, col 2) and (row 3, col 0), i.e. 2nd row / 1st
# column, 8th row / 3rd column, and 4th row / 1st column.
print('取不连续的多行和多列', t1[[1, 7, 3], [0, 2, 0]], sep='\n')
numpy中数值的修改
直接赋值即可
import numpy as np
t = np.array([[2, 3, 4], [3, 4, 6], [77, 8, 6], [2, 3, 4]])

# Assigning a scalar to a row overwrites every element of that row.
print('更改整行的值')
t[1, :] = 1
print(t)

# A boolean mask selects the elements to overwrite.
print('更改指定大小的值')
t[t > 5] = 10
print(t)

# np.where(cond, a, b) acts like an element-wise if/else.
print('测试where的使用')
t1 = np.where(t < 3, 0, 99)
print(t1)

# clip(a, b): values below a become a, values above b become b.
t2 = np.clip(t, 3, 9)
print(t2)

# NaN is a floating-point value, so the array must be converted to a float
# dtype before NaN can be stored in it.
t = t.astype(np.float64)
t[2][1] = np.nan
print(t)
numpy中的nan
import numpy as np
t = np.array([[1, 2, 3, 4], [3, 4, 5, 6], [2, 4, np.nan, 6], [np.nan, 8, 5, 3]])
t[1] = 0
print('t')
print(t)

# Count the zeros. np.count_nonzero(t) alone would count the NON-zero
# elements (NaN included), so count the True entries of the mask `t == 0`.
print('统计数组中0的个数')
zero_count = np.count_nonzero(t == 0)
print(zero_count)

# np.isnan gives a boolean mask; its True entries are the NaN positions.
print('统计数组中nan的个数')
nan_count = np.count_nonzero(np.isnan(t))
print(nan_count)

# axis=0 adds the rows together, giving one total per column.
# Any total that includes a NaN is itself NaN.
print('行求和')
print(np.sum(t, axis=0))

# axis=1 adds the columns together, giving one total per row.
print('列求和')
print(np.sum(t, axis=1))
sum, mean, median, max, min, ptp(极差,即最大值与最小值之差,np提供), std(标准差)
import numpy as np
def fill_ndarray(t1):
    """Replace the NaNs in each column of a 2-D array with that column's mean.

    The mean is taken over the column's non-NaN values only. The array is
    modified in place and is also returned for convenience.

    Args:
        t1: 2-D NumPy array of floats (columns are read as ``t1[:, i]``).

    Returns:
        The same array object ``t1`` after filling.
    """
    for col_idx in range(t1.shape[1]):
        column = t1[:, col_idx]  # a view: writing to it updates t1 itself
        nan_mask = np.isnan(column)
        # Skip columns with nothing to fill, and columns that are entirely
        # NaN: those have no valid values to average, and taking the mean of
        # an empty selection would only emit a RuntimeWarning and yield NaN.
        if not nan_mask.any() or nan_mask.all():
            continue
        column[nan_mask] = column[~nan_mask].mean()
    return t1
print('将数列中的nan替换为均值,或者中值')
if __name__ == '__main__':
    # Build a 3x4 float matrix holding 0..11.
    t1 = np.arange(12).reshape((3, 4)).astype(float)
    print('t1原始矩阵')
    print(t1)
    # Blank out the last two entries of the second row.
    t1[1, 2:] = np.nan
    print('t1赋值nan后的矩阵')
    # Show the matrix announced by the label above, before it is filled.
    print(t1)
    # fill_ndarray works in place, so t2 is the same array object as t1.
    t2 = fill_ndarray(t1)
    print('t2')
    print(t2)
绘制直方图
import numpy as np
from matplotlib import pyplot as plt

uk_file_path = './gb_videos_data_numbers.csv'
t_uk = np.loadtxt(uk_file_path, delimiter=',', dtype='int')

# The last column holds the comment counts.
t_uk_comments = t_uk[:, -1]
# Keep only the videos with at most 10000 comments.
t_uk_comments = t_uk_comments[t_uk_comments <= 10000]

# d is the intended bin width; the number of bins is the data range divided
# by it. Use at least one bin so a range narrower than d still plots.
d = 500
bin_nums = max(int(t_uk_comments.max() - t_uk_comments.min()) // d, 1)

plt.figure(figsize=(20, 8), dpi=80)
plt.hist(t_uk_comments, bin_nums)
plt.show()
散点图
import numpy as np
from matplotlib import pyplot as plt

uk_file_path = './gb_videos_data_numbers.csv'
t_uk = np.loadtxt(uk_file_path, delimiter=',', dtype='int')

# Keep only the rows whose value in column 1 is below 20000.
t_uk = t_uk[t_uk[:, 1] < 20000]

# x axis: last column (comments); y axis: column 1 (likes).
t_uk_comments = t_uk[:, -1]
t_uk_like = t_uk[:, 1]

plt.scatter(t_uk_comments, t_uk_like)
plt.show()
numpy数组的拼接
np.vstack:竖直拼接;np.hstack:水平拼接。
import numpy as np
t1 = np.array([[1, 2, 3], [2, 3, 4]])
t2 = np.array([[2, 6, 4], [2, 9, 1]])
print('t1')
print(t1)
print('t2')
print(t2)

# vstack stacks the arrays on top of each other (more rows);
# hstack places them side by side (more columns).
print('拼接')
t3 = np.vstack((t1, t2))
t4 = np.hstack((t1, t2))
print('t3')
print(t3)
print('t4')
print(t4)

# Swap the two rows by listing the row indices in the new order.
print('行交换')
t5 = t2[[1, 0], :]
print(t5)

# Swap the first two columns. Every column index must be listed: indexing
# with only [1, 0] would silently drop the third column rather than swap.
print('列交换')
t6 = t2[:, [1, 0, 2]]
print(t6)
numpy生成数组
import numpy as np
# Arrays filled with ones / zeros. The shape must be passed as ONE tuple:
# in np.zeros(4, 3) the 3 would be taken as the dtype and raise TypeError.
t1 = np.ones((3, 2))
t2 = np.zeros((4, 3))
# 10x10 identity matrix.
np.eye(10)
# Index of the largest value in each row / smallest value in each column.
np.argmax(t1, axis=1)
np.argmin(t2, axis=0)
print('numpy生成随机数')
# Random numbers: rand draws uniform samples from [0, 1), randn draws from
# the standard normal distribution; the arguments give the output shape.
np.random.rand(2, 3)
np.random.randn(2, 3)



