# -*- coding: utf-8 -*-
"""
Created on Mon May 25 10:41:17 2020
@author: Daisy Chan
"""
import numpy as np
# =============================================================================
# NumPy is the basic software package for scientific computing using Python.
# =============================================================================
# A NumPy array holds elements of one single data type (dtype).
# np_bool_array is a plain nested Python list; np.array() turns it into a
# 3x2 ndarray of booleans.
np_bool_array = [[True, False], [True, True], [False, True]]
bool_arr = np.array(np_bool_array)
print(bool_arr)                # Prints the 3x2 boolean array
# Array of zeros (float64 by default)
a = np.zeros((2, 2))           # Create an array of all zeros
print(a)                       # Prints "[[0. 0.]
                               #          [0. 0.]]"
b = np.ones((1, 2))            # Create an array of all ones
print(b)                       # Prints "[[1. 1.]]"
# Constant-filled array. The dtype follows the fill value, so filling with
# the integer 7 gives an integer array, not a float one.
c = np.full((2, 2), 7)         # Create a constant array
print(c)                       # Prints "[[7 7]
                               #          [7 7]]"
# Identity matrix
d = np.eye(2)                  # Create a 2x2 identity matrix
print(d)                       # Prints "[[1. 0.]
                               #          [0. 1.]]"
# Evenly spaced values within the half-open interval [start, stop).
unit_steps = np.arange(-3, 4)
print(unit_steps)              # Values -3, -2, -1, 0, 1, 2, 3
double_steps = np.arange(-3, 4, 2)
print(double_steps)            # Values -3, -1, 1, 3
# Random array: np.random.random draws floats uniformly from [0.0, 1.0).
e = np.random.random((2, 2))   # Create an array filled with random values
print(e)                       # Might print "[[0.91940167 0.08143941]
                               #               [0.68744134 0.87236687]]"
# =============================================================================
# Slicing and Indexing
# =============================================================================
array = np.array([2, 4, 6, 8, 10])
# Indexing starts at 0 and the stop index of a slice is exclusive, so 2:4
# selects the elements at positions 2 and 3.
middle = array[2:4]
print(middle)                  # Prints "[6 8]"
array = np.array([[1, 3, 5, 7], [2, 4, 6, 8]])
# array[0, :3] picks the row and the column range in a single indexing step;
# it selects the same elements as the chained form array[0][:3].
first_three = array[0, :3]
print(first_three)             # Prints "[1 3 5]"
# A negative index counts positions from the right-hand end; -4: starts four
# elements from the end, which here is the whole first row.
last_four = array[0, -4:]
print(last_four)               # Prints "[1 3 5 7]"
# =============================================================================
# Numpy data type
# =============================================================================
# np.int32 can represent 2^32 distinct integer values (-2^31 to 2^31 - 1)
# sample numpy data types:
# np.int64, np.int32, np.float64, np.float32
# Every array exposes its element type through the .dtype attribute.
float_dtype = np.array([1.3, 5.7, 175.5]).dtype
print(float_dtype)             # float64
bool_dtype = np.array(np_bool_array).dtype
print(bool_dtype)              # bool
int_dtype = np.array([[1, 2, 3], [4, 5, 6]]).dtype
print(int_dtype)               # platform default integer, typically int64
# Mixing numbers with a string coerces every element to a Unicode string,
# because an array can hold only one dtype.
mixed_dtype = np.array([[1, 2, 3], ['hello', 5, 6]]).dtype
print(mixed_dtype)             # a fixed-width Unicode dtype such as <U21
# Type conversion with .astype(): True becomes 1 and False becomes 0.
converted_dtype = np.array(np_bool_array).astype(int).dtype
print(converted_dtype)         # platform default integer, typically int64
# =============================================================================
# Using boolean operators with numpy/ numpy中的逻辑与或非
# =============================================================================
# Comparison operators such as < and >= work element-wise on NumPy arrays
# out of the box. Unfortunately, this is not true for the boolean operators
# and, or, and not.
# To use these operators with NumPy, you will need
# np.logical_and(), np.logical_or() and np.logical_not().
lst = np.array([18.0, 20.0, 10.75, 9.50])
# Element-wise AND of two boolean masks: True where 13 < value <= 18.
in_range = np.logical_and(lst > 13, lst <= 18)
print(in_range)                # Prints "[ True False False False]"
# =============================================================================
# array dimensionality 数组维度
# =============================================================================
# 2_D_array_A = np.array([1_D_array_A, 1_D_array_B])
# ...
# N_D_array_A = np.array([(N-1)_D_array_A, (N-1)_D_array_B, ...])
#
# Three 2x2 layers stacked along a new leading axis give a (3, 2, 2) array:
#   ^
#   |         6,  8  /
#   |        10, 12 /
#   |     5, 6  /
#   |     7, 8 /
#   | 1, 2 /
#   | 3, 4 /
#   -------->
# x is array([[1., 2.],
#             [3., 4.]])
x = np.array([[1, 2], [3, 4]], dtype=np.float64)
# y is array([[5., 6.],
#             [7., 8.]])
y = np.array([[5, 6], [7, 8]], dtype=np.float64)
# Stacking three 2x2 arrays in a list adds a new leading axis of length 3.
z = np.array([x, y, np.array([[6., 8.], [10., 12.]])])
print(z.shape)                 # Prints "(3, 2, 2)"
# =============================================================================
# Flattening and reshaping
# =============================================================================
# flatten() returns a 1-D copy holding all 12 elements in row-major order.
print(z.flatten())
# reshape() needs the same total element count: 3 * 2 * 2 == 3 * 4.
print(z.reshape(3, 4))
# =============================================================================
# Filtering & Sorting
# =============================================================================
array = np.random.random((2, 2))
print(array)
# np.sort returns a sorted copy and leaves `array` untouched. Its default is
# axis=-1 (the last axis), not 0, so the axis is spelled out in both calls:
# axis=0 sorts the values within each column, axis=1 within each row.
sorted_within_columns = np.sort(array, axis=0)
print(sorted_within_columns)
sorted_within_rows = np.sort(array, axis=1)
print(sorted_within_rows)
# Filter: build a boolean mask with the same shape as `array`.
condi = array < 0.5
# Indexing with a boolean mask returns a 1-D array of the selected elements.
below_half = array[condi]
print(below_half)
# np.where() with only a condition returns a tuple of index arrays, one per
# dimension, locating the True entries.
hit_indices = np.where(condi)
print(hit_indices)
# Find and replace: np.where(cond, a, b) takes a where cond is True, else b.
array = np.array(['', '4', '10', '', '7'])
filled = np.where(array == '', '0', array)
print(filled)                  # Prints "['0' '4' '10' '0' '7']"
# =============================================================================
# Adding & Removing
# =============================================================================
# np.concatenate((a, b, ...)) joins arrays along the first axis by default.
joined = np.concatenate((np.array([1, 2, 3, 4]), np.array([4, 5, 6, 7])))
print(joined)                  # Prints "[1 2 3 4 4 5 6 7]"
# Concatenate columns: axis=1 places a2's columns to the right of a1's.
a1 = np.array([['1', 'James'], ['2', 'George'], ['3', 'Amy']])
a2 = np.array([['M', '47'], ['M', '23'], ['F', '36']])
side_by_side = np.concatenate((a1, a2), axis=1)
print(side_by_side)
# Concatenate compatibility:
# all the input array dimensions except for the concatenation axis must match
# exactly (here both arrays have 3 rows).
# Dimension compatibility:
# all the input arrays must have the same number of dimensions.
# np.delete returns a new array; a1 itself is not modified.
# Delete the second row of a1
without_second_row = np.delete(a1, 1, axis=0)
print(without_second_row)
# Delete the second column of a1
without_second_column = np.delete(a1, 1, axis=1)
print(without_second_column)
# =============================================================================
# 数组的agg method
# .sum() 求和
# .min()/.max() 最小最大值
# .mean() 均值
# .cumsum() 累加值
# =============================================================================
# Baseball player [height, weight] pairs as a list of lists
baseball_mbr = [
    [180, 78.4],
    [215, 102.7],
    [210, 98.5],
    [188, 75.2],
]
# Create a 2D numpy array from the list: np_baseball
np_baseball = np.array(baseball_mbr)
# Print out the type of np_baseball
print(type(np_baseball))
# Print out the shape of np_baseball: 4 players x 2 measurements
print(np_baseball.shape)
# Basic statistics functions
# Mean height (first column)
# Approach 1: slice the column, then call the module-level function
avg = np.mean(np_baseball[:, 0])
print(f"Average: {avg}")
# Approach 2: use the array method with an axis argument
# axis 0 averages down the rows, giving one mean per column
column_means = np_baseball.mean(0)
print(column_means)
# axis 1 averages across the columns, giving one mean per row
row_means = np_baseball.mean(1)
print(row_means)
# Median height
med = np.median(np_baseball[:, 0])
print(f"Median: {med}")
# Standard deviation of height (np.std defaults to the population form,
# ddof=0)
stddev = np.std(np_baseball[:, 0])
print(f"Standard Deviation: {stddev}")
# Correlation between first and second column; np.corrcoef returns the full
# 2x2 correlation matrix, with the height/weight coefficient off-diagonal
corr = np.corrcoef(np_baseball[:, 0], np_baseball[:, 1])
print(f"Correlation: {corr}")
# =============================================================================
# 数组的元素运算
# =============================================================================
# x is array([[1., 2.],
#             [3., 4.]])
x = np.array([[1, 2], [3, 4]], dtype=np.float64)
# y is array([[5., 6.],
#             [7., 8.]])
y = np.array([[5, 6], [7, 8]], dtype=np.float64)
# Each arithmetic operator below has an equivalent np.<name>() function; both
# spellings are printed to show they give the same result.
# Elementwise sum;
# [[ 6.  8.]
#  [10. 12.]]
print(x + y)
print(np.add(x, y))
# Elementwise difference;
# [[-4. -4.]
#  [-4. -4.]]
print(x - y)
print(np.subtract(x, y))
# Elementwise product;
# [[ 5. 12.]
#  [21. 32.]]
print(x * y)
print(np.multiply(x, y))
# Matrix multiplication (rows of x times columns of y), unlike the
# elementwise product above;
# [[19. 22.]
#  [43. 50.]]
matrix_product = np.dot(x, y)
print(matrix_product)
# Elementwise division;
# [[0.2        0.33333333]
#  [0.42857143 0.5       ]]
print(x / y)
print(np.divide(x, y))
# Elementwise square root; produces the array
# [[1.         1.41421356]
#  [1.73205081 2.        ]]
print(np.sqrt(x))
# =============================================================================
# Broadcasting: 可以使代码的运行速度更快、减少代码数量
# Compatibility Rules
# 1. numpy compares sets of array dimensions from right to left
# 2. two dimensions are compatible when
# - one of them has a length of "1"
# - they are of equal lengths
# 3. all dimension sets must be compatible
# =============================================================================
# 当两个数组的形状并不相同的时候,可以通过扩展数组的方式来实现相加相减相乘等操作,
# 这种机制叫做广播
# 广播的原则:如果两个数组的后缘维度(trailing dimension,即从末尾开始算起的维度)的轴长度相符,
# 或其中的一方的长度为1,则认为它们是广播兼容的。
# 广播会在缺失和(或)长度为1的维度上进行。
# (10, 5) and (10, 1): 广播兼容
# (10, 5) and (5, ): 广播兼容
# (4,2,3) and (2,3): 广播兼容
# Three broadcast-compatible pairings; each sum has the larger operand's shape.
# (10, 5) + (10, 1): the length-1 trailing axis is stretched across 5 columns.
brod1 = np.add(np.random.random(size=(10, 5)), np.ones(shape=(10, 1)))
print(np.shape(brod1))
# (10, 5) + (5,): the 1-D operand lines up with the trailing axis and is
# repeated for every row.
brod2 = np.add(np.random.random(size=(10, 5)), np.ones(5))
print(np.shape(brod2))
# (4, 2, 3) + (2, 3): the missing leading axis is filled in by repetition.
brod3 = np.add(np.random.random(size=(4, 2, 3)), np.ones(shape=(2, 3)))
print(np.shape(brod3))