来自 AI基础:Numpy简易入门
手动运行了一遍示例代码,笔记记录于此,以备查阅。
Numpy 简易入门
Numpy 是 Numerical Python 的简称,可用于高速的矢量化运算和矩阵数据运算。
1.1 Numpy 数组对象
Numpy 是用 Python 语言写的科学计算库,包括:
- N维数组对象Array(框架底层都用 np 来计算,比如计算并返回,哪一类label 的概率最高,用的是np的argmax的函数)
- 成熟的广播函数库
- 整合 C/C++和 Fortran 的工具包(此处体现了 -> Python 的胶水语言特性)
- 线性代数、傅里叶变换和随机数生成函数等,与稀疏矩阵运算包 scipy 配合更方便(这个package,可以做统计检验,比如 t-检验,做 A/B Test)
import numpy as np

# Build a 3x5 array holding 0..14 and inspect its basic attributes.
data = np.arange(15).reshape(3, 5)
print(data)
# The recorded output shows the array class right after the array itself,
# so the type is printed here to keep code and transcript in sync.
print(type(data))
print(data.ndim)   # number of dimensions
print(data.shape)  # length of each dimension
print(data.size)   # total number of elements
print(data.dtype)  # element type (int64 in the recorded run)
[[ 0 1 2 3 4]
[ 5 6 7 8 9]
[10 11 12 13 14]]
<class 'numpy.ndarray'>
2
(3, 5)
15
int64
1.2 创建 Numpy 数组对象
import numpy as np
# Build arrays from nested Python lists: one row, then two rows.
data1 = np.array([[3,2,1]])
print(data1)
data2 = np.array([[3,2,1],[6,5,4]])
print(data2)
# Constant-filled arrays of a given shape.
print(np.zeros((3,5)))
print(np.ones((3,5)))
# np.empty does not initialise its buffer, so the printed values are arbitrary.
print(np.empty((5,2)))
# Integers from 1 up to (not including) 20 in steps of 5.
print(np.arange(1,20,5))
# The element type can be chosen explicitly through dtype.
print(np.ones((2,3),dtype='float64'))
[[3 2 1]]
[[3 2 1]
[6 5 4]]
[[0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0.]
[0. 0. 0. 0. 0.]]
[[1. 1. 1. 1. 1.]
[1. 1. 1. 1. 1.]
[1. 1. 1. 1. 1.]]
[[-3.10503618e+231 -3.10503618e+231]
[ 2.47032823e-323 0.00000000e+000]
[ 0.00000000e+000 0.00000000e+000]
[ 0.00000000e+000 0.00000000e+000]
[ 0.00000000e+000 6.95335581e-309]]
[ 1 6 11 16]
[[1. 1. 1.]
[1. 1. 1.]]
1.3 ndarray 对象的数据类型
1.3.1 查看数据类型
# dtype.name gives a readable name for the element type of an array.
data1 = np.array([[3, 2, 1],
                  [6, 5, 4]])
print(data1.dtype.name)
# Here the element type is requested explicitly.
data2 = np.ones((2, 3), dtype=np.float64)
print(data2.dtype.name)
int64
float64
1.3.2 转换数据类型
# Show the dtype names of an integer array and a float array before converting.
data1 = np.array([[3,2,1],[6,5,4]])
print(data1.dtype.name)
# dtype is given explicitly here, so the name is float64.
data2 = np.ones((2,3),dtype='float64')
print(data2.dtype.name)
int64
float64
# astype returns a converted copy; the source array keeps its own dtype.
data2 = np.ones(shape=(2, 3), dtype=np.float64)
print(data2.dtype.name)
int_data1 = data2.astype('int64')
print(int_data1.dtype.name)
float64
int64
# An array of digit strings; the recorded run reports its dtype name as str32.
str_data = np.array(['4','5','6'])
print(str_data.dtype.name)
# astype converts each string to an integer and returns a new int64 array.
int_data = str_data.astype(np.int64)
print(int_data.dtype.name)
str32
int64
1.4 数组运算
1.4.1 向量化
# Arithmetic between two arrays of identical shape works element by element.
data1 = np.array([[4, 5, 6],
                  [7, 8, 9]])
data2 = np.array([[5, 5, 6],
                  [7, 10, 9]])
# Sum, difference, product and quotient, printed in that order.
for outcome in (data1 + data2, data1 - data2, data1 * data2, data1 / data2):
    print(outcome)
[[ 9 10 12]
[14 18 18]]
[[-1 0 0]
[ 0 -2 0]]
[[20 25 36]
[49 80 81]]
[[0.8 1. 1. ]
[1. 0.8 1. ]]
1.4.2 数组广播
如果 array_a.shape != array_b.shape,且两者形状兼容(从末尾维度对齐后,对应维度相等或其中一个为 1),广播机制就会自动触发;形状不兼容时会抛出 ValueError。
# A (4, 1) column plus a (1, 3) row: both are stretched to (4, 3) before adding.
data1 = np.array([[0], [1], [2], [3]])
data2 = np.array([[1, 2, 3]])
divider = "========="
# One print call with a newline separator gives the same five output chunks.
print(data1, divider, data2, divider, data1 + data2, sep="\n")
[[0]
[1]
[2]
[3]]
=========
[[1 2 3]]
=========
[[1 2 3]
[2 3 4]
[3 4 5]
[4 5 6]]
图解:
1.4.3 数组与标量间的运算
数组与标量运算时同样会触发广播机制:标量会被作用到数组的每一个元素上。
# Arithmetic between an array and a scalar applies the scalar to every element.
data1 = np.array([[1,2,3],[4,5,6]])
data2 = 11
print(data1)
print(data1+data2)
print(data1*data2)
print(data1-data2)
# Division yields floats even though both operands hold integers.
print(data1/data2)
[[1 2 3]
[4 5 6]]
[[12 13 14]
[15 16 17]]
[[11 22 33]
[44 55 66]]
[[-10 -9 -8]
[ -7 -6 -5]]
[[0.09090909 0.18181818 0.27272727]
[0.36363636 0.45454545 0.54545455]]
1.5 ndarray 索引和切片
1.5.1 整数索引和切片
# Indexing and slicing on a 1-D array holding 0..7.
arr = np.arange(8)
print(arr)
print(arr[5])      # single element at index 5
print(arr[3:5])    # indices 3 and 4; the stop index is excluded
print(arr[1:6:2])  # every second element from index 1 up to (not including) 6
[0 1 2 3 4 5 6 7]
5
[3 4]
[1 3 5]
# Indexing and slicing on a 2-D array.
arr2d = np.array([[1,2,3],[4,5,6]])
print(arr2d)
print(arr2d[1])        # whole second row
print(arr2d[0,1])      # row 0, column 1
print(arr2d[:2])       # first two rows (here: the entire array)
print(arr2d[0:2,0:2])  # rows 0-1, columns 0-1
print(arr2d[1,:2])     # first two columns of row 1
[[1 2 3]
[4 5 6]]
[4 5 6]
2
[[1 2 3]
[4 5 6]]
[[1 2]
[4 5]]
[4 5]
1.5.2 花式(数组)索引的基本使用
# Fill a 4x4 float array row by row; row i holds i, i+1, i+2, i+3.
demo_arr = np.empty((4, 4))
for i in range(4):
    # The loop body must be indented; without it the snippet does not parse.
    demo_arr[i] = np.arange(i, i + 4)
print(demo_arr)
# A single index list selects whole rows (rows 0 and 2).
print(demo_arr[[0, 2]])
# Two index lists are paired element-wise: elements (1, 1) and (3, 2).
print(demo_arr[[1, 3], [1, 2]])
[[0. 1. 2. 3.]
[1. 2. 3. 4.]
[2. 3. 4. 5.]
[3. 4. 5. 6.]]
[[0. 1. 2. 3.]
[2. 3. 4. 5.]]
[2. 5.]
1.5.3 布尔型
# One name per row of the score table; boolean masks built from the names
# select the matching score rows.
student_name = np.array(['Tom', 'Jack Ma', 'Molly', 'Bruce'])
print(student_name)
student_score = np.array([
    [79, 88, 80],
    [89, 90, 92],
    [83, 78, 85],
    [78, 76, 80],
])
print(student_score)
print(student_name == 'Jack Ma')
is_bruce = student_name == 'Bruce'
print(student_score[is_bruce])      # every score of the matching row
print(student_score[is_bruce, :1])  # only the first column of that row
['Tom' 'Jack Ma' 'Molly' 'Bruce']
[[79 88 80]
[89 90 92]
[83 78 85]
[78 76 80]]
[False True False False]
[[78 76 80]]
[[78]]
1.6 数组的转置和轴对换
# .T swaps the two axes of a 2-D array: shape (3, 5) becomes (5, 3).
arr1 = np.arange(15).reshape(3, 5)
print(arr1)
print(arr1.T)
# For a 3-D array the axis order is given explicitly.
arr2 = np.arange(16).reshape((2,2,4))
print(arr2.transpose(1,2,0))  # new axis order is old axes 1, 2, 0 -> shape (2, 4, 2)
print(arr2.swapaxes(1,0))     # exchanges axes 0 and 1 only
[[ 0 1 2 3 4]
[ 5 6 7 8 9]
[10 11 12 13 14]]
[[ 0 5 10]
[ 1 6 11]
[ 2 7 12]
[ 3 8 13]
[ 4 9 14]]
[[[ 0 8]
[ 1 9]
[ 2 10]
[ 3 11]]
[[ 4 12]
[ 5 13]
[ 6 14]
[ 7 15]]]
[[[ 0 1 2 3]
[ 8 9 10 11]]
[[ 4 5 6 7]
[12 13 14 15]]]
1.7 Numpy 通用函数
# Unary universal functions act on every element of one array.
arr = np.array([4,9,16])
print(np.sqrt(arr))
print(np.abs(arr))
# Binary universal functions combine two same-shaped arrays element by element.
x = np.array([12,9,13,15])
y = np.array([11,10,4,8])
print(np.add(x,y))
print(np.multiply(x,y))
print(np.maximum(x,y))  # larger value of each pair
print(np.greater(x,y))  # boolean array, True where x > y
[2. 3. 4.]
[ 4 9 16]
[23 19 17 23]
[132 90 52 120]
[12 10 13 15]
[ True False True True]
1.8 利用Numpy数组进行数据处理
1.8.1 将条件逻辑转为数组运算
# np.where picks from x where the condition is True and from y otherwise.
x = np.array([1,8,10])
y = np.array([11,4,8])
arr_con = np.array([True,False,True])
result = np.where(arr_con,x,y)
print(result)
[ 1 4 10]
1.8.2 数组统计运算
# Summary statistics of the integers 0..9.
arr = np.arange(10)
var1 = arr.sum()      # total
var2 = arr.mean()     # arithmetic mean
var3 = arr.min()      # smallest value
var4 = arr.max()      # largest value
var5 = arr.argmin()   # index of the smallest value
var6 = arr.argmax()   # index of the largest value
var7 = arr.cumsum()   # running sum
var8 = arr.cumprod()  # running product; all zeros because the first element is 0
for stat in (var1, var2, var3, var4, var5, var6, var7, var8):
    print(stat)
45
4.5
0
9
0
9
[ 0 1 3 6 10 15 21 28 36 45]
[0 0 0 0 0 0 0 0 0 0]
# np.diff returns the differences between neighbouring elements along an axis.
arr = np.arange(1,16).reshape((3,5))
print(arr)
print(np.diff(arr,axis=1))  # along each row: one column fewer
print(np.diff(arr,axis=0))  # down each column: one row fewer
[[ 1 2 3 4 5]
[ 6 7 8 9 10]
[11 12 13 14 15]]
[[1 1 1 1]
[1 1 1 1]
[1 1 1 1]]
[[5 5 5 5 5]
[5 5 5 5 5]]
# floor rounds toward negative infinity, ceil toward positive infinity.
print(np.floor([-0.6,-1.4,-0.1,-1.8,0,1.4,1.7]))
# Negative inputs above -1 come out as -0. in the recorded output.
print(np.ceil([1.2,1.5,1.8,2.1,2.0,-0.5,-0.6,-0.3]))
floor 向下取整(在数轴上向左),ceil 向上取整(在数轴上向右);负数同理,例如 floor(-0.6) = -1,ceil(-0.5) = -0。
[-1. -2. -1. -2. 0. 1. 1.]
[ 2. 2. 2. 3. 2. -0. -0. -0.]
# np.where with expressions: keep values below 5, multiply the others by 10.
arr = np.arange(10)
print(arr)
print(np.where(arr < 5 , arr, 10*arr))
[0 1 2 3 4 5 6 7 8 9]
[ 0 1 2 3 4 50 60 70 80 90]
1.8.4 检索数组元素
# any: at least one element satisfies the condition; all: every element does.
arr = np.array([[4,6,8],[11,3,-8],[6,5,2]])
print(np.any(arr > 0))
# False here because the array contains -8.
print(np.all(arr > 0))
True
False
1.8.5 唯一化及其他集合逻辑
# unique returns the sorted distinct values of the array.
arr = np.array([13, 45, 6, 77, 8, 13, 12, 12, 2222])
print(np.unique(arr))
# np.isin is the recommended replacement for np.in1d (deprecated in recent
# NumPy releases); for a 1-D input it yields the same boolean mask.
in_targets = np.isin(arr, [13, 12])
print(in_targets)
[ 6 8 12 13 45 77 2222]
[ True False False False False True True True False]
1.9 线性代数模块
# Matrix product of a (2, 3) array with a (3, 2) array gives a (2, 2) result.
arr_x = np.array([[1, 2, 3],
                  [4, 5, 6]])
arr_y = np.array([[1, 2],
                  [3, 4],
                  [5, 6]])
print(np.dot(arr_x, arr_y))
[[22 28]
[49 64]]
1.10 随机数模块
# np.random.rand takes the output shape as separate arguments and draws
# uniform samples from [0, 1).
print(np.random.rand(4,4))    # 4x4 array
print(np.random.rand(2,4,5))  # shape (2, 4, 5)
[[0.81782247 0.19627196 0.47536585 0.0839395 ]
[0.71840063 0.12909142 0.85735096 0.64975114]
[0.41547986 0.37506747 0.2013906 0.01261993]
[0.39221488 0.06204719 0.52023665 0.77400181]]
[[[0.8367346 0.85784458 0.96927022 0.82935407 0.72544339]
[0.45293445 0.02093661 0.13855598 0.83985129 0.51082136]
[0.8960852 0.47382384 0.62323173 0.9553976 0.13545716]
[0.91273645 0.56919542 0.02027771 0.83513558 0.98522069]]
[[0.65019463 0.68427283 0.8384966 0.26230144 0.67670195]
[0.84063365 0.88312345 0.42528515 0.363616 0.75830665]
[0.80756949 0.31635892 0.92864915 0.93583129 0.49753883]
[0.90122766 0.34228892 0.01383052 0.95354365 0.50214403]]]
# Seeding with the same value reproduces the same sequence of draws.
np.random.seed(42)
print(np.random.rand(3))
np.random.seed(42)
print(np.random.rand(3))
# seed() without an argument reseeds from fresh entropy, so this draw differs.
np.random.seed()
print(np.random.rand(3))
[0.37454012 0.95071431 0.73199394]
[0.37454012 0.95071431 0.73199394]
[0.43855322 0.20868575 0.21541442]
其他
极客时间《数据分析专栏》
实例代码
# create array
import numpy as np

# One 1-D array and one 3x3 array built from Python lists.
a = np.array([1, 3, 4, 5, 9])
b = np.array([
    [1, 3, 5],
    [3, 5, 7],
    [4, 1, 2],
])
# Arrays are mutable: overwrite the centre element in place.
b[1, 1] = 10
print(a.shape, b.shape, a.dtype)
print(b)
# construct array
# Structured dtype: a 32-byte name, three integer fields and a float field.
persontype = np.dtype({
    'names': ['name', 'age', 'chinese', 'math', 'english'],
    'formats': ['S32', 'i', 'i', 'i', 'f'],
})
peoples = np.array(
    [
        ("ZhangFei", 32, 75, 100, 90),
        ("GuanYu", 24, 85, 96, 88.5),
        ("ZhaoYun", 28, 85, 92, 96.5),
        ("HuangZhong", 29, 65, 85, 100),
    ],
    dtype=persontype,
)
# Indexing by field name yields that column for every record.
ages = peoples['age']
chineses = peoples['chinese']
maths = peoples['math']
englishs = peoples['english']
# Mean of each column, in the order age, chinese, math, english.
for column in (ages, chineses, maths, englishs):
    print(np.mean(column))
print(englishs.dtype)
# incremental arrays
# arange: start at 1, stop before 21, step 3.
print(np.arange(1,21,3))
# linspace: 10 evenly spaced values from 1 to 29, both ends included.
print(np.linspace(1,29,10))
# calculation
# x1 holds the integers 1, 3, 5, 7, 9; x2 holds the same values as floats.
x1 = np.arange(1,11,2)
x2 = np.linspace(1,9,5)
# Each function below works element by element on the two arrays.
print(np.add(x1,x2))
print(np.subtract(x1,x2))
print(np.multiply(x1,x2))
print(np.divide(x1,x2))
print(np.power(x1,x2))      # x1 raised to the power x2
print(np.remainder(x1,x2))  # remainder of x1 divided by x2
# stats
a = np.array([[1,2,3],[2,3,4],[4,5,6]])
print(np.amin(a))    # minimum over all elements
print(np.amin(a,0))  # minimum of each column (reduces along axis 0)
print(np.amin(a,1))  # minimum of each row (reduces along axis 1)
print(np.amax(a))    # maximum over all elements
print(np.amax(a,0))  # maximum of each column
print(np.amax(a,1))  # maximum of each row
# distance = Maximum - Minimum
a = np.array([[1,2,3],[4,5,6],[7,8,9]])
print(np.ptp(a))    # over all elements
print(np.ptp(a,0))  # per column
print(np.ptp(a,1))  # per row
## percentile
# The 50th percentile is the median.
print(np.percentile(a,50))         # over all elements
print(np.percentile(a,50,axis=0))  # per column
print(np.percentile(a,50,axis=1))  # per row
# average()
a = np.array([1,2,3,4])
wts = np.array([1,2,3,4])
print(np.average(a))              # plain mean
print(np.average(a,weights=wts))  # weighted mean: sum(a * wts) / sum(wts)
# standard deviation & variance
# Both use the population form (ddof=0) unless told otherwise.
print(np.std(a))
print(np.var(a))
## sort
# np.sort returns a sorted copy; the input array is left unchanged.
a = np.array([[2, 3, 4],
              [2, 4, 1]])
print(np.sort(a))             # default: sort along the last axis
print(np.sort(a, axis=None))  # flatten first, then sort
print(np.sort(a, axis=0))     # sort down each column
print(np.sort(a, axis=1))     # sort along each row
# quiz
# Structured dtype: a 32-byte name followed by three integer scores.
scoretype = np.dtype({
    'names': ['name', 'chinese', 'english', 'math'],
    'formats': ['S32', 'i', 'i', 'i']})
peoples = np.array(
    [
        ("zhangfei", 66, 65, 30),
        ("guanyu", 95, 85, 98),
        ("zhaoyun", 93, 92, 96),
        ("huangzhong", 90, 88, 77),
        ("dianwei", 80, 90, 90)
    ], dtype=scoretype)


def check_scores(subject):
    """Print summary statistics for one subject as a pipe-separated line.

    subject: name of a score field in ``peoples`` ('chinese', 'english'
    or 'math'). The printed columns are subject, mean, min, max, variance
    and standard deviation, matching the header printed below.
    """
    # Look the column up once instead of once per statistic.
    scores = peoples[subject]
    print(f'{subject}|{np.mean(scores)}|{np.min(scores)}|{np.max(scores)}|{np.var(scores)}|{np.std(scores)}')


print("'subject'|'mean'|'min'|'max'|'var'|'std'")
check_scores('english')
check_scores('chinese')
check_scores('math')
print("ranking")
# Rank the records by total score, highest first. Fields are read by name
# so the key does not depend on the field order of the dtype.
ranking = sorted(
    peoples,
    key=lambda p: p['chinese'] + p['english'] + p['math'],
    reverse=True,
)
print(ranking)
参考:
- Numpy 练习100 题 https://github.com/rougier/numpy-100
- Numpy 3 万行代码实现主流机器学习模型 https://github.com/ddbourgin/numpy-ml