## 机器学习中数据处理与可视化的python、numpy等常用函数

qinjianhuang 2020-11-13 05:26:11

### np.tile()

np.tile(A, reps) 按照 reps 指定的次数沿各个维度重复数组 A，相当于把当前的行元素或者列元素复制若干次。

import numpy as np
# One-dimensional base array that every tiling demo below repeats.
m1 = np.array([1, 2, 3, 4])
# Each reps tuple is (repeats along rows, repeats along columns):
#   (2, 1) stacks the row twice            -> 2 rows of 4 values
#   (1, 2) repeats the row side by side    -> 1 row of 8 values
#   (2, 2) does both                       -> 2 rows of 8 values
for position, reps in enumerate([(2, 1), (1, 2), (2, 2)]):
    if position > 0:
        # Divider line between consecutive results.
        print("===============")
    print(np.tile(m1, reps))

D:\Python\python.exe E:/ML_Code/test_code.py
[[1 2 3 4]
[1 2 3 4]]
===============
[[1 2 3 4 1 2 3 4]]
===============
[[1 2 3 4 1 2 3 4]
[1 2 3 4 1 2 3 4]]

### sum()

sum函数是对元素进行求和，对于二维数组以上则可以根据参数axis进行分别对行和列进行求和，axis=0代表按列求和，axis=1代表行求和。

import numpy as np
# 1-D case: sum() walks the array and adds the elements one by one.
m1 = np.array([1, 2, 3, 4])
element_total = sum(m1)
print(element_total)
# 2-D case: ndarray.sum() accepts an axis argument.
m2 = np.array([[6, 2, 2, 4], [1, 2, 4, 7]])
# axis=0 adds down the rows, leaving one total per column.
column_totals = m2.sum(axis=0)
# axis=1 adds across the columns, leaving one total per row.
row_totals = m2.sum(axis=1)
print(column_totals)
print(row_totals)

D:\Python\python.exe E:/ML_Code/test_code.py
10
[ 7 4 6 11]
[14 14]
Process finished with exit code 0

### shape和reshape

import numpy as np
a = np.array([[1, 2, 3], [4, 5, 6]])
# shape is reported as (rows, columns).
print(a.shape)
# Flatten the 2x3 array into a 1-D array of 6 elements.
b = a.reshape(6)
print(b)
# A dimension given as -1 is inferred from the array size:
# 6 elements spread over 3 rows leaves 2 columns.
c = a.reshape(3, -1)
print(c)

D:\python-3.5.2\python.exe E:/ML_Code/test_code.py
(2, 3)
---
[1 2 3 4 5 6]
---
[[1 2]
[3 4]
[5 6]]


### numpy.random.rand

import numpy as np
# np.random.rand(d0, d1, ...) returns an array of the given shape filled with
# random samples drawn from a uniform distribution over [0, 1).
# First a length-3 vector, then a 2x2 matrix.
for dims in ((3,), (2, 2)):
    print(np.random.rand(*dims))


D:\python-3.5.2\python.exe E:/ML_Code/test_code.py
[ 0.03568079 0.68235136 0.64664722]
---
[[ 0.43591417 0.66372315]
[ 0.86257381 0.63238434]]


### zip()

zip() 函数用于将可迭代的对象作为参数，将对象中对应的元素打包成一个个元组。在 Python 2 中它直接返回由这些元组组成的列表；在 Python 3 中它返回一个迭代器（zip 对象），需要用 list() 转换后才能得到由这些元组组成的列表。

import numpy as np
a1, a2 = np.array([1, 2, 3, 4]), np.array([11, 22, 33, 44])
# zip() pairs up the elements found at the same position in each input.
# In Python 3 the result is a lazy iterator, so it is unpacked into a list
# before printing; after that the iterator z is exhausted.
z = zip(a1, a2)
print([*z])


D:\Python\python.exe E:/ML_Code/test_code.py
[(1, 11), (2, 22), (3, 33), (4, 44)]
Process finished with exit code 0


注意：在 Python 3 中如果不经过 list() 转换而直接 print(z)，只会输出 zip 对象本身，例如：

<zip object at 0x01FB2E90>

### 矩阵相关

import numpy as np
# Random 3x4 matrix; every entry is drawn uniformly from [0, 1).
myRand = np.random.rand(3, 4)
print(myRand)
# 3x3 identity matrix.
myEye = np.eye(3)
print(myEye)
# NOTE: this star import replaces the built-in sum() with numpy.sum for the
# rest of the script, so the NumPy calls below are spelled np.<name> explicitly.
from numpy import *
# np.asmatrix replaces the old mat alias, which NumPy 2.0 removed.
myMatrix = np.asmatrix([[1, 2, 3], [4, 5, 6], [7, 8, 9]])
# Sum of all the elements of the matrix.
print(np.sum(myMatrix))
# Determinant of the matrix (linalg.det is the determinant, not the rank).
print(np.linalg.det(myMatrix))
# Rank of the matrix.
myRank = np.linalg.matrix_rank(myMatrix)
print(myRank)
# Inverse of the matrix. Only a full-rank square matrix has one; this demo
# matrix is singular, and calling linalg.inv on it would either raise
# LinAlgError or return meaningless, huge values.
if myRank == myMatrix.shape[0]:
    print(np.linalg.inv(myMatrix))
else:
    print("myMatrix is singular (rank %d), so it has no inverse" % myRank)


from numpy import *
import numpy as np
# np.asmatrix builds numpy.matrix objects; it replaces the old mat alias,
# which NumPy 2.0 removed. The matrix type belongs to NumPy, not to Python.
vector1 = np.asmatrix([[1, 2], [1, 1]])
vector2 = np.asmatrix([[1, 2], [1, 1]])
vector3 = np.array([[1, 2], [1, 1]])
vector4 = np.array([[1, 2], [1, 1]])
# For numpy.matrix operands, "*" is the matrix product.
print(vector1 * vector2)
# np.dot on numpy.matrix operands is the matrix product as well.
print(np.dot(vector1, vector2))
# For plain ndarrays, "*" multiplies element by element.
print(vector3 * vector4)
# For plain ndarrays, np.dot follows the matrix-product rule.
print(np.dot(vector3, vector4))

D:\python-3.5.2\python.exe D:/PyCharm/py_base/py_numpy.py
[[3 4]
[2 3]]
---
[[3 4]
[2 3]]
---
[[1 4]
[1 1]]
---
[[3 4]
[2 3]]


### 向量相关

$$d_{12} = \sqrt{\sum_{k=1}^{n} (x_{1k} - x_{2k})^2}$$

$$d_{12} = \sqrt{(A - B)(A - B)^T}$$

from numpy import *
# Euclidean distance between two row vectors A and B: sqrt((A - B)(A - B)^T).
# asmatrix replaces the old mat alias, which NumPy 2.0 removed.
vector1 = asmatrix([1, 2])
vector2 = asmatrix([3, 4])
difference = vector1 - vector2
# (1x2) times (2x1) gives a 1x1 matrix holding the squared distance, so the
# printed result is a 1x1 matrix rather than a bare scalar.
distance = sqrt(difference * difference.T)
print(distance)

### 概率相关

from numpy import *
import numpy as np
# Two data series stored as the two rows of a 2x5 array.
arrayOne = np.array([[1, 2, 3, 4, 5], [7, 4, 3, 3, 3]])
# arrayOne[0] and arrayOne[1] select rows, not columns.
# Mean of the first row.
mv1 = np.mean(arrayOne[0])
# Mean of the second row.
mv2 = np.mean(arrayOne[1])
# Standard deviation of the first row (np.std defaults to ddof=0, i.e. the
# population standard deviation).
dv1 = np.std(arrayOne[0])
# Standard deviation of the second row.
dv2 = np.std(arrayOne[1])
print(mv1)
print(mv2)
print(dv1)
print(dv2)

https://huangqinjian.blog.csdn.net/article/details/78574306