1. 原理推导
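PCA 的原理由“最大化数据在投影方向上的方差”推导而来:对中心化后的数据 X,在约束 w^T w = 1 下最大化 w^T X^T X w,由拉格朗日乘子法得 X^T X w = λw,因此最优投影方向就是协方差矩阵最大特征值对应的特征向量。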
2. Python 简单实现
def pca(dataMat, k):
    """Project the rows of ``dataMat`` onto its ``k`` leading principal axes.

    Args:
        dataMat: 2-D array-like of shape (m, n); each row is a sample and
            each column a feature.
        k: Number of principal components to keep.  When ``k`` is greater
            than or equal to the number of features, all ``n`` components
            are returned.

    Returns:
        Array of shape (m, min(k, n)) holding the centered data expressed in
        the selected eigenvector basis.  Columns are ordered by ascending
        eigenvalue, so the direction of largest variance is the last column.
        The sign of each column is arbitrary, as it is for any eigenvector.
    """
    # asanyarray accepts nested lists while leaving ndarray subclasses
    # (e.g. np.matrix) untouched.
    dataMat = np.asanyarray(dataMat)

    # Center every column (feature) on its mean.
    xMat = dataMat - np.mean(dataMat, axis=0)

    # Scatter matrix: equal to the covariance matrix up to a constant factor,
    # which does not change the eigenvectors or their ordering.
    covMat = np.dot(xMat.T, xMat)

    # The scatter matrix is symmetric, so use eigh: it returns real values
    # with eigenvalues already sorted in ascending order, whereas eig may
    # return complex output in no particular order.
    _, e_vecs = np.linalg.eigh(covMat)

    n = dataMat.shape[1]
    if k >= n:
        return np.dot(xMat, e_vecs)

    # The last k columns belong to the k largest eigenvalues.
    w_vecs = e_vecs[:, n - k:]
    return np.dot(xMat, w_vecs)
3. sklearn
# Reference run with scikit-learn's PCA, for comparison with the hand-written
# pca() above.  `data` is expected to be defined earlier in the session.
from sklearn.decomposition import PCA as pca1

# Keep a single principal component.
com = pca1(n_components=1)
ret2 = com.fit(data)

# Single-argument print(...) behaves the same on Python 2 and Python 3.
print('dddddddddddddddddddd')  # visual separator in the console output
print(com.explained_variance_ratio_)  # fraction of variance per component
print(com.explained_variance_)  # variance captured by each component
print(com.transform(data))  # data projected onto the kept component