python降維分析 Python資料降維

2021-10-11 04:32:05 字數 4522 閱讀 6941

一些資料降維的特徵提取演算法,先導入包和資料:

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from mpl_toolkits.mplot3d import axes3d
from sklearn.datasets import load_wine
from sklearn.decomposition import PCA, KernelPCA
from sklearn.manifold import MDS, TSNE, Isomap
from sklearn.preprocessing import StandardScaler

## Load the wine dataset as (features, labels) and standardize the features
wine_x, wine_y = load_wine(return_X_y=True)
wine_x = StandardScaler().fit_transform(wine_x)

主成分分析

## Reduce the wine dataset with principal component analysis (PCA)
pca = PCA(n_components=13, random_state=123)
pca.fit(wine_x)

## The first 3 principal components are a suitable choice for this data
pca_wine_x = pca.transform(wine_x)[:, 0:3]
print(pca_wine_x.shape)

## Visualize the PCA-transformed data in 3D space
colors = ["red", "blue", "green"]
shapes = ["o", "s", "*"]
fig = plt.figure(figsize=(10, 6))

## Use a 3D coordinate system
ax1 = fig.add_subplot(111, projection="3d")

## Draw each sample with the color and marker selected by its class label
for ii, y in enumerate(wine_y):
    ax1.scatter(pca_wine_x[ii, 0], pca_wine_x[ii, 1], pca_wine_x[ii, 2],
                s=40, c=colors[y], marker=shapes[y])

ax1.set_xlabel("主成分1", rotation=20)
ax1.set_ylabel("主成分2", rotation=-20)
ax1.set_zlabel("主成分3", rotation=90)
ax1.azim = 225
ax1.set_title("主成分特徵空間視覺化")
plt.show()

核主成分分析

## Obtain the principal components of the data with kernel PCA
kpca = KernelPCA(n_components=13, kernel="rbf",  ## RBF kernel
                 gamma=0.2, random_state=123)
kpca.fit(wine_x)

## Keep the first 3 kernel principal components
kpca_wine_x = kpca.transform(wine_x)[:, 0:3]
print(kpca_wine_x.shape)

## Visualize the kernel-PCA-transformed data in 3D space
colors = ["red", "blue", "green"]
shapes = ["o", "s", "*"]
fig = plt.figure(figsize=(10, 6))

## Use a 3D coordinate system
ax1 = fig.add_subplot(111, projection="3d")

## Draw each sample with the color and marker selected by its class label
for ii, y in enumerate(wine_y):
    ax1.scatter(kpca_wine_x[ii, 0], kpca_wine_x[ii, 1], kpca_wine_x[ii, 2],
                s=40, c=colors[y], marker=shapes[y])

ax1.set_xlabel("核主成分1", rotation=20)
ax1.set_ylabel("核主成分2", rotation=-20)
ax1.set_zlabel("核主成分3", rotation=90)
ax1.azim = 225
ax1.set_title("核主成分特徵空間視覺化")
plt.show()

流形學習

## Manifold learning: non-linear dimensionality reduction with Isomap
isomap = Isomap(n_neighbors=7,   ## number of neighbors considered for each point
                n_components=3)  ## reduce to a 3-dimensional space

## Obtain the reduced data
isomap_wine_x = isomap.fit_transform(wine_x)
print(isomap_wine_x.shape)

## Visualize the Isomap-reduced data in 3D space
colors = ["red", "blue", "green"]
shapes = ["o", "s", "*"]
fig = plt.figure(figsize=(10, 6))

## Use a 3D coordinate system
ax1 = fig.add_subplot(111, projection="3d")

## Draw each sample with the color and marker selected by its class label
for ii, y in enumerate(wine_y):
    ax1.scatter(isomap_wine_x[ii, 0], isomap_wine_x[ii, 1], isomap_wine_x[ii, 2],
                s=40, c=colors[y], marker=shapes[y])

ax1.set_xlabel("特徵1", rotation=20)
ax1.set_ylabel("特徵2", rotation=-20)
ax1.set_zlabel("特徵3", rotation=90)
ax1.azim = 225
ax1.set_title("isomap降維視覺化")
plt.show()

tsne

## Reduce the data to a 3-dimensional space with t-SNE
tsne = TSNE(n_components=3, perplexity=25,
            early_exaggeration=3, random_state=123)

## Obtain the reduced data
tsne_wine_x = tsne.fit_transform(wine_x)
print(tsne_wine_x.shape)

## Visualize the t-SNE-reduced data in 3D space
colors = ["red", "blue", "green"]
shapes = ["o", "s", "*"]
fig = plt.figure(figsize=(10, 6))

## Use a 3D coordinate system
ax1 = fig.add_subplot(111, projection="3d")

## Draw each sample with the color and marker selected by its class label
for ii, y in enumerate(wine_y):
    ax1.scatter(tsne_wine_x[ii, 0], tsne_wine_x[ii, 1], tsne_wine_x[ii, 2],
                s=40, c=colors[y], marker=shapes[y])

ax1.set_xlabel("特徵1", rotation=20)
ax1.set_ylabel("特徵2", rotation=-20)
ax1.set_zlabel("特徵3", rotation=90)
ax1.azim = 225
## Title fixed: the original was copy-pasted from the Isomap section
ax1.set_title("tsne降維視覺化")
plt.show()

多維尺度分析

## Reduce the data to a 3-dimensional space with multidimensional scaling (MDS)
mds = MDS(n_components=3, dissimilarity="euclidean", random_state=123)

## Obtain the reduced data
mds_wine_x = mds.fit_transform(wine_x)
print(mds_wine_x.shape)

## Visualize the MDS-reduced data in 3D space
colors = ["red", "blue", "green"]
shapes = ["o", "s", "*"]
fig = plt.figure(figsize=(10, 6))

## Use a 3D coordinate system
ax1 = fig.add_subplot(111, projection="3d")

## Draw each sample with the color and marker selected by its class label
for ii, y in enumerate(wine_y):
    ax1.scatter(mds_wine_x[ii, 0], mds_wine_x[ii, 1], mds_wine_x[ii, 2],
                s=40, c=colors[y], marker=shapes[y])

ax1.set_xlabel("特徵1", rotation=20)
ax1.set_ylabel("特徵2", rotation=-20)
ax1.set_zlabel("特徵3", rotation=90)
ax1.azim = 225
ax1.set_title("mds降維視覺化")
plt.show()

python用tsne降維 tSNE降維

我有兩套資料:訓練集和測試集。這兩個資料集分別有30213和30235個專案,每個專案有66個維度。我正在嘗試應用scikit-learn的t-SNE將維數降到2。由於資料集很大,如果我試圖一次性處理整個資料,會遇到記憶體錯誤,因此我嘗試將它們分成塊,然後一次轉換一個塊,如下所示 tsne manif...

python 陣列升維降維

一 numpy.array 增加維度 import numpy as np a np.array 1,2,3 print a 輸出array 1,2,3 print a none 輸出array 1,2,3 print a none 輸出array 1 2 3 二 複製填充並升維 舉例 把a的sh...

主成分分析 降維

import pandas as pd 引數初始化 inputfile data principal component.xls outputfile tmp dimention reducted.xls 降維後的資料 data pd.read excel inputfile,header none...