Python 波士頓房價資料主成分PCA分析降維

2021-08-22 18:09:08 字數 1833 閱讀 9067

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import datasets, metrics
from sklearn.decomposition import TruncatedSVD
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

%matplotlib inline

匯入資料

# Load the Boston housing dataset through scikit-learn's `datasets` module.
# NOTE(review): `load_boston` was deprecated in scikit-learn 1.0 and removed in
# 1.2 -- confirm the installed version still provides it.
boston = datasets.load_boston()

# Bare expression (notebook style): displays the feature names when this is
# the last statement of a cell; it has no effect in a plain script.
boston.feature_names

劃分訓練集和測試集並使資料正規化

# Feature matrix and regression target taken from the loaded dataset.
x = boston.data
y = boston.target

# Split BEFORE scaling so the scaler's mean/std come from the training rows
# only; fitting it on the full dataset would leak test-set statistics into the
# model inputs. train_test_split picks rows from the sample count and
# random_state alone, so the train/test membership is the same as when the
# pre-scaled matrix was split.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.1, random_state=42
)

scale = StandardScaler()
x_train = scale.fit_transform(x_train)  # learn statistics on train only
x_test = scale.transform(x_test)        # reuse the training statistics

# Kept for backward compatibility with code that reads `normal_x`: the whole
# dataset expressed on the training scale.
normal_x = scale.transform(x)

顯示資料直方圖

# Overlay the distributions of training columns 0 and 2, 20 bins each. The
# second histogram is drawn semi-transparent so the first stays visible.
plt.hist(x=x_train[:, 0], bins=20)
plt.hist(x=x_train[:, 2], alpha=0.3, bins=20)

主成分分析（PCA）

# PCA-style dimensionality reduction implemented with truncated SVD.
# TruncatedSVD does not centre its input, so it coincides with PCA only for
# zero-mean data.
# NOTE(review): x_train comes from the standardisation step earlier in the
# file -- confirm it is centred.
n_components = 10

# random_state pins the default randomized solver so the components are
# repeatable between runs.
pca = TruncatedSVD(n_components=n_components, random_state=42)

# Fit the projection on the training rows only, then apply the same projection
# to the held-out rows.
x_train_pca = pca.fit_transform(x_train)
x_test_pca = pca.transform(x_test)

視覺化

# Component vectors of the fitted decomposition. Not used below, but kept
# available for inspection.
components = pca.components_

# Plot the variance explained by each retained component, in component order.
plt.plot(pca.explained_variance_)
plt.xlabel('component')
plt.ylabel('explained variance')

# The message previously had a single `{}` placeholder, so the percentage
# argument was silently dropped and a bare "%" was printed. A second
# placeholder now reports the cumulative explained-variance ratio.
print("前{}個主成分解釋了資料中{:.2f}%的變化".format(
    n_components, sum(pca.explained_variance_ratio_) * 100))

比較 PCA 降維前後線性回歸的誤差結果

# Linear regression on the reduced (projected) features; report test-set MSE.
model = LinearRegression()
model.fit(x_train_pca, y_train)
print("前{}個pca主成分進行線性回歸的mse是{}".format(
    n_components,
    metrics.mean_squared_error(y_test, model.predict(x_test_pca))))

# Baseline: the same estimator on the full feature set, for comparison.
# `model` is deliberately rebound, so afterwards it refers to the baseline fit.
model = LinearRegression()
model.fit(x_train, y_train)
print("不進行pca分析線性回歸的mse是{}".format(
    metrics.mean_squared_error(y_test, model.predict(x_test))))

波士頓房價線性回歸

from matplotlib import pyplot as plt; from sklearn import linear_model; from sklearn.model_selection import train_test_split; import numpy as np; import p...

波士頓房價資料集視覺化

將所有屬性與房價之間的關係視覺化：import matplotlib.pyplot as plt; import numpy as np; import tensorflow as tf; plt.rcParams['font.sans-serif'] = ['Microsoft YaHei']  # 指定預設字型; plt.rc...

使用sklearn載入波士頓房價資料集

使用 sklearn 載入波士頓房價資料集。從 sklearn 匯入資料集：from sklearn.datasets import load_boston；載入資料：boston = load_boston()；x 為輸入，y 為輸出：x = boston.data，y = boston.target；檢視資料有哪些字段：載入的...