機器學習基礎自學五(整合演算法)

2021-10-02 11:46:11 字數 2372 閱讀 8811

# Ensemble-learning demo on the Pima Indians Diabetes dataset.
# Five ensemble classifiers — bagging, random forest, extra trees,
# AdaBoost, and gradient boosting (GBM) — are each scored with
# 10-fold cross-validation and their mean accuracy printed.
from pandas import read_csv
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import BaggingClassifier
from sklearn.tree import DecisionTreeClassifier      # base learner for bagging
from sklearn.ensemble import RandomForestClassifier  # random forest
from sklearn.ensemble import ExtraTreesClassifier    # extremely randomized trees
from sklearn.ensemble import AdaBoostClassifier      # AdaBoost (boosting)
from sklearn.ensemble import GradientBoostingClassifier  # gradient boosting (GBM)

# Load the data. A raw string keeps the Windows backslashes literal
# (the original non-raw literal relied on '\e'/'\m'/'\p' not being escapes).
filename = r'd:\example\machinelearning-master\pima_data.csv'
names = ['preg', 'plas', 'pres', 'skin', 'test', 'mass', 'pedi', 'age', 'class']
data = read_csv(filename, names=names)

# Split into input features (first 8 columns) and the class label (last column).
array = data.values
x = array[:, 0:8]
y = array[:, 8]

num_folds = 10
seed = 7
# One splitter is reused for every model so all five are scored on identical folds.
# shuffle=True is required when random_state is set (modern sklearn raises otherwise).
# NOTE: the original rebound the name `kfold` to the splitter instance, shadowing
# the KFold class and crashing on the second `kfold(...)` call — use a distinct name.
kf = KFold(n_splits=num_folds, shuffle=True, random_state=seed)

# Bagging with decision trees as the base estimator.
# NOTE(review): `base_estimator` was renamed to `estimator` in sklearn 1.2 and
# removed in 1.4 — switch the keyword if running on a recent sklearn.
cart = DecisionTreeClassifier()
num_tree = 100
model = BaggingClassifier(base_estimator=cart, n_estimators=num_tree, random_state=seed)
result = cross_val_score(model, x, y, cv=kf)
print('袋裝演算法:', result.mean())

# Random forest: bagged trees with feature subsampling at each split.
num_tree = 100
max_features = 3
model = RandomForestClassifier(n_estimators=num_tree, random_state=seed, max_features=max_features)
result = cross_val_score(model, x, y, cv=kf)
print('隨機森林:', result.mean())

# Extra trees: like a random forest but with fully randomized split thresholds.
num_tree = 100
max_features = 3
model = ExtraTreesClassifier(n_estimators=num_tree, random_state=seed, max_features=max_features)
result = cross_val_score(model, x, y, cv=kf)
print('極端隨機樹:', result.mean())

# AdaBoost: sequentially reweights samples the previous learners got wrong.
num_tree = 30
model = AdaBoostClassifier(n_estimators=num_tree, random_state=seed)
result = cross_val_score(model, x, y, cv=kf)
print('adaboost,迭代演算法:', result.mean())

# Gradient boosting (GBM): each tree fits the residual error of the ensemble so far.
num_tree = 30
model = GradientBoostingClassifier(n_estimators=num_tree, random_state=seed)
result = cross_val_score(model, x, y, cv=kf)
# '提公升' in the scrape is a zh-TW conversion artifact of '提升'; restored here.
print('隨機梯度提升:', result.mean())

機器學習演算法(五)整合

整合指用多個基學習器共同構成一個更加強大的學習器。整合包含三種方法 Boosting,Bagging,Stacking 1 Boosting 包括GBDT和AdaBoost,各學習器間存在強依賴關係,只能序列實現 2 Bagging的代表演算法是隨機森林,各學習器間不存在強依賴關係,可以並行實現 3...

機器學習 整合演算法

整合演算法用一些相對較弱的學習模型獨立地就同樣的樣本進行訓練,然後把結果整合起來進行整體 整合演算法的主要難點在於究竟整合哪些獨立的較弱的學習模型以及如何把學習結果整合起來。這是一類非常強大的演算法,同時也非常流行。是構建多個學習器,然後通過一定策略結合把它們來完成學習任務的,常常可以獲得比單一學習...

機器學習 實驗五 整合學習(投票方式)

實驗 github 然後在main中,使用乙個迴圈來進行測試模型的正確率 int test number 100 int correct 0 for int counter0 0 counter0 test number counter0 測試迴圈 for int counter1 0 counte...