機器學習 KNN分類演算法(上)

2021-09-29 12:25:59 字數 3905 閱讀 9984


(1) 計算測試物件到訓練集中每個物件的距離






from collections import Counter
from math import sqrt

import numpy as np

# 定義分類器

class knnclassifier:
    """k-nearest-neighbours classifier (Euclidean distance, majority vote).

    The lowercase class name is kept so existing callers keep working.
    """

    def __init__(self, k):
        """Create an unfitted classifier.

        Args:
            k: Number of nearest neighbours that take part in the vote.

        Raises:
            AssertionError: If ``k`` is smaller than 1.
        """
        assert k >= 1, "k must be valid"
        self.k = k
        # Training data is stored by fit(); None means "not fitted yet".
        self._x_train = None
        self._y_train = None

    def fit(self, x_train, y_train):
        """Store the training set; kNN has no real training step.

        Args:
            x_train: 2-D array of samples, one row per sample.
            y_train: 1-D array of labels, one per row of ``x_train``.

        Returns:
            The classifier itself, so calls can be chained.

        Raises:
            AssertionError: If the sample and label counts differ, or if
                there are fewer than ``k`` training samples.
        """
        assert x_train.shape[0] == y_train.shape[0], \
            "the size of x_train must be equal to the size of y_train"
        assert self.k <= x_train.shape[0], \
            "the size of x_train must be at least k"
        self._x_train = x_train
        self._y_train = y_train
        return self

    def predict(self, x_predict):
        """Predict a label for every row of ``x_predict``.

        Args:
            x_predict: 2-D array with the same number of columns as the
                training samples.

        Returns:
            A NumPy array holding one predicted label per input row.

        Raises:
            AssertionError: If fit() has not been called, or if the feature
                count does not match the training data.
        """
        assert self._x_train is not None and self._y_train is not None, \
            "must fit before predict!"
        assert x_predict.shape[1] == self._x_train.shape[1], \
            "the feature number of x_predict must be equal to x_train"
        return np.array([self._predict(x) for x in x_predict])

    def _predict(self, x):
        """Return the majority label among the ``k`` samples nearest to ``x``."""
        # Euclidean distance from x to every training row in one vectorised step.
        distances = np.sqrt(np.sum((self._x_train - x) ** 2, axis=1))
        # A stable sort makes equidistant samples resolve to the lower index.
        nearest = np.argsort(distances, kind="stable")
        # Only the k closest samples are allowed to vote.
        top_k_labels = [self._y_train[i] for i in nearest[:self.k]]
        votes = Counter(top_k_labels)
        return votes.most_common(1)[0][0]

    def score(self, x_test, y_test):
        """Predict ``x_test`` and return the accuracy against ``y_test``.

        Args:
            x_test: 2-D array of samples to classify.
            y_test: 1-D array with the true label of each row of ``x_test``.

        Returns:
            Fraction of rows whose predicted label equals the true label.
        """
        y_predict = self.predict(x_test)
        return self.accuracy_score(y_test, y_predict)

    @staticmethod
    def accuracy_score(y_true, y_predict):
        """Return the fraction of positions where the two label arrays agree.

        Declared static because it uses no instance state; it can be called
        on the class or on an instance.

        Raises:
            AssertionError: If the two arrays differ in length.
        """
        assert y_true.shape[0] == y_predict.shape[0], \
            "the size of y_true must be equal to the size of y_predict"
        return np.sum(y_true == y_predict) / len(y_true)

    def __repr__(self):
        """Return a short description that includes ``k``."""
        return "knn(k=%d)" % self.k

# Raw training samples: ten points with two features each.
raw_data_x = [
    [3.393533211, 2.331273381],
    [3.110073483, 1.781539638],
    [1.343853454, 3.368312451],
    [3.582294121, 4.679917921],
    [2.280362211, 2.866990212],
    [7.423436752, 4.685324231],
    [5.745231231, 3.532131321],
    [9.172112222, 2.511113104],
    [7.927841231, 3.421455345],
    [7.939831414, 0.791631213],
]
# Class label (0 or 1) for each row of raw_data_x, in the same order.
raw_data_y = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]

# Build the training set as NumPy arrays.
trainx = np.array(raw_data_x)
trainy = np.array(raw_data_y)

# Query sample to classify: a single point with two features.
x1 = np.array([8.093607318, 3.365731514])

# Build a 6-neighbour classifier and hand it the training set.
knn_clf = knnclassifier(k=6)
knn_clf.fit(trainx, trainy)

# predict() reads shape[1], so the single sample is lifted to a 1 x 2 matrix.
predict_x = x1[np.newaxis, :]
predict_y = knn_clf.predict(predict_x)


import numpy as np
from sklearn.neighbors import KNeighborsClassifier

# Raw training samples: ten points with two features each.
raw_data_x = [
    [3.393533211, 2.331273381],
    [3.110073483, 1.781539638],
    [1.343853454, 3.368312451],
    [3.582294121, 4.679917921],
    [2.280362211, 2.866990212],
    [7.423436752, 4.685324231],
    [5.745231231, 3.532131321],
    [9.172112222, 2.511113104],
    [7.927841231, 3.421455345],
    [7.939831414, 0.791631213],
]
# Class label (0 or 1) for each row of raw_data_x, in the same order.
raw_data_y = [0, 0, 0, 0, 0, 1, 1, 1, 1, 1]

# Build the training set.
x_train = np.array(raw_data_x)
y_train = np.array(raw_data_y)
# NOTE(review): the original comment here mentioned visualising the data,
# but no plotting code is present in this snippet.

# Sample to classify; the same query point the hand-written classifier uses.
x = np.array([8.093607318, 3.365731514])

# Create the KNeighborsClassifier instance.
knn_classifier = KNeighborsClassifier(n_neighbors=6)

# Fit the classifier; the fitted model is kept inside knn_classifier
# (fit() returns the estimator itself, so the return value can be ignored).
knn_classifier.fit(x_train, y_train)

# predict() needs a 2-D matrix (n_samples x n_features), not a 1-D array,
# so the single sample is reshaped to 1 x 2.
y_predict = knn_classifier.predict(x.reshape(1, -1))







