Python implementation of the KNN classification algorithm


The k-nearest neighbours algorithm (KNN for short) is used for classification.

Three key elements: the value of k, the distance metric, and the decision rule (majority vote).
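As a quick reference (my own addition, not part of the original post), these are the two distance metrics the script below computes for a training point (x_i, y_i) and a test point (x, y); the code defaults to the Euclidean one:

d_{\text{Manhattan}} = |x - x_i| + |y - y_i|, \qquad
d_{\text{Euclid}} = \sqrt{(x - x_i)^2 + (y - y_i)^2}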

import numpy
import matplotlib.pyplot as plt

'''Given a training set, its class labels and a test point:
1. Broadcast the test point to the same shape as the training set.
2. Compute the distances.
3. Sort the distance list and return the indexes of the k nearest points.
4. Keep one counter per class, walk the sorted list, check each label, and let the majority decide.
5. Visualize the data.
'''

if __name__ == '__main__':
    k = 5  # given in advance; use an odd number
    # Class labels of the training set
    trainlabel = ["b", "b", "a", "a", "b", "b", "a", "b", "a", "a", "b", "a"]
    # Coordinates of the training set
    traindata = numpy.loadtxt("knndata1.txt", delimiter=",")
    # Test point
    testdata = numpy.array([0.7, 0.7])

    # Broadcast the test point to the same shape as the training set
    # (not strictly necessary, but it makes plotting easier)
    testdata = numpy.tile(testdata, (traindata.shape[0], 1))

    # Compute the distances
    mandist = numpy.sum(abs(testdata - traindata), axis=1)           # Manhattan distance
    eucdist = numpy.sum((testdata - traindata) ** 2, axis=1) ** 0.5  # Euclidean distance (the usual choice)

    # Sort and return the original indexes
    sortindex = numpy.argsort(eucdist)
    # sortindex = numpy.argsort(mandist)

    # Majority vote over the k nearest neighbours
    a = b = 0
    for i in sortindex[0:k]:
        if trainlabel[i] == "a":
            a += 1
        else:
            b += 1
    print(a, b)
    print("i am a") if a > b else print("i am b")

    # Visualize the data
    plt.figure()
    plt.title("wgs")
    for i in range(traindata.shape[0]):  # one iteration per training point
        if trainlabel[i] == "a":
            plt.scatter(traindata[i, 0], traindata[i, 1], c="r")
        else:
            plt.scatter(traindata[i, 0], traindata[i, 1], c="g")
    if a > b:
        plt.scatter(testdata[0, 0], testdata[0, 1], c="r", marker="*", label="test point")
    else:
        plt.scatter(testdata[0, 0], testdata[0, 1], c="g", marker="*", label="test point")
    plt.grid(True)
    plt.legend(bbox_to_anchor=(0, 1.1), loc=2, borderaxespad=0)
    plt.show()
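As a side note (my own addition, not part of the original post), the distance, argsort and voting steps above can be wrapped in one small reusable function. The name knn_classify and its parameters are my own; this is only a sketch of the same idea:

import numpy

def knn_classify(test_point, traindata, trainlabel, k=5):
    """Return the majority label among the k training points closest to test_point."""
    # Euclidean distance from the test point to every training point
    dist = numpy.sum((traindata - numpy.asarray(test_point)) ** 2, axis=1) ** 0.5
    # Indexes of the k nearest neighbours
    nearest = numpy.argsort(dist)[:k]
    # Majority vote
    votes = {}
    for i in nearest:
        votes[trainlabel[i]] = votes.get(trainlabel[i], 0) + 1
    return max(votes, key=votes.get)

# Usage with the same data as above (assuming knndata1.txt exists):
# traindata = numpy.loadtxt("knndata1.txt", delimiter=",")
# print(knn_classify([0.7, 0.7], traindata, trainlabel, k=5))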

A small example: classifying a film as a romance ("愛情片") or an action film ("動作片").

# Training set (the dictionary literal was omitted in the original post)
traindata = ...
# Test set (the dictionary literal was omitted in the original post)
testdata = ...
# Extract the data
tranlable = []  # labels
getlist = []    # training-set coordinates
x1 = testdata["hi boy"]
x2 = testdata["hi boy"]
new_test = [x1[0], x2[1]]
# The loop body was garbled in the source; reconstructed from how
# getlist and tranlable are used below.
for i in traindata.keys():
    temp = traindata[i]
    getlist.append(temp[0])    # x coordinate
    getlist.append(temp[1])    # y coordinate
    tranlable.append(temp[2])  # label

# x coordinates sit at the even indexes of getlist, y coordinates at the odd ones
x = [i for i in getlist[0::2]]
y = [i for i in getlist[1::2]]
new_traindata = numpy.c_[x, y]

# Broadcast the test point to the same shape as the training set
new_test = numpy.tile(new_test, (new_traindata.shape[0], 1))

# Euclidean distance
distance = numpy.sqrt(numpy.sum((new_test - new_traindata) ** 2, axis=1))

# Sort and return the original indexes
sortlist = numpy.argsort(distance)

# Classify by majority vote
k = 3
a = b = 0
for i in sortlist[0:k]:
    if tranlable[i] == "愛情片":
        a += 1
    else:
        b += 1
print("愛情片") if a > b else print("動作片")

# Visualize the data
for i in range(new_traindata.shape[0]):
    if tranlable[i] == "愛情片":
        plt.scatter(new_traindata[i, 0], new_traindata[i, 1], c="r")
    else:
        plt.scatter(new_traindata[i, 0], new_traindata[i, 1], c="g")
if a > b:
    plt.scatter(new_test[0, 0], new_test[0, 1], c="r", marker="+")
else:
    plt.scatter(new_test[0, 0], new_test[0, 1], c="g", marker="+")
plt.show()
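The traindata and testdata dictionaries are missing from the post as it was captured, so they are left as "..." above. Purely as an illustration of a shape that would make the snippet run, here is one hypothetical layout; the film keys, the feature values and the values behind the "hi boy" key are my own placeholders, not the author's data:

# Hypothetical data layout only; the real values were not in the post.
# Each training entry holds two numeric features followed by a class label.
traindata = {
    "film1": [1.0, 101, "愛情片"],
    "film2": [5.0, 89, "愛情片"],
    "film3": [108, 5.0, "動作片"],
    "film4": [115, 8.0, "動作片"],
}
# The test entry only needs the two features; "hi boy" is the key the snippet expects.
testdata = {"hi boy": [20, 66]}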
