KNN (k-Nearest Neighbor) Algorithm: a Python Implementation

2021-08-21 09:49:29

from numpy import *
import operator


def createdataset():
    '''Build a tiny toy dataset: four 2-D points with two class labels.'''
    group = array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]])
    labels = ['a', 'a', 'b', 'b']
    return group, labels

def classify0(inx, dataset, labels, k):
    '''k-nearest-neighbor classifier: return the majority label among the k
    training samples closest (Euclidean distance) to the input vector inx.'''
    datasetsize = dataset.shape[0]
    # Euclidean distance from inx to every row of dataset, then argsort by distance
    sorteddistindicies = ((((tile(inx, (datasetsize, 1)) - dataset) ** 2).sum(axis=1)) ** 0.5).argsort()
    classcount = {}
    for i in range(k):
        voteilabel = labels[sorteddistindicies[i]]
        classcount[voteilabel] = classcount.get(voteilabel, 0) + 1
    sortedclasscount = sorted(classcount.items(), key=operator.itemgetter(1), reverse=True)
    return sortedclasscount[0][0]
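
A quick way to sanity-check classify0 is to run it on the toy data from createdataset. The snippet below is my own illustration (not from the original post); the query point [0.9, 1.0] is arbitrary and, sitting closest to the two 'a' samples, should come back labeled 'a':

test_group, test_labels = createdataset()
# classify an arbitrary query point with k = 3 neighbors
print(classify0(array([0.9, 1.0]), test_group, test_labels, 3))  # -> 'a'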

def file2matrix(filename):
    '''Parse a tab-separated text file into a numpy feature matrix and a list of integer labels.'''
    fr = open(filename)
    arrayolines = fr.readlines()
    numberoflines = len(arrayolines)
    returnmat = zeros((numberoflines, 3))
    classlabelvector = []
    index = 0
    for line in arrayolines:
        line = line.strip()
        listfromline = line.split('\t')
        returnmat[index, :] = listfromline[0:3]
        classlabelvector.append(int(listfromline[-1]))
        index += 1
    return returnmat, classlabelvector
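
file2matrix assumes each line of the input file holds three tab-separated numeric features followed by an integer class label in the last column, which is how the book's datingtestset2.txt is laid out. The sample line and call below are for illustration only and assume that file is present in the working directory:

# hypothetical tab-separated line in datingtestset2.txt: three features, then the label
# 40920    8.326976    0.953952    3
datingdatamat, datinglabels = file2matrix('datingtestset2.txt')
print(datingdatamat.shape, datinglabels[:5])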

def autonorm(dataset):
    '''Min-max normalize each feature column: (value - min) / (max - min).'''
    minvals = dataset.min(0)
    maxvals = dataset.max(0)
    ranges = maxvals - minvals
    normdataset = zeros(shape(dataset))
    m = dataset.shape[0]
    normdataset = dataset - tile(minvals, (m, 1))
    normdataset = normdataset / tile(ranges, (m, 1))
    return normdataset, ranges, minvals
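
autonorm applies column-wise min-max scaling, newvalue = (oldvalue - min) / (max - min), so every feature lands in [0, 1] and no single large-valued feature dominates the distance. A small check on the toy group array (my own sketch, not part of the original code):

norm, ranges, minvals = autonorm(array([[1.0, 1.1], [1.0, 1.0], [0, 0], [0, 0.1]]))
# minvals -> [0. 0.],  ranges -> [1.  1.1]
# norm[0] -> [1. 1.],  norm[3] -> [0.  0.0909...]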

def datingclasstest():
    '''Test harness for the dating-site classifier: hold out the first 10% of
    samples as a test set and report the error rate.'''
    horatio = 0.10
    datingdatamat, datinglabels = file2matrix('datingtestset2.txt')
    normmat, ranges, minvals = autonorm(datingdatamat)
    m = normmat.shape[0]
    numtestvecs = int(m * horatio)
    errorcount = 0.0
    for i in range(numtestvecs):
        classifierresult = classify0(normmat[i, :], normmat[numtestvecs:m, :], datinglabels[numtestvecs:m], 3)
        print("the classifier came back with: %d, the real answer is: %d" % (classifierresult, datinglabels[i]))
        if classifierresult != datinglabels[i]:
            errorcount += 1.0
    print("the total error rate is: %f" % (errorcount / float(numtestvecs)))

def classifyperson():
    '''Interactive dating-site prediction: read three feature values from the
    user, normalize them with the training statistics, and classify the person.'''
    resultlist = ['not at all', 'in small doses', 'in large doses']
    percenttats = float(input("percentage of time spent playing video games?"))
    ffmiles = float(input("frequent flier miles earned per year?"))
    icecream = float(input("liters of ice cream consumed per year?"))
    datingdatamat, datinglabels = file2matrix('datingtestset2.txt')
    normmat, ranges, minvals = autonorm(datingdatamat)
    inarr = array([ffmiles, percenttats, icecream])
    classifierresult = classify0((inarr - minvals) / ranges, normmat, datinglabels, 3)
    print("you will probably like this person: ", resultlist[classifierresult - 1])

# Example: handwritten digit recognition system

from os import listdir


def img2vector(filename):
    '''Prepare data: convert a 32x32 text "image" of 0/1 characters into a 1x1024 vector.'''
    returnvect = zeros((1, 1024))
    fr = open(filename)
    for i in range(32):
        linestr = fr.readline()
        for j in range(32):
            returnvect[0, 32 * i + j] = int(linestr[j])
    return returnvect
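
img2vector expects each digit file to contain 32 lines of 32 '0'/'1' characters (1024 pixels in total). The snippet below is my own self-contained illustration: it writes a blank 32x32 file (the name blank_digit.txt is hypothetical) into the working directory and converts it:

with open('blank_digit.txt', 'w') as f:
    for _ in range(32):
        f.write('0' * 32 + '\n')
vec = img2vector('blank_digit.txt')
print(vec.shape)       # (1, 1024)
print(int(vec.sum()))  # 0 -- every pixel is '0'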

def handwritingclasstest():
    '''Test harness for the handwritten-digit recognition system.'''
    hwlabels = []
    trainingfilelist = listdir('trainingdigits')
    m = len(trainingfilelist)
    trainingmat = zeros((m, 1024))
    for i in range(m):
        filenamestr = trainingfilelist[i]
        # file names look like "0_13.txt": the digit before the underscore is the class
        classnumstr = int(filenamestr.split('.')[0].split('_')[0])
        hwlabels.append(classnumstr)
        trainingmat[i, :] = img2vector('trainingdigits/%s' % filenamestr)
    testfilelist = listdir('testdigits')
    errorcount = 0.0
    mtest = len(testfilelist)
    for i in range(mtest):
        filenamestr = testfilelist[i]
        classnumstr = int(filenamestr.split('.')[0].split('_')[0])
        vectorundertest = img2vector('testdigits/%s' % filenamestr)
        classifierresult = classify0(vectorundertest, trainingmat, hwlabels, 3)
        print("the classifier came back with: %d, the real number is: %d" % (classifierresult, classnumstr))
        if classifierresult != classnumstr:
            errorcount += 1.0
    print("\nthe total number of errors is: %d" % errorcount)
    print("\nthe total error rate is: %f" % (errorcount / float(mtest)))
