K means演算法實現

2021-08-13 17:53:19 字數 3965 閱讀 9035

import ast
import math

import numpy

class point_data_reader:
    """Load whitespace-separated numeric points from a text file.

    Every non-blank line of the file is one point and every
    whitespace-separated token on that line is one coordinate.
    """

    # Class-level default kept for backward compatibility; each instance
    # receives its own value in __init__.
    file_name = str()

    def __init__(self, file_name):
        """Remember the path of the data file.

        Args:
            file_name: Path of the text file that holds the points.
        """
        # Stored on the instance (the original wrote to the class, so every
        # reader silently shared the most recently supplied path).
        self.file_name = file_name

    @staticmethod
    def _parse_token(token, num_lost):
        """Return *token* as an int/float, or *num_lost* if it is not one."""
        # NOTE(review): the original called eval() on every token, which
        # executes arbitrary code found in the data file. literal_eval only
        # accepts Python literals, so plain numbers parse as before while
        # expressions such as "1+2" now count as missing values.
        try:
            value = ast.literal_eval(token)
        except (ValueError, TypeError, SyntaxError):
            return num_lost
        if isinstance(value, bool) or not isinstance(value, (int, float)):
            return num_lost
        return value

    def get_data_list(self, num_lost):
        """Read the file and return its points as a list of lists.

        Args:
            num_lost: Placeholder stored for any token that cannot be read
                as a number. NOTE(review): the statements handling this case
                were missing from the source; substituting ``num_lost`` is
                inferred from the parameter name -- confirm.

        Returns:
            A list with one entry per non-blank line; each entry is the list
            of numbers (or ``num_lost`` placeholders) found on that line.

        Raises:
            OSError: If the file cannot be opened for reading.
        """
        db = []
        # Read-only mode inside a context manager: no write permission is
        # required and the handle is closed even if parsing fails.
        with open(self.file_name, 'r') as file_:
            for line in file_:
                tokens = line.split()
                if not tokens:
                    # A blank line carries no coordinates; emitting an empty
                    # point would break the fixed-dimension clustering code.
                    continue
                db.append([self._parse_token(token, num_lost)
                           for token in tokens])
        return db

class k_means:
    """K-means clustering over a list of equally sized numeric points."""

    def __init__(self, point_list):
        """Store the data set and derive its dimensionality.

        Args:
            point_list: Non-empty sequence of points; each point is an
                indexable sequence of numbers. The length of the first point
                is used as the dimension of every point.

        Raises:
            IndexError: If ``point_list`` is an empty list.
        """
        # Kept on the instance (the original assigned to the class, so two
        # k_means objects overwrote each other's data).
        self.point_list = point_list
        self.dimension = len(point_list[0])

    def add_random_point(self):
        """Return a random point inside the bounding box of the data.

        NOTE(review): the sampling statement was missing from the source;
        drawing each coordinate uniformly between the per-axis minimum and
        maximum is inferred from the min/max scan that survived -- confirm.

        Returns:
            A list of ``self.dimension`` floats.
        """
        point = []
        for axis in range(self.dimension):
            column = [row[axis] for row in self.point_list]
            point.append(float(numpy.random.uniform(min(column), max(column))))
        return point

    def seprate_data(self, gather_point_list):
        """Assign every data point to its nearest centroid.

        Args:
            gather_point_list: List of centroids, each a sequence of
                ``self.dimension`` numbers.

        Returns:
            A list with ``len(gather_point_list)`` entries; entry ``n`` holds
            the data points whose nearest centroid is
            ``gather_point_list[n]``.

        Raises:
            ValueError: If ``gather_point_list`` is empty while there is data
                to assign.
        """
        split_data = [[] for _ in gather_point_list]
        for data_reader in self.point_list:
            distances = [self.euclidean_distance(data_reader, centroid)
                         for centroid in gather_point_list]
            # Smallest distance wins (the original kept the largest one and
            # so grouped each point with its farthest centroid). Ties go to
            # the first centroid; using the position instead of list.index
            # on the centroid value also keeps duplicate centroids distinct.
            nearest = distances.index(min(distances))
            split_data[nearest].append(data_reader)
        return split_data

    def euclidean_distance(self, point, gather_point):
        """Return the Euclidean distance sqrt(sum_j (p_j - c_j)^2).

        Args:
            point: Coordinates of a data point.
            gather_point: Coordinates of a centroid.

        Returns:
            The distance as a float.
        """
        squared = 0
        for coordinate, center in zip(point, gather_point):
            squared += (coordinate - center) ** 2
        return math.sqrt(squared)

    def get_centeral_point(self, seprate_data):
        """Recompute each centroid as the mean of the points assigned to it.

        Minimising the summed squared distance to a cluster's points gives
        the per-dimension average, so the new centroid is the cluster mean.

        Args:
            seprate_data: List of clusters as returned by ``seprate_data``.

        Returns:
            One centroid (a list of floats) per non-empty cluster, in input
            order. Empty clusters are skipped, so the result may be shorter
            than ``seprate_data``.
        """
        gather_point = []
        for point_list_reader in seprate_data:
            if not point_list_reader:
                continue
            members = numpy.asarray(point_list_reader, dtype=float)
            gather_point.append(members.mean(axis=0).tolist())
        return gather_point

    def _update_until_stable(self, gather_point):
        """Run assign/recompute steps until no centroid is dropped."""
        while True:
            last_gather_point = list(gather_point)
            # Hand the data to the current centroids ...
            seprate_data = self.seprate_data(last_gather_point)
            # ... and move every centroid to the mean of its points.
            gather_point = self.get_centeral_point(seprate_data)
            # A centroid that attracted no point has been dropped; repeat so
            # the previous and current centroid lists line up one to one.
            if len(gather_point) == len(last_gather_point):
                return last_gather_point, gather_point, seprate_data

    def get_seprate_point(self, k=2, error=0.000001):
        """Cluster the data, starting from ``k`` random centroids.

        Args:
            k: Number of initial random centroids (at least 1). Centroids
                that end up with no points are removed, so fewer clusters
                may be returned.
            error: Stop once the total absolute centroid movement of one
                update falls below this value. Must be positive; the loop
                test is ``>=``, so 0 would never terminate.

        Returns:
            A tuple ``(gather_point, seprate_data)``: the final centroids and
            the matching list of clusters.

        Raises:
            ValueError: If ``k`` is smaller than 1.
        """
        if k < 1:
            raise ValueError("k must be at least 1")

        # Start from k random points inside the data's bounding box.
        gather_point = [self.add_random_point() for _ in range(k)]

        last_gather_point, gather_point, seprate_data = (
            self._update_until_stable(gather_point))
        # Sum the absolute shifts: the original summed signed differences,
        # so opposite movements cancelled and iteration could stop early.
        while numpy.abs(numpy.asarray(gather_point)
                        - numpy.asarray(last_gather_point)).sum() >= error:
            last_gather_point, gather_point, seprate_data = (
                self._update_until_stable(gather_point))
        return gather_point, seprate_data

# Demo entry point: cluster the points stored in 'text.dat' and print the
# resulting centroids.
if __name__ == '__main__':
    # Unreadable tokens in the data file are replaced by 1.0.
    db = point_data_reader('text.dat').get_data_list(num_lost=1.0)

    k = k_means(db)

    # numpy.random.random_integers has been deprecated since NumPy 1.11;
    # randint's upper bound is exclusive, so (1, 11) still draws the number
    # of clusters from 1..10 inclusive.
    cluster_count = numpy.random.randint(1, 11)

    gather_point, seprate_data = k.get_seprate_point(k=cluster_count, error=0.001)

    print(gather_point)

# print(seprate_data)

Kmeans演算法實現

include opencv2 highgui highgui.hpp include opencv2 core core.hpp include using namespace cv using namespace std static void help int main int argc ch...

K means演算法實現

1. 首先隨機生成k個聚類中心點。2. 根據聚類中心點,將資料分為k類;分類的原則是資料離哪個中心點近就將它分為哪一類別。3. 再根據分好的類別的資料,重新計算各類別的聚類中心點。不斷地重複第2和第3步,直到中心點不再變化。from numpy import import csv import matplotlib.pyp...

matlab實現kmeans演算法

kmeans是一種聚類演算法(無監督學習)。演算法分為三步:1.隨機選取k個聚類中心。2.計算每個樣本點離哪個聚類中心最近(距離計算),就將該樣本分為這個類。3.重新計算這k個類的聚類中心。一種簡單的計算方法為:計算每個類的平均值即為新的聚類中心。重複執行步驟2和3,直到聚類中心的變化小於給定閾值,或者達到...