機器學習之 樸素貝葉斯簡單例項

2022-07-30 00:09:15 字數 3978 閱讀 8395

import math

import numpy as np

#載入模擬資料

defloaddata():

postinglist=[['

my','

dog','

has','

flea

','problem

','help

','please'],

['maybe

','not

','take

','him

','to

','dog

','park

','stupid'],

['my

','dalmation

','is

','so

','cute

','i

','love

','him'],

['stop

','posting

','stupid

','worthless

','garbage'],

['mr

','licks

','ate

','my

','steak

','how

','to

','stop

','him'],

['quit

','buying

','worthless

','dog

','food

','stupid']]

classvec = [0,1,0,1,0,1] #

1 侮辱 0 非侮辱

return

postinglist,classvec

#建立詞彙表

defcreateset(dataset):

result =set()

for i in

dataset:

result = result |set(i)

return

list(result)

#dataset,labels = loaddata()

#vacablist = createset(dataset)

#print('外lables',labels)

#print('外dataset',dataset)

#print('外vacablist:',vacablist)

#建立和詞彙表對應的向量

defsetofword(vacablist,inputdata):

mylist = [0] *len(vacablist)

for word in

inputdata:

if word in

vacablist:

mylist[vacablist.index(word)] = 1

else

:

print('

沒有 {} 這個詞

'.format(word))

return

mylist

#setofdata = setofword(vacablist,dataset[3])

#print('外setofdata:',setofdata) #[1, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 0]

#print(vacablist) #['garbage', 'to', 'worthless', 'ate', 'has', 'so', 'take', 'cute', 'dog', 'flea', 'buying', 'help', 'is', 'park', 'i', 'food', 'my', 'licks', 'posting', 'dalmation', 'problem', 'please', 'stop', 'how', 'stupid', 'maybe', 'love', 'steak', 'quit', 'him', 'not', 'mr']

#print(dataset[3]) #['stop', 'posting', 'stupid', 'worthless', 'garbage']

#trainmat =

#for i in dataset:##

print('外trainmat:',trainmat)

#訓練函式,算p(word\1)的概率

defp1(trainmat,labels):

plable_1 = sum(labels)/len(labels)

data_0 =np.ones(len(trainmat[0]))

count_0 = 2data_1 =np.ones(len(trainmat[0]))

count_1 = 2

for i in

range(len(labels)):

if labels[i] ==0:

data_0 +=trainmat[i]

count_0 +=sum(trainmat[i])

if labels[i] == 1:

data_1 +=trainmat[i]

count_1 +=sum(trainmat[i])

data_0 = data_0 /count_0

data_1 = data_1/count_1

print('

data_0:{},count:{}

'.format(data_0,count_0))

print('

data_1:{},count:{}

'.format(data_1, count_1))

print('

plabel_1:

',plable_1)

return

data_0,data_1,plable_1

#p1(trainmat,labels)

#用得到的概率分類

defclassfy(testset,data_0,data_1,plabel_1):

print('

開始classfy')

p1 = 1p0 = 1

for i in

range(len(testset)):

if testset[i] ==1:

p1 = p1 *data_1[i]

p0 = p0 *data_0[i]

p1 = p1 *plabel_1

p0 = p0 * (1-plabel_1)

print('

p1:{},p0:{}

'.format(p1,p0))

if p1>p0:

print('

該分類為1')

return 1

else

:

print('

該分類為0')

return0#

測試總邏輯**

deftest():

dataset,labels =loaddata()

vacablist =createset(dataset)

trainmat =

for i in dataset: #

因為訓練函式需要訓練資料是詞彙表的格式

data_0, data_1, plable_1 =p1(trainmat,labels)

testlist = ['

my','

love

','stupid']

testdata =setofword(vacablist,testlist)

classfy(testdata,data_0,data_1,plable_1)

test()

機器學習之樸素貝葉斯

寫在前面 本文寫自初學時,若後續學習過程中有新的理解,將不定期進行更新 若文中敘述有誤,望不吝賜教,也將及時修改 貝葉斯分類是一類分類演算法的總稱,這類演算法均以貝葉斯定理為基礎,故統稱為貝葉斯分類。而樸素樸素貝葉斯分類是貝葉斯分類中最簡單,也是常見的一種分類方法。在理解樸素貝葉斯之前,需要對兩個數...

機器學習之樸素貝葉斯

樸素貝葉斯 1 樸素貝葉斯 2 一 高斯樸素貝葉斯 gaussiannb 實現了高斯樸素貝葉斯分類演算法,假設特徵的似然為高斯分佈:$P(x_i \mid y) = \frac{1}{\sqrt{2\pi\sigma_y^2}} \exp\!\left(-\frac{(x_i - \mu_y)^2}{2\sigma_y^2}\right)$ 引數...

機器學習 樸素貝葉斯

樸素貝葉斯原理 1.貝葉斯公式 2.樸素貝葉斯的模型 3.後驗概率最大化的含義 4.樸素貝葉斯的引數估計 4.1.特徵是離散值 假設符合多項式分布 4.2.特徵是稀疏的離散值 假設符合伯努利分布 4.3.特徵是連續值 假設符合正態分佈 5.樸素貝葉斯演算法過程 6.樸素貝葉斯演算法小結 scikit...