決策樹構建與深度節點數簡單例子

2021-08-15 11:07:34 字數 4326 閱讀 4047

1、構建treeplotter.py

#coding:utf-8

import matplotlib.pyplot as plt

# Node-style definitions for the decision-tree plot, passed to
# matplotlib's annotate() as keyword dicts.

# boxstyle selects the text-box shape ("sawtooth" = jagged border,
# "round4" = rounded rectangle); fc is the face (fill) color of the
# box given as a gray-scale string -- it is NOT the border-line width.

# Style for internal decision (split) nodes.
decisionnode = dict(boxstyle="sawtooth", fc="0.8")

# Style for leaf (class-label) nodes.
leafnode = dict(boxstyle="round4", fc="0.8")

# "<-" puts the arrow head at xy (the parent point) in annotate().
arrow_args = dict(arrowstyle="<-")

def plotnode(nodetxt, centerpt, parentpt, nodetype):
    """Draw one annotated node with an arrow from parentpt to centerpt.

    nodetxt  -- text shown inside the node box
    centerpt -- where the text box is placed (axes-fraction coords)
    parentpt -- where the arrow originates (axes-fraction coords)
    nodetype -- bbox style dict (decisionnode or leafnode)
    """
    # Both coordinate systems are axes-fraction (0..1) on the shared
    # axes handle createplot.ax1 set up by createplot().
    annotate_kwargs = {
        "xy": parentpt,
        "xycoords": "axes fraction",
        "xytext": centerpt,
        "textcoords": "axes fraction",
        "va": "center",
        "ha": "center",
        "bbox": nodetype,
        "arrowprops": arrow_args,
    }
    createplot.ax1.annotate(nodetxt, **annotate_kwargs)

def createsimpleplot():
    """Demo: draw one decision node and one leaf node on a blank figure.

    NOTE(review): the original name was corrupted to ``create******plot``
    by the blog's word filter -- ``def`` cannot contain ``*`` -- so it is
    restored here as ``createsimpleplot``.
    """
    fig = plt.figure(1, facecolor='white')  # figure 1, white background
    fig.clf()                               # clear any previous contents
    # createplot.ax1 is the module-wide axes handle shared with plotnode().
    # 111 = 1 row x 1 column, first subplot; frameon=False hides the axes
    # rectangle.  (Original had lowercase ``false`` -- a NameError.)
    createplot.ax1 = plt.subplot(111, frameon=False)
    plotnode('a decision node', (0.5, 0.1), (0.1, 0.5), decisionnode)
    plotnode('a leaf node', (0.8, 0.1), (0.3, 0.8), leafnode)
    plt.show()

def getnumleafs(mytree):
    """Return the number of leaf nodes in a nested-dict decision tree.

    The tree has the shape {feature: {value: subtree_or_label, ...}};
    a dict value is a subtree, any other value is a leaf label.
    """
    root_feature = list(mytree.keys())[0]
    branches = mytree[root_feature]
    # Each non-dict branch contributes one leaf; dict branches recurse.
    return sum(
        getnumleafs(branch) if type(branch).__name__ == 'dict' else 1
        for branch in branches.values()
    )

def gettreedepth(mytree):
    """Return the depth of the tree: decision levels on the longest path.

    A single split over only-leaf branches has depth 1; each nested
    dict adds one level.
    """
    root_feature = list(mytree.keys())[0]
    branches = mytree[root_feature]
    depths = []
    for branch in branches.values():
        if type(branch).__name__ == 'dict':
            depths.append(1 + gettreedepth(branch))
        else:
            depths.append(1)
    # default=0 mirrors the original's maxdepth=0 start value when a
    # split has no branches at all.
    return max(depths, default=0)

def createplot(intree):
    """Lay out and draw the full decision tree ``intree``, then show it.

    intree -- nested dict {feature: {value: subtree_or_label, ...}}.
    """
    fig = plt.figure(1, facecolor='white')
    fig.clf()
    # Empty tick lists hide both axes.  (The original line was the
    # syntax error ``dict(xticks=, yticks=)``; the commented-out subplot
    # call shows **axprops was the intended usage, restored here.)
    axprops = dict(xticks=[], yticks=[])
    # Shared axes handle used by plotnode()/plotmidtext(); no frame,
    # no ticks.  (``false`` -> ``False``: lowercase was a NameError.)
    createplot.ax1 = plt.subplot(111, frameon=False, **axprops)
    # Layout globals stored as attributes on plottree:
    #   totalw = number of leaves  -> horizontal slots
    #   totald = tree depth        -> vertical levels
    # The drawing area is the unit square (0..1 on both axes).
    plottree.totalw = float(getnumleafs(intree))
    plottree.totald = float(gettreedepth(intree))
    # Start half a leaf-slot to the left so leaves drawn at i/totalw
    # come out centered rather than shifted right.
    plottree.xoff = -0.5 / plottree.totalw
    plottree.yoff = 1.0  # root sits at the top edge
    plottree(intree, (0.5, 1.0), '')
    plt.show()

def plottree(mytree, parentpt, nodetxt):
    """Recursively draw ``mytree`` below ``parentpt``.

    mytree   -- nested dict {feature: {value: subtree_or_label, ...}}
    parentpt -- axes-fraction point the incoming arrow starts from
    nodetxt  -- edge label (the branch key) written on that arrow

    Relies on layout state createplot() stored on this function:
    plottree.totalw/totald (tree width/depth) and the running cursor
    plottree.xoff/yoff, which this function mutates in place.
    """
    numleafs = getnumleafs(mytree)  # leaves under the current subtree
    depth = gettreedepth(mytree)  # NOTE(review): computed but never used
    firstsides = list(mytree.keys())
    firststr = firstsides[0]  # feature name at this subtree's root
    # Center this decision node above the span of its own leaves:
    # xoff is the cursor left of the already-drawn leaves, and the
    # subtree occupies numleafs slots of width 1/totalw.
    cntrpt = (plottree.xoff + (1.0 + float(numleafs))/2.0/plottree.totalw, plottree.yoff)
    plotmidtext(cntrpt, parentpt, nodetxt)  # label on the incoming edge
    plotnode(firststr, cntrpt, parentpt, decisionnode)
    seconddict = mytree[firststr]
    # Descend one level: drawing proceeds top-down.
    plottree.yoff = plottree.yoff - 1.0/plottree.totald
    for key in seconddict.keys():
        if type(seconddict[key]).__name__=='dict':
            # dict value = internal (decision) node: recurse.
            plottree(seconddict[key],cntrpt,str(key))
        else:
            # Non-dict value = leaf: advance the x cursor one slot,
            # draw the leaf and its edge label.
            plottree.xoff = plottree.xoff + 1.0/plottree.totalw
            plotnode(seconddict[key], (plottree.xoff, plottree.yoff), cntrpt, leafnode)
            plotmidtext((plottree.xoff, plottree.yoff), cntrpt, str(key))
    # Back up one level before returning to the caller.
    plottree.yoff = plottree.yoff + 1.0/plottree.totald

def plotmidtext(cntrpt, parentpt, txtstring):
    """Write txtstring at the midpoint of the parent->child edge.

    Text is rotated 30 degrees and centered on the edge midpoint;
    drawn on the shared axes handle createplot.ax1.
    """
    mid_x = cntrpt[0] + (parentpt[0] - cntrpt[0]) / 2.0
    mid_y = cntrpt[1] + (parentpt[1] - cntrpt[1]) / 2.0
    createplot.ax1.text(mid_x, mid_y, txtstring, va="center", ha="center", rotation=30)

# Hard-coded sample trees used instead of building one from data.
def retrievetree(i):
    """Return one of two pre-built sample decision trees (i = 0 or 1).

    NOTE(review): the blog stripped the dict literals from this function
    (only stray braces survived); the bodies are restored from the
    standard *Machine Learning in Action* ``retrieveTree`` samples that
    the rest of this post clearly uses (3 leaves / depth 2 for tree 0).
    """
    listoftrees = [
        {'no surfacing': {0: 'no',
                          1: {'flippers': {0: 'no', 1: 'yes'}}}},
        {'no surfacing': {0: 'no',
                          1: {'flippers': {0: {'head': {0: 'no', 1: 'yes'}},
                                           1: 'no'}}}},
    ]
    return listoftrees[i]

2、構建 test.py

# Driver script (test.py): exercises the treeplotter module.
import treeplotter

# treeplotter.createsimpleplot()  # two-node demo figure

# Sample tree to plot.  NOTE(review): the blog stripped the dict
# literal (only ``}}}`` survived); restored to the standard sample
# tree whose expected stats are printed below (3 leaves, depth 2).
mytree = {'no surfacing': {0: 'no', 1: {'flippers': {0: 'no', 1: 'yes'}}}}
# mytree = treeplotter.retrievetree(0)  # equivalent pre-built tree

print(treeplotter.getnumleafs(mytree))   # expected: 3
print(treeplotter.gettreedepth(mytree))  # expected: 2

treeplotter.createplot(mytree)

決策樹構建

from sklearn import tree import pydotplus x 0,0 1,1 y 0,1 clf tree.decisiontreeclassifier 決策樹分類器 clf clf.fit x,y 對樣本x和對應的類y訓練決策樹 clf.predict 2.2.為乙個新的...

決策樹簡析

決策樹的基礎知識參照這裡面的內容 總結 1 決策樹的關鍵在於分類屬性的選擇 2 衡量分類屬性優劣的判別標準,資訊增益 3 a 計算類別資訊熵 b 計算不同分類屬性的資訊熵 c 類別資訊熵與分類資訊熵之差即為資訊增益 d 選擇資訊增益最大的屬性作為 標準 e 若已無分類屬性可以使用,但是並未達到純子集...

決策樹 ID3構建決策樹

coding utf 8 from math import log import operator 建立訓練資料集 defcreatedataset dataset 1,1,yes 1,1,yes 1,0,no 0,1,no 0,1,no 資料集的最後乙個元素作為該資料的標籤,是否是魚 labels...