Python3實現FP Growth演算法

2021-09-18 06:33:19 字數 4690 閱讀 5706

# !/usr/bin/python

# coding:utf-8

def loadsimpdat():
    """Return the small hard-coded transaction dataset used by the demo.

    Returns:
        A list of six transactions, each a list of single-character items.
    """
    simpdat = [
        ['r', 'z', 'h', 'j', 'p'],
        ['z', 'y', 'x', 'w', 'v', 'u', 't', 's'],
        ['z'],
        ['r', 'x', 'n', 'o', 's'],
        ['y', 'r', 'x', 'z', 'q', 't', 'p'],
        ['y', 'z', 'x', 'e', 'q', 's', 't', 'm'],
    ]
    return simpdat

def createinitset(dataset):
    """Build the initial weighted transaction set.

    Args:
        dataset: Iterable of transactions, each an iterable of hashable items.

    Returns:
        A dict mapping ``frozenset(transaction)`` to the number of times that
        exact set of items occurs in ``dataset``.
    """
    retdict = {}
    for trans in dataset:
        fset = frozenset(trans)
        # Identical transactions collapse onto one key and are counted.
        retdict[fset] = retdict.get(fset, 0) + 1
    return retdict

class treenode:
    """A single node of an FP-tree.

    Attributes are plain public fields:
        name: The item stored in this node.
        count: Accumulated occurrence count for the path ending here.
        nodelink: Next node holding the same item (header-table chain), or None.
        parent: Parent node, or None for the root.
        children: Dict mapping child item -> child ``treenode``.
    """

    def __init__(self, namevalue, numoccur, parentnode):
        self.name = namevalue
        self.count = numoccur
        # Filled in later, when another node with the same item is linked.
        self.nodelink = None
        self.parent = parentnode
        self.children = {}

    def inc(self, numoccur):
        """Add ``numoccur`` to this node's count."""
        self.count += numoccur

    def disp(self, ind=1):
        """Print this subtree, indenting one extra space per depth level."""
        print(' ' * ind, self.name, ' ', self.count)
        for child in self.children.values():
            child.disp(ind + 1)

def createtree(dataset, minsup=1):
    """Build an FP-tree and its header table from weighted transactions.

    Args:
        dataset: Dict mapping each transaction (a ``frozenset`` of items) to
            the number of times it occurs.
        minsup: Minimum support an item needs in order to be kept.

    Returns:
        ``(root, headertable)`` where ``headertable`` maps every frequent item
        to ``[support, first_node_for_item]``; ``(None, None)`` when no item
        reaches ``minsup``.
    """
    # First pass: total support per item. Each transaction contributes its
    # own occurrence count, not 1, so weighted inputs are counted correctly.
    support = {}
    for trans, count in dataset.items():
        for item in trans:
            support[item] = support.get(item, 0) + count

    # Keep only the items that meet the minimum support; the second slot of
    # each entry will hold the head of that item's node-link chain.
    headertable = {
        item: [total, None]
        for item, total in support.items()
        if total >= minsup
    }
    if not headertable:
        return None, None

    rettree = treenode('φ', 1, None)

    # Second pass: insert every transaction into the tree.
    for transet, count in dataset.items():
        # Frequent items of this transaction mapped to their global support.
        locald = {
            item: headertable[item][0]
            for item in transet
            if item in headertable
        }
        if locald:
            # Order by global support, then by item, both descending, so that
            # equal transactions always produce the same path.
            ranked = sorted(
                locald.items(), key=lambda p: (p[1], p[0]), reverse=True
            )
            ordereditems = [item for item, _ in ranked]
            updatetree(ordereditems, rettree, headertable, count)
    return rettree, headertable

def updatetree(items, intree, headertable, count):
    """Insert an ordered item list into the FP-tree below ``intree``.

    Args:
        items: Items of one transaction, already sorted in insertion order.
        intree: Node under which ``items[0]`` is inserted.
        headertable: Dict mapping item -> ``[support, first_node_for_item]``;
            updated in place when a new node is created.
        count: Occurrence count of the transaction being inserted.
    """
    if not items:
        # Nothing to insert; avoids an IndexError on an empty list.
        return

    first = items[0]
    child = intree.children.get(first)
    if child is not None:
        # The item already exists on this path: just add the count.
        child.inc(count)
    else:
        # New branch: create the node and hook it into the header chain.
        child = treenode(first, count, intree)
        intree.children[first] = child
        if headertable[first][1] is None:
            headertable[first][1] = child
        else:
            updateheader(headertable[first][1], child)

    if len(items) > 1:
        # Continue with the remaining items, using the current node as parent.
        updatetree(items[1:], child, headertable, count)

def updateheader(nodetotest, targetnode):
    """Append ``targetnode`` to the end of a node-link chain.

    Args:
        nodetotest: Any node of the chain; the chain is followed through
            ``nodelink`` until its last node.
        targetnode: Node to attach after the last node.
    """
    tail = nodetotest
    while tail.nodelink is not None:
        tail = tail.nodelink
    tail.nodelink = targetnode

def ascendtree(leafnode, prefixpath):
    """Collect the item names from ``leafnode`` up to, but excluding, the root.

    Args:
        leafnode: Node to start from; its own name is appended first.
        prefixpath: List that receives the names, in leaf-to-root order.
            It is modified in place.
    """
    node = leafnode
    # The root is the only node without a parent and is never recorded.
    while node.parent is not None:
        prefixpath.append(node.name)
        node = node.parent

def findprefixpath(basepat, headtable):
    """Return the conditional pattern base of ``basepat``.

    Args:
        basepat: The item whose prefix paths are collected.
        headtable: Dict mapping item -> ``[support, first_node_for_item]``.

    Returns:
        A dict mapping each prefix path (a ``frozenset`` of the ancestor
        items, excluding ``basepat`` itself) to the count of the node the
        path was collected from.
    """
    condpats = {}
    node = headtable[basepat][1]
    # Walk the node-link chain: one candidate prefix path per occurrence.
    while node is not None:
        prefixpath = []
        ascendtree(node, prefixpath)
        # Element 0 is the node itself; only real ancestors form a prefix.
        if len(prefixpath) > 1:
            condpats[frozenset(prefixpath[1:])] = node.count
        node = node.nodelink
    return condpats

def minetree(intree, headertable, minsup=1, prefix=None, freqitemlist=None):
    """Recursively mine frequent itemsets from an FP-tree.

    Args:
        intree: Root of the tree being mined (not read by this function).
        headertable: Dict mapping item -> ``[support, first_node_for_item]``.
        minsup: Minimum support used when building conditional trees.
        prefix: Set of items already fixed on this recursion branch.
            Defaults to a fresh empty set.
        freqitemlist: List that receives every frequent itemset found.
            Defaults to a fresh empty list.

    Returns:
        ``freqitemlist``, so the result is available even when the caller
        did not supply its own list.
    """
    # Fresh containers per call: mutable defaults would be shared between
    # unrelated invocations.
    if prefix is None:
        prefix = set()
    if freqitemlist is None:
        freqitemlist = []

    # Process items from the least to the most frequent (ties by item).
    ranked = sorted(headertable.items(), key=lambda p: (p[1][0], p[0]))
    bigl = [item for item, _ in ranked]

    for basepat in bigl:
        newfreqset = prefix.copy()
        newfreqset.add(basepat)
        freqitemlist.append(newfreqset)

        # Conditional pattern base of the current item.
        condpattbases = findprefixpath(basepat, headertable)
        # Conditional FP-tree built from that pattern base.
        mycondtree, myhead = createtree(condpattbases, minsup)
        if myhead is not None:
            print('condpattbases: ', basepat, condpattbases)
            mycondtree.disp()
            print('*' * 30)
            minetree(mycondtree, myhead, minsup, newfreqset, freqitemlist)
    return freqitemlist

def main():
    """Run the FP-growth demo on the built-in sample dataset.

    Builds the FP-tree with a minimum support of 3, prints it, prints the
    conditional pattern base of every frequent item, then mines the tree
    with a minimum support of 2.
    """
    simpdat = loadsimpdat()
    dictdat = createinitset(simpdat)
    myfptree, myheader = createtree(dictdat, 3)
    myfptree.disp()
    print('*' * 30)

    # Conditional pattern bases, most frequent item first (ties by item).
    ranked = sorted(
        myheader.items(), key=lambda p: (p[1][0], p[0]), reverse=True
    )
    for key, _ in ranked:
        condpats = findprefixpath(key, myheader)
        print(key, condpats)
    print('*' * 30)

    # Build and print the conditional FP-trees.
    minetree(myfptree, myheader, 2)

# Run the demo only when executed as a script, not when imported.
if __name__ == '__main__':
    main()

python3實現CryptoJS AES加密演算法

from crypto.cipher import aes from binascii import b2a hex,a2b hex import base64 class aescrypt def init self,key self.key key.encode utf8 self.mode a...

Python3 實現選擇排序

選擇排序 selection sort 原理很簡單,就是依次在未排序資料段中選擇出一個最小的數,然後將其排列在已排序資料段末端,直到整個資料段排序完成演算法結束。程式如下,第一個迴圈依次縮小未排序資料段的長度,並且每次將最小值暫定為未排序中第一位索引。第二個迴圈依次將該最小值與未排序資料段作比較,選...

python3實現線性單元

理論知識見 直接上python3的 coding utf 8 import matplotlib.pyplot as plt from functools import reduce class perceptron object 初始化,輸入訓練數目,啟用函式 def init self,inpu...