# Sample transaction database: one inner list per transaction, each entry an
# item label.
dats = [
    ['google', 'amazon'],
    ['amazon', 'google', 'python', 'cse'],
    ['cse', 'google'],
    ['amazon', 'python'],
    ['cse', 'amazon', 'python', 'google'],
    ['amazon', 'google', 'cse', 'data'],
]
class fptree:
    """A single node of the FP-tree.

    Attributes:
        ide: label stored in the node.
        cnt: counter carried by the node.
        parent: the node's parent, or None when it has none.
        link: next node in a chain of nodes; None until something is chained
            onto this node.
        child: dict of child nodes; starts empty.
    """

    def __init__(self, ide, cnt, parent):
        """Create a node labelled ``ide`` with counter ``cnt`` under ``parent``."""
        self.ide = ide
        self.cnt = cnt
        self.parent = parent
        self.link = None
        # A fresh dict per instance, so siblings never share children.
        self.child = {}

    def increm(self, cnt):
        """Add ``cnt`` to this node's counter."""
        self.cnt += cnt

    def __repr__(self):
        # Parent/children are left out on purpose to keep the repr short and
        # to avoid recursing through the whole tree.
        return "fptree(ide=%r, cnt=%r)" % (self.ide, self.cnt)
# NOTE(review): this definition is damaged in the text as it stands here.
# Indentation is gone, and the span between the "if v" and the "1:" on the
# "if v1:" line below appears to have been dropped (possibly text between a
# "<" and a ">" that was stripped) -- TODO restore from the original source.
# The callers visible in this file unpack the result as two values
# (genFi(...)[0] and genFi(...)[1]); no return statement survives here.
def genFi(data,minsp,dic=False):
# kdc: item -> number of transactions in `data` that contain the item.
kdc={}
# fi, sfi and nnewdata are initialised but not used in the lines that survive.
fi,sfi=[],[]
nnewdata={}
for dat in data:
for i in range(0,len(dat)):
# First time this item is seen anywhere: start its counter.
if dat[i] not in kdc:
kdc[dat[i]]=1
# Item already occurred earlier in this same transaction: count it only once.
elif dat[i] in dat[0:i]:
continue
else:
kdc[dat[i]]+=1
for k,v in kdc.items():
# NOTE(review): `v1` is not defined anywhere visible; presumably this was a
# comparison of v against minsp whose remainder was lost -- confirm.
if v1:
# NOTE(review): `cnt` and `null` are not defined in genFi, and `genTree` has no
# definition in this text; this line presumably is the tail of a separate
# genTree function whose header and body were lost -- confirm.
genTree(data[1:],cnt,null.child[data[0]],kdc)
def updateNull(alr_pre_node, same_new):
    """Append ``same_new`` to the end of the link chain starting at ``alr_pre_node``.

    The chain ties together nodes with the same name that sit on different
    paths. ``alr_pre_node`` is a node already in the chain; ``same_new`` is
    the node to attach after the current last element.
    """
    # Walk to the last node of the chain (the one whose link is still unset).
    while alr_pre_node.link is not None:
        alr_pre_node = alr_pre_node.link
    alr_pre_node.link = same_new
def gen_cond_pattern_bases(node):
    """Collect the ancestor path of every node on ``node``'s link chain.

    Starting at ``node`` and following ``.link`` until it is None, each
    visited node contributes one entry: the labels (``.ide``) of its
    ancestors, nearest ancestor first, mapped to that node's ``.cnt``.
    The topmost ancestor (the one whose ``parent`` is None) is never part of
    a path, and nodes with no ancestors other than that one contribute
    nothing.

    Returns:
        dict mapping ``tuple`` of ancestor labels -> the node's counter.
        Empty when ``node`` is None.
    """
    patterns = {}
    while node is not None:
        ancestors = []
        current = node.parent
        # Climb iteratively so that long paths cannot exhaust the recursion
        # limit; stop before the node that has no parent.
        while current is not None and current.parent is not None:
            ancestors.append(current.ide)
            current = current.parent
        if ancestors:
            patterns[tuple(ancestors)] = node.cnt
        node = node.link
    return patterns
def bottom_up(node, prefix):
    """Append the labels from ``node`` up towards the top of the tree to ``prefix``.

    ``node.ide`` is appended first, then each ancestor's ``ide`` in turn.
    The walk stops at the node whose ``parent`` is None; that node's label is
    not appended. ``prefix`` is modified in place and nothing is returned.
    """
    # Iterative rather than recursive so that a long parent chain cannot hit
    # the interpreter's recursion limit.
    while node.parent is not None:
        prefix.append(node.ide)
        node = node.parent
def cond_tree(null, kdc, minsup, prefix, freq_items, sup=None, sing_sup=0):
    """Recursively mine frequent itemsets, appending them to ``freq_items``.

    Args:
        null: tree that goes with ``kdc``; not read here, only handed on in
            the recursive call.
        kdc: mapping item -> sequence whose element ``[0]`` is the item's
            support count and whose element ``[1]`` is the node passed to
            ``gen_cond_pattern_bases``.
        minsup: minimum support used when building the conditional
            structures with ``genFi``.
        prefix: set of items already fixed for this branch; it is copied,
            never modified.
        freq_items: output list; one ``(itemset, support)`` tuple is appended
            for every itemset found, where ``itemset`` is a set.
        sup: mapping tuple-of-items -> count for the current branch; the
            support of an item is the sum of the counts of the tuples that
            contain it. Defaults to an empty mapping.
        sing_sup: table with the same layout as ``kdc``, consulted for the
            support when ``sup`` yields nothing for an item. When left at its
            default, ``kdc`` itself is used.

    Returns:
        None; results are delivered through ``freq_items``.
    """
    if sup is None:
        sup = {}
    if not sing_sup:
        # The default (0) cannot be subscripted; the top-level call in this
        # file passes the same table for kdc and sing_sup, so fall back to it.
        sing_sup = kdc

    # Ascending by support count. Sorting on the count alone (not on the
    # whole [count, node] entry) keeps ties from comparing node objects,
    # which are not orderable on Python 3.
    ordered_items = [
        item for item, _ in sorted(kdc.items(), key=lambda entry: entry[1][0])
    ]

    for it in ordered_items:
        new_freq_items = prefix.copy()
        new_freq_items.add(it)

        support = sum(count for path, count in sup.items() if it in path)
        if support == 0:
            support = sing_sup[it][0]
        freq_items.append((new_freq_items, support))

        patterns = gen_cond_pattern_bases(kdc[it][1])

        # Expand every pattern into `count` separate transactions.
        da = []
        for path, count in patterns.items():
            da.extend(list(path) for _ in range(count))

        # Use the caller's threshold instead of a hard-coded 2.
        # NOTE(review): genFi is expected to return a (tree, table) pair with
        # a None tree when nothing is left to mine -- confirm against genFi.
        new_null, new_kdc = genFi(da, minsup)
        if new_null is not None:
            cond_tree(new_null, new_kdc, minsup, new_freq_items, freq_items,
                      patterns, sing_sup)
# pandas is used below as `pd`, but no import of it appears in this file as
# shown; importing here keeps this section runnable on its own.
import pandas as pd

# Single place for the support threshold used for both the initial build and
# the mining step.
min_support = 2

# genFi's result is indexed: [0] is handed to cond_tree as the tree, [1] as
# the item table (also reused as the fallback support table).
htable = genFi(dats, min_support)
freq_itemss = []
cond_tree(htable[0], htable[1], min_support, set(), freq_itemss, {}, htable[1])
# Bare expression: presumably there to display the list in an interactive /
# notebook session; it has no effect when run as a script.
freq_itemss

# One row per mined itemset: the items as a list, and the support.
data = pd.DataFrame(columns=["items", "support"])
data["items"] = [list(itemset) for itemset, _ in freq_itemss]
data["support"] = [support for _, support in freq_itemss]
# Bare expression again: the sorted frame is not stored, only displayed when
# evaluated interactively.
data.sort_values(ascending=False, by="support")
# OUTPUT:
#     items                          support
# 12  [google]                       5
# 14  [amazon]                       5
# 8   [cse]                          4
# 11  [cse, google]                  4
# 13  [amazon, google]               4
# 0   [python]                       3
# 7   [python, amazon]               3
# 9   [cse, amazon]                  3
# 10  [cse, amazon, google]          3
# 1   [python, cse]                  2
# 2   [python, cse, amazon]          2
# 3   [python, google]               2
# 4   [python, amazon, google]       2
# 5   [python, cse, amazon, google]  2
# 6   [python, cse, google]          2