Experiment No. 9
Experiment No. 9
import pandas as pd
import numpy as np
from mlxtend.frequent_patterns import apriori, association_rules
import matplotlib.pyplot as plt
# Load the retail transactions. Each row is one basket spread over seven item
# columns; baskets with fewer items are padded with NaN.
data=pd.read_csv("retail_dataset.csv")
# Preview the first rows of the raw table.
data.head()
0 1 2 3 4 5 6
0 Bread Wine Eggs Meat Cheese Pencil Diaper
1 Bread Cheese Meat Diaper Wine Milk Pencil
2 Cheese Meat Eggs Milk Wine NaN NaN
3 Cheese Meat Eggs Milk Wine NaN NaN
4 Meat Pencil Wine NaN NaN NaN NaN
# Print the index range, per-column non-null counts, dtypes and memory usage.
data.info()
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 315 entries, 0 to 314
Data columns (total 7 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 0 315 non-null object
1 1 285 non-null object
2 2 245 non-null object
3 3 187 non-null object
4 4 133 non-null object
5 5 71 non-null object
6 6 41 non-null object
dtypes: object(7)
memory usage: 17.4+ KB
# Count the missing (NaN) cells in each column.
data.isna().sum()
0 0
1 30
2 70
3 128
4 182
5 244
6 274
dtype: int64
# Report how many columns the table has, then the dtype of each column.
print(len(data.columns))
types = data.dtypes
print(types)
0 object
1 object
2 object
3 object
4 object
5 object
6 object
dtype: object
# Build the transaction list: one list of item names per row of `data`.
#
# Short baskets are padded with NaN in the CSV.  Those cells are skipped so
# that the string "nan" does not become a phantom item in every short
# transaction, and rows are iterated directly so the number of columns is not
# hard-coded.
transactions = []
for i, row in enumerate(data.values):
    transactions.append([str(item) for item in row if pd.notna(item)])
transactions
# `i` is the index of the last row processed (number of transactions - 1).
print(i)
314
import pandas as pd
from mlxtend.preprocessing import TransactionEncoder
# One-hot encode the baskets: learn the distinct items and transform the
# transactions in a single call, producing an array with one row per
# transaction and one column per distinct item.
te = TransactionEncoder()
te_ary = te.fit_transform(transactions)
te_ary
# Wrap the encoded array in a DataFrame labelled with the item names the
# encoder learned.
df = pd.DataFrame(data=te_ary, columns=te.columns_)
print(df)
# Keep only real items: remove the placeholder "nan" column that appears when
# padded (missing) cells were encoded as the string "nan".
# Dropping by name avoids a hard-coded item list that would raise KeyError or
# silently discard items on a different dataset; errors="ignore" makes this a
# no-op when no such column exists.
df = df.drop(columns=["nan"], errors="ignore")
print(df)
# Mine every itemset whose support is at least 0.2 (20% of the transactions),
# labelling itemsets with item names instead of column indices.
freq_items = apriori(
    df,
    use_colnames=True,
    min_support=0.2,
)
freq_items
support itemsets
0 0.425397 (Bagel)
1 0.504762 (Bread)
2 0.501587 (Cheese)
3 0.406349 (Diaper)
4 0.438095 (Eggs)
5 0.476190 (Meat)
6 0.501587 (Milk)
7 0.361905 (Pencil)
8 0.438095 (Wine)
9 0.279365 (Bread, Bagel)
10 0.225397 (Milk, Bagel)
11 0.238095 (Bread, Cheese)
12 0.231746 (Bread, Diaper)
13 0.206349 (Meat, Bread)
14 0.279365 (Milk, Bread)
15 0.200000 (Pencil, Bread)
16 0.244444 (Bread, Wine)
17 0.200000 (Cheese, Diaper)
18 0.298413 (Cheese, Eggs)
19 0.323810 (Meat, Cheese)
20 0.304762 (Milk, Cheese)
21 0.200000 (Pencil, Cheese)
22 0.269841 (Cheese, Wine)
23 0.234921 (Diaper, Wine)
24 0.266667 (Meat, Eggs)
25 0.244444 (Milk, Eggs)
26 0.241270 (Eggs, Wine)
27 0.244444 (Meat, Milk)
28 0.250794 (Meat, Wine)
29 0.219048 (Milk, Wine)
30 0.200000 (Pencil, Wine)
31 0.215873 (Meat, Cheese, Eggs)
32 0.203175 (Meat, Cheese, Milk)
# Derive association rules from the frequent itemsets.  `rules` was used here
# without ever being defined (the imported `association_rules` was never
# called), which raises NameError when the file is run top to bottom.
# NOTE(review): the original threshold is not present in the file; a minimum
# confidence of 0.6 is inferred because, with the supports listed for
# `freq_items`, it yields exactly the 14 rules recorded for this experiment.
rules = association_rules(freq_items, metric="confidence", min_threshold=0.6)
# List the column names of the rules table.
list(rules)
['antecedents',
'consequents',
'antecedent support',
'consequent support',
'support',
'confidence',
'lift',
'leverage',
'conviction',
'zhangs_metric']
# Report how many association rules were generated (rows of the rules table).
print(rules.shape[0])
14