Commit

Merge branch 'main' into feature-implement-apriori-algorithm
Elwizzy12 authored Dec 11, 2023
2 parents a82917c + 9f17953 commit 186058e
Showing 9 changed files with 245 additions and 40 deletions.
4 changes: 2 additions & 2 deletions docs/apriori.md
@@ -1,4 +1,4 @@
-# Support
+# Apriori

In association rules mining, the Apriori algorithm helps to find all frequent
itemsets in a dataset and generate strong association rules.
@@ -28,4 +28,4 @@ Output: Frequent Itemsets, Strong Rules
      ii. If Calculate_Confidence(D, Rule) >= min_confidence:
          a. Add Rule to Strong Rules
5. Return Frequent Itemsets, Strong Rules
-```
+```
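For orientation, here is a minimal, self-contained Python sketch of the flow the pseudocode above describes. It is not the repository's implementation: `apriori_sketch` and its internals are illustrative names, and the helper logic is inlined rather than imported from `src/`.

```python
from itertools import combinations

def apriori_sketch(transactions, min_support=0.5, min_confidence=0.5):
    """Toy Apriori: return (frequent itemsets with supports, strong rules with confidences)."""
    n = len(transactions)
    rows = [frozenset(t) for t in transactions]
    support = lambda items: sum(items <= row for row in rows) / n

    # L1 = {frequent 1-itemsets}
    frequent = {frozenset([i]): support(frozenset([i]))
                for row in rows for i in row
                if support(frozenset([i])) >= min_support}

    # Level-wise: build Ck from Lk-1, keep candidates that clear min_support
    level, k = list(frequent), 2
    while level:
        candidates = {a | b for a in level for b in level if len(a | b) == k}
        level = [c for c in candidates if support(c) >= min_support]
        frequent.update({c: support(c) for c in level})
        k += 1

    # For each frequent l and non-empty proper subset s: Rule = s -> (l - s)
    rules = {}
    for itemset, itemset_support in frequent.items():
        for r in range(1, len(itemset)):
            for s in map(frozenset, combinations(itemset, r)):
                conf = itemset_support / support(s)
                if conf >= min_confidence:
                    rules[(s, itemset - s)] = conf
    return frequent, rules

print(apriori_sketch([[1, 2, 3], [1, 2, 3], [1, 2, 3]], 0.3, 0.7))
```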
4 changes: 2 additions & 2 deletions docs/confidence.md
@@ -1,4 +1,4 @@
-# Support
+# Confidence

In association rules mining, confidence helps to measure the
likelihood of occurrence of an itemset given another itemset.
@@ -21,4 +21,4 @@ Output: Confidence value for Rule
   Else:
      a. Confidence = 0
4. Return Confidence
-```
+```
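The quantity this pseudocode computes is the standard definition: confidence(A → B) = support(A ∪ B) / support(A), with 0 returned when the antecedent never occurs. A minimal sketch, assuming transactions are lists of items (the names here are illustrative, not the repository's API):

```python
def confidence_sketch(transactions, antecedent, consequent):
    """confidence(A -> B) = support(A ∪ B) / support(A), guarding division by zero."""
    a = frozenset(antecedent)
    ab = a | frozenset(consequent)
    count_a = sum(a <= frozenset(t) for t in transactions)
    count_ab = sum(ab <= frozenset(t) for t in transactions)
    return count_ab / count_a if count_a else 0.0
```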
2 changes: 2 additions & 0 deletions main.py
@@ -2,5 +2,7 @@
if __name__ == '__main__':
    result = apriori([[1, 2, 3], [1, 2, 3], [1, 2, 3]], 0.3, 0.7)
+
-    print(result)
+    print(result)
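A quick read of this demo: with three identical transactions, every non-empty subset of {1, 2, 3} appears in 3 of 3 transactions (support 1.0 ≥ 0.3), and every candidate rule has confidence 1.0 ≥ 0.7, so `result` should contain all seven non-empty itemsets together with their rules.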

45 changes: 37 additions & 8 deletions notebook/apriori.ipynb
@@ -3,14 +3,38 @@
 {
  "cell_type": "code",
  "execution_count": null,
- "id": "initial_id",
- "metadata": {
-  "collapsed": true
- },
+ "id": "54e2e8be",
+ "metadata": {},
  "outputs": [],
  "source": [
-  "from src.apriori import apriori"
+  "# Assuming apriori.py is in the same directory or update the path accordingly\n",
+  "from apriori import apriori\n",
+  "\n",
+  "# Define your dataset\n",
+  "dataset = [\n",
+  "    ['Milk', 'Bread', 'Eggs'],\n",
+  "    ['Milk', 'Diapers'],\n",
+  "    ['Bread', 'Butter', 'Eggs'],\n",
+  "]\n",
+  "\n",
+  "# Set your minimum support and confidence thresholds\n",
+  "min_support = 0.4\n",
+  "min_confidence = 0.6\n",
+  "\n",
+  "# Call the apriori function\n",
+  "frequent_itemsets, strong_rules = apriori(dataset, min_support, min_confidence)\n",
+  "\n",
+  "# Display the results\n",
+  "print(\"Frequent Itemsets:\")\n",
+  "for itemset, support_value in frequent_itemsets.items():\n",
+  "    print(f\"{itemset}: {support_value}\")\n",
+  "\n",
+  "print(\"\\nStrong Rules:\")\n",
+  "for rule, confidence_value in strong_rules.items():\n",
+  "    print(f\"{rule[0]} -> {rule[1]} (Confidence: {confidence_value})\")"
+ ]
 },
 {
  "cell_type": "code",
@@ -19,10 +43,15 @@
  "metadata": {
   "collapsed": false
  },
  "outputs": [],
  "source": [
   "apriori([[1, 3], [1, 2], [1, 2, 3]], 0.5)"
  ],
+ "id": "4d2a7fe66b2d70bb"
 }
],
"metadata": {
@@ -34,14 +63,14 @@
 "language_info": {
  "codemirror_mode": {
   "name": "ipython",
-  "version": 2
+  "version": 3
  },
  "file_extension": ".py",
  "mimetype": "text/x-python",
  "name": "python",
  "nbconvert_exporter": "python",
-  "pygments_lexer": "ipython2",
-  "version": "2.7.6"
+  "pygments_lexer": "ipython3",
+  "version": "3.10.12"
 }
},
"nbformat": 4,
40 changes: 30 additions & 10 deletions notebook/confidence.ipynb
@@ -2,13 +2,33 @@
"cells": [
 {
  "cell_type": "code",
- "execution_count": null,
- "id": "initial_id",
- "metadata": {
-  "collapsed": true
- },
- "outputs": [],
- "source": []
+ "execution_count": 10,
+ "id": "0df8ea7c",
+ "metadata": {},
+ "outputs": [
+  {
+   "name": "stdout",
+   "output_type": "stream",
+   "text": [
+    "Confidence for rule (['bread'], ['milk']): 0.7499999999999999\n"
+   ]
+  }
+ ],
+ "source": [
+  "from confidence import confidence\n",
+  "\n",
+  "dataset = [\n",
+  "    ['bread', 'milk'],\n",
+  "    ['bread', 'diaper', 'beer', 'eggs'],\n",
+  "    ['milk', 'diaper', 'beer', 'cola'],\n",
+  "    ['bread', 'milk', 'diaper', 'beer'],\n",
+  "    ['bread', 'milk', 'diaper', 'cola']\n",
+  "]\n",
+  "rule = (['bread'], ['milk'])\n",
+  "\n",
+  "confidence_value = confidence(dataset, rule)\n",
+  "print(f\"Confidence for rule {rule}: {confidence_value}\")"
+ ]
 }
],
"metadata": {
@@ -20,14 +40,14 @@
 "language_info": {
  "codemirror_mode": {
   "name": "ipython",
-  "version": 2
+  "version": 3
  },
  "file_extension": ".py",
  "mimetype": "text/x-python",
  "name": "python",
  "nbconvert_exporter": "python",
-  "pygments_lexer": "ipython2",
-  "version": "2.7.6"
+  "pygments_lexer": "ipython3",
+  "version": "3.10.12"
 }
},
"nbformat": 4,
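Sanity check on the printed value: {bread, milk} occurs in 3 of the 5 transactions (support 0.6) and {bread} in 4 of 5 (support 0.8), so confidence(bread → milk) = 0.6 / 0.8 = 0.75; the trailing ...9999 is ordinary floating-point rounding from dividing the two supports.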
41 changes: 31 additions & 10 deletions notebook/support.ipynb
@@ -2,13 +2,34 @@
"cells": [
 {
  "cell_type": "code",
- "execution_count": null,
- "id": "initial_id",
- "metadata": {
-  "collapsed": true
- },
- "outputs": [],
- "source": []
+ "execution_count": 9,
+ "id": "fb107e08",
+ "metadata": {},
+ "outputs": [
+  {
+   "name": "stdout",
+   "output_type": "stream",
+   "text": [
+    "Support for ['bread', 'milk']: 0.6\n"
+   ]
+  }
+ ],
+ "source": [
+  "from support import support\n",
+  "\n",
+  "dataset = [\n",
+  "    ['bread', 'milk'],\n",
+  "    ['bread', 'diaper', 'beer', 'eggs'],\n",
+  "    ['milk', 'diaper', 'beer', 'cola'],\n",
+  "    ['bread', 'milk', 'diaper', 'beer'],\n",
+  "    ['bread', 'milk', 'diaper', 'cola']\n",
+  "]\n",
+  "itemset = ['bread', 'milk']\n",
+  "\n",
+  "# Calling the support function\n",
+  "support_value = support(itemset, dataset)\n",
+  "print(f\"Support for {itemset}: {support_value}\")"
+ ]
 }
],
"metadata": {
@@ -20,14 +41,14 @@
 "language_info": {
  "codemirror_mode": {
   "name": "ipython",
-  "version": 2
+  "version": 3
  },
  "file_extension": ".py",
  "mimetype": "text/x-python",
  "name": "python",
  "nbconvert_exporter": "python",
-  "pygments_lexer": "ipython2",
-  "version": "2.7.6"
+  "pygments_lexer": "ipython3",
+  "version": "3.10.12"
 }
},
"nbformat": 4,
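Checking the output by hand: ['bread', 'milk'] is a subset of transactions 1, 4, and 5 but not of 2 (no milk) or 3 (no bread), so support = 3/5 = 0.6, matching the cell's printed value.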
103 changes: 99 additions & 4 deletions src/apriori.py
@@ -1,15 +1,25 @@
-sub_dataset_type = tuple[object]
+from typing import List, Tuple, Dict
+from itertools import chain, combinations
+
+# Importing support and confidence functions
+from support import support
+from confidence import confidence
+
+sub_dataset_type = Tuple[object]
support_type = float
-strong_rules = list[object], list[object]
+strong_rules_type = Tuple[List[object], List[object]]
confidence_type = float


-def apriori(transactions: list[list[object]], min_support: float = 0.7, min_confidence: float = 0.5) \
-        -> tuple[dict[sub_dataset_type, support_type], dict[strong_rules, confidence_type]]:
+def apriori(transactions: List[List[object]], min_support: float = 0.7, min_confidence: float = 0.5) \
+        -> Tuple[Dict[sub_dataset_type, support_type], Dict[strong_rules_type, confidence_type]]:
    """
    To find all frequent itemsets in a dataset and generate strong association rules.
    """
-
-    return {}, {}
-    from itertools import combinations
@@ -157,4 +167,89 @@ def apriori(dataset, min_support, min_confidence):

print("\nStrong Rules:")
for rule, confidence in strong_rules:
-    print(rule, "Confidence:", confidence)
+    print(rule, "Confidence:", confidence)

# Initialize dictionaries to store frequent itemsets and strong rules.
frequent_itemsets = {}
strong_rules = {}

# Create the unique 1-itemsets and calculate their support using the support function.
# Initialize L1 = {frequent 1-itemsets}
unique_items = set(item for transaction in transactions for item in transaction)
candidates_1 = [frozenset([item]) for item in unique_items]
frequent_itemsets[1] = {candidate: support(list(candidate), transactions) for candidate in candidates_1}

# Iterate over the levels of itemsets (k), generating candidate sets
# until no more frequent itemsets are found.
# For (k = 2; Lk-1 is not empty; k++):
k = 2
while len(frequent_itemsets[k - 1]) > 0:
    # Generate Ck, candidate k-itemsets, from Lk-1
    candidates_k = generate_candidates(list(frequent_itemsets[k - 1]), k)

    # For each transaction t in D:
    # increment the count of every candidate in Ck that is contained in t
    candidate_counts = {candidate: 0 for candidate in candidates_k}
    for transaction in transactions:
        for candidate in candidates_k:
            if candidate.issubset(transaction):
                candidate_counts[candidate] += 1

    # Lk = {c in Ck | support(c) >= min_support}
    frequent_itemsets[k] = {candidate: count / len(transactions)
                            for candidate, count in candidate_counts.items()
                            if count / len(transactions) >= min_support}

    k += 1

# Frequent Itemsets = Union of all Lk
frequent_itemsets = {itemset: support_value for itemsets in frequent_itemsets.values()
                     for itemset, support_value in itemsets.items()}

# For each frequent itemset l in Frequent Itemsets:
for itemset in frequent_itemsets.keys():

    # Generate all non-empty proper subsets of l
    subsets = get_subsets(list(itemset))

    # For every non-empty subset s of l:
    for subset in subsets:
        # Rule = s -> (l - s)
        rule = (subset, list(set(itemset) - set(subset)))

        # If Calculate_Confidence(D, Rule) >= min_confidence:
        confidence_value = confidence(transactions, rule)
        if confidence_value >= min_confidence:
            # Add Rule to Strong Rules (tuple keys keep the dict key hashable)
            strong_rules[(tuple(rule[0]), tuple(rule[1]))] = confidence_value

# Return Frequent Itemsets, Strong Rules
return frequent_itemsets, strong_rules


def generate_candidates(frequent_itemsets: List[frozenset], k: int) -> List[frozenset]:
    """
    Generate candidate k-itemsets from frequent (k-1)-itemsets.
    """
    candidates = []
    n = len(frequent_itemsets)

    for i in range(n):
        for j in range(i + 1, n):
            # Merge two frequent (k-1)-itemsets to generate a candidate
            candidate = frozenset(set(frequent_itemsets[i]).union(frequent_itemsets[j]))

            # Keep the candidate only if it has length k and is not a duplicate
            if len(candidate) == k and candidate not in candidates:
                candidates.append(candidate)

    return candidates



def get_subsets(itemset: List[object]) -> List[List[object]]:
    """
    Generate all non-empty proper subsets of an itemset.
    """
    return [list(subset) for subset in
            chain.from_iterable(combinations(itemset, r) for r in range(1, len(itemset)))]
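As a quick, hedged check of the two helpers above (the expected outputs are reasoned from the code, not captured from a run):

```python
# Assuming these are importable from the module above, e.g.:
# from apriori import generate_candidates, get_subsets

l2 = [frozenset({'A', 'B'}), frozenset({'A', 'C'}), frozenset({'B', 'C'})]
print(generate_candidates(l2, 3))
# -> [frozenset({'A', 'B', 'C'})]: every pair merges to the same 3-itemset, deduplicated

print(get_subsets(['A', 'B', 'C']))
# -> the six non-empty proper subsets: ['A'], ['B'], ['C'], ['A', 'B'], ['A', 'C'], ['B', 'C']
```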



23 changes: 19 additions & 4 deletions src/confidence.py
@@ -1,11 +1,15 @@
dataset_type = list[list[object]]
rule_type = tuple[list[object], list[object]]
from typing import List, Tuple
from support import support

dataset_type = List[List[object]]
rule_type = Tuple[List[object], List[object]]

def confidence(data_set: dataset_type, rule: rule_type) -> float:

def confidence(dataset: dataset_type, rule: rule_type) -> float:
"""
To measure the likelihood of occurrence of an itemset given another itemset.
"""

pass
def calculate_support(dataset, itemset):
count = 0
@@ -42,4 +46,15 @@ def calculate_confidence(dataset, rule):
rule = (['A'], ['C'])

confidence = calculate_confidence(dataset, rule)
-print("Confidence:", confidence)
+print("Confidence:", confidence)

antecedent, consequent = rule
antecedent_support = support(antecedent, dataset)
rule_support = support(antecedent + consequent, dataset)

if antecedent_support == 0:
    return 0  # Avoid division by zero

confidence_value = rule_support / antecedent_support
return confidence_value
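A minimal usage sketch for the function above, assuming support(itemset, dataset) returns the fraction of transactions containing the itemset (the dataset mirrors the one in notebook/confidence.ipynb):

```python
from confidence import confidence  # assuming the module layout used in the notebooks

dataset = [
    ['bread', 'milk'],
    ['bread', 'diaper', 'beer', 'eggs'],
    ['milk', 'diaper', 'beer', 'cola'],
    ['bread', 'milk', 'diaper', 'beer'],
    ['bread', 'milk', 'diaper', 'cola'],
]

# support(['bread'], dataset) -> 0.8 and support(['bread', 'milk'], dataset) -> 0.6,
# so the rule bread -> milk should come out at 0.6 / 0.8 = 0.75.
print(confidence(dataset, (['bread'], ['milk'])))
```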
