From dba22251a41d68cb54b59e8e7cf8c367fe705f41 Mon Sep 17 00:00:00 2001 From: AssahBismarkabah Date: Fri, 8 Dec 2023 16:52:10 +0100 Subject: [PATCH 1/8] configuring gpg key --- configuring_gpg.sh | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 configuring_gpg.sh diff --git a/configuring_gpg.sh b/configuring_gpg.sh new file mode 100644 index 0000000..ec263e2 --- /dev/null +++ b/configuring_gpg.sh @@ -0,0 +1,26 @@ +#small bash script to configure gpg +#!/bin/bash + +echo "Generating a new GPG key..." + +# Setting the email address associated with the GPG key +read -p "Enter your email address: " email + +# Set the key type and key length +key_type="RSA" +key_length="4096" + +# Set the expiration period for the key +expiration="0" # 0 means the key does not expiree + +# Generate the GPG key +gpg --batch --full-generate-key < Date: Fri, 8 Dec 2023 17:03:37 +0100 Subject: [PATCH 2/8] configuring gpg key.. --- configuring_gpg.sh | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/configuring_gpg.sh b/configuring_gpg.sh index ec263e2..b7e2aa0 100644 --- a/configuring_gpg.sh +++ b/configuring_gpg.sh @@ -1,17 +1,19 @@ -#small bash script to configure gpg +#small bash script to generate gpg key #!/bin/bash echo "Generating a new GPG key..." -# Setting the email address associated with the GPG key +# Set the email address associated with the GPG key read -p "Enter your email address: " email -# Set the key type and key length + +# Setting the key type and key length key_type="RSA" key_length="4096" -# Set the expiration period for the key -expiration="0" # 0 means the key does not expiree + +# Setting the expiration period for the key +expiration="0" # 0 means the key does not expire # Generate the GPG key gpg --batch --full-generate-key < Date: Sat, 9 Dec 2023 05:13:17 +0100 Subject: [PATCH 3/8] Implement confidence and support calculation functions --- notebook/confidence.ipynb | 40 ++++++++++++++++++++++++++++---------- notebook/support.ipynb | 41 +++++++++++++++++++++++++++++---------- src/confidence.py | 20 +++++++++++++++++-- src/support.py | 19 +++++++++++++++++- 4 files changed, 97 insertions(+), 23 deletions(-) diff --git a/notebook/confidence.ipynb b/notebook/confidence.ipynb index 29cd743..4c1ed13 100644 --- a/notebook/confidence.ipynb +++ b/notebook/confidence.ipynb @@ -2,13 +2,33 @@ "cells": [ { "cell_type": "code", - "execution_count": null, - "id": "initial_id", - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [] + "execution_count": 8, + "id": "deab37f7", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Confidence for rule (['bread'], ['milk']): 0.7499999999999999\n" + ] + } + ], + "source": [ + "from confidence import confidence\n", + "\n", + "dataset = [\n", + " ['bread', 'milk'],\n", + " ['bread', 'diaper', 'beer', 'eggs'],\n", + " ['milk', 'diaper', 'beer', 'cola'],\n", + " ['bread', 'milk', 'diaper', 'beer'],\n", + " ['bread', 'milk', 'diaper', 'cola']\n", + " ]\n", + "rule = (['bread'], ['milk']) \n", + "\n", + "confidence_value = confidence(dataset, rule)\n", + "print(f\"Confidence for rule {rule}: {confidence_value}\")" + ] } ], "metadata": { @@ -20,14 +40,14 @@ "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.6" + "pygments_lexer": "ipython3", + "version": "3.10.12" } }, "nbformat": 4, diff --git a/notebook/support.ipynb b/notebook/support.ipynb index 29cd743..3300673 100644 --- a/notebook/support.ipynb +++ b/notebook/support.ipynb @@ -2,13 +2,34 @@ "cells": [ { "cell_type": "code", - "execution_count": null, - "id": "initial_id", - "metadata": { - "collapsed": true - }, - "outputs": [], - "source": [] + "execution_count": 6, + "id": "4f811223", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Support for ['bread', 'milk']: 0.6\n" + ] + } + ], + "source": [ + "from support import support\n", + "\n", + "dataset = [\n", + " ['bread', 'milk'],\n", + " ['bread', 'diaper', 'beer', 'eggs'],\n", + " ['milk', 'diaper', 'beer', 'cola'],\n", + " ['bread', 'milk', 'diaper', 'beer'],\n", + " ['bread', 'milk', 'diaper', 'cola']\n", + "]\n", + "itemset = ['bread', 'milk']\n", + "#calling the support function\n", + "\n", + "support_value = support(itemset, dataset)\n", + "print(f\"Support for {itemset}: {support_value}\")" + ] } ], "metadata": { @@ -20,14 +41,14 @@ "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.6" + "pygments_lexer": "ipython3", + "version": "3.10.12" } }, "nbformat": 4, diff --git a/src/confidence.py b/src/confidence.py index ba640e4..793c293 100644 --- a/src/confidence.py +++ b/src/confidence.py @@ -1,9 +1,25 @@ dataset_type = list[list[object]] rule_type = tuple[list[object], list[object]] - +from support import support def confidence(data_set: dataset_type, rule: rule_type) -> float: """ To measure the likelihood of occurrence of an itemset given another itemset. """ - pass + #association rule in the form (antecedent, consequent). + antecedent, consequent = rule + + support_ab = support(antecedent + consequent, data_set) + + support_a = support(antecedent, data_set) + + if support_a != 0: + #validating the value of the Supp of the antecedent before computation + + confidence_value = support_ab / support_a + + else: + confidence_value = 0 + + return confidence_value + diff --git a/src/support.py b/src/support.py index 130e148..4a4a664 100644 --- a/src/support.py +++ b/src/support.py @@ -6,4 +6,21 @@ def support(itemsets: dataset_type, data_set: itemset_type) -> float: """ To find the frequency of itemsets in the dataset. """ - pass + #initialising the counter to keep track of the occurance in the list + count=0 + + total_transactions=len(data_set) + +#looping through the entire dataset for occurance of itemset + + for transaction in data_set: + if set(itemsets).issubset(transaction): + + count = count + 1 + #implementing support method + + support_value=count / total_transactions + + return support_value + + From 06ce8b0b38ea229dde94bc77469c5079210e42d2 Mon Sep 17 00:00:00 2001 From: AssahBismarkabah Date: Sat, 9 Dec 2023 11:51:49 +0100 Subject: [PATCH 4/8] apriori functionality and corresponding function call in main --- configuring_gpg.sh | 28 ----------- main.py | 23 +++++++-- notebook/apriori.ipynb | 50 +++++++++++-------- src/apriori.py | 107 +++++++++++++++++++++++++++++++++++++++-- src/support.py | 25 +++++----- 5 files changed, 164 insertions(+), 69 deletions(-) delete mode 100644 configuring_gpg.sh diff --git a/configuring_gpg.sh b/configuring_gpg.sh deleted file mode 100644 index b7e2aa0..0000000 --- a/configuring_gpg.sh +++ /dev/null @@ -1,28 +0,0 @@ -#small bash script to generate gpg key -#!/bin/bash - -echo "Generating a new GPG key..." - -# Set the email address associated with the GPG key -read -p "Enter your email address: " email - - -# Setting the key type and key length -key_type="RSA" -key_length="4096" - - -# Setting the expiration period for the key -expiration="0" # 0 means the key does not expire - -# Generate the GPG key -gpg --batch --full-generate-key < {rule[1]} (Confidence: {confidence_value})") \ No newline at end of file diff --git a/notebook/apriori.ipynb b/notebook/apriori.ipynb index d2a0524..ce2c2da 100644 --- a/notebook/apriori.ipynb +++ b/notebook/apriori.ipynb @@ -3,26 +3,36 @@ { "cell_type": "code", "execution_count": null, - "id": "initial_id", - "metadata": { - "collapsed": true - }, + "id": "54e2e8be", + "metadata": {}, "outputs": [], "source": [ - "from src.apriori import apriori" + "# Assuming apriori.py is in the same directory or update the path accordingly\n", + "from apriori import apriori\n", + "\n", + "# Define your dataset\n", + "dataset = [\n", + " ['Milk', 'Bread', 'Eggs'],\n", + " ['Milk', 'Diapers'],\n", + " ['Bread', 'Butter', 'Eggs'],\n", + "]\n", + "\n", + "# Set your minimum support and confidence thresholds\n", + "min_support = 0.4\n", + "min_confidence = 0.6\n", + "\n", + "# Call the apriori function\n", + "frequent_itemsets, strong_rules = apriori(dataset, min_support, min_confidence)\n", + "\n", + "# Display the results\n", + "print(\"Frequent Itemsets:\")\n", + "for itemset, support_value in frequent_itemsets.items():\n", + " print(f\"{itemset}: {support_value}\")\n", + "\n", + "print(\"\\nStrong Rules:\")\n", + "for rule, confidence_value in strong_rules.items():\n", + " print(f\"{rule[0]} -> {rule[1]} (Confidence: {confidence_value})\")" ] - }, - { - "cell_type": "code", - "execution_count": null, - "outputs": [], - "source": [ - "apriori([1, 2, 3], [[1, 3], [1, 2], [1, 2, 3]], 0.5)" - ], - "metadata": { - "collapsed": false - }, - "id": "4d2a7fe66b2d70bb" } ], "metadata": { @@ -34,14 +44,14 @@ "language_info": { "codemirror_mode": { "name": "ipython", - "version": 2 + "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", - "pygments_lexer": "ipython2", - "version": "2.7.6" + "pygments_lexer": "ipython3", + "version": "3.10.12" } }, "nbformat": 4, diff --git a/src/apriori.py b/src/apriori.py index 957c524..05e2cc2 100644 --- a/src/apriori.py +++ b/src/apriori.py @@ -1,12 +1,109 @@ -sub_dataset_type = tuple[object] +from typing import List, Tuple, Dict +from itertools import chain, combinations + +# Importing support and confidence functions +from support import support +from confidence import confidence + +sub_dataset_type = Tuple[object] support_type = float -strong_rules = list[object], list[object] +strong_rules_type = Tuple[List[object], List[object]] confidence_type = float -def apriori(transactions: list[list[object]], min_support: float = 0.7, min_confidence: float = 0.5) \ - -> tuple[dict[sub_dataset_type, support_type], dict[strong_rules, confidence_type]]: +def apriori(transactions: List[List[object]], min_support: float = 0.7, min_confidence: float = 0.5) \ + -> Tuple[Dict[sub_dataset_type, support_type], Dict[strong_rules_type, confidence_type]]: """ To find all frequent itemsets in a dataset and generate strong association rules. """ - return {}, {} + + # Initializes dictionaries to store frequent itemsets and strong rules. + frequent_itemsets = {} + strong_rules = {} + + # Creates unique 1-itemsets and calculates their support using the support function. + # Initialize L1 = {frequent 1-itemsets} + unique_items = set(item for transaction in transactions for item in transaction) + candidates_1 = [frozenset([item]) for item in unique_items] + frequent_itemsets[1] = {candidate: support(transactions, [list(candidate)]) for candidate in candidates_1} + + # Iterates over the levels of itemsets (k) until no more frequent itemsets are found and generating candidate sets + # For (k = 2; Lk-1 is not empty; k++): + k = 2 + while len(frequent_itemsets[k - 1]) > 0: + # Generating Ck, candidate k-itemsets, from Lk-1 + candidates_k = generate_candidates(list(frequent_itemsets[k - 1]), k) + + # For each transaction t in D: + for transaction in transactions: + # Increment count of all candidates in Ck that are contained in t + for candidate in candidates_k: + if set(candidate).issubset(transaction): + frequent_itemsets[k - 1][frozenset(candidate)] += 1 + + # Lk = {c in Ck | support(c) >= min_support} + frequent_itemsets[k] = {candidate: support_value for candidate, support_value in + frequent_itemsets[k - 1].items() + if support_value / len(transactions) >= min_support} + + k += 1 + + # Frequent Itemsets = Union of all Lk + frequent_itemsets = {itemset: support_value for itemsets in frequent_itemsets.values() for itemset, support_value in + itemsets.items()} + + # For each frequent itemset l in Frequent Itemsets: + for itemset in frequent_itemsets.keys(): + + # Generate all non-empty subsets of l + subsets = get_subsets(itemset) + + # For every non-empty subset s of l: + for subset in subsets: + # Rule = s -> (l - s) + rule = (subset, list(set(itemset) - set(subset))) + + # If Calculate_Confidence(D, Rule) >= min_confidence: + confidence_value = confidence(transactions, rule) + if confidence_value >= min_confidence: + # Add Rule to Strong Rules + strong_rules[tuple(rule)] = confidence_value + + # Return Frequent Itemsets, Strong Rules + return frequent_itemsets, strong_rules + + +def generate_candidates(frequent_itemsets: List[frozenset], k: int) -> List[frozenset]: + """ + Generate candidate k-itemsets from frequent (k-1)-itemsets. + """ + candidates = [] + n = len(frequent_itemsets) + + for i in range(n): + for j in range(i + 1, n): + # Merging the frequent (k-1)-itemsets to generate candidates + candidate = frozenset(sorted(set(frequent_itemsets[i]).union(frequent_itemsets[j]))) + + # Check if the candidate has length k + if len(candidate) == k: + candidates.append(candidate) + + return candidates + + + +def get_subsets(itemset: List[object]) -> List[List[object]]: + """ + Generate all non-empty subsets of a set. + """ + return [list(subset) for subset in chain.from_iterable(combinations(itemset, r) for r in range(1, len(itemset)))] + + + + + + + + + \ No newline at end of file diff --git a/src/support.py b/src/support.py index 4a4a664..4f9abf4 100644 --- a/src/support.py +++ b/src/support.py @@ -1,26 +1,25 @@ -itemset_type = list[object] -dataset_type = list[list[object]] +from typing import List +itemset_type = frozenset +dataset_type = List[List[object]] def support(itemsets: dataset_type, data_set: itemset_type) -> float: """ To find the frequency of itemsets in the dataset. """ - #initialising the counter to keep track of the occurance in the list - count=0 + # initializing the counter to keep track of the occurrence in the list + count = 0 - total_transactions=len(data_set) - -#looping through the entire dataset for occurance of itemset + total_transactions = len(data_set) + # looping through the entire dataset for occurrence of itemset for transaction in data_set: - if set(itemsets).issubset(transaction): - + if itemsets.issubset(transaction): count = count + 1 - #implementing support method - - support_value=count / total_transactions + + # implementing support method + support_value = count / total_transactions return support_value - + From 576cb1a9ba52b037e0082bcadae40e1cbdb8d160 Mon Sep 17 00:00:00 2001 From: AssahBismarkabah Date: Sat, 9 Dec 2023 12:25:22 +0100 Subject: [PATCH 5/8] added tests to notebook file --- notebook/confidence.ipynb | 4 ++-- notebook/support.ipynb | 4 ++-- src/confidence.py | 27 +++++++++++---------------- 3 files changed, 15 insertions(+), 20 deletions(-) diff --git a/notebook/confidence.ipynb b/notebook/confidence.ipynb index 4c1ed13..81d28ea 100644 --- a/notebook/confidence.ipynb +++ b/notebook/confidence.ipynb @@ -2,8 +2,8 @@ "cells": [ { "cell_type": "code", - "execution_count": 8, - "id": "deab37f7", + "execution_count": 10, + "id": "0df8ea7c", "metadata": {}, "outputs": [ { diff --git a/notebook/support.ipynb b/notebook/support.ipynb index 3300673..c03bdb1 100644 --- a/notebook/support.ipynb +++ b/notebook/support.ipynb @@ -2,8 +2,8 @@ "cells": [ { "cell_type": "code", - "execution_count": 6, - "id": "4f811223", + "execution_count": 9, + "id": "fb107e08", "metadata": {}, "outputs": [ { diff --git a/src/confidence.py b/src/confidence.py index 793c293..02150e4 100644 --- a/src/confidence.py +++ b/src/confidence.py @@ -1,25 +1,20 @@ -dataset_type = list[list[object]] -rule_type = tuple[list[object], list[object]] +from typing import List, Tuple from support import support -def confidence(data_set: dataset_type, rule: rule_type) -> float: +dataset_type = List[List[object]] +rule_type = Tuple[List[object], List[object]] + + +def confidence(dataset: dataset_type, rule: rule_type) -> float: """ To measure the likelihood of occurrence of an itemset given another itemset. """ - #association rule in the form (antecedent, consequent). antecedent, consequent = rule + antecedent_support = support(antecedent, dataset) + rule_support = support(antecedent + consequent, dataset) - support_ab = support(antecedent + consequent, data_set) - - support_a = support(antecedent, data_set) - - if support_a != 0: - #validating the value of the Supp of the antecedent before computation - - confidence_value = support_ab / support_a - - else: - confidence_value = 0 + if antecedent_support == 0: + return 0 # Avoid division by zero + confidence_value = rule_support / antecedent_support return confidence_value - From 551646a783d0ce7589f2f6e8c9abd00e2fd596a3 Mon Sep 17 00:00:00 2001 From: Stephane SEGNING LAMBOU Date: Sat, 9 Dec 2023 13:03:33 +0100 Subject: [PATCH 6/8] Update confidence.md --- docs/confidence.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/confidence.md b/docs/confidence.md index be62daf..c4f18db 100644 --- a/docs/confidence.md +++ b/docs/confidence.md @@ -1,4 +1,4 @@ -# Support +# Confidence In association rules mining, the confidence help to measure the likelihood of occurrence of an itemset given another itemset. @@ -21,4 +21,4 @@ Output: Confidence value for Rule Else: a. Confidence = 0 4. Return Confidence -``` \ No newline at end of file +``` From 6fb0b8be15d46e7c66786973dd491b7e702b84ab Mon Sep 17 00:00:00 2001 From: Stephane SEGNING LAMBOU Date: Sat, 9 Dec 2023 13:05:07 +0100 Subject: [PATCH 7/8] Update apriori.md --- docs/apriori.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/apriori.md b/docs/apriori.md index 961c3cd..da74535 100644 --- a/docs/apriori.md +++ b/docs/apriori.md @@ -1,4 +1,4 @@ -# Support +# Apriori In association rules mining, the aprori help to find all frequent itemsets in a dataset and generate strong association rules. @@ -28,4 +28,4 @@ Output: Frequent Itemsets, Strong Rules ii. If Calculate_Confidence(D, Rule) >= min_confidence: a. Add Rule to Strong Rules 5. Return Frequent Itemsets, Strong Rules -``` \ No newline at end of file +``` From 2f8c474e298733b8b2665ff62eb0b912408e238e Mon Sep 17 00:00:00 2001 From: AssahBismarkabah Date: Sat, 9 Dec 2023 13:41:31 +0100 Subject: [PATCH 8/8] revert accidental changes in the --> main --- main.py | 23 +++-------------------- 1 file changed, 3 insertions(+), 20 deletions(-) diff --git a/main.py b/main.py index b35fab3..cbe8c98 100644 --- a/main.py +++ b/main.py @@ -1,22 +1,5 @@ from src.apriori import apriori -dataset = [ - [1, 2, 3], - [1, 2, 3], - [1, 2, 3], -] - -min_support = 0.3 -min_confidence = 0.7 - -# Call the apriori function -frequent_itemsets, strong_rules = apriori(dataset, min_support, min_confidence) - -# Display the results -print("Frequent Itemsets:") -for itemset, support_value in frequent_itemsets.items(): - print(f"{itemset}: {support_value}") - -print("\nStrong Rules:") -for rule, confidence_value in strong_rules.items(): - print(f"{rule[0]} -> {rule[1]} (Confidence: {confidence_value})") \ No newline at end of file +if __name__ == '__main__': + result = apriori([[1, 2, 3], [1, 2, 3], [1, 2, 3]], 0.3, 0.7) + print(result) \ No newline at end of file