From 7ffb03583db5d78966f04b0f5256a298b6483841 Mon Sep 17 00:00:00 2001 From: Angelo Antonio Salatino Date: Thu, 19 Dec 2024 22:36:02 +0100 Subject: [PATCH 01/10] added check on get_weights --- cso_classifier/classifier.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/cso_classifier/classifier.py b/cso_classifier/classifier.py index 66843da..d27d7a0 100644 --- a/cso_classifier/classifier.py +++ b/cso_classifier/classifier.py @@ -46,8 +46,10 @@ def __init__(self, **parameters): self.explanation = parameters["explanation"] if "explanation" in parameters else False self.delete_outliers = parameters["delete_outliers"] if "delete_outliers" in parameters else True self.fast_classification = parameters["fast_classification"] if "fast_classification" in parameters else True - self.silent = parameters["silent"] if "silent" in parameters else False self.get_weights = parameters["get_weights"] if "get_weights" in parameters else False + self.silent = parameters["silent"] if "silent" in parameters else False + + self.__check_parameters(parameters) @@ -232,7 +234,11 @@ def __check_parameters(self, parameters): if "fast_classification" in parameters: if not isinstance(parameters["fast_classification"], bool): raise TypeError("Field fast_classification must be set to either True or False. Got %s instead." % type(parameters["fast_classification"]).__name__) - + + if "get_weights" in parameters: + if not isinstance(parameters["get_weights"], bool): + raise TypeError("Field get_weights must be set to either True or False. Got %s instead." % type(parameters["get_weights"]).__name__) + if "silent" in parameters: if not isinstance(parameters["silent"], bool): raise TypeError("Field silent must be set to either True or False. Got %s instead." 
% type(parameters["silent"]).__name__) From 29e76f97c25abe961cbb8dc299db6e94afab860b Mon Sep 17 00:00:00 2001 From: Angelo Antonio Salatino Date: Thu, 19 Dec 2024 22:36:19 +0100 Subject: [PATCH 02/10] added functions to extract descendants --- cso_classifier/ontology.py | 74 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) diff --git a/cso_classifier/ontology.py b/cso_classifier/ontology.py index b485b34..56a2e1b 100644 --- a/cso_classifier/ontology.py +++ b/cso_classifier/ontology.py @@ -226,6 +226,80 @@ def get_all_broaders_of_topic(self, topic): pass return all_broaders + + def get_all_descendants_of_topics(self, topics): + """ + Finds all the descendants of a given list (or set) of topics. + + Parameters + ---------- + topics : list (or set) + List of topics of which identifying the descendants. + + Raises + ------ + ValueError + Error is raised when a different type of datase. + + Returns + ------- + list + The unique list of all descendants of the input topics. + + """ + + if type(topics) == str: + return self.get_all_descendants_of_topic(topics) + elif type(topics) == list or type(topics) == set: + descendants = [] + for topic in topics: + descendants.extend(self.get_all_descendants_of_topic(topic)) + return list(set(descendants)) + else: + raise TypeError("Error: The type of 'topics' must be either list or set.") + + + def get_all_descendants_of_topic(self, topic): + """ + Identifies all the descendants of a given topic of CSO + + Parameters + ---------- + topic : str + the topic. + + Raises + ------ + TypeError + raises the error if the topic is not a string. + + Returns + ------- + list + the list of descendant topics of 'topic'. 
+ + """ + + + if type(topic) != str: + raise TypeError("Error: The type of 'topic' must be str.") + + if topic not in self.topics: + raise ValueError(f"Error: The topic '{topic}' is not available in this version of the Ontology.") + + set_of_descendants = set() + queue = deque() + queue.append(topic) + + while len(queue) > 0: + dequeued = queue.popleft() + set_of_descendants.add(dequeued) + if dequeued in self.narrowers: + narrower_concepts = self.narrowers[dequeued] + for narrower_concept in narrower_concepts: + queue.append(narrower_concept) + + return list(set_of_descendants) def find_closest_matches(self, word): From 07820b5ff1601e088b52fc287066f26878ce8bcf Mon Sep 17 00:00:00 2001 From: Angelo Antonio Salatino Date: Fri, 20 Dec 2024 00:11:55 +0100 Subject: [PATCH 03/10] changed return type of descendants (now set, before list) --- cso_classifier/ontology.py | 14 ++++----- cso_classifier/result.py | 59 ++++++++++++++++++++++++++++++++++++-- output.json | 29 ++++++++++++++++++- 3 files changed, 92 insertions(+), 10 deletions(-) diff --git a/cso_classifier/ontology.py b/cso_classifier/ontology.py index 56a2e1b..1a0c898 100644 --- a/cso_classifier/ontology.py +++ b/cso_classifier/ontology.py @@ -243,7 +243,7 @@ def get_all_descendants_of_topics(self, topics): Returns ------- - list + set The unique list of all descendants of the input topics. 
""" @@ -251,10 +251,10 @@ def get_all_descendants_of_topics(self, topics): if type(topics) == str: return self.get_all_descendants_of_topic(topics) elif type(topics) == list or type(topics) == set: - descendants = [] + descendants = set() for topic in topics: - descendants.extend(self.get_all_descendants_of_topic(topic)) - return list(set(descendants)) + descendants.update(self.get_all_descendants_of_topic(topic)) + return descendants else: raise TypeError("Error: The type of 'topics' must be either list or set.") @@ -275,7 +275,7 @@ def get_all_descendants_of_topic(self, topic): Returns ------- - list + set the list of descendant topics of 'topic'. """ @@ -298,8 +298,8 @@ def get_all_descendants_of_topic(self, topic): narrower_concepts = self.narrowers[dequeued] for narrower_concept in narrower_concepts: queue.append(narrower_concept) - - return list(set_of_descendants) + + return set_of_descendants def find_closest_matches(self, word): diff --git a/cso_classifier/result.py b/cso_classifier/result.py index b63c463..bf479c6 100644 --- a/cso_classifier/result.py +++ b/cso_classifier/result.py @@ -1,7 +1,7 @@ class Result: """ A simple abstraction layer for retrieving the results """ - def __init__(self, explanation = False, get_weights=False): + def __init__(self, explanation = False, get_weights=False, filter_output=False): """ Initialising the ontology class """ self.syntactic = list() @@ -20,6 +20,15 @@ def __init__(self, explanation = False, get_weights=False): self.syntactic_weights = dict() self.semantic_weights = dict() self.result_attr += ('syntactic_weights','semantic_weights',) + + self.filter_output = False + if filter_output: + self.filter_output = True + self.filtered_syntactic = list() + self.filtered_semantic = list() + self.filtered_union = list() + self.filtered_enhanced = list() + self.result_attr += ('filtered_syntactic', 'filtered_semantic', 'filtered_union', 'filtered_enhanced',) def get_dict(self): @@ -107,7 +116,53 @@ def 
get_semantic_topics_weights(self): """ Gets the semantic_weights variable """ return self.semantic_weights - + + + def set_filtered_syntactic(self, filtered_syntactic): + """ Set the filtered syntactic + """ + self.filtered_syntactic = filtered_syntactic + + + def get_filtered_syntactic(self): + """ Get the filtered syntactic + """ + return self.filtered_syntactic + + def set_filtered_semantic(self, filtered_semantic): + """ Set the filtered semantic + """ + self.filtered_semantic = filtered_semantic + + + def get_filtered_semantic(self): + """ Get the filtered semantic + """ + return self.filtered_semantic + + + def set_filtered_union(self, filtered_union): + """ Set the filtered union + """ + self.filtered_union = filtered_union + + + def get_filtered_union(self): + """ Get the filtered union + """ + return self.filtered_union + + def set_filtered_enhanced(self, filtered_enhanced): + """ Set the filtered enhanced + """ + self.filtered_enhanced = filtered_enhanced + + + def get_filtered_enhanced(self): + """ Get the filtered enhanced + """ + return self.filtered_enhanced + def dump_temporary_explanation(self, temporary_explanation): """ It dumps the temporary explanation. 
After it will be reorganised diff --git a/output.json b/output.json index 410bb8c..55f6c02 100644 --- a/output.json +++ b/output.json @@ -213,5 +213,32 @@ "topology": 1.0, "network topology": 1.0, "graph theory": 1.0 - } + }, + "filtered_syntactic": [ + "anonymization", + "data privacy", + "privacy", + "anonymity", + "sensitive informations" + ], + "filtered_semantic": [ + "anonymization", + "data privacy", + "privacy", + "anonymity" + ], + "filtered_union": [ + "anonymization", + "data privacy", + "privacy", + "anonymity", + "sensitive informations" + ], + "filtered_enhanced": [ + "privacy preserving", + "computer security", + "authentication", + "network security", + "access control" + ] } \ No newline at end of file From 800eb6a585c2cbad9a8881d902b33cc4bc827985 Mon Sep 17 00:00:00 2001 From: Angelo Antonio Salatino Date: Fri, 20 Dec 2024 00:12:29 +0100 Subject: [PATCH 04/10] postprocmodule takes care of filtering topics --- cso_classifier/postprocmodule.py | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/cso_classifier/postprocmodule.py b/cso_classifier/postprocmodule.py index 283c5f3..237686e 100644 --- a/cso_classifier/postprocmodule.py +++ b/cso_classifier/postprocmodule.py @@ -26,13 +26,18 @@ def __init__(self, model = None, cso = None, **parameters): self.enhancement = parameters["enhancement"] if "enhancement" in parameters else "first" #defines the type of enhancement self.delete_outliers = parameters["delete_outliers"] if "delete_outliers" in parameters else True self.get_weights = parameters["get_weights"] if "get_weights" in parameters else False + + self.filter_output = True if "filter_by" in parameters else False + self.filter_by = parameters["filter_by"] if "filter_by" in parameters else [] if "result" in parameters: self.result = parameters["result"] # the result object self.list_of_topics = self.result.get_union() else: self.result = None - + + if self.filter_output: + self.descendants_to_keep = 
self.cso.get_all_descendants_of_topics(self.filter_by) def set_result(self, result): """Function that initializes the result variable in the class. @@ -244,3 +249,24 @@ def filtering_outliers(self): return self.result + + def filtering_by_user_defined_topics(self): + """ Identifies the topics that are descendants of user defined ancestors. + Saves this into a new key of the result. + """ + + self.result.set_filtered_syntactic(list(filter(lambda topic: topic in self.descendants_to_keep, self.result.get_syntactic()))) + self.result.set_filtered_semantic(list(filter(lambda topic: topic in self.descendants_to_keep, self.result.get_semantic()))) + self.result.set_filtered_union(list(filter(lambda topic: topic in self.descendants_to_keep, self.result.get_union()))) + self.result.set_filtered_enhanced(list(filter(lambda topic: topic in self.descendants_to_keep, self.result.get_enhanced()))) + + + + def process(self): + """ Runs the postprocessing module (changed from version 3.3) + """ + result = self.filtering_outliers() + if self.filter_output: + self.filtering_by_user_defined_topics() + + return self.result From dc8b1737fa29e386c80202052fa4eab36ffc875d Mon Sep 17 00:00:00 2001 From: Angelo Antonio Salatino Date: Fri, 20 Dec 2024 00:12:57 +0100 Subject: [PATCH 05/10] the simple run now is able to filter still need to implement the batch run --- cso_classifier/classifier.py | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) diff --git a/cso_classifier/classifier.py b/cso_classifier/classifier.py index d27d7a0..f5c9275 100644 --- a/cso_classifier/classifier.py +++ b/cso_classifier/classifier.py @@ -39,6 +39,8 @@ def __init__(self, **parameters): True to return weights. Default value is False - silent (boolean): determines whether to print the progress. If true goes in silent mode. Instead, if false does not print anything in standard output. 
+ - filter_by (list): determines whether the output should be filtered accoring to certain branches of CSO. Please note, + this will not filter the regular result set, but rather return an additional key with filtered topics """ self.modules = parameters["modules"] if "modules" in parameters else "both" @@ -48,7 +50,9 @@ def __init__(self, **parameters): self.fast_classification = parameters["fast_classification"] if "fast_classification" in parameters else True self.get_weights = parameters["get_weights"] if "get_weights" in parameters else False self.silent = parameters["silent"] if "silent" in parameters else False - + + self.filter_output = True if "filter_by" in parameters else False + self.filter_by = parameters["filter_by"] if "filter_by" in parameters else [] self.__check_parameters(parameters) @@ -86,7 +90,10 @@ def run(self, paper): self.models_loaded = True t_paper = Paper(paper, self.modules) - result = Result(self.explanation, self.get_weights) + result = Result(self.explanation, self.get_weights, self.filter_output) + + + # Passing parameters to the two classes (synt and sema) and actioning classifiers @@ -108,8 +115,14 @@ def run(self, paper): result.dump_temporary_explanation(sema_module.get_explanation()) - postprocess = post(self.model, self.cso, enhancement=self.enhancement, result=result, delete_outliers=self.delete_outliers, get_weights=self.get_weights) - result = postprocess.filtering_outliers() + postprocess = post(self.model, + self.cso, + enhancement=self.enhancement, + result=result, + delete_outliers=self.delete_outliers, + get_weights=self.get_weights, + filter_by=self.filter_by) + result = postprocess.process() return result.get_dict() @@ -242,6 +255,10 @@ def __check_parameters(self, parameters): if "silent" in parameters: if not isinstance(parameters["silent"], bool): raise TypeError("Field silent must be set to either True or False. Got %s instead." 
% type(parameters["silent"]).__name__) + + if "filter_by" in parameters: + if not isinstance(parameters["filter_by"], list): + raise TypeError("Field filter_by must be a list of strings. Got %s instead." % type(parameters["filter_by"]).__name__) @staticmethod From ed084ff02c53b1584deb1e32e5980f71f8c793b6 Mon Sep 17 00:00:00 2001 From: Angelo Antonio Salatino Date: Fri, 20 Dec 2024 00:14:32 +0100 Subject: [PATCH 06/10] adapted with new parameter --- CSO-Classifier.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CSO-Classifier.py b/CSO-Classifier.py index 823eff0..c03455e 100644 --- a/CSO-Classifier.py +++ b/CSO-Classifier.py @@ -46,7 +46,7 @@ # In[Run Classifier] -cc = CSOClassifier(explanation=True, get_weights=True) +cc = CSOClassifier(explanation=True, get_weights=True, filter_by=["computer security"]) result = cc.run(paper) From 716e4dd0ad743dec63a51f037bfb9aea15974155 Mon Sep 17 00:00:00 2001 From: Angelo Antonio Salatino Date: Fri, 20 Dec 2024 00:16:18 +0100 Subject: [PATCH 07/10] improved details of function --- cso_classifier/result.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/cso_classifier/result.py b/cso_classifier/result.py index bf479c6..477961d 100644 --- a/cso_classifier/result.py +++ b/cso_classifier/result.py @@ -119,47 +119,47 @@ def get_semantic_topics_weights(self): def set_filtered_syntactic(self, filtered_syntactic): - """ Set the filtered syntactic + """ Set the filtered syntactic topics """ self.filtered_syntactic = filtered_syntactic def get_filtered_syntactic(self): - """ Get the filtered syntactic + """ Get the filtered syntactic topics """ return self.filtered_syntactic def set_filtered_semantic(self, filtered_semantic): - """ Set the filtered semantic + """ Set the filtered semantic topics """ self.filtered_semantic = filtered_semantic def get_filtered_semantic(self): - """ Get the filtered semantic + """ Get the filtered semantic topics """ return self.filtered_semantic def 
set_filtered_union(self, filtered_union): - """ Set the filtered union + """ Set the filtered union topics """ self.filtered_union = filtered_union def get_filtered_union(self): - """ Get the filtered union + """ Get the filtered union topics """ return self.filtered_union def set_filtered_enhanced(self, filtered_enhanced): - """ Set the filtered enhanced + """ Set the filtered enhanced topics """ self.filtered_enhanced = filtered_enhanced def get_filtered_enhanced(self): - """ Get the filtered enhanced + """ Get the filtered enhanced topics """ return self.filtered_enhanced From 90ae1290ccba66e54af31a5a149aadcf36f4c884 Mon Sep 17 00:00:00 2001 From: Angelo Antonio Salatino Date: Fri, 20 Dec 2024 15:02:34 +0100 Subject: [PATCH 08/10] implemented filtering in the parallelised section --- cso_classifier/classifier.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/cso_classifier/classifier.py b/cso_classifier/classifier.py index f5c9275..ff0a763 100644 --- a/cso_classifier/classifier.py +++ b/cso_classifier/classifier.py @@ -190,7 +190,12 @@ def _batch_run_single_worker(self, papers): # Passing parameters to the two classes (synt and sema) synt_module = synt(cso) sema_module = sema(model, cso, self.fast_classification) - postprocess = post(model, cso, enhancement=self.enhancement, delete_outliers=self.delete_outliers, get_weights=self.get_weights) + postprocess = post(model, + cso, + enhancement=self.enhancement, + delete_outliers=self.delete_outliers, + get_weights=self.get_weights, + filter_by=self.filter_by) # initializing variable that will contain output @@ -201,7 +206,7 @@ def _batch_run_single_worker(self, papers): print("Processing:", paper_id) paper.set_paper(paper_value) - result = Result(self.explanation, self.get_weights) + result = Result(self.explanation, self.get_weights, self.filter_output) # Passing paper and actioning the classifier if self.modules in ('syntactic','both'): @@ -220,7 +225,7 @@ def 
_batch_run_single_worker(self, papers): result.dump_temporary_explanation(sema_module.get_explanation()) postprocess.set_result(result) - result = postprocess.filtering_outliers() + result = postprocess.process() class_res[paper_id] = result.get_dict() return class_res From b0022992f4e7887d46c84da6989c5aefc0a71d8a Mon Sep 17 00:00:00 2001 From: Angelo Antonio Salatino Date: Mon, 23 Dec 2024 13:12:02 +0100 Subject: [PATCH 09/10] updated documentation of version 3.3 with example of filter_by --- README.md | 149 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 147 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 6ba215c..6acdc4a 100644 --- a/README.md +++ b/README.md @@ -33,12 +33,15 @@ Read more: [https://skm.kmi.open.ac.uk/cso-classifier/](https://skm.kmi.open.ac. - [Sample Input (SP)](#sample-input-sp) - [Run (SP)](#run-sp) - [Sample Output (SP)](#sample-output-sp) + - [Run on Single Paper with filter\_by](#run-on-single-paper-with-filter_by) + - [Sample Output when using the filter\_by parameter](#sample-output-when-using-the-filter_by-parameter) - [Classifying in batch mode (BM)](#classifying-in-batch-mode-bm) - [Sample Input (BM)](#sample-input-bm) - [Run (BM)](#run-bm) - [Sample Output (BM)](#sample-output-bm) - [Parameters](#parameters) - [Releases](#releases) + - [v3.3](#v33) - [v3.2](#v32) - [v3.1](#v31) - [v3.0](#v30) @@ -345,6 +348,140 @@ Below you can find an example. The keys syntactic and semantic respectively cont } ``` +#### Run on Single Paper with filter_by + +In this example, we will run the CSO Classifier by filtering topics in *computer security* (look at how the ```filter_by``` parameter is set). 
+ +```python +from cso_classifier import CSOClassifier +cc = CSOClassifier(modules = "both", enhancement = "first", explanation = True, filter_by=["computer security"]) +result = cc.run(paper) +print(result) +``` + +#### Sample Output when using the filter_by parameter + +The JSON below is the produced output, and as you can see there are 4 additional keys (*filtered_XXXX*) at the bottom containing only a subset of topics within the field of **computer security**. + +```json +{ + "syntactic": [ + "real-world networks", + "anonymization", + "network topology", + "data privacy", + "social networks", + "privacy", + "twitter", + "graph theory", + "online social networks", + "anonymity", + "data mining", + "micro-blog", + "sensitive informations" + ], + "semantic": [ + "anonymization", + "network topology", + "topology", + "data privacy", + "social networks", + "privacy", + "twitter", + "graph theory", + "online social networks", + "anonymity", + "data mining", + "micro-blog" + ], + "union": [ + "real-world networks", + "anonymization", + "network topology", + "topology", + "data privacy", + "social networks", + "privacy", + "twitter", + "graph theory", + "online social networks", + "anonymity", + "data mining", + "micro-blog", + "sensitive informations" + ], + "enhanced": [ + "complex networks", + "privacy preserving", + "computer networks", + "world wide web", + "computer security", + "social media", + "theoretical computer science", + "online systems", + "authentication", + "network security", + "computer science", + "access control" + ], + "explanation": { + "social networks": ["real-world networks", "social networks", "twitter", "social-network", "online social networks", "social network", "microblogging", "social networking"], + "online social networks": ["online social networks", "social networks", "social network"], + "sensitive informations": ["sensitive information"], + "data mining": ["data mining", "mining", "data-mining"], + "privacy": ["anonymous", 
"anonymity", "sensitive information", "data privacy", "privacy"], + "anonymization": ["anonymization"], + "anonymity": ["anonymous", "anonymity"], + "real-world networks": ["real-world networks"], + "twitter": ["twitter graph", "anonymous twitter", "microblogging", "microblogging service", "twitter"], + "micro-blog": ["twitter graph", "anonymous twitter", "microblogging", "microblogging service", "twitter"], + "network topology": ["network topology", "topology"], + "data privacy": ["data privacy", "privacy"], + "graph theory": ["graph theory"], + "topology": ["network topology", "topology"], + "complex networks": ["real-world networks"], + "privacy preserving": ["anonymization"], + "computer networks": ["network topology", "topology"], + "world wide web": ["real-world networks", "social networks", "twitter", "social-network", "online social networks", "social network", "microblogging", "social networking"], + "computer security": ["anonymous", "anonymity", "sensitive information", "data privacy", "privacy"], + "social media": ["microblogging", "twitter"], + "theoretical computer science": ["graph theory"], + "online systems": ["online social networks", "social networks", "social network"], + "authentication": ["anonymous", "anonymity"], + "network security": ["anonymous", "anonymity", "sensitive information"], + "computer science": ["data mining", "mining", "data-mining"], + "access control": ["sensitive information"] + }, + "filtered_syntactic": [ + "anonymization", + "data privacy", + "privacy", + "anonymity", + "sensitive informations" + ], + "filtered_semantic": [ + "anonymization", + "data privacy", + "privacy", + "anonymity" + ], + "filtered_union": [ + "anonymization", + "data privacy", + "privacy", + "anonymity", + "sensitive informations" + ], + "filtered_enhanced": [ + "privacy preserving", + "computer security", + "authentication", + "network security", + "access control" + ] +} +``` + ### Classifying in batch mode (BM) #### Sample Input (BM) @@ -432,7 
+569,7 @@ Below you can find an example. The keys syntactic and semantic respectively cont ``` ### Parameters -Beside the paper(s), the function running the CSO Classifier accepts seven additional parameters: (i) **workers**, (ii) **modules**, (iii) **enhancement**, (iv) **explanation**, (v) **delete_outliers**, (vi) **fast_classification**, and (vii) **silent**. There is no particular order on how to specify these paramaters. Here we explain their usage. The workers parameters is an integer (equal or greater than 1), modules and enhancement are strings that define a particular behaviour for the classifier. The explanation, delete_outliers, fast_classification, and silent parameters are booleans. +Beside the paper(s), the function running the CSO Classifier accepts nine additional parameters: (i) **workers**, (ii) **modules**, (iii) **enhancement**, (iv) **explanation**, (v) **delete_outliers**, (vi) **fast_classification**, (vii) **get_weights**, (viii) **silent**, and (ix) **filter_by**. There is no particular order on how to specify these parameters. Here we explain their usage. The workers parameter is an integer (equal or greater than 1), modules and enhancement are strings that define a particular behaviour for the classifier. The explanation, delete_outliers, fast_classification, get_weights, and silent parameters are booleans. Finally, filter_by is a list of strings. (i) The parameter *workers* defines the number of threads to run for classifying the input corpus. For instance, if ```workers = 4```, there will be 4 instances of the CSO Classifier, each one receiving a chunk (equally split) of the corpus to process. Once all processes are completed, the results will be aggregated and returned. The default value for *workers* is *1*. This parameter is available only when running the classifier in *batch mode*. @@ -450,6 +587,8 @@ Beside the paper(s), the function running the CSO Classifier accepts seven addit (viii) The parameter *silent* can be either *True* or *False*. 
This determines whether the classifier prints its progress in the console. If set to True, the classifier will be silent and will not print any output while classifying. The default value for *silent* is *False*. +(ix) The parameter *filter_by* is a list of CSO topics that lets you focus the classification on specific sub-branches of CSO. For instance, to narrow down the results to subtopics within **artificial intelligence** and **semantic web** you can set ```filter_by = ["artificial intelligence", "semantic web"]```. This will produce four extra outputs (*filtered_syntactic*, *filtered_semantic*, *filtered_union*, *filtered_enhanced*) containing only the CSO topics that fall under the hierarchical structure of the specified areas. By default this parameter is an empty list, and therefore the classifier will consider all CSO topics as usual. You can check [Run on Single Paper with filter\_by](#run-on-single-paper-with-filter_by) to see how it works. + |# | Parameter | Single Paper | Batch Mode | @@ -462,6 +601,7 @@ Beside the paper(s), the function running the CSO Classifier accepts seven addit |vi | fast_classification| :white_check_mark: | :white_check_mark: | |vii| get_weights | :white_check_mark: | :white_check_mark: | |viii| silent | :white_check_mark: | :white_check_mark: | +|ix| filter_by | :white_check_mark: | :white_check_mark: | **Table 1**: Parameters availability when using CSO Classifier @@ -469,7 +609,12 @@ Beside the paper(s), the function running the CSO Classifier accepts seven addit ## Releases -Here we list the available releases for the CSO Classifier. These releases are available for download both from [Github](https://github.com/angelosalatino/cso-classifier/releases) and [Zenodo](10.5281/zenodo.2660819). +Here we list the available releases for the CSO Classifier. 
These releases are available for download both from [Github](https://github.com/angelosalatino/cso-classifier/releases) and [Zenodo](http://doi.org/10.5281/zenodo.2660819). + +### v3.3 + +This release extends version 3.2 with a new feature that lets you refine the classification process by focusing on specific areas within the Computer Science Ontology. Specifically, by providing one or more topics within the parameter *filter_by* (type list), the classifier will extract the sub-branches of such CSO topics, and when classifying will narrow down the output to only the sub-topics available in those areas. This is especially helpful when you are interested in exploring specific branches of the CSO, such as identifying only the concepts related to **artificial intelligence** and **semantic web** within a given paper, and can be achieved by setting ```filter_by = ["artificial intelligence", "semantic web"]``` (see [Parameters](#parameters)). If this parameter is set, the classifier will return the standard classification results, with four extra sets of results (*filtered_syntactic*, *filtered_semantic*, *filtered_union*, *filtered_enhanced*) containing only the filtered topics. This gives users the full picture and a focused view within the chosen areas. 
+ ### v3.2 From 6aafd93a823266aad7fb4bcdab2b5ff393a70670 Mon Sep 17 00:00:00 2001 From: Angelo Antonio Salatino Date: Mon, 23 Dec 2024 13:15:59 +0100 Subject: [PATCH 10/10] Update CSO-Classifier.ipynb --- CSO-Classifier.ipynb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CSO-Classifier.ipynb b/CSO-Classifier.ipynb index 70ce273..1abc8a0 100644 --- a/CSO-Classifier.ipynb +++ b/CSO-Classifier.ipynb @@ -70,7 +70,7 @@ "metadata": {}, "outputs": [], "source": [ - "cc = CSOClassifier(explanation=True, get_weights=True)\n", + "cc = CSOClassifier(explanation=True, get_weights=True, filter_by=[\"computer security\"])\n", "\n", "result = cc.run(paper)" ] @@ -120,4 +120,4 @@ }, "nbformat": 4, "nbformat_minor": 2 -} \ No newline at end of file +}