From f6ddd34d46405c93129295d168e68959d6fa4cd0 Mon Sep 17 00:00:00 2001 From: "sandipsamal117@gmail.com" Date: Wed, 5 Feb 2025 14:35:56 -0500 Subject: [PATCH] update plugin to read search and anon columns automatically --- dyanon.py | 25 ++++++++++--------------- 1 file changed, 10 insertions(+), 15 deletions(-) diff --git a/dyanon.py b/dyanon.py index f0cd6f1..bde6f94 100644 --- a/dyanon.py +++ b/dyanon.py @@ -29,7 +29,7 @@ logger.remove() logger.add(sys.stderr, format=logger_format) -__version__ = '1.0.8' +__version__ = '1.0.9' DISPLAY_TITLE = r""" _ _ @@ -62,16 +62,6 @@ default="", help="plugin instance ID from which to start analysis", ) -parser.add_argument( - "--searchIdx", - default="", - help="comma separated indices of columns containing search data", -) -parser.add_argument( - "--anonIdx", - default="", - help="comma separated indices of columns containing anonymization data", -) parser.add_argument( "--CUBEurl", default="http://localhost:8000/api/v1/", @@ -256,8 +246,13 @@ def health_check(options) -> bool: # See PyCharm help at https://www.jetbrains.com/help/pycharm/ def create_query(df: pd.DataFrame, str_srch_idx: str, str_anon_idx: str): - l_srch_idx = list(map(int, str_srch_idx.split(','))) - l_anon_idx = list(map(int, str_anon_idx.split(','))) + l_srch_idx = [] + l_anon_idx = [] + for column in df.columns: + if "search" in str(column).lower(): + l_srch_idx.append(df.columns.get_loc(column)) + if "anon" in str(column).lower(): + l_anon_idx.append(df.columns.get_loc(column)) l_job = [] @@ -266,12 +261,12 @@ def create_query(df: pd.DataFrame, str_srch_idx: str, str_anon_idx: str): s_col = (df.columns[l_srch_idx].values) s_row = (row[1].iloc[l_srch_idx].values) - s_d = [{k: v} for k, v in zip(s_col, s_row)] + s_d = [{k.split('.')[0].split('_')[1]: v} for k, v in zip(s_col, s_row)] d_job["search"] = dict(ChainMap(*s_d)) a_col = (df.columns[l_anon_idx].values) a_row = (row[1].iloc[l_anon_idx].values) - a_d = [{k.split('.')[0]: v} for k, v in zip(a_col, a_row)] + a_d = [{k.split('.')[0].split('_')[1]: v} for k, v in zip(a_col, a_row)] d_job["anon"] = dict(ChainMap(*a_d)) l_job.append(d_job)