From 5edcc6dda1a2b1175a3312e86eb759ee667635f2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Sara=20Monz=C3=B3n?= Date: Wed, 1 May 2024 21:57:17 +0200 Subject: [PATCH] added masking after filtering per row in df_filter --- taranis/utils.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/taranis/utils.py b/taranis/utils.py index 86918f0..0875833 100644 --- a/taranis/utils.py +++ b/taranis/utils.py @@ -343,8 +343,10 @@ def filter_df( rows_to_drop = mask.sum(axis=1) / len(df.columns) > row_thr filtered_df = df.loc[~rows_to_drop, :] + mask_fil = filtered_df.applymap(lambda x: bool(re.search(regex_pattern, str(x)))) + # Filter columns: Drop columns where the count of true in mask / total rows >= column_thr - cols_to_drop = mask.sum(axis=0) / len(df) > column_thr + cols_to_drop = mask_fil.sum(axis=0) / len(df) > column_thr filtered_df = filtered_df.loc[:, ~cols_to_drop] return filtered_df