Skip to content

Commit

Permalink
Added masking after per-row filtering in filter_df
Browse files Browse the repository at this point in the history
  • Loading branch information
saramonzon committed May 1, 2024
1 parent 4ba230d commit 5edcc6d
Showing 1 changed file with 3 additions and 1 deletion.
4 changes: 3 additions & 1 deletion taranis/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,8 +343,10 @@ def filter_df(
rows_to_drop = mask.sum(axis=1) / len(df.columns) > row_thr
filtered_df = df.loc[~rows_to_drop, :]

mask_fil = filtered_df.applymap(lambda x: bool(re.search(regex_pattern, str(x))))

# Filter columns: Drop columns where the count of True in mask / total rows > column_thr
cols_to_drop = mask.sum(axis=0) / len(df) > column_thr
cols_to_drop = mask_fil.sum(axis=0) / len(df) > column_thr
filtered_df = filtered_df.loc[:, ~cols_to_drop]

return filtered_df
Expand Down

0 comments on commit 5edcc6d

Please sign in to comment.