Skip to content

Commit

Permalink
fixed duplicated IDs, #251
Browse files Browse the repository at this point in the history
  • Loading branch information
zqfang committed Feb 28, 2024
1 parent a9d1b7b commit 7d5dd11
Showing 1 changed file with 2 additions and 1 deletion.
3 changes: 2 additions & 1 deletion gseapy/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -240,14 +240,15 @@ def _check_data(self, exprs: pd.DataFrame) -> pd.DataFrame:

def make_unique(self, rank_metric: pd.DataFrame, col_idx: int) -> pd.DataFrame:
"""
- make gene id column unique
+ make gene id column unique by adding a digit, similar to R's make.unique
"""
id_col = rank_metric.columns[col_idx]
if rank_metric.duplicated(subset=id_col).sum() > 0:
self._logger.info("Input gene rankings contains duplicated IDs")
mask = rank_metric.duplicated(subset=id_col, keep=False)
dups = (
rank_metric.loc[mask, id_col]
+ .to_frame()
.groupby(id_col)
.cumcount()
.map(lambda c: "_" + str(c) if c else "")
Expand Down

0 comments on commit 7d5dd11

Please sign in to comment.