Skip to content

Commit

Permalink
[MRG] fix identifier munging for local databases (#145)
Browse files: browse the repository at this point in the history.
* fix identifier split

* fix identifier split x 2

* fix identifier foo in notebooks
  • Loading branch information
ctb authored Jan 30, 2022
1 parent af9a8cd commit f1b5468
Show file tree
Hide file tree
Showing 3 changed files with 22 additions and 4 deletions.
7 changes: 5 additions & 2 deletions genome_grist/copy_local_genomes.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,8 +41,11 @@ def main():
record_name = record.name
break

record_name = record_name.split(' ', 1)
ident, remainder = record_name
ident, *remainder = record_name.split(' ', 1)
if remainder: # is list, needs to be string
remainder = remainder[0]
else:
remainder = ident

print(f"read identifer '{ident}' and name '{remainder}'")

Expand Down
9 changes: 8 additions & 1 deletion genome_grist/notebooks/report-gather.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,14 @@
"\n",
"# connect gather_df to all_df and left_df using 'genome_id'\n",
"def fix_name(x):\n",
" return \"_\".join(x.split('_')[:2]).split('.')[0]\n",
" # pick off first space-delimited name as identifier\n",
" x = x.split(' ')[0]\n",
" \n",
" # eliminate stuff after the period, too.\n",
" x = x.split('.')[0]\n",
" \n",
" return x\n",
" #return \"_\".join(x.split('_')[:2]).split('.')[0]\n",
"\n",
"gather_df['genome_id'] = gather_df['name'].apply(fix_name)\n",
"names_df['genome_id'] = names_df['ident'].apply(fix_name)"
Expand Down
10 changes: 9 additions & 1 deletion genome_grist/notebooks/report-mapping.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,15 @@
"\n",
"# connect gather_df to all_df and left_df using 'genome_id'\n",
"def fix_name(x):\n",
" return \"_\".join(x.split('_')[:2]).split('.')[0]\n",
" # pick off first space-delimited name as identifier\n",
" x = x.split(' ')[0]\n",
" \n",
" # eliminate stuff after the period, too.\n",
" x = x.split('.')[0]\n",
" \n",
" return x\n",
" #return \"_\".join(x.split('_')[:2]).split('.')[0]\n",
"\n",
"\n",
"gather_df['genome_id'] = gather_df['name'].apply(fix_name)\n",
"names_df['genome_id'] = names_df['ident'].apply(fix_name)"
Expand Down

0 comments on commit f1b5468

Please sign in to comment.