Skip to content

Commit

Permalink
Add notebook to create pickle file for fms and labels
Browse files Browse the repository at this point in the history
  • Loading branch information
J535D165 committed Dec 16, 2024
1 parent f7ee003 commit 4cf10c2
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 0 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@

synergy-dataset/

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
Expand Down
69 changes: 69 additions & 0 deletions feature_matrices.ipynb
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from pathlib import Path\n",
"import pickle\n",
"\n",
"import synergy_dataset as sd\n",
"import asreview as asr\n",
"\n",
"from asreview.models.feature_extraction import Tfidf\n",
"\n",
"# Report the versions of the libraries this notebook is run with.\n",
"versions = ((\"synergy-dataset version\", sd), (\"asreview version:\", asr))\n",
"for label, module in versions:\n",
"    print(label, module.__version__)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Directory the pickle files are written to; created (with parents) if missing.\n",
"folder_pickle_files = Path(\"synergy-dataset\") / \"pickles\"\n",
"folder_pickle_files.mkdir(exist_ok=True, parents=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Write one pickle per dataset yielded by sd.iter_datasets(); each file holds\n",
"# the tuple (feature matrix, `label_included` column).\n",
"for dataset in sd.iter_datasets():\n",
"    # reset_index() moves the frame's index into a regular column.\n",
"    df = dataset.to_frame().reset_index()\n",
"\n",
"    # Missing titles/abstracts are replaced by empty strings before they are\n",
"    # handed to the feature extractor.\n",
"    titles = df[\"title\"].fillna(\"\").values\n",
"    abstracts = df[\"abstract\"].fillna(\"\").values\n",
"    X = Tfidf().fit_transform(titles, abstracts)\n",
"\n",
"    # The file is named after the dataset; the dump must stay nested inside\n",
"    # the `with` block so it runs while the file handle is open.\n",
"    with open(folder_pickle_files / f\"{dataset.name}.pkl\", \"wb\") as f:\n",
"        pickle.dump((X, df[\"label_included\"]), f)\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "asreview-dev",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}

0 comments on commit 4cf10c2

Please sign in to comment.