-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
3 changed files
with
273 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,269 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"# Analysis of Alibay ABFE Results\n", | ||
"\n", | ||
"In [our manuscript](https://doi.org/10.26434/chemrxiv-2024-3ft7f), we point out that most component simulations of the ABFE calculations are not strictly converged, because the distributions of perturbed energies sampled by replicate runs are significantly different. To illustrate that this issue is wide-spread, we analyse gradients from ABFE calculations from [Alibay et al.'s work](https://www.nature.com/articles/s42004-022-00721-4). We arbitrarily choose to analyse the Cyclphilin-D data. We are grateful to Alibay et al. for providing the raw gradient data." | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"import a3fe as a3\n", | ||
"import os\n", | ||
"from pathlib import Path\n", | ||
"import matplotlib.pyplot as plt\n", | ||
"from scipy.stats import kruskal\n", | ||
"from pymbar.timeseries import statisticalInefficiency\n", | ||
"from tqdm import tqdm\n", | ||
"import numpy as np\n", | ||
"plt.style.use(\"seaborn-v0_8-colorblind\")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## Cyclophilin D: Data Extraction" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Get the data from zenodo\n", | ||
"\n", | ||
"! wget https://zenodo.org/records/5906019/files/complex.zip\\?download\\=1\n", | ||
"! wget https://zenodo.org/records/5906019/files/ligand.zip\\?download\\=1" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Unzip the data\n", | ||
"\n", | ||
"! mv complex.zip\\?download\\=1.1 complex.zip && unzip complex.zip\n", | ||
"! mv ligand.zip\\?download\\=1 ligand.zip && unzip ligand.zip" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Base directory for 'complex' and 'ligand'\n", | ||
"base_dir = Path('.')\n", | ||
"\n", | ||
"xvg_paths = {}\n", | ||
"for leg in ['complex', 'ligand']:\n", | ||
" leg_path = base_dir / leg\n", | ||
" if not leg_path.exists():\n", | ||
" print(f\"Directory {leg_path} does not exist.\")\n", | ||
" continue\n", | ||
"\n", | ||
" xvg_paths[leg] = {}\n", | ||
" for lig_path in leg_path.iterdir():\n", | ||
" if not lig_path.is_dir():\n", | ||
" continue # Skip files, only process directories\n", | ||
"\n", | ||
" lig = lig_path.name\n", | ||
" xvg_paths[leg][lig] = {}\n", | ||
" for run in range(1, 6):\n", | ||
" run_path = lig_path / f\"run{run}\"\n", | ||
" xvg_paths[leg][lig][run] = {}\n", | ||
" for stage in [\"restraints\", \"coul\", \"vdw\"]:\n", | ||
" if leg == 'ligand' and stage == 'restraints':\n", | ||
" continue # No restraint stage for ligand\n", | ||
" stage_path = run_path / f\"{stage}-xvg\"\n", | ||
" if not stage_path.exists():\n", | ||
" print(f\"Directory {stage_path} does not exist.\")\n", | ||
" continue\n", | ||
"\n", | ||
" xvg_paths[leg][lig][run][stage] = {}\n", | ||
" for xvg_file in stage_path.iterdir():\n", | ||
" if xvg_file.is_file() and xvg_file.suffix == '.xvg':\n", | ||
" lam = xvg_file.stem.split(\".\")[1]\n", | ||
" xvg_paths[leg][lig][run][stage][lam] = xvg_file" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"def read_grads(xvg_path: Path) -> list[float]:\n", | ||
" lines = xvg_path.read_text().splitlines()\n", | ||
" filtered_lines = [line for line in lines if not line.startswith((\"#\", \"@\"))]\n", | ||
" # The gradients are the second column\n", | ||
" return [float(line.split()[1]) for line in filtered_lines]" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# For one example, check that we have the expected number of gradient data points\n", | ||
"example_xvg_path = xvg_paths['complex']['ligand-27'][1]['coul']['0']\n", | ||
"example_xvgs = read_grads(example_xvg_path)\n", | ||
"\n", | ||
"NRG_FREQ = 100\n", | ||
"TIMESTEP = 4E-6 # ns\n", | ||
"\n", | ||
"print(f\"Total time: {(len(example_xvgs) - 1) * NRG_FREQ * TIMESTEP} ns\")" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Now, get the data and subsample with pymbar timeseries\n", | ||
"\n", | ||
"grads_subsampled = {}\n", | ||
"for leg in ['complex', 'ligand']:\n", | ||
" grads_subsampled[leg] = {}\n", | ||
" for lig in tqdm(xvg_paths[leg], desc=leg):\n", | ||
" grads_subsampled[leg][lig] = {}\n", | ||
" for run in xvg_paths[leg][lig]:\n", | ||
" grads_subsampled[leg][lig][run] = {}\n", | ||
" for stage in xvg_paths[leg][lig][run]:\n", | ||
" grads_subsampled[leg][lig][run][stage] = {}\n", | ||
" for lam in xvg_paths[leg][lig][run][stage]:\n", | ||
" xvg_path = xvg_paths[leg][lig][run][stage][lam]\n", | ||
" grads = read_grads(xvg_path)\n", | ||
" g = statisticalInefficiency(grads)\n", | ||
" grads_subsampled[leg][lig][run][stage][lam] = grads[::round(g)]" | ||
] | ||
}, | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"## Cyclophilin D: Data Analysis" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"def get_sig_diff_grads(grads_subsampled: dict, leg: str, lig: str, stage: str) -> tuple[float, float]:\n", | ||
" \"\"\"\n", | ||
" Calculate the percentage of lambda windows where the gradient distributions\n", | ||
" are significantly different, using the Kruskal-Wallis test\n", | ||
" \"\"\"\n", | ||
" n_lam = len(grads_subsampled[leg][lig][1][stage])\n", | ||
" n_sig_diff = 0\n", | ||
"\n", | ||
" for i in range(n_lam):\n", | ||
" gradients = [grads_subsampled[leg][lig][run][stage][str(i)] for run in range(1, 6)]\n", | ||
" _, p = kruskal(*gradients)\n", | ||
" if p < 0.05:\n", | ||
" n_sig_diff += 1\n", | ||
"\n", | ||
" return n_lam, n_sig_diff" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Get a dictionary with the percentage of significantly different gradients\n", | ||
"sig_diff_grads = {}\n", | ||
"for lig in grads_subsampled[\"complex\"]:\n", | ||
" sig_diff_grads[lig] = {}\n", | ||
" for leg in ['complex', 'ligand']:\n", | ||
" sig_diff_grads[lig][leg] = {}\n", | ||
" for stage in grads_subsampled[leg][lig][1]:\n", | ||
" sig_diff_grads[lig][leg][stage] = get_sig_diff_grads(grads_subsampled, leg, lig, stage)\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Plot the percentage of lambda windows where the gradients are significantly different all on one bar plot\n", | ||
"fig, ax = plt.subplots(figsize=(12, 4), dpi=300)\n", | ||
"x = np.arange(len(sig_diff_grads))\n", | ||
"width = 0.2\n", | ||
"stage_name_map = {\"restraints\": \"Restrain\", \"coul\": \"Discharge\", \"vdw\": \"Vanish\"}\n", | ||
"ligand_labels = [lig_name.replace(\"-\", \" \") for lig_name in sig_diff_grads]\n", | ||
"\n", | ||
"# Plot complex and ligand next to each other\n", | ||
"for i, stage in enumerate(['restraints', 'coul', 'vdw']):\n", | ||
" color = ax._get_lines.get_next_color()\n", | ||
" stage_name = stage_name_map[stage]\n", | ||
"\n", | ||
" # Bound leg\n", | ||
" y_complex = [100 * (sig_diff_grads[lig][\"complex\"][stage][1] / sig_diff_grads[lig][\"complex\"][stage][0]) for lig in sig_diff_grads]\n", | ||
" ax.bar(x + i * width, y_complex, width, label=f\"Bound {stage_name}\", edgecolor=\"k\", alpha=1, color=color)\n", | ||
"\n", | ||
" # Free leg\n", | ||
" if stage != 'restraints':\n", | ||
" y_lig = [100 * (sig_diff_grads[lig][\"ligand\"][stage][1] / sig_diff_grads[lig][\"ligand\"][stage][0]) for lig in sig_diff_grads]\n", | ||
" ax.bar(x + i * width, y_lig, width, label=f\"Free {stage_name}\", edgecolor=\"k\", alpha=0.8, color=color, hatch=\"///////\")\n", | ||
"\n", | ||
"# Set x ticks but rotate them 90\n", | ||
"ax.set_xticks(x + width)\n", | ||
"ax.set_xticklabels(ligand_labels, rotation=90)\n", | ||
"ax.set_ylabel(\"% Windows with Significant\\n Inter-run Differences Between\\n Gradient Distributions\")\n", | ||
"# Put label off to right of plot\n", | ||
"ax.legend(bbox_to_anchor=(1.03, 0.7))\n", | ||
"fig.tight_layout(pad=-2)\n", | ||
"fig.savefig(\"final_analysis/gradient_sig_diffs_cyclod_alibay.png\", dpi=300, bbox_inches=\"tight\")\n" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": null, | ||
"metadata": {}, | ||
"outputs": [], | ||
"source": [ | ||
"# Remove all the downloaded data\n", | ||
"\n", | ||
"! rm -r complex ligand complex.zip ligand.zip" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "a3fe_reproduce", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |