diff --git a/dev/.documenter-siteinfo.json b/dev/.documenter-siteinfo.json index 356e3e8b..38618dc9 100644 --- a/dev/.documenter-siteinfo.json +++ b/dev/.documenter-siteinfo.json @@ -1 +1 @@ -{"documenter":{"julia_version":"1.9.4","generation_timestamp":"2023-12-05T19:11:25","documenter_version":"1.2.1"}} \ No newline at end of file +{"documenter":{"julia_version":"1.10.0","generation_timestamp":"2024-02-09T15:34:10","documenter_version":"1.2.1"}} \ No newline at end of file diff --git a/dev/assets/ComplexMixtures.py b/dev/assets/ComplexMixtures.py index 7f2d6090..6af5b7e1 100644 --- a/dev/assets/ComplexMixtures.py +++ b/dev/assets/ComplexMixtures.py @@ -1,12 +1,14 @@ # # ComplexMixtures.py # -# A Python module to provide an inferface for the Julia ComplexMixtures.jl package. +# A Python module to provide an interface for the Julia ComplexMixtures.jl package. # # See: https://m3g.github.com/ComplexMixtures.jl # # Author: L. Martinez / IQ-Unicamp, 2023. # +# This script is adapted to version 2.0 of ComplexMixtures.jl +# import sys # @@ -47,8 +49,10 @@ select = jl.pdb.select # From ComplexMixtures -Selection = jl.cm.Selection +AtomSelection = jl.cm.AtomSelection Trajectory = jl.cm.Trajectory +SoluteGroup = jl.cm.SoluteGroup +SolventGroup = jl.cm.SolventGroup Options = jl.cm.Options save = jl.cm.save load = jl.cm.load diff --git a/dev/assets/logo-dark.svg b/dev/assets/logo-dark.svg index fb9b8859..fc75a0d0 100644 --- a/dev/assets/logo-dark.svg +++ b/dev/assets/logo-dark.svg @@ -2,20 +2,20 @@ + inkscape:version="1.1.2 (0a00cf5339, 2022-02-04)" + sodipodi:docname="logo-dark.svg" + xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape" + xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd" + xmlns="http://www.w3.org/2000/svg" + xmlns:svg="http://www.w3.org/2000/svg" + xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" + xmlns:cc="http://creativecommons.org/ns#" + xmlns:dc="http://purl.org/dc/elements/1.1/"> - - - + fit-margin-bottom="0" + 
inkscape:pagecheckerboard="0" /> @@ -124,7 +109,6 @@ image/svg+xml - @@ -134,31 +118,87 @@ id="layer1" transform="translate(504.66495,16.56467)"> - - + + + + + + + + + + + style="fill:#8d8d8d;fill-opacity:0.97647059"> + + + + + + + + + + + + + @@ -166,9 +206,9 @@ inkscape:connector-curvature="0" id="path5432" d="m -486.24385,-8.0306483 c -1.07774,-0.011615 -3.04267,0.044354 -3.04839,1.4718146 -0.166,1.0342982 -0.19348,2.6606512 1.31319,1.9410143 1.36011,-0.6686732 1.83399,1.6336886 3.21533,0.8289659 1.29203,-0.256116 2.11482,-1.7616855 0.7857,-2.6090419 -0.66389,-0.6816477 -1.45895,-1.1440821 -2.26583,-1.6327529 z" - style="fill:#2a9d8f;stroke:#a4d2e5;stroke-width:0.71300524px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:0.97217554;fill-opacity:1" /> + style="fill:#4c64b0;stroke:#8e8e8e;stroke-width:0.71300524px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:0.97369283;fill-opacity:1" /> + style="fill:#369844;stroke:#8e8e8e;stroke-width:0.71300524px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:0.97369283;fill-opacity:1" /> + inkscape:version="1.1.2 (0a00cf5339, 2022-02-04)" + sodipodi:docname="logo.svg" + xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape" + xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd" + xmlns="http://www.w3.org/2000/svg" + xmlns:svg="http://www.w3.org/2000/svg" + xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" + xmlns:cc="http://creativecommons.org/ns#" + xmlns:dc="http://purl.org/dc/elements/1.1/"> + fit-margin-bottom="0" + inkscape:pagecheckerboard="0" /> @@ -124,7 +125,6 @@ image/svg+xml - @@ -134,13 +134,13 @@ id="layer1" transform="translate(504.66495,16.56467)"> @@ -166,9 +166,9 @@ inkscape:connector-curvature="0" id="path5432" d="m -486.24385,-8.0306483 c -1.07774,-0.011615 -3.04267,0.044354 -3.04839,1.4718146 -0.166,1.0342982 -0.19348,2.6606512 1.31319,1.9410143 1.36011,-0.6686732 1.83399,1.6336886 3.21533,0.8289659 1.29203,-0.256116 2.11482,-1.7616855 
0.7857,-2.6090419 -0.66389,-0.6816477 -1.45895,-1.1440821 -2.26583,-1.6327529 z" - style="fill:#2a9d8f;stroke:#264653;stroke-width:0.71300524px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;fill-opacity:1" /> + style="fill:#4c64b0;stroke:#264653;stroke-width:0.71300524px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;fill-opacity:1" /> + style="fill:#399746;stroke:#264653;stroke-width:0.71300524px;stroke-linecap:butt;stroke-linejoin:miter;stroke-opacity:1;fill-opacity:1" /> Recommended Workflow manual section) +import Pkg; +Pkg.activate("."); + +# Load packages +using ComplexMixtures +using PDBTools +using Plots + +# Load PDB file of the system +atoms = readPDB("./system.pdb") + +# Select the protein and the TMAO molecules +protein = select(atoms, "protein") +tmao = select(atoms, "resname TMAO") + +# Setup solute and solvent structures. We need to provide +# either the number of atoms per molecule, or the number +# of molecules in each selection. +solute = AtomSelection(protein, nmols=1) +solvent = AtomSelection(tmao, natomspermol=14) + +# Setup the Trajectory structure: this will define which +# coordinates are used to compute the MDDF when reading +# the trajectory file. +trajectory = Trajectory("./trajectory.dcd", solute, solvent) + +# Run the calculation and get results: this is the computationally +# intensive part of the calculation. +results = mddf(trajectory) + +# Save the results to recover them later if required +save(results, "./results.json") + +# Plot the some of the most important results. +# +# - The results.d array contains the distances. +# - The results.mddf array contains the MDDF. +# - The results.kb array contains the Kirkwood-Buff integrals. 
+# +plot(results.d, results.mddf, xlabel="d / Å", ylabel="MDDF") # plot the MDDF +savefig("./mddf.pdf") +plot(results.d, results.kb, xlabel="d / Å", ylabel="KB / cm³ mol⁻¹") # plot the KB +savefig("./kb.pdf") \ No newline at end of file diff --git a/dev/assets/scripts/example1/script1.jl b/dev/assets/scripts/example1/script1.jl new file mode 100644 index 00000000..96300679 --- /dev/null +++ b/dev/assets/scripts/example1/script1.jl @@ -0,0 +1,86 @@ +# Activate environment in current directory +import Pkg; Pkg.activate(".") + +# Run this once, to install necessary packages: +# Pkg.add(["ComplexMixtures", "PDBTools", "Plots", "LaTeXStrings"]) + +# Load packages +using ComplexMixtures +using PDBTools +using Plots, Plots.Measures +using LaTeXStrings + +# The complete trajectory file can be downloaded from (1Gb): +# https://www.dropbox.com/scl/fi/zfq4o21dkttobg2pqd41m/glyc50_traj.dcd?rlkey=el3k6t0fx6w5yiqktyx96gzg6&dl=0 + +# The example output file is available at: +# +# Load PDB file of the system +atoms = readPDB("./system.pdb") + +# Select the protein and the GLYC molecules +protein = select(atoms, "protein") +glyc = select(atoms, "resname GLYC") + +# Setup solute and solvent structures +solute = AtomSelection(protein, nmols=1) +solvent = AtomSelection(glyc, natomspermol=14) + +# Path to the trajectory file +trajectory_file = "./glyc50_traj.dcd" + +# Run mddf calculation, and save results +trajectory = Trajectory(trajectory_file, solute, solvent) +results = mddf(trajectory) +save(results, "glyc50_results.json") + +# +# Produce plots +# +# Default options for plots +Plots.default( + fontfamily="Computer Modern", + linewidth=2, + framestyle=:box, + label=nothing, + grid=false +) + +# +# The complete MDDF and the Kirkwood-Buff Integral +# +plot(layout=(1, 2)) +# plot mddf +plot!(results.d, results.mddf, + xlabel=L"r/\AA", + ylabel="mddf", + subplot=1 +) +hline!([1], linestyle=:dash, linecolor=:gray, subplot=1) +# plot KB integral +plot!(results.d, results.kb / 1000, #to 
L/mol + xlabel=L"r/\AA", + ylabel=L"G_{us}/\mathrm{L~mol^{-1}}", + subplot=2 +) +# size and margin +plot!(size=(800, 300), margin=4mm) +savefig("./mddf.png") + +# +# Atomic contributions to the MDDF +# +hydroxyls = ["O1", "O2", "O3", "H1", "H2", "H3"] +aliphatic = ["C1", "C2", "HA", "HB", "HC", "HD"] +hydr_contrib = contributions(results, SolventGroup(hydroxyls)) +aliph_contrib = contributions(results, SolventGroup(aliphatic)) + +plot(results.d, results.mddf, + xlabel=L"r/\AA", + ylabel="mddf", + size=(600, 400) +) +plot!(results.d, hydr_contrib, label="Hydroxyls") +plot!(results.d, aliph_contrib, label="Aliphatic chain") +hline!([1], linestyle=:dash, linecolor=:gray) +savefig("./mddf_atom_contrib.png") \ No newline at end of file diff --git a/dev/assets/scripts/example1/script2.jl b/dev/assets/scripts/example1/script2.jl new file mode 100644 index 00000000..5c6b09c7 --- /dev/null +++ b/dev/assets/scripts/example1/script2.jl @@ -0,0 +1,66 @@ +# Activate environment in current directory +import Pkg; Pkg.activate(".") + +# Run this once, to install necessary packages: +# Pkg.add(["ComplexMixtures", "PDBTools", "Plots", "LaTeXStrings"]) + +# Load packages +using ComplexMixtures +using PDBTools +using Plots, Plots.Measures +using LaTeXStrings + +# The complete trajectory file can be downloaded from (3Gb): +# https://drive.google.com/file/d/14M30jDHRwUM77hzbDphgbu8mcWFBcQrX/view?usp=sharing + +# The example output file is available at: +# +# Load PDB file of the system +atoms = readPDB("./system.pdb") + +# Select the protein and the GLYC molecules +protein = select(atoms, "protein") +glyc = select(atoms, "resname GLYC") + +# Load example output file (computed in the previous script) +example_output = "./glyc50_results.json" +results = load(example_output) + +# +# Plot a 2D map showing the contributions of some residues +# +residues = collect(eachresidue(protein)) + +# We will plot only the range 70:110, for clarity +irange = 70:110 + +# We create matrix of with a number 
of rows equal to the number +# of bins of the mddf histogram (length(results.d)) and a number of +# columns equal to the number of residues +rescontrib = zeros(length(results.d), length(residues)) + +# Each column is then filled up with the contributions of each residue +for (ires, residue) in enumerate(residues) + rescontrib[:, ires] .= contributions(results, SoluteGroup(residue)) +end + +# Plot only for distances within 1.5 and 3.5: +idmin = findfirst(d -> d > 1.5, results.d) +idmax = findfirst(d -> d > 3.5, results.d) + +# Obtain pretty labels for the residues in the x-axis +xticks = PDBTools.residue_ticks(protein, first=70, last=110) + +# Plot contour curves with the density at each distance from +# each residue +Plots.default(fontfamily="Computer Modern") +contourf(irange, results.d[idmin:idmax], rescontrib[idmin:idmax, irange], + color=cgrad(:tempo), linewidth=1, linecolor=:black, + colorbar=:none, levels=5, + xlabel="Residue", ylabel=L"r/\AA", + xticks=xticks, xrotation=60, + xtickfont=font(8, "Computer Modern"), + size=(700, 400), + margin=0.5Plots.PlotMeasures.cm +) +savefig("./density2D.png") \ No newline at end of file diff --git a/dev/assets/scripts/example1/script3.jl b/dev/assets/scripts/example1/script3.jl new file mode 100644 index 00000000..4b7cde1d --- /dev/null +++ b/dev/assets/scripts/example1/script3.jl @@ -0,0 +1,17 @@ +import Pkg; Pkg.activate(".") +using PDBTools +using ComplexMixtures + +# PDB file of the system simulated +atoms = readPDB("./system.pdb") + +# Load results of a ComplexMixtures run +results = load("./glyc50_results.json") + +# Inform which is the solute +protein = select(atoms, "protein") +solute = AtomSelection(protein, nmols=1) + +# Compute the 3D density grid and output it to the PDB file; +# here we use dmax=3.5 such that the output file is not too large +grid = grid3D(results, atoms, "./grid.pdb"; dmin=1.5, dmax=3.5) \ No newline at end of file diff --git a/dev/assets/scripts/example2/script1.jl
b/dev/assets/scripts/example2/script1.jl new file mode 100644 index 00000000..afe50379 --- /dev/null +++ b/dev/assets/scripts/example2/script1.jl @@ -0,0 +1,71 @@ +import Pkg; Pkg.activate(".") + +using PDBTools +using ComplexMixtures +using Plots +using LaTeXStrings +using EasyFit: movavg + +# The full trajectory file is available at: +# https://www.dropbox.com/scl/fi/jwafhgxaxuzsybw2y8txd/traj_Polyacry.dcd?rlkey=p4bn65m0pkuebpfm0hf158cdm&dl=0 +trajectory_file = "./traj_Polyacry.dcd" + +# Load a PDB file of the system +system = readPDB("./equilibrated.pdb") + +# Select the atoms corresponding DMF molecules +dmf = select(system, "resname DMF") + +# Select the atoms corresponding to the Poly-acrylamide +acr = select(system, "resname FACR or resname ACR or resname LACR") + +# Set the solute and the solvent selections for ComplexMixtures +solute = AtomSelection(acr, nmols=1) +solvent = AtomSelection(dmf, natomspermol=12) + +# Set the trajectory structure +trajectory = Trajectory(trajectory_file, solute, solvent) + +# Use a large dbulk distance for better KB convergence +options = Options(dbulk=19.) 
+ +# Compute the mddf and associated properties +results = mddf(trajectory, options) + +# Save results to file for later use +save(results, "./mddf.json") +println("Results saved to ./mddf.json file") + +# Plot the MDDF and KB integrals +plot_font = "Computer Modern" +default( + fontfamily=plot_font, + linewidth=1.5, + framestyle=:box, + label=nothing, + grid=false, + palette=:tab10 +) +scalefontsizes(); scalefontsizes(1.3) + +# Plot the MDDF of DMF relative to PolyACR and its corresponding KB integral +plot(layout=(2,1)) +plot!( + results.d, + movavg(results.mddf,n=9).x, # Smooth example with a running average + ylabel="MDDF", + xlims=(0,20), + subplot=1, +) + +# Plot the KB integral (results.kb is a volume per mol: cm^3 mol^-1) +plot!( + results.d, + movavg(results.kb,n=9).x, # smooth kb + xlabel=L"\textrm{Distance / \AA}", + ylabel=L"\textrm{KB~/~cm^3~mol^{-1}}", + xlim=(-1,20), + subplot=2 +) +savefig("./mddf_kb.png") +println("Plot saved to mddf_kb.png") \ No newline at end of file diff --git a/dev/assets/scripts/example2/script2.jl b/dev/assets/scripts/example2/script2.jl new file mode 100644 index 00000000..0abdb82a --- /dev/null +++ b/dev/assets/scripts/example2/script2.jl @@ -0,0 +1,89 @@ +import Pkg; Pkg.activate(".") + +using ComplexMixtures +using Plots +using LaTeXStrings +using EasyFit: movavg + +# Some default settings for the plots +plot_font = "Computer Modern" +Plots.default( + fontfamily=plot_font, + linewidth=1.5, + framestyle=:box, + label=nothing, + grid=false, +) + +# Load previously saved results, computed in the previous script +results = load("./mddf.json") + +# Plot with two subplots +plot(layout=(2,1)) + +# Plot the total mddf +plot!( + results.d, + movavg(results.mddf,n=10).x, # Smooth example with a running average + label="Total", + subplot=1 +) + +# Plot DMF group contributions to the MDDF.
We use a dictionary where +# the keys are the group names, and the values are the atom names of the group +groups = Dict( + "CO" => ["C","O"], # carbonyl + "N" => ["N"], + "Methyl groups" => ["CC","CT","HC1","HC2","HC3","HT1","HT2","HT3"], +) +for (group_label, group_atoms) in pairs(groups) + # Retrieve the contributions of the atoms of this group + group_contrib = contributions(results, SolventGroup(group_atoms)) + # Plot the contributions of this groups, with the appropriate label + plot!( + results.d, + movavg(group_contrib,n=10).x, + label=group_label, + subplot=1 + ) +end + +# Adjust scale and label of axis +plot!(xlim=(1,5), ylabel="MDDF", subplot=1) + +# +# Plot ACR group contributions to the MDDF. This is an interesting case, +# as the groups are repeated along the polymer chain +# +groups = Dict( + L"\textrm{CH_3}" => ["CF","HF1","HF2","HF3", "CL","HL1","HL2","HL3"], # terminal methyles + "CO" => ["OE1","CD"], # carbonyl + L"\textrm{NH_2}" => ["NE2","HE22","HE21"], # amine + L"\textrm{CHCH_2}" => ["C","H2","H1","CA","HA"], # backbone +) +# Plot total mddf +plot!( + results.d, + movavg(results.mddf,n=10).x, # Smooth example with a running average + label="Total", + subplot=2 +) +# Plot group contributions +for (group_name, atom_names) in pairs(groups) + group_contrib = contributions(results, SoluteGroup(atom_names)) + plot!( + results.d, + movavg(group_contrib,n=10).x, + label=group_name, + subplot=2 + ) +end +# Adjust scale and label of axis +plot!( + xlim=(1,5), + xlabel=L"\textrm{Distance / \AA}", + ylabel="MDDF", subplot=2 +) +# Save figure +savefig("./mddf_groups.png") +println("Created figure file: ./mddf_groups.png") \ No newline at end of file diff --git a/dev/assets/scripts/example2/script3.jl b/dev/assets/scripts/example2/script3.jl new file mode 100644 index 00000000..73dfc0f4 --- /dev/null +++ b/dev/assets/scripts/example2/script3.jl @@ -0,0 +1,72 @@ +import Pkg; Pkg.activate(".") + +using ComplexMixtures +using Plots +using EasyFit: movavg 
+using LaTeXStrings +using PDBTools + +# Here we will produce a 2D plot of group contributions, splitting the +# contributions of each mer of the polymer into its chemical groups + +# Chemical groups of the polymer monomers, defined by the atom types. An ordered list of pairs is used (not a Dict) because the two CH_3 labels are equal and would collide as Dict keys: +groups = [ + L"\textrm{CH_3}" => ["CF","HF1","HF2","HF3"], # methyles + "CO" => ["OE1","CD"], # carbonyl + L"\textrm{NH_2}" => ["NE2","HE22","HE21"], # amine + L"\textrm{CHCH_2}" => ["C","H2","H1","CA","HA"], # backbone + L"\textrm{CH_3}" => ["CL","HL1","HL2","HL3"], # terminal methyles +] + +system = readPDB("./equilibrated.pdb") +acr = select(system, "resname FACR or resname ACR or resname LACR") +results = load("./mddf.json") + +# Here we split the polymer in residues, to extract the contribution of +# each chemical group of each polymer mer independently +group_contribs = Vector{Float64}[] +labels = String[] +for mer in eachresidue(acr) + for (group_label, group_atoms) in groups + # Filter the atoms of this mer that belong to the group + mer_group_atoms = filter(at -> name(at) in group_atoms, mer) + # Skip the groups that have no atoms in this mer. The terminal CH3 + # groups are expected only in the first (CF...) and in the last (CL...) + # mers (assumed from the atom names; confirm against the PDB file). + # This replaces two consecutive label tests (imer != 1, imer != 5) that, + # combined, skipped the CH3 groups in every mer of the polymer. + if isempty(mer_group_atoms) + continue + end + # Retrieve the contribution of this mer atoms to the MDDF + atoms_contrib = contributions(results, SoluteGroup(mer_group_atoms)) + # Smooth the contributions + atoms_contrib = movavg(atoms_contrib; n = 10).x + # Add contributions to the group contributions list + push!(group_contribs, atoms_contrib) + # Push label to label list + push!(labels,group_label) + end +end + +# Convert the group contributions to a matrix +group_contribs = stack(group_contribs) + +# Find the indices of the limits of the map we want +idmin = findfirst( d -> d > 1.5, results.d) +idmax = findfirst( d -> d > 3.2, results.d) + +# Plot contour map +Plots.default(fontfamily="Computer Modern") +contourf( +
1:length(labels), + results.d[idmin:idmax], + group_contribs[idmin:idmax,:], + color=cgrad(:tempo),linewidth=1,linecolor=:black, + colorbar=:none,levels=10, + xlabel="Group",ylabel=L"r/\AA",xrotation=60, + xticks=(1:length(labels),labels), + margin=5Plots.Measures.mm # adjust margin +) +savefig("./map2D_acr.png") +println("Plot saved to map2D_acr.png") \ No newline at end of file diff --git a/dev/assets/scripts/example3/POPC_ethanol_chains.png b/dev/assets/scripts/example3/POPC_ethanol_chains.png new file mode 100644 index 00000000..6baa27d1 Binary files /dev/null and b/dev/assets/scripts/example3/POPC_ethanol_chains.png differ diff --git a/dev/assets/scripts/example3/POPC_water_chains.png b/dev/assets/scripts/example3/POPC_water_chains.png new file mode 100644 index 00000000..00d33aaa Binary files /dev/null and b/dev/assets/scripts/example3/POPC_water_chains.png differ diff --git a/dev/assets/scripts/example3/script1.jl b/dev/assets/scripts/example3/script1.jl new file mode 100644 index 00000000..99fecc41 --- /dev/null +++ b/dev/assets/scripts/example3/script1.jl @@ -0,0 +1,122 @@ +import Pkg; +Pkg.activate("."); + +using PDBTools +using ComplexMixtures +using Plots +using LaTeXStrings +using EasyFit: movavg + +# The full trajectory file is available at: +# https://www.dropbox.com/scl/fi/hcenxrdf8g8hfbllyakhy/traj_POPC.dcd?rlkey=h9zivtwgya3ivva1i6q6xmr2p&dl=0 +trajectory_file = "./traj_POPC.dcd" + +# Load a PDB file of the system +system = readPDB("./equilibrated.pdb") + +# Select the atoms corresponding to glycerol and water +popc = select(system, "resname POPC") +water = select(system, "water") +ethanol = select(system, "resname ETOH") + +# Set the complete membrane as the solute. We use nmols=1 here such +# that the membrane is considered a single solute in the calculation. 
+solute = AtomSelection(popc, nmols=1) + +# Compute water-POPC distribution and KB integral +solvent = AtomSelection(water, natomspermol=3) + +# Set the trajectory structure +trajectory = Trajectory(trajectory_file, solute, solvent) + +# We want to get reasonably converged KB integrals, which usually +# require large solute domains. Distribution functions converge +# rapidly (within ~10 Angs or less), on the other hand. +options = Options(dbulk=20.0) + +# Compute the mddf and associated properties +mddf_water_POPC = mddf(trajectory, options) + +# Save results to file for later use +save(mddf_water_POPC, "./mddf_water_POPC.json") +println("Results saved to ./mddf_water_POPC.json file") + +# Compute ethanol-POPC distribution and KB integral +solvent = AtomSelection(ethanol, natomspermol=9) +traj = Trajectory(trajectory_file, solute, solvent) +mddf_ethanol_POPC = mddf(traj, options) + +# Save results for later use +save(mddf_ethanol_POPC, "./mddf_ethanol_POPC.json") +println("Results saved to ./mddf_ethanol_POPC.json file") + +# +# Plot the MDDF and KB integrals +# +# Plot defaults +plot_font = "Computer Modern" +default( + fontfamily=plot_font, + linewidth=2.5, + framestyle=:box, + label=nothing, + grid=false, + palette=:tab10 +) +scalefontsizes(); scalefontsizes(1.3) + +# +# Plot cosolvent-POPC MDDFs in subplot 1 +# +plot(layout=(2,1)) +# Water MDDF +plot!( + mddf_water_POPC.d, # distances + movavg(mddf_water_POPC.mddf,n=10).x, # water MDDF - smoothed + label="Water", + subplot=1 +) +# Ethanol MDDF +plot!( + mddf_ethanol_POPC.d, # distances + movavg(mddf_ethanol_POPC.mddf,n=10).x, # ethanol MDDF - smoothed + label="Ethanol", + subplot=1 +) +# Plot settings +plot!( + xlabel=L"\textrm{Distance / \AA}", + ylabel="MDDF", + xlim=(0,10), + subplot=1 +) + +# +# Plot cosolvent-POPC KB integrals in subplot 2 +# +# Water KB +plot!( + mddf_water_POPC.d, # distances + mddf_water_POPC.kb, # water KB + label="Water", + subplot=2 +) +# Ethanol KB +plot!( + mddf_ethanol_POPC.d, #
distances + mddf_ethanol_POPC.kb, # ethanol KB + label="Ethanol", + subplot=2 +) +# Plot settings (the kb values are plotted unscaled, without the /1000 conversion to L/mol) +plot!( + xlabel=L"\textrm{Distance / \AA}", + ylabel=L"\textrm{KB~/~cm^3~mol^{-1}}", + xlim=(0,10), + subplot=2 +) + +savefig("popc_water_ethanol_mddf_kb.png") +println("Plot saved to popc_water_ethanol_mddf_kb.png file") + + diff --git a/dev/assets/scripts/example3/script2.jl b/dev/assets/scripts/example3/script2.jl new file mode 100644 index 00000000..d356bf29 --- /dev/null +++ b/dev/assets/scripts/example3/script2.jl @@ -0,0 +1,52 @@ +import Pkg; +Pkg.activate("."); + +using ComplexMixtures +using PDBTools +using Plots +using LaTeXStrings +using EasyFit: movavg + +# Some default settings for the plots +plot_font = "Computer Modern" +Plots.default( + fontfamily=plot_font, + linewidth=1.5, + framestyle=:box, + label=nothing, + grid=false, +) +scalefontsizes(); scalefontsizes(1.3) + +# Read system PDB file +system = readPDB("equilibrated.pdb") +ethanol = select(system, "resname ETOH") + +# Load the pre-calculated MDDF of ethanol +mddf_ethanol_POPC = load("mddf_ethanol_POPC.json") + +# +# Contributions of the ethanol groups +# +# Define the groups using selections.
Set a dict, in which the keys are the group names +# and the values are the selections +groups = Dict( + "Hydroxyl" => select(ethanol, "name O1 or name HO1"), + "Aliphatic chain" => select(ethanol, "not name O1 and not name HO1"), +) +# plot the total mddf and the contributions of the groups +x = mddf_ethanol_POPC.d +plot(x, movavg(mddf_ethanol_POPC.mddf, n=10).x, label="Total MDDF") +for (group_name, group_atoms) in pairs(groups) + cont = contributions(mddf_ethanol_POPC, SolventGroup(group_atoms)) + y = movavg(cont, n=10).x + plot!(x, y, label=group_name) +end +# Plot settings +plot!( + xlim=(1, 8), + xlabel=L"\textrm{Distance / \AA}", + ylabel="MDDF" +) +savefig("./mddf_ethanol_groups.png") +println("The plot was saved as mddf_ethanol_groups.png") \ No newline at end of file diff --git a/dev/assets/scripts/example3/script3.jl b/dev/assets/scripts/example3/script3.jl new file mode 100644 index 00000000..96e11e45 --- /dev/null +++ b/dev/assets/scripts/example3/script3.jl @@ -0,0 +1,71 @@ +import Pkg; +Pkg.activate("."); + +using ComplexMixtures +using PDBTools +using Plots +using LaTeXStrings +using EasyFit: movavg + +# Some default settings for the plots +plot_font = "Computer Modern" +Plots.default( + fontfamily=plot_font, + linewidth=2, + framestyle=:box, + label=nothing, + grid=false, +) +scalefontsizes(); +scalefontsizes(1.3); + +# Read system PDB file +system = readPDB("equilibrated.pdb") + +# Load the pre-calculated MDDF of water +mddf_water_POPC = load("mddf_water_POPC.json") + +# +# Here we define the POPC groups, from the atom names. Each group +# is a vector of atom names, and the keys are the group names. 
+# +groups = Dict( + "Choline" => ["N", "C12", "H12A", "H12B", "C13", "H13A", "H13B", "H13C", "C14", + "H14A", "H14B", "H14C", "C15", "H15A", "H15B", "H15C", "C11", "H11A", "H11B"], + "Phosphate" => ["P", "O13", "O14", "O12"], + "Glycerol" => ["O11", "C1", "HA", "HB", "C2", "HS", "O21", "C3", "HX", "HY", "O31"], + "Oleoyl" => ["O22", "C21", "H2R", "H2S", "C22", "C23", "H3R", "H3S", "C24", "H4R", "H4S", + "C25", "H5R", "H5S", "C26", "H6R", "H6S", "C27", "H7R", "H7S", "C28", "H8R", "H8S", + "C29", "H91", "C210", "H101", "C211", "H11R", "H11S", "C212", "H12R", "H12S", + "C213", "H13R", "H13S", "C214", "H14R", "H14S", "C215", "H15R", "H15S", + "C216", "H16R", "H16S", "C217", "H17R", "H17S", "C218", "H18R", "H18S", "H18T"], + "Palmitoyl" => ["C31", "O32", "C32", "H2X", "H2Y", "C33", "H3X", "H3Y", "C34", "H4X", "H4Y", + "C35", "H5X", "H5Y", "C36", "H6X", "H6Y", "C37", "H7X", "H7Y", "C38", "H8X", + "H8Y", "C39", "H9X", "H9Y", "C310", "H10X", "H10Y", "C311", "H11X", "H11Y", + "C312", "H12X", "H12Y", "C313", "H13X", "H13Y", "C314", "H14X", "H14Y", "C315", + "H15X", "H15Y", "C316", "H16X", "H16Y", "H16Z"], +) + +# +# plot the total mddf and the contributions of the groups +# +x = mddf_water_POPC.d +plot( + x, + movavg(mddf_water_POPC.mddf, n=10).x, + label="Total water-POPC MDDF" +) +for (group_name, group_atoms) in pairs(groups) + cont = contributions(mddf_water_POPC, SoluteGroup(group_atoms)) + y = movavg(cont, n=10).x + plot!(x, y, label=group_name) +end +# Plot settings +plot!( + xlim=(1, 5), + xlabel=L"\textrm{Distance / \AA}", + ylabel="MDDF" +) +savefig("./mddf_POPC_water_groups.png") +println("The plot was saved as mddf_POPC_water_groups.png") + diff --git a/dev/assets/scripts/example3/script4.jl b/dev/assets/scripts/example3/script4.jl new file mode 100644 index 00000000..f30bcdfc --- /dev/null +++ b/dev/assets/scripts/example3/script4.jl @@ -0,0 +1,71 @@ +import Pkg; +Pkg.activate("."); + +using ComplexMixtures +using PDBTools +using Plots +using LaTeXStrings 
+using EasyFit: movavg + +# Some default settings for the plots +plot_font = "Computer Modern" +Plots.default( + fontfamily=plot_font, + linewidth=2, + framestyle=:box, + label=nothing, + grid=false, +) +scalefontsizes(); +scalefontsizes(1.3); + +# Read system PDB file +system = readPDB("equilibrated.pdb") + +# Load the pre-calculated MDDF of ethanol +mddf_ethanol_POPC = load("mddf_ethanol_POPC.json") + +# +# Here we define the POPC groups, from the atom names. Each group +# is a vector of atom names, and the keys are the group names. +# +groups = Dict( + "Choline" => ["N", "C12", "H12A", "H12B", "C13", "H13A", "H13B", "H13C", "C14", + "H14A", "H14B", "H14C", "C15", "H15A", "H15B", "H15C", "C11", "H11A", "H11B"], + "Phosphate" => ["P", "O13", "O14", "O12"], + "Glycerol" => ["O11", "C1", "HA", "HB", "C2", "HS", "O21", "C3", "HX", "HY", "O31"], + "Oleoyl" => ["O22", "C21", "H2R", "H2S", "C22", "C23", "H3R", "H3S", "C24", "H4R", "H4S", + "C25", "H5R", "H5S", "C26", "H6R", "H6S", "C27", "H7R", "H7S", "C28", "H8R", "H8S", + "C29", "H91", "C210", "H101", "C211", "H11R", "H11S", "C212", "H12R", "H12S", + "C213", "H13R", "H13S", "C214", "H14R", "H14S", "C215", "H15R", "H15S", + "C216", "H16R", "H16S", "C217", "H17R", "H17S", "C218", "H18R", "H18S", "H18T"], + "Palmitoyl" => ["C31", "O32", "C32", "H2X", "H2Y", "C33", "H3X", "H3Y", "C34", "H4X", "H4Y", + "C35", "H5X", "H5Y", "C36", "H6X", "H6Y", "C37", "H7X", "H7Y", "C38", "H8X", + "H8Y", "C39", "H9X", "H9Y", "C310", "H10X", "H10Y", "C311", "H11X", "H11Y", + "C312", "H12X", "H12Y", "C313", "H13X", "H13Y", "C314", "H14X", "H14Y", "C315", + "H15X", "H15Y", "C316", "H16X", "H16Y", "H16Z"], +) + +# +# plot the total mddf and the contributions of the groups +# +x = mddf_ethanol_POPC.d +plot( + x, + movavg(mddf_ethanol_POPC.mddf, n=10).x, + label="Total ethanol-POPC MDDF" +) +for (group_name, group_atoms) in pairs(groups) + cont = contributions(mddf_ethanol_POPC, SoluteGroup(group_atoms)) + y = movavg(cont, n=10).x + plot!(x, y, 
label=group_name) +end +# Plot settings +plot!( + xlim=(1.3, 5), + ylim=(0, 1.8), + xlabel=L"\textrm{Distance / \AA}", + ylabel="MDDF" +) +savefig("./mddf_POPC_ethanol_groups.png") +println("The plot was saved as mddf_POPC_ethanol_groups.png") diff --git a/dev/assets/scripts/example3/script5.jl b/dev/assets/scripts/example3/script5.jl new file mode 100644 index 00000000..bb8595b6 --- /dev/null +++ b/dev/assets/scripts/example3/script5.jl @@ -0,0 +1,192 @@ +import Pkg; +Pkg.activate("."); + +using ComplexMixtures +using PDBTools +using Plots +using LaTeXStrings +using EasyFit: movavg + +# Some default settings for the plots +plot_font = "Computer Modern" +Plots.default( + fontfamily=plot_font, + linewidth=2, + framestyle=:box, + label=nothing, + grid=false, +) +scalefontsizes(); +scalefontsizes(1.3); + +# Read system PDB file +system = readPDB("equilibrated.pdb") + +# Load the pre-calculated MDDF of ethanol +mddf_ethanol_POPC = load("mddf_ethanol_POPC.json") + +# Splitting the oleoyl chain into groups along the the chain. +# The labels `CH_2` etc stand for `CH₂`, for example, in LaTeX notation, +# for a nicer plot axis ticks formatting. 
+oleoyl_groups = [ + "CO" => ["O22", "C21"], + "CH_2" => ["H2R", "H2S", "C22"], + "CH_2" => ["C23", "H3R", "H3S"], + "CH_2" => ["C24", "H4R", "H4S"], + "CH_2" => ["C25", "H5R", "H5S"], + "CH_2" => ["C26", "H6R", "H6S"], + "CH_2" => ["C27", "H7R", "H7S"], + "CH_2" => ["C28", "H8R", "H8S"], + "CH" => ["C29", "H91"], + "CH" => ["C210", "H101"], + "CH_2" => ["C211", "H11R", "H11S"], + "CH_2" => ["C212", "H12R", "H12S"], + "CH_2" => ["C213", "H13R", "H13S"], + "CH_2" => ["C214", "H14R", "H14S"], + "CH_2" => ["C215", "H15R", "H15S"], + "CH_2" => ["C216", "H16R", "H16S"], + "CH_2" => ["C217", "H17R", "H17S"], + "CH_3" => ["C218", "H18R", "H18S", "H18T"] +] + +# Format tick labels with LaTeX +labels_o = [latexstring("\\textrm{$key}") for (key, val) in oleoyl_groups] + +# We first collect the contributions of each group into a vector of vectors: +gcontrib = Vector{Float64}[] # empty vector of vectors +for (group_name, group_atoms) in oleoyl_groups + group_contributions = contributions(mddf_ethanol_POPC, SoluteGroup(group_atoms)) + push!(gcontrib, movavg(group_contributions; n=10).x) +end + +# Convert the vector of vectors into a matrix +gcontrib = stack(gcontrib) + +# Find the indices of the MDDF where the distances are between 1.5 and 3.0 Å +idmin = findfirst(d -> d > 1.5, mddf_ethanol_POPC.d) +idmax = findfirst(d -> d > 3.0, mddf_ethanol_POPC.d) + +# The plot will have two lines, the first plot will contain the +# oleoyl groups contributions, and the second plot will contain the +# contributions of the palmitoyl groups. 
+plot(layout=(2, 1)) + +# Plot the contributions of the oleoyl groups +contourf!( + 1:length(oleoyl_groups), + mddf_ethanol_POPC.d[idmin:idmax], + gcontrib[idmin:idmax, :], + color=cgrad(:tempo), linewidth=1, linecolor=:black, + colorbar=:none, levels=10, + ylabel=L"r/\AA", xrotation=60, + xticks=(1:length(oleoyl_groups), labels_o), + subplot=1, +) +annotate!( 14, 2.7, text("Oleoyl", :left, 12, plot_font), subplot=1) + +# +# Repeat procedure for the palmitoyl groups +# +palmitoyl_groups = [ + "CO" => ["C31", "O32"], + "CH_2" => ["C32", "H2X", "H2Y"], + "CH_2" => ["C33", "H3X", "H3Y"], + "CH_2" => ["C34", "H4X", "H4Y"], + "CH_2" => ["C35", "H5X", "H5Y"], + "CH_2" => ["C36", "H6X", "H6Y"], + "CH_2" => ["C37", "H7X", "H7Y"], + "CH_2" => ["C38", "H8X", "H8Y"], + "CH_2" => ["C39", "H9X", "H9Y"], + "CH_2" => ["C310", "H10X", "H10Y"], + "CH_2" => ["C311", "H11X", "H11Y"], + "CH_2" => ["C312", "H12X", "H12Y"], + "CH_2" => ["C313", "H13X", "H13Y"], + "CH_2" => ["C314", "H14X", "H14Y"], + "CH_2" => ["C315", "H15X", "H15Y"], + "CH_3" => ["C316", "H16X", "H16Y", "H16Z"], +] + +# Format tick labels with LaTeX +labels_p = [latexstring("\\textrm{$key}") for (key, val) in palmitoyl_groups] + +# We first collect the contributions of each group into a # vector of vectors: +gcontrib = Vector{Float64}[] # empty vector of vectors +for (group_name, group_atoms) in palmitoyl_groups + group_contributions = contributions(mddf_ethanol_POPC, SoluteGroup(group_atoms)) + push!(gcontrib, movavg(group_contributions; n=10).x) +end + +# Convert the vector of vectors into a matrix +gcontrib = stack(gcontrib) + +# Plot the contributions of the palmitoyl groups +contourf!( + 1:length(palmitoyl_groups), + mddf_ethanol_POPC.d[idmin:idmax], + gcontrib[idmin:idmax, :], + color=cgrad(:tempo), linewidth=1, linecolor=:black, + colorbar=:none, levels=10, + xlabel="Group", + ylabel=L"r/\AA", xrotation=60, + xticks=(1:length(labels_p), labels_p), + bottom_margin=0.5Plots.Measures.cm, + subplot=2, +) 
+annotate!( 12, 2.7, text("Palmitoyl", :left, 12, plot_font), subplot=2) + +savefig("POPC_ethanol_chains.png") +println("The plot was saved as POPC_ethanol_chains.png") + +# +# Now, plot a similar map for the water interactions with the POPC chain +# +mddf_water_POPC = load("mddf_water_POPC.json") +plot(layout=(2, 1)) + +# Plot the contributions of the oleoyl groups +# We first collect the contributions of each group into a vector of vectors: +gcontrib = Vector{Float64}[] # empty vector of vectors +for (group_name, group_atoms) in oleoyl_groups + group_contributions = contributions(mddf_water_POPC, SoluteGroup(group_atoms)) + push!(gcontrib, movavg(group_contributions; n=10).x) +end +# Convert the vector of vectors into a matrix +gcontrib = stack(gcontrib) +# Plot matrix as density map +contourf!( + 1:length(oleoyl_groups), + mddf_water_POPC.d[idmin:idmax], + gcontrib[idmin:idmax, :], + color=cgrad(:tempo), linewidth=1, linecolor=:black, + colorbar=:none, levels=10, + ylabel=L"r/\AA", xrotation=60, + xticks=(1:length(oleoyl_groups), labels_o), subplot=1, +) +annotate!( 14, 2.7, text("Oleoyl", :left, 12, plot_font), subplot=1) + +# Plot the contributions of the palmitoyl groups +# We first collect the contributions of each group into a vector of vectors: +gcontrib = Vector{Float64}[] # empty vector of vectors +for (group_name, group_atoms) in palmitoyl_groups + group_contributions = contributions(mddf_water_POPC, SoluteGroup(group_atoms)) + push!(gcontrib, movavg(group_contributions; n=10).x) +end +# Convert the vector of vectors into a matrix +gcontrib = stack(gcontrib) +# Plot matrix as density map +contourf!( + 1:length(palmitoyl_groups), + mddf_water_POPC.d[idmin:idmax], + gcontrib[idmin:idmax, :], + color=cgrad(:tempo), linewidth=1, linecolor=:black, + colorbar=:none, levels=10, + xlabel="Group", + ylabel=L"r/\AA", xrotation=60, + xticks=(1:length(palmitoyl_groups), labels_p), + subplot=2, + bottom_margin=0.5Plots.Measures.cm, +) +annotate!( 12, 2.7,
text("Palmitoyl", :left, 12, plot_font), subplot=2) + +savefig("POPC_water_chains.png") +println("The plot was saved as POPC_water_chains.png") \ No newline at end of file diff --git a/dev/assets/scripts/example4/GlycerolWater_map.png b/dev/assets/scripts/example4/GlycerolWater_map.png new file mode 100644 index 00000000..25ada9eb Binary files /dev/null and b/dev/assets/scripts/example4/GlycerolWater_map.png differ diff --git a/dev/assets/scripts/example4/mddf_group_contributions.png b/dev/assets/scripts/example4/mddf_group_contributions.png new file mode 100644 index 00000000..0503abb1 Binary files /dev/null and b/dev/assets/scripts/example4/mddf_group_contributions.png differ diff --git a/dev/assets/scripts/example4/mddf_kb.png b/dev/assets/scripts/example4/mddf_kb.png new file mode 100644 index 00000000..b667f0e2 Binary files /dev/null and b/dev/assets/scripts/example4/mddf_kb.png differ diff --git a/dev/assets/scripts/example4/script1.jl b/dev/assets/scripts/example4/script1.jl new file mode 100644 index 00000000..421639eb --- /dev/null +++ b/dev/assets/scripts/example4/script1.jl @@ -0,0 +1,86 @@ +import Pkg; +Pkg.activate("."); + +using PDBTools +using ComplexMixtures +using Plots +using LaTeXStrings +using EasyFit: movavg + +# Load a PDB file of the system +system = readPDB("./equilibrated.pdb") + +# The full trajectory file is available at: +# https://www.dropbox.com/scl/fi/ag7k2d7i9d7ivbd5zmtl9/traj_Glyc.dcd?rlkey=93i31a5ytlzb34ulzjz315eyq&dl=0 +trajectory_file = "./traj_Glyc.dcd" + +# Select the atoms corresponding to glycerol and water (using PDBTools) +glyc = select(system, "resname GLLM") +water = select(system, "water") + +# Compute Glycerol-Glycerol auto correlation mddf +solute = AtomSelection(glyc, natomspermol=14) +trajectory = Trajectory(trajectory_file, solute) # solute and solvent are the same + +# We define a large solute domain (large dbulk) to obtain a good convergence +# for the KB integral. The mddf converges at much shorter distances. 
+options = Options(dbulk=20.0) +mddf_glyc = mddf(trajectory, options) + +# Save results for later analysis +save(mddf_glyc, "./mddf_glyc.json") + +# Compute water-glycerol mddf +solvent = AtomSelection(water, natomspermol=3) +trajectory = Trajectory(trajectory_file, solute, solvent) +mddf_glyc_water = mddf(trajectory, options) + +# Save results for later analysis +save(mddf_glyc_water, "./mddf_glyc_water.json") + +# +# Plot the MDDFs +# +Plots.default( + fontfamily="Computer Modern", + linewidth=2, + framestyle=:box, + grid=false, + label=nothing, +) +scalefontsizes(); +scalefontsizes(1.3) +plot(layout=(2, 1)) + +# glycerol-glycerol auto correlation +x = mddf_glyc.d # distances +y = movavg(mddf_glyc.mddf, n=10).x # the mddf (using movavg to smooth noise) +plot!(x, y, label="Glycerol-Glycerol", subplot=1) + +# water-glycerol correlation +x = mddf_glyc_water.d +y = movavg(mddf_glyc_water.mddf, n=10).x +plot!(x, y, label="Glycerol-Water", subplot=1) +plot!(ylabel="MDDF", xlim=(1.5, 8), subplot=1) + +# Plot the KB integrals +# glycrerol-glycerol +y = movavg(mddf_glyc.kb, n=10).x +plot!(x, y, subplot=2) + +# water-glycerol +y = movavg(mddf_glyc_water.kb, n=10).x +plot!(x, y, subplot=2) + +# plot settings +plot!( + xlabel=L"\textrm{Distance / \AA}", + ylabel=L"\textrm{KB~/~cm^3~mol^{-1}}", + xlim=(0, 20), + subplot=2 +) + +# Save plot +savefig("./mddf_kb.png") +println("Plot saved to mddf_kb.png") + diff --git a/dev/assets/scripts/example4/script2.jl b/dev/assets/scripts/example4/script2.jl new file mode 100644 index 00000000..2db8c144 --- /dev/null +++ b/dev/assets/scripts/example4/script2.jl @@ -0,0 +1,85 @@ +import Pkg; +Pkg.activate("."); + +using PDBTools +using ComplexMixtures +using Plots +using LaTeXStrings +using EasyFit: movavg + +# Load a PDB file of the system +system = readPDB("./equilibrated.pdb") + +# Select the atoms corresponding to glycerol and water (using PDBTools) +glyc = select(system, "resname GLLM") +water = select(system, "water") + +# Load 
previously computed mddfs +mddf_glyc = load("./mddf_glyc.json") +mddf_glyc_water = load("./mddf_glyc_water.json") + +# Plot some group contributions to the MDDF. We select the atom names +# corresponding to each type of group of the glycerol molecule. +hydroxyls = ["O1", "O2", "O3", "HO1", "HO2", "HO3"] +aliphatic = ["C1", "C2", "C3", "H11", "H12", "H2", "H31", "H32"] + +# +# Extract the contributions of these atoms to the MDDFs +# +# glycerol-glycerol +mddf_glyc_hydroxyls = contributions(mddf_glyc, SoluteGroup(hydroxyls)) +mddf_glyc_aliphatic = contributions(mddf_glyc, SoluteGroup(aliphatic)) +# glycerol-water +mddf_glyc_water_hydroxyls = contributions(mddf_glyc_water, SoluteGroup(hydroxyls)) +mddf_glyc_water_aliphatic = contributions(mddf_glyc_water, SoluteGroup(aliphatic)) + +# +# Plot the contributions +# +Plots.default( + fontfamily="Computer Modern", + linewidth=2, + framestyle=:box, + grid=false, + label=nothing, +) +scalefontsizes(); +scalefontsizes(1.2) +plot(layout=(2, 1)) + +# +# Group contributions to glycerol-glycerol auto correlation +# +x = mddf_glyc.d # distances +# Total mddf +y = movavg(mddf_glyc.mddf; n=10).x +plot!(x, y, label="Total", subplot=1) +# Hydroxyls +y = movavg(mddf_glyc_hydroxyls; n=10).x +plot!(x, y, label="Hydroxyls", subplot=1) +# Aliphatic +y = movavg(mddf_glyc_aliphatic; n=10).x +plot!(x, y, label="Aliphatic", subplot=1) + +# +# Group contributions to glycerol-water correlation +# +x = mddf_glyc_water.d # distances +# Total mddf +y = movavg(mddf_glyc_water.mddf; n=10).x +plot!(x, y, label="Total", subplot=2) +# Hydroxyls +y = movavg(mddf_glyc_water_hydroxyls; n=10).x +plot!(x, y, label="Hydroxyls", subplot=2) +# Aliphatic +y = movavg(mddf_glyc_water_aliphatic; n=10).x +plot!(x, y, label="Aliphatic", subplot=2) + +# plot settings +plot!(ylabel="Glyc-Glyc MDDF", xlim=(1.0, 8.0), subplot=1) +plot!(ylabel="Glyc-Water MDDF", xlim=(1.0, 8.0), subplot=2) +plot!(xlabel=L"\mathrm{Distance / \AA}", subplot=2) + +# Save figure 
+savefig("./mddf_group_contributions.png") +println("Plot saved to mddf_group_contributions.png") diff --git a/dev/assets/scripts/example4/script3.jl b/dev/assets/scripts/example4/script3.jl new file mode 100644 index 00000000..df741d03 --- /dev/null +++ b/dev/assets/scripts/example4/script3.jl @@ -0,0 +1,105 @@ +import Pkg; +Pkg.activate("."); + +using PDBTools +using ComplexMixtures +using Plots +using LaTeXStrings +using EasyFit: movavg + +# Load a PDB file of the system +system = readPDB("./equilibrated.pdb") + +# Select the atoms corresponding to glycerol and water (using PDBTools) +glyc = select(system, "resname GLLM") +water = select(system, "water") + +# Load previously computed mddfs +mddf_glyc = load("./mddf_glyc.json") +mddf_glyc_water = load("./mddf_glyc_water.json") + +# 2D maps plot of group contributions + +# Glycerol groups +groups = [ + "OH" => ["O1", "HO1"], # first hydroxyl + "CH_2" => ["C1", "H11", "H12"], # first CH2 + "OH" => ["O2", "HO2"], # second hydroxyl + "CH" => ["C2", "H2"], # CH + "CH_2" => ["C3", "H31", "H32"], # second CH2 + "OH" => ["O3", "HO3"] # third hydroxyl +] +labels = latexstring.("\\textrm{$name}" for (name, atoms) in groups) + +# +# Contributions to Glycerol-Glycerol autocorrelation +# First, create a vector of vectors, in which each component carries the +# contributions of each Glycerol group to the MDDF +# +group_contrib = Vector{Float64}[] # empty vector of vectors +for (name, atoms) in groups + push!(group_contrib, contributions(mddf_glyc, SolventGroup(atoms))) +end + +# Convert output to a matrix to plot a 2D map +group_contrib = stack(group_contrib) + +# The distance range to plot +idmin = findfirst(d -> d > 1.5, mddf_glyc.d) +idmax = findfirst(d -> d > 3.0, mddf_glyc.d) + +# +# plot the map +# +Plots.default( + fontfamily="Computer Modern", + linewidth=2, + framestyle=:box, + grid=false, + label=nothing, +) +scalefontsizes(); +scalefontsizes(1.3) + +plot(layout=(2, 1)) + +# map of contributions to the 
Glycerol-Glycerol autocorrelation +contourf!( + 1:length(groups), + mddf_glyc.d[idmin:idmax], + group_contrib[idmin:idmax, :], + color=cgrad(:tempo), linewidth=1, linecolor=:black, + colorbar=:none, levels=5, + xticks=(1:length(groups), labels), xrotation=60, + ylabel=L"r/\AA", + subplot=1 +) + +# Water-glycerol interactions (Glycerol contributions) +group_contrib = Vector{Float64}[] # empty vector of vectors +for (name, atoms) in groups + push!(group_contrib, contributions(mddf_glyc_water, SoluteGroup(atoms))) +end + +# Convert output to a matrix to plot a 2D map +group_contrib = stack(group_contrib) + +# map of the contributions of Glycerol groups to the Glycerol-Water correlation +contourf!( + 1:length(groups), + mddf_glyc_water.d[idmin:idmax], + group_contrib[idmin:idmax, :], + color=cgrad(:tempo), linewidth=1, linecolor=:black, + colorbar=:none, levels=5, + xticks=(1:length(groups), labels), xrotation=60, + ylabel=L"r/\AA", + subplot=2 +) +plot!( + xlabel="Glycerol group", + bottommargin = 0.5Plots.Measures.cm, + subplot=2 +) + +savefig("./GlycerolWater_map.png") +println("Plot saved to GlycerolWater_map.png") \ No newline at end of file diff --git a/dev/assets/scripts/python/group_contributions.png b/dev/assets/scripts/python/group_contributions.png new file mode 100644 index 00000000..90e112f4 Binary files /dev/null and b/dev/assets/scripts/python/group_contributions.png differ diff --git a/dev/assets/scripts/python/mddf_kb.png b/dev/assets/scripts/python/mddf_kb.png new file mode 100644 index 00000000..fb13157c Binary files /dev/null and b/dev/assets/scripts/python/mddf_kb.png differ diff --git a/dev/assets/scripts/python/script1.py b/dev/assets/scripts/python/script1.py new file mode 100644 index 00000000..7be68e61 --- /dev/null +++ b/dev/assets/scripts/python/script1.py @@ -0,0 +1,36 @@ +# The ComplexMixtures.py file is assumed to be in the current +# directory. 
+# Obtain it from: +# https://m3g.github.io/ComplexMixtures.jl/stable/assets/ComplexMixtures.py +import ComplexMixtures as cm + +# Load the pdb file of the system using `PDBTools`: +atoms = cm.readPDB("./system.pdb") + +# Create arrays of atoms with the protein and Glycerol atoms, +# using the `select` function of the `PDBTools` package: +protein = cm.select(atoms,"protein") +glyc = cm.select(atoms,"resname GLYC") +water = cm.select(atoms,"water") + +# Setup solute and solvent structures, required for computing the MDDF, +# with `AtomSelection` function of the `ComplexMixtures` package: +solute = cm.AtomSelection(protein, nmols=1) +solvent = cm.AtomSelection(glyc, natomspermol=14) + +# Read and setup the Trajectory structure required for the computations: +trajectory = cm.Trajectory("./glyc50_sample.dcd", solute, solvent) + +# Run the calculation and get results: +results = cm.mddf(trajectory) + +# Save the reults to recover them later if required +cm.save(results,"./glyc50.json") +print("Results saved to glyc50.json") + +# Compute the water distribution function around the protein: +solvent = cm.AtomSelection(water, natomspermol=3) +trajectory = cm.Trajectory("./glyc50_sample.dcd", solute, solvent) +results = cm.mddf(trajectory) +cm.save(results,"./water.json") +print("Results saved to water.json") \ No newline at end of file diff --git a/dev/assets/scripts/python/script2.py b/dev/assets/scripts/python/script2.py new file mode 100644 index 00000000..4e3553a0 --- /dev/null +++ b/dev/assets/scripts/python/script2.py @@ -0,0 +1,20 @@ +import ComplexMixtures as cm +import matplotlib.pyplot as plt + +# Load the actual results obtained with the complete simulation: +glyc_results = cm.load("./glyc50.json") +water_results = cm.load("./water.json") + +# Plot MDDF and KB +fig, axs = plt.subplots(2) +axs[0].plot(glyc_results.d, glyc_results.mddf, label="Glycerol") +axs[0].plot(water_results.d, water_results.mddf, label="Water") +axs[0].set(ylabel="MDDF") + +# Plot KB 
integral +axs[1].plot(glyc_results.d, glyc_results.kb) +axs[1].plot(water_results.d, water_results.kb) +axs[1].set(xlabel="distance / Angs", ylabel="KB integral") +plt.tight_layout() + +plt.savefig("mddf_kb.png") \ No newline at end of file diff --git a/dev/assets/scripts/python/script3.py b/dev/assets/scripts/python/script3.py new file mode 100644 index 00000000..ba1db8e6 --- /dev/null +++ b/dev/assets/scripts/python/script3.py @@ -0,0 +1,28 @@ +# Load packages +import ComplexMixtures as cm +import matplotlib.pyplot as plt + +# Read the pdb file and set solvent and solute groups +atoms = cm.readPDB("./system.pdb") +protein = cm.select(atoms, "protein") +glyc = cm.select(atoms, "resname GLYC") + +# load previously computed MDDF results +results = cm.load("./glyc50.json") + +# Select atoms by name +hydroxyls = cm.list(["O1","O2","O3","H1","H2","H3"]) +aliphatic = cm.list(["C1","C2","HA","HB","HC","HD"]) + +# Extract the contributions of the Glycerol hydroxyls and aliphatic groups +hydr_contributions = cm.contributions(results, cm.SolventGroup(hydroxyls)) +aliph_contributions = cm.contributions(results, cm.SolventGroup(aliphatic)) + +# Plot +plt.plot(results.d, results.mddf, label="Total MDDF") +plt.plot(results.d, hydr_contributions, label="Hydroxyls") +plt.plot(results.d, aliph_contributions, label="Aliphatic") +plt.legend() +plt.xlabel("distance / Angs") +plt.ylabel("MDDF") +plt.savefig("group_contributions.png") \ No newline at end of file diff --git a/dev/contrib/index.html b/dev/contrib/index.html index 5203bf84..b9e2980c 100644 --- a/dev/contrib/index.html +++ b/dev/contrib/index.html @@ -1,50 +1,46 @@ -Atomic and group contributions · ComplexMixtures.jl

Atomic and group contributions

One of the interesting features of Minimum-Distance distributions is that they can be naturally decomposed into the atomic or group contributions. Simply put, if a MDDF has a peak at a hydrogen-bonding distance, it is natural to decompose that peak into the contributions of each type of solute or solvent atom to that peak.

To obtain the atomic contributions of an atom or group of atoms, the contributions function is provided. For example, in a system composed of a protein and water, we would have defined the solute and solvent using:

using PDBTools, ComplexMixtures
+Atomic and group contributions · ComplexMixtures.jl

Atomic and group contributions

One of the interesting features of Minimum-Distance distributions is that they can be naturally decomposed into the atomic or group contributions. Simply put, if a MDDF has a peak at a hydrogen-bonding distance, it is natural to decompose that peak into the contributions of each type of solute or solvent atom to that peak.

To obtain the atomic contributions of an atom or group of atoms, the contributions function is provided. For example, in a system composed of a protein and water, we would have defined the solute and solvent using:

using PDBTools, ComplexMixtures
 atoms = readPDB("system.pdb")
 protein = select(atoms,"protein")
 water = select(atoms,"water")
-solute = Selection(protein,nmols=1)
-solvent = Selection(water,natomspermol=3)

The MDDF calculation is executed with:

trajectory = Trajectory("trajectory.dcd",solute,solvent)
-results = mddf(trajectory)

Atomic contributions in the result data structure

The results data structure contains the decomposition of the MDDF into the contributions of every type of atom of the solute and the solvent. These data is available at the results.solute_atom and results.solvent_atom arrays:

julia> results.solute_atom
-50×1463 Array{Float64,2}:
- 0.0  0.0      0.0  …  0.0  0.0  0.0
- 0.0  0.0      0.0  …  0.0  0.0  0.0
- ...
- 0.0  0.14245  0.0  …  0.0  0.0  0.0
- 0.0  0.0      0.0  …  0.0  0.0  0.0
-
-julia> results.solvent_atom 
-50×3 Array{Float64,2}:
- 0.0        0.0        0.0 
- 0.0        0.0        0.0 
- ...
- 0.26087    0.26087    0.173913
- 0.25641    0.0854701  0.170940

Here, 50 is the number of bins of the histogram, whose distances are available at the results.d vector.

It is expected that for a protein most of the atoms do not contribute to the MDDF, and that all values are zero at very short distances, smaller than the radii of the atoms.

The three columns of the results.solvent_atom array correspond to the thee atoms of the water molecule in this example. The sequence of atoms correspond to that of the PDB file, but can be retrieved with:

julia> solvent.names
-3-element Array{String,1}:
- "OH2"
- "H1"
- "H2"

Therefore, if the first column of the results.solvent_atom vector is plotted as a function of the distances, one gets the contributions to the MDDF of the Oxygen atom of water. For example, here we plot the total MDDF and the Oxygen contributions:

using Plots
-plot(results.d,results.mddf,label="Total MDDF",linewidth=2)
-plot!(results.d,results.solvent_atom[:,1],label="OH2",linewidth=2)
-plot!(xlabel="Distance / Å",ylabel="MDDF")

Selecting groups by atom names or indexes

To plot the contributions of the hydrogen atoms of water to the total MDDF, we have to select the two atoms, named H1 and H2. The contributions function provides several practical ways of doing that, with or without the use of PDBTools.

The contributions function receives three parameters:

  1. The solute or solvent data structure, created with Selection.
  2. The array of atomic contributions (here results.solute_atom or results.solvent_atom), corresponding to the selection in 1.
  3. A selection of a group of atoms within the molecule of interest, provided as described below.

Selecting by indexes within the molecule

To select simply by the index of the atoms of the molecules, just provide a list of indexes to the contributions function. For example, to select the hydrogen atoms, which are the second and third atoms of the water molecule, use:

julia> indexes = [ 2, 3 ]
-julia> h_contributions = contributions(solvent,R.solvent_atom,indexes)
-500-element Array{Float64,1}:
- 0.0
- 0.0
- ⋮
- 0.7742706465861815
- 0.8084139794974875

Plotting both the oxygen (index = 1) and hydrogen contributions results in:

Selecting by atom name

The exact same plot above could be obtained by providing lists of atom names instead of indexes to the contributions function:

oxygen = ["OH2"]
-o_contributions = contributions(solvent,R.solvent_atom,oxygen) 
-hydrogens = ["H1","H2"]
-h_contributions = contributions(solvent,R.solvent_atom,hydrogens)

The above plot can be obtained with:

using Plots
-plot(results.d,results.mddf,label="Total MDDF",linewidth=2)
-plot!(results.d,o_contributions,label="OH2",linewidth=2)
-plot!(results.d,h_contributions,label="Hydrogen atoms",linewidth=2)
-plot!(xlabel="Distance / Å",ylabel="MDDF")

General selections using PDBTools

More interesting and general is to select atoms of a complex molecule, like a protein, using residue names, types, etc. Here we illustrate how this is done by providing selection strings to contributions to obtain the contributions to the MDDF of different types of residues of a protein to the total MDDF.

For example, if we want to split the contributions of the charged and neutral residues to the total MDDF distribution, we could use to following code. Here, solute refers to the protein.

charged_residues = PDBTools.select(atoms,"charged")
-charged_contributions = contributions(solute,R.solute_atom,charged_residues)
+solute = AtomSelection(protein,nmols=1)
+solvent = AtomSelection(water,natomspermol=3)

The MDDF calculation is executed with:

trajectory = Trajectory("trajectory.dcd",solute,solvent)
+results = mddf(trajectory)

Atomic contributions in the result data structure

The results data structure contains the decomposition of the MDDF into the contributions of every type of atom of the solute and the solvent. These contributions can be retrieved using the contributions function, with the SoluteGroup and SolventGroup selectors.

For example, if the MDDF of water (solvent) relative to a solute was computed, and water has atom names OH2, H1, H2, one can retrieve the contributions of the oxygen atom with:

OH2 = contributions(results, SolventGroup(["OH2"]))

or with, if OH2 is the first atom in the molecule,

OH2 = contributions(results, SolventGroup([1]))

The contributions of the hydrogen atoms can be obtained, similarly, with:

H = contributions(results, SolventGroup(["H1", "H2"]))

or, if H1 and H2 are the second and third atoms in the molecule, with:

H = contributions(results, SolventGroup([2, 3]))

Each of these calls will return a vector of the contributions of these atoms to the total MDDF.

For example, here we plot the total MDDF and the Oxygen contributions:

using Plots
+plot(results.d, results.mddf, label=["Total MDDF"], linewidth=2)
+plot!(results.d, contributions(results, SolventGroup(["OH2"])), label=["OH2"], linewidth=2)
+plot!(xlabel="Distance / Å", ylabel="MDDF")

Using PDBTools

If the solute is a protein, or other complex molecule, selections defined with PDBTools can be used. For example, this will retrieve the contribution of the acidic residues of a protein to the total MDDF:

using PDBTools
+atoms = readPDB("system.pdb")
+acidic_residues = select(atoms, "acidic")
+acidic_contributions = contributions(results, SoluteGroup(acidic_residues))

It is expected that for a protein most of the atoms do not contribute to the MDDF, and that all values are zero at very short distances, smaller than the radii of the atoms.

More interesting and general is to select atoms of a complex molecule, like a protein, using residue names, types, etc. Here we illustrate how this is done by providing selection strings to contributions to obtain the contributions to the MDDF of different types of residues of a protein to the total MDDF.

For example, if we want to split the contributions of the charged and neutral residues to the total MDDF distribution, we could use the following code. Here, solute refers to the protein.

charged_residues = PDBTools.select(atoms,"charged")
+charged_contributions = contributions(results, SoluteGroup(charged_residues))
 
 neutral_residues = PDBTools.select(atoms,"neutral")
-neutral_contributions = contributions(solute,R.solute_atom,neutral_residues)

The charged and neutral outputs are vectors containing the contributions of these residues to the total MDDF. The corresponding plot is:

plot(results.d,results.mddf,label="Total MDDF",linewidth=2)
+neutral_contributions = contributions(results, SoluteGroup(neutral_residues))

The charged_contributions and neutral_contributions outputs are vectors containing the contributions of these residues to the total MDDF. The corresponding plot is:

plot(results.d,results.mddf,label="Total MDDF",linewidth=2)
 plot!(results.d,charged_contributions,label="Charged residues",linewidth=2)
 plot!(results.d,neutral_contributions,label="Neutral residues",linewidth=2)
-plot!(xlabel="Distance / Å",ylabel="MDDF")

Resulting in:

Note here how charged residues contribute strongly to the peak at hydrogen-bonding distances, but much less in general. Of course all selection options could be used, to obtain the contributions of specific types of residues, atoms, the backbone, the side-chains, etc.

+plot!(xlabel="Distance / Å",ylabel="MDDF")

Resulting in:

Note here how charged residues contribute strongly to the peak at hydrogen-bonding distances, but much less in general. Of course all selection options could be used, to obtain the contributions of specific types of residues, atoms, the backbone, the side-chains, etc.

Reference functions

ComplexMixtures.contributionsMethod
contributions(R::Result, group::Union{SoluteGroup,SolventGroup}; type = :mddf)

Returns the contributions of the atoms of the solute or solvent to the MDDF, coordination number or MD count.

Arguments

  • R::Result: The result of a calculation.
  • group::Union{SoluteGroup,SolventGroup}: The group of atoms to consider.
  • type::Symbol: The type of contributions to return. Can be :mddf (default), :coordination_number or :md_count.

Examples

julia> using ComplexMixtures, PDBTools
+
+julia> dir = ComplexMixtures.Testing.data_dir*"/Gromacs";
+
+julia> atoms = readPDB(dir*"/system.pdb");
+
+julia> protein = select(atoms, "protein");
+
+julia> emim = select(atoms, "resname EMI"); 
+
+julia> solute = AtomSelection(protein, nmols = 1)
+AtomSelection 
+    1231 atoms belonging to 1 molecule(s).
+    Atoms per molecule: 1231
+    Number of groups: 1231
+
+julia> solvent = AtomSelection(emim, natomspermol = 20)
+AtomSelection 
+    5080 atoms belonging to 254 molecule(s).
+    Atoms per molecule: 20
+    Number of groups: 20
+
+julia> results = load(dir*"/protein_EMI.json"); # load pre-calculated results
+
+julia> contributions(results, SoluteGroup(["CA", "CB"])) # contribution of CA and CB atoms to the MDDF
+
source
diff --git a/dev/example1/index.html b/dev/example1/index.html new file mode 100644 index 00000000..5f6af02c --- /dev/null +++ b/dev/example1/index.html @@ -0,0 +1,184 @@ + +◦ Protein in water/glycerol · ComplexMixtures.jl

Protein in water/glycerol

The following examples consider a system composed of a protein solvated by a mixture of water and glycerol, built with Packmol. The simulations were performed with NAMD with periodic boundary conditions and an NPT ensemble at room temperature and pressure. Molecular pictures were produced with VMD and plots were produced with Julia's Plots library.

+ +

Image of the system of the example: a protein solvated by a mixture of glycerol (green) and water, at a concentration of 50% v/v.

Index

Data, packages, and execution

The files required to run this example are:

  • system.pdb: The PDB file of the complete system.
  • glyc50_traj.dcd: Trajectory file. This is a 1GB file, necessary for running from scratch the calculations.

To run the scripts, we suggest the following procedure:

  1. Create a directory, for example example1.
  2. Copy the required data files above to this directory.
  3. Launch julia in that directory, activate the directory environment, and install the required packages. This is done by launching Julia and executing:
    import Pkg 
    +Pkg.activate(".")
    +Pkg.add(["ComplexMixtures", "PDBTools", "Plots", "LaTeXStrings", "EasyFit"])
    +exit()
  4. Copy the code of each script into a file, and execute with:
    julia -t auto script.jl
    Alternatively (and perhaps preferably), copy line by line the content of the script into the Julia REPL, to follow each step of the calculation. For a more advanced Julia usage, we suggest the VSCode IDE with the Julia Language Support extension.

MDDF, KB integrals, and group contributions

Here we compute the minimum-distance distribution function, the Kirkwood-Buff integral, and the atomic contributions of the solvent to the density. This example illustrates the regular usage of ComplexMixtures, to compute the minimum distance distribution function, KB-integrals and group contributions.

Complete example code: click here!
# Activate environment in current directory
+import Pkg; Pkg.activate(".")
+
+# Run this once, to install necessary packages:
+# Pkg.add(["ComplexMixtures", "PDBTools", "Plots", "LaTeXStrings"])
+
+# Load packages
+using ComplexMixtures
+using PDBTools
+using Plots, Plots.Measures
+using LaTeXStrings
+
+# The complete trajectory file can be downloaded from (1Gb):
+# https://www.dropbox.com/scl/fi/zfq4o21dkttobg2pqd41m/glyc50_traj.dcd?rlkey=el3k6t0fx6w5yiqktyx96gzg6&dl=0
+
+# The example output file is available at:
+# 
+# Load PDB file of the system
+atoms = readPDB("./system.pdb")
+
+# Select the protein and the GLYC molecules
+protein = select(atoms, "protein")
+glyc = select(atoms, "resname GLYC")
+
+# Setup solute and solvent structures
+solute = AtomSelection(protein, nmols=1)
+solvent = AtomSelection(glyc, natomspermol=14)
+
+# Path to the trajectory file
+trajectory_file = "./glyc50_traj.dcd" 
+
+# Run mddf calculation, and save results
+trajectory = Trajectory(trajectory_file, solute, solvent)
+results = mddf(trajectory)
+save(results, "glyc50_results.json")
+
+#
+# Produce plots
+#
+# Default options for plots 
+Plots.default(
+    fontfamily="Computer Modern", 
+    linewidth=2, 
+    framestyle=:box, 
+    label=nothing, 
+    grid=false
+)
+
+#
+# The complete MDDF and the Kirkwood-Buff Integral
+#
+plot(layout=(1, 2))
+# plot mddf
+plot!(results.d, results.mddf,
+    xlabel=L"r/\AA", 
+    ylabel="mddf", 
+    subplot=1
+)
+hline!([1], linestyle=:dash, linecolor=:gray, subplot=1)
+# plot KB integral
+plot!(results.d, results.kb / 1000, #to L/mol
+    xlabel=L"r/\AA", 
+    ylabel=L"G_{us}/\mathrm{L~mol^{-1}}", 
+    subplot=2
+)
+# size and margin
+plot!(size=(800, 300), margin=4mm)
+savefig("./mddf.png")
+
+#
+# Atomic contributions to the MDDF
+#
+hydroxyls = ["O1", "O2", "O3", "H1", "H2", "H3"]
+aliphatic = ["C1", "C2", "HA", "HB", "HC", "HD"]
+hydr_contrib = contributions(results, SolventGroup(hydroxyls))
+aliph_contrib = contributions(results, SolventGroup(aliphatic))
+
+plot(results.d, results.mddf, 
+    xlabel=L"r/\AA", 
+    ylabel="mddf", 
+    size=(600, 400)
+)
+plot!(results.d, hydr_contrib, label="Hydroxyls")
+plot!(results.d, aliph_contrib, label="Aliphatic chain")
+hline!([1], linestyle=:dash, linecolor=:gray)
+savefig("./mddf_atom_contrib.png")

Output

The code above will produce the following plots, which contain the minimum-distance distribution of glycerol relative to the protein, and the corresponding KB integral:

+ +

and the same distribution function, decomposed into the contributions of the hydroxyl and aliphatic groups of glycerol:

+ +
Note

To change the options of the calculation, set the Options structure accordingly and pass it as a parameter to mddf. For example:

options = Options(cutoff=10.)
+mddf(trajectory,options)

The complete set of options available is described here.

2D density map

In this follow-up to the example above, we compute the group contributions of the solute (the protein) to the MDDFs, split into the contributions of each protein residue. This allows the observation of the penetration of the solvent into the structure, and the strength of the interaction of the solvent, or cosolvent, with each type of residue in the structure.

Complete example code: click here!
# Activate environment in current directory
+import Pkg; Pkg.activate(".")
+
+# Run this once, to install necessary packages:
+# Pkg.add(["ComplexMixtures", "PDBTools", "Plots", "LaTeXStrings"])
+
+# Load packages
+using ComplexMixtures
+using PDBTools
+using Plots, Plots.Measures
+using LaTeXStrings
+
+# The complete trajectory file can be downloaded from (3Gb):
+# https://drive.google.com/file/d/14M30jDHRwUM77hzbDphgbu8mcWFBcQrX/view?usp=sharing
+
+# The example output file is available at:
+# 
+# Read the atoms of the simulated system from the PDB file
+atoms = readPDB("./system.pdb")
+
+# Select the protein and the GLYC (glycerol) molecules
+# NOTE(review): `glyc` is not used in the remainder of this script
+protein = select(atoms, "protein")
+glyc = select(atoms, "resname GLYC")
+
+# Load the example output file (computed in the previous script), instead
+# of recomputing the results from the trajectory
+example_output = "./glyc50_results.json"
+results = load(example_output)
+
+#
+# Plot a 2D map showing the contributions of some residues
+#
+residues = collect(eachresidue(protein))
+
+# We will plot only the range 70:110, for clarity
+irange = 70:110
+
+# We create a matrix with a number of rows equal to the number
+# of bins of the mddf histogram (length(results.d)) and a number of
+# columns equal to the number of residues
+rescontrib = zeros(length(results.d), length(residues))
+
+# Each column is then filled up with the contributions of each residue
+for (ires, residue) in enumerate(residues)
+    rescontrib[:, ires] .= contributions(results, SoluteGroup(residue))
+end
+
+# Plot only for distances within 1.5 and 3.5:
+idmin = findfirst(d -> d > 1.5, results.d)
+idmax = findfirst(d -> d > 3.5, results.d)
+
+# Obtain pretty labels for the residues in the x-axis. The limits are taken
+# from `irange`, such that the ticks always follow the range that is plotted
+xticks = PDBTools.residue_ticks(protein, first=first(irange), last=last(irange))
+
+# Plot contour curves with the density at each distance from
+# each residue
+Plots.default(fontfamily="Computer Modern")
+contourf(irange, results.d[idmin:idmax], rescontrib[idmin:idmax, irange],
+  color=cgrad(:tempo), linewidth=1, linecolor=:black,
+  colorbar=:none, levels=5,
+  xlabel="Residue", ylabel=L"r/\AA",
+  xticks=xticks, xrotation=60,
+  xtickfont=font(8, "Computer Modern"),
+  size=(700, 400),
+  margin=0.5Plots.PlotMeasures.cm
+)
+savefig("./density2D.png")

Output

The code above will produce the following plot, which contains the contribution of each residue to the distribution function of glycerol, within 1.5 to 3.5 $\mathrm{\AA}$ of the surface of the protein.

+ +

3D density map

In this example we compute three-dimensional representations of the density map of Glycerol in the vicinity of a set of residues of a protein, from the minimum-distance distribution function.

Complete example code: click here!
import Pkg; Pkg.activate(".")
+using PDBTools
+using ComplexMixtures
+
+# PDB file of the simulated system
+atoms = readPDB("./system.pdb")
+
+# Load results of a ComplexMixtures run
+results = load("./glyc50_results.json")
+
+# Inform which is the solute
+# NOTE(review): `solute` is not passed to `grid3D` below - confirm whether
+# it is still required in this script
+protein = select(atoms, "protein")
+solute = AtomSelection(protein, nmols=1)
+
+# Compute the 3D density grid and output it to the PDB file.
+# Here we use dmax=3.5 such that the output file is not too large
+grid = grid3D(results, atoms, "./grid.pdb"; dmin=1.5, dmax=3.5)

Here, the MDDF is decomposed at each distance according to the contributions of each solute (the protein) residue. The grid is created such that, at each point in space around the protein, it is possible to identify:

  1. Which atom is the closest atom of the solute to that point.

  2. Which is the contribution of that atom (or residue) to the distribution function.

Therefore, by filtering the 3D density map at each distance one can visualize over the solute structure which are the regions that mostly interact with the solvent of choice at each distance. Typical images of such a density are:

+ +

In the figure on the left, the points in space around the protein are selected with the following properties: distance from the protein smaller than 2.0Å and relative contribution to the MDDF at the corresponding distance of at least 10% of the maximum contribution. Thus, we are selecting the regions of the protein corresponding to the most stable hydrogen-bonding interactions. The color of the points is the contribution to the MDDF, from blue to red. Thus, the most reddish points correspond to the regions where the most stable hydrogen bonds were formed. We have marked two regions here, on opposite sides of the protein, with arrows.

Clicking on those points we obtain which are the atoms of the protein contributing to the MDDF at that region. In particular, the arrow on the right points to the strongest red region, which corresponds to an Aspartic acid. These residues are shown explicitly under the density (represented as a transparent surface) on the figure in the center.

The figure on the right displays, overlapped with the hydrogen-bonding residues, the most important contributions to the second peak of the distribution, corresponding to distances from the protein between 2.0 and 3.5Å. Notably, the regions involved are different from the ones forming hydrogen bonds, indicating that non-specific interactions with the protein (and not a second solvation shell) are responsible for the second peak.

How to run this example:

Assuming that the input files are available in the script directory, just run the script with:

julia density3D.jl

Alternatively, open Julia and copy/paste the commands in density3D.jl or use include("./density3D.jl"). These options will allow you to remain in the Julia session with access to the grid data structure that was generated and corresponds to the output grid.pdb file.

This will create the grid.pdb file. Here we provide a previously setup VMD session that contains the data with the visualization choices used to generate the figure above. Load it with:

vmd -e grid.vmd

A short tutorial video showing how to open the input and output PDB files in VMD and produce images of the density is available here:

+ +
diff --git a/dev/example2/index.html b/dev/example2/index.html new file mode 100644 index 00000000..6fa7a132 --- /dev/null +++ b/dev/example2/index.html @@ -0,0 +1,248 @@ + +◦ Polyacrylamide in DMF · ComplexMixtures.jl

Polyacrylamide in DMF

In this example we illustrate how the solvation structure of a polymer can be studied with ComplexMixtures.jl. The system is a 5-mer segment of polyacrylamide (PAE - capped with methyl groups), solvated with dimethylformamide (DMF). The system is interesting because of the different functional groups and polarities involved in the interactions of DMF with PAE. A snapshot of the system is shown below.

+ +

The structures of DMF and of the polyacrylamide segment are:

+ + + + + + + + +
DMFPolyacrylamide
+

Index

Data, packages, and execution

The files required to run this example are:

To run the scripts, we suggest the following procedure:

  1. Create a directory, for example example2.
  2. Copy the required data files above to this directory.
  3. Launch julia in that directory: activate the directory environment, and install the required packages. This can be done by launching Julia and executing:
    import Pkg 
    +Pkg.activate(".")
    +Pkg.add(["ComplexMixtures", "PDBTools", "Plots", "LaTeXStrings", "EasyFit"])
    +exit()
  4. Copy the code of each script into a file, and execute with:
    julia -t auto script.jl
    Alternatively (and perhaps preferably), copy line by line the content of the script into the Julia REPL, to follow each step of the calculation.

MDDF and KB integrals

Here we compute the minimum-distance distribution function, the Kirkwood-Buff integral, and the atomic contributions of the solvent to the density. This example illustrates the regular usage of ComplexMixtures, to compute the minimum distance distribution function, KB-integrals and group contributions.

Complete example code: click here!
import Pkg; Pkg.activate(".")
+
+using PDBTools
+using ComplexMixtures
+using Plots
+using LaTeXStrings
+using EasyFit: movavg
+
+# The full trajectory file is available at:
+# https://www.dropbox.com/scl/fi/jwafhgxaxuzsybw2y8txd/traj_Polyacry.dcd?rlkey=p4bn65m0pkuebpfm0hf158cdm&dl=0
+trajectory_file = "./traj_Polyacry.dcd"
+
+# Load a PDB file of the system
+system = readPDB("./equilibrated.pdb")
+
+# Select the atoms corresponding to the DMF molecules
+dmf = select(system, "resname DMF")
+
+# Select the atoms corresponding to the polyacrylamide: the selection
+# joins the three residue names FACR, ACR and LACR
+acr = select(system, "resname FACR or resname ACR or resname LACR")
+
+# Set the solute and the solvent selections for ComplexMixtures: the
+# polymer is a single molecule, and each DMF molecule has 12 atoms
+solute = AtomSelection(acr, nmols=1)
+solvent = AtomSelection(dmf, natomspermol=12)
+
+# Set the trajectory structure
+trajectory = Trajectory(trajectory_file, solute, solvent)
+
+# Use a large dbulk distance for better KB convergence
+options = Options(dbulk=19.)
+
+# Compute the mddf and associated properties
+results = mddf(trajectory, options)
+
+# Save results to file for later use (loaded by the following scripts)
+save(results, "./mddf.json")
+println("Results saved to ./mddf.json file")
+
+# Plot the MDDF and KB integrals
+plot_font = "Computer Modern"
+default(
+    fontfamily=plot_font,
+    linewidth=1.5,
+    framestyle=:box,
+    label=nothing,
+    grid=false,
+    palette=:tab10
+)
+scalefontsizes(); scalefontsizes(1.3)
+
+# Plot the MDDF of DMF relative to PolyACR and its corresponding KB integral,
+# in two stacked panels sharing the same distance range
+plot(layout=(2,1))
+plot!(
+    results.d,
+    movavg(results.mddf,n=9).x, # Smooth example with a running average
+    ylabel="MDDF",
+    xlims=(0,20),
+    subplot=1,
+)
+
+# Plot the KB integral. The values are plotted as stored, without unit
+# conversion, and a KB integral is a volume: the axis unit is cm^3/mol
+# (assumes `results.kb` is stored in cm^3/mol - TODO confirm)
+plot!(
+    results.d,
+    movavg(results.kb,n=9).x, # smooth kb
+    xlabel=L"\textrm{Distance / \AA}",
+    ylabel=L"\textrm{KB~/~cm^3~mol^{-1}}",
+    xlims=(0,20), # same distance range as the MDDF panel
+    subplot=2
+)
+savefig("./mddf_kb.png")
+println("Plot saved to mddf_kb.png")

Output

The distribution of DMF molecules around polyacrylamide is shown below. There is a peak at ~2.5 Å, indicating favorable non-specific interactions between the solvent molecules and the polymer. The peak is followed by a dip and diffuse peaks at higher distances. Thus, the DMF molecules are structured around the polymer, but essentially only in the first solvation shell.

The KB integral in a bicomponent mixture converges to the (negative of the) apparent molar volume of the solute. It is negative, indicating that the accumulation of DMF in the first solvation shell of the polymer is not enough to compensate the excluded volume of the solute.

Group contributions

The MDDF can be decomposed into the contributions of the DMF chemical groups, and on the polyacrylamide chemical groups. In the first panel below we show the contributions of the DMF chemical groups to the distribution function.

Complete example code: click here!
import Pkg; Pkg.activate(".")
+
+using ComplexMixtures
+using Plots
+using LaTeXStrings
+using EasyFit: movavg
+
+# Some default settings for the plots
+plot_font = "Computer Modern"
+Plots.default(
+        fontfamily=plot_font,
+        linewidth=1.5, 
+        framestyle=:box, 
+        label=nothing, 
+        grid=false,
+)
+
+# Load previously saved results, computed in the previous script
+results = load("./mddf.json")
+
+# Figure with two stacked panels; this part fills the first one
+plot(layout=(2,1))
+
+# Total MDDF, smoothed with a running average
+plot!(results.d, movavg(results.mddf, n=10).x; label="Total", subplot=1)
+
+# DMF group contributions to the MDDF. The dictionary maps each group
+# name (used as the curve label) to the atom names of the group
+groups = Dict(
+    "CO" => ["C", "O"], # carbonyl
+    "N" => ["N"],
+    "Methyl groups" => ["CC", "CT", "HC1", "HC2", "HC3", "HT1", "HT2", "HT3"],
+)
+for (label, atom_names) in groups
+    # Contribution of the atoms of this group, smoothed like the total MDDF
+    smoothed = movavg(contributions(results, SolventGroup(atom_names)), n=10).x
+    plot!(results.d, smoothed; label=label, subplot=1)
+end
+
+# Axis range and label of the first panel
+plot!(xlim=(1,5), ylabel="MDDF", subplot=1)
+
+#
+# ACR group contributions to the MDDF, in the second panel. This is an
+# interesting case, as the groups are repeated along the polymer chain
+#
+groups = Dict(
+    L"\textrm{CH_3}" => ["CF", "HF1", "HF2", "HF3", "CL", "HL1", "HL2", "HL3"], # terminal methyls
+    "CO" => ["OE1", "CD"], # carbonyl
+    L"\textrm{NH_2}" => ["NE2", "HE22", "HE21"], # amine
+    L"\textrm{CHCH_2}" => ["C", "H2", "H1", "CA", "HA"], # backbone
+)
+
+# Total MDDF, smoothed with a running average
+plot!(results.d, movavg(results.mddf, n=10).x; label="Total", subplot=2)
+
+# One curve for each chemical group of the polymer
+for (label, atom_names) in groups
+    smoothed = movavg(contributions(results, SoluteGroup(atom_names)), n=10).x
+    plot!(results.d, smoothed; label=label, subplot=2)
+end
+
+# Axis range and labels of the second panel
+plot!(xlim=(1,5), xlabel=L"\textrm{Distance / \AA}", ylabel="MDDF", subplot=2)
+
+# Write the figure to disk
+savefig("./mddf_groups.png")
+println("Created figure file: ./mddf_groups.png")

Output

The decomposition reveals that specific interactions peaking at distances slightly smaller than 2 Å exist between the polymer and the carbonyl group of DMF. Thus, there are hydrogen bonds between the polymer and this group, which dominate the interactions between the solute and the solvent at short distances. The non-specific interactions peak at 2.5 Å and are composed of contributions of all DMF chemical groups, but particularly of the methyl groups.

The decomposition of the same MDDF in the contributions of the chemical groups of the polymer is clearly associated to the DMF contributions. The specific, hydrogen-bonding, interactions, are associated to the polymer amine groups. The amine groups also contribute to the non-specific interactions at greater distances, but these are a sum of the contributions of all polymer groups, polar or aliphatic.

2D density map

We can decompose the MDDF into the contributions of each portion of the polymer chain. The map below displays the contributions of each chemical group of the polymer, now split into the mers of the polymer, to the MDDF.

Complete example code: click here!
import Pkg; Pkg.activate(".")
+
+using ComplexMixtures
+using Plots
+using EasyFit: movavg
+using LaTeXStrings
+using PDBTools
+
+# Here we will produce a 2D plot of group contributions, splitting the
+# contributions of each mer of the polymer into its chemical groups
+
+# Chemical groups of the polymer monomers, defined by the atom names.
+# An ordered vector of pairs is used instead of a Dict because the two
+# terminal methyl groups share the same label: as Dict keys, the second
+# entry would silently replace the first one. The vector also defines the
+# order in which the groups appear in the map.
+groups = [
+    L"\textrm{CH_3}" => ["CF","HF1","HF2","HF3"], # terminal methyl (CF)
+    "CO" => ["OE1","CD"], # carbonyl
+    L"\textrm{NH_2}" => ["NE2","HE22","HE21"], # amine
+    L"\textrm{CHCH_2}" => ["C","H2","H1","CA","HA"], # backbone
+    L"\textrm{CH_3}" => ["CL","HL1","HL2","HL3"], # terminal methyl (CL)
+]
+
+system = readPDB("./equilibrated.pdb")
+acr = select(system, "resname FACR or resname ACR or resname LACR")
+results = load("./mddf.json")
+
+# Here we split the polymer in residues, to extract the contribution of
+# each chemical group of each polymer mer independently
+group_contribs = Vector{Float64}[]
+labels = String[]
+for mer in eachresidue(acr)
+    for (group_label, group_atoms) in groups
+        # Filter the atoms of this mer that belong to the group
+        mer_group_atoms = filter(at -> name(at) in group_atoms, mer)
+        # Skip the groups that have no atoms in this mer: the terminal
+        # methyl groups are expected only in the mers at the ends of the
+        # chain. This replaces the checks on hard-coded mer indices.
+        if isempty(mer_group_atoms)
+            continue
+        end
+        # Retrieve the contribution of this mer atoms to the MDDF
+        atoms_contrib = contributions(results, SoluteGroup(mer_group_atoms))
+        # Smooth the contributions
+        atoms_contrib = movavg(atoms_contrib; n = 10).x
+        # Add contributions to the group contributions list
+        push!(group_contribs, atoms_contrib)
+        # Push label to label list
+        push!(labels, group_label)
+    end
+end
+
+# Stack the vectors of group contributions into a single matrix
+group_contribs = stack(group_contribs)
+
+# Indices of the first distances greater than 1.5 and 3.2, which
+# delimit the region of the map that will be shown
+idmin = findfirst(>(1.5), results.d)
+idmax = findfirst(>(3.2), results.d)
+
+# Contour map: one column for each (mer, group) pair collected in `labels`
+Plots.default(fontfamily="Computer Modern")
+contourf(
+    1:length(labels),
+    results.d[idmin:idmax],
+    group_contribs[idmin:idmax, :],
+    color=cgrad(:tempo),
+    linewidth=1,
+    linecolor=:black,
+    colorbar=:none,
+    levels=10,
+    xlabel="Group",
+    ylabel=L"r/\AA",
+    xrotation=60,
+    xticks=(1:length(labels), labels),
+    margin=5Plots.Measures.mm, # adjust margin
+)
+savefig("./map2D_acr.png")
+println("Plot saved to map2D_acr.png")

Output

The terminal methyl groups interact strongly with DMF, and strong local density augmentations are visible in particular on the amine groups. These occur at less than 2.0 Å and are characteristic of hydrogen-bond interactions. Interestingly, the DMF molecules are excluded from the aliphatic and carbonyl groups of the polymer, relative to the other groups.

Finally, it is noticeable that the central mer is more weakly solvated by DMF than the mers approaching the extremes of the polymer chain. This is likely a result of the partial folding of the polymer, which protects the central mers from the solvent in a fraction of the polymer configurations.

+ +

References

Molecules built with JSME: B. Bienfait and P. Ertl, JSME: a free molecule editor in JavaScript, Journal of Cheminformatics 5:24 (2013) http://biomodel.uah.es/en/DIY/JSME/draw.en.htm

The system was built with Packmol.

The simulations were performed with NAMD, with CHARMM36 parameters.

diff --git a/dev/example3/index.html b/dev/example3/index.html new file mode 100644 index 00000000..5a762e34 --- /dev/null +++ b/dev/example3/index.html @@ -0,0 +1,513 @@ + +◦ POPC membrane in water/ethanol · ComplexMixtures.jl

POPC membrane in water/ethanol

In this example ComplexMixtures.jl is used to study the interactions of a POPC membrane with a mixture of 20%(mol/mol) ethanol in water. At this concentration ethanol destabilizes the membrane.

+ +

System image: a POPC membrane (center) solvated by a mixture of water (purple) and ethanol (green). The system is composed by 59 POPC, 5000 water, and 1000 ethanol molecules.

Index

Data, packages, and execution

The files required to run this example are:

  • equilibrated.pdb: The PDB file of the complete system.
  • traj_POPC.dcd: Trajectory file. This is a 365Mb file, necessary for running from scratch the calculations.

To run the scripts, we suggest the following procedure:

  1. Create a directory, for example example3.
  2. Copy the required data files above to this directory.
  3. Launch julia in that directory: activate the directory environment, and install the required packages. This can be done by launching Julia and executing:
    import Pkg 
    +Pkg.activate(".")
    +Pkg.add(["ComplexMixtures", "PDBTools", "Plots", "LaTeXStrings", "EasyFit"])
    +exit()
  4. Copy the code of each script into a file, and execute with:
    julia -t auto script.jl
    Alternatively (and perhaps preferably), copy line by line the content of the script into the Julia REPL, to follow each step of the calculation.

MDDF and KB integrals

Here we show the distribution functions and KB integrals associated to the solvation of the membrane by water and ethanol.

Complete example code: click here!
import Pkg;
+Pkg.activate(".");
+
+using PDBTools
+using ComplexMixtures
+using Plots
+using LaTeXStrings
+using EasyFit: movavg
+
+# The full trajectory file is available at:
+# https://www.dropbox.com/scl/fi/hcenxrdf8g8hfbllyakhy/traj_POPC.dcd?rlkey=h9zivtwgya3ivva1i6q6xmr2p&dl=0
+trajectory_file = "./traj_POPC.dcd"
+
+# Load a PDB file of the system
+system = readPDB("./equilibrated.pdb")
+
+# Select the atoms corresponding to POPC, water and ethanol
+popc = select(system, "resname POPC")
+water = select(system, "water")
+ethanol = select(system, "resname ETOH")
+
+# Set the complete membrane as the solute. We use nmols=1 here such
+# that the membrane is considered a single solute in the calculation.
+solute = AtomSelection(popc, nmols=1)
+
+# Compute water-POPC distribution and KB integral
+solvent = AtomSelection(water, natomspermol=3)
+
+# Set the trajectory structure
+trajectory = Trajectory(trajectory_file, solute, solvent)
+
+# We want to get reasonably converged KB integrals, which usually
+# require large solute domains. Distribution functions, on the other
+# hand, converge rapidly (~10 Angs or less).
+options = Options(dbulk=20.0)
+
+# Compute the mddf and associated properties
+mddf_water_POPC = mddf(trajectory, options)
+
+# Save results to file for later use
+save(mddf_water_POPC, "./mddf_water_POPC.json")
+println("Results saved to ./mddf_water_POPC.json file")
+
+# Compute ethanol-POPC distribution and KB integral: only the solvent
+# selection changes, the solute and the options are the same as above
+solvent = AtomSelection(ethanol, natomspermol=9)
+traj = Trajectory(trajectory_file, solute, solvent)
+mddf_ethanol_POPC = mddf(traj, options)
+
+# Save results for later use
+save(mddf_ethanol_POPC, "./mddf_ethanol_POPC.json")
+println("Results saved to ./mddf_ethanol_POPC.json file")
+
+#
+# Plot the MDDF and KB integrals
+#
+# Plot defaults
+plot_font = "Computer Modern"
+default(
+    fontfamily=plot_font,
+    linewidth=2.5, 
+    framestyle=:box, 
+    label=nothing, 
+    grid=false,
+    palette=:tab10
+)
+scalefontsizes(); scalefontsizes(1.3)
+
+#
+# Plot the cosolvent-POPC MDDFs in subplot 1
+#
+plot(layout=(2,1))
+# Water MDDF
+plot!(
+    mddf_water_POPC.d, # distances
+    movavg(mddf_water_POPC.mddf,n=10).x, # water MDDF - smoothed
+    label="Water",
+    subplot=1
+)
+# Ethanol MDDF
+plot!(
+    mddf_ethanol_POPC.d, # distances
+    movavg(mddf_ethanol_POPC.mddf,n=10).x, # ethanol MDDF - smoothed
+    label="Ethanol",
+    subplot=1
+)
+# Plot settings
+plot!(
+    xlabel=L"\textrm{Distance / \AA}",
+    ylabel="MDDF",
+    xlim=(0,10),
+    subplot=1
+)
+
+#
+# Plot the cosolvent-POPC KB integrals in subplot 2
+#
+# The axis is labeled in L/mol, so the KB integrals are divided by 1000.
+# Assumes `kb` is stored in cm^3/mol, as in the `results.kb / 1000 # to L/mol`
+# conversion of the protein example - TODO confirm
+#
+# Water KB
+plot!(
+    mddf_water_POPC.d, # distances
+    mddf_water_POPC.kb / 1000, # water KB, converted to L/mol
+    label="Water",
+    subplot=2
+)
+# Ethanol KB
+plot!(
+    mddf_ethanol_POPC.d, # distances
+    mddf_ethanol_POPC.kb / 1000, # ethanol KB, converted to L/mol
+    label="Ethanol",
+    subplot=2
+)
+# Plot settings
+plot!(
+    xlabel=L"\textrm{Distance / \AA}",
+    ylabel=L"\textrm{KB~/~L~mol^{-1}}",
+    xlim=(0,10),
+    subplot=2
+)
+
+savefig("popc_water_ethanol_mddf_kb.png")
+println("Plot saved to popc_water_ethanol_mddf_kb.png file")
+
+
+

Output

The distribution functions are shown in the first panel of the figure below, and the KB integrals are shown in the second panel.

Clearly, both water and ethanol accumulate in the proximity of the membrane. The distribution functions suggest that ethanol displays a greater local density augmentation, reaching concentrations roughly 4 times higher than bulk concentrations. Water has a peak at hydrogen-bonding distances (~1.8$\mathrm{\AA}$) and a secondary peak at 2.5$\mathrm{\AA}$.

Despite the fact that ethanol displays a greater relative density (relative to its own bulk concentration) at short distances, the KB integral of water turns out to be greater (more positive) than that of ethanol. This implies that the membrane is preferentially hydrated.

Ethanol group contributions

The minimum-distance distribution function can be decomposed into the contributions of the ethanol molecule groups.

Complete example code: click here!
import Pkg;
+Pkg.activate(".");
+
+using ComplexMixtures
+using PDBTools
+using Plots
+using LaTeXStrings
+using EasyFit: movavg
+
+# Some default settings for the plots
+plot_font = "Computer Modern"
+Plots.default(
+    fontfamily=plot_font,
+    linewidth=1.5,
+    framestyle=:box,
+    label=nothing,
+    grid=false,
+)
+scalefontsizes(); scalefontsizes(1.3)
+
+# Read system PDB file and select the ethanol atoms, which are used
+# below to define the ethanol groups
+system = readPDB("equilibrated.pdb")
+ethanol = select(system, "resname ETOH")
+
+# Load the pre-calculated MDDF of ethanol (saved by the previous script)
+mddf_ethanol_POPC = load("mddf_ethanol_POPC.json")
+
+#
+# Contributions of the ethanol groups
+#
+# The groups are defined with atom selections: the dictionary maps each
+# group name (used as the curve label) to the selected atoms
+groups = Dict(
+    "Hydroxyl" => select(ethanol, "name O1 or name HO1"),
+    "Aliphatic chain" => select(ethanol, "not name O1 and not name HO1"),
+)
+
+# Total MDDF first, followed by one curve for each group
+x = mddf_ethanol_POPC.d
+plot(x, movavg(mddf_ethanol_POPC.mddf, n=10).x; label="Total MDDF")
+for (label, selection) in groups
+    smoothed = movavg(contributions(mddf_ethanol_POPC, SolventGroup(selection)), n=10).x
+    plot!(x, smoothed; label=label)
+end
+
+# Axis range and labels
+plot!(xlim=(1, 8), xlabel=L"\textrm{Distance / \AA}", ylabel="MDDF")
+
+savefig("./mddf_ethanol_groups.png")
+println("The plot was saved as mddf_ethanol_groups.png")

In the figure below we show the contributions of the ethanol hydroxyl and aliphatic chain groups to the total MDDF.

https://raw.githubusercontent.com/m3g/ComplexMixturesExamples/main/POPC_in_Water-Ethanol/results/mddf_ethanol_groups.png

As expected, the MDDF at hydrogen-bonding distances is composed by contributions of the ethanol hydroxyl group, and the non-specific interactions at ~2.5$\mathrm{\AA}$ have a greater contribution of the aliphatic chain of the solvent molecules. It is interesting to explore the chemical complexity of POPC in what concerns these interactions.

Interaction of POPC groups with water

The MDDF can also be decomposed into the contributions of the solute atoms and chemical groups. First, we show the contributions of the POPC chemical groups to the water-POPC distribution.

Complete example code: click here!
import Pkg;
+Pkg.activate(".");
+
+using ComplexMixtures
+using PDBTools
+using Plots
+using LaTeXStrings
+using EasyFit: movavg
+
+# Some default settings for the plots
+plot_font = "Computer Modern"
+Plots.default(
+    fontfamily=plot_font,
+    linewidth=2,
+    framestyle=:box,
+    label=nothing,
+    grid=false,
+)
+scalefontsizes();
+scalefontsizes(1.3);
+
+# Read system PDB file
+# NOTE(review): `system` is not used in the remainder of this script, as
+# the groups below are defined by atom names only
+system = readPDB("equilibrated.pdb")
+
+# Load the pre-calculated MDDF of water (saved by the first script)
+mddf_water_POPC = load("mddf_water_POPC.json")
+
+#
+# Here we define the POPC groups, from the atom names. The keys of the
+# dictionary are the group names, and each value is the vector of atom
+# names of the group.
+#
+groups = Dict(
+    "Choline" => ["N", "C12", "H12A", "H12B", "C13", "H13A", "H13B", "H13C", "C14",
+        "H14A", "H14B", "H14C", "C15", "H15A", "H15B", "H15C", "C11", "H11A", "H11B"],
+    "Phosphate" => ["P", "O13", "O14", "O12"],
+    "Glycerol" => ["O11", "C1", "HA", "HB", "C2", "HS", "O21", "C3", "HX", "HY", "O31"],
+    "Oleoyl" => ["O22", "C21", "H2R", "H2S", "C22", "C23", "H3R", "H3S", "C24", "H4R", "H4S",
+        "C25", "H5R", "H5S", "C26", "H6R", "H6S", "C27", "H7R", "H7S", "C28", "H8R", "H8S",
+        "C29", "H91", "C210", "H101", "C211", "H11R", "H11S", "C212", "H12R", "H12S",
+        "C213", "H13R", "H13S", "C214", "H14R", "H14S", "C215", "H15R", "H15S",
+        "C216", "H16R", "H16S", "C217", "H17R", "H17S", "C218", "H18R", "H18S", "H18T"],
+    "Palmitoyl" => ["C31", "O32", "C32", "H2X", "H2Y", "C33", "H3X", "H3Y", "C34", "H4X", "H4Y",
+        "C35", "H5X", "H5Y", "C36", "H6X", "H6Y", "C37", "H7X", "H7Y", "C38", "H8X",
+        "H8Y", "C39", "H9X", "H9Y", "C310", "H10X", "H10Y", "C311", "H11X", "H11Y",
+        "C312", "H12X", "H12Y", "C313", "H13X", "H13Y", "C314", "H14X", "H14Y", "C315",
+        "H15X", "H15Y", "C316", "H16X", "H16Y", "H16Z"],
+)
+
+#
+# Total water-POPC MDDF, followed by the contribution of each POPC group
+#
+x = mddf_water_POPC.d
+plot(x, movavg(mddf_water_POPC.mddf, n=10).x; label="Total water-POPC MDDF")
+for (label, atom_names) in groups
+    smoothed = movavg(contributions(mddf_water_POPC, SoluteGroup(atom_names)), n=10).x
+    plot!(x, smoothed; label=label)
+end
+
+# Axis range and labels
+plot!(xlim=(1, 5), xlabel=L"\textrm{Distance / \AA}", ylabel="MDDF")
+
+savefig("./mddf_POPC_water_groups.png")
+println("The plot was saved as mddf_POPC_water_groups.png")
+
+

https://raw.githubusercontent.com/m3g/ComplexMixturesExamples/main/POPC_in_Water-Ethanol/results/mddf_popc_water_groups.png

Not surprisingly, water interactions occur mostly with the Phosphate and Choline groups of POPC molecules, that is, with the polar head of the lipid. The interactions at hydrogen-bonding distances are dominated by the phosphate group, and non-specific interactions occur mostly with the choline group. Some water molecules penetrate the membrane and interact with the glycerol and aliphatic chains of POPC, but these contributions are clearly secondary.

Interaction of POPC groups with ethanol

The interactions of ethanol molecules with the membrane are more interesting, because ethanol penetrates the membrane. Here we decompose the ethanol-POPC distribution function into the contributions of the POPC chemical groups.

Complete example code: click here!
import Pkg;
+Pkg.activate(".");
+
+using ComplexMixtures
+using PDBTools
+using Plots
+using LaTeXStrings
+using EasyFit: movavg
+
+# Some default settings for the plots
+plot_font = "Computer Modern"
+Plots.default(
+    fontfamily=plot_font,
+    linewidth=2,
+    framestyle=:box,
+    label=nothing,
+    grid=false,
+)
+scalefontsizes();
+scalefontsizes(1.3);
+
+# Read system PDB file
+# NOTE(review): `system` is not used in the remainder of this script, as
+# the groups below are defined by atom names only
+system = readPDB("equilibrated.pdb")
+
+# Load the pre-calculated MDDF of ethanol (saved by the first script)
+mddf_ethanol_POPC = load("mddf_ethanol_POPC.json")
+
+#
+# Here we define the POPC groups, from the atom names. The keys of the
+# dictionary are the group names, and each value is the vector of atom
+# names of the group.
+#
+groups = Dict(
+    "Choline" => ["N", "C12", "H12A", "H12B", "C13", "H13A", "H13B", "H13C", "C14",
+        "H14A", "H14B", "H14C", "C15", "H15A", "H15B", "H15C", "C11", "H11A", "H11B"],
+    "Phosphate" => ["P", "O13", "O14", "O12"],
+    "Glycerol" => ["O11", "C1", "HA", "HB", "C2", "HS", "O21", "C3", "HX", "HY", "O31"],
+    "Oleoyl" => ["O22", "C21", "H2R", "H2S", "C22", "C23", "H3R", "H3S", "C24", "H4R", "H4S",
+        "C25", "H5R", "H5S", "C26", "H6R", "H6S", "C27", "H7R", "H7S", "C28", "H8R", "H8S",
+        "C29", "H91", "C210", "H101", "C211", "H11R", "H11S", "C212", "H12R", "H12S",
+        "C213", "H13R", "H13S", "C214", "H14R", "H14S", "C215", "H15R", "H15S",
+        "C216", "H16R", "H16S", "C217", "H17R", "H17S", "C218", "H18R", "H18S", "H18T"],
+    "Palmitoyl" => ["C31", "O32", "C32", "H2X", "H2Y", "C33", "H3X", "H3Y", "C34", "H4X", "H4Y",
+        "C35", "H5X", "H5Y", "C36", "H6X", "H6Y", "C37", "H7X", "H7Y", "C38", "H8X",
+        "H8Y", "C39", "H9X", "H9Y", "C310", "H10X", "H10Y", "C311", "H11X", "H11Y",
+        "C312", "H12X", "H12Y", "C313", "H13X", "H13Y", "C314", "H14X", "H14Y", "C315",
+        "H15X", "H15Y", "C316", "H16X", "H16Y", "H16Z"],
+)
+
+#
+# Total ethanol-POPC MDDF, followed by the contribution of each POPC group
+#
+x = mddf_ethanol_POPC.d
+plot(x, movavg(mddf_ethanol_POPC.mddf, n=10).x; label="Total ethanol-POPC MDDF")
+for (label, atom_names) in groups
+    smoothed = movavg(contributions(mddf_ethanol_POPC, SoluteGroup(atom_names)), n=10).x
+    plot!(x, smoothed; label=label)
+end
+
+# Axis ranges and labels
+plot!(xlim=(1.3, 5), ylim=(0, 1.8), xlabel=L"\textrm{Distance / \AA}", ylabel="MDDF")
+
+savefig("./mddf_POPC_ethanol_groups.png")
+println("The plot was saved as mddf_POPC_ethanol_groups.png")
+

https://raw.githubusercontent.com/m3g/ComplexMixturesExamples/main/POPC_in_Water-Ethanol/results/mddf_popc_ethanol_groups.png

Ethanol molecules interact with the choline and phosphate groups of POPC molecules, as do water molecules. The contributions to the MDDF at hydrogen-bonding distances come essentially from ethanol-phosphate interactions.

However, ethanol molecules interact frequently with the glycerol and aliphatic chains of POPC. Interactions with the Oleoyl chain are slightly stronger than with the Palmitoyl chain. This means that ethanol penetrates the hydrophobic core of the membrane, displaying non-specific interactions with the lipids and with the glycerol group. These interactions are probably associated to the destabilizing role of ethanol in the membrane structure.

Density map on POPC chains

The MDDFs can be decomposed at a more granular level, in which each chemical group of the aliphatic chains of the POPC molecules is considered independently. This allows the study of the penetration of the ethanol molecules in the membrane. In the figure below, the carbonyl following the glycerol group of the POPC molecules is represented on the left, and going to the right the aliphatic chain groups are sequentially shown.

Complete example code: click here!
import Pkg;
+Pkg.activate(".");
+
+using ComplexMixtures
+using PDBTools
+using Plots
+using LaTeXStrings
+using EasyFit: movavg
+
+# Some default settings for the plots
+plot_font = "Computer Modern"
+Plots.default(
+    fontfamily=plot_font,
+    linewidth=2,
+    framestyle=:box,
+    label=nothing,
+    grid=false,
+)
+scalefontsizes();
+scalefontsizes(1.3);
+
+# Read the PDB file of the system
+system = readPDB("equilibrated.pdb")
+
+# Load the pre-calculated ethanol-POPC MDDF results, saved previously as JSON
+mddf_ethanol_POPC = load("mddf_ethanol_POPC.json")
+
+# Split the oleoyl chain into groups, listed in order along the chain.
+# The labels `CH_2` etc. stand for `CH₂`, for example, in LaTeX notation,
+# for nicer formatting of the plot axis ticks.
+oleoyl_groups = [
+    "CO" => ["O22", "C21"],
+    "CH_2" => ["H2R", "H2S", "C22"],
+    "CH_2" => ["C23", "H3R", "H3S"],
+    "CH_2" => ["C24", "H4R", "H4S"],
+    "CH_2" => ["C25", "H5R", "H5S"],
+    "CH_2" => ["C26", "H6R", "H6S"],
+    "CH_2" => ["C27", "H7R", "H7S"],
+    "CH_2" => ["C28", "H8R", "H8S"],
+    "CH" => ["C29", "H91"],
+    "CH" => ["C210", "H101"],
+    "CH_2" => ["C211", "H11R", "H11S"],
+    "CH_2" => ["C212", "H12R", "H12S"],
+    "CH_2" => ["C213", "H13R", "H13S"],
+    "CH_2" => ["C214", "H14R", "H14S"],
+    "CH_2" => ["C215", "H15R", "H15S"],
+    "CH_2" => ["C216", "H16R", "H16S"],
+    "CH_2" => ["C217", "H17R", "H17S"],
+    "CH_3" => ["C218", "H18R", "H18S", "H18T"]
+]
+
+# Format tick labels with LaTeX: one label per group, built from the group name
+labels_o = [latexstring("\\textrm{$key}") for (key, val) in oleoyl_groups]
+
+# Collect the smoothed (moving average) contribution of each group into a vector of vectors:
+gcontrib = Vector{Float64}[] # empty vector of vectors
+for (group_name, group_atoms) in oleoyl_groups
+    group_contributions = contributions(mddf_ethanol_POPC, SoluteGroup(group_atoms))
+    push!(gcontrib, movavg(group_contributions; n=10).x)
+end
+
+# Convert the vector of vectors into a matrix (one column per group)
+gcontrib = stack(gcontrib)
+
+# Indices of the first distances above 1.5 Å and 3.0 Å, delimiting the plotted range
+idmin = findfirst(d -> d > 1.5, mddf_ethanol_POPC.d)
+idmax = findfirst(d -> d > 3.0, mddf_ethanol_POPC.d)
+
+# The figure will have two rows: the first panel will contain the
+# oleoyl groups contributions, and the second panel will contain the
+# contributions of the palmitoyl groups.
+plot(layout=(2, 1))
+
+# Plot the contributions of the oleoyl groups (x: group index, y: distance)
+contourf!(
+    1:length(oleoyl_groups),
+    mddf_ethanol_POPC.d[idmin:idmax],
+    gcontrib[idmin:idmax, :],
+    color=cgrad(:tempo), linewidth=1, linecolor=:black,
+    colorbar=:none, levels=10,
+    ylabel=L"r/\AA", xrotation=60,
+    xticks=(1:length(oleoyl_groups), labels_o), 
+    subplot=1,
+)
+annotate!( 14, 2.7, text("Oleoyl", :left, 12, plot_font), subplot=1)
+
+#
+# Repeat the procedure for the palmitoyl groups
+#
+palmitoyl_groups = [
+    "CO" => ["C31", "O32"],
+    "CH_2" => ["C32", "H2X", "H2Y"],
+    "CH_2" => ["C33", "H3X", "H3Y"],
+    "CH_2" => ["C34", "H4X", "H4Y"],
+    "CH_2" => ["C35", "H5X", "H5Y"],
+    "CH_2" => ["C36", "H6X", "H6Y"],
+    "CH_2" => ["C37", "H7X", "H7Y"],
+    "CH_2" => ["C38", "H8X", "H8Y"],
+    "CH_2" => ["C39", "H9X", "H9Y"],
+    "CH_2" => ["C310", "H10X", "H10Y"],
+    "CH_2" => ["C311", "H11X", "H11Y"],
+    "CH_2" => ["C312", "H12X", "H12Y"],
+    "CH_2" => ["C313", "H13X", "H13Y"],
+    "CH_2" => ["C314", "H14X", "H14Y"],
+    "CH_2" => ["C315", "H15X", "H15Y"],
+    "CH_3" => ["C316", "H16X", "H16Y", "H16Z"],
+]
+
+# Format tick labels with LaTeX: one label per group, built from the group name
+labels_p = [latexstring("\\textrm{$key}") for (key, val) in palmitoyl_groups]
+
+# Collect the smoothed (moving average) contribution of each group into a vector of vectors:
+gcontrib = Vector{Float64}[] # empty vector of vectors
+for (group_name, group_atoms) in palmitoyl_groups
+    group_contributions = contributions(mddf_ethanol_POPC, SoluteGroup(group_atoms))
+    push!(gcontrib, movavg(group_contributions; n=10).x)
+end
+
+# Convert the vector of vectors into a matrix (one column per group)
+gcontrib = stack(gcontrib)
+
+# Plot the contributions of the palmitoyl groups in the second panel
+contourf!(
+    1:length(palmitoyl_groups),
+    mddf_ethanol_POPC.d[idmin:idmax],
+    gcontrib[idmin:idmax, :],
+    color=cgrad(:tempo), linewidth=1, linecolor=:black,
+    colorbar=:none, levels=10,
+    xlabel="Group",
+    ylabel=L"r/\AA", xrotation=60,
+    xticks=(1:length(labels_p), labels_p), 
+    bottom_margin=0.5Plots.Measures.cm,
+    subplot=2,
+)
+annotate!( 12, 2.7, text("Palmitoyl", :left, 12, plot_font), subplot=2)
+
+savefig("POPC_ethanol_chains.png")
+println("The plot was saved as POPC_ethanol_chains.png")
+
+#
+# Now, plot a similar map for the water interactions with the POPC chains
+#
+mddf_water_POPC = load("mddf_water_POPC.json")
+plot(layout=(2, 1))
+
+# Contributions of the oleoyl groups (first panel)
+# Collect the smoothed contribution of each group into a vector of vectors:
+gcontrib = Vector{Float64}[] # empty vector of vectors
+for (group_name, group_atoms) in oleoyl_groups
+    group_contributions = contributions(mddf_water_POPC, SoluteGroup(group_atoms))
+    push!(gcontrib, movavg(group_contributions; n=10).x)
+end
+# Convert the vector of vectors into a matrix (one column per group)
+gcontrib = stack(gcontrib)
+# Plot matrix as density map, reusing the idmin:idmax range computed above
+contourf!(
+    1:length(oleoyl_groups),
+    mddf_water_POPC.d[idmin:idmax],
+    gcontrib[idmin:idmax, :],
+    color=cgrad(:tempo), linewidth=1, linecolor=:black,
+    colorbar=:none, levels=10,
+    ylabel=L"r/\AA", xrotation=60,
+    xticks=(1:length(oleoyl_groups), labels_o), subplot=1,
+)
+annotate!( 14, 2.7, text("Oleoyl", :left, 12, plot_font), subplot=1)
+
+# Contributions of the palmitoyl groups to the water-POPC MDDF (second panel)
+# Collect the smoothed contribution of each group into a vector of vectors:
+gcontrib = Vector{Float64}[] # empty vector of vectors
+for (group_name, group_atoms) in palmitoyl_groups
+    group_contributions = contributions(mddf_water_POPC, SoluteGroup(group_atoms))
+    push!(gcontrib, movavg(group_contributions; n=10).x)
+end
+# Convert the vector of vectors into a matrix (one column per group)
+gcontrib = stack(gcontrib)
+# Plot matrix as density map; tick labels must be the palmitoyl ones (labels_p)
+contourf!(
+    1:length(palmitoyl_groups),
+    mddf_water_POPC.d[idmin:idmax],
+    gcontrib[idmin:idmax, :],
+    color=cgrad(:tempo), linewidth=1, linecolor=:black,
+    colorbar=:none, levels=10,
+    xlabel="Group",
+    ylabel=L"r/\AA", xrotation=60,
+    xticks=(1:length(palmitoyl_groups), labels_p),
+    subplot=2,
+    bottom_margin=0.5Plots.Measures.cm,
+)
+annotate!( 12, 2.7, text("Palmitoyl", :left, 12, plot_font), subplot=2)
+
+savefig("POPC_water_chains.png")
+println("The plot was saved as POPC_water_chains.png")

./assets/scripts/example3/POPC_ethanol_chains.png

Ethanol displays an important density augmentation in the vicinity of the carbonyl that follows the glycerol group, and accumulates in the proximity of the aliphatic chain. The density of ethanol decreases as one advances into the aliphatic chain, displaying a minimum around the unsaturation in the Oleoyl chain. The terminal methyl groups of both chains display a greater solvation by ethanol, suggesting that the twisting of the aliphatic chains exposes these terminal groups to membrane depths where ethanol is already abundant.

The equivalent maps for water are strikingly different, and show that water is excluded from the interior of the membrane:

./assets/scripts/example3/POPC_water_chains.png

References

Membrane built with the VMD membrane plugin.

Water and ethanol layers added with Packmol.

The simulations were performed with NAMD, with CHARMM36 parameters.

Density of the ethanol-water mixture from: https://wissen.science-and-fun.de/chemistry/chemistry/density-tables/ethanol-water-mixtures/

diff --git a/dev/example4/index.html b/dev/example4/index.html new file mode 100644 index 00000000..79985e31 --- /dev/null +++ b/dev/example4/index.html @@ -0,0 +1,282 @@ + +◦ Water/Glycerol mixture · ComplexMixtures.jl

Glycerol/water mixture

This example illustrates the use of ComplexMixtures.jl to study the solution structure of a crowded (1:1 molar fraction) solution of glycerol in water. Here, we compute the distribution function and atomic contributions associated with the inter-species interactions (water-glycerol) and the glycerol-glycerol auto-correlation function. This example aims to illustrate how to obtain a detailed molecular picture of the solvation structures in a homogeneous mixture.

The system simulated consists of 1000 water molecules (red) and 1000 glycerol molecules (purple).

+ +

Index

Data, packages, and execution

The files required to run this example are:

  • equilibrated.pdb: The PDB file of the complete system.
  • traj_Glyc.dcd: Trajectory file. This is a 200 MB file, necessary for running the calculations from scratch.

To run the scripts, we suggest the following procedure:

  1. Create a directory, for example example4.
  2. Copy the required data files above to this directory.
  3. Launch julia in that directory: activate the directory environment, and install the required packages. This means launching Julia and executing:
    import Pkg 
    +Pkg.activate(".")
    +Pkg.add(["ComplexMixtures", "PDBTools", "Plots", "LaTeXStrings", "EasyFit"])
    +exit()
  4. Copy the code of each script into a file, and execute with:
    julia -t auto script.jl
    Alternatively (and perhaps preferably), copy line by line the content of the script into the Julia REPL, to follow each step of the calculation.

Glycerol-Glycerol and Water-Glycerol distribution functions

The first and most simple analysis is the computation of the minimum-distance distribution functions between the components of the solution. In this example we focus on the distributions of the two components relative to the glycerol molecules. Thus, we display the glycerol auto-correlation function, and the water-glycerol correlation function in the first panel of the figure below. The second panel displays the KB integrals of the two components computed from each of these distributions.

Complete example code: click here!
import Pkg;
+Pkg.activate(".");
+
+using PDBTools
+using ComplexMixtures
+using Plots
+using LaTeXStrings
+using EasyFit: movavg
+
+# Load the PDB file of the system
+system = readPDB("./equilibrated.pdb")
+
+# The full trajectory file is available at:
+# https://www.dropbox.com/scl/fi/ag7k2d7i9d7ivbd5zmtl9/traj_Glyc.dcd?rlkey=93i31a5ytlzb34ulzjz315eyq&dl=0
+trajectory_file = "./traj_Glyc.dcd"
+
+# Select the atoms corresponding to glycerol and water (using PDBTools)
+glyc = select(system, "resname GLLM")
+water = select(system, "water")
+
+# Compute the glycerol-glycerol autocorrelation MDDF (14 atoms per glycerol molecule)
+solute = AtomSelection(glyc, natomspermol=14)
+trajectory = Trajectory(trajectory_file, solute) # solute and solvent are the same
+
+# We define a large solute domain (large dbulk) to obtain a good convergence
+# of the KB integral. The MDDF converges at much shorter distances.
+options = Options(dbulk=20.0)
+mddf_glyc = mddf(trajectory, options)
+
+# Save results for later analysis
+save(mddf_glyc, "./mddf_glyc.json")
+
+# Compute the water-glycerol MDDF (glycerol is the solute, water the solvent)
+solvent = AtomSelection(water, natomspermol=3)
+trajectory = Trajectory(trajectory_file, solute, solvent)
+mddf_glyc_water = mddf(trajectory, options)
+
+# Save results for later analysis
+save(mddf_glyc_water, "./mddf_glyc_water.json")
+
+#
+# Plot the MDDFs (first panel) and the KB integrals (second panel)
+#
+Plots.default(
+    fontfamily="Computer Modern",
+    linewidth=2,
+    framestyle=:box,
+    grid=false,
+    label=nothing,
+)
+scalefontsizes();
+scalefontsizes(1.3)
+plot(layout=(2, 1))
+
+# glycerol-glycerol autocorrelation
+x = mddf_glyc.d # distances
+y = movavg(mddf_glyc.mddf, n=10).x # the mddf (using movavg to smooth noise)
+plot!(x, y, label="Glycerol-Glycerol", subplot=1)
+
+# water-glycerol correlation
+x = mddf_glyc_water.d
+y = movavg(mddf_glyc_water.mddf, n=10).x
+plot!(x, y, label="Glycerol-Water", subplot=1)
+plot!(ylabel="MDDF", xlim=(1.5, 8), subplot=1)
+
+# Plot the KB integrals
+# glycerol-glycerol (x still holds mddf_glyc_water.d from the block above)
+y = movavg(mddf_glyc.kb, n=10).x
+plot!(x, y, subplot=2)
+
+# water-glycerol
+y = movavg(mddf_glyc_water.kb, n=10).x
+plot!(x, y, subplot=2)
+
+# plot settings
+plot!(
+    xlabel=L"\textrm{Distance / \AA}",
+    ylabel=L"\textrm{KB~/~cm^3~mol^{-1}}",
+    xlim=(0, 20),
+    subplot=2
+)
+
+# Save plot
+savefig("./mddf_kb.png")
+println("Plot saved to mddf_kb.png")
+
+

Both water and glycerol form hydrogen bonds with (other) glycerol molecules, as indicated by the peaks at ~1.8$\mathrm{\AA}$. The auto-correlation function of glycerol shows a more marked second peak corresponding to non-specific interactions, which (as we will show) are likely associated with interactions of its aliphatic groups.

The KB integrals in the second panel show similar values for water and glycerol, with the KB integral for water being slightly greater. This means that glycerol molecules are (slightly, if the result is considered reliable) preferentially hydrated from a macroscopic standpoint.

Glycerol group contributions to MDDFs

Complete example code: click here!
import Pkg;
+Pkg.activate(".");
+
+using PDBTools
+using ComplexMixtures
+using Plots
+using LaTeXStrings
+using EasyFit: movavg
+
+# Load the PDB file of the system
+system = readPDB("./equilibrated.pdb")
+
+# Select the atoms corresponding to glycerol and water (using PDBTools)
+glyc = select(system, "resname GLLM")
+water = select(system, "water")
+
+# Load the previously computed MDDFs
+mddf_glyc = load("./mddf_glyc.json")
+mddf_glyc_water = load("./mddf_glyc_water.json")
+
+# Plot some group contributions to the MDDF. We select the atom names
+# corresponding to each type of group of the glycerol molecule.
+hydroxyls = ["O1", "O2", "O3", "HO1", "HO2", "HO3"]
+aliphatic = ["C1", "C2", "C3", "H11", "H12", "H2", "H31", "H32"]
+
+#
+# Extract the contributions of these atoms to the MDDFs
+#
+# glycerol-glycerol
+mddf_glyc_hydroxyls = contributions(mddf_glyc, SoluteGroup(hydroxyls))
+mddf_glyc_aliphatic = contributions(mddf_glyc, SoluteGroup(aliphatic))
+# glycerol-water (contributions of the glycerol groups, selected as solute groups)
+mddf_glyc_water_hydroxyls = contributions(mddf_glyc_water, SoluteGroup(hydroxyls))
+mddf_glyc_water_aliphatic = contributions(mddf_glyc_water, SoluteGroup(aliphatic))
+
+#
+# Plot the contributions in a two-panel figure
+#
+Plots.default(
+    fontfamily="Computer Modern",
+    linewidth=2,
+    framestyle=:box,
+    grid=false,
+    label=nothing,
+)
+scalefontsizes();
+scalefontsizes(1.2)
+plot(layout=(2, 1))
+
+#
+# Group contributions to the glycerol-glycerol autocorrelation (first panel)
+#
+x = mddf_glyc.d # distances
+# Total mddf (smoothed with a moving average)
+y = movavg(mddf_glyc.mddf; n=10).x
+plot!(x, y, label="Total", subplot=1)
+# Hydroxyls
+y = movavg(mddf_glyc_hydroxyls; n=10).x
+plot!(x, y, label="Hydroxyls", subplot=1)
+# Aliphatic
+y = movavg(mddf_glyc_aliphatic; n=10).x
+plot!(x, y, label="Aliphatic", subplot=1)
+
+#
+# Group contributions to the glycerol-water correlation (second panel)
+#
+x = mddf_glyc_water.d # distances
+# Total mddf (smoothed with a moving average)
+y = movavg(mddf_glyc_water.mddf; n=10).x
+plot!(x, y, label="Total", subplot=2)
+# Hydroxyls
+y = movavg(mddf_glyc_water_hydroxyls; n=10).x
+plot!(x, y, label="Hydroxyls", subplot=2)
+# Aliphatic
+y = movavg(mddf_glyc_water_aliphatic; n=10).x
+plot!(x, y, label="Aliphatic", subplot=2)
+
+# plot settings: axis labels and a common distance range for both panels
+plot!(ylabel="Glyc-Glyc MDDF", xlim=(1.0, 8.0), subplot=1)
+plot!(ylabel="Glyc-Water MDDF", xlim=(1.0, 8.0), subplot=2)
+plot!(xlabel=L"\mathrm{Distance / \AA}", subplot=2)
+
+# Save figure
+savefig("./mddf_group_contributions.png")
+println("Plot saved to mddf_group_contributions.png")
+

2D map of group contributions

The above distributions can be split into the contributions of each glycerol chemical group. The 2D maps below display this decomposition.

Complete example code: click here!
import Pkg;
+Pkg.activate(".");
+
+using PDBTools
+using ComplexMixtures
+using Plots
+using LaTeXStrings
+using EasyFit: movavg
+
+# Load the PDB file of the system
+system = readPDB("./equilibrated.pdb")
+
+# Select the atoms corresponding to glycerol and water (using PDBTools)
+glyc = select(system, "resname GLLM")
+water = select(system, "water")
+
+# Load the previously computed MDDFs
+mddf_glyc = load("./mddf_glyc.json")
+mddf_glyc_water = load("./mddf_glyc_water.json")
+
+# 2D maps plot of group contributions
+
+# Glycerol groups: each pair maps a LaTeX-style label to the atom names of the group
+groups = [
+    "OH" => ["O1", "HO1"], # first hydroxyl
+    "CH_2" => ["C1", "H11", "H12"], # first CH2
+    "OH" => ["O2", "HO2"], # second hydroxyl
+    "CH" => ["C2", "H2"], # CH
+    "CH_2" => ["C3", "H31", "H32"], # second CH2
+    "OH" => ["O3", "HO3"] # third hydroxyl
+]
+labels = latexstring.("\\textrm{$name}" for (name, atoms) in groups)
+
+#
+# Contributions to the Glycerol-Glycerol autocorrelation.
+# First, create a vector of vectors, in which each component carries the
+# contribution of one Glycerol group to the MDDF (SoluteGroup, as in the other scripts)
+#
+group_contrib = Vector{Float64}[] # empty vector of vectors
+for (name, atoms) in groups
+    push!(group_contrib, contributions(mddf_glyc, SoluteGroup(atoms)))
+end
+
+# Convert output to a matrix (one column per group) to plot a 2D map
+group_contrib = stack(group_contrib)
+
+# The distance range to plot: indices of the first distances above 1.5 Å and 3.0 Å
+idmin = findfirst(d -> d > 1.5, mddf_glyc.d)
+idmax = findfirst(d -> d > 3.0, mddf_glyc.d)
+
+#
+# plot the map
+#
+Plots.default(
+    fontfamily="Computer Modern",
+    linewidth=2,
+    framestyle=:box,
+    grid=false,
+    label=nothing,
+)
+scalefontsizes();
+scalefontsizes(1.3)
+
+plot(layout=(2, 1))
+
+# map of contributions to the Glycerol-Glycerol autocorrelation (first panel)
+contourf!(
+    1:length(groups),
+    mddf_glyc.d[idmin:idmax],
+    group_contrib[idmin:idmax, :],
+    color=cgrad(:tempo), linewidth=1, linecolor=:black,
+    colorbar=:none, levels=5,
+    xticks=(1:length(groups), labels), xrotation=60,
+    ylabel=L"r/\AA",
+    subplot=1
+)
+
+# Water-glycerol interactions (contributions of the Glycerol groups, as solute groups)
+group_contrib = Vector{Float64}[] # empty vector of vectors
+for (name, atoms) in groups 
+    push!(group_contrib, contributions(mddf_glyc_water, SoluteGroup(atoms)))
+end
+
+# Convert output to a matrix (one column per group) to plot a 2D map
+group_contrib = stack(group_contrib)
+
+# map of the contributions of Glycerol groups to the Glycerol-Water correlation (second panel)
+contourf!(
+    1:length(groups),
+    mddf_glyc_water.d[idmin:idmax],
+    group_contrib[idmin:idmax, :],
+    color=cgrad(:tempo), linewidth=1, linecolor=:black,
+    colorbar=:none, levels=5,
+    xticks=(1:length(groups), labels), xrotation=60,
+    ylabel=L"r/\AA",
+    subplot=2
+)
+plot!(
+    xlabel="Glycerol group", 
+    bottommargin = 0.5Plots.Measures.cm,
+    subplot=2
+)
+
+savefig("./GlycerolWater_map.png")
+println("Plot saved to GlycerolWater_map.png")

The interesting result here is that the $\mathrm{CH}$ group of glycerol is protected from both solvents. There is a strong density augmentation in the vicinity of hydroxyl groups, and the second peak of the MDDFs is clearly associated with interactions with the $\mathrm{CH_2}$ groups.

diff --git a/dev/examples/index.html b/dev/examples/index.html index b7e97f55..eff4596d 100644 --- a/dev/examples/index.html +++ b/dev/examples/index.html @@ -1,71 +1,5 @@ -Full Example · ComplexMixtures.jl

Example

Note

At this repository various examples are available illustrating the execution and possibilities of the package. Here we discuss one of these examples in detail.

The following examples consider a system composed a protein solvated by a mixture of water and glycerol, built with Packmol. The simulations were performed with NAMD with periodic boundary conditions and a NPT ensemble at room temperature and pressure. Molecular pictures were produced with VMD and plots were produced with Julia's Plots library.

- -

Image of the system of the example: a protein solvated by a mixture of glycreol (green) and water, at a concentration of 50%vv.

How to run this example

  • Download and install Julia

  • Install the required packages. Within Julia, do:

julia> import Pkg
-
-julia> Pkg.add(["ComplexMixtures", "PDBTools", "Plots", "LaTeXStrings", "Formatting"])
  • Get the files:
git clone https://github.com/m3g/ComplexMixturesExamples

The files associated to the following examples are distributed at this page.

Data

The Data directory contains the a pdb file of the system (system.pdb) and a sample from the trajectory (glyc50.dcd), with a few frames. It also contains the result of running the mddf calculation on the complete trajectory, results_glyc50.json. This last file was produced by ComplexMixtures, as indicated in the following examples.

The sample trajectory is provided so that the first example can be run, yet do not expect that the results are the same, as the sampling is much lower in this case. The complete trajectory can be retrieved from this link (3GB file).

Minimum-Distance Distribution function

Here we compute the minimum-distance distribution function, the Kirkwood-Buff integral, and the atomic contributions of the solvent to the density.

This example illustrates the regular usage of ComplexMixtures, to compute the minimum distance distribution function, KB-integrals and group contributions.

How to run this example

cd ComplexMixturesExamples/Protein_in_Glycerol/MDDF
-julia -t auto mddf.jl

Detailed explanation of the example:

Loading the packages required for computing the MDDF.

using PDBTools
-using ComplexMixtures

Load the pdb file of the system using PDBTools:

atoms = readPDB("../Data/system.pdb")

Create arrays of atoms with the protein and Glycerol atoms, using the select function of the PDBTools package:

protein = select(atoms,"protein")
-glyc = select(atoms,"resname GLYC")

Setup solute and solvent structures, required for computing the MDDF, with Selection function of the ComplexMixtures package:

solute = Selection(protein,nmols=1)
-solvent = Selection(glyc,natomspermol=14)

Read and setup the Trajectory structure required for the computations:

trajectory = Trajectory("../Data/glyc50_complete.dcd",solute,solvent)

Run the calculation and get results:

results = mddf(trajectory)
Note

To change the options of the calculation, set the Options structure accordingly and pass it as a parameter to mddf. For example:

options = Options(cutoff=10.)
-mddf(trajectory,options)

The complete set of options available is described here.

Save the reults to recover them later if required

save(results,"./glyc50.json")

The trajectory that was loaded was for a toy-example. The complete trajectory is available here, but it is a 3GB file. The same procedure above was performed with that file and produced the results_Glyc50.json file, which is available in the Data directory here. We will continue with this file instead.

Load the actual results obtained with the complete simulation:

results = load("../Data/results_glyc50.json")

Results are loaded, and now we can plot the data obtained.

Produce plots

MDDF and Kirkwood-Buff integrals

Load some packages that we will use to produce the plots:

using Plots, Plots.PlotMeasures, LaTeXStrings

Some default options that make the plots prettier:

default(
-    fontfamily="Computer Modern",
-    linewidth=2, framestyle=:box, label=nothing, grid=false
-)

First, we will plot the MDDF and the corresponding Kirkwood-Buff integral, which are available in the results.mddf and results.kb fields of the results data set. The distances are available in the results.d vector. We also plot here an horizontal line and save the figure as a pdf file.

plot(layout=(1,2))
-plot!(results.d,results.mddf,xlabel=L"r/\AA",ylabel="mddf",subplot=1)
-hline!([1],linestyle=:dash,linecolor=:gray,subplot=1)
-plot!(
-    results.d,results.kb/1000, #to L/mol
-    xlabel=L"r/\AA",ylabel=L"G_{us}/\mathrm{L~mol^{-1}}",
-    subplot=2
-)
-plot!(size=(800,300),margin=4mm)
-savefig("./mddf.pdf")

This will produce the following plot:

- -

Atomic contributions to the MDDF

Selecting the atoms corresponding to the hydroxyl groups, and of the aliphatic carbons of Glycerol. Here we list the types of the atoms as specified by the force-field.

hydroxyls = ["O1","O2","O3","H1","H2","H3"]
-aliphatic = ["C1","C2","HA","HB","HC","HD"]

The contributions function of ComplexMixtures will extract from the result the contributions of each set of atoms to the total MDDF:

hydr_contributions = contributions(solvent,results.solvent_atom,hydroxyls)
-aliph_contributions = contributions(solvent,results.solvent_atom,aliphatic)

And, finally, here we plot these group contributions on top of the total MDDF:

plot(results.d,results.mddf,xlabel=L"r/\AA",ylabel="mddf",size=(600,400))
-plot!(results.d,hydr_contributions,label="Hydroxils")
-plot!(results.d,aliph_contributions,label="Aliphatic chain")
-hline!([1],linestyle=:dash,linecolor=:gray)
-savefig("./mddf_atom_contrib.pdf")

This will produce the following figure:

- -

Note how hydroxyl clearly are the sole contribution to the peak at ~1.9 Angstroms, corresponding to hydrogen-bonding interactions. The aliphatic groups contribute importantly to the shoulder at larger distances, which correspond to non-specific interactions.

2D residue contribution density map

In this example we compute the density map of Glycerol in the vicinity of a set of residues of a protein, from the minimum-distance distribution function.

The MDDF can be decomposed in the contributions of each atom of the solute or of the solvent. Here, we sum up te contributions of all the atoms of each residue of the solute, which is a protein, and plot a density map with the final information. The output figure obtained is:

- -

How to run this example:

cd ComplexMixturesExamples/Protein_in_Glycerol/Density2D
-julia density2D.jl

Detailed explanation of the example:

Here, we use the contourf function of the Plots package of Julia. A detailed explanation of the input file density2D.jl is provide below:

Loading packages that will be used:

using Plots
-using LaTeXStrings
-using Formatting
-using ComplexMixtures, PDBTools

Some default options so the plot looks nice

plot_font = "Computer Modern"
-default(
-    fontfamily=plot_font,
-    linewidth=2, framestyle=:box, label=nothing
-)

Read the PDB file (using PDBTools)

pdb = readPDB("./system.pdb")

Load results of the ComplexMixtures run

R = load("./results_glyc50.json")  

Define which are the solute molecules (the protein)

protein = select(pdb,"protein")
-solute = Selection(protein,nmols=1)

Define which are the solvent molecules (Glycerol here)

glycerol = select(pdb,"resname GLYC")
-solvent = Selection(glycerol,natomspermol=14)

Retrive the resiude contribution data

Collect which are the protein residues

residues = collect(eachresidue(protein))

Set a matrix that will store the results, with a number of lines corresponding to the length of the MDDF histogram, and with a number of columns corresponding to the number of residues:

rescontrib = zeros(length(R.mddf),length(residues))

Now, collect the contribution of each residue as a column of the above matrix. The notation pairs(residues) returns tuples containing the index ires and the corresponding residue. The .= symbol sets each element of the corresponding column of the rescontrib matrix to the output of contributions (by broadcasting).

for (ires,residue) in pairs(residues)
-  rescontrib[:,ires] .= contributions(solute,R.solute_atom,residue)
-end

Plot only for distances within 1.5 and 3.5:

Here, we will plot only the contributions from residue 70 to residue 110, and from distances ranging from 1.5 to 3.5 which is where most of the action occurs:

irange=70:110
-idmin = findfirst( d -> d > 1.5, R.d)
-idmax = findfirst( d -> d > 3.5, R.d)

To obtain pretty labels for the residues in the x-axis, we retrieve the one-letter residue names and concatenate them with the residue number converted to strings:

labels = PDBTools.oneletter.(resname.(residues)).*format.(resnum.(residues))

And, finally, we produce the plot, with a series of options that make this particular contour plot look nice:

contourf(
-    irange, # x
-    R.d[idmin:idmax], # y
-    rescontrib[idmin:idmax,irange], # z
-    xlabel="Residue", ylabel=L"r/\AA",
-    xticks=(irange,labels[irange]), xrotation=60,
-    xtickfont=font(6,plot_font),
-    color=cgrad(:tempo), linewidth=0.1, linecolor=:black,
-    colorbar=:none, levels=5,
-    size=(500,280)
-)

The final figure is saved as a pdf file:

savefig("./density2D.pdf")

3D residue contribution density map

In this example we compute three-dimensional representations of the density map of Glycerol in the vicinity of a set of residues of a protein, from the minimum-distance distribution function.

Here, the MDDF is decomposed at each distance according to the contributions of each solute (the protein) residue. The grid is created such that, at each point in space around the protein, it is possible to identify:

  1. Which atom is the closest atom of the solute to that point.

  2. Which is the contribution of that atom (or residue) to the distribution function.

Therefore, by filtering the 3D density map at each distance one can visualize over the solute structure which are the regions that mostly interact with the solvent of choice at each distance. Typical images of such a density are:

- -

In the figure on the left, the points in space around the protein are selected with the following properties: distance from the protein smaller than 2.0Å and relative contribution to the MDDF at the corresponding distance of at least 10% of the maximum contribution. Thus, we are selecting the regions of the protein corresponding to the most stable hydrogen-bonding interactions. The color of the points is the contribution to the MDDF, from blue to red. Thus, the most reddish-points corresponds to the regions where the most stable hydrogen bonds were formed. We have marked two regions here, on opposite sides of the protein, with arrows.

Clicking on those points we obtain which are the atoms of the protein contributing to the MDDF at that region. In particular, the arrow on the right points to the strongest red region, which corresponds to an Aspartic acid. These residues are shown explicitly under the density (represented as a transparent surface) on the figure in the center.

The figure on the right displays, overlapped with the hydrogen-bonding residues, the most important contributions to the second peak of the distribution, corresponding to distances from the protein between 2.0 and 3.5Å. Notably, the regions involved are different from the ones forming hydrogen bonds, indicating that non-specific interactions with the protein (and not a second solvation shell) are responsible for the second peak.

A short tutorial video showing how to open the input and output PDB files in VMD and produce images of the density is available here:

- -

How to run this example:

cd ComplexMixturesExamples/Protein_in_Glycerol/Density3D
-julia density3D.jl

Alternatively, open Julia and copy/paste or the commands in density3D.jl or use include("./density3D.jl"). These options will allow you to remain on the Julia section with access to the grid data structure that was generated and corresponds to the output grid.pdb file.

This will create (actually overwrite) the grid.pdb file. Here we provide a previously setup VMD session that contains the data with the visualization choices used to generate the figure above. Load it with:

vmd -e grid.vmd

Detailed explanation of the example:

Initially we load the ComplexMixtures and PDBTools packages:

using ComplexMixtures, PDBTools

With the readPDB function of PDBTools, we read the PDB file of the system simulated:

pdb = readPDB("../Data/system.pdb")

and using ComplexMixtures, we load the results from the calculation of the MDDF of Glycerol around the protein, which was computed previously:

R = load("../Data/results_glyc50.json")  

The solute here is the protein, and we need to setup the structures that define which atoms and type of solute it is. First, we select from the atoms of the pdb file of the system, those belonging to the protein, using select from PDBTools:

protein = select(pdb,"protein")

and then we define the solute structure that is actually used in ComplexMixtures, by passing those atoms and specifying that the solute is a single molecule to the Selection function of ComplexMixtures:

solute = Selection(protein,nmols=1)

The 3D grid representing the density around the protein is computed with the grid3D function provided by ComplexMixtures. It receives the solute structure (of type Selection), the list of solute atoms (of type PDBTools.Atoms, as the protein selection above), the name of the output file and some optional parameters to define the grid. Here we compute the grid only between 1.5 and 3.5Å, characterizing the first and second solvation shells. The grid has by default a step of 0.5Å.

grid = grid3D(
-    solute=solute,
-    solute_atoms=protein,
-    mddf_result=R,
-    output_file="grid.pdb",
-    dmin=1.5,
-    dmax=3.5
-)

The command above will generate the grid, save it to grid.pdb and leave it available in the grid array of atoms, for further inspection, if desired.

By changing dmin, dmax, and step, one controls the grid size and resolution. This may generate very large output files.

+Examples: · ComplexMixtures.jl

Examples

List of examples

How to run these examples

1 Download and install Julia

To run the scripts, we suggest the following procedure:

  1. Create a directory, for example example1.
  2. Copy the required data files, indicated in each example.
  3. Launch julia in that directory, activate the directory environment, and install the required packages. This is done by launching Julia and executing:
    import Pkg 
    +Pkg.activate(".")
    +Pkg.add(["ComplexMixtures", "PDBTools", "Plots", "LaTeXStrings", "EasyFit"])
    +exit()
  4. Copy the code of each script into a file, and execute with:
    julia -t auto script.jl
    Alternatively (and perhaps preferably), copy line by line the content of the script into the Julia REPL, to follow each step of the calculation. For a more advanced Julia usage, we suggest the VSCode IDE with the Julia Language Support extension.
diff --git a/dev/help/index.html b/dev/help/index.html deleted file mode 100644 index 4c9b4ea4..00000000 --- a/dev/help/index.html +++ /dev/null @@ -1,47 +0,0 @@ - -Help entries · ComplexMixtures.jl

Help entries

ComplexMixtures.ChemFileType
struct ChemFile{T<:(AbstractVector)} <: Trajectory

Structure to contain a trajectory as read by Chemfiles.jl

  • filename::String

  • format::AbstractString

  • stream::ComplexMixtures.Stream{<:Chemfiles.Trajectory}

  • nframes::Int64

  • solute::Selection

  • solvent::Selection

  • x_solute::Vector{T} where T<:(AbstractVector)

  • x_solvent::Vector{T} where T<:(AbstractVector)

  • unitcell::StaticArraysCore.MMatrix{3, 3, Float64, 9}

  • natoms::Int64

source
ComplexMixtures.ChemFileMethod
ChemFile(filename::String, solute::Selection, solvent::Selection;format="" , T::Type = SVector{3,Float64})

Function open will set up the IO stream of the trajectory, fill up the number of frames field and additional parameters if required.

source
ComplexMixtures.DensityType
mutable struct Density

Structure to contain the density values obtained from the calculation.

  • solute::Float64

  • solvent::Float64

  • solvent_bulk::Float64

source
ComplexMixtures.MinimumDistanceType
struct MinimumDistance

Internal structure or function, interface may change.

Extended help

This structure contains the information, for each molecule, of if it is within the cutoff distance of the solute, the atom indexes of the associated minimum distance, the distance, and a label to mark if the reference atom of the molecule is within the cutoff distance of the solute.

The lists of minimum-distances are stored in arrays of type Vector{MinimumDistance}. The index of this vector corresponds to the index of the molecule in the original array.

  • within_cutoff::Bool

  • i::Int64

  • j::Int64

  • d::Float64

  • ref_atom_within_cutoff::Bool

  • d_ref_atom::Float64

source
ComplexMixtures.NamdDCDType
struct NamdDCD{T<:(AbstractVector)} <: Trajectory

Structure to contain the data of a trajectory in NAMD/DCD format.

  • filename::String

  • stream::ComplexMixtures.Stream{<:FortranFiles.FortranFile}

  • nframes::Int64

  • solute::Selection

  • solvent::Selection

  • x_solute::Vector{T} where T<:(AbstractVector)

  • x_solvent::Vector{T} where T<:(AbstractVector)

  • lastatom::Int64

  • unitcell_read::Vector{Float64}

  • x_read::Vector{Float32}

  • y_read::Vector{Float32}

  • z_read::Vector{Float32}

source
ComplexMixtures.NamdDCDMethod
NamdDCD(filename::String, solute::Selection, solvent::Selection;T::Type = SVector{3,Float64})

This function initializes the structure above, returning the data and the vectors with appropriate lengths.

source
ComplexMixtures.OptionsType
struct Options

Structure that contains the detailed input options.

  • firstframe::Int64

  • lastframe::Int64

  • stride::Int64

  • irefatom::Int64

  • n_random_samples::Int64

  • binstep::Float64

  • dbulk::Float64

  • cutoff::Float64

  • usecutoff::Bool

  • lcell::Int64

  • GC::Bool

  • GC_threshold::Float64

  • seed::Int64

  • StableRNG::Bool

  • nthreads::Int64

  • silent::Bool

  • frame_weights::Vector{Float64}

source
ComplexMixtures.OverviewType

Internal structure or function, interface may change.

mutable struct Overview

Structure that is used to dispatch the show of an overview.

  • R::Result

  • domain_molar_volume::Float64

  • density::ComplexMixtures.Density

  • solvent_molar_volume::Float64

  • solvent_molar_volume_bulk::Float64

  • solute_molar_volume::Float64

source
ComplexMixtures.PDBTrajType
struct PDBTraj{T<:(AbstractVector)} <: Trajectory

Structure to contain PDB trajectories. Frames must be separated by "END", and with periodic cell sizes in the "CRYST1" field, for each frame.

This structure and functions can be used as a template to implement the reading of other trajectory formats.

  • filename::String

  • stream::ComplexMixtures.Stream{<:IOStream}

  • nframes::Int64

  • unitcell::StaticArraysCore.MMatrix{3, 3, Float64, 9}

  • solute::Selection

  • solvent::Selection

  • x_solute::Vector{T} where T<:(AbstractVector)

  • x_solvent::Vector{T} where T<:(AbstractVector)

source
ComplexMixtures.PDBTrajMethod
PDBTraj(pdbfile::String, solute::Selection, solvent::Selection;T::Type = SVector{3,Float64})

Function open will set up the IO stream of the trajectory, fill up the number of frames field and additional parameters if required

source
ComplexMixtures.ResultType
mutable struct Result{T<:VecOrMat{Float64}}

Structure to contain the results of the MDDF calculation.

  • Version::VersionNumber

  • nbins::Int64

  • dbulk::Float64

  • cutoff::Float64

  • d::Vector{Float64}

  • md_count::Vector{Float64}

  • md_count_random::Vector{Float64}

  • coordination_number::Vector{Float64}

  • coordination_number_random::Vector{Float64}

  • mddf::Vector{Float64}

  • kb::Vector{Float64}

  • autocorrelation::Bool

  • solvent::ComplexMixtures.SolSummary

  • solute::ComplexMixtures.SolSummary

  • solute_atom::VecOrMat{Float64}

  • solvent_atom::VecOrMat{Float64}

  • rdf_count::Vector{Float64}

  • rdf_count_random::Vector{Float64}

  • sum_rdf_count::Vector{Float64}

  • sum_rdf_count_random::Vector{Float64}

  • rdf::Vector{Float64}

  • kb_rdf::Vector{Float64}

  • density::ComplexMixtures.Density

  • volume::ComplexMixtures.Volume

  • options::Options

  • irefatom::Int64

  • lastframe_read::Int64

  • nframes_read::Int64

  • files::Vector{String}

  • weights::Vector{Float64}

The Result{Vector{Float64}} parametric type is necessary only for reading the JSON3 saved file.

source
ComplexMixtures.SelectionType
struct Selection

Structure that contains the information about the solute and solvent molecules.

  • natoms::Int64

  • nmols::Int64

  • natomspermol::Int64

  • index::Vector{Int64}

  • imol::Vector{Int64}

  • names::Vector{String}

source
ComplexMixtures.SolSummaryType

Internal structure or function, interface may change.

struct SolSummary

Structures to contain the details of a solute or solvent to store in the results of the MDDF calculation.

  • natoms::Int64

  • nmols::Int64

  • natomspermol::Int64

source
ComplexMixtures.TrajectoryType
Trajectory(filename::String, solute::Selection, solvent::Selection; format::String = "", chemfiles = false)

Trajectory constructor data type.

Defaults to reading with the Chemfiles infrastructure, except for DCD and PDB trajectory files, if the "PDBTraj" option is provided.

See memory issue (https://github.com/chemfiles/Chemfiles.jl/issues/44)

source
ComplexMixtures.UnitsType

Internal structure or function, interface may change.

struct Units{T}

Unit conversions.

  • mole::Any

  • Angs3tocm3::Any

  • Angs3toL::Any

  • Angs3tocm3permol::Any

  • Angs3toLpermol::Any

  • SitesperAngs3tomolperL::Any

source
ComplexMixtures.VolumeType
mutable struct Volume

Structures to contain the volumes obtained from calculations.

  • total::Float64

  • bulk::Float64

  • domain::Float64

  • shell::Vector{Float64}

source
Base.isapproxMethod
Base.isapprox(r1::T, r2::T; debug=false) where T <: CMTypes

Internal structure or function, interface may change.

Function to test if two runs offered similar results. Mostly used in the package testing routines.

source
Base.mergeMethod
merge(r::Vector{Result})

This function merges the results of MDDF calculations obtained by running the same analysis on multiple trajectories, or multiple parts of the same trajectory. It returns a Result structure of the same type, with all the functions and counters representing averages of the set provided weighted by the number of frames read in each Result set.

source
Base.writeMethod
write(R::ComplexMixtures.Result, filename::String, solute::Selection, solvent::Selection)

Function to write the final results to output files as simple tables that are human-readable and easy to analyze with other software

If the solute and solvent selections are provided, pass on the atom names.

source
Base.writeMethod
write(R::ComplexMixtures.Result, filename::String; 
-      solute_names::Vector{String} = ["nothing"], 
-      solvent_names::Vector{String} = ["nothing"])

Optional passing of atom names.

source
ComplexMixtures.VMDselectMethod
VMDselect(inputfile::String, selection::String; vmd="vmd", srcload=nothing)

Select atoms using vmd selection syntax, with vmd in background

Returns the list of index (one-based) and atom names

Function to return the selection from an input file (topology, coordinates, etc), by calling VMD in the background.

The srcload argument can be used to load a list of scripts before loading the input file, for example with macros to define custom selection keywords.

source
ComplexMixtures.contributionsMethod
contributions(s::Selection, atom_contributions::Matrix{Float64}, selection)

Extract the contribution of a given atom type selection from the solute or solvent atomic contributions to the MDDF.

s here is the solute or solvent selection (type ComplexMixtures.Selection) atom_contributions is the R.solute_atom or R.solvent_atom arrays of the Result structure, and the last argument is the selection of atoms from the solute to be considered, given as a list of indexes, list of atom names, vector of PDBTools.Atoms, or a PDBTools.Residue.

Extended help

For selections of one molecule, the function has an additional keyword option first_atom_is_ref that is false by default. If set to true, the index of the first atom of the selection is considered as a reference atom. For example, if a solute has 100 atoms, but its first atom in the PDB file is number 901, the selection of indexes [1, 2, 3] will refer to atoms with indexes [901, 902, 903].

source
ComplexMixtures.coordination_numberFunction
coordination_number(trajectory::Trajectory, options::Options)

Computes the coordination numbers for each solute molecule in the trajectory, given the Trajectory. This is an auxiliary function of the ComplexMixtures package, which is used to compute coordination numbers when the normalization of the distribution is not possible or needed.

The output is a Result structure, which contains the data as the result of a call to mddf, except that all counters which require normalization of the distribution will be zero. In summary, this result data structure can be used to compute the coordination numbers, but not the MDDF, RDF, or KB integrals.

Examples

julia> trajectory = Trajectory("./trajectory.dcd",solute,solvent);
-
-julia> results = mddf(trajectory);
-
-julia> coordination_numbers = coordination_number(trajectory);
source
ComplexMixtures.coordination_numberFunction
coordination_number(R::Result) = R.coordination_number
-coordination_number(R::Result, group_contributions::Vector{Float64})
-coordination_number(s::Selection, atom_contributions::Matrix{Float64}, R::Result, group)

Computes the coordination number of a given group of atoms from the solute or solvent atomic contributions to the MDDF. If no group is defined (first call above), the coordination number of the whole solute or solvent is returned.

If the group_contributions to the mddf are computed previously with the contributions function, the result can be used to compute the coordination number by calling coordination_number(R::Result, group_contributions).

Otherwise, the coordination number can be computed directly with the second call, where:

s is the solute or solvent selection (type ComplexMixtures.Selection)

atom_contributions is the R.solute_atom or R.solvent_atom arrays of the Result structure

R is the Result structure,

and the last argument is the selection of atoms from the solute to be considered, given as a list of indexes, list of atom names, or a selection following the syntax of PDBTools, or vector of PDBTools.Atoms, or a PDBTools.Residue

Examples

In the following example we compute the coordination number of the atoms of residue 50 (of the solute) with the solvent atoms of TMAO, as a function of the distance. Finally, we show the average number of TMAO molecules within 5 Angstroms of residue 50. The findlast(<(5), R.d) part of the code below returns the index of the last element of the R.d array that is smaller than 5 Angstroms.

Precomputing the group contributions Using the contributions function

using ComplexMixtures, PDBTools
-pdb = readPDB("test/data/NAMD/structure.pdb");
-R = load("test/data/NAMD/protein_tmao.json");
-solute = Selection(PDBTools.select(pdb, "protein"), nmols=1);
-residue50 = PDBTools.select(pdb, "residue 50");
-# Compute the group contributions to the MDDF
-residue50_contribution = contributions(solute, R.solute_atom, residue50);
-# Now compute the coordination number
-residue50_coordination = coordination_number(R, residue50_contribution)
-# Output the average number of TMAO molecules within 5 Angstroms of residue 50
-residue50_coordination[findlast(<(5), R.d)]

Without precomputing the group_contribution

using ComplexMixtures, PDBTools
-pdb = readPDB("test/data/NAMD/structure.pdb");
-R = load("test/data/NAMD/protein_tmao.json");
-solute = Selection(PDBTools.select(pdb, "protein"), nmols=1);
-residue50 = PDBTools.select(pdb, "residue 50");
-# Compute the coordination number
-residue50_coordination = coordination_number(solute, R.solute_atom, R, residue50)
-# Output the average number of TMAO molecules within 5 Angstroms of residue 50
-residue50_coordination[findlast(<(5), R.d)]
source
ComplexMixtures.coordination_number_frame!Method
coordination_number_frame!(R::Result, system::AbstractPeriodicSystem, buff::Buffer, frame_weight)

Internal structure or function, interface may change.

Computes the coordination numbers for a single frame. Modifies the data in the R (type Result) structure.

source
ComplexMixtures.eulermatMethod
eulermat(beta, gamma, theta, deg::String)

Internal structure or function, interface may change.

This routine was added because it defines the rotation in the "human" way, and is thus used to set the position of the fixed molecules. deg can only be "degree", in which case the angles will be considered in degrees. If no deg argument is provided, radians are used.

That means: beta is a counterclockwise rotation around x axis. gamma is a counterclockwise rotation around y axis. theta is a counterclockwise rotation around z axis.

source
ComplexMixtures.finalresults!Method
finalresults!(R::Result, options::Options, trajectory::Trajectory)

Internal structure or function, interface may change.

Function that computes the final results of all the data computed by averaging according to the sampling of each type of data, and converts to common units.

Computes also the final distribution functions and KB integrals.

This function modifies the values contained in the R data structure.

source
ComplexMixtures.grMethod
gr(R::Result) = gr(R.d,R.rdf_count,R.density.solvent_bulk,R.options.binstep)

If a Result structure is provided without further details, use the rdf count and the bulk solvent density.

source
ComplexMixtures.grMethod
gr(r::Vector{Float64}, count::Vector{Float64}, density::Float64, binstep::Float64)

Computes the radial distribution function from the count data and the density.

This is exactly a conventional g(r) if a single atom was chosen as the solute and solvent selections.

Returns both the g(r) and the kb(r)

source
ComplexMixtures.grid3DMethod
grid3D(solute,solute_atoms,mddf_result,output_file; dmin=1.5, dmax=5.0, step=0.5)

This function builds the grid of the 3D density function and fills an array of mutable structures of type Atom, containing the position of the atoms of grid, the closest atom to that position, and distance.

solute is a ComplexMixtures.Selection, defining the solute. solute_atoms is the corresponding vector of PDBTools.Atoms, and mddf_result is the result of a mddf calculation with the corresponding solute.

dmin and dmax define the range of distance where the density grid will be built, and step defines how fine the grid must be. Be aware that fine grids usually involve a very large number of points (hundreds of thousands).

All parameters can be provided as keyword parameters.

Example

julia> using ComplexMixtures, PDBTools
-
-julia> pdb = readPDB("./system.pdb");
-
-julia> R = ComplexMixtures.load("./results.json");
-
-julia> protein = select(pdb,"protein");
-
-julia> solute = ComplexMixtures.Selection(protein,nmols=1);
-
-julia> grid = ComplexMixtures.grid3D(solute=solute, solute_atoms=protein, mddf_result=R, output_file="grid.pdb");
-

grid will contain a vector of Atoms with the information of the MDDF at each grid point, and the same data will be written in the grid.pdb file. This PDB file can be opened in VMD, for example, and contain in the beta field the contribution of each protein residue to the MDDF at each point in space relative to the protein, and in the occupancy field the distance to the protein. Examples of how this information can be visualized are provided in the user guide of ComplexMixtures.

source
ComplexMixtures.itypeMethod
itype(iatom::Int, natomspermol::Int)

Internal structure or function, interface may change.

Given the index of the atom in the vector of coordinates of the solute or the solvent, returns the type of the atom, that is, the index of this atom within the molecule (goes from 1 to natomspermol)

source
ComplexMixtures.loadMethod
load(filename::String)

Function to load the json saved results file into the Result data structure.

source
ComplexMixtures.mddfFunction
mddf(trajectory::Trajectory, options::Options)

Function that computes the minimum-distance distribution function, atomic contributions, and KB integrals, given the Trajectory structure of the simulation and, optionally, parameters given as a second argument of the Options type. This is the main function of the ComplexMixtures package.

Examples

julia> trajectory = Trajectory("./trajectory.dcd",solute,solvent);
-
-julia> results = mddf(trajectory);

or, to set some custom optional parameter,

julia> options = Options(lastframe=1000);
-
-julia> results = mddf(trajectory,options);
source
ComplexMixtures.mddf_frame!Method
mddf_frame!(R::Result, system::AbstractPeriodicSystem, buff::Buffer, options::Options, frame_weight, RNG)

Internal structure or function, interface may change.

Computes the MDDF for a single frame. Modifies the data in the R (type Result) structure.

source
ComplexMixtures.minimum_distances!Method
minimum_distances!(system::CellListMap.PeriodicSystem, R::Result)

Internal structure or function, interface may change.

Function that computes the list of distances of solvent molecules to a solute molecule. It updates the lists of minimum distances.

source
ComplexMixtures.mol_indexMethod
mol_index(i_atom, natomspermol) = (i_atom-1) ÷ natomspermol + 1

Internal structure or function, interface may change.

Extended help

Sets the index of the molecule of an atom in the simplest situation, in which all molecules have the same number of atoms.

source
ComplexMixtures.mol_rangeMethod
mol_range(imol, n_atoms_per_molecule)

Internal structure or function, interface may change.

Given the index and the number of atoms per molecule, returns the range of indices of an array of coordinates that corresponds to the molecule.

source
ComplexMixtures.move!Method
move!(x::AbstractVector, newcm::AbstractVector,beta, gamma, theta)

Internal structure or function, interface may change.

Translates and rotates a molecule according to the desired input center of coordinates and Euler rotations. Modifies the vector x.

source
ComplexMixtures.random_move!Method
random_move!(x_ref::AbstractVector{T}, 
-             irefatom::Int,
-             system::AbstractPeriodicSystem,
-             x_new::AbstractVector{T}, RNG) where {T<:SVector}

Internal structure or function, interface may change.

Function that generates a new random position for a molecule.

The new position is returned in x_new, a previously allocated array.

source
ComplexMixtures.randomize_solvent!Method
randomize_solvent!(system, buff, n_solvent_in_bulk, options, RNG)

Internal structure or function, interface may change.

Generate a random solvent distribution from the bulk molecules of a solvent

source
ComplexMixtures.setbinMethod
setbin(d,step)

Internal structure or function, interface may change.

Function that sets to which histogram bin a data point pertains. Simple, but important to keep consistency over all calls.

source
ComplexMixtures.setup_PeriodicSystemMethod
setup_PeriodicSystem(trajectory::Trajectory, options::Options)

Internal structure or function, interface may change.

Set up the periodic system from CellListMap, to compute minimum distances. The system will be set up such that xpositions corresponds to one molecule of the solute, and ypositions contains all coordinates of all atoms of the solvent.

source
ComplexMixtures.shellradiusMethod
shellradius(i,step)

Internal structure or function, interface may change.

Compute the point in which the radius comprises half of the volume of the shell.

source
ComplexMixtures.sphericalshellvolumeMethod
sphericalshellvolume(i,step)

Internal structure or function, interface may change.

Computes the volume of the spherical shell defined within [(i-1)*step, i*step].

source
ComplexMixtures.sum!Method
sum!(R1::Result, R2::Result)

Internal structure or function, interface may change.

Sum the counts of two Results structures, adding the result to the first structure as in R1 = R1 + R2.

source
ComplexMixtures.titleMethod
title(R::Result, solute::Selection, solvent::Selection)
-title(R::Result, solute::Selection, solvent::Selection, nspawn::Int)

Internal structure or function, interface may change.

Print some information about the run.

source
ComplexMixtures.update_list!Method
update_list!(i, j, d2, iref_atom::Int, mol_index_i::F, isolute::Int, list::Vector{MinimumDistance{T}}) where {F<:Function, T}

Internal structure or function, interface may change.

Function that updates a list of minimum distances given the indexes of the atoms involved for one pair within cutoff, for autocorrelations (such that the identity of isolute is needed)

source
ComplexMixtures.update_list!Method
update_list!(i, j, d2, iref_atom::Int, mol_index_i::F, list::Vector{MinimumDistance{T}}) where {F<:Function, T}

Internal structure or function, interface may change.

Function that updates a list of minimum distances given the indexes of the atoms involved for one pair within cutoff.

source
ComplexMixtures.update_mdMethod
update_md(md1::MinimumDistance{T}, md2::MinimumDistance{T}) where {T}

Internal structure or function, interface may change.

Function that returns the updated minimum distance structure after comparing two structures associated with the same molecule.

source
ComplexMixtures.updatecounters!Method
updatecounters!(R::Result, system::AbstractPeriodicSystem)

Internal structure or function, interface may change.

Function that updates the minimum-distance counters in R

source
ComplexMixtures.viewmolMethod
viewmol(i::Int, x::Vector{T}, n::Int) where T

Internal structure or function, interface may change.

Returns a view of a coordinate vector corresponding to the atoms of a molecule with index i. n is the number of atoms of the molecule.

source
ComplexMixtures.which_typesMethod
which_types(s::Selection, indexes::Vector{Int})

Internal structure or function, interface may change.

Function that returns the list of the indexes of the types of the atoms in a selection. For example, if a selection corresponds to a solvent of water molecules: There are three types, 1, 2, and 3, corresponding to the three atoms of the water molecule. If the indexes provided are, for instance, 11, 12, and 13, corresponding to a water molecule, this function will return 1, 2 and 3.

This is used to get equivalent-atom contributions to the distribution functions. For example, the input indexes span all water molecules, the output of this function will be still the three indexes corresponding to the three types of atoms that exist in a water molecule.

It is not possible to compute the contribution of one individual water molecule if the distribution function was computed for all molecules. Thus, the necessity to identify the types of atoms involved in a selection.

source
ComplexMixtures.writexyzMethod
writexyz(x::Vector{T}, file::String) where T <: AbstractVector

Internal structure or function, interface may change.

Print test xyz file.

source
diff --git a/dev/index.html b/dev/index.html index 226856b7..2da78404 100644 --- a/dev/index.html +++ b/dev/index.html @@ -1,5 +1,5 @@ -Introduction · ComplexMixtures.jl

Introduction

ComplexMixtures.jl is a package to study the solute and solvent interactions of mixtures of molecules of complex shape. Conventional radial distribution functions are not appropriate to represent the structure of a solvent around a solute with many atoms, and a variable, non-spherical shape.

Typical solutes of complex shape are proteins, nucleic acids, and polymers in general. Smaller molecules like lipids, carbohydrates, etc, are also complex enough such that representing the structure of the solution of those molecules with distribution functions is not trivial.

Minimum-Distance Distribution Functions (MDDFs) are a very general and practical way to represent solute-solvent interactions for molecules with arbitrarily complex sizes and geometries. Briefly, instead of computing the density distribution function of a particular atom or the center-of-mass of the molecules, one computes the distribution function of the minimum-distance between any solute and solvent atoms. This provides a size and shape-independent distribution which is very natural to interpret in terms of molecular interactions.

Additionally, the MDDFs can be decomposed into contributions of each type of atom (or groups of atoms) of the solute and solvent molecules, such that the profiles of the distributions can be interpreted in terms of the chemical nature of the species involved in the interactions at each distance.

Finally, as with radial distribution functions, MDDFs can be used to compute Kirkwood-Buff integrals to connect the accumulation or depletion of the solvent components to thermodynamic properties, like protein structural stability, solubility, and others.

Features

Check out our examples repository, featuring the analysis of solvation structures for proteins, polymers, membrane, and complex solutions! The examples are also described in our featured article.

1. Minimum-distance distribution functions: understanding solvation at a molecular level

This figure illustrates one of the main features of minimum-distance distribution functions, by showing the distribution of DMF molecules at the surface of a polyacrylamide molecule. The direct interactions are evident by the peak at hydrogen-bonding distances and, additionally, the contribution of each group of atoms of the DMF can be clearly distinguished by decomposing the total MDDF into atomic or chemical group contributions.

+Introduction · ComplexMixtures.jl

Introduction

ComplexMixtures.jl is a package to study the solute and solvent interactions of mixtures of molecules of complex shape. Conventional radial distribution functions are not appropriate to represent the structure of a solvent around a solute with many atoms, and a variable, non-spherical shape.

Typical solutes of complex shape are proteins, nucleic acids, and polymers in general. Smaller molecules like lipids, carbohydrates, etc, are also complex enough such that representing the structure of the solution of those molecules with distribution functions is not trivial.

Minimum-Distance Distribution Functions (MDDFs) are a very general and practical way to represent solute-solvent interactions for molecules with arbitrarily complex sizes and geometries. Briefly, instead of computing the density distribution function of a particular atom or the center-of-mass of the molecules, one computes the distribution function of the minimum-distance between any solute and solvent atoms. This provides a size and shape-independent distribution which is very natural to interpret in terms of molecular interactions.

Additionally, the MDDFs can be decomposed into contributions of each type of atom (or groups of atoms) of the solute and solvent molecules, such that the profiles of the distributions can be interpreted in terms of the chemical nature of the species involved in the interactions at each distance.

Finally, as with radial distribution functions, MDDFs can be used to compute Kirkwood-Buff integrals to connect the accumulation or depletion of the solvent components to thermodynamic properties, like protein structural stability, solubility, and others.

Compat

Important: This manual refers to version 2 of ComplexMixtures.jl. There are syntax changes relative to the 1.X series, and analysis scripts written for the previous versions won't work. The list of changes necessary to update the scripts is described here.

Features

Check out our examples, featuring the analysis of solvation structures for proteins, polymers, membrane, and complex solutions! The examples are also described in our featured article.

1. Minimum-distance distribution functions: understanding solvation at a molecular level

This figure illustrates one of the main features of minimum-distance distribution functions, by showing the distribution of DMF molecules at the surface of a polyacrylamide molecule. The direct interactions are evident by the peak at hydrogen-bonding distances and, additionally, the contribution of each group of atoms of the DMF can be clearly distinguished by decomposing the total MDDF into atomic or chemical group contributions.


Minimum distance distribution function and its decomposition into the chemical @@ -16,4 +16,4 @@
Preferential interaction parameters obtained for the solvation of a protein by ionic liquids.

-

In particular, the plot shows that besides being preferentially excluded from the protein surface at high concentrations in the native state, suggesting protein folding stabilization, the interactions with the protein in the denatured states are stronger, leading to denaturation at all concentrations.

References

  • L. Martínez, ComplexMixtures.jl: Investigating the structure of solutions of complex-shaped molecules from a solvent-shell perspective. J. Mol. Liq. 347, 117945, 2022. [Full Text]

  • L. Martínez, S. Shimizu, Molecular interpretation of preferential interactions in protein solvation: a solvent-shell perspective by means of minimum-distance distribution functions. J. Chem. Theor. Comp. 13, 6358–6372, 2017. [Full Text]

See also

Seminar

Applications

  • A. F. Pereira, V. Piccoli, L. Martínez, Trifluoroethanol direct interactions with protein backbones destabilize alpha-helices. J. Mol. Liq. 365, 120209, 2022. [Full Text]

  • V. Piccoli, L. Martínez, Ionic liquid solvation of proteins in native and denatured states. J. Mol. Liq. 363, 119953, 2022. [Full Text]

  • V. Piccoli, L. Martínez, Correlated counterion effects in the solvation of proteins by ionic-liquids. J. Mol. Liq. 320, 114347, 2020. [Full Text]

  • I. P. de Oliveira, L. Martínez, The shift in urea orientation at protein surfaces at low pH is compatible with a direct mechanism of protein denaturation. Phys. Chem. Chem. Phys. 22, 354-367, 2020. [Full Text]

  • I. P. de Oliveira, L. Martínez, Molecular basis for competitive solvation of the Burkholderia cepacia lipase by sorbitol and urea. Phys. Chem. Chem. Phys. 18, 21797-21808, 2016. [Full Text]

+

In particular, the plot shows that besides being preferentially excluded from the protein surface at high concentrations in the native state, suggesting protein folding stabilization, the interactions with the protein in the denatured states are stronger, leading to denaturation at all concentrations.

References

  • L. Martínez, ComplexMixtures.jl: Investigating the structure of solutions of complex-shaped molecules from a solvent-shell perspective. J. Mol. Liq. 347, 117945, 2022. [Full Text]

  • L. Martínez, S. Shimizu, Molecular interpretation of preferential interactions in protein solvation: a solvent-shell perspective by means of minimum-distance distribution functions. J. Chem. Theor. Comp. 13, 6358–6372, 2017. [Full Text]

See also

Seminar

Applications

  • A. F. Pereira, V. Piccoli, L. Martínez, Trifluoroethanol direct interactions with protein backbones destabilize alpha-helices. J. Mol. Liq. 365, 120209, 2022. [Full Text]

  • V. Piccoli, L. Martínez, Ionic liquid solvation of proteins in native and denatured states. J. Mol. Liq. 363, 119953, 2022. [Full Text]

  • V. Piccoli, L. Martínez, Correlated counterion effects in the solvation of proteins by ionic-liquids. J. Mol. Liq. 320, 114347, 2020. [Full Text]

  • I. P. de Oliveira, L. Martínez, The shift in urea orientation at protein surfaces at low pH is compatible with a direct mechanism of protein denaturation. Phys. Chem. Chem. Phys. 22, 354-367, 2020. [Full Text]

  • I. P. de Oliveira, L. Martínez, Molecular basis for competitive solvation of the Burkholderia cepacia lipase by sorbitol and urea. Phys. Chem. Chem. Phys. 18, 21797-21808, 2016. [Full Text]

diff --git a/dev/installation/index.html b/dev/installation/index.html index 595a3045..9acfb1f4 100644 --- a/dev/installation/index.html +++ b/dev/installation/index.html @@ -1,12 +1,14 @@ -Installation · ComplexMixtures.jl

Installation

Note

This is a package written in Julia. We invite you to experiment with the language, but if you want to just call this package from Python, read the From Python section of the manual. Understanding all the features of the package requires reading the manual as whole. The syntaxes of using this package from Julia or Python are almost identical, and the motivation for using Python should be mostly the familiarity with further analysis tools, as the plotting packages.

Install Julia

First you need to install the Julia language, version 1.9 or greater is required. Using the juliaup tool is a highly recommended way of installing and keeping Julia up to date.

Alternatively, you can install Julia by downloading the binaries directly from the Julia webpage.

Note

New to Julia? Julia is a modern high-level yet performant programming language. Some tips and a nice workflow for using it effectively can be found here.

For this specific package, following a the step-by-step examples provided here after installing Julia should be enough.

Install the packages

Within Julia, to install the packages required for running the examples here you need to do:

julia> import Pkg
+Installation · ComplexMixtures.jl

Installation

Note

This is a package written in Julia. We invite you to experiment with the language, but if you want to just call this package from Python, read the From Python section of the manual. Understanding all the features of the package requires reading the manual as a whole. The syntaxes of using this package from Julia or Python are almost identical, and the motivation for using Python should be mostly the familiarity with further analysis tools, such as the plotting packages.

Install Julia

First you need to install the Julia language, version 1.9 or greater is required. Using the juliaup tool is a highly recommended way of installing and keeping Julia up to date.

Alternatively, you can install Julia by downloading the binaries directly from the Julia webpage.

Note

New to Julia? Julia is a modern high-level yet performant programming language. Some tips and a nice workflow for using it effectively can be found here.

For this specific package, following the step-by-step examples provided here after installing Julia should be enough.

Install the packages

Within Julia, to install the packages required for running the examples here you need to do:

julia> import Pkg
 
-julia> Pkg.add(["ComplexMixtures","Plots","PDBTools"])

Please read the recommended workflow below, for further information and to be sure to have a smoother experience.

Create an environment

Once Julia is installed, we recommend to create an environment that will contain all the packages you may use for your analyses, including ComplexMixtures, in such a way that your results can always be reproduced and you don't get any version incompatibility.

We illustrate this by creating the "MyNewPaper" environment, which will be hosted in a simple directory,

mkdir /home/user/Documents/MyNewPaper

Then, start Julia and activate the environment that will be hosted there:

julia> import Pkg; Pkg.activate("/home/user/Documents/MyNewPaper")
-  Activating new project at `~/Documents/MyNewPaper`

and add to this environment the packages that your analyses will require:

julia> Pkg.add(["ComplexMixtures","PDBTools","Plots"])

That's it. Close Julia. Note that this created the files Manifest.toml and Project.toml in the MyNewPaper directory, which contain the information of packages and exact package versions you are using now on in this environment. Saving these files may be relevant for the future exact reproduction of your analyses.

Run your analysis scripts in that environment

Now, your analysis scripts, described in the next section in details, will look like:

import Pkg; Pkg.activate("/home/user/Documents/MyNewPaper")
+julia> Pkg.add(["ComplexMixtures", "PDBTools", "Plots", "EasyFit", "LaTeXStrings"])

Here, PDBTools.jl is an auxiliary package to read PDB files and select atoms within them. The Plots, EasyFit and LaTeXStrings packages will help producing nice looking plots.

Please read the recommended workflow below, for further information and to be sure to have a smoother experience.

Create an environment

Once Julia is installed, we recommend to create an environment that will contain all the packages you may use for your analyses, including ComplexMixtures, in such a way that your results can always be reproduced and you don't get any version incompatibility.

We illustrate this by creating the "MyNewPaper" environment, which will be hosted in a simple directory,

mkdir /home/user/Documents/MyNewPaper

Then, start Julia and activate the environment that will be hosted there:

julia> import Pkg; Pkg.activate("/home/user/Documents/MyNewPaper")
+  Activating new project at `~/Documents/MyNewPaper`

and add to this environment the packages that your analyses will require:

julia> import Pkg; Pkg.add(["ComplexMixtures","PDBTools","Plots", "EasyFit", "LaTeXStrings"])

That's it. Close Julia. Note that this created the files Manifest.toml and Project.toml in the MyNewPaper directory, which contain the information of packages and exact package versions you are using from now on in this environment. Saving these files may be relevant for the future exact reproduction of your analyses.

Run your analysis scripts in that environment

Now, your analysis scripts, described in the next section in details, will look like:

import Pkg; Pkg.activate("/home/user/Documents/MyNewPaper")
 
 using ComplexMixtures
 using PDBTools
 using Plots
+using EasyFit
+using LaTeXStrings
 
 # etc ... 

And the script can be run with julia -t auto script.jl (where -t auto allows for multi-threading), or included in julia with julia> include("./script.jl"), as described in the next section.

Tip

By loading the package with

using ComplexMixtures

the most common functions of the package become readily available by their direct name, for example mddf(...).

If you don't want to bring the functions into the scope of your script, use

import ComplexMixtures

Then, the functions of the package are called, for example, using ComplexMixtures.mddf(...). To avoid having to write ComplexMixtures all the time, define an acronym. For example:

import ComplexMixtures as CM
-CM.mddf(...)
+CM.mddf(...)
diff --git a/dev/mddf/index.html b/dev/mddf/index.html index 97da4507..280f309f 100644 --- a/dev/mddf/index.html +++ b/dev/mddf/index.html @@ -1,4 +1,12 @@ -Computing the MDDF · ComplexMixtures.jl

Computing the Minimum-Distance Distribution Function

The main function of the ComplexMixtures package actually computes the MDDF between the solute and the solvent chosen.

It is run with the following command:

results = mddf(trajectory)  

The MDDF along with other results, like the corresponding KB integrals, are returned in the results data structure, which is described in the next section.

It is possible to tune several options of the calculation, by setting the Options data structure with user-defined values in advance. The most common parameters to be set by the user are probably dbulk and stride.

dbulk defines the distance from the solute above which the user believes that the reference solute molecule does not significantly anymore the structure of the solvent. The default value is 10 Angstroms, but for large solvent molecules this might not be enough. To increase dbulk, use:

options = Options(dbulk=15.)
+Computing the MDDF · ComplexMixtures.jl

Computing the Minimum-Distance Distribution Function

The main function of the ComplexMixtures package actually computes the MDDF between the solute and the solvent chosen.

It is run with the following command:

results = mddf(trajectory)  

The MDDF along with other results, like the corresponding KB integrals, are returned in the results data structure, which is described in the next section.

It is possible to tune several options of the calculation, by setting the Options data structure with user-defined values in advance. The most common parameters to be set by the user are probably dbulk and stride.

dbulk defines the distance from the solute above which the user believes that the reference solute molecule does not significantly affect the structure of the solvent anymore. The default value is 10 Angstroms, but for large solvent molecules this might not be enough. To increase dbulk, use:

options = Options(dbulk=15.)
 results = mddf(trajectory,options)

stride defines if some frames will be skipped during the calculation (for speedup). For example, if stride=5, only one in five frames will be considered. Adjust stride with:

options = Options(stride=5)
-results = mddf(trajectory,options)

See the Options section for further details and other options to set.

+results = mddf(trajectory,options)

See the Options section for further details and other options to set.

Reference functions

ComplexMixtures.coordination_numberFunction
coordination_number(trajectory::Trajectory, options::Options)

Computes the coordination numbers for each solute molecule in the trajectory, given the Trajectory. This is an auxiliary function of the ComplexMixtures package, which is used to compute coordination numbers when the normalization of the distribution is not possible or needed.

The output is a Result structure, which contains the data as the result of a call to mddf, except that all counters which require normalization of the distribution will be zero. In summary, this result data structure can be used to compute the coordination numbers, but not the MDDF, RDF, or KB integrals.

Examples

julia> trajectory = Trajectory("./trajectory.dcd",solute,solvent);
+
+julia> results = mddf(trajectory);
+
+julia> coordination_numbers = coordination_number(trajectory);
source
ComplexMixtures.mddfMethod
mddf(trajectory::Trajectory, options::Options; frame_weights = Float64[], coordination_number_only = false)

Function that computes the minimum-distance distribution function, atomic contributions, and KB integrals, given the Trajectory structure of the simulation and, optionally, parameters given as a second argument of the Options type. This is the main function of the ComplexMixtures package.

Examples

julia> trajectory = Trajectory("./trajectory.dcd",solute,solvent);
+
+julia> results = mddf(trajectory);

or, to set some custom optional parameter,

julia> options = Options(lastframe=1000);
+
+julia> results = mddf(trajectory,options);
source
diff --git a/dev/multiple/index.html b/dev/multiple/index.html index 3f23bcb3..64df045c 100644 --- a/dev/multiple/index.html +++ b/dev/multiple/index.html @@ -1,11 +1,11 @@ -Multiple trajectories · ComplexMixtures.jl

Working with multiple trajectories

Very commonly, one has multiple trajectories of the same system, and we want to obtain the average results of all trajectories. We provide a simple scheme to average the results of multiple MDDF calculations:

Create a vector of result data structures, without initialization

Let us assume that we have three Gromacs trajectories, with file names traj1.xtc, traj2.xtc, traj3.xtc. First let us create a list with these file names:

trajectory_files = [ "traj1.xtc" , "traj2.xtc" , "traj3.xtc" ]

And define an empty vector of Result structures:

results = Result[]

Run the calculations in a loop

The calculation on the multiple trajectories is then performed in a simple loop, such as

atoms = PDBTools.readPDB("./system.pdb")
-solute = Selection(atoms,"protein",nmols=1)
-solvent = Selection(atoms,"resname TMAO",,natomspermol=14)
+Multiple trajectories · ComplexMixtures.jl

Working with multiple trajectories

Very commonly, one has multiple trajectories of the same system, and we want to obtain the average results of all trajectories. We provide a simple scheme to average the results of multiple MDDF calculations:

Create a vector of result data structures, without initialization

Let us assume that we have three Gromacs trajectories, with file names traj1.xtc, traj2.xtc, traj3.xtc. First let us create a list with these file names:

trajectory_files = [ "traj1.xtc" , "traj2.xtc" , "traj3.xtc" ]

And define an empty vector of Result structures:

results = Result[]

Run the calculations in a loop

The calculation on the multiple trajectories is then performed in a simple loop, such as

atoms = PDBTools.readPDB("./system.pdb")
+solute = AtomSelection(atoms,"protein",nmols=1)
+solvent = AtomSelection(atoms,"resname TMAO",natomspermol=14)
 for file in trajectory_files
-  trajectory = Trajectory(file,solute,solvent)
-  # compute the MDDF data and push the result to the results array
-  push!(results, mddf(trajectory))
+    trajectory = Trajectory(file,solute,solvent)
+    # compute the MDDF data and push the result to the results array
+    push!(results, mddf(trajectory))
 end

Merge the results of several trajectories, with proper weights

Of course, the resulting results vector will contain at each position the results of each calculation. To merge these results in a single result data structure, use:

R = merge(results)

The R structure generated contains the averaged results of all calculations, with weights proportional to the number of frames of each trajectory. That is, if the first trajectory had 2000 frames, and the second and third trajectories have 1000 frames each, the first trajectory will have a weight of 0.5 on the final results. The merge function can be used to merge previously merged results with new results as well.

Tip

The names of the files and weights are stored in the R.files and R.weights vectors of the results structure:

julia> R.files
 3-element Array{String,1}:
  "./traj1.xtc"
@@ -17,4 +17,4 @@
  0.5
  0.25
  0.25
-

It is not a bad idea to check if that is what you were expecting.

+

It is not a bad idea to check if that is what you were expecting.

diff --git a/dev/options/index.html b/dev/options/index.html index 169c5e8a..7a0a64c4 100644 --- a/dev/options/index.html +++ b/dev/options/index.html @@ -1,5 +1,3 @@ -Options · ComplexMixtures.jl

Options

There are some options to control what exactly is going to be computed to obtain the MDDF. These options can be defined by the user and passed to the mddf function, using, for example:

options = Options(lastframe=1000)
-results = mddf(trajectory,options)

Frame ranges and histogram properties

These are common options that the regular user might want to set in their calculation.

firstframe: Integer, first frame of the trajectory to be considered.

lastframe: Integer, last frame of the trajectory to be considered.

stride: Integer, consider every stride frames, that is, if stride=5 only one in five frames will be considered.

binstep: Real, length of the bin step of the histograms, default = 0.02 Angstroms.

dbulk: Real, distance from which the solution is to be considered as a bulk solution, that is, where the presence of the solute does not affect the structure of the solution anymore. This parameter is important in particular for systems with a single solute molecule (a protein, for example), where the density of the solvent in the box is not the bulk density of the solvent, which must be computed independently. Default: 10 Angstroms.

cutoff: Real, the maximum distance to be considered in the construction of histograms. Default: 10 Angstroms.

usecutoff: true/false: If true, the cutoff distance might be different from dbulk and the density of the solvent in bulk will be estimated from the density within dbulk and cutoff. If false, the density of the solvent is estimated from the density outside dbulk by exclusion. Default: false.

Lower level options

These will probably never be set by the user, unless if dealing with some special system (large very large, or very low density system).

irefatom: Integer, index of the reference atom in the solvent molecule used to compute the shell volumes and domain volumes in the Monte-Carlo volume estimates. The final rdf data is reported for this atom as well. By default, we choose the atom which is closer to the center of coordinates of the molecule, but any choice should be fine.

n_random_samples: Integer, how many samples of random molecules are generated for each solvent molecule to compute the shell volumes and random MDDF counts. Default: 10. Increase this only if you have short trajectory and want to obtain reproducible results for that short trajectory. For long trajectories (most desirable and common), this value can even be decreased to speed up the calculations.

seed: Seed for random number generator. If -1, the seed will be generated from the entropy of the system. If your results are dependent on the seed, is is probable that you do not have enough sampling. Mostly used for testing purposes. Two runs are only identical if ran with the same seed and in serial mode.

StableRNG (::Bool), defaults to false. Use a stable random number generator from the StableRNGs package, to produce identical runs on different architectures and Julia versions. Only used for testing.

nthreads: How many threads to use. By default, it will be the number of physical cores of the computer.

lcell: Integer, the cell length of the linked-cell method (actually the cell length is cutoff/lcell). Default: 1.

GC: Bool, force garbage collection, to avoid memory overflow. Default: true. That this might be required is probably a result of something that can vastly improved in memory management. This may slow down parallel runs significantly if the GC runs too often.

GC_threshold: Float64, minimum fraction of the total memory of the system required to force a GC run. That is, if GC_threshold=0.1, which is the default, every time the free memory becomes less or equal to 10% of the total memory available, a GC run occurs.

Frame statistical reweighing

Compat

Frame reweighing is available in ComplexMixtures 1.4.0 or greater.

Most times the weights of each frame of the trajectory are the same, resulting from some standard MD simulation. If, for some reason, the frames have different statistical weights, the weights can be passed to the as an optional parameter frame_weights.

For example:

julia> using ComplexMixtures
-
-julia> options = Options(frame_weights=[0.2, 0.2, 0.4])

The code above will assign a larger weight to the third frame of the trajectory. These weights are relative (meaning that [1.0, 1.0, 2.0] would produce the same result). What will happen under the hood is that the distance counts of the frames will be multiplied by each frame weight, and normalized for the sum of the weights.

Important: The length of the frame_weights vector must be at least equal to the number of the last frame read from the trajectory. That is, if lastframe is not set, and all the frames will be read, the length of frame_weights must be equal to the length of the trajectory (the stride parameter will skip the information both of the frames and its weights). If lastframe is set, then the length of frame_weights must be at least lastframe (it can be greater, and further values will be ignored). Importantly, the indices of the elements in frame_weights are assumed to correspond to the indices of the frames in the original trajectory file.

+Options · ComplexMixtures.jl

Options

There are some options to control what exactly is going to be computed to obtain the MDDF. These options can be defined by the user and passed to the mddf function, using, for example:

options = Options(lastframe=1000)
+results = mddf(trajectory,options)

Frame ranges and histogram properties

These are common options that the regular user might want to set in their calculation.

firstframe: Integer, first frame of the trajectory to be considered.

lastframe: Integer, last frame of the trajectory to be considered.

stride: Integer, consider every stride frames, that is, if stride=5 only one in five frames will be considered.

binstep: Real, length of the bin step of the histograms, default = 0.02 Angstroms.

dbulk: Real, distance from which the solution is to be considered as a bulk solution, that is, where the presence of the solute does not affect the structure of the solution anymore. This parameter is important in particular for systems with a single solute molecule (a protein, for example), where the density of the solvent in the box is not the bulk density of the solvent, which must be computed independently. Default: 10 Angstroms.

cutoff: Real, the maximum distance to be considered in the construction of histograms. Default: 10 Angstroms.

usecutoff: true/false: If true, the cutoff distance might be different from dbulk and the density of the solvent in bulk will be estimated from the density within dbulk and cutoff. If false, the density of the solvent is estimated from the density outside dbulk by exclusion. Default: false.

Lower level options

These will probably never be set by the user, unless dealing with some special system (a very large or very low density system).

irefatom: Integer, index of the reference atom in the solvent molecule used to compute the shell volumes and domain volumes in the Monte-Carlo volume estimates. The final rdf data is reported for this atom as well. By default, we choose the atom which is closer to the center of coordinates of the molecule, but any choice should be fine.

n_random_samples: Integer, how many samples of random molecules are generated for each solvent molecule to compute the shell volumes and random MDDF counts. Default: 10. Increase this only if you have short trajectory and want to obtain reproducible results for that short trajectory. For long trajectories (most desirable and common), this value can even be decreased to speed up the calculations.

seed: Seed for random number generator. If -1, the seed will be generated from the entropy of the system. If your results are dependent on the seed, it is probable that you do not have enough sampling. Mostly used for testing purposes. Two runs are only identical if run with the same seed and in serial mode.

StableRNG (::Bool), defaults to false. Use a stable random number generator from the StableRNGs package, to produce identical runs on different architectures and Julia versions. Only used for testing.

nthreads: How many threads to use. By default, it will be the number of physical cores of the computer.

lcell: Integer, the cell length of the linked-cell method (actually the cell length is cutoff/lcell). Default: 1.

GC: Bool, force garbage collection, to avoid memory overflow. Default: true. That this might be required is probably a result of something that can be vastly improved in memory management. This may slow down parallel runs significantly if the GC runs too often.

GC_threshold: Float64, minimum fraction of the total memory of the system required to force a GC run. That is, if GC_threshold=0.1, which is the default, every time the free memory becomes less or equal to 10% of the total memory available, a GC run occurs.

Frame statistical reweighing

Compat

Frame reweighing is available in ComplexMixtures 2.0.0 or greater.

Most times the weights of each frame of the trajectory are the same, resulting from some standard MD simulation. If, for some reason, the frames have different statistical weights, the weights can be passed to the mddf function as an optional parameter frame_weights.

For example:

julia> results = mddf(trajectory, options; frame_weights=[0.0, 1.0, 2.0])

The code above will assign a larger weight to the third frame of the trajectory. These weights are relative (meaning that [0.0, 2.0, 4.0] would produce the same result). What will happen under the hood is that the distance counts of the frames will be multiplied by each frame weight, and normalized by the sum of the weights.

Important: The length of the frame_weights vector must be at least equal to the number of the last frame read from the trajectory. That is, if lastframe is not set, and all the frames will be read, the length of frame_weights must be equal to the length of the trajectory (the stride parameter will skip the information both of the frames and its weights). If lastframe is set, then the length of frame_weights must be at least lastframe (it can be greater, and further values will be ignored). Importantly, the indices of the elements in frame_weights are assumed to correspond to the indices of the frames in the original trajectory file.

Compute coordination number only

For some systems, it may be impossible, or too expensive, to compute the normalization of the minimum-distance distribution function. Nevertheless, the coordination number may still be interesting information to be retrieved from the simulations. To run the computation to compute coordination numbers only, do:

julia> results = mddf(trajectory, options; coordination_number_only = true)
Note

With coordination_number_only set to true, the arrays associated to MDDFs and KB integrals will be empty in the output data structure.

ComplexMixtures.OptionsType
struct Options

Structure that contains the detailed input options.

  • firstframe::Int64

  • lastframe::Int64

  • stride::Int64

  • irefatom::Int64

  • n_random_samples::Int64

  • binstep::Float64

  • dbulk::Float64

  • cutoff::Float64

  • usecutoff::Bool

  • lcell::Int64

  • GC::Bool

  • GC_threshold::Float64

  • seed::Int64

  • StableRNG::Bool

  • nthreads::Int64

  • silent::Bool

source
diff --git a/dev/parallel/index.html b/dev/parallel/index.html index 453bd5d2..85b5e527 100644 --- a/dev/parallel/index.html +++ b/dev/parallel/index.html @@ -1,4 +1,4 @@ -Parallel execution · ComplexMixtures.jl

Parallel execution

It is highly recommended to run MDDF calculations in parallel, using multiple processors of a single computer. To run the computation in parallel, initialize julia with the -t N option, where N is the number of processes to be used. For example, to use 8 parallel processes, use:

julia -t 8 example.jl

The computation will use a number of parallel processes equal to N.

Note

The number of threads used for computation of the MDDF is the number of threads available to Julia. Many computers allow hyperthreading, and not necessarily this this beneficial for the execution of this package. The optimal number of threads may vary.

Independently of the number of threads initialized with the -t command-line parameter, the number of processes launched by ComplexMixtures in any given computation can be adjusted by the Options(nthreads=N) option. This won't provide any speedup if the optional number of threads is greater than the number of threads available to Julia at runtime.

Warning

If the calculations get Killed by no apparent reason, that is probably because you are running out of memory because of the many parallel computations running. One way to alleviate this problem is to force garbage collection, using

options = Options(GC=true,GC_threshold=0.5)
+Parallel execution · ComplexMixtures.jl

Parallel execution

It is highly recommended to run MDDF calculations in parallel, using multiple processors of a single computer. To run the computation in parallel, initialize julia with the -t N option, where N is the number of processes to be used. For example, to use 8 parallel processes, use:

julia -t 8 example.jl

The computation will use a number of parallel processes equal to N. Use -t auto to automatically pick the number of threads available in your computer.

Note

The number of threads used for computation of the MDDF is the number of threads available to Julia. Many computers allow hyperthreading, and this is not necessarily beneficial for the execution of this package. The optimal number of threads may vary.

Independently of the number of threads initialized with the -t command-line parameter, the number of processes launched by ComplexMixtures in any given computation can be adjusted by the Options(nthreads=N) option. This won't provide any speedup if the optional number of threads is greater than the number of threads available to Julia at runtime.

Warning

If the calculations get Killed for no apparent reason, that is probably because you are running out of memory because of the many parallel computations running. One way to alleviate this problem is to force garbage collection, using

options = Options(GC=true,GC_threshold=0.5)
 R = mddf(trajectory,options)
-

The GC_threshold=0.5 indicates that if the free memory is smaller than 50% of the total memory of the machine, a garbage-collection run will occur. The default parameters are GC=true and GC_threshold=0.3.

+

The GC_threshold=0.5 indicates that if the free memory is smaller than 50% of the total memory of the machine, a garbage-collection run will occur. The default parameters are GC=true and GC_threshold=0.3.

Read the predefinition of atom groups section if you are experiencing memory issues.

diff --git a/dev/python/index.html b/dev/python/index.html index 2d514236..20486b0a 100644 --- a/dev/python/index.html +++ b/dev/python/index.html @@ -1,71 +1,86 @@ -From Python · ComplexMixtures.jl

From Python

Note

Most features of the package are available through this Python interface. However, some flexibility may be reduced and, also, the tuning of the plot appearance is left to the user, as it is expected that he/she is fluent with some tools within Python if choosing this interface.

Python 3 or greater is required.

Please report issues, incompatibilities, or any other difficulty in using the package and its interface.

The following examples consider a system composed of a protein solvated by a mixture of water and glycerol, built with Packmol. The simulations were performed with NAMD with periodic boundary conditions and an NPT ensemble at room temperature and pressure. Molecular pictures were produced with VMD.

+From Python · ComplexMixtures.jl

From Python

Note

Most features of the package are available through this Python interface. However, some flexibility may be reduced and, also, the tuning of the plot appearance is left to the user, as it is expected that he/she is fluent with some tools within Python if choosing this interface.

Python 3 or greater is required.

Please report issues, incompatibilities, or any other difficulty in using the package and its interface.

The following examples consider a system composed of a protein solvated by a mixture of water and glycerol, built with Packmol. The simulations were performed with NAMD with periodic boundary conditions and an NPT ensemble at room temperature and pressure. Molecular pictures were produced with VMD.

-

Image of the system of the example: a protein solvated by a mixture of glycerol (green) and water, at a concentration of 50%vv. The complete example is available at this repository.

Loading the ComplexMixtures.py file

The Python interface of ComplexMixtures is implemented in the ComplexMixtures.py file. Just download it from the link and save it in a known path.

Installing juliacall

juliacall is a package that allows calling Julia programs from Python. Install it with

pip install juliacall

Installing Julia and underlying packages

Once juliacall is installed, from within Python, execute:

import ComplexMixtures

here we assume that the ComplexMixtures.py file is in the same directory where you launched Python.

Note

On the first time you execute this command, the Julia executable and the required Julia packages (ComplexMixtures and PDBTools) will be downloaded and installed. At the end of the process quit Python (not really required, but we prefer to separate the installation from the use of the module).

How to run this example

The Data directory contains a PDB file of the system (system.pdb) and a sample from the trajectory (glyc50.dcd), with a few frames. It also contains the result of running the mddf calculation on the complete trajectory, results_glyc50.json. This last file was produced by ComplexMixtures, as indicated in the following examples.

The sample trajectory is provided so that the first example can be run, yet do not expect that the results are the same, as the sampling is much lower in this case. The complete trajectory can be retrieved from this link (3GB file).

We assume that you navigated to the directory of the example, and copied the Python module file to it:

git clone https://github.com/m3g/ComplexMixturesExamples
-cd ComplexMixturesExamples/Protein_in_Glycerol/MDDF
-cp /path/to/ComplexMixtures.py ./
-export JULIA_NUM_THREADS=8

The last line will allow Julia to execute multi-threaded, which will improve a lot the performance on most machines. Set the number of threads to the number of cores of your computer.

Minimum-Distance Distribution function

Note that the example here follows an identical syntax to the Julia example, except that we qualify the name of the loaded module and implicitly load the PDBTools package.

The script to compute the MDDFs and associated data from within Python is, then:

import ComplexMixtures as cm
+

Image of the system of the example: a protein solvated by a mixture of glycerol (green) and water, at a concentration of 50%vv.

Loading the ComplexMixtures.py file

The Python interface of ComplexMixtures is implemented in the ComplexMixtures.py file. Just download it from the link and save it in a known path.

Installing juliacall

juliacall is a package that allows calling Julia programs from Python. Install it with

pip install juliacall

Installing Julia and underlying packages

Once juliacall is installed, from within Python, execute:

import ComplexMixtures

here we assume that the ComplexMixtures.py file is in the same directory where you launched Python.

Note

On the first time you execute this command, the Julia executable and the required Julia packages (ComplexMixtures and PDBTools) will be downloaded and installed. At the end of the process quit Python (not really required, but we prefer to separate the installation from the use of the module).

Example

Index

Data, packages, and execution

The files required to run this example are:

To start, create a directory and copy the ComplexMixtures.py file to it. Navigate into this directory and set the number of threads that Julia will use to run the calculations in parallel. Typically, in bash, this means defining the following environment variable:

export JULIA_NUM_THREADS=8

where 8 is the number of CPU cores available in your computer. For further information about Julia multi-threading, and on setting this environment variable in other systems, please read this section of the Julia manual.

Finally, each script can be executed with, for example:

python3 script.py

Minimum-Distance Distribution function

Complete example code: click here!
# The ComplexMixtures.py file is assumed to be in the current
+# directory.
+# Obtain it from: 
+# https://m3g.github.io/ComplexMixtures.jl/stable/assets/ComplexMixtures.py 
+import ComplexMixtures as cm
 
 # Load the pdb file of the system using `PDBTools`:
-atoms = cm.readPDB("../Data/system.pdb")
+atoms = cm.readPDB("./system.pdb")
 
 # Create arrays of atoms with the protein and Glycerol atoms, 
 # using the `select` function of the `PDBTools` package:
 protein = cm.select(atoms,"protein")
 glyc = cm.select(atoms,"resname GLYC")
+water = cm.select(atoms,"water")
 
 # Setup solute and solvent structures, required for computing the MDDF, 
-# with `Selection` function of the `ComplexMixtures` package:
-solute = cm.Selection(protein,nmols=1)
-solvent = cm.Selection(glyc,natomspermol=14)
+# with `AtomSelection` function of the `ComplexMixtures` package:
+solute = cm.AtomSelection(protein, nmols=1)
+solvent = cm.AtomSelection(glyc, natomspermol=14)
 
 # Read and setup the Trajectory structure required for the computations:
-trajectory = cm.Trajectory("../Data/glyc50_complete.dcd",solute,solvent)
+trajectory = cm.Trajectory("./glyc50_sample.dcd", solute, solvent)
 
 # Run the calculation and get results:
 results = cm.mddf(trajectory)
 
 # Save the results to recover them later if required
-cm.save(results,"./glyc50.json")
Note

To change the options of the calculation, set the Options structure accordingly and pass it as a parameter to mddf. For example:

options = cm.Options(cutoff=10.)
-results = cm.mddf(trajectory,options)

The complete set of options available is described here.

The trajectory that was loaded was for a toy-example. The complete trajectory is available here, but it is a 3GB file. The same procedure above was performed with that file and produced the results_Glyc50.json file, which is available in the Data directory here. We will continue with this file instead.

Produce plots

MDDF and Kirkwood-Buff integrals

import ComplexMixtures as cm
+cm.save(results,"./glyc50.json")
+print("Results saved to glyc50.json")
+
+# Compute the water distribution function around the protein:
+solvent = cm.AtomSelection(water, natomspermol=3)
+trajectory = cm.Trajectory("./glyc50_sample.dcd", solute, solvent)
+results = cm.mddf(trajectory)
+cm.save(results,"./water.json")
+print("Results saved to water.json")

Note that the example here follows an identical syntax to the Julia example, except that we qualify the name of the loaded module and implicitly load the PDBTools package.

The script to compute the MDDFs and associated data from within Python is, then:

Note

To change the options of the calculation, set the Options structure accordingly and pass it as a parameter to mddf. For example:

options = cm.Options(cutoff=10.)
+results = cm.mddf(trajectory,options)

The complete set of options available is described here.

The trajectory that was loaded was for a toy-example. The complete trajectory is available here, but it is a 3GB file. The same procedure above was performed with that file and produced the results_Glyc50.json file, which is available in the Data directory here. We will continue with this file instead.

MDDF and KB integrals

The following python script will produce the typical MDDF and KB integral plot, for the sample system. The noise in the figures is because the trajectory sample is small.

Complete example code: click here!
import ComplexMixtures as cm
 import matplotlib.pyplot as plt
 
 # Load the actual results obtained with the complete simulation:
-results = cm.load("../Data/results_glyc50.json")
+glyc_results = cm.load("./glyc50.json")
+water_results = cm.load("./water.json")
 
 # Plot MDDF and KB
 fig, axs = plt.subplots(2)
-axs[0].plot(results.d, results.mddf)
+axs[0].plot(glyc_results.d, glyc_results.mddf, label="Glycerol")
+axs[0].plot(water_results.d, water_results.mddf, label="Water")
 axs[0].set(ylabel="MDDF")
 
 # Plot KB integral
-axs[1].plot(results.d, results.kb)
-axs[1].set(xlabel="distance / Angs", ylabel="MDDF")
+axs[1].plot(glyc_results.d, glyc_results.kb)
+axs[1].plot(water_results.d, water_results.kb)
+axs[1].set(xlabel="distance / Angs", ylabel="KB integral")
+plt.tight_layout()
 
-plt.savefig("mddf_kb.png")

Atomic contributions to the MDDF

Selecting the atoms corresponding to the hydroxyl groups, and of the aliphatic carbons of Glycerol. Here we list the types of the atoms as specified by the force-field.

import ComplexMixtures as cm
+plt.savefig("mddf_kb.png")

In the top plot, we see that glycerol and water display clear solvation shells around the protein, with glycerol having a greater peak. This accumulation leads to a greater (less negative) KB integral for glycerol than water, as shown in the second plot. This indicates that the protein is preferentially solvated by glycerol in this system (assuming that sampling is adequate in this small trajectory).

Atomic contributions to the MDDF

The following script produces a plot of the group contributions of Glycerol to the total MDDF function. The Glycerol MDDF is split into the contributions of the hydroxyl and aliphatic groups.

Complete example code: click here!
# Load packages
+import ComplexMixtures as cm
 import matplotlib.pyplot as plt
 
-atoms = cm.readPDB("../Data/system.pdb")
-protein = cm.select(atoms,"protein")
-glyc = cm.select(atoms,"resname GLYC")
-solute = cm.Selection(protein,nmols=1)
-solvent = cm.Selection(glyc,natomspermol=14)
+# Read the pdb file and set solvent and solute groups
+atoms = cm.readPDB("./system.pdb")
+protein = cm.select(atoms, "protein")
+glyc = cm.select(atoms, "resname GLYC")
 
-# load results
-results = cm.load("../Data/results_glyc50.json")
+# load previously computed MDDF results
+results = cm.load("./glyc50.json")
 
 # Select atoms by name
 hydroxyls = cm.list(["O1","O2","O3","H1","H2","H3"])
 aliphatic = cm.list(["C1","C2","HA","HB","HC","HD"])
 
-# Extract the contributions of the groups above
-hydr_contributions = cm.contributions(solvent,results.solvent_atom,hydroxyls)
-aliph_contributions = cm.contributions(solvent,results.solvent_atom,aliphatic)
+# Extract the contributions of the Glycerol hydroxyls and aliphatic groups
+hydr_contributions = cm.contributions(results, cm.SolventGroup(hydroxyls))
+aliph_contributions = cm.contributions(results, cm.SolventGroup(aliphatic))
 
 # Plot
-plt.plot(results.d, results.mddf)
-plt.plot(results.d, hydr_contributions)
-plt.plot(results.d, aliph_contributions)
+plt.plot(results.d, results.mddf, label="Total MDDF")
+plt.plot(results.d, hydr_contributions, label="Hydroxyls")
+plt.plot(results.d, aliph_contributions, label="Aliphatic")
+plt.legend()
 plt.xlabel("distance / Angs")
 plt.ylabel("MDDF")
-plt.savefig("group_contributions.png")
Note

The syntax here diverges from the Julia-only examples by requiring the lists of names to be converted to Julia arrays, which happens by using the cm.list(python_list) function calls.

+plt.savefig("group_contributions.png")

Despite the low sampling, it is clear that hydroxyl groups contribute to the greater peak of the distribution, at hydrogen-bonding distances, as expected. The contributions of the aliphatic groups to the MDDF occur at longer distances, associated with non-specific interactions.

Note

The syntax here diverges from the Julia-only examples by requiring the lists of names to be converted to Julia arrays, which happens by using the cm.list(python_list) function calls.

diff --git a/dev/quickguide/index.html b/dev/quickguide/index.html index dddd44ef..7379c1fe 100644 --- a/dev/quickguide/index.html +++ b/dev/quickguide/index.html @@ -1,44 +1,54 @@ -Quick Guide · ComplexMixtures.jl

Quick Guide

Of course, follow the installation instructions first. A complete working example is shown below, and in the section that follows each command is described in detail.

Complete example

Here we show the input file required for the study of the solvation of a protein by the TMAO solvent, which is a molecule with 14 atoms. The protein is assumed to be at infinite dilution in the simulation. The trajectory of the simulation is in DCD format in this example, which is the default output of NAMD and CHARMM simulation packages.

# Activate environment (see the Installation -> Recommended Workflow manual section)
-import Pkg; Pkg.activate("/home/user/MyNewPaper")
+Quick Guide · ComplexMixtures.jl

Quick Guide

Of course, follow the installation instructions first. A complete working example is shown below, and in the section that follows each command is described in detail.

Basic example

Here we show the input file required for the study of the solvation of a protein by the TMAO solvent, which is a molecule with 14 atoms. The protein is assumed to be at infinite dilution in the simulation. The trajectory of the simulation is in DCD format in this example, which is the default output of NAMD and CHARMM simulation packages.

Input files

The files necessary to run this would be:

  • system.pdb: a PDB file of the complete simulated system.
  • trajectory.dcd: the simulation trajectory, here exemplified in the DCD format.
  • script.jl: the Julia script, described below.

These files are not provided for this example. For complete running examples, please check our examples section.

The Julia script

# Activate environment (see the Installation -> Recommended Workflow manual section)
+import Pkg;
+Pkg.activate(".");
 
 # Load packages
+using ComplexMixtures
 using PDBTools
-using ComplexMixtures 
 using Plots
 
 # Load PDB file of the system
 atoms = readPDB("./system.pdb")
 
 # Select the protein and the TMAO molecules
-protein = select(atoms,"protein")
-tmao = select(atoms,"resname TMAO")
-
-# Setup solute and solvent structures
-solute = Selection(protein,nmols=1)
-solvent = Selection(tmao,natomspermol=14)
-
-# Setup the Trajectory structure
-trajectory = Trajectory("./trajectory.dcd",solute,solvent)
-
-# Run the calculation and get results
+protein = select(atoms, "protein")
+tmao = select(atoms, "resname TMAO")
+
+# Setup solute and solvent structures. We need to provide
+# either the number of atoms per molecule, or the number
+# of molecules in each selection.
+solute = AtomSelection(protein, nmols=1)
+solvent = AtomSelection(tmao, natomspermol=14)
+
+# Setup the Trajectory structure: this will define which
+# coordinates are used to compute the MDDF when reading
+# the trajectory file.
+trajectory = Trajectory("./trajectory.dcd", solute, solvent)
+
+# Run the calculation and get results: this is the computationally
+# intensive part of the calculation.
 results = mddf(trajectory)
 
 # Save the results to recover them later if required
-save(results,"./results.json")
-
-# Plot the some of the most important results 
-plot(results.d,results.mddf,xlabel="d",ylabel="MDDF") # plot the MDDF
+save(results, "./results.json")
+
+# Plot some of the most important results.
+#
+# - The results.d array contains the distances. 
+# - The results.mddf array contains the MDDF.
+# - The results.kb array contains the Kirkwood-Buff integrals.
+#
+plot(results.d, results.mddf, xlabel="d / Å", ylabel="MDDF") # plot the MDDF
 savefig("./mddf.pdf")
-plot(results.d,results.kb,xlabel="d",ylabel="KB") # plot the KB 
-savefig("./kb.pdf")

Running the example

Given that this code is saved into a file named example.jl, it can be run within the Julia REPL with:

julia> include("example.jl")

or directly with:

julia -t auto example.jl

where -t auto will launch julia with multi-threading. It is highly recommended to use multi-threading!

Note

Some newer CPUs have "fast" and "slow" cores, designed for performance or energy savings. Thus using all cores, with -t auto, may not be the best strategy for optimal performance. Experimenting with different numbers of cores using -t N, where N is the number of cores used, is always necessary for tuning performance.

Detailed description of the example

Start julia and load the ComplexMixtures package, using:

using ComplexMixtures

And here we will use the PDBTools package to obtain the selections of the solute and solvent molecules:

using PDBTools

(see Set solute and solvent for details).

The fastest way to understand how to use this package is through an example.

Let us consider a system of three components: a protein, water, and a cosolvent: TMAO (trimethylamine-N-oxide), which is a common osmolyte known to stabilize protein structures. A picture of this system is shown below, with the protein in blue, water, and TMAO molecules. The system was constructed with Packmol and the figure was produced with VMD.

+plot(results.d, results.kb, xlabel="d / Å", ylabel="KB / cm³ mol⁻¹") # plot the KB +savefig("./kb.pdf")

Given that this code is saved into a file named script.jl, it can be run within the Julia REPL with:

julia> include("script.jl")

or directly with:

julia -t auto script.jl

where -t auto will launch julia with multi-threading. It is highly recommended to use multi-threading!

Note

Some newer CPUs have "fast" and "slow" cores, designed for performance or energy savings. Thus using all cores, with -t auto, may not be the best strategy for optimal performance. Experimenting with different numbers of cores using -t N, where N is the number of cores used, is always necessary for tuning performance.

Detailed description of the example

Start julia and load the ComplexMixtures package, using:

using ComplexMixtures

And here we will use the PDBTools package to obtain the selections of the solute and solvent molecules:

using PDBTools

(see Set solute and solvent for details).

The fastest way to understand how to use this package is through an example.

Let us consider a system of three components: a protein, water, and a cosolvent: TMAO (trimethylamine-N-oxide), which is a common osmolyte known to stabilize protein structures. A picture of this system is shown below, with the protein in blue, water, and TMAO molecules. The system was constructed with Packmol and the figure was produced with VMD.

-

We want to study the interactions of the protein with TMAO in this example. The computation of the MDDF is performed by defining the solute and solvent selections, and running the calculation on the trajectory.

Define the protein as the solute

To define the protein as the solute, we will use the PDBTools package, which provides a handy selection syntax. First, read the PDB file using

atoms = readPDB("./system.pdb")

Then, let us select the protein atoms (here we are using the PDBTools.select function):

protein = select(atoms,"protein")

And, finally, let us use the Selection function to setup the structure required by the MDDF calculation:

solute = Selection(protein,nmols=1)
Note

It is necessary to indicate how many molecules the selection contains (in this case, nmols=1), so that ComplexMixtures knows that the solute is to be considered as a single structure. In this case there is no ambiguity, but if the solute was a micelle, for example, this option would let ComplexMixtures know that one wants to consider the micelle as a single structure.

Define TMAO as the solvent to be considered

Equivalently, the solvent is set up with:

tmao = select(atoms,"resname TMAO")
-solvent = Selection(tmao,natomspermol=14)
-
Note

Here we opted to provide the number of atoms of a TMAO molecule (with the natomspermol keyword). This is generally more practical for small molecules than to provide the number of molecules.

Set the Trajectory structure

The solute and solvent data structures are then fed into the Trajectory data structure, together with the trajectory file name, with:

trajectory = Trajectory("trajectory.dcd",solute,solvent)

In this case, the trajectory is of NAMD "dcd" format. All formats supported by Chemfiles are automatically recognized.

Finally, run the computation and get the results:

If default options are used (as the bin size of the histograms, read all frames without skipping any), just run the mddf with:

results = mddf(trajectory)
-

Some optional parameters for the computation are available in the Options section.

The results data structure obtained

The results data structure contains all the results of the MDDF calculation, including:

results.d : Vector containing the distances to the solute.

results.mddf : Vector containing the minimum-distance distribution function at each distance.

That means, for example, that

plot(results.d,results.mddf,xlabel="d / \AA",ylabel="MDDF") 
+

We want to study the interactions of the protein with TMAO in this example. The computation of the MDDF is performed by defining the solute and solvent selections, and running the calculation on the trajectory.

Define the protein as the solute

To define the protein as the solute, we will use the PDBTools package, which provides a handy selection syntax. First, read the PDB file using

atoms = readPDB("./system.pdb")

Then, let us select the protein atoms (here we are using the PDBTools.select function):

protein = select(atoms, "protein")

And, finally, let us use the AtomSelection function to setup the structure required by the MDDF calculation:

solute = AtomSelection(protein, nmols=1)
Note

It is necessary to indicate how many molecules the selection contains (in this case, nmols=1), so that ComplexMixtures knows that the solute is to be considered as a single structure. In this case there is no ambiguity, but if the solute was a micelle, for example, this option would let ComplexMixtures know that one wants to consider the micelle as a single structure.

Define TMAO as the solvent to be considered

Equivalently, the solvent is set up with:

tmao = select(atoms, "resname TMAO")
+solvent = AtomSelection(tmao, natomspermol=14)
Note

Here we opted to provide the number of atoms of a TMAO molecule (with the natomspermol keyword). This is generally more practical for small molecules than to provide the number of molecules.

Set the Trajectory structure

The solute and solvent data structures are then fed into the Trajectory data structure, together with the trajectory file name, with:

trajectory = Trajectory("trajectory.dcd", solute, solvent)

In this case, the trajectory is of NAMD "DCD" format. All formats supported by Chemfiles are automatically recognized.

Finally, run the computation and get the results:

If default options are used (as the bin size of the histograms, read all frames without skipping any), just run the mddf with:

results = mddf(trajectory)
+

Some optional parameters for the computation are available in the Options section. Depending on the number of atoms and trajectory length, this can take a while. Computing a MDDF is much more expensive than computing a regular radial distribution function, because the normalization requires the generation of an ideal distribution of the molecules in the system.

The results data structure obtained

The results data structure contains all the results of the MDDF calculation, including:

results.d : Vector containing the distances to the solute.

results.mddf : Vector containing the minimum-distance distribution function at each distance.

That means, for example, that

plot(results.d, results.mddf, xlabel="d / Å", ylabel="mddf(d)") 
 

results in the expected plot of the MDDF of TMAO as a function of the distance to the protein:

-

The Kirkwood-Buff integral corresponding to that distribution is provided in the results.kb vector, and can be also directly plotted with

plot(results.d,results.kb,xlabel="d / \AA",ylabel="MDDF") 

to obtain:

+

The Kirkwood-Buff integral corresponding to that distribution is provided in the results.kb vector, and can be also directly plotted with

plot(results.d, results.kb, xlabel="d / Å", ylabel="KB(d) / L / mol") 

to obtain:

-

See the Atomic and group contributions section for a detailed account on how to obtain a molecular picture of the solvation by splitting the MDDF in the contributions of each type of atom of the solvent, each type of residue of the protein, etc.

Save the results

The results can be saved into a file (with JSON format) with:

save(results,"./results.json")

And these results can be loaded afterwards with:

load("./results.json")

Alternatively, a human-readable set of output files can be obtained to be analyzed in other software (or plotted with alternative tools), with

write(results,"./results.dat")
+

See the Atomic and group contributions section for a detailed account on how to obtain a molecular picture of the solvation by splitting the MDDF in the contributions of each type of atom of the solvent, each type of residue of the protein, etc.

Save the results

The results can be saved into a file (with JSON format) with:

save(results, "./results.json")

And these results can be loaded afterwards with:

load("./results.json")

Alternatively, a human-readable set of output files can be obtained to be analyzed in other software (or plotted with alternative tools), with

write(results,"./results.dat")
diff --git a/dev/references/index.html b/dev/references/index.html index e6e99ae9..1c051516 100644 --- a/dev/references/index.html +++ b/dev/references/index.html @@ -1,2 +1,2 @@ -References · ComplexMixtures.jl

References

Primary citations

If this package was useful to you, please cite the following papers:

  • L. Martínez, ComplexMixtures.jl: Investigating the structure of solutions of complex-shaped molecules from a solvent-shell perspective. J. Mol. Liq. 347, 117945, 2022. [Full Text]

  • L. Martínez, S. Shimizu, Molecular interpretation of preferential interactions in protein solvation: a solvent-shell perspective by means of minimum-distance distribution functions. J. Chem. Theor. Comp. 13, 6358–6372, 2017. [Full Text]

Applications and examples

  • A. F. Pereira, V. Piccoli, L. Martínez, Trifluoroethanol direct interactions with protein backbones destabilize alpha-helices. J. Mol. Liq. 365, 120209, 2022. [Full Text]

  • V. Piccoli, L. Martínez, Ionic liquid solvation of proteins in native and denatured states. J. Mol. Liq. 363, 119953, 2022. [Full Text]

  • V. Piccoli, L. Martínez, Correlated counterion effects in the solvation of proteins by ionic-liquids. J. Mol. Liq. 320, 114347, 2020. [Full Text]

  • I. P. de Oliveira, L. Martínez, The shift in urea orientation at protein surfaces at low pH is compatible with a direct mechanism of protein denaturation. Phys. Chem. Chem. Phys. 22, 354-367, 2020. [Full Text]

  • I. P. de Oliveira, L. Martínez, Molecular basis for competitive solvation of the Burkholderia cepacia lipase by sorbitol and urea. Phys. Chem. Chem. Phys. 18, 21797-21808, 2016. [Full Text]

See also

  • Packmol: A package for building initial configurations for molecular dynamics simulations.

  • CellListMap.jl: Efficient and customizable implementation of cell lists, which allows the computation of general properties dependent on distances of particles within a cutoff, for example short-range potentials, forces, neighbor lists, etc.

  • MDLovoFit: Automatic identification of mobile and rigid substructures in molecular dynamics simulations and fractional structural fluctuation analysis.

+References · ComplexMixtures.jl

References

Primary citations

If this package was useful to you, please cite the following papers:

  • L. Martínez, ComplexMixtures.jl: Investigating the structure of solutions of complex-shaped molecules from a solvent-shell perspective. J. Mol. Liq. 347, 117945, 2022. [Full Text]

  • L. Martínez, S. Shimizu, Molecular interpretation of preferential interactions in protein solvation: a solvent-shell perspective by means of minimum-distance distribution functions. J. Chem. Theor. Comp. 13, 6358–6372, 2017. [Full Text]

Applications and examples

  • A. F. Pereira, V. Piccoli, L. Martínez, Trifluoroethanol direct interactions with protein backbones destabilize alpha-helices. J. Mol. Liq. 365, 120209, 2022. [Full Text]

  • V. Piccoli, L. Martínez, Ionic liquid solvation of proteins in native and denatured states. J. Mol. Liq. 363, 119953, 2022. [Full Text]

  • V. Piccoli, L. Martínez, Correlated counterion effects in the solvation of proteins by ionic-liquids. J. Mol. Liq. 320, 114347, 2020. [Full Text]

  • I. P. de Oliveira, L. Martínez, The shift in urea orientation at protein surfaces at low pH is compatible with a direct mechanism of protein denaturation. Phys. Chem. Chem. Phys. 22, 354-367, 2020. [Full Text]

  • I. P. de Oliveira, L. Martínez, Molecular basis for competitive solvation of the Burkholderia cepacia lipase by sorbitol and urea. Phys. Chem. Chem. Phys. 18, 21797-21808, 2016. [Full Text]

See also

  • Packmol: A package for building initial configurations for molecular dynamics simulations.

  • CellListMap.jl: Efficient and customizable implementation of cell lists, which allows the computation of general properties dependent on distances of particles within a cutoff, for example short-range potentials, forces, neighbor lists, etc.

  • MDLovoFit: Automatic identification of mobile and rigid substructures in molecular dynamics simulations and fractional structural fluctuation analysis.

diff --git a/dev/results/index.html b/dev/results/index.html index 1fbc8913..a029c56f 100644 --- a/dev/results/index.html +++ b/dev/results/index.html @@ -1,5 +1,5 @@ -Results · ComplexMixtures.jl

Results

The results of a MDDF calculation are returned in a data structure which contains the MDDF, KB integrals, and atomic contributions. The following section will assume that the computation was performed by calling the mddf function with

results = mddf(trajectory)

such that the results variable contains the Result data structure. By default, the histograms contain 500 bins (binstep=0.02 and cutoff=10.) such that all data-vectors will contain 500 lines.

To learn how to save and load saved data, read the next section.

The Result data structure: main data

The most important data to be read from results are the distances, minimum-distance distribution function, and KB integrals. These data are stored in the following vectors:

Distances of the histograms: results.d

The following vector will contain values ranging from 0. to cutoff, and the distance at each bin is the distance in that bin for which half of the volume of the bin is within d, and half of the volume is above d, if the volume was spherical:

julia> results.d
+Results · ComplexMixtures.jl

Results

The results of a MDDF calculation are returned in a data structure which contains the MDDF, KB integrals, and atomic contributions. The following section will assume that the computation was performed by calling the mddf function with

results = mddf(trajectory)

such that the results variable contains the Result data structure. By default, the histograms contain 500 bins (binstep=0.02 and cutoff=10.) such that all data-vectors will contain 500 lines.

To learn how to save and load saved data, read the next section.

The Result data structure: main data

The most important data to be read from results are the distances, minimum-distance distribution function, and KB integrals. These data are stored in the following vectors:

Distances of the histograms: results.d

The following vector will contain values ranging from 0. to cutoff, and the distance at each bin is the distance in that bin for which half of the volume of the bin is within d, and half of the volume is above d, if the volume was spherical:

julia> results.d
 500-element Array{Float64,1}:
  0.015874010519682
  0.033019272488946275
@@ -21,4 +21,4 @@
      ⋮
     0.72186381783
     1.13624162115

A typical plot of results.kb as a function of results.d will look like:

Thus, this plot was obtained with the following code:

using Plots
-plot(results.d,results.kb,xlabel="d/A",ylabel="mddf(d) / L/mol") 

Units

  • The distance is assumed to be in Å, as this is the most common distance units in molecular simulations. The coordinates of the atoms are assumed be provided in Å.

  • The minimum-distance distribution function is unit-less, since it is the ratio of the density at each distance divided by an ideal-gas density.

  • The Kirkwood-Buff integrals are returned in cm³ mol⁻¹, if the coordinates were provided in Å.

Warning

If the coordinates are not in Å, the calculation will proceed normally, but the units of the KB integrals, which has units of volume per mol, should be converted to conform the length unit provided.

Coordination number and other data

Obtaining the MDDF involves the computation of some intermediate properties that are frequently useful for additional solution structure analysis. In particular, the coordination numbers are computed. For example, the coordination number as a function from the distance to the solute can be retrieved from a Results data structure with:

coordination_number = results.coordination_number

and this data can be plotted against the distances by:

plot(result.d,results.coordination_number)

The coordination number of subgroups can also be obtained, as explained in the Coordination number section.

The complete data available is:

ParameterMeaningType of valueComment
dVector of distances of the histograms.Vector{Float64}To be used as the x coordinate on plotting any of the data.
md_countNon-normalized count of minimum distances at each d.Vector{Float64}This is the number of minimum distances found at each histogram bin, without normalization. Usually this is not interesting to analyze, because it is dependent on the bin size.
md_count_randomNumber of minimum distances found at each histogram bin for the random distribution.Vector{Float64}This is the normalization required to convert the md_count array into the minimum-distance distribution.
coordination_numberCumulative number of sites found for each histogram distance.Vector{Float64}This is the coordination number, that is, the number of sites found cumulative up to each distance, without any normalization.
coordination_number_randomCumulative site count for the random distribution.Vector{Float64}Usually not interesting for analysis.
mddfThe final distribution function.Vector{Float64}This is the MDDF computed (md_count normalized by md_count_random). It is the main result of the calculation.
kbThe final Kirkwood-Buff integral.Vector{Float64}This is the final KB integral, as a function of the integration distance from the solute. Computed as coordination_number - coordination_number_random
solute_atomAtomic contributions of the solute.Matrix{Float64}This is a matrix with nbins lines and solute.natomspermol columns, containing the atomic contributions of each solute atom to the complete MDDF.
solvent_atomAtomic contributions of the solvent.Matrix{Float64}This is a matrix with nbins lines and solvent.natomspermol columns, containing the atomic contributions of each solvent atom to the complete MDDF.
density.soluteDensity (concentration) of the solute in the complete simulation box.Float64In units of molecules/$\textrm{\AA}^3$
density.solventDensity (concentration) of the solvent in the complete simulation box.Float64In units of molecules/$\textrm{\AA}^3$
density.solvent_bulkDensity (concentration) of the solute in the bulk region.Float64In units of molecules/$\textrm{\AA}^3$
volumeVolume measures.VolumeContains the total volume of the simulation, the bulk volume, the volume of the solute domain and the shell volume of each bin of the histogram. These are computed by numerical integration from the random distributions.
filesList of files read.Vector{String}
weightsWeights of each file in the final counts.Vector{Float64}If the trajectories have different lengths or number of frames, the weights are adapted accordingly.

Other Result parameters available which are set at Options:

ParameterMeaningType of valueComment
nbinsNumber of bins of the histograms.Int
dbulkDistance from solute of bulk solution.Float64
cutoffMaximum distance to be considered for histograms.Float64
autocorrelationThe solute is the same as the solvent?BoolAutomatically set if solute == solvent.
soluteProperties of the soluteSolSummaryContains the number of atoms, number of atoms per molecule and number of molecules of the solute.
solventProperties of the solvent.SolSummaryContains the number of atoms, number of atoms per molecule and number of molecules of the solvent.
irefatomThis is a reference atom that is used to generate random rotations and translations internally.IntCounts of the distributions for this atom are performed automatically to obtain radial (or proximal) distribution functions. Can be used for testing purposes.
rdf_countThis is the md_count minimum distance count of irefatom.Vector{Float64}This corresponds to the conventional radial distribution function if the solute contains only one atom.
rdf_count_randomMinimum distance of irefatom count for the random distribution.Vector{Float64}
rdfDistribution function computed from the irefatom distribution. It is a conventional rdf if the solvent has only one atom.Vector{Float64}
kb_rdfKirkwood-Buff integral computed from the irefatom distribution.Vector{Float64}This must converge, at long distances, to the same value as kb, and can be used for testing.
optionsCalculation options.OptionsCarries (some redundant) options set by the user.
lastframe_readLast frame read from the trajectory.Int
n_frames_readNumber of frames read from the trajectory.IntCan differ from lastframe_read if stride != 1
+plot(results.d,results.kb,xlabel="d/A",ylabel="mddf(d) / L/mol")

Units

  • The distance is assumed to be in Å, as this is the most common distance unit in molecular simulations. The coordinates of the atoms are assumed to be provided in Å.

  • The minimum-distance distribution function is unit-less, since it is the ratio of the density at each distance divided by an ideal-gas density.

  • The Kirkwood-Buff integrals are returned in cm³ mol⁻¹, if the coordinates were provided in Å.

Warning

If the coordinates are not in Å, the calculation will proceed normally, but the units of the KB integrals, which have units of volume per mol, should be converted to conform to the length unit provided.

Coordination number and other data

Obtaining the MDDF involves the computation of some intermediate properties that are frequently useful for additional solution structure analysis. In particular, the coordination numbers are computed. For example, the coordination number as a function of the distance to the solute can be retrieved from a Result data structure with:

coordination_number = results.coordination_number

and this data can be plotted against the distances by:

plot(results.d,results.coordination_number)

The coordination number of subgroups can also be obtained, as explained in the Coordination number section.

The complete data available is:

ParameterMeaningType of valueComment
dVector of distances of the histograms.Vector{Float64}To be used as the x coordinate on plotting any of the data.
md_countNon-normalized count of minimum distances at each d.Vector{Float64}This is the number of minimum distances found at each histogram bin, without normalization. Usually this is not interesting to analyze, because it is dependent on the bin size.
md_count_randomNumber of minimum distances found at each histogram bin for the random distribution.Vector{Float64}This is the normalization required to convert the md_count array into the minimum-distance distribution.
coordination_numberCumulative number of sites found for each histogram distance.Vector{Float64}This is the coordination number, that is, the number of sites found cumulative up to each distance, without any normalization.
coordination_number_randomCumulative site count for the random distribution.Vector{Float64}Usually not interesting for analysis.
mddfThe final distribution function.Vector{Float64}This is the MDDF computed (md_count normalized by md_count_random). It is the main result of the calculation.
kbThe final Kirkwood-Buff integral.Vector{Float64}This is the final KB integral, as a function of the integration distance from the solute. Computed as coordination_number - coordination_number_random
solute_atomAtomic contributions of the solute.Matrix{Float64}This is a matrix with nbins lines and solute.natomspermol columns, containing the atomic contributions of each solute atom to the complete MDDF.
solvent_atomAtomic contributions of the solvent.Matrix{Float64}This is a matrix with nbins lines and solvent.natomspermol columns, containing the atomic contributions of each solvent atom to the complete MDDF.
density.soluteDensity (concentration) of the solute in the complete simulation box.Float64In units of molecules/Å³
density.solventDensity (concentration) of the solvent in the complete simulation box.Float64In units of molecules/Å³
density.solvent_bulkDensity (concentration) of the solvent in the bulk region.Float64In units of molecules/Å³
volumeVolume measures.VolumeContains the total volume of the simulation, the bulk volume, the volume of the solute domain and the shell volume of each bin of the histogram. These are computed by numerical integration from the random distributions.
filesList of files read.Vector{String}
weightsWeights of each file in the final counts.Vector{Float64}If the trajectories have different lengths or number of frames, the weights are adapted accordingly.

Other Result parameters available which are set at Options:

ParameterMeaningType of valueComment
nbinsNumber of bins of the histograms.Int
dbulkDistance from solute of bulk solution.Float64
cutoffMaximum distance to be considered for histograms.Float64
autocorrelationThe solute is the same as the solvent?BoolAutomatically set if solute == solvent.
soluteProperties of the soluteAtomSelectionContains the number of atoms, number of atoms per molecule and number of molecules of the solute.
solventProperties of the solvent.AtomSelectionContains the number of atoms, number of atoms per molecule and number of molecules of the solvent.
irefatomThis is a reference atom that is used to generate random rotations and translations internally.IntCounts of the distributions for this atom are performed automatically to obtain radial (or proximal) distribution functions. Can be used for testing purposes.
rdf_countThis is the md_count minimum distance count of irefatom.Vector{Float64}This corresponds to the conventional radial distribution function if the solute contains only one atom.
rdf_count_randomMinimum distance of irefatom count for the random distribution.Vector{Float64}
rdfDistribution function computed from the irefatom distribution. It is a conventional rdf if the solvent has only one atom.Vector{Float64}
kb_rdfKirkwood-Buff integral computed from the irefatom distribution.Vector{Float64}This must converge, at long distances, to the same value as kb, and can be used for testing.
optionsCalculation options.OptionsCarries (some redundant) options set by the user.
lastframe_readLast frame read from the trajectory.Int
n_frames_readNumber of frames read from the trajectory.IntCan differ from lastframe_read if stride != 1

Reference functions

ComplexMixtures.DensityType
mutable struct Density

Structure to contain the density values obtained from the calculation.

  • solute::Float64

  • solvent::Float64

  • solvent_bulk::Float64

source
ComplexMixtures.ResultType
mutable struct Result

Structure to contain the results of the MDDF calculation.

  • Version::VersionNumber

  • nbins::Int64

  • dbulk::Float64

  • cutoff::Float64

  • d::Vector{Float64}

  • md_count::Vector{Float64}

  • md_count_random::Vector{Float64}

  • coordination_number::Vector{Float64}

  • coordination_number_random::Vector{Float64}

  • mddf::Vector{Float64}

  • kb::Vector{Float64}

  • autocorrelation::Bool

  • solute::AtomSelection

  • solvent::AtomSelection

  • solute_group_count::Vector{Vector{Float64}}

  • solvent_group_count::Vector{Vector{Float64}}

  • rdf_count::Vector{Float64}

  • rdf_count_random::Vector{Float64}

  • sum_rdf_count::Vector{Float64}

  • sum_rdf_count_random::Vector{Float64}

  • rdf::Vector{Float64}

  • kb_rdf::Vector{Float64}

  • density::ComplexMixtures.Density

  • volume::ComplexMixtures.Volume

  • files::Vector{ComplexMixtures.TrajectoryFileOptions}

  • weights::Vector{Float64}

The Result{Vector{Float64}} parametric type is necessary only for reading the JSON3 saved file.

source
Base.mergeMethod
merge(r::Vector{Result})

This function merges the results of MDDF calculations obtained by running the same analysis on multiple trajectories, or multiple parts of the same trajectory. It returns a Result structure of the same type, with all the functions and counters representing averages of the set provided weighted by the number of frames read in each Result set.

source
ComplexMixtures.loadMethod
load(filename::String)

Function to load the json saved results file into the Result data structure.

source
diff --git a/dev/save/index.html b/dev/save/index.html index 6a4e9564..b6f1cd87 100644 --- a/dev/save/index.html +++ b/dev/save/index.html @@ -1,2 +1,6 @@ -Save and load · ComplexMixtures.jl

Save and load results

Three functions serve the purpose of saving and loading the results obtained with ComplexMixtures:

Save data to recover it later

save(results,"results.json")

where results is the output data structure of the mddf() calculation, and results.json is the output file to be created. The file is written in JSON format, thus is not naturally human-readable.

Load saved data

results = load("results.json")

The load function reads the output of the save function above, and restores the results data structure.

Write data in a human-readable format

If you Want the results to be written as simple ASCII tables such that you can read them with another analysis program, plotting graphic, or just want to inspect the data visually, use:

write(results,"results.dat")

Three files will be created by this function:

results.dat: Contains the main results, as the MDDF and KB-integral data.

results-ATOM_CONTRIB_SOLVENT.dat: contains the contribution of each atom type of the solvent to the MDDF.

results-ATOM_CONTRIB_SOLUTE.dat: contains the contribution of each atom type of the solute to the MDDF.

+Save and load · ComplexMixtures.jl

Save and load results

Three functions serve the purpose of saving and loading the results obtained with ComplexMixtures:

Save data to recover it later

save(results,"results.json")

where results is the output data structure of the mddf() calculation, and results.json is the output file to be created. The file is written in JSON format, thus is not naturally human-readable.

Load saved data

results = load("results.json")

The load function reads the output of the save function above, and restores the results data structure.

Write data in a human-readable format

If you want the results to be written as simple ASCII tables such that you can read them with another analysis program or plotting software, or just want to inspect the data visually, use:

write(results,"results.dat")

Three files will be created by this function:

results.dat: Contains the main results, such as the MDDF and KB-integral data.

results-ATOM_CONTRIB_SOLVENT.dat: contains the contribution of each atom type of the solvent to the MDDF.

results-ATOM_CONTRIB_SOLUTE.dat: contains the contribution of each atom type of the solute to the MDDF.

Base.writeMethod
write(
+    R::Result, filename::String;
+    solute_group_names::Vector{String} = R.solute.group_names,
+    solvent_group_names::Vector{String} = R.solvent.group_names,
+)

Function to write the final results to output files as simple tables that are human-readable and easy to analyze with other software.

If the solute and solvent group names are defined in R, the solute_group_names and solvent_group_names arguments are not necessary. If they are not defined, the user can pass the names of the groups as strings in the solute_group_names and solvent_group_names arguments.

source
diff --git a/dev/search_index.js b/dev/search_index.js index fac259ba..2e18a6d8 100644 --- a/dev/search_index.js +++ b/dev/search_index.js @@ -1,3 +1,3 @@ var documenterSearchIndex = {"docs": -[{"location":"references/#References","page":"References","title":"References","text":"","category":"section"},{"location":"references/#Primary-citations","page":"References","title":"Primary citations","text":"","category":"section"},{"location":"references/","page":"References","title":"References","text":"If this package was useful to you, please cite the following papers:","category":"page"},{"location":"references/","page":"References","title":"References","text":"L. Martínez, ComplexMixtures.jl: Investigating the structure of solutions of complex-shaped molecules from a solvent-shell perspective. J. Mol. Liq. 347, 117945, 2022. [Full Text]\nL. Martínez, S. Shimizu, Molecular interpretation of preferential interactions in protein solvation: a solvent-shell perspective by means of minimum-distance distribution functions. J. Chem. Theor. Comp. 13, 6358–6372, 2017. [Full Text]","category":"page"},{"location":"references/#Applications-and-examples","page":"References","title":"Applications and examples","text":"","category":"section"},{"location":"references/","page":"References","title":"References","text":"A. F. Pereira, V. Piccoli, L. Martínez, Trifluoroethanol direct interactions with protein backbones destabilize alpha-helices. J. Mol. Liq. 365, 120209, 2022. [Full Text]\nV. Piccoli, L. Martínez, Ionic liquid solvation of proteins in native and denatured states. J. Mol. Liq. 363, 119953, 2022. [Full Text]\nV. Piccoli, L. Martínez, Correlated counterion effects in the solvation of proteins by ionic-liquids. J. Mol. Liq. 320, 114347, 2020. [Full Text]\nI. P. de Oliveira, L. Martínez, The shift in urea orientation at protein surfaces at low pH is compatible with a direct mechanism of protein denaturation. Phys. Chem. Chem. Phys. 22, 354-367, 2020. [Full Text]\nI. P. 
de Oliveira, L. Martínez, Molecular basis for competitive solvation of the Burkholderia cepacia lipase by sorbitol and urea. Phys. Chem. Chem. Phys. 18, 21797-21808, 2016. [Full Text]","category":"page"},{"location":"references/#See-also","page":"References","title":"See also","text":"","category":"section"},{"location":"references/","page":"References","title":"References","text":"Packmol: A package for building initial configurations for molecular dynamics simulations.\nCellListMap.jl: Efficient and customizable implementation of cell lists, which allows the computation of general properties dependent on distances of particles within a cutoff, for example short-range potentials, forces, neighbor lists, etc.\nMDLovoFit: Automatic identification of mobile and rigid substructures in molecular dynamics simulations and fractional structural fluctuation analysis. ","category":"page"},{"location":"results/#results","page":"Results","title":"Results","text":"","category":"section"},{"location":"results/","page":"Results","title":"Results","text":"The results of a MDDF calculation are returned in a data structure which contains the MDDF, KB integrals, and atomic contributions. The following section will assume that the computation was performed by calling the mddf function with ","category":"page"},{"location":"results/","page":"Results","title":"Results","text":"results = mddf(trajectory)","category":"page"},{"location":"results/","page":"Results","title":"Results","text":"such that the results variable contain the Result data structure. By default, the histograms contain 500 bins (binstep=0.002 and cutoff=10.) 
such that all data-vectors will contain 500 lines.","category":"page"},{"location":"results/","page":"Results","title":"Results","text":"To learn how to save and load saved data, read the next section.","category":"page"},{"location":"results/#The-Result-data-structure:-main-data","page":"Results","title":"The Result data structure: main data","text":"","category":"section"},{"location":"results/","page":"Results","title":"Results","text":"The most important data to be read from results are the distances, minimum-distance distribution function, and KB integrals. These data is stored in the following vectors:","category":"page"},{"location":"results/#Distances-of-the-histograms:-results.d","page":"Results","title":"Distances of the histograms: results.d","text":"","category":"section"},{"location":"results/","page":"Results","title":"Results","text":"The following vector will contain values ranging from 0. to cutoff, and the distance at each bin is the distance in that bin for which half of the volume of the bin is within d, and half of the volume is above d, if the volume was spherical: ","category":"page"},{"location":"results/","page":"Results","title":"Results","text":"julia> results.d\n500-element Array{Float64,1}:\n 0.015874010519682\n 0.033019272488946275\n ⋮\n 9.970010030080179\n 9.99001000999998","category":"page"},{"location":"results/#Minimum-distance-distribution-function:-results.mddf","page":"Results","title":"Minimum-distance distribution function: results.mddf","text":"","category":"section"},{"location":"results/","page":"Results","title":"Results","text":"The results.mddf vector will contain the main result, which the minimum-distance distribution function. 
For a properly-sampled simulation, it will be zero at very short distances and converge to 1.0 for distances smaller than the cutoff:","category":"page"},{"location":"results/","page":"Results","title":"Results","text":"julia> results.mddf\n500-element Array{Float64,1}:\n 0.0\n 0.0\n ⋮\n 0.999052514965403\n 1.001030818286187\n","category":"page"},{"location":"results/","page":"Results","title":"Results","text":"A typical plot of results.mddf as a function of results.d will look like:","category":"page"},{"location":"results/","page":"Results","title":"Results","text":"","category":"page"},{"location":"results/","page":"Results","title":"Results","text":"Thus, this plot was obtained with the following code:","category":"page"},{"location":"results/","page":"Results","title":"Results","text":"using Plots\nplot(results.d,results.mddf,xlabel=\"d/A\",ylabel=\"mddf(d) / L/mol\") ","category":"page"},{"location":"results/#Kirkwood-Buff-integral:-results.kb","page":"Results","title":"Kirkwood-Buff integral: results.kb","text":"","category":"section"},{"location":"results/","page":"Results","title":"Results","text":"The results.kb vector will contain the Kirkwood-Buff integral computed as a function of the minimum-distance to the solute. For properly sampled simulations, it is expected to converge at large distances. 
","category":"page"},{"location":"results/","page":"Results","title":"Results","text":"julia> results.kb\n500-element Array{Float64,1}:\n 0.0\n -0.3249356504752985\n -2.9804719721525\n ⋮\n 0.72186381783\n 1.13624162115","category":"page"},{"location":"results/","page":"Results","title":"Results","text":"A typical plot of results.kb as a function of results.d will look like:","category":"page"},{"location":"results/","page":"Results","title":"Results","text":"","category":"page"},{"location":"results/","page":"Results","title":"Results","text":"Thus, this plot was obtained with the following code:","category":"page"},{"location":"results/","page":"Results","title":"Results","text":"using Plots\nplot(results.d,results.kb,xlabel=\"d/A\",ylabel=\"mddf(d) / L/mol\") ","category":"page"},{"location":"results/#Units","page":"Results","title":"Units","text":"","category":"section"},{"location":"results/","page":"Results","title":"Results","text":"The distance is assumed to be in Å, as this is the most common distance units in molecular simulations. The coordinates of the atoms are assumed be provided in Å. \nThe minimum-distance distribution function is unit-less, since it is the ratio of the density at each distance divided by an ideal-gas density.\nThe Kirkwood-Buff integrals are returned in cm³ mol⁻¹, if the coordinates were provided in Å.","category":"page"},{"location":"results/","page":"Results","title":"Results","text":"warning: Warning\nIf the coordinates are not in Å, the calculation will proceed normally, but the units of the KB integrals, which has units of volume per mol, should be converted to conform the length unit provided. 
","category":"page"},{"location":"results/#Coordination-number-and-other-data","page":"Results","title":"Coordination number and other data","text":"","category":"section"},{"location":"results/","page":"Results","title":"Results","text":"Obtaining the MDDF involves the computation of some intermediate properties that are frequently useful for additional solution structure analysis. In particular, the coordination numbers are computed. For example, the coordination number as a function from the distance to the solute can be retrieved from a Results data structure with:","category":"page"},{"location":"results/","page":"Results","title":"Results","text":"coordination_number = results.coordination_number","category":"page"},{"location":"results/","page":"Results","title":"Results","text":"and this data can be plotted against the distances by:","category":"page"},{"location":"results/","page":"Results","title":"Results","text":"plot(result.d,results.coordination_number)","category":"page"},{"location":"results/","page":"Results","title":"Results","text":"The coordination number of subgroups can also be obtained, as explained in the Coordination number section.","category":"page"},{"location":"results/","page":"Results","title":"Results","text":"The complete data available is:","category":"page"},{"location":"results/","page":"Results","title":"Results","text":"Parameter Meaning Type of value Comment\nd Vector of distances of the histograms. Vector{Float64} To be used as the x coordinate on plotting any of the data.\nmd_count Non-normalized count of minimum distances at each d. Vector{Float64} This is the number of minimum distances found at each histogram bin, without normalization. Usually this is not interesting to analyze, because it is dependent on the bin size.\nmd_count_random Number of minimum distances found at each histogram bin for the random distribution. 
Vector{Float64} This is the normalization required to convert the md_count array into the minimum-distance distribution.\ncoordination_number Cumulative number of sites found for each histogram distance. Vector{Float64} This is the coordination number, that is, the number of sites found cumulative up to each distance, without any normalization.\ncoordination_number_random Cumulative site count for the random distribution. Vector{Float64} Usually not interesting for analysis.\nmddf The final distribution function. Vector{Float64} This is the MDDF computed (md_count normalized by md_count_random). It is the main result of the calculation.\nkb The final Kirkwood-Buff integral. Vector{Float64} This is the final KB integral, as a function of the integration distance from the solute. Computed as coordination_number - coordination_number_random\nsolute_atom Atomic contributions of the solute. Matrix{Float64} This is a matrix with nbins lines and solute.natomspermol columns, containing the atomic contributions of each solute atom to the complete MDDF.\nsolvent_atom Atomic contributions of the solvent. Matrix{Float64} This is a matrix with nbins lines and solvent.natomspermol columns, containing the atomic contributions of each solvent atom to the complete MDDF.\ndensity.solute Density (concentration) of the solute in the complete simulation box. Float64 In units of molecules/textrmAA^3\ndensity.solvent Density (concentration) of the solvent in the complete simulation box. Float64 In units of molecules/textrmAA^3\ndensity.solvent_bulk Density (concentration) of the solute in the bulk region. Float64 In units of molecules/textrmAA^3\nvolume Volume measures. Volume Contains the total volume of the simulation, the bulk volume, the volume of the solute domain and the shell volume of each bin of the histogram. These are computed by numerical integration from the random distributions.\nfiles List of files read. Vector{String} \nweights Weights of each file in the final counts. 
Vector{Float64} If the trajectories have different lengths or number of frames, the weights are adapted accordingly.\n ","category":"page"},{"location":"results/#Other-Result-parameters-available-which-are-set-at-Options:","page":"Results","title":"Other Result parameters available which are set at Options:","text":"","category":"section"},{"location":"results/","page":"Results","title":"Results","text":"Parameter Meaning Type of value Comment\nnbins Number of bins of the histograms. Int \ndbulk Distance from solute of bulk solution. Float64 \ncutoff Maximum distance to be considered for histograms. Float64 \nautocorrelation The solute is the same as the solvent? Bool Automatically set if solute == solvent.\nsolute Properties of the solute SolSummary Contains the number of atoms, number of atoms per molecule and number of molecules of the solute.\nsolvent Properties of the solvent. SolSummary Contains the number of atoms, number of atoms per molecule and number of molecules of the solvent.\nirefatom This is a reference atom that is used to generate random rotations and translations internally. Int Counts of the distributions for this atom are performed automatically to obtain radial (or proximal) distribution functions. Can be used for testing purposes.\nrdf_count This is the md_count minimum distance count of irefatom. Vector{Float64} This corresponds to the conventional radial distribution function if the solute contains only one atom.\nrdf_count_random Minimum distance of irefatom count for the random distribution. Vector{Float64} \nrdf Distribution function computed from the irefatom distribution. It is a conventional rdf if the solvent has only one atom. Vector{Float64} \nkb_rdf Kirkwood-Buff integral computed from the irefatom distribution. Vector{Float64} This must converge, at long distances, to the same value as kb, and can be used for testing.\noptions Calculation options. 
Options Carries (some redundant) options set by the user.\nlastframe_read Last frame read from the trajectory. Int \nn_frames_read Number of frames read from the trajectory. Int Can differ from lastframe_read if stride != 1\n ","category":"page"},{"location":"help/#Help-entries","page":"Help entries","title":"Help entries","text":"","category":"section"},{"location":"help/","page":"Help entries","title":"Help entries","text":"Modules=[ComplexMixtures]","category":"page"},{"location":"help/#ComplexMixtures.ComplexMixturesTypes","page":"Help entries","title":"ComplexMixtures.ComplexMixturesTypes","text":"Internal structure or function, interface may change.\n\nUnion of types to define comparison operators.\n\n\n\n\n\n","category":"type"},{"location":"help/#ComplexMixtures.ChemFile","page":"Help entries","title":"ComplexMixtures.ChemFile","text":"struct ChemFile{T<:(AbstractVector)} <: Trajectory\n\nStructure to contain a trajectory as read by Chemfiles.jl\n\nfilename::String\nformat::AbstractString\nstream::ComplexMixtures.Stream{<:Chemfiles.Trajectory}\nnframes::Int64\nsolute::Selection\nsolvent::Selection\nx_solute::Vector{T} where T<:(AbstractVector)\nx_solvent::Vector{T} where T<:(AbstractVector)\nunitcell::StaticArraysCore.MMatrix{3, 3, Float64, 9}\nnatoms::Int64\n\n\n\n\n\n","category":"type"},{"location":"help/#ComplexMixtures.ChemFile-Tuple{String, Selection, Selection}","page":"Help entries","title":"ComplexMixtures.ChemFile","text":"ChemFile(filename::String, solute::Selection, solvent::Selection;format=\"\" , T::Type = SVector{3,Float64})\n\nFunction open will set up the IO stream of the trajectory, fill up the number of frames field and additional parameters if required.\n\n\n\n\n\n","category":"method"},{"location":"help/#ComplexMixtures.Density","page":"Help entries","title":"ComplexMixtures.Density","text":"mutable struct Density\n\nStructure to contain the density values obtained from the 
calculation.\n\nsolute::Float64\nsolvent::Float64\nsolvent_bulk::Float64\n\n\n\n\n\n","category":"type"},{"location":"help/#ComplexMixtures.MinimumDistance","page":"Help entries","title":"ComplexMixtures.MinimumDistance","text":"struct MinimumDistance\n\nInternal structure or function, interface may change.\n\nExtended help\n\nThis structure contains the information, for each molecule, of if it is within the cutoff distance of the solute, the atom indexes of the associated minimum distance, the distance, and a label to mark if the reference atom of the molecule is within the cutoff distance of the solute.\n\nThe lists of minimum-distances are stored in arrays of type Vector{MinimumDistance}. The index of this vector corresponds to the index of the molecule in the original array.\n\nwithin_cutoff::Bool\ni::Int64\nj::Int64\nd::Float64\nref_atom_within_cutoff::Bool\nd_ref_atom::Float64\n\n\n\n\n\n","category":"type"},{"location":"help/#ComplexMixtures.NamdDCD","page":"Help entries","title":"ComplexMixtures.NamdDCD","text":"struct NamdDCD{T<:(AbstractVector)} <: Trajectory\n\nStructure to contain the data of a trajectory in NAMD/DCD format.\n\nfilename::String\nstream::ComplexMixtures.Stream{<:FortranFiles.FortranFile}\nnframes::Int64\nsolute::Selection\nsolvent::Selection\nx_solute::Vector{T} where T<:(AbstractVector)\nx_solvent::Vector{T} where T<:(AbstractVector)\nlastatom::Int64\nunitcell_read::Vector{Float64}\nx_read::Vector{Float32}\ny_read::Vector{Float32}\nz_read::Vector{Float32}\n\n\n\n\n\n","category":"type"},{"location":"help/#ComplexMixtures.NamdDCD-Tuple{String, Selection, Selection}","page":"Help entries","title":"ComplexMixtures.NamdDCD","text":"NamdDCD(filename::String, solute::Selection, solvent::Selection;T::Type = SVector{3,Float64})\n\nThis function initializes the structure above, returning the data and the vectors with appropriate lengths.\n\n\n\n\n\n","category":"method"},{"location":"help/#ComplexMixtures.Options","page":"Help 
entries","title":"ComplexMixtures.Options","text":"struct Options\n\nStructure that contains the detailed input options.\n\nfirstframe::Int64\nlastframe::Int64\nstride::Int64\nirefatom::Int64\nn_random_samples::Int64\nbinstep::Float64\ndbulk::Float64\ncutoff::Float64\nusecutoff::Bool\nlcell::Int64\nGC::Bool\nGC_threshold::Float64\nseed::Int64\nStableRNG::Bool\nnthreads::Int64\nsilent::Bool\nframe_weights::Vector{Float64}\n\n\n\n\n\n","category":"type"},{"location":"help/#ComplexMixtures.Overview","page":"Help entries","title":"ComplexMixtures.Overview","text":"Internal structure or function, interface may change.\n\nmutable struct Overview\n\nStructure that is used to dispatch the show of a overview.\n\nR::Result\ndomain_molar_volume::Float64\ndensity::ComplexMixtures.Density\nsolvent_molar_volume::Float64\nsolvent_molar_volume_bulk::Float64\nsolute_molar_volume::Float64\n\n\n\n\n\n","category":"type"},{"location":"help/#ComplexMixtures.PDBTraj","page":"Help entries","title":"ComplexMixtures.PDBTraj","text":"struct PDBTraj{T<:(AbstractVector)} <: Trajectory\n\nStructure to contain PDB trajectories. Frames must be separated by \"END\", and with periodic cell sizes in the \"CRYST1\" field, for each frame.\n\nThis structure and functions can be used as a template to implement the reading of other trajectory formats. 
\n\nfilename::String\nstream::ComplexMixtures.Stream{<:IOStream}\nnframes::Int64\nunitcell::StaticArraysCore.MMatrix{3, 3, Float64, 9}\nsolute::Selection\nsolvent::Selection\nx_solute::Vector{T} where T<:(AbstractVector)\nx_solvent::Vector{T} where T<:(AbstractVector)\n\n\n\n\n\n","category":"type"},{"location":"help/#ComplexMixtures.PDBTraj-Tuple{String, Selection, Selection}","page":"Help entries","title":"ComplexMixtures.PDBTraj","text":"PDBTraj(pdbfile::String, solute::Selection, solvent::Selection;T::Type = SVector{3,Float64})\n\nFunction open will set up the IO stream of the trajectory, fill up the number of frames field and additional parameters if required \n\n\n\n\n\n","category":"method"},{"location":"help/#ComplexMixtures.Result","page":"Help entries","title":"ComplexMixtures.Result","text":"mutable struct Result{T<:VecOrMat{Float64}}\n\nStructure to contain the results of the MDDF calculation.\n\nVersion::VersionNumber\nnbins::Int64\ndbulk::Float64\ncutoff::Float64\nd::Vector{Float64}\nmd_count::Vector{Float64}\nmd_count_random::Vector{Float64}\ncoordination_number::Vector{Float64}\ncoordination_number_random::Vector{Float64}\nmddf::Vector{Float64}\nkb::Vector{Float64}\nautocorrelation::Bool\nsolvent::ComplexMixtures.SolSummary\nsolute::ComplexMixtures.SolSummary\nsolute_atom::VecOrMat{Float64}\nsolvent_atom::VecOrMat{Float64}\nrdf_count::Vector{Float64}\nrdf_count_random::Vector{Float64}\nsum_rdf_count::Vector{Float64}\nsum_rdf_count_random::Vector{Float64}\nrdf::Vector{Float64}\nkb_rdf::Vector{Float64}\ndensity::ComplexMixtures.Density\nvolume::ComplexMixtures.Volume\noptions::Options\nirefatom::Int64\nlastframe_read::Int64\nnframes_read::Int64\nfiles::Vector{String}\nweights::Vector{Float64}\n\nThe Result{Vector{Float64}} parametric type is necessary only for reading the JSON3 saved file. 
\n\n\n\n\n\n","category":"type"},{"location":"help/#ComplexMixtures.Selection","page":"Help entries","title":"ComplexMixtures.Selection","text":"struct Selection\n\nStructure that contains the information about the solute and solvent molecules.\n\nnatoms::Int64\nnmols::Int64\nnatomspermol::Int64\nindex::Vector{Int64}\nimol::Vector{Int64}\nnames::Vector{String}\n\n\n\n\n\n","category":"type"},{"location":"help/#ComplexMixtures.SolSummary","page":"Help entries","title":"ComplexMixtures.SolSummary","text":"Internal structure or function, interface may change.\n\nstruct SolSummary\n\nStructures to contain the details of a solute or solvent to store in the results of the MDDF calculation.\n\nnatoms::Int64\nnmols::Int64\nnatomspermol::Int64\n\n\n\n\n\n","category":"type"},{"location":"help/#ComplexMixtures.Trajectory","page":"Help entries","title":"ComplexMixtures.Trajectory","text":"Trajectory(filename::String, solute::Selection, solvent::Selection; format::String = \"\", chemfiles = false)\n\nTrajectory constructor data type. 
\n\nDefaults to reading with the Chemfiles infrastructure, except for DCD and PDB trajectory files, if the \"PDBTraj\" option is provided.\n\nSee memory issue (https://github.com/chemfiles/Chemfiles.jl/issues/44)\n\n\n\n\n\n","category":"type"},{"location":"help/#ComplexMixtures.Units","page":"Help entries","title":"ComplexMixtures.Units","text":"Internal structure or function, interface may change.\n\nstruct Units{T}\n\nUnit conversions.\n\nmole::Any\nAngs3tocm3::Any\nAngs3toL::Any\nAngs3tocm3permol::Any\nAngs3toLpermol::Any\nSitesperAngs3tomolperL::Any\n\n\n\n\n\n","category":"type"},{"location":"help/#ComplexMixtures.Volume","page":"Help entries","title":"ComplexMixtures.Volume","text":"mutable struct Volume\n\nStructures to contain the volumes obtained from calculations.\n\ntotal::Float64\nbulk::Float64\ndomain::Float64\nshell::Vector{Float64}\n\n\n\n\n\n","category":"type"},{"location":"help/#Base.isapprox-Union{Tuple{T}, Tuple{T, T}} where T<:Union{ComplexMixtures.SolSummary, ComplexMixtures.Density, Options, Selection, ComplexMixtures.Volume, Result}","page":"Help entries","title":"Base.isapprox","text":"Base.isapprox(r1::T, r2::T; debug=false) where T <: CMTypes\n\nInternal structure or function, interface may change.\n\nFunction to test if two runs offered similar results. Mostly used in the package testing routines.\n\n\n\n\n\n","category":"method"},{"location":"help/#Base.merge-Tuple{Vector{<:Result}}","page":"Help entries","title":"Base.merge","text":"merge(r::Vector{Result})\n\nThis function merges the results of MDDF calculations obtained by running the same analysis on multiple trajectories, or multiple parts of the same trajectory. 
It returns a Result structure of the same type, with all the functions and counters representing averages of the set provided weighted by the number of frames read in each Result set.\n\n\n\n\n\n","category":"method"},{"location":"help/#Base.write-Tuple{Result, String, Selection, Selection}","page":"Help entries","title":"Base.write","text":"write(R::ComplexMixtures.Result, filename::String, solute::Selection, solvent::Selection)\n\nFunction to write the final results to output files as simple tables that are human-readable and easy to analyze with other software\n\nIf the solute and solvent selections are provides, pass on the atom names.\n\n\n\n\n\n","category":"method"},{"location":"help/#Base.write-Tuple{Result, String}","page":"Help entries","title":"Base.write","text":"write(R::ComplexMixtures.Result, filename::String; \n solute_names::Vector{String} = [\"nothing\"], \n solvent_names::Vector{String} = [\"nothing\"])\n\nOptional passing of atom names.\n\n\n\n\n\n","category":"method"},{"location":"help/#ComplexMixtures.VMDselect-Tuple{String, String}","page":"Help entries","title":"ComplexMixtures.VMDselect","text":"VMDselect(inputfile::String, selection::String; vmd=\"vmd\", srcload=nothing)\n\nSelect atoms using vmd selection syntax, with vmd in background\n\nReturns the list of index (one-based) and atom names\n\nFunction to return the selection from a input file (topology, coordinates, etc), by calling VMD in the background.\n\nThe srcload argument can be used to load a list of scripts before loading the input file, for example with macros to define custom selection keywords.\n\n\n\n\n\n","category":"method"},{"location":"help/#ComplexMixtures.contributions-Tuple{Selection, Matrix{Float64}, Vector{Int64}}","page":"Help entries","title":"ComplexMixtures.contributions","text":"contributions(s::Selection, atom_contributions::Matrix{Float64}, selection)\n\nExtract the contribution of a given atom type selection from the solute or solvent atomic contributions 
to the MDDF.\n\ns here is the solute or solvent selection (type ComplexMixtures.Selection) atom_contributions is the R.solute_atom or R.solvent_atom arrays of the Result structure, and the last argument is the selection of atoms from the solute to be considered, given as a list of indexes, list of atom names, vector of PDBTools.Atoms, or a PDBTools.Residue. \n\nExtended help\n\nFor selections of one molecule, the function has an additional keyword option first_atom_is_ref that is false by default. If set to true, the index first atom of the selection is considered as a reference atom. For example if a solute has 100 atoms, but its first atom in the PDB file is number 901, the selection of indexes [1, 2, 3] will refer to atoms with indexes [901, 902, 903].\n\n\n\n\n\n","category":"method"},{"location":"help/#ComplexMixtures.coordination_number","page":"Help entries","title":"ComplexMixtures.coordination_number","text":"coordination_number(trajectory::Trajectory, options::Options)\n\nComputes the coordination numbers for each solute molecule in the trajectory, given the Trajectory. This is an auxiliary function of the ComplexMixtures package, which is used to compute coordination numbers when the normalization of the distribution is not possible or needed. \n\nThe output is a Result structure, which contains the data as the result of a call to mddf, except that all counters which require normalization of the distribution will be zero. 
In summary, this result data structure can be used to compute the coordination numbers, but not the MDDF, RDF, or KB integrals.\n\nExamples\n\njulia> trajectory = Trajectory(\"./trajectory.dcd\",solute,solvent);\n\njulia> results = mddf(trajectory);\n\njulia> coordination_numbers = coordination_number(trajectory);\n\n\n\n\n\n","category":"function"},{"location":"help/#ComplexMixtures.coordination_number-2","page":"Help entries","title":"ComplexMixtures.coordination_number","text":"coordination_number(R::Result) = R.coordination_number\ncoordination_number(R::Result, group_contributions::Vector{Float64})\ncoordination_number(s::Selection, atom_contributions::Matrix{Float64}, R::Result, group)\n\nComputes the coordination number of a given group of atoms from the solute or solvent atomic contributions to the MDDF. If no group is defined (first call above), the coordination number of the whole solute or solvent is returned.\n\nIf the group_contributions to the mddf are computed previously with the contributions function, the result can be used to compute the coordination number by calling coordination_number(R::Result, group_contributions).\n\nOtherwise, the coordination number can be computed directly with the second call, where:\n\ns is the solute or solvent selection (type ComplexMixtures.Selection)\n\natom_contributions is the R.solute_atom or R.solvent_atom arrays of the Result structure\n\nR is the Result structure,\n\nand the last argument is the selection of atoms from the solute to be considered, given as a list of indexes, list of atom names, or a selection following the syntax of PDBTools, or vector of PDBTools.Atoms, or a PDBTools.Residue\n\nExamples\n\nIn the following example we compute the coordination number of the atoms of residue 50 (of the solute) with the solvent atoms of TMAO, as a function of the distance. Finally, we show the average number of TMAO molecules within 5 Angstroms of residue 50. 
The findlast(<(5), R.d) part of the code below returns the index of the last element of the R.d array that is smaller than 5 Angstroms.\n\nPrecomputing the group contributions Using the contributions function\n\nusing ComplexMixtures, PDBTools\npdb = readPDB(\"test/data/NAMD/structure.pdb\");\nR = load(\"test/data/NAMD/protein_tmao.json\");\nsolute = Selection(PDBTools.select(pdb, \"protein\"), nmols=1);\nresidue50 = PDBTools.select(pdb, \"residue 50\");\n# Compute the group contributions to the MDDF\nresidue50_contribution = contributions(solute, R.solute_atom, residue50);\n# Now compute the coordination number\nresidue50_coordination = coordination_number(R, residue50_contribution)\n# Output the average number of TMAO molecules within 5 Angstroms of residue 50\nresidue50_coordination[findlast(<(5), R.d)]\n\nWithout precomputing the group_contribution\n\nusing ComplexMixtures, PDBTools\npdb = readPDB(\"test/data/NAMD/structure.pdb\");\nR = load(\"test/data/NAMD/protein_tmao.json\");\nsolute = Selection(PDBTools.select(pdb, \"protein\"), nmols=1);\nresidue50 = PDBTools.select(pdb, \"residue 50\");\n# Compute the coordination number\nresidue50_coordination = coordination_number(solute, R.solute_atom, R, residue50)\n# Output the average number of TMAO molecules within 5 Angstroms of residue 50\nresidue50_coordination[findlast(<(5), R.d)]\n\n\n\n\n\n","category":"function"},{"location":"help/#ComplexMixtures.coordination_number_frame!-Tuple{Result, CellListMap.PeriodicSystems.AbstractPeriodicSystem, ComplexMixtures.Buffer, Float64}","page":"Help entries","title":"ComplexMixtures.coordination_number_frame!","text":"coordination_number_frame!(R::Result, system::AbstractPeriodicSystem, buff::Buffer, frame_weight)\n\nInternal structure or function, interface may change.\n\nComputes the coordination numbers for a single frame. 
Modifies the data in the R (type Result) structure.\n\n\n\n\n\n","category":"method"},{"location":"help/#ComplexMixtures.eulermat-Tuple{Any, Any, Any, String}","page":"Help entries","title":"ComplexMixtures.eulermat","text":"eulermat(beta, gamma, theta, deg::String)\n\nInternal structure or function, interface may change.\n\nThis routine was added because it defines the rotation in the \"human\" way, and is thus used to set the position of the fixed molecules. deg can only be \"degree\", in which case the angles will be considered in degrees. If no deg argument is provided, radians are used.\n\nThat means: beta is a counterclockwise rotation around x axis. gamma is a counterclockwise rotation around y axis. theta is a counterclockwise rotation around z axis.\n\n\n\n\n\n","category":"method"},{"location":"help/#ComplexMixtures.finalresults!-Tuple{Result, Options, Trajectory}","page":"Help entries","title":"ComplexMixtures.finalresults!","text":"finalresults!(R::Result, options::Options, trajectory::Trajectory)\n\nInternal structure or function, interface may change.\n\nFunction that computes the final results of all the data computed by averaging according to the sampling of each type of data, and converts to common units.\n\nComputes also the final distribution functions and KB integrals.\n\nThis function modifies the values contained in the R data structure.\n\n\n\n\n\n","category":"method"},{"location":"help/#ComplexMixtures.gr-Tuple{Result}","page":"Help entries","title":"ComplexMixtures.gr","text":"gr(R::Result) = gr(R.d,R.rdf_count,R.density.solvent_bulk,R.options.binstep)\n\nIf a Result structure is provided without further details, use the rdf count and the bulk solvent density.\n\n\n\n\n\n","category":"method"},{"location":"help/#ComplexMixtures.gr-Tuple{Vector{Float64}, Vector{Float64}, Float64, Float64}","page":"Help entries","title":"ComplexMixtures.gr","text":"gr(r::Vector{Float64}, count::Vector{Float64}, density::Float64, binstep::Float64)\n\nComputes 
the radial distribution function from the count data and the density.\n\nThis is exactly a conventional g(r) if a single atom was chosen as the solute and solvent selections.\n\nReturns both the g(r) and the kb(r)\n\n\n\n\n\n","category":"method"},{"location":"help/#ComplexMixtures.grid3D-Tuple{}","page":"Help entries","title":"ComplexMixtures.grid3D","text":"grid3D(solute,solute_atoms,mddf_result,output_file; dmin=1.5, dmax=5.0, step=0.5)\n\nThis function builds the grid of the 3D density function and fills an array of mutable structures of type Atom, containing the position of the atoms of grid, the closest atom to that position, and distance. \n\nsolute is a ComplexMixtures.Selection, defining the solute. solute_atoms is the corresponding vector of PDBTools.Atoms, and mddf_result is the result of a mddf_result calculation with the corresponding solute. \n\ndmin and dmax define the range of distance where the density grid will be built, and step defines how fine the grid must be. Be aware that fine grids usually involve a very large number of points (hundreds of thousands).\n\nAll parameters can be provided as keyword parameters.\n\nExample\n\njulia> using ComplexMixtures, PDBTools\n\njulia> pdb = readPDB(\"./system.pdb\");\n\njulia> R = ComplexMixtures.load(\"./results.json\");\n\njulia> protein = select(pdb,\"protein\");\n\njulia> solute = ComplexMixtures.Selection(protein,nmols=1);\n\njulia> grid = ComplexMixtures.grid3D(solute=solute, solute_atoms=protein, mddf_result=R, output_file=\"grid.pdb\");\n\n\ngrid will contain a vector of Atoms with the information of the MDDF at each grid point, and the same data will be written in the grid.pdb file. This PDB file can be opened in VMD, for example, and contains in the beta field the contribution of each protein residue to the MDDF at each point in space relative to the protein, and in the occupancy field the distance to the protein. 
Examples of how this information can be visualized are provided in the user guide of ComplexMixtures. \n\n\n\n\n\n","category":"method"},{"location":"help/#ComplexMixtures.itype-Tuple{Int64, Int64}","page":"Help entries","title":"ComplexMixtures.itype","text":"itype(iatom::Int, natomspermol::Int)\n\nInternal structure or function, interface may change.\n\nGiven the index of the atom in the vector of coordinates of the solute or the solvent, returns the type of the atom, that is, the index of this atom within the molecule (goes from 1 to natomspermol)\n\n\n\n\n\n","category":"method"},{"location":"help/#ComplexMixtures.load-Tuple{String}","page":"Help entries","title":"ComplexMixtures.load","text":"load(filename::String)\n\nFunction to load the json saved results file into the Result data structure.\n\n\n\n\n\n","category":"method"},{"location":"help/#ComplexMixtures.mddf","page":"Help entries","title":"ComplexMixtures.mddf","text":"mddf(trajectory::Trajectory, options::Options)\n\nFunction that computes the minimum-distance distribution function, atomic contributions, and KB integrals, given the Trajectory structure of the simulation and, optionally, parameters given as a second argument of the Options type. This is the main function of the ComplexMixtures package. 
\n\nExamples\n\njulia> trajectory = Trajectory(\"./trajectory.dcd\",solute,solvent);\n\njulia> results = mddf(trajectory);\n\nor, to set some custom optional parameter,\n\njulia> options = Options(lastframe=1000);\n\njulia> results = mddf(trajectory,options);\n\n\n\n\n\n","category":"function"},{"location":"help/#ComplexMixtures.mddf_frame!-Tuple{Result, CellListMap.PeriodicSystems.AbstractPeriodicSystem, ComplexMixtures.Buffer, Options, Any, Any}","page":"Help entries","title":"ComplexMixtures.mddf_frame!","text":"mddf_frame!(R::Result, system::AbstractPeriodicSystem, buff::Buffer, options::Options, frame_weight, RNG)\n\nInternal structure or function, interface may change.\n\nComputes the MDDF for a single frame. Modifies the data in the R (type Result) structure.\n\n\n\n\n\n","category":"method"},{"location":"help/#ComplexMixtures.minimum_distances!-Tuple{CellListMap.PeriodicSystems.AbstractPeriodicSystem, Result, Int64}","page":"Help entries","title":"ComplexMixtures.minimum_distances!","text":"minimum_distances!(system::CellListMap.PeriodicSystem, R::Result)\n\nInternal structure or function, interface may change.\n\nFunction that computes the list of distances of solvent molecules to a solute molecule. It updates the lists of minimum distances. \n\n\n\n\n\n","category":"method"},{"location":"help/#ComplexMixtures.mol_index-Tuple{Any, Any}","page":"Help entries","title":"ComplexMixtures.mol_index","text":"mol_index(i_atom, natomspermol) = (i_atom-1) ÷ natomspermol + 1\n\nInternal structure or function, interface may change.\n\nExtended help\n\nSets the index of the molecule of an atom in the simples situation, in which all molecules have the same number of atoms. 
\n\n\n\n\n\n","category":"method"},{"location":"help/#ComplexMixtures.mol_range-Tuple{Any, Any}","page":"Help entries","title":"ComplexMixtures.mol_range","text":"mol_range(imol, n_atoms_per_molecule)\n\nInternal structure or function, interface may change.\n\nGiven the index and the number of atoms per molecule, returns the range of indices of an array of coordinates that corresponds to the molecule.\n\n\n\n\n\n","category":"method"},{"location":"help/#ComplexMixtures.move!-Union{Tuple{T}, Tuple{AbstractVector{T}, T, Any, Any, Any}} where T<:(StaticArraysCore.SVector)","page":"Help entries","title":"ComplexMixtures.move!","text":"move!(x::AbstractVector, newcm::AbstractVector,beta, gamma, theta)\n\nInternal structure or function, interface may change.\n\nTranslates and rotates a molecule according to the desired input center of coordinates and Euler rotations. Modifies the vector x.\n\n\n\n\n\n","category":"method"},{"location":"help/#ComplexMixtures.overview-Tuple{Result}","page":"Help entries","title":"ComplexMixtures.overview","text":"overview(R::Result)\n\nFunction that outputs the volumes and densities in the most natural units.\n\n\n\n\n\n","category":"method"},{"location":"help/#ComplexMixtures.random_move!-Tuple{AbstractVector{<:StaticArraysCore.SVector{3}}, Int64, CellListMap.PeriodicSystems.AbstractPeriodicSystem, Any}","page":"Help entries","title":"ComplexMixtures.random_move!","text":"random_move!(x_ref::AbstractVector{T}, \n irefatom::Int,\n system::AbstractPeriodicSystem,\n x_new::AbstractVector{T}, RNG) where {T<:SVector}\n\nInternal structure or function, interface may change.\n\nFunction that generates a new random position for a molecule.\n\nThe new position is returned in x_new, a previously allocated array.\n\n\n\n\n\n","category":"method"},{"location":"help/#ComplexMixtures.randomize_solvent!-Tuple{CellListMap.PeriodicSystems.AbstractPeriodicSystem, ComplexMixtures.Buffer, Int64, Result, Any}","page":"Help 
entries","title":"ComplexMixtures.randomize_solvent!","text":"randomize_solvent!(system, buff, n_solvent_in_bulk, options, RNG)\n\nInternal structure or function, interface may change.\n\nGenerate a random solvent distribution from the bulk molecules of a solvent\n\n\n\n\n\n","category":"method"},{"location":"help/#ComplexMixtures.save-Tuple{Result, String}","page":"Help entries","title":"ComplexMixtures.save","text":"save(R::Result, filename::String)\n\nFunction to write the result data structure to a json file.\n\n\n\n\n\n","category":"method"},{"location":"help/#ComplexMixtures.setbin-Tuple{Any, Any}","page":"Help entries","title":"ComplexMixtures.setbin","text":"setbin(d,step)\n\nInternal structure or function, interface may change.\n\nFunction that sets to which histogram bin a data point pertains. Simple, but important to keep consistency over all calls.\n\n\n\n\n\n","category":"method"},{"location":"help/#ComplexMixtures.setup_PeriodicSystem-Tuple{Trajectory, Options}","page":"Help entries","title":"ComplexMixtures.setup_PeriodicSystem","text":"setup_PeriodicSystem(trajectory::Trajectory, options::Options)\n\nInternal structure or function, interface may change.\n\nSet up the periodic system from CellListMap, to compute minimum distances. The system will be set up such that xpositions corresponds to one molecule of the solute, and ypositions contains all coordinates of all atoms of the solvent. 
\n\n\n\n\n\n","category":"method"},{"location":"help/#ComplexMixtures.shellradius-Tuple{Any, Any}","page":"Help entries","title":"ComplexMixtures.shellradius","text":"shellradius(i,step)\n\nInternal structure or function, interface may change.\n\nCompute the point in which the radius comprises half of the volume of the shell.\n\n\n\n\n\n","category":"method"},{"location":"help/#ComplexMixtures.sphereradiusfromshellvolume-Tuple{Any, Any}","page":"Help entries","title":"ComplexMixtures.sphereradiusfromshellvolume","text":"sphereradiusfromshellvolume(volume,step)\n\nInternal structure or function, interface may change.\n\nComputes the radius that corresponds to a spherical shell of a given volume.\n\n\n\n\n\n","category":"method"},{"location":"help/#ComplexMixtures.sphericalshellvolume-Tuple{Any, Any}","page":"Help entries","title":"ComplexMixtures.sphericalshellvolume","text":"sphericalshellvolume(i,step)\n\nInternal structure or function, interface may change.\n\nComputes the volume of the spherical shell defined within [(i-1)step,istep].\n\n\n\n\n\n","category":"method"},{"location":"help/#ComplexMixtures.sum!-Tuple{Result, Result}","page":"Help entries","title":"ComplexMixtures.sum!","text":"sum!(R1::Result, R2::Result)\n\nInternal structure or function, interface may change.\n\nSum the counts of two Results structures, adding the result to the first structure as in R1 = R1 + R2.\n\n\n\n\n\n","category":"method"},{"location":"help/#ComplexMixtures.title-Tuple{Result, Selection, Selection}","page":"Help entries","title":"ComplexMixtures.title","text":"title(R::Result, solute::Selection, solvent::Selection)\ntitle(R::Result, solute::Selection, solvent::Selection, nspawn::Int)\n\nInternal structure or function, interface may change.\n\nPrint some information about the run.\n\n\n\n\n\n","category":"method"},{"location":"help/#ComplexMixtures.update_list!-Tuple{Any, Any, Any, Any, Any, Any, Vector{ComplexMixtures.MinimumDistance}}","page":"Help 
entries","title":"ComplexMixtures.update_list!","text":"update_list!(i, j, d2, iref_atom::Int, mol_index_i::F, isolute::Int, list::Vector{MinimumDistance{T}}) where {F<:Function, T}\n\nInternal structure or function, interface may change.\n\nFunction that updates a list of minimum distances given the indexes of the atoms involved for one pair within cutoff, for autocorrelations (such that the identity of isolute is needed)\n\n\n\n\n\n","category":"method"},{"location":"help/#ComplexMixtures.update_list!-Tuple{Any, Any, Any, Any, Any, Vector{ComplexMixtures.MinimumDistance}}","page":"Help entries","title":"ComplexMixtures.update_list!","text":"update_list!(i, j, d2, iref_atom::Int, mol_index_i::F, list::Vector{MinimumDistance{T}}) where {F<:Function, T}\n\nInternal structure or function, interface may change.\n\nFunction that updates a list of minimum distances given the indexes of the atoms involved for one pair within cutoff.\n\n\n\n\n\n","category":"method"},{"location":"help/#ComplexMixtures.update_md-Tuple{ComplexMixtures.MinimumDistance, ComplexMixtures.MinimumDistance}","page":"Help entries","title":"ComplexMixtures.update_md","text":"update_md(md1::MinimumDistance{T}, md2::MinimumDistance{T}) where {T}\n\nInternal structure or function, interface may change.\n\nFunction that returns the updated minimum distance structure after comparing two structures associated with the same molecule.\n\n\n\n\n\n","category":"method"},{"location":"help/#ComplexMixtures.updatecounters!-Tuple{Result, CellListMap.PeriodicSystems.AbstractPeriodicSystem, Float64}","page":"Help entries","title":"ComplexMixtures.updatecounters!","text":"updatecounters!(R::Result, system::AbstractPeriodicSystem)\n\nInternal structure or function, interface may change.\n\nFunction that updates the minimum-distance counters in R\n\n\n\n\n\n","category":"method"},{"location":"help/#ComplexMixtures.viewmol-Union{Tuple{T}, Tuple{Int64, Vector{T}, Int64}} where T","page":"Help 
entries","title":"ComplexMixtures.viewmol","text":"viewmol(i::Int, x::Vector{T}, n::Int) where T\n\nInternal structure or function, interface may change.\n\nReturns a view of a coordinate vector corresponding to the atoms of a molecule with index i. n is the number of atoms of the molecule.\n\n\n\n\n\n","category":"method"},{"location":"help/#ComplexMixtures.which_types-Tuple{Selection, Vector{Int64}}","page":"Help entries","title":"ComplexMixtures.which_types","text":"which_types(s::Selection, indexes::Vector{Int})\n\nInternal structure or function, interface may change.\n\nFunction that returns the list of the indexes of the types of the atoms in a selection. For example, if a selection corresponds to a solvent of water molecules: There are three types, 1, 2, and 3, corresponding to the three atoms of the water molecule. If the indexes provided are, for instance, 11, 12, and 13, corresponding to a water molecule, this function will return 1, 2 and 3.\n\nThis is used to get equivalent-atom contributions to the distribution functions. For example, the input indexes span all water molecules, the output of this function will be still the three indexes corresponding to the three types of atoms that exist in a water molecule. \n\nIt is not possible to compute the contribution of one individual water molecule if the distribution function was computed for all molecules. Thus, the necessity to identify the types of atoms involved in a selection. 
\n\n\n\n\n\n","category":"method"},{"location":"help/#ComplexMixtures.writexyz-Union{Tuple{T}, Tuple{Vector{T}, String}} where T<:(AbstractVector)","page":"Help entries","title":"ComplexMixtures.writexyz","text":"writexyz(x::Vector{T}, file::String) where T <: AbstractVector\n\nInternal structure or function, interface may change.\n\nPrint test xyz file.\n\n\n\n\n\n","category":"method"},{"location":"mddf/#Computing-the-Minimum-Distance-Distribution-Function","page":"Computing the MDDF","title":"Computing the Minimum-Distance Distribution Function","text":"","category":"section"},{"location":"mddf/","page":"Computing the MDDF","title":"Computing the MDDF","text":"The main function of the ComplexMixtures package actually computes the MDDF between the solute and the solvent chosen. ","category":"page"},{"location":"mddf/","page":"Computing the MDDF","title":"Computing the MDDF","text":"It is run with the following command:","category":"page"},{"location":"mddf/","page":"Computing the MDDF","title":"Computing the MDDF","text":"results = mddf(trajectory) ","category":"page"},{"location":"mddf/","page":"Computing the MDDF","title":"Computing the MDDF","text":"The MDDF along with other results, like the corresponding KB integrals, are returned in the results data structure, which is described in the next section.","category":"page"},{"location":"mddf/","page":"Computing the MDDF","title":"Computing the MDDF","text":"It is possible to tune several options of the calculation, by setting the Options data structure with user-defined values in advance. The most common parameters to be set by the user are probably dbulk and stride. ","category":"page"},{"location":"mddf/","page":"Computing the MDDF","title":"Computing the MDDF","text":"dbulk defines the distance from the solute above which the user believes that the reference solute molecule no longer significantly affects the structure of the solvent. 
The default value is 10 Angstroms, but for large solvent molecules this might not be enough. To increase dbulk, use: ","category":"page"},{"location":"mddf/","page":"Computing the MDDF","title":"Computing the MDDF","text":"options = Options(dbulk=15.)\nresults = mddf(trajectory,options)","category":"page"},{"location":"mddf/","page":"Computing the MDDF","title":"Computing the MDDF","text":"stride defines if some frames will be skipped during the calculation (for speedup). For example, if stride=5, only one in five frames will be considered. Adjust stride with: ","category":"page"},{"location":"mddf/","page":"Computing the MDDF","title":"Computing the MDDF","text":"options = Options(stride=5)\nresults = mddf(trajectory,options)","category":"page"},{"location":"mddf/","page":"Computing the MDDF","title":"Computing the MDDF","text":"See the Options section for further details and other options to set.","category":"page"},{"location":"installation/#Installation","page":"Installation","title":"Installation","text":"","category":"section"},{"location":"installation/","page":"Installation","title":"Installation","text":"note: Note\nThis is a package written in Julia. We invite you to experiment with the language, but if you want to just call this package from Python, read the From Python section of the manual. Understanding all the features of the package requires reading the manual as a whole. The syntaxes of using this package from Julia or Python are almost identical, and the motivation for using Python should be mostly the familiarity with further analysis tools, as the plotting packages. ","category":"page"},{"location":"installation/#Install-Julia","page":"Installation","title":"Install Julia","text":"","category":"section"},{"location":"installation/","page":"Installation","title":"Installation","text":"First you need to install the Julia language, version 1.9 or greater is required. 
Using the juliaup tool is a highly recommended way of installing and keeping Julia up to date.","category":"page"},{"location":"installation/","page":"Installation","title":"Installation","text":"Alternatively, you can install Julia by downloading the binaries directly from the Julia webpage.","category":"page"},{"location":"installation/","page":"Installation","title":"Installation","text":"note: Note\nNew to Julia? Julia is a modern high-level yet performant programming language. Some tips and a nice workflow for using it effectively can be found here. For this specific package, following the step-by-step examples provided here after installing Julia should be enough. ","category":"page"},{"location":"installation/#Install-the-packages","page":"Installation","title":"Install the packages","text":"","category":"section"},{"location":"installation/","page":"Installation","title":"Installation","text":"Within Julia, to install the packages required for running the examples here you need to do:","category":"page"},{"location":"installation/","page":"Installation","title":"Installation","text":"julia> import Pkg\n\njulia> Pkg.add([\"ComplexMixtures\",\"Plots\",\"PDBTools\"])","category":"page"},{"location":"installation/","page":"Installation","title":"Installation","text":"Please read the recommended workflow below, for further information and to be sure to have a smoother experience.","category":"page"},{"location":"installation/#Recommended-workflow-for-reproducibility","page":"Installation","title":"Recommended workflow for reproducibility","text":"","category":"section"},{"location":"installation/#Create-an-environment","page":"Installation","title":"Create an environment","text":"","category":"section"},{"location":"installation/","page":"Installation","title":"Installation","text":"Once Julia is installed, we recommend to create an environment that will contain all the packages you may use for your analyses, including ComplexMixtures, in such a way that your 
results can always be reproduced and you don't get any version incompatibility.","category":"page"},{"location":"installation/","page":"Installation","title":"Installation","text":"We illustrate this by creating the \"MyNewPaper\" environment, which will be hosted in a simple directory,","category":"page"},{"location":"installation/","page":"Installation","title":"Installation","text":"mkdir /home/user/Documents/MyNewPaper","category":"page"},{"location":"installation/","page":"Installation","title":"Installation","text":"Then, start Julia and activate the environment that will be hosted there:","category":"page"},{"location":"installation/","page":"Installation","title":"Installation","text":"julia> import Pkg; Pkg.activate(\"/home/user/Documents/MyNewPaper\")\n Activating new project at `~/Documents/MyNewPaper`","category":"page"},{"location":"installation/","page":"Installation","title":"Installation","text":"and add to this environment the packages that your analyses will require:","category":"page"},{"location":"installation/","page":"Installation","title":"Installation","text":"julia> Pkg.add([\"ComplexMixtures\",\"PDBTools\",\"Plots\"])","category":"page"},{"location":"installation/","page":"Installation","title":"Installation","text":"That's it. Close Julia. Note that this created the files Manifest.toml and Project.toml in the MyNewPaper directory, which contain the information of packages and exact package versions you are using from now on in this environment. Saving these files may be relevant for the future exact reproduction of your analyses. 
","category":"page"},{"location":"installation/#Run-your-analysis-scripts-in-that-environment","page":"Installation","title":"Run your analysis scripts in that environment","text":"","category":"section"},{"location":"installation/","page":"Installation","title":"Installation","text":"Now, your analysis scripts, described in the next section in detail, will look like: ","category":"page"},{"location":"installation/","page":"Installation","title":"Installation","text":"import Pkg; Pkg.activate(\"/home/user/Documents/MyNewPaper\")\n\nusing ComplexMixtures\nusing PDBTools\nusing Plots\n\n# etc ... ","category":"page"},{"location":"installation/","page":"Installation","title":"Installation","text":"And the script can be run with julia -t auto script.jl (where -t auto allows for multi-threading), or included in julia with julia> include(\"./script.jl\"), as described in the next section.","category":"page"},{"location":"installation/","page":"Installation","title":"Installation","text":"tip: Tip\nBy loading the package with using ComplexMixturesthe most common functions of the package become readily available by their direct name, for example mddf(...).If you don't want to bring the functions into the scope of your script, useimport ComplexMixturesThen, the functions of the package are called, for example, using ComplexMixtures.mddf(...). To avoid having to write ComplexMixtures all the time, define an acronym. For example:import ComplexMixtures as CM\nCM.mddf(...)","category":"page"},{"location":"contrib/#contributions","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"","category":"section"},{"location":"contrib/","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"One of the interesting features of Minimum-Distance distributions is that they can be naturally decomposed into the atomic or group contributions. 
Simply put, if a MDDF has a peak at a hydrogen-bonding distance, it is natural to decompose that peak into the contributions of each type of solute or solvent atom to that peak. ","category":"page"},{"location":"contrib/","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"To obtain the atomic contributions of an atom or group of atoms, the contributions function is provided. For example, in a system composed of a protein and water, we would have defined the solute and solvent using:","category":"page"},{"location":"contrib/","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"using PDBTools, ComplexMixtures\natoms = readPDB(\"system.pdb\")\nprotein = select(atoms,\"protein\")\nwater = select(atoms,\"water\")\nsolute = Selection(protein,nmols=1)\nsolvent = Selection(water,natomspermol=3)","category":"page"},{"location":"contrib/","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"The MDDF calculation is executed with:","category":"page"},{"location":"contrib/","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"trajectory = Trajectory(\"trajectory.dcd\",solute,solvent)\nresults = mddf(trajectory)","category":"page"},{"location":"contrib/#Atomic-contributions-in-the-result-data-structure","page":"Atomic and group contributions","title":"Atomic contributions in the result data structure","text":"","category":"section"},{"location":"contrib/","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"The results data structure contains the decomposition of the MDDF into the contributions of every type of atom of the solute and the solvent. 
These data are available in the results.solute_atom and results.solvent_atom arrays: ","category":"page"},{"location":"contrib/","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"julia> results.solute_atom\n50×1463 Array{Float64,2}:\n 0.0 0.0 0.0 … 0.0 0.0 0.0\n 0.0 0.0 0.0 … 0.0 0.0 0.0\n ...\n 0.0 0.14245 0.0 … 0.0 0.0 0.0\n 0.0 0.0 0.0 … 0.0 0.0 0.0\n\njulia> results.solvent_atom \n50×3 Array{Float64,2}:\n 0.0 0.0 0.0 \n 0.0 0.0 0.0 \n ...\n 0.26087 0.26087 0.173913\n 0.25641 0.0854701 0.170940","category":"page"},{"location":"contrib/","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"Here, 50 is the number of bins of the histogram, whose distances are available at the results.d vector.","category":"page"},{"location":"contrib/","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"It is expected that for a protein most of the atoms do not contribute to the MDDF, and that all values are zero at very short distances, smaller than the radii of the atoms.","category":"page"},{"location":"contrib/","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"The three columns of the results.solvent_atom array correspond to the three atoms of the water molecule in this example. The sequence of atoms corresponds to that of the PDB file, but can be retrieved with:","category":"page"},{"location":"contrib/","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"julia> solvent.names\n3-element Array{String,1}:\n \"OH2\"\n \"H1\"\n \"H2\"","category":"page"},{"location":"contrib/","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"Therefore, if the first column of the results.solvent_atom vector is plotted as a function of the distances, one gets the contributions to the MDDF of the Oxygen atom of water. 
For example, here we plot the total MDDF and the Oxygen contributions: ","category":"page"},{"location":"contrib/","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"using Plots\nplot(results.d,results.mddf,label=\"Total MDDF\",linewidth=2)\nplot!(results.d,results.solvent_atom[:,1],label=\"OH2\",linewidth=2)\nplot!(xlabel=\"Distance / Å\",ylabel=\"MDDF\")","category":"page"},{"location":"contrib/","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"","category":"page"},{"location":"contrib/#Selecting-groups-by-atom-names-or-indexes","page":"Atomic and group contributions","title":"Selecting groups by atom names or indexes","text":"","category":"section"},{"location":"contrib/","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"To plot the contributions of the hydrogen atoms of water to the total MDDF, we have to select the two atoms, named H1 and H2. The contributions function provides several practical ways of doing that, with or without the use of PDBTools. ","category":"page"},{"location":"contrib/","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"The contributions function receives three parameters: ","category":"page"},{"location":"contrib/","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"The solute or solvent data structure, created with Selection. \nThe array of atomic contributions (here results.solute_atom or results.solvent_atom), corresponding to the selection in 1.\nA selection of a group of atoms within the molecule of interest, provided as described below. 
","category":"page"},{"location":"contrib/#Selecting-by-indexes-within-the-molecule","page":"Atomic and group contributions","title":"Selecting by indexes within the molecule","text":"","category":"section"},{"location":"contrib/","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"To select simply by the index of the atoms of the molecules, just provide a list of indexes to the contributions function. For example, to select the hydrogen atoms, which are the second and third atoms of the water molecule, use:","category":"page"},{"location":"contrib/","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"julia> indexes = [ 2, 3 ]\njulia> h_contributions = contributions(solvent,R.solvent_atom,indexes)\n500-element Array{Float64,1}:\n 0.0\n 0.0\n ⋮\n 0.7742706465861815\n 0.8084139794974875","category":"page"},{"location":"contrib/","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"Plotting both the oxygen (index = 1) and hydrogen contributions results in:","category":"page"},{"location":"contrib/","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"","category":"page"},{"location":"contrib/#Selecting-by-atom-name","page":"Atomic and group contributions","title":"Selecting by atom name","text":"","category":"section"},{"location":"contrib/","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"The exact same plot above could be obtained by providing lists of atom names instead of indexes to the contributions function:","category":"page"},{"location":"contrib/","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"oxygen = [\"OH2\"]\no_contributions = contributions(solvent,R.solvent_atom,oxygen) \nhydrogens = [\"H1\",\"H2\"]\nh_contributions = contributions(solvent,R.solvent_atom,hydrogens)","category":"page"},{"location":"contrib/","page":"Atomic and 
group contributions","title":"Atomic and group contributions","text":"The above plot can be obtained with:","category":"page"},{"location":"contrib/","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"using Plots\nplot(results.d,results.mddf,label=\"Total MDDF\",linewidth=2)\nplot!(results.d,o_contributions,label=\"OH2\",linewidth=2)\nplot!(results.d,h_contributions,label=\"Hydrogen atoms\",linewidth=2)\nplot!(xlabel=\"Distance / Å\",ylabel=\"MDDF\")","category":"page"},{"location":"contrib/#General-selections-using-PDBTools","page":"Atomic and group contributions","title":"General selections using PDBTools","text":"","category":"section"},{"location":"contrib/","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"More interesting and general is to select atoms of a complex molecule, like a protein, using residue names, types, etc. Here we illustrate how this is done by providing selection strings to contributions to obtain the contributions to the MDDF of different types of residues of a protein to the total MDDF. ","category":"page"},{"location":"contrib/","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"For example, if we want to split the contributions of the charged and neutral residues to the total MDDF distribution, we could use to following code. 
Here, solute refers to the protein.","category":"page"},{"location":"contrib/","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"charged_residues = PDBTools.select(atoms,\"charged\")\ncharged_contributions = contributions(solute,R.solute_atom,charged_residues)\n\nneutral_residues = PDBTools.select(atoms,\"neutral\")\nneutral_contributions = contributions(solute,R.solute_atom,neutral_residues)","category":"page"},{"location":"contrib/","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"The charged and neutral outputs are vectors containing the contributions of these residues to the total MDDF. The corresponding plot is: ","category":"page"},{"location":"contrib/","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"plot(results.d,results.mddf,label=\"Total MDDF\",linewidth=2)\nplot!(results.d,charged_contributions,label=\"Charged residues\",linewidth=2)\nplot!(results.d,neutral_contributions,label=\"Neutral residues\",linewidth=2)\nplot!(xlabel=\"Distance / Å\",ylabel=\"MDDF\")","category":"page"},{"location":"contrib/","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"Resulting in:","category":"page"},{"location":"contrib/","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"","category":"page"},{"location":"contrib/","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"Note here how charged residues contribute strongly to the peak at hydrogen-bonding distances, but much less in general. Of course all selection options could be used, to obtain the contributions of specific types of residues, atoms, the backbone, the side-chains, etc. 
","category":"page"},{"location":"parallel/#Parallel-execution","page":"Parallel execution","title":"Parallel execution","text":"","category":"section"},{"location":"parallel/","page":"Parallel execution","title":"Parallel execution","text":"It is highly recommended to run MDDF calculations in parallel, using multiple processors of a single computer. To run the computation in parallel, initialize julia with the -t N option, where N is the number of processes to be used. For example, to use 8 parallel processes, use:","category":"page"},{"location":"parallel/","page":"Parallel execution","title":"Parallel execution","text":"julia -t 8 example.jl","category":"page"},{"location":"parallel/","page":"Parallel execution","title":"Parallel execution","text":"The computation will use a number of parallel processes equal to N. ","category":"page"},{"location":"parallel/","page":"Parallel execution","title":"Parallel execution","text":"note: Note\nThe number of threads used for computation of the MDDF is the number of threads available to Julia. Many computers allow hyperthreading, and this is not necessarily beneficial for the execution of this package. The optimal number of threads may vary.Independently of the number of threads initialized with the -t command-line parameter, the number of processes launched by ComplexMixtures in any given computation can be adjusted by the Options(nthreads=N) option. This won't provide any speedup if the optional number of threads is greater than the number of threads available to Julia at runtime.","category":"page"},{"location":"parallel/","page":"Parallel execution","title":"Parallel execution","text":"warning: Warning\nIf the calculations get killed for no apparent reason, that is probably because you are running out of memory because of the many parallel computations running. 
One way to alleviate this problem is to force garbage collection, usingoptions = Options(GC=true,GC_threshold=0.5)\nR = mddf(trajectory,options)\nThe GC_threshold=0.5 indicates that if the free memory is smaller than 50% of the total memory of the machine, a garbage-collection run will occur. The default parameters are GC=true and GC_threshold=0.3. ","category":"page"},{"location":"multiple/#Working-with-multiple-trajectories","page":"Multiple trajectories","title":"Working with multiple trajectories","text":"","category":"section"},{"location":"multiple/","page":"Multiple trajectories","title":"Multiple trajectories","text":"Very commonly, one has multiple trajectories of the same system, and we want to obtain the average results of all trajectories. We provide a simple scheme to average the results of multiple MDDF calculations:","category":"page"},{"location":"multiple/#Create-a-vector-of-result-data-structures,-without-initialization","page":"Multiple trajectories","title":"Create a vector of result data structures, without initialization","text":"","category":"section"},{"location":"multiple/","page":"Multiple trajectories","title":"Multiple trajectories","text":"Let us assume that we have three Gromacs trajectories, with file names traj1.xtc, traj2.xtc, traj3.xtc. 
First let us create a list with these file names:","category":"page"},{"location":"multiple/","page":"Multiple trajectories","title":"Multiple trajectories","text":"trajectory_files = [ \"traj1.xtc\" , \"traj2.xtc\" , \"traj3.xtc\" ]","category":"page"},{"location":"multiple/","page":"Multiple trajectories","title":"Multiple trajectories","text":"And define an empty vector of Result structures:","category":"page"},{"location":"multiple/","page":"Multiple trajectories","title":"Multiple trajectories","text":"results = Result[]","category":"page"},{"location":"multiple/#Run-the-calculations-in-a-loop","page":"Multiple trajectories","title":"Run the calculations in a loop","text":"","category":"section"},{"location":"multiple/","page":"Multiple trajectories","title":"Multiple trajectories","text":"The calculation on the multiple trajectories is then performed in a simple loop, such as","category":"page"},{"location":"multiple/","page":"Multiple trajectories","title":"Multiple trajectories","text":"atoms = PDBTools.readPDB(\"./system.pdb\")\nsolute = Selection(atoms,\"protein\",nmols=1)\nsolvent = Selection(atoms,\"resname TMAO\",natomspermol=14)\nfor file in trajectory_files\n trajectory = Trajectory(file,solute,solvent)\n # compute the MDDF data and push the result to the results array\n push!(results, mddf(trajectory))\nend","category":"page"},{"location":"multiple/#Merge-the-results-of-several-trajectories,-with-proper-weights","page":"Multiple trajectories","title":"Merge the results of several trajectories, with proper weights","text":"","category":"section"},{"location":"multiple/","page":"Multiple trajectories","title":"Multiple trajectories","text":"Of course, the resulting results vector will contain at each position the results of each calculation. 
To merge these results in a single result data structure, use:","category":"page"},{"location":"multiple/","page":"Multiple trajectories","title":"Multiple trajectories","text":"R = merge(results)","category":"page"},{"location":"multiple/","page":"Multiple trajectories","title":"Multiple trajectories","text":"The R structure generated contains the averaged results of all calculations, with weights proportional to the number of frames of each trajectory. That is, if the first trajectory had 2000 frames, and the second and third trajectories have 1000 frames each, the first trajectory will have a weight of 0.5 on the final results. The merge function can be used to merge previously merged results with new results as well.","category":"page"},{"location":"multiple/","page":"Multiple trajectories","title":"Multiple trajectories","text":"tip: Tip\nThe names of the files and weights are stored in the R.files and R.weights vectors of the results structure:julia> R.files\n3-element Array{String,1}:\n \"./traj1.xtc\"\n \"./traj2.xtc\"\n \"./traj3.xtc\"\n\njulia> R.weights\n3-element Array{Float64,1}:\n 0.5\n 0.25\n 0.25\nIt is not a bad idea to check if that is what you were expecting.","category":"page"},{"location":"save/#save","page":"Save and load","title":"Save and load results","text":"","category":"section"},{"location":"save/","page":"Save and load","title":"Save and load","text":"Three functions serve the purpose of saving and loading the results obtained with ComplexMixtures:","category":"page"},{"location":"save/#Save-data-to-recover-it-later","page":"Save and load","title":"Save data to recover it later","text":"","category":"section"},{"location":"save/","page":"Save and load","title":"Save and load","text":"save(results,\"results.json\")","category":"page"},{"location":"save/","page":"Save and load","title":"Save and load","text":"where results is the output data structure of the mddf() calculation, and results.json is the output file to be created. 
The file is written in JSON format, thus is not naturally human-readable.","category":"page"},{"location":"save/#Load-saved-data","page":"Save and load","title":"Load saved data","text":"","category":"section"},{"location":"save/","page":"Save and load","title":"Save and load","text":"results = load(\"results.json\")","category":"page"},{"location":"save/","page":"Save and load","title":"Save and load","text":"The load function reads the output of the save function above, and restores the results data structure.","category":"page"},{"location":"save/#Write-data-in-a-human-readable-format","page":"Save and load","title":"Write data in a human-readable format","text":"","category":"section"},{"location":"save/","page":"Save and load","title":"Save and load","text":"If you Want the results to be written as simple ASCII tables such that you can read them with another analysis program, plotting graphic, or just want to inspect the data visually, use:","category":"page"},{"location":"save/","page":"Save and load","title":"Save and load","text":"write(results,\"results.dat\")","category":"page"},{"location":"save/","page":"Save and load","title":"Save and load","text":"Three files will be created by this function:","category":"page"},{"location":"save/","page":"Save and load","title":"Save and load","text":"results.dat: Contains the main results, as the MDDF and KB-integral data.","category":"page"},{"location":"save/","page":"Save and load","title":"Save and load","text":"results-ATOM_CONTRIB_SOLVENT.dat: contains the contribution of each atom type of the solvent to the MDDF.","category":"page"},{"location":"save/","page":"Save and load","title":"Save and load","text":"results-ATOM_CONTRIB_SOLUTE.dat: contains the contribution of each atom type of the solute to the MDDF.","category":"page"},{"location":"trajectory/#trajectories","page":"Loading the trajectory","title":"Loading trajectories","text":"","category":"section"},{"location":"trajectory/","page":"Loading the 
trajectory","title":"Loading the trajectory","text":"To initialize a trajectory file for computation, use the command","category":"page"},{"location":"trajectory/","page":"Loading the trajectory","title":"Loading the trajectory","text":"trajectory = Trajectory(\"trajectory.xtc\",solute,solvent)","category":"page"},{"location":"trajectory/","page":"Loading the trajectory","title":"Loading the trajectory","text":"where solute and solvent are defined with the Selection function described before. This function opens the stream for reading frames, which are read one at a time when the coordinates are required for computing the MDDF.","category":"page"},{"location":"trajectory/","page":"Loading the trajectory","title":"Loading the trajectory","text":"The Trajectory function uses Chemfiles in the background, and thus the most common trajectory formats are supported, as the ones produced with NAMD, Gromacs, LAMMPS, Amber, etc. ","category":"page"},{"location":"trajectory/","page":"Loading the trajectory","title":"Loading the trajectory","text":"tip: Tip\nThe format of the trajectory file is automatically determined by Chemfiles from the extension of the file. However, it can be provided by the user with the format keyword, for example:trajectory = Trajectory(\"trajectory.xtc\",solute,solvent,format=\"xtc\")","category":"page"},{"location":"quickguide/#Quick-Guide","page":"Quick Guide","title":"Quick Guide","text":"","category":"section"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"Of course, follow the installation instructions first. 
A complete working example is shown below, and in the section that follows each command is described in detail.","category":"page"},{"location":"quickguide/#Complete-example","page":"Quick Guide","title":"Complete example","text":"","category":"section"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"Here we show the input file required for the study of the solvation of a protein by the TMAO solvent, which is a molecule with 14 atoms. The protein is assumed to be at infinite dilution in the simulation. The trajectory of the simulation is in DCD format in this example, which is the default output of NAMD and CHARMM simulation packages.","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"# Activate environment (see the Installation -> Recommended Workflow manual section)\nimport Pkg; Pkg.activate(\"/home/user/MyNewPaper\")\n\n# Load packages\nusing PDBTools\nusing ComplexMixtures \nusing Plots\n\n# Load PDB file of the system\natoms = readPDB(\"./system.pdb\")\n\n# Select the protein and the TMAO molecules\nprotein = select(atoms,\"protein\")\ntmao = select(atoms,\"resname TMAO\")\n\n# Setup solute and solvent structures\nsolute = Selection(protein,nmols=1)\nsolvent = Selection(tmao,natomspermol=14)\n\n# Setup the Trajectory structure\ntrajectory = Trajectory(\"./trajectory.dcd\",solute,solvent)\n\n# Run the calculation and get results\nresults = mddf(trajectory)\n\n# Save the results to recover them later if required\nsave(results,\"./results.json\")\n\n# Plot some of the most important results \nplot(results.d,results.mddf,xlabel=\"d\",ylabel=\"MDDF\") # plot the MDDF\nsavefig(\"./mddf.pdf\")\nplot(results.d,results.kb,xlabel=\"d\",ylabel=\"KB\") # plot the KB \nsavefig(\"./kb.pdf\")","category":"page"},{"location":"quickguide/#Running-the-example","page":"Quick Guide","title":"Running the example","text":"","category":"section"},{"location":"quickguide/","page":"Quick Guide","title":"Quick 
Guide","text":"Given that this code is saved into a file named example.jl, it can be run within the Julia REPL with:","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"julia> include(\"example.jl\")","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"or directly with:","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"julia -t auto example.jl","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"where -t auto will launch julia with multi-threading. It is highly recommended to use multi-threading!","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"note: Note\nSome newer CPUs have \"fast\" and \"slow\" cores, designed for performance or energy savings. Thus using all cores, with -t auto, may not be the best strategy for optimal performance. Experimenting with different numbers of cores using -t N where N is the number of cores used is always necessary for tuning performance.","category":"page"},{"location":"quickguide/#Detailed-description-of-the-example","page":"Quick Guide","title":"Detailed description of the example","text":"","category":"section"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"Start julia and load the ComplexMixtures package, using:","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"using ComplexMixtures","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"And here we will use the PDBTools package to obtain the selections of the solute and solvent molecules: ","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"using PDBTools","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"(see Set solute and solvent for 
details).","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"The fastest way to understand how to use this package is through an example. ","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"Let us consider a system of three components: a protein, water, and a cosolvent: TMAO (trimethylamine-N-oxide), which is a common osmolyte known to stabilize protein structures. A picture of this system is shown below, with the protein in blue, water, and TMAO molecules. The system was constructed with Packmol and the figure was produced with VMD.","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"
\n\n
","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"We want to study the interactions of the protein with TMAO in this example. The computation of the MDDF is performed by defining the solute and solvent selections, and running the calculation on the trajectory.","category":"page"},{"location":"quickguide/#Define-the-protein-as-the-solute","page":"Quick Guide","title":"Define the protein as the solute","text":"","category":"section"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"To define the protein as the solute, we will use the PDBTools package, which provides a handy selection syntax. First, read the PDB file using ","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"atoms = readPDB(\"./system.pdb\")","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"Then, let us select the protein atoms (here we are using the PDBTools.select function):","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"protein = select(atoms,\"protein\")","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"And, finally, let us use the Selection function to set up the structure required by the MDDF calculation:","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"solute = Selection(protein,nmols=1)","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"note: Note\nIt is necessary to indicate how many molecules the selection contains (in this case, nmols=1), so that ComplexMixtures knows that the solute is to be considered as a single structure. 
In this case there is no ambiguity, but if the solute were a micelle, for example, this option would let ComplexMixtures know that one wants to consider the micelle as a single structure.","category":"page"},{"location":"quickguide/#Define-TMAO-the-solvent-to-be-considered","page":"Quick Guide","title":"Define TMAO the solvent to be considered","text":"","category":"section"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"Equivalently, the solvent is set up with:","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"tmao = select(atoms,\"resname TMAO\")\nsolvent = Selection(tmao,natomspermol=14)\n","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"note: Note\nHere we opted to provide the number of atoms of a TMAO molecule (with the natomspermol keyword). This is generally more practical for small molecules than providing the number of molecules.","category":"page"},{"location":"quickguide/#Set-the-Trajectory-structure","page":"Quick Guide","title":"Set the Trajectory structure","text":"","category":"section"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"The solute and solvent data structures are then fed into the Trajectory data structure, together with the trajectory file name, with:","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"trajectory = Trajectory(\"trajectory.dcd\",solute,solvent)","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"In this case, the trajectory is in the NAMD \"dcd\" format. All formats supported by Chemfiles are automatically recognized. 
","category":"page"},{"location":"quickguide/#Finally,-run-the-computation-and-get-the-results:","page":"Quick Guide","title":"Finally, run the computation and get the results:","text":"","category":"section"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"If default options are used (as the bin size of the histograms, read all frames without skipping any), just run the mddf with:","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"results = mddf(trajectory)\n","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"Some optional parameters for the computation are available in the Options section.","category":"page"},{"location":"quickguide/#The-results-data-structure-obtained","page":"Quick Guide","title":"The results data structure obtained","text":"","category":"section"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"The results data structure contains all the results of the MDDF calculation, including:","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"results.d : Vector containing the distances to the solute. ","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"results.mddf : Vector containing the minimum-distance distribution function at each distance.","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"That means, for example, that ","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"plot(results.d,results.mddf,xlabel=\"d / \\AA\",ylabel=\"MDDF\") \n","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"results in the expected plot of the MDDF of TMAO as a function of the distance to the protein:","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"
\n\n
","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"The Kirkwood-Buff integral corresponding to that distribution is provided in the results.kb vector, and can be also directly plotted with ","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"plot(results.d,results.kb,xlabel=\"d / \\AA\",ylabel=\"MDDF\") ","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"to obtain:","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"
\n\n
","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"See the Atomic and group contributions section for a detailed account on how to obtain a molecular picture of the solvation by splitting the MDDF in the contributions of each type of atom of the solvent, each type of residue of the protein, etc.","category":"page"},{"location":"quickguide/#Save-the-results","page":"Quick Guide","title":"Save the results","text":"","category":"section"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"The results can be saved into a file (with JSON format) with:","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"save(results,\"./results.json\")","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"And these results can be loaded afterwards with:","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"load(\"./results.json\")","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"Alternatively, a human-readable set of output files can be obtained to be analyzed in other software (or plotted with alternative tools), with","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"write(results,\"./results.dat\")","category":"page"},{"location":"selection/#selections","page":"Set solute and solvent","title":"Set the solute and solvent selections","text":"","category":"section"},{"location":"selection/","page":"Set solute and solvent","title":"Set solute and solvent","text":"The solute and solvent are defined in ComplexMixtures as lists (vectors) of the indexes of the atoms of the system. The solute and solvent information is stored in the Selection structure. 
For example, if the solute is a molecule formed by the first 5 atoms of the system, it would be defined as: ","category":"page"},{"location":"selection/","page":"Set solute and solvent","title":"Set solute and solvent","text":"indexes = [ 1, 2, 3, 4, 5 ]\nsolute = Selection(indexes,nmols=1)","category":"page"},{"location":"selection/","page":"Set solute and solvent","title":"Set solute and solvent","text":"note: Note\nWe need to inform the Selection function about the number of atoms of each molecule (using natomspermol=3, for example), or the number of molecules (using nmols=1000, for example), such that the atoms belonging to each molecule can be determined without ambiguity. ","category":"page"},{"location":"selection/","page":"Set solute and solvent","title":"Set solute and solvent","text":"The atom names can be also provided such that some of the output files contain more information on the atomic contributions. In this case the syntax is:","category":"page"},{"location":"selection/","page":"Set solute and solvent","title":"Set solute and solvent","text":"indexes = [ 1, 2, 3, 4, 5 ]\nnames = [ \"H1\", \"H2\", \"H3\", \"H4\", \"C\" ]\nsolute = Selection(indexes,names,nmols=1)","category":"page"},{"location":"selection/","page":"Set solute and solvent","title":"Set solute and solvent","text":"warning: Warning\nThe indexing in ComplexMixtures is 1-based. That means that the first atom of your structure file is in position 1 of the coordinates. Please be careful if using any selection tool to be sure that your selection is correct.","category":"page"},{"location":"selection/#Using-PDBTools","page":"Set solute and solvent","title":"Using PDBTools","text":"","category":"section"},{"location":"selection/","page":"Set solute and solvent","title":"Set solute and solvent","text":"PDBTools is a package we developed to read and write PDB files, which provides a simple selection tool. It is installed as a dependency of ComplexMixtures. 
Given a PDB file of the simulated system, the solute can be defined as, for example,","category":"page"},{"location":"selection/","page":"Set solute and solvent","title":"Set solute and solvent","text":"using PDBTools\natoms = PDBTools.readPDB(\"system.pdb\")\nprotein = PDBTools.select(atoms,\"protein\")\nsolute = Selection(protein,nmols=1)","category":"page"},{"location":"selection/","page":"Set solute and solvent","title":"Set solute and solvent","text":"If the solvent is, for instance, water, the indexes of the water molecules can be obtained with:","category":"page"},{"location":"selection/","page":"Set solute and solvent","title":"Set solute and solvent","text":"water = PDBTools.select(atoms,\"water\")\nsolvent = Selection(water,natomspermol=3)","category":"page"},{"location":"selection/","page":"Set solute and solvent","title":"Set solute and solvent","text":"or, alternatively, a more compact syntax can be used, for example:","category":"page"},{"location":"selection/","page":"Set solute and solvent","title":"Set solute and solvent","text":"water = PDBTools.select(\"system.pdb\",\"resname TIP3P\")\nsolvent = Selection(water,natomspermol=3)","category":"page"},{"location":"selection/","page":"Set solute and solvent","title":"Set solute and solvent","text":"or even providing just the names of the input file and selection, which will run PDBTools in background:","category":"page"},{"location":"selection/","page":"Set solute and solvent","title":"Set solute and solvent","text":"solvent = Selection(\"sytem.pdb\",\"water\",water,natomspermol=3)","category":"page"},{"location":"selection/","page":"Set solute and solvent","title":"Set solute and solvent","text":"warning: Warning\nThe selection syntax of PDBTools is somewhat limited. 
Verify if the selections correspond to the desired sets of atoms every time.","category":"page"},{"location":"selection/#Using-VMD","page":"Set solute and solvent","title":"Using VMD","text":"","category":"section"},{"location":"selection/","page":"Set solute and solvent","title":"Set solute and solvent","text":"VMD is a very popular and powerful package for visualization of simulations. It contains a very versatile library to read topologies and trajectory files, and a powerful selection syntax. We provide here a wrapper to VMD which allows using its capabilities. ","category":"page"},{"location":"selection/","page":"Set solute and solvent","title":"Set solute and solvent","text":"For example, the solute can be defined with: ","category":"page"},{"location":"selection/","page":"Set solute and solvent","title":"Set solute and solvent","text":"indexes, names = VMDselect(\"./system.gro\",\"protein\",vmd=\"/usr/bin/vmd\")\nsolute = Selection(indexes,names,nmols=1)","category":"page"},{"location":"selection/","page":"Set solute and solvent","title":"Set solute and solvent","text":"The main advantage here is that all the file types that VMD supports are supported. But VMD needs to be installed and is run in the background, and it takes a few seconds. ","category":"page"},{"location":"selection/","page":"Set solute and solvent","title":"Set solute and solvent","text":"The VMDselect function also accepts an optional keyword parameter srcload, which can be used to load custom scripts within vmd before setting the selection. This allows the definition of tcl scripts with custom selection macros, for instance. 
The usage would be: ","category":"page"},{"location":"selection/","page":"Set solute and solvent","title":"Set solute and solvent","text":"sel = VMDselect(\"file.pdb\", \"resname MYRES\"; srcload = [ \"mymacros1.tcl\", \"mymacros2.tcl\" ])","category":"page"},{"location":"selection/","page":"Set solute and solvent","title":"Set solute and solvent","text":"Which corresponds to sourcing each of the macro files in VMD before defining the selection with the custom MYRES name.","category":"page"},{"location":"selection/","page":"Set solute and solvent","title":"Set solute and solvent","text":"compat: Compat\nCustom script source loading in VMDselect was introduced in ComplexMixtures version 1.3.0.","category":"page"},{"location":"selection/","page":"Set solute and solvent","title":"Set solute and solvent","text":"warning: Warning\nVMD uses 0-based indexing and VMDselect adjusts that. However, if a selection is performed by index, as with index 1, VMD will select the second atom, and the output will be [2]. Selections by type, name, segment, residue name, etc., won't be a problem.","category":"page"},{"location":"tools/#Tools","page":"Tools","title":"Tools","text":"","category":"section"},{"location":"tools/","page":"Tools","title":"Tools","text":"A set of examples of analyses that can be performed with ComplexMixtures is given on this site. A brief description of the possible results is provided here. 
","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"Some tools are provided to analyze the results:","category":"page"},{"location":"tools/#coordination_number","page":"Tools","title":"Coordination numbers","text":"","category":"section"},{"location":"tools/","page":"Tools","title":"Tools","text":"The function","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"coordination_number(R::Result, group_contributions::Vector{Float64})","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"computes the coordination number of a given group of atoms from the solute or solvent atomic contributions to the MDDF. Here, R is the result of the mddf calculation, and group_contributions is the output of the contributions function for the desired set of atoms.","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"If no group is defined, the coordination number of the complete solute is returned, which is equivalent to the R.coordination_number field of the Result data structure:","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"coordination_number(R::Result) == R.coordination_number","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"note: Note\nThere are some systems for which the normalization of the distributions is not necessary or possible. It is still possible to compute the coordination numbers, by running, instead of mddf, the coordination_number function:coordination_number(trajectory::Trajectory, options::Options)This call will return Result data structure but with all fields requiring normalization with zeros. 
In summary, this result data structure can be used to compute the coordination numbers, but not the MDDF, RDF, or KB integrals.","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"compat: Compat\nThe independent computation of coordination numbers was introduced in version 1.1.","category":"page"},{"location":"tools/#Example","page":"Tools","title":"Example","text":"","category":"section"},{"location":"tools/","page":"Tools","title":"Tools","text":"In the following example we compute the coordination number of the atoms of residue 50 (which belongs to the solute - a protein) with the solvent atoms of TMAO, as a function of the distance. The plot produced will show side by side the residue contribution to the MDDF and the corresponding coordination number.","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"using ComplexMixtures, PDBTools\nusing Plots, EasyFit\npdb = readPDB(\"test/data/NAMD/structure.pdb\")\nR = load(\"test/data/NAMD/protein_tmao.json\")\nsolute = Selection(PDBTools.select(pdb, \"protein\"), nmols=1)\nresidue50 = PDBTools.select(pdb, \"residue 50\")\n# Compute the group contribution to the MDDF\nresidue50_contribution = contributions(solute, R.solute_atom, residue50)\n# Now compute the coordination number\nresidue50_coordination = coordination_number(R, residue50_contribution)\n# Plot with twin y-axis\nplot(R.d, movavg(residue50_contribution,n=10).x,\n xaxis=\"distance / Å\", \n yaxis=\"MDDF contribution\", \n linewidth=2, label=nothing, color=1\n)\nplot!(twinx(),R.d, residue50_coordination, \n yaxis=\"Coordination number\", \n linewidth=2, label=nothing, color=2\n)\nplot!(title=\"Residue 50\", framestyle=:box, subplot=1)","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"With appropriate input data, this code produces:","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"
\n\n
","category":"page"},{"location":"tools/#Computing-a-2D-density-map-around-a-macromolecule","page":"Tools","title":"Computing a 2D density map around a macromolecule","text":"","category":"section"},{"location":"tools/","page":"Tools","title":"Tools","text":"One nice way to visualize the accumulation or depletion of a solvent around a macromolecule (a protein, for example), is to obtain a 2D map of the density as a function of the distance from its surface. For example, in the figure below the density of a solute (here, Glycerol), in the neighborhood of a protein is shown:","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"
\n\n
","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"Here, one can see that Glycerol accumulates on Asp76 and on the proximity of hydrogen-bonding residues (Serine residues mostly). This figure was obtained by extracting from atomic contributions of the protein the contribution of each residue to the MDDF. Using PDBTools, this can be done with, for example: ","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"residues = collect(eachresidue(protein))\nresidue_contributions = zeros(length(R.d),length(residues))\nfor (i,residue) in pairs(residues)\n c = contributions(solute,R.solute_atom,residue) \n residue_contributions[:,i] .= c\nend","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"The above produces a matrix with a number of columns equal to the number of residues and a number of rows equal to the number of MDDF points. That matrix can be plotted as a contour map with adequate plotting software. A complete running example is provided here, producing the figure above. ","category":"page"},{"location":"tools/#Computing-a-3D-density-map-around-a-macromolecule","page":"Tools","title":"Computing a 3D density map around a macromolecule","text":"","category":"section"},{"location":"tools/","page":"Tools","title":"Tools","text":"Three-dimensional representations of the distribution functions can also be obtained from the MDDF results. These 3D representations are obtained from the fact that the MDDFs can be decomposed into the contributions of each solute atom, and that each point in space is closest to a single solute atom as well. Thus, each point in space can be associated to one solute atom, and the contribution of that atom to the MDDF at the corresponding distance can be obtained. 
","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"For example, the distribution function of a hydrogen-bonding liquid solvating a protein will display a characteristic peak at about 1.8Å. The MDDF at that distance can be decomposed into the contributions of all atoms of the protein which were found to form hydrogen bonds to the solvent. A 3D representation of these contributions can be obtained by computing, around a static protein (solute) structure, which are the regions in space which are closer to each atom of the protein. The position in space is then marked with the atom of the protein to which that region \"belongs\" and with the contribution of that atom to the MDDF at each distance within that region. A special function to compute this 3D distribution is provided here: grid3D. ","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"This is better illustrated by a graphical representation. In the figure below we see a 3D representation of the MDDF of Glycerol around a protein, computed from a simulation of this protein in a mixture of water and Glycerol. A complete set of files and a script to reproduce this example is available here. ","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"
\n\n
","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"In the figure on the left, the points in space around the protein are selected with the following properties: distance from the protein smaller than 2.0Å and relative contribution to the MDDF at the corresponding distance of at least 10% of the maximum contribution. Thus, we are selecting the regions of the protein corresponding to the most stable hydrogen-bonding interactions. The color of the points is the contribution to the MDDF, from blue to red. Thus, the most reddish-points corresponds to the regions where the most stable hydrogen bonds were formed. We have marked two regions here, on opposite sides of the protein, with arrows.","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"Clicking on those points we obtain which are the atoms of the protein contributing to the MDDF at that region. In particular, the arrow on the right points to the strongest red region, which corresponds to an Aspartic acid. These residues are shown explicitly under the density (represented as a transparent surface) on the figure in the center. ","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"The figure on the right displays, overlapped with the hydrogen-bonding residues, the most important contributions to the second peak of the distribution, corresponding to distances from the protein between 2.0 and 3.5Å. Notably, the regions involved are different from the ones forming hydrogen bonds, indicating that non-specific interactions with the protein (and not a second solvation shell) are responsible for the second peak. 
","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"An example input file which produces the files required for producing these images is:","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"using ComplexMixtures, PDBTools\n\n# PDB file of the system simulated\npdb = readPDB(\"../Data/system.pdb\")\n\n# Load results of a ComplexMixtures run\nR = load(\"../Data/results_glyc50.json\") \n\n# Inform which is the solute\nprotein = select(pdb,\"protein\")\nsolute = Selection(protein,nmols=1)\n\n# Compute the 3D density grid and output it to the PDB file\ngrid = grid3D(\n solute=solute,\n solute_atoms=protein,\n mddf_result=R,\n output_file=\"grid.pdb\"\n)","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"The call to grid3D in the last command will write an output a PDB file with the grid points, which loaded in a visualization software side-by-side with the protein structure, allows the production of the images shown. The grid.pdb file contains a regular PDB format, but the atoms are grid points. The identity of the atoms correspond to the identity of the protein atom contributing to the MDDF at that point (the closest protein atom). 
The temperature-factor column (beta) contains the relative contribution of that atom to the MDDF at the corresponding distance, and the occupancy field contains the distance itself.","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"The output grid variable contains the same information of the PDB file, which can be analyzed with the tools of PDBTools if the user wants to.","category":"page"},{"location":"tools/#Computing-radial-distribution-functions","page":"Tools","title":"Computing radial distribution functions","text":"","category":"section"},{"location":"tools/","page":"Tools","title":"Tools","text":"The distributions returned by the mddf function (the mddf and rdf vectors), are normalized by the random reference state or using a site count based on the numerical integration of the volume corresponding to each minimum-distance to the solute. ","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"If, however, the solute is defined by a single atom (as the oxygen atom of water, for example), the numerical integration of the volume can be replaced by a simple analytical spherical shell volume, reducing noise. The ComplexMixtures.gr function returns the radial distribution function and the KB integral computed from the results, using this volume estimate: ","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"g, kb = ComplexMixtures.gr(R)","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"By default, the single-reference count (rdf_count) of the Result structure will be used to compute the radial distribution function. 
The function can be called with explicit control of all input parameters: ","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"g, kb = ComplexMixtures.gr(r,count,density,binstep)","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"where:","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"Parameter Definition Result structure output data to provide\nr Vector of distances The d vector\ncount Number of site counts at each r The rdf or mddf vectors\ndensity Bulk density The density.solvent_bulk or density.solvent densities.\nbinstep The histogram step The options.binstep\n ","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"Example:","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"...\nR = mddf(trajectory,options)\ng, kb = ComplexMixtures.gr(R.d,R.rdf_count,R.density.solvent_bulk,R.options.binstep)","category":"page"},{"location":"tools/#Overview-of-the-solvent-and-solute-properties","page":"Tools","title":"Overview of the solvent and solute properties","text":"","category":"section"},{"location":"tools/","page":"Tools","title":"Tools","text":"The output to the REPL of the Result structure provides an overview of the properties of the solution. The data can be retrieved into a data structure using the overview function. 
Examples: ","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"...\njulia> results = mddf(trajectory)\n\njulia> results\n\n-------------------------------------------------------------------------------\n\n MDDF Overview: \n\n Solvent properties: \n ------------------- \n\n Simulation concentration: 1.5209006318095133 mol L⁻¹\n Molar volume: 657.5051512801567 cm³ mol⁻¹\n\n Concentration in bulk: 1.4918842545752287 mol L⁻¹\n Molar volume in bulk: 670.2932864484995 cm³ mol⁻¹ \n\n Solute properties: \n ------------------ \n\n Simulation Concentration: 1.5209006318095133 mol L⁻¹\n Estimated solute partial molar volume: 657.5051512801567 cm³ mol⁻¹\n\n Using dbulk = 20.0Å: \n Molar volume of the solute domain: 30292.570006549242 cm³ mol⁻¹\n\n Auto-correlation: true\n\n Trajectory files and weights: \n ./vinicius.xtc - w = 1.0\n\n Long range MDDF mean (expected 1.0): 1.1090804621839963 +/- 0.04298849642932878\n Long range RDF mean (expected 1.0): 1.15912932236198 +/- 0.05735018864444404\n\n-------------------------------------------------------------------------------","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"In this case, since solute and solvent are equivalent and the system is homogeneous, the molar volumes and concentrations are similar. This is not the case if the molecules are different or if the solute is at infinite dilution (in which case the bulk solvent density might be different from the solvent density in the simulation). 
","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"To retrieve the data of the overview structure use, for example:","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"julia> overview = overview(results);\n\njulia> overview.solute_molar_volume\n657.5051512801567","category":"page"},{"location":"examples/#examples","page":"Full Example","title":"Example","text":"","category":"section"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"note: Note\nAt this repository various examples are available illustrating the execution and possibilities of the package. Here we discuss one of these examples in detail.","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"The following examples consider a system composed a protein solvated by a mixture of water and glycerol, built with Packmol. The simulations were performed with NAMD with periodic boundary conditions and a NPT ensemble at room temperature and pressure. Molecular pictures were produced with VMD and plots were produced with Julia's Plots library.","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"
\n\n
","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"Image of the system of the example: a protein solvated by a mixture of glycerol (green) and water, at a concentration of 50%vv. ","category":"page"},{"location":"examples/#How-to-run-this-example","page":"Full Example","title":"How to run this example","text":"","category":"section"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"Download and install Julia\nInstall the required packages. Within Julia, do:","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"julia> import Pkg\n\njulia> Pkg.add([\"ComplexMixtures\", \"PDBTools\", \"Plots\", \"LaTeXStrings\", \"Formatting\"])","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"Get the files:","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"git clone https://github.com/m3g/ComplexMixturesExamples","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"The files associated with the following examples are distributed at this page. ","category":"page"},{"location":"examples/#Data","page":"Full Example","title":"Data","text":"","category":"section"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"The Data directory contains a PDB file of the system (system.pdb) and a sample from the trajectory (glyc50.dcd), with a few frames. It also contains the result of running the mddf calculation on the complete trajectory, results_glyc50.json. This last file was produced by ComplexMixtures, as indicated in the following examples. ","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"The sample trajectory is provided so that the first example can be run, yet do not expect that the results are the same, as the sampling is much lower in this case. 
The complete trajectory can be retrieved from this link (3GB file). ","category":"page"},{"location":"examples/#Minimum-Distance-Distribution-function","page":"Full Example","title":"Minimum-Distance Distribution function","text":"","category":"section"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"Here we compute the minimum-distance distribution function, the Kirkwood-Buff integral, and the atomic contributions of the solvent to the density.","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"This example illustrates the regular usage of ComplexMixtures, to compute the minimum distance distribution function, KB-integrals and group contributions. ","category":"page"},{"location":"examples/#How-to-run-this-example-2","page":"Full Example","title":"How to run this example","text":"","category":"section"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"cd ComplexMixturesExamples/Protein_in_Glycerol/MDDF\njulia -t auto mddf.jl","category":"page"},{"location":"examples/#Detailed-explanation-of-the-example:","page":"Full Example","title":"Detailed explanation of the example:","text":"","category":"section"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"Loading the packages required for computing the MDDF. 
","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"using PDBTools\nusing ComplexMixtures","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"Load the pdb file of the system using PDBTools:","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"atoms = readPDB(\"../Data/system.pdb\")","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"Create arrays of atoms with the protein and Glycerol atoms, using the select function of the PDBTools package:","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"protein = select(atoms,\"protein\")\nglyc = select(atoms,\"resname GLYC\")","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"Setup solute and solvent structures, required for computing the MDDF, with Selection function of the ComplexMixtures package:","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"solute = Selection(protein,nmols=1)\nsolvent = Selection(glyc,natomspermol=14)","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"Read and setup the Trajectory structure required for the computations:","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"trajectory = Trajectory(\"../Data/glyc50_complete.dcd\",solute,solvent)","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"Run the calculation and get results:","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"results = mddf(trajectory)","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"note: Note\nTo change the options of the calculation, set the Options structure accordingly and pass it as a 
parameter to mddf. For example:options = Options(cutoff=10.)\nmddf(trajectory,options)The complete set of options available is described here.","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"Save the results to recover them later if required","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"save(results,\"./glyc50.json\")","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"The trajectory that was loaded was for a toy-example. The complete trajectory is available here, but it is a 3GB file. The same procedure above was performed with that file and produced the results_glyc50.json file, which is available in the Data directory here. We will continue with this file instead. ","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"Load the actual results obtained with the complete simulation:","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"results = load(\"../Data/results_glyc50.json\")","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"Results are loaded, and now we can plot the data obtained.","category":"page"},{"location":"examples/#Produce-plots","page":"Full Example","title":"Produce plots","text":"","category":"section"},{"location":"examples/#MDDF-and-Kirkwood-Buff-integrals","page":"Full Example","title":"MDDF and Kirkwood-Buff integrals","text":"","category":"section"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"Load some packages that we will use to produce the plots:","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"using Plots, Plots.PlotMeasures, LaTeXStrings","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"Some default options that make the plots 
prettier:","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"default(\n fontfamily=\"Computer Modern\",\n linewidth=2, framestyle=:box, label=nothing, grid=false\n)","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"First, we will plot the MDDF and the corresponding Kirkwood-Buff integral, which are available in the results.mddf and results.kb fields of the results data set. The distances are available in the results.d vector. We also plot here an horizontal line and save the figure as a pdf file. ","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"plot(layout=(1,2))\nplot!(results.d,results.mddf,xlabel=L\"r/\\AA\",ylabel=\"mddf\",subplot=1)\nhline!([1],linestyle=:dash,linecolor=:gray,subplot=1)\nplot!(\n results.d,results.kb/1000, #to L/mol\n xlabel=L\"r/\\AA\",ylabel=L\"G_{us}/\\mathrm{L~mol^{-1}}\",\n subplot=2\n)\nplot!(size=(800,300),margin=4mm)\nsavefig(\"./mddf.pdf\")","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"This will produce the following plot:","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"
\n\n
","category":"page"},{"location":"examples/#Atomic-contributions-to-the-MDDF","page":"Full Example","title":"Atomic contributions to the MDDF","text":"","category":"section"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"Selecting the atoms corresponding to the hydroxyl groups, and of the aliphatic carbons of Glycerol. Here we list the types of the atoms as specified by the force-field.","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"hydroxyls = [\"O1\",\"O2\",\"O3\",\"H1\",\"H2\",\"H3\"]\naliphatic = [\"C1\",\"C2\",\"HA\",\"HB\",\"HC\",\"HD\"]","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"The contributions function of ComplexMixtures will extract from the result the contributions of each set of atoms to the total MDDF:","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"hydr_contributions = contributions(solvent,results.solvent_atom,hydroxyls)\naliph_contributions = contributions(solvent,results.solvent_atom,aliphatic)","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"And, finally, here we plot these group contributions on top of the total MDDF:","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"plot(results.d,results.mddf,xlabel=L\"r/\\AA\",ylabel=\"mddf\",size=(600,400))\nplot!(results.d,hydr_contributions,label=\"Hydroxils\")\nplot!(results.d,aliph_contributions,label=\"Aliphatic chain\")\nhline!([1],linestyle=:dash,linecolor=:gray)\nsavefig(\"./mddf_atom_contrib.pdf\")","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"This will produce the following figure:","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"
\n\n
","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"Note how hydroxyl clearly are the sole contribution to the peak at ~1.9 Angstroms, corresponding to hydrogen-bonding interactions. The aliphatic groups contribute importantly to the shoulder at larger distances, which correspond to non-specific interactions. ","category":"page"},{"location":"examples/#2D-residue-contribution-density-map","page":"Full Example","title":"2D residue contribution density map","text":"","category":"section"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"In this example we compute the density map of Glycerol in the vicinity of a set of residues of a protein, from the minimum-distance distribution function. ","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"The MDDF can be decomposed in the contributions of each atom of the solute or of the solvent. Here, we sum up te contributions of all the atoms of each residue of the solute, which is a protein, and plot a density map with the final information. The output figure obtained is:","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"
\n\n
","category":"page"},{"location":"examples/#How-to-run-this-example:","page":"Full Example","title":"How to run this example:","text":"","category":"section"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"cd ComplexMixturesExamples/Protein_in_Glycerol/Density2D\njulia density2D.jl","category":"page"},{"location":"examples/#Detailed-explanation-of-the-example:-2","page":"Full Example","title":"Detailed explanation of the example:","text":"","category":"section"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"Here, we use the contourf function of the Plots package of Julia. A detailed explanation of the input file density2D.jl is provide below: ","category":"page"},{"location":"examples/#Loading-packages-that-will-be-used:","page":"Full Example","title":"Loading packages that will be used:","text":"","category":"section"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"using Plots\nusing LaTeXStrings\nusing Formatting\nusing ComplexMixtures, PDBTools","category":"page"},{"location":"examples/#Some-default-options-so-the-plot-looks-nice","page":"Full Example","title":"Some default options so the plot looks nice","text":"","category":"section"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"plot_font = \"Computer Modern\"\ndefault(\n fontfamily=plot_font,\n linewidth=2, framestyle=:box, label=nothing\n)","category":"page"},{"location":"examples/#Read-the-PDB-file-(using-PDBTools)","page":"Full Example","title":"Read the PDB file (using PDBTools)","text":"","category":"section"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"pdb = readPDB(\"./system.pdb\")","category":"page"},{"location":"examples/#Load-results-of-the-ComplexMixtures-run","page":"Full Example","title":"Load results of the ComplexMixtures run","text":"","category":"section"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"R = 
load(\"./results_glyc50.json\") ","category":"page"},{"location":"examples/#Define-which-are-the-solute-molecules-(the-protein)","page":"Full Example","title":"Define which are the solute molecules (the protein)","text":"","category":"section"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"protein = select(pdb,\"protein\")\nsolute = Selection(protein,nmols=1)","category":"page"},{"location":"examples/#Define-which-are-the-solvent-molecules-(Glycerol-here)","page":"Full Example","title":"Define which are the solvent molecules (Glycerol here)","text":"","category":"section"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"glycerol = select(pdb,\"resname GLYC\")\nsolvent = Selection(glycerol,natomspermol=14)","category":"page"},{"location":"examples/#Retrive-the-resiude-contribution-data","page":"Full Example","title":"Retrive the resiude contribution data","text":"","category":"section"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"Collect which are the protein residues ","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"residues = collect(eachresidue(protein))","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"Set a matrix that will store the results, with a number of lines corresponding to the length of the MDDF histogram, and with a number of columns corresponding to the number of residues:","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"rescontrib = zeros(length(R.mddf),length(residues))","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"Now, collect the contribution of each residue as a column of the above matrix. The notation pairs(residues) returns tuples containing the index ires and the corresponding residue. 
The .= symbol sets each element of the corresponding column of the rescontrib matrix to the output of contributions (by broadcasting). ","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"for (ires,residue) in pairs(residues)\n rescontrib[:,ires] .= contributions(solute,R.solute_atom,residue)\nend","category":"page"},{"location":"examples/#Plot-only-for-distances-within-1.5-and-3.5:","page":"Full Example","title":"Plot only for distances within 1.5 and 3.5:","text":"","category":"section"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"Here, we will plot only the contributions from residue 70 to residue 110, and from distances ranging from 1.5 to 3.5 which is where most of the action occurs:","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"irange=70:110\nidmin = findfirst( d -> d > 1.5, R.d)\nidmax = findfirst( d -> d > 3.5, R.d)","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"To obtain pretty labels for the residues in the x-axis, we retrieve the one-letter residue names and concatenate them with the residue number converted to strings:","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"labels = PDBTools.oneletter.(resname.(residues)).*format.(resnum.(residues))","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"And, finally, we produce the plot, with a series of options that make this particular contour plot look nice:","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"contourf(\n irange, # x\n R.d[idmin:idmax], # y\n rescontrib[idmin:idmax,irange], # z\n xlabel=\"Residue\", ylabel=L\"r/\\AA\",\n xticks=(irange,labels[irange]), xrotation=60,\n xtickfont=font(6,plot_font),\n color=cgrad(:tempo), linewidth=0.1, linecolor=:black,\n colorbar=:none, levels=5,\n 
size=(500,280)\n)","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"The final figure is saved as a pdf file:","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"savefig(\"./density2D.pdf\")","category":"page"},{"location":"examples/#3D-residue-contribution-density-map","page":"Full Example","title":"3D residue contribution density map","text":"","category":"section"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"In this example we compute three-dimensional representations of the density map of Glycerol in the vicinity of a set of residues of a protein, from the minimum-distance distribution function. ","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"Here, the MDDF is decomposed at each distance according to the contributions of each solute (the protein) residue. The grid is created such that, at each point in space around the protein, it is possible to identify: ","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"Which atom is the closest atom of the solute to that point.\nWhich is the contribution of that atom (or residue) to the distribution function.","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"Therefore, by filtering the 3D density map at each distance one can visualize over the solute structure which are the regions that mostly interact with the solvent of choice at each distance. Typical images of such a density are:","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"
\n\n
","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"In the figure on the left, the points in space around the protein are selected with the following properties: distance from the protein smaller than 2.0Å and relative contribution to the MDDF at the corresponding distance of at least 10% of the maximum contribution. Thus, we are selecting the regions of the protein corresponding to the most stable hydrogen-bonding interactions. The color of the points is the contribution to the MDDF, from blue to red. Thus, the most reddish-points corresponds to the regions where the most stable hydrogen bonds were formed. We have marked two regions here, on opposite sides of the protein, with arrows.","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"Clicking on those points we obtain which are the atoms of the protein contributing to the MDDF at that region. In particular, the arrow on the right points to the strongest red region, which corresponds to an Aspartic acid. These residues are shown explicitly under the density (represented as a transparent surface) on the figure in the center.","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"The figure on the right displays, overlapped with the hydrogen-bonding residues, the most important contributions to the second peak of the distribution, corresponding to distances from the protein between 2.0 and 3.5Å. Notably, the regions involved are different from the ones forming hydrogen bonds, indicating that non-specific interactions with the protein (and not a second solvation shell) are responsible for the second peak. 
","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"A short tutorial video showing how to open the input and output PDB files in VMD and produce images of the density is available here: ","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"
\n\n
","category":"page"},{"location":"examples/#How-to-run-this-example:-2","page":"Full Example","title":"How to run this example:","text":"","category":"section"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"cd ComplexMixturesExamples/Protein_in_Glycerol/Density3D\njulia density3D.jl","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"Alternatively, open Julia and copy/paste or the commands in density3D.jl or use include(\"./density3D.jl\"). These options will allow you to remain on the Julia section with access to the grid data structure that was generated and corresponds to the output grid.pdb file. ","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"This will create (actually overwrite) the grid.pdb file. Here we provide a previously setup VMD session that contains the data with the visualization choices used to generate the figure above. Load it with:","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"vmd -e grid.vmd","category":"page"},{"location":"examples/#Detailed-explanation-of-the-example:-3","page":"Full Example","title":"Detailed explanation of the example:","text":"","category":"section"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"Initially we load the ComplexMixtures and PDBTools packages:","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"using ComplexMixtures, PDBTools","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"With the readPDB function of PDBTools, we read the PDB file of the system simulated:","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"pdb = readPDB(\"../Data/system.pdb\")","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"and using ComplexMixtures, we 
load the results from the calculation of the MDDF of Glycerol around the protein, which was computed previously:","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"R = load(\"../Data/results_glyc50.json\") ","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"The solute here is the protein, and we need to setup the structures that define which atoms and type of solute it is. First, we select from the atoms of the pdb file of the system, those belonging to the protein, using select from PDBTools:","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"protein = select(pdb,\"protein\")","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"and then we define the solute structure that is actually used in ComplexMixtures, by passing those atoms and specifying that the solute is a single molecule to the Selection function of ComplexMixtures:","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"solute = Selection(protein,nmols=1)","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"The 3D grid representing the density around the protein is computed with the grid3D function provided by ComplexMixtures. It receives the solute structure (of type Selection), the list of solute atoms (of type PDBTools.Atoms, as the protein selection above), the name of the output file and some optional parameters to define the grid. Here we compute the grid only between 1.5 and 3.5Å, characterizing the first and second solvation shells. The grid has by default a step of 0.5Å. 
","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"grid = grid3D(\n solute=solute,\n solute_atoms=protein,\n mddf_result=R,\n output_file=\"grid.pdb\",\n dmin=1.5,\n dmax=3.5\n)","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"The command above will generate the grid, save it to grid.pdb and let it available in the grid.pdb array of atoms, for further inspection, if desired. ","category":"page"},{"location":"examples/","page":"Full Example","title":"Full Example","text":"By changing dmin, dmax, and step, one controls the grid size and resolution. This may generate very large output files.","category":"page"},{"location":"options/#options","page":"Options","title":"Options","text":"","category":"section"},{"location":"options/","page":"Options","title":"Options","text":"There are some options to control what exactly is going to be computed to obtain the MDDF. These options can be defined by the user and passed to the mddf function, using, for example: ","category":"page"},{"location":"options/","page":"Options","title":"Options","text":"options = Options(lastframe=1000)\nresults = mddf(trajectory,options)","category":"page"},{"location":"options/#Frame-ranges-and-histogram-properties","page":"Options","title":"Frame ranges and histogram properties","text":"","category":"section"},{"location":"options/","page":"Options","title":"Options","text":"These are common options that the regular user might want to set in their calculation.","category":"page"},{"location":"options/","page":"Options","title":"Options","text":"firstframe: Integer, first frame of the trajectory to be considered.","category":"page"},{"location":"options/","page":"Options","title":"Options","text":"lastframe: Integer, last frame of the trajectory to be considered.","category":"page"},{"location":"options/","page":"Options","title":"Options","text":"stride: Integer, consider every stride frames, that is, 
if stride=5 only one in five frames will be considered.","category":"page"},{"location":"options/","page":"Options","title":"Options","text":"binstep: Real, length of the bin step of the histograms, default = 0.02 Angstroms.","category":"page"},{"location":"options/","page":"Options","title":"Options","text":"dbulk: Real, distance from which the solution is to be considered as a bulk solution, that is, where the presence of the solute does not affect the structure of the solution anymore. This parameter is important in particular for systems with a single solute molecule (a protein, for example), where the density of the solvent in the box is not the bulk density of the solvent, which must be computed independently. Default: 10 Angstroms. ","category":"page"},{"location":"options/","page":"Options","title":"Options","text":"cutoff: Real, the maximum distance to be considered in the construction of histograms. Default: 10 Angstroms. ","category":"page"},{"location":"options/","page":"Options","title":"Options","text":"usecutoff: true/false: If true, the cutoff distance might be different from dbulk and the density of the solvent in bulk will be estimated from the density within dbulk and cutoff. If false, the density of the solvent is estimated from the density outside dbulk by exclusion. Default: false. ","category":"page"},{"location":"options/#Lower-level-options","page":"Options","title":"Lower level options","text":"","category":"section"},{"location":"options/","page":"Options","title":"Options","text":"These will probably never be set by the user, unless dealing with some special system (very large, or very low density system).","category":"page"},{"location":"options/","page":"Options","title":"Options","text":"irefatom: Integer, index of the reference atom in the solvent molecule used to compute the shell volumes and domain volumes in the Monte-Carlo volume estimates. The final rdf data is reported for this atom as well. 
By default, we choose the atom which is closest to the center of coordinates of the molecule, but any choice should be fine. ","category":"page"},{"location":"options/","page":"Options","title":"Options","text":"n_random_samples: Integer, how many samples of random molecules are generated for each solvent molecule to compute the shell volumes and random MDDF counts. Default: 10. Increase this only if you have a short trajectory and want to obtain reproducible results for that short trajectory. For long trajectories (most desirable and common), this value can even be decreased to speed up the calculations. ","category":"page"},{"location":"options/","page":"Options","title":"Options","text":"seed: Seed for random number generator. If -1, the seed will be generated from the entropy of the system. If your results are dependent on the seed, it is probable that you do not have enough sampling. Mostly used for testing purposes. Two runs are only identical if run with the same seed and in serial mode. ","category":"page"},{"location":"options/","page":"Options","title":"Options","text":"StableRNG (::Bool), defaults to false. Use a stable random number generator from the StableRNGs package, to produce identical runs on different architectures and Julia versions. Only used for testing. ","category":"page"},{"location":"options/","page":"Options","title":"Options","text":"nthreads: How many threads to use. By default, it will be the number of physical cores of the computer.","category":"page"},{"location":"options/","page":"Options","title":"Options","text":"lcell: Integer, the cell length of the linked-cell method (actually the cell length is cutoff/lcell). Default: 1. ","category":"page"},{"location":"options/","page":"Options","title":"Options","text":"GC: Bool, force garbage collection, to avoid memory overflow. Default: true. That this might be required is probably a result of something that can be vastly improved in memory management. 
This may slow down parallel runs significantly if the GC runs too often.","category":"page"},{"location":"options/","page":"Options","title":"Options","text":"GC_threshold: Float64, minimum fraction of the total memory of the system required to force a GC run. That is, if GC_threshold=0.1, which is the default, every time the free memory becomes less than or equal to 10% of the total memory available, a GC run occurs. ","category":"page"},{"location":"options/#Frame-statistical-reweighing","page":"Options","title":"Frame statistical reweighing","text":"","category":"section"},{"location":"options/","page":"Options","title":"Options","text":"compat: Compat\nFrame reweighing is available in ComplexMixtures 1.4.0 or greater.","category":"page"},{"location":"options/","page":"Options","title":"Options","text":"Most times the weights of each frame of the trajectory are the same, resulting from some standard MD simulation. If, for some reason, the frames have different statistical weights, the weights can be passed to Options as the optional parameter frame_weights.","category":"page"},{"location":"options/","page":"Options","title":"Options","text":"For example:","category":"page"},{"location":"options/","page":"Options","title":"Options","text":"julia> using ComplexMixtures\n\njulia> options = Options(frame_weights=[0.2, 0.2, 0.4])","category":"page"},{"location":"options/","page":"Options","title":"Options","text":"The code above will assign a larger weight to the third frame of the trajectory. These weights are relative (meaning that [1.0, 1.0, 2.0] would produce the same result). What will happen under the hood is that the distance counts of the frames will be multiplied by each frame weight, and normalized by the sum of the weights.","category":"page"},{"location":"options/","page":"Options","title":"Options","text":"Important: The length of the frame_weights vector must be at least equal to the number of the last frame read from the trajectory. 
That is, if lastframe is not set, and all the frames will be read, the length of frame_weights must be equal to the length of the trajectory (the stride parameter will skip the information both of the frames and their weights). If lastframe is set, then the length of frame_weights must be at least lastframe (it can be greater, and further values will be ignored). Importantly, the indices of the elements in frame_weights are assumed to correspond to the indices of the frames in the original trajectory file.","category":"page"},{"location":"python/#python","page":"From Python","title":"From Python","text":"","category":"section"},{"location":"python/","page":"From Python","title":"From Python","text":"note: Note\nMost features of the package are available through this Python interface. However, some flexibility may be reduced and, also, the tuning of the plot appearance is left to the user, as it is expected that he/she is fluent with some tools within Python if choosing this interface. Python 3 or greater is required. Please report issues, incompatibilities, or any other difficulty in using the package and its interface.","category":"page"},{"location":"python/","page":"From Python","title":"From Python","text":"The following examples consider a system composed of a protein solvated by a mixture of water and glycerol, built with Packmol. The simulations were performed with NAMD with periodic boundary conditions and an NPT ensemble at room temperature and pressure. Molecular pictures were produced with VMD.","category":"page"},{"location":"python/","page":"From Python","title":"From Python","text":"
\n\n
","category":"page"},{"location":"python/","page":"From Python","title":"From Python","text":"Image of the system of the example: a protein solvated by a mixture of glycerol (green) and water, at a concentration of 50%vv. The complete example is available at this repository.","category":"page"},{"location":"python/#Loading-the-ComplexMixtures.py-file","page":"From Python","title":"Loading the ComplexMixtures.py file","text":"","category":"section"},{"location":"python/","page":"From Python","title":"From Python","text":"The Python interface of ComplexMixtures is implemented in the ComplexMixtures.py file. Just download it from the link and save it in a known path.","category":"page"},{"location":"python/#Installing-juliacall","page":"From Python","title":"Installing juliacall","text":"","category":"section"},{"location":"python/","page":"From Python","title":"From Python","text":"juliacall is a package that allows calling Julia programs from Python. Install it with","category":"page"},{"location":"python/","page":"From Python","title":"From Python","text":"pip install juliacall","category":"page"},{"location":"python/#Installing-Julia-and-underlying-packages","page":"From Python","title":"Installing Julia and underlying packages","text":"","category":"section"},{"location":"python/","page":"From Python","title":"From Python","text":"Once juliacall is installed, from within Python, execute:","category":"page"},{"location":"python/","page":"From Python","title":"From Python","text":"import ComplexMixtures","category":"page"},{"location":"python/","page":"From Python","title":"From Python","text":"here we assume that the ComplexMixtures.py file is in the same directory where you launched Python.","category":"page"},{"location":"python/","page":"From Python","title":"From Python","text":"note: Note\nOn the first time you execute this command, the Julia executable and the required Julia packages (ComplexMixtures and PDBTools) will be downloaded and installed. 
At the end of the process quit Python (not really required, but we prefer to separate the installation from the use of the module). ","category":"page"},{"location":"python/#How-to-run-this-example","page":"From Python","title":"How to run this example","text":"","category":"section"},{"location":"python/","page":"From Python","title":"From Python","text":"The Data directory contains the a pdb file of the system (system.pdb) and a sample from the trajectory (glyc50.dcd), with a few frames. It also contains the result of running the mddf calculation on the complete trajectory, results_glyc50.json. This last file was produced by ComplexMixtures, as indicated in the following examples. ","category":"page"},{"location":"python/","page":"From Python","title":"From Python","text":"The sample trajectory is provided so that the first example can be run, yet do not expect that the results are the same, as the sampling is much lower in this case. The complete trajectory can be retrieved from this link (3GB file). ","category":"page"},{"location":"python/","page":"From Python","title":"From Python","text":"We assume that you navigated to the directory of the example, and copied the Python module file to it: ","category":"page"},{"location":"python/","page":"From Python","title":"From Python","text":"git clone https://github.com/m3g/ComplexMixturesExamples\ncd ComplexMixturesExamples/Protein_in_Glycerol/MDDF\ncp /path/to/ComplexMixtures.py ./\nexport JULIA_NUM_THREADS=8","category":"page"},{"location":"python/","page":"From Python","title":"From Python","text":"The last line will allow Julia to execute multi-threaded, which will improve a lot the performance on most machines. 
Set the number of threads to the number of cores of your computer.","category":"page"},{"location":"python/#Minimum-Distance-Distribution-function","page":"From Python","title":"Minimum-Distance Distribution function","text":"","category":"section"},{"location":"python/","page":"From Python","title":"From Python","text":"Note that the example here follows an identical syntax to the Julia example, except that we qualify the name of the loaded module and implicitly load the PDBTools package.","category":"page"},{"location":"python/","page":"From Python","title":"From Python","text":"The script to compute the MDDFs as associated data from within python is, then:","category":"page"},{"location":"python/","page":"From Python","title":"From Python","text":"import ComplexMixtures as cm\n\n# Load the pdb file of the system using `PDBTools`:\natoms = cm.readPDB(\"../Data/system.pdb\")\n\n# Create arrays of atoms with the protein and Glycerol atoms, \n# using the `select` function of the `PDBTools` package:\nprotein = cm.select(atoms,\"protein\")\nglyc = cm.select(atoms,\"resname GLYC\")\n\n# Setup solute and solvent structures, required for computing the MDDF, \n# with `Selection` function of the `ComplexMixtures` package:\nsolute = cm.Selection(protein,nmols=1)\nsolvent = cm.Selection(glyc,natomspermol=14)\n\n# Read and setup the Trajectory structure required for the computations:\ntrajectory = cm.Trajectory(\"../Data/glyc50_complete.dcd\",solute,solvent)\n\n# Run the calculation and get results:\nresults = cm.mddf(trajectory)\n\n# Save the reults to recover them later if required\ncm.save(results,\"./glyc50.json\")","category":"page"},{"location":"python/","page":"From Python","title":"From Python","text":"note: Note\nTo change the options of the calculation, set the Options structure accordingly and pass it as a parameter to mddf. 
For example:options = cm.Options(cutoff=10.)\nresults = cm.mddf(trajectory,options)The complete set of options available is described here.","category":"page"},{"location":"python/","page":"From Python","title":"From Python","text":"The trajectory that was loaded was for a toy-example. The complete trajectory is available here, but it is a 3GB file. The same procedure above was performed with that file and produced the results_Glyc50.json file, which is available in the Data directory here. We will continue with this file instead. ","category":"page"},{"location":"python/#Produce-plots","page":"From Python","title":"Produce plots","text":"","category":"section"},{"location":"python/#MDDF-and-Kirkwood-Buff-integrals","page":"From Python","title":"MDDF and Kirkwood-Buff integrals","text":"","category":"section"},{"location":"python/","page":"From Python","title":"From Python","text":"import ComplexMixtures as cm\nimport matplotlib.pyplot as plt\n\n# Load the actual results obtained with the complete simulation:\nresults = cm.load(\"../Data/results_glyc50.json\")\n\n# Plot MDDF and KB\nfig, axs = plt.subplots(2)\naxs[0].plot(results.d, results.mddf)\naxs[0].set(ylabel=\"MDDF\")\n\n# Plot KB integral\naxs[1].plot(results.d, results.kb)\naxs[1].set(xlabel=\"distance / Angs\", ylabel=\"MDDF\")\n\nplt.savefig(\"mddf_kb.png\")","category":"page"},{"location":"python/#Atomic-contributions-to-the-MDDF","page":"From Python","title":"Atomic contributions to the MDDF","text":"","category":"section"},{"location":"python/","page":"From Python","title":"From Python","text":"Selecting the atoms corresponding to the hydroxyl groups, and of the aliphatic carbons of Glycerol. Here we list the types of the atoms as specified by the force-field. 
","category":"page"},{"location":"python/","page":"From Python","title":"From Python","text":"import ComplexMixtures as cm\nimport matplotlib.pyplot as plt\n\natoms = cm.readPDB(\"../Data/system.pdb\")\nprotein = cm.select(atoms,\"protein\")\nglyc = cm.select(atoms,\"resname GLYC\")\nsolute = cm.Selection(protein,nmols=1)\nsolvent = cm.Selection(glyc,natomspermol=14)\n\n# load results\nresults = cm.load(\"../Data/results_glyc50.json\")\n\n# Select atoms by name\nhydroxyls = cm.list([\"O1\",\"O2\",\"O3\",\"H1\",\"H2\",\"H3\"])\naliphatic = cm.list([\"C1\",\"C2\",\"HA\",\"HB\",\"HC\",\"HD\"])\n\n# Extract the contributions of the groups above\nhydr_contributions = cm.contributions(solvent,results.solvent_atom,hydroxyls)\naliph_contributions = cm.contributions(solvent,results.solvent_atom,aliphatic)\n\n# Plot\nplt.plot(results.d, results.mddf)\nplt.plot(results.d, hydr_contributions)\nplt.plot(results.d, aliph_contributions)\nplt.xlabel(\"distance / Angs\")\nplt.ylabel(\"MDDF\")\nplt.savefig(\"group_contributions.png\")","category":"page"},{"location":"python/","page":"From Python","title":"From Python","text":"note: Note\nThe syntax here diverges from the Julia-only examples by requiring the lists of names to be converted to Julia arrays, which happens by using the cm.list(python_list) function calls.","category":"page"},{"location":"#Introduction","page":"Introduction","title":"Introduction","text":"","category":"section"},{"location":"","page":"Introduction","title":"Introduction","text":"ComplexMixtures.jl is a package to study the solute and solvent interactions of mixtures of molecules of complex shape. Conventional radial distribution functions are not appropriate to represent the structure of a solvent around a solute with many atoms, and a variable, non-spherical shape. ","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"Typical solutes of complex shape are proteins, nucleic acids, and polymers in general. 
Smaller molecules like lipids, carbohydrates, etc, are also complex enough such that representing the structure of the solution of those molecules with distribution functions is not trivial.","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"Minimum-Distance Distribution Functions (MDDFs) are a very general and practical way to represent solute-solvent interactions for molecules with arbitrarily complex sizes and geometries. Briefly, instead of computing the density distribution function of a particular atom or the center-of-mass of the molecules, one computes the distribution function of the minimum-distance between any solute and solvent atoms. This provides a size and shape-independent distribution which is very natural to interpret in terms of molecular interactions. ","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"Additionally, the MDDFs can be decomposed into contributions of each type of atom (or groups of atoms) of the solute and solvent molecules, such that the profiles of the distributions can be interpreted in terms of the chemical nature of the species involved in the interactions at each distance. ","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"Finally, as with radial distribution functions, MDDFs can be used to compute Kirkwood-Buff integrals to connect the accumulation or depletion of the solvents components to thermodynamic properties, like protein structural stability, solubility, and others.","category":"page"},{"location":"#Features","page":"Introduction","title":"Features","text":"","category":"section"},{"location":"","page":"Introduction","title":"Introduction","text":"Check out our examples repository, featuring the analysis of solvation structures for proteins, polymers, membrane, and complex solutions! 
The examples are also described in our featured article.","category":"page"},{"location":"#1.-Minimum-distance-distribution-functions:-understanding-solvation-at-a-molecular-level","page":"Introduction","title":"1. Minimum-distance distribution functions: understanding solvation at a molecular level","text":"","category":"section"},{"location":"","page":"Introduction","title":"Introduction","text":"This figure illustrates one of the main features of minimum-distance distribution functions, by showing the distribution of DMF molecules at the surface of a polyacrylamide molecule. The direct interactions are evident by the peak at hydrogen-bonding distances and, additionally, the contribution of each group of atoms of the DMF can be clearly distinguished by decomposing the total MDDF into atomic or chemical group contributions. ","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"
\n\n
\nMinimum distance distribution function and its decomposition into the chemical\ngroups of the solvent (top) and solute (bottom) molecules.

\n
","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"Decomposition of the total MDDF into the contributions of the solute atoms (in this case, a protein) is also possible. Any chemical group decomposition is possible. Here, we decompose the MDDF into the contribution of each protein residue. ","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"
\n\n
\nDensity map of a solvent in the vicinity of each protein residue. \n
","category":"page"},{"location":"#2.-Thermodynamic-interpretation-through-Kirkwood-Buff-theory","page":"Introduction","title":"2. Thermodynamic interpretation through Kirkwood-Buff theory","text":"","category":"section"},{"location":"","page":"Introduction","title":"Introduction","text":"Minimum-distance distribution functions can be used to compute Kirkwood-Buff integrals, and thus, thermodynamic parameters associated to solvation. ","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"Kirkwood-Buff integrals carry the information of the total accumulation or depletion of each solvent around a solute. For example, the figure below displays the KB integrals of an ionic liquid solvating different conformational states of a protein [link]. The figure illustrates that the solvation structures are dependent on the protein folding state. ","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"
\n\n
\nKirkwood-Buff integrals of an ionic liquid solvating a protein in different conformational states.

\n
","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"From differences in KB integrals among cosolvents, the Preferential Solvation parameter can be computed. This is an important parameter because it can be measured experimentally and is ultimately associated with the equilibrium thermodynamics of the solvation. In the following figure, we show that, for example, the preferential solvation of a protein in different folding states is dependent in a non-trivial way on the concentration of an ionic liquid in aqueous solutions. ","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"
\n\n
\nPreferential interaction parameters obtained for the solvation of a protein by ionic liquids.

\n
","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"In particular, the plot shows that besides being preferentially excluded from the protein surface at high concentrations in the native state, suggesting protein folding stabilization, the interactions with the protein in the denatured states are stronger, leading to denaturation at all concentrations. ","category":"page"},{"location":"#References","page":"Introduction","title":"References","text":"","category":"section"},{"location":"","page":"Introduction","title":"Introduction","text":"L. Martínez, ComplexMixtures.jl: Investigating the structure of solutions of complex-shaped molecules from a solvent-shell perspective. J. Mol. Liq. 347, 117945, 2022. [Full Text]\nL. Martínez, S. Shimizu, Molecular interpretation of preferential interactions in protein solvation: a solvent-shell perspective by means of minimum-distance distribution functions. J. Chem. Theor. Comp. 13, 6358–6372, 2017. [Full Text]","category":"page"},{"location":"#See-also","page":"Introduction","title":"See also","text":"","category":"section"},{"location":"#Seminar","page":"Introduction","title":"Seminar","text":"","category":"section"},{"location":"","page":"Introduction","title":"Introduction","text":"Presentation about ComplexMixtures.jl and protein-solvent interactions: https://youtu.be/umSRjsITzyA","category":"page"},{"location":"#Applications","page":"Introduction","title":"Applications","text":"","category":"section"},{"location":"","page":"Introduction","title":"Introduction","text":"A. F. Pereira, V. Piccoli, L. Martínez, Trifluoroethanol direct interactions with protein backbones destabilize alpha-helices. J. Mol. Liq. 365, 120209, 2022. [Full Text]\nV. Piccoli, L. Martínez, Ionic liquid solvation of proteins in native and denatured states. J. Mol. Liq. 363, 119953, 2022. [Full Text]\nV. Piccoli, L. Martínez, Correlated counterion effects in the solvation of proteins by ionic-liquids. J. Mol. Liq. 
320, 114347, 2020. [Full Text]\nI. P. de Oliveira, L. Martínez, The shift in urea orientation at protein surfaces at low pH is compatible with a direct mechanism of protein denaturation. Phys. Chem. Chem. Phys. 22, 354-367, 2020. [Full Text]\nI. P. de Oliveira, L. Martínez, Molecular basis for competitive solvation of the Burkholderia cepacia lipase by sorbitol and urea. Phys. Chem. Chem. Phys. 18, 21797-21808, 2016. [Full Text]","category":"page"}] +[{"location":"references/#References","page":"References","title":"References","text":"","category":"section"},{"location":"references/#Primary-citations","page":"References","title":"Primary citations","text":"","category":"section"},{"location":"references/","page":"References","title":"References","text":"If this package was useful to you, please cite the following papers:","category":"page"},{"location":"references/","page":"References","title":"References","text":"L. Martínez, ComplexMixtures.jl: Investigating the structure of solutions of complex-shaped molecules from a solvent-shell perspective. J. Mol. Liq. 347, 117945, 2022. [Full Text]\nL. Martínez, S. Shimizu, Molecular interpretation of preferential interactions in protein solvation: a solvent-shell perspective by means of minimum-distance distribution functions. J. Chem. Theor. Comp. 13, 6358–6372, 2017. [Full Text]","category":"page"},{"location":"references/#Applications-and-examples","page":"References","title":"Applications and examples","text":"","category":"section"},{"location":"references/","page":"References","title":"References","text":"A. F. Pereira, V. Piccoli, L. Martínez, Trifluoroethanol direct interactions with protein backbones destabilize alpha-helices. J. Mol. Liq. 365, 120209, 2022. [Full Text]\nV. Piccoli, L. Martínez, Ionic liquid solvation of proteins in native and denatured states. J. Mol. Liq. 363, 119953, 2022. [Full Text]\nV. Piccoli, L. Martínez, Correlated counterion effects in the solvation of proteins by ionic-liquids. J. 
Mol. Liq. 320, 114347, 2020. [Full Text]\nI. P. de Oliveira, L. Martínez, The shift in urea orientation at protein surfaces at low pH is compatible with a direct mechanism of protein denaturation. Phys. Chem. Chem. Phys. 22, 354-367, 2020. [Full Text]\nI. P. de Oliveira, L. Martínez, Molecular basis for competitive solvation of the Burkholderia cepacia lipase by sorbitol and urea. Phys. Chem. Chem. Phys. 18, 21797-21808, 2016. [Full Text]","category":"page"},{"location":"references/#See-also","page":"References","title":"See also","text":"","category":"section"},{"location":"references/","page":"References","title":"References","text":"Packmol: A package for building initial configurations for molecular dynamics simulations.\nCellListMap.jl: Efficient and customizable implementation of cell lists, which allows the computation of general properties dependent on distances of particles within a cutoff, for example short-range potentials, forces, neighbor lists, etc.\nMDLovoFit: Automatic identification of mobile and rigid substructures in molecular dynamics simulations and fractional structural fluctuation analysis. ","category":"page"},{"location":"results/#results","page":"Results","title":"Results","text":"","category":"section"},{"location":"results/","page":"Results","title":"Results","text":"The results of a MDDF calculation are returned in a data structure which contains the MDDF, KB integrals, and atomic contributions. The following section will assume that the computation was performed by calling the mddf function with ","category":"page"},{"location":"results/","page":"Results","title":"Results","text":"results = mddf(trajectory)","category":"page"},{"location":"results/","page":"Results","title":"Results","text":"such that the results variable contains the Result data structure. By default, the histograms contain 500 bins (binstep=0.02 and cutoff=10.) 
such that all data-vectors will contain 500 lines.","category":"page"},{"location":"results/","page":"Results","title":"Results","text":"To learn how to save and load saved data, read the next section.","category":"page"},{"location":"results/#The-Result-data-structure:-main-data","page":"Results","title":"The Result data structure: main data","text":"","category":"section"},{"location":"results/","page":"Results","title":"Results","text":"The most important data to be read from results are the distances, minimum-distance distribution function, and KB integrals. These data are stored in the following vectors:","category":"page"},{"location":"results/#Distances-of-the-histograms:-results.d","page":"Results","title":"Distances of the histograms: results.d","text":"","category":"section"},{"location":"results/","page":"Results","title":"Results","text":"The following vector will contain values ranging from 0. to cutoff, and the distance at each bin is the distance in that bin for which half of the volume of the bin is within d, and half of the volume is above d, if the volume was spherical: ","category":"page"},{"location":"results/","page":"Results","title":"Results","text":"julia> results.d\n500-element Array{Float64,1}:\n 0.015874010519682\n 0.033019272488946275\n ⋮\n 9.970010030080179\n 9.99001000999998","category":"page"},{"location":"results/#Minimum-distance-distribution-function:-results.mddf","page":"Results","title":"Minimum-distance distribution function: results.mddf","text":"","category":"section"},{"location":"results/","page":"Results","title":"Results","text":"The results.mddf vector will contain the main result, which is the minimum-distance distribution function. 
For a properly-sampled simulation, it will be zero at very short distances and converge to 1.0 for distances smaller than the cutoff:","category":"page"},{"location":"results/","page":"Results","title":"Results","text":"julia> results.mddf\n500-element Array{Float64,1}:\n 0.0\n 0.0\n ⋮\n 0.999052514965403\n 1.001030818286187\n","category":"page"},{"location":"results/","page":"Results","title":"Results","text":"A typical plot of results.mddf as a function of results.d will look like:","category":"page"},{"location":"results/","page":"Results","title":"Results","text":"","category":"page"},{"location":"results/","page":"Results","title":"Results","text":"Thus, this plot was obtained with the following code:","category":"page"},{"location":"results/","page":"Results","title":"Results","text":"using Plots\nplot(results.d,results.mddf,xlabel=\"d/A\",ylabel=\"mddf(d) / L/mol\") ","category":"page"},{"location":"results/#Kirkwood-Buff-integral:-results.kb","page":"Results","title":"Kirkwood-Buff integral: results.kb","text":"","category":"section"},{"location":"results/","page":"Results","title":"Results","text":"The results.kb vector will contain the Kirkwood-Buff integral computed as a function of the minimum-distance to the solute. For properly sampled simulations, it is expected to converge at large distances. 
","category":"page"},{"location":"results/","page":"Results","title":"Results","text":"julia> results.kb\n500-element Array{Float64,1}:\n 0.0\n -0.3249356504752985\n -2.9804719721525\n ⋮\n 0.72186381783\n 1.13624162115","category":"page"},{"location":"results/","page":"Results","title":"Results","text":"A typical plot of results.kb as a function of results.d will look like:","category":"page"},{"location":"results/","page":"Results","title":"Results","text":"","category":"page"},{"location":"results/","page":"Results","title":"Results","text":"Thus, this plot was obtained with the following code:","category":"page"},{"location":"results/","page":"Results","title":"Results","text":"using Plots\nplot(results.d,results.kb,xlabel=\"d/A\",ylabel=\"mddf(d) / L/mol\") ","category":"page"},{"location":"results/#Units","page":"Results","title":"Units","text":"","category":"section"},{"location":"results/","page":"Results","title":"Results","text":"The distance is assumed to be in Å, as this is the most common distance unit in molecular simulations. The coordinates of the atoms are assumed to be provided in Å. \nThe minimum-distance distribution function is unit-less, since it is the ratio of the density at each distance divided by an ideal-gas density.\nThe Kirkwood-Buff integrals are returned in cm³ mol⁻¹, if the coordinates were provided in Å.","category":"page"},{"location":"results/","page":"Results","title":"Results","text":"warning: Warning\nIf the coordinates are not in Å, the calculation will proceed normally, but the units of the KB integrals, which have units of volume per mol, should be converted to conform to the length unit provided. 
","category":"page"},{"location":"results/#Coordination-number-and-other-data","page":"Results","title":"Coordination number and other data","text":"","category":"section"},{"location":"results/","page":"Results","title":"Results","text":"Obtaining the MDDF involves the computation of some intermediate properties that are frequently useful for additional solution structure analysis. In particular, the coordination numbers are computed. For example, the coordination number as a function of the distance to the solute can be retrieved from a Result data structure with:","category":"page"},{"location":"results/","page":"Results","title":"Results","text":"coordination_number = results.coordination_number","category":"page"},{"location":"results/","page":"Results","title":"Results","text":"and this data can be plotted against the distances by:","category":"page"},{"location":"results/","page":"Results","title":"Results","text":"plot(results.d,results.coordination_number)","category":"page"},{"location":"results/","page":"Results","title":"Results","text":"The coordination number of subgroups can also be obtained, as explained in the Coordination number section.","category":"page"},{"location":"results/","page":"Results","title":"Results","text":"The complete data available is:","category":"page"},{"location":"results/","page":"Results","title":"Results","text":"Parameter Meaning Type of value Comment\nd Vector of distances of the histograms. Vector{Float64} To be used as the x coordinate on plotting any of the data.\nmd_count Non-normalized count of minimum distances at each d. Vector{Float64} This is the number of minimum distances found at each histogram bin, without normalization. Usually this is not interesting to analyze, because it is dependent on the bin size.\nmd_count_random Number of minimum distances found at each histogram bin for the random distribution. 
Vector{Float64} This is the normalization required to convert the md_count array into the minimum-distance distribution.\ncoordination_number Cumulative number of sites found for each histogram distance. Vector{Float64} This is the coordination number, that is, the number of sites found cumulatively up to each distance, without any normalization.\ncoordination_number_random Cumulative site count for the random distribution. Vector{Float64} Usually not interesting for analysis.\nmddf The final distribution function. Vector{Float64} This is the MDDF computed (md_count normalized by md_count_random). It is the main result of the calculation.\nkb The final Kirkwood-Buff integral. Vector{Float64} This is the final KB integral, as a function of the integration distance from the solute. Computed as coordination_number - coordination_number_random\nsolute_atom Atomic contributions of the solute. Matrix{Float64} This is a matrix with nbins lines and solute.natomspermol columns, containing the atomic contributions of each solute atom to the complete MDDF.\nsolvent_atom Atomic contributions of the solvent. Matrix{Float64} This is a matrix with nbins lines and solvent.natomspermol columns, containing the atomic contributions of each solvent atom to the complete MDDF.\ndensity.solute Density (concentration) of the solute in the complete simulation box. Float64 In units of molecules/Å³\ndensity.solvent Density (concentration) of the solvent in the complete simulation box. Float64 In units of molecules/Å³\ndensity.solvent_bulk Density (concentration) of the solvent in the bulk region. Float64 In units of molecules/Å³\nvolume Volume measures. Volume Contains the total volume of the simulation, the bulk volume, the volume of the solute domain and the shell volume of each bin of the histogram. These are computed by numerical integration from the random distributions.\nfiles List of files read. Vector{String} \nweights Weights of each file in the final counts. 
Vector{Float64} If the trajectories have different lengths or number of frames, the weights are adapted accordingly.\n ","category":"page"},{"location":"results/#Other-Result-parameters-available-which-are-set-at-Options:","page":"Results","title":"Other Result parameters available which are set at Options:","text":"","category":"section"},{"location":"results/","page":"Results","title":"Results","text":"Parameter Meaning Type of value Comment\nnbins Number of bins of the histograms. Int \ndbulk Distance from solute of bulk solution. Float64 \ncutoff Maximum distance to be considered for histograms. Float64 \nautocorrelation The solute is the same as the solvent? Bool Automatically set if solute == solvent.\nsolute Properties of the solute AtomSelection Contains the number of atoms, number of atoms per molecule and number of molecules of the solute.\nsolvent Properties of the solvent. AtomSelection Contains the number of atoms, number of atoms per molecule and number of molecules of the solvent.\nirefatom This is a reference atom that is used to generate random rotations and translations internally. Int Counts of the distributions for this atom are performed automatically to obtain radial (or proximal) distribution functions. Can be used for testing purposes.\nrdf_count This is the md_count minimum distance count of irefatom. Vector{Float64} This corresponds to the conventional radial distribution function if the solute contains only one atom.\nrdf_count_random Minimum distance of irefatom count for the random distribution. Vector{Float64} \nrdf Distribution function computed from the irefatom distribution. It is a conventional rdf if the solvent has only one atom. Vector{Float64} \nkb_rdf Kirkwood-Buff integral computed from the irefatom distribution. Vector{Float64} This must converge, at long distances, to the same value as kb, and can be used for testing.\noptions Calculation options. 
Options Carries (some redundant) options set by the user.\nlastframe_read Last frame read from the trajectory. Int \nn_frames_read Number of frames read from the trajectory. Int Can differ from lastframe_read if stride != 1\n ","category":"page"},{"location":"results/#Reference-functions","page":"Results","title":"Reference functions","text":"","category":"section"},{"location":"results/","page":"Results","title":"Results","text":"Modules = [ComplexMixtures]\nPages = [\"results.jl\"]","category":"page"},{"location":"results/#ComplexMixtures.Density","page":"Results","title":"ComplexMixtures.Density","text":"mutable struct Density\n\nStructure to contain the density values obtained from the calculation.\n\nsolute::Float64\nsolvent::Float64\nsolvent_bulk::Float64\n\n\n\n\n\n","category":"type"},{"location":"results/#ComplexMixtures.Result","page":"Results","title":"ComplexMixtures.Result","text":"mutable struct Result\n\nStructure to contain the results of the MDDF calculation.\n\nVersion::VersionNumber\nnbins::Int64\ndbulk::Float64\ncutoff::Float64\nd::Vector{Float64}\nmd_count::Vector{Float64}\nmd_count_random::Vector{Float64}\ncoordination_number::Vector{Float64}\ncoordination_number_random::Vector{Float64}\nmddf::Vector{Float64}\nkb::Vector{Float64}\nautocorrelation::Bool\nsolute::AtomSelection\nsolvent::AtomSelection\nsolute_group_count::Vector{Vector{Float64}}\nsolvent_group_count::Vector{Vector{Float64}}\nrdf_count::Vector{Float64}\nrdf_count_random::Vector{Float64}\nsum_rdf_count::Vector{Float64}\nsum_rdf_count_random::Vector{Float64}\nrdf::Vector{Float64}\nkb_rdf::Vector{Float64}\ndensity::ComplexMixtures.Density\nvolume::ComplexMixtures.Volume\nfiles::Vector{ComplexMixtures.TrajectoryFileOptions}\nweights::Vector{Float64}\n\nThe Result{Vector{Float64}} parametric type is necessary only for reading the JSON3 saved file. 
\n\n\n\n\n\n","category":"type"},{"location":"results/#Base.merge-Tuple{Vector{<:Result}}","page":"Results","title":"Base.merge","text":"merge(r::Vector{Result})\n\nThis function merges the results of MDDF calculations obtained by running the same analysis on multiple trajectories, or multiple parts of the same trajectory. It returns a Result structure of the same type, with all the functions and counters representing averages of the set provided weighted by the number of frames read in each Result set.\n\n\n\n\n\n","category":"method"},{"location":"results/#ComplexMixtures.load-Tuple{String}","page":"Results","title":"ComplexMixtures.load","text":"load(filename::String)\n\nFunction to load the json saved results file into the Result data structure.\n\n\n\n\n\n","category":"method"},{"location":"results/#ComplexMixtures.overview-Tuple{Result}","page":"Results","title":"ComplexMixtures.overview","text":"overview(R::Result)\n\nFunction that outputs the volumes and densities in the most natural units.\n\n\n\n\n\n","category":"method"},{"location":"results/#ComplexMixtures.save-Tuple{Result, String}","page":"Results","title":"ComplexMixtures.save","text":"save(R::Result, filename::String)\n\nFunction to write the result data structure to a json file.\n\n\n\n\n\n","category":"method"},{"location":"example1/#Protein-in-water/glycerol","page":"◦ Protein in water/glycerol","title":"Protein in water/glycerol","text":"","category":"section"},{"location":"example1/","page":"◦ Protein in water/glycerol","title":"◦ Protein in water/glycerol","text":"The following examples consider a system composed of a protein solvated by a mixture of water and glycerol, built with Packmol. The simulations were performed with NAMD with periodic boundary conditions and an NPT ensemble at room temperature and pressure. 
Molecular pictures were produced with VMD and plots were produced with Julia's Plots library.","category":"page"},{"location":"example1/","page":"◦ Protein in water/glycerol","title":"◦ Protein in water/glycerol","text":"
\n\n
","category":"page"},{"location":"example1/","page":"◦ Protein in water/glycerol","title":"◦ Protein in water/glycerol","text":"Image of the system of the example: a protein solvated by a mixture of glycerol (green) and water, at a concentration of 50%vv. ","category":"page"},{"location":"example1/#Index","page":"◦ Protein in water/glycerol","title":"Index","text":"","category":"section"},{"location":"example1/","page":"◦ Protein in water/glycerol","title":"◦ Protein in water/glycerol","text":"Data, packages, and execution\nMDDF, KB integrals, and group contributions\n2D density map\n3D density map","category":"page"},{"location":"example1/#data-example1","page":"◦ Protein in water/glycerol","title":"Data, packages, and execution","text":"","category":"section"},{"location":"example1/","page":"◦ Protein in water/glycerol","title":"◦ Protein in water/glycerol","text":"The files required to run this example are:","category":"page"},{"location":"example1/","page":"◦ Protein in water/glycerol","title":"◦ Protein in water/glycerol","text":"system.pdb: The PDB file of the complete system.\nglyc50_traj.dcd: Trajectory file. This is a 1GB file, necessary for running the calculations from scratch.","category":"page"},{"location":"example1/","page":"◦ Protein in water/glycerol","title":"◦ Protein in water/glycerol","text":"To run the scripts, we suggest the following procedure:","category":"page"},{"location":"example1/","page":"◦ Protein in water/glycerol","title":"◦ Protein in water/glycerol","text":"Create a directory, for example example1.\nCopy the required data files above to this directory.\nLaunch julia in that directory, activate the directory environment, and install the required packages. 
This is done by launching Julia and executing:\nimport Pkg \nPkg.activate(\".\")\nPkg.add([\"ComplexMixtures\", \"PDBTools\", \"Plots\", \"LaTeXStrings\", \"EasyFit\"])\nexit()\nCopy the code of each script into a file, and execute with:\njulia -t auto script.jl\nAlternatively (and perhaps preferably), copy line by line the content of the script into the Julia REPL, to follow each step of the calculation. For a more advanced Julia usage, we suggest the VSCode IDE with the Julia Language Support extension. ","category":"page"},{"location":"example1/#mddf-example1","page":"◦ Protein in water/glycerol","title":"MDDF, KB integrals, and group contributions","text":"","category":"section"},{"location":"example1/","page":"◦ Protein in water/glycerol","title":"◦ Protein in water/glycerol","text":"Here we compute the minimum-distance distribution function, the Kirkwood-Buff integral, and the atomic contributions of the solvent to the density. This example illustrates the regular usage of ComplexMixtures, to compute the minimum distance distribution function, KB-integrals and group contributions. ","category":"page"},{"location":"example1/","page":"◦ Protein in water/glycerol","title":"◦ Protein in water/glycerol","text":"
Complete example code: click here!","category":"page"},{"location":"example1/","page":"◦ Protein in water/glycerol","title":"◦ Protein in water/glycerol","text":"using Markdown\ncode = Markdown.parse(\"\"\"\n\\`\\`\\`julia\n$(read(\"./assets/scripts/example1/script1.jl\", String))\n\\`\\`\\`\n\"\"\")","category":"page"},{"location":"example1/","page":"◦ Protein in water/glycerol","title":"◦ Protein in water/glycerol","text":"

","category":"page"},{"location":"example1/#Output","page":"◦ Protein in water/glycerol","title":"Output","text":"","category":"section"},{"location":"example1/","page":"◦ Protein in water/glycerol","title":"◦ Protein in water/glycerol","text":"The code above will produce the following plots, which contain the minimum-distance distribution of glycerol relative to the protein, and the corresponding KB integral:","category":"page"},{"location":"example1/","page":"◦ Protein in water/glycerol","title":"◦ Protein in water/glycerol","text":"
\n\n
","category":"page"},{"location":"example1/","page":"◦ Protein in water/glycerol","title":"◦ Protein in water/glycerol","text":"and the same distribution function, decomposed into the contributions of the hydroxyl and aliphatic groups of glycerol:","category":"page"},{"location":"example1/","page":"◦ Protein in water/glycerol","title":"◦ Protein in water/glycerol","text":"
\n\n
","category":"page"},{"location":"example1/","page":"◦ Protein in water/glycerol","title":"◦ Protein in water/glycerol","text":"note: Note\nTo change the options of the calculation, set the Options structure accordingly and pass it as a parameter to mddf. For example:options = Options(cutoff=10.)\nmddf(trajectory,options)The complete set of options available is described here.","category":"page"},{"location":"example1/#2D-map-example1","page":"◦ Protein in water/glycerol","title":"2D density map","text":"","category":"section"},{"location":"example1/","page":"◦ Protein in water/glycerol","title":"◦ Protein in water/glycerol","text":"In this followup from the example above, we compute group contributions of the solute (the protein) to the MDDFs, split into the contributions of each protein residue. This allows the observation of the penetration of the solvent on the structure, and the strength of the interaction of the solvent, or cosolvent, with each type of residue in the structure.","category":"page"},{"location":"example1/","page":"◦ Protein in water/glycerol","title":"◦ Protein in water/glycerol","text":"
Complete example code: click here!","category":"page"},{"location":"example1/","page":"◦ Protein in water/glycerol","title":"◦ Protein in water/glycerol","text":"using Markdown\ncode = Markdown.parse(\"\"\"\n\\`\\`\\`julia\n$(read(\"./assets/scripts/example1/script2.jl\", String))\n\\`\\`\\`\n\"\"\")","category":"page"},{"location":"example1/","page":"◦ Protein in water/glycerol","title":"◦ Protein in water/glycerol","text":"

","category":"page"},{"location":"example1/#Output-2","page":"◦ Protein in water/glycerol","title":"Output","text":"","category":"section"},{"location":"example1/","page":"◦ Protein in water/glycerol","title":"◦ Protein in water/glycerol","text":"The code above will produce the following plot, which contains the contributions of each residue to the distribution function of glycerol, within 1.5 to 3.5 Å of the surface of the protein.","category":"page"},{"location":"example1/","page":"◦ Protein in water/glycerol","title":"◦ Protein in water/glycerol","text":"
\n\n
","category":"page"},{"location":"example1/#3D-map-example1","page":"◦ Protein in water/glycerol","title":"3D density map","text":"","category":"section"},{"location":"example1/","page":"◦ Protein in water/glycerol","title":"◦ Protein in water/glycerol","text":"In this example we compute three-dimensional representations of the density map of Glycerol in the vicinity of a set of residues of a protein, from the minimum-distance distribution function. ","category":"page"},{"location":"example1/","page":"◦ Protein in water/glycerol","title":"◦ Protein in water/glycerol","text":"
Complete example code: click here!","category":"page"},{"location":"example1/","page":"◦ Protein in water/glycerol","title":"◦ Protein in water/glycerol","text":"using Markdown\ncode = Markdown.parse(\"\"\"\n\\`\\`\\`julia\n$(read(\"./assets/scripts/example1/script3.jl\", String))\n\\`\\`\\`\n\"\"\")","category":"page"},{"location":"example1/","page":"◦ Protein in water/glycerol","title":"◦ Protein in water/glycerol","text":"

","category":"page"},{"location":"example1/","page":"◦ Protein in water/glycerol","title":"◦ Protein in water/glycerol","text":"Here, the MDDF is decomposed at each distance according to the contributions of each solute (the protein) residue. The grid is created such that, at each point in space around the protein, it is possible to identify: ","category":"page"},{"location":"example1/","page":"◦ Protein in water/glycerol","title":"◦ Protein in water/glycerol","text":"Which atom is the closest atom of the solute to that point.\nWhich is the contribution of that atom (or residue) to the distribution function.","category":"page"},{"location":"example1/","page":"◦ Protein in water/glycerol","title":"◦ Protein in water/glycerol","text":"Therefore, by filtering the 3D density map at each distance one can visualize over the solute structure which are the regions that mostly interact with the solvent of choice at each distance. Typical images of such a density are:","category":"page"},{"location":"example1/","page":"◦ Protein in water/glycerol","title":"◦ Protein in water/glycerol","text":"
\n\n
","category":"page"},{"location":"example1/","page":"◦ Protein in water/glycerol","title":"◦ Protein in water/glycerol","text":"In the figure on the left, the points in space around the protein are selected with the following properties: distance from the protein smaller than 2.0Å and relative contribution to the MDDF at the corresponding distance of at least 10% of the maximum contribution. Thus, we are selecting the regions of the protein corresponding to the most stable hydrogen-bonding interactions. The color of the points is the contribution to the MDDF, from blue to red. Thus, the most reddish-points corresponds to the regions where the most stable hydrogen bonds were formed. We have marked two regions here, on opposite sides of the protein, with arrows.","category":"page"},{"location":"example1/","page":"◦ Protein in water/glycerol","title":"◦ Protein in water/glycerol","text":"Clicking on those points we obtain which are the atoms of the protein contributing to the MDDF at that region. In particular, the arrow on the right points to the strongest red region, which corresponds to an Aspartic acid. These residues are shown explicitly under the density (represented as a transparent surface) on the figure in the center.","category":"page"},{"location":"example1/","page":"◦ Protein in water/glycerol","title":"◦ Protein in water/glycerol","text":"The figure on the right displays, overlapped with the hydrogen-bonding residues, the most important contributions to the second peak of the distribution, corresponding to distances from the protein between 2.0 and 3.5Å. Notably, the regions involved are different from the ones forming hydrogen bonds, indicating that non-specific interactions with the protein (and not a second solvation shell) are responsible for the second peak. 
","category":"page"},{"location":"example1/#How-to-run-this-example:","page":"◦ Protein in water/glycerol","title":"How to run this example:","text":"","category":"section"},{"location":"example1/","page":"◦ Protein in water/glycerol","title":"◦ Protein in water/glycerol","text":"Assuming that the input files are available in the script directory, just run the script with:","category":"page"},{"location":"example1/","page":"◦ Protein in water/glycerol","title":"◦ Protein in water/glycerol","text":"julia density3D.jl","category":"page"},{"location":"example1/","page":"◦ Protein in water/glycerol","title":"◦ Protein in water/glycerol","text":"Alternatively, open Julia and copy/paste the commands in density3D.jl, or use include(\"./density3D.jl\"). These options will allow you to remain in the Julia session with access to the grid data structure that was generated and corresponds to the output grid.pdb file. ","category":"page"},{"location":"example1/","page":"◦ Protein in water/glycerol","title":"◦ Protein in water/glycerol","text":"This will create the grid.pdb file. Here we provide a previously setup VMD session that contains the data with the visualization choices used to generate the figure above. Load it with:","category":"page"},{"location":"example1/","page":"◦ Protein in water/glycerol","title":"◦ Protein in water/glycerol","text":"vmd -e grid.vmd","category":"page"},{"location":"example1/","page":"◦ Protein in water/glycerol","title":"◦ Protein in water/glycerol","text":"A short tutorial video showing how to open the input and output PDB files in VMD and produce images of the density is available here: ","category":"page"},{"location":"example1/","page":"◦ Protein in water/glycerol","title":"◦ Protein in water/glycerol","text":"
\n\n
","category":"page"},{"location":"installation/#Installation","page":"Installation","title":"Installation","text":"","category":"section"},{"location":"installation/","page":"Installation","title":"Installation","text":"note: Note\nThis is a package written in Julia. We invite you to experiment with the language, but if you want to just call this package from Python, read the From Python section of the manual. Understanding all the features of the package requires reading the manual as a whole. The syntaxes of using this package from Julia or Python are almost identical, and the motivation for using Python should be mostly the familiarity with further analysis tools, such as the plotting packages. ","category":"page"},{"location":"installation/#Install-Julia","page":"Installation","title":"Install Julia","text":"","category":"section"},{"location":"installation/","page":"Installation","title":"Installation","text":"First you need to install the Julia language; version 1.9 or greater is required. Using the juliaup tool is a highly recommended way of installing and keeping Julia up to date.","category":"page"},{"location":"installation/","page":"Installation","title":"Installation","text":"Alternatively, you can install Julia by downloading the binaries directly from the Julia webpage.","category":"page"},{"location":"installation/","page":"Installation","title":"Installation","text":"note: Note\nNew to Julia? Julia is a modern high-level yet performant programming language. Some tips and a nice workflow for using it effectively can be found here. For this specific package, following the step-by-step examples provided here after installing Julia should be enough. 
","category":"page"},{"location":"installation/#Install-the-packages","page":"Installation","title":"Install the packages","text":"","category":"section"},{"location":"installation/","page":"Installation","title":"Installation","text":"Within Julia, to install the packages required for running the examples here you need to do:","category":"page"},{"location":"installation/","page":"Installation","title":"Installation","text":"julia> import Pkg\n\njulia> Pkg.add([\"ComplexMixtures\", \"PDBTools\", \"Plots\", \"EasyFit\", \"LaTeXStrings\"])","category":"page"},{"location":"installation/","page":"Installation","title":"Installation","text":"Here, PDBTools.jl is an auxiliary package to read PDB files and select atoms within them. The Plots, EasyFit and LaTeXStrings packages will help to produce nice-looking plots. ","category":"page"},{"location":"installation/","page":"Installation","title":"Installation","text":"Please read the recommended workflow below, for further information and to be sure to have a smoother experience.","category":"page"},{"location":"installation/#Recommended-workflow-for-reproducibility","page":"Installation","title":"Recommended workflow for reproducibility","text":"","category":"section"},{"location":"installation/#Create-an-environment","page":"Installation","title":"Create an environment","text":"","category":"section"},{"location":"installation/","page":"Installation","title":"Installation","text":"Once Julia is installed, we recommend creating an environment that will contain all the packages you may use for your analyses, including ComplexMixtures, in such a way that your results can always be reproduced and you don't get any version incompatibility.","category":"page"},{"location":"installation/","page":"Installation","title":"Installation","text":"We illustrate this by creating the \"MyNewPaper\" environment, which will be hosted in a simple 
directory,","category":"page"},{"location":"installation/","page":"Installation","title":"Installation","text":"mkdir /home/user/Documents/MyNewPaper","category":"page"},{"location":"installation/","page":"Installation","title":"Installation","text":"Then, start Julia and activate the environment that will be hosted there:","category":"page"},{"location":"installation/","page":"Installation","title":"Installation","text":"julia> import Pkg; Pkg.activate(\"/home/user/Documents/MyNewPaper\")\n Activating new project at `~/Documents/MyNewPaper`","category":"page"},{"location":"installation/","page":"Installation","title":"Installation","text":"and add to this environment the packages that your analyses will require:","category":"page"},{"location":"installation/","page":"Installation","title":"Installation","text":"julia> import Pkg; Pkg.add([\"ComplexMixtures\",\"PDBTools\",\"Plots\", \"EasyFit\", \"LaTeXStrings\"])","category":"page"},{"location":"installation/","page":"Installation","title":"Installation","text":"That's it. Close Julia. Note that this created the files Manifest.toml and Project.toml in the MyNewPaper directory, which contain the information of packages and exact package versions you are using now on in this environment. Saving these files may be relevant for the future exact reproduction of your analyses. ","category":"page"},{"location":"installation/#Run-your-analysis-scripts-in-that-environment","page":"Installation","title":"Run your analysis scripts in that environment","text":"","category":"section"},{"location":"installation/","page":"Installation","title":"Installation","text":"Now, your analysis scripts, described in the next section in details, will look like: ","category":"page"},{"location":"installation/","page":"Installation","title":"Installation","text":"import Pkg; Pkg.activate(\"/home/user/Documents/MyNewPaper\")\n\nusing ComplexMixtures\nusing PDBTools\nusing Plots\nusing EasyFit\nusing LaTeXStrings\n\n# etc ... 
","category":"page"},{"location":"installation/","page":"Installation","title":"Installation","text":"And the script can be run with julia -t auto script.jl (where -t auto allows for multi-threading), or included in julia with julia> include(\"./script.jl\"), as described in the next section.","category":"page"},{"location":"installation/","page":"Installation","title":"Installation","text":"tip: Tip\nBy loading the package with using ComplexMixturesthe most common functions of the package become readily available by their direct name, for example mddf(...).If you don't want to bring the functions into the scope of your script, useimport ComplexMixturesThen, the functions of the package are called, for example, using ComplexMixtures.mddf(...). To avoid having to write ComplexMixtures all the time, define an alias. For example:import ComplexMixtures as CM\nCM.mddf(...)","category":"page"},{"location":"mddf/#Computing-the-Minimum-Distance-Distribution-Function","page":"Computing the MDDF","title":"Computing the Minimum-Distance Distribution Function","text":"","category":"section"},{"location":"mddf/","page":"Computing the MDDF","title":"Computing the MDDF","text":"The main function of the ComplexMixtures package actually computes the MDDF between the solute and the solvent chosen. 
","category":"page"},{"location":"mddf/","page":"Computing the MDDF","title":"Computing the MDDF","text":"It is run with the following command:","category":"page"},{"location":"mddf/","page":"Computing the MDDF","title":"Computing the MDDF","text":"results = mddf(trajectory) ","category":"page"},{"location":"mddf/","page":"Computing the MDDF","title":"Computing the MDDF","text":"The MDDF along with other results, like the corresponding KB integrals, are returned in the results data structure, which is described in the next section.","category":"page"},{"location":"mddf/","page":"Computing the MDDF","title":"Computing the MDDF","text":"It is possible to tune several options of the calculation, by setting the Options data structure with user-defined values in advance. The most common parameters to be set by the user are probably dbulk and stride. ","category":"page"},{"location":"mddf/","page":"Computing the MDDF","title":"Computing the MDDF","text":"dbulk defines the distance from the solute above which the user believes that the reference solute molecule no longer significantly affects the structure of the solvent. The default value is 10 Angstroms, but for large solvent molecules this might not be enough. To increase dbulk, use: ","category":"page"},{"location":"mddf/","page":"Computing the MDDF","title":"Computing the MDDF","text":"options = Options(dbulk=15.)\nresults = mddf(trajectory,options)","category":"page"},{"location":"mddf/","page":"Computing the MDDF","title":"Computing the MDDF","text":"stride defines if some frames will be skipped during the calculation (for speedup). For example, if stride=5, only one in five frames will be considered. 
Adjust stride with: ","category":"page"},{"location":"mddf/","page":"Computing the MDDF","title":"Computing the MDDF","text":"options = Options(stride=5)\nresults = mddf(trajectory,options)","category":"page"},{"location":"mddf/","page":"Computing the MDDF","title":"Computing the MDDF","text":"See the Options section for further details and other options to set.","category":"page"},{"location":"mddf/#Reference-functions","page":"Computing the MDDF","title":"Reference functions","text":"","category":"section"},{"location":"mddf/","page":"Computing the MDDF","title":"Computing the MDDF","text":"Modules = [ComplexMixtures]\nPages = [\"mddf.jl\"]","category":"page"},{"location":"mddf/#ComplexMixtures.coordination_number","page":"Computing the MDDF","title":"ComplexMixtures.coordination_number","text":"coordination_number(trajectory::Trajectory, options::Options)\n\nComputes the coordination numbers for each solute molecule in the trajectory, given the Trajectory. This is an auxiliary function of the ComplexMixtures package, which is used to compute coordination numbers when the normalization of the distribution is not possible or needed. \n\nThe output is a Result structure, which contains the data as the result of a call to mddf, except that all counters which require normalization of the distribution will be zero. 
In summary, this result data structure can be used to compute the coordination numbers, but not the MDDF, RDF, or KB integrals.\n\nExamples\n\njulia> trajectory = Trajectory(\"./trajectory.dcd\",solute,solvent);\n\njulia> results = mddf(trajectory);\n\njulia> coordination_numbers = coordination_number(trajectory);\n\n\n\n\n\n","category":"function"},{"location":"mddf/#ComplexMixtures.mddf-Tuple{Trajectory}","page":"Computing the MDDF","title":"ComplexMixtures.mddf","text":"mddf(trajectory::Trajectory, options::Options; frame_weights = Float64[], coordination_number_only = false)\n\nFunction that computes the minimum-distance distribution function, atomic contributions, and KB integrals, given the Trajectory structure of the simulation and, optionally, parameters given as a second argument of the Options type. This is the main function of the ComplexMixtures package. \n\nExamples\n\njulia> trajectory = Trajectory(\"./trajectory.dcd\",solute,solvent);\n\njulia> results = mddf(trajectory);\n\nor, to set some custom optional parameter,\n\njulia> options = Options(lastframe=1000);\n\njulia> results = mddf(trajectory,options);\n\n\n\n\n\n","category":"method"},{"location":"contrib/#contributions","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"","category":"section"},{"location":"contrib/","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"One of the interesting features of Minimum-Distance distributions is that they can be naturally decomposed into the atomic or group contributions. Simply put, if a MDDF has a peak at a hydrogen-bonding distance, it is natural to decompose that peak into the contributions of each type of solute or solvent atom to that peak. ","category":"page"},{"location":"contrib/","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"To obtain the atomic contributions of an atom or group of atoms, the contributions function is provided. 
For example, in a system composed of a protein and water, we would have defined the solute and solvent using:","category":"page"},{"location":"contrib/","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"using PDBTools, ComplexMixtures\natoms = readPDB(\"system.pdb\")\nprotein = select(atoms,\"protein\")\nwater = select(atoms,\"water\")\nsolute = AtomSelection(protein,nmols=1)\nsolvent = AtomSelection(water,natomspermol=3)","category":"page"},{"location":"contrib/","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"The MDDF calculation is executed with:","category":"page"},{"location":"contrib/","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"trajectory = Trajectory(\"trajectory.dcd\",solute,solvent)\nresults = mddf(trajectory)","category":"page"},{"location":"contrib/#Atomic-contributions-in-the-result-data-structure","page":"Atomic and group contributions","title":"Atomic contributions in the result data structure","text":"","category":"section"},{"location":"contrib/","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"The results data structure contains the decomposition of the MDDF into the contributions of every type of atom of the solute and the solvent. 
These contributions can be retrieved using the contributions function, with the SoluteGroup and SolventGroup selectors.","category":"page"},{"location":"contrib/","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"For example, if the MDDF of water (solvent) relative to a solute was computed, and water has atom names OH2, H1, H2, one can retrieve the contributions of the oxygen atom with:","category":"page"},{"location":"contrib/","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"OH2 = contributions(results, SolventGroup([\"OH2\"]))","category":"page"},{"location":"contrib/","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"or with, if OH2 is the first atom in the molecule,","category":"page"},{"location":"contrib/","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"OH2 = contributions(results, SolventGroup([1]))","category":"page"},{"location":"contrib/","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"The contributions of the hydrogen atoms can be obtained, similarly, with:","category":"page"},{"location":"contrib/","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"H = contributions(results, SolventGroup([\"H1\", \"H2\"]))","category":"page"},{"location":"contrib/","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"or with, if OH2 is the first atom in the molecule (so that H1 and H2 are the second and third atoms),","category":"page"},{"location":"contrib/","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"H = contributions(results, SolventGroup([2, 3]))","category":"page"},{"location":"contrib/","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"Each of these calls will return a vector of the contributions of these atoms to the total MDDF. 
","category":"page"},{"location":"contrib/","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"For example, here we plot the total MDDF and the Oxygen contributions: ","category":"page"},{"location":"contrib/","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"using Plots\nplot(results.d, results.mddf, label=[\"Total MDDF\"], linewidth=2)\nplot!(results.d, contributions(results, SolventGroup([\"OH2\"])), label=[\"OH2\"], linewidth=2)\nplot!(xlabel=\"Distance / Å\", ylabel=\"MDDF\")","category":"page"},{"location":"contrib/","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"","category":"page"},{"location":"contrib/#Using-PDBTools","page":"Atomic and group contributions","title":"Using PDBTools","text":"","category":"section"},{"location":"contrib/","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"If the solute is a protein, or other complex molecule, selections defined with PDBTools can be used. 
For example, this will retrieve the contribution of the acidic residues of a protein to the total MDDF:","category":"page"},{"location":"contrib/","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"using PDBTools\natoms = readPDB(\"system.pdb\")\nacidic_residues = select(atoms, \"acidic\")\nacidic_contributions = contributions(results, SoluteGroup(acidic_residues))","category":"page"},{"location":"contrib/","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"It is expected that for a protein most of the atoms do not contribute to the MDDF, and that all values are zero at very short distances, smaller than the radii of the atoms.","category":"page"},{"location":"contrib/","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"More interesting and general is to select atoms of a complex molecule, like a protein, using residue names, types, etc. Here we illustrate how this is done by providing selection strings to contributions to obtain the contributions of different types of residues of a protein to the total MDDF. ","category":"page"},{"location":"contrib/","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"For example, if we want to split the contributions of the charged and neutral residues to the total MDDF distribution, we could use the following code. 
Here, solute refers to the protein.","category":"page"},{"location":"contrib/","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"charged_residues = PDBTools.select(atoms,\"charged\")\ncharged_contributions = contributions(results, SoluteGroup(charged_residues))\n\nneutral_residues = PDBTools.select(atoms,\"neutral\")\nneutral_contributions = contributions(results, SoluteGroup(neutral_residues))","category":"page"},{"location":"contrib/","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"The charged_contributions and neutral_contributions outputs are vectors containing the contributions of these residues to the total MDDF. The corresponding plot is: ","category":"page"},{"location":"contrib/","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"plot(results.d,results.mddf,label=\"Total MDDF\",linewidth=2)\nplot!(results.d,charged_contributions,label=\"Charged residues\",linewidth=2)\nplot!(results.d,neutral_contributions,label=\"Neutral residues\",linewidth=2)\nplot!(xlabel=\"Distance / Å\",ylabel=\"MDDF\")","category":"page"},{"location":"contrib/","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"Resulting in:","category":"page"},{"location":"contrib/","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"","category":"page"},{"location":"contrib/","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"Note here how charged residues contribute strongly to the peak at hydrogen-bonding distances, but much less in general. Of course all selection options could be used, to obtain the contributions of specific types of residues, atoms, the backbone, the side-chains, etc. 
","category":"page"},{"location":"contrib/#Reference-functions","page":"Atomic and group contributions","title":"Reference functions","text":"","category":"section"},{"location":"contrib/","page":"Atomic and group contributions","title":"Atomic and group contributions","text":"Modules = [ComplexMixtures]\nPages = [\"contributions.jl\"]","category":"page"},{"location":"contrib/#ComplexMixtures.contributions-Tuple{Result, Union{SoluteGroup, SolventGroup}}","page":"Atomic and group contributions","title":"ComplexMixtures.contributions","text":"contributions(R::Result, group::Union{SoluteGroup,SolventGroup}; type = :mddf)\n\nReturns the contributions of the atoms of the solute or solvent to the MDDF, coordiantion number or MD count.\n\nArguments\n\nR::Result: The result of a calculation.\ngroup::Union{SoluteGroup,SolventGroup}: The group of atoms to consider.\ntype::Symbol: The type of contributions to return. Can be :mddf (default), :coordination_number or :md_count.\n\nExamples\n\njulia> using ComplexMixtures, PDBTools\n\njulia> dir = ComplexMixtures.Testing.data_dir*\"/Gromacs\";\n\njulia> atoms = readPDB(dir*\"/system.pdb\");\n\njulia> protein = select(atoms, \"protein\");\n\njulia> emim = select(atoms, \"resname EMI\"); \n\njulia> solute = AtomSelection(protein, nmols = 1)\nAtomSelection \n 1231 atoms belonging to 1 molecule(s).\n Atoms per molecule: 1231\n Number of groups: 1231\n\njulia> solvent = AtomSelection(emim, natomspermol = 20)\nAtomSelection \n 5080 atoms belonging to 254 molecule(s).\n Atoms per molecule: 20\n Number of groups: 20\n\njulia> results = load(dir*\"/protein_EMI.json\"); # load pre-calculated results\n\njulia> contributions(results, SoluteGroup([\"CA\", \"CB\"])) # contribution of CA and CB atoms to the MDDF\n\n\n\n\n\n\n","category":"method"},{"location":"parallel/#Parallel-execution","page":"Parallel execution","title":"Parallel execution","text":"","category":"section"},{"location":"parallel/","page":"Parallel 
execution","title":"Parallel execution","text":"It is highly recommended to run MDDF calculations in parallel, using multiple processors of a single computer. To run the computation in parallel, initialize julia with the -t N option, where N is the number of processes to be used. For example, to use 8 parallel processes, use:","category":"page"},{"location":"parallel/","page":"Parallel execution","title":"Parallel execution","text":"julia -t 8 example.jl","category":"page"},{"location":"parallel/","page":"Parallel execution","title":"Parallel execution","text":"The computation will use a number of parallel processes equal to N. Use -t auto to automatically pick the number of threads available in your computer. ","category":"page"},{"location":"parallel/","page":"Parallel execution","title":"Parallel execution","text":"note: Note\nThe number of threads used for computation of the MDDF is the number of threads available to Julia. Many computers allow hyperthreading, and this is not necessarily beneficial for the execution of this package. The optimal number of threads may vary.Independently of the number of threads initialized with the -t command-line parameter, the number of processes launched by ComplexMixtures in any given computation can be adjusted by the Options(nthreads=N) option. This won't provide any speedup if the optional number of threads is greater than the number of threads available to Julia at runtime.","category":"page"},{"location":"parallel/","page":"Parallel execution","title":"Parallel execution","text":"warning: Warning\nIf the calculations get Killed for no apparent reason, that is probably because you are running out of memory because of the many parallel computations running. 
One way to alleviate this problem is to force garbage collection, usingoptions = Options(GC=true,GC_threshold=0.5)\nR = mddf(trajectory,options)\nThe GC_threshold=0.5 indicates that if the free memory is smaller than 50% of the total memory of the machine, a garbage-collection run will occur. The default parameters are GC=true and GC_threshold=0.3. Read the predefinition of atom groups section if you are experiencing memory issues.","category":"page"},{"location":"multiple/#Working-with-multiple-trajectories","page":"Multiple trajectories","title":"Working with multiple trajectories","text":"","category":"section"},{"location":"multiple/","page":"Multiple trajectories","title":"Multiple trajectories","text":"Very commonly, one has multiple trajectories of the same system, and we want to obtain the average results of all trajectories. We provide a simple scheme to average the results of multiple MDDF calculations:","category":"page"},{"location":"multiple/#Create-a-vector-of-result-data-structures,-without-initialization","page":"Multiple trajectories","title":"Create a vector of result data structures, without initialization","text":"","category":"section"},{"location":"multiple/","page":"Multiple trajectories","title":"Multiple trajectories","text":"Let us assume that we have three Gromacs trajectories, with file names traj1.xtc, traj2.xtc, traj3.xtc. 
First let us create a list with these file names:","category":"page"},{"location":"multiple/","page":"Multiple trajectories","title":"Multiple trajectories","text":"trajectory_files = [ \"traj1.xtc\" , \"traj2.xtc\" , \"traj3.xtc\" ]","category":"page"},{"location":"multiple/","page":"Multiple trajectories","title":"Multiple trajectories","text":"And define an empty vector of Result structures:","category":"page"},{"location":"multiple/","page":"Multiple trajectories","title":"Multiple trajectories","text":"results = Result[]","category":"page"},{"location":"multiple/#Run-the-calculations-in-a-loop","page":"Multiple trajectories","title":"Run the calculations in a loop","text":"","category":"section"},{"location":"multiple/","page":"Multiple trajectories","title":"Multiple trajectories","text":"The calculation on the multiple trajectories is then performed in a simple loop, such as","category":"page"},{"location":"multiple/","page":"Multiple trajectories","title":"Multiple trajectories","text":"atoms = PDBTools.readPDB(\"./system.pdb\")\nsolute = AtomSelection(atoms,\"protein\",nmols=1)\nsolvent = AtomSelection(atoms,\"resname TMAO\",natomspermol=14)\nfor file in trajectory_files\n trajectory = Trajectory(file,solute,solvent)\n # compute the MDDF data and push the result to the results array\n push!(results, mddf(trajectory))\nend","category":"page"},{"location":"multiple/#Merge-the-results-of-several-trajectories,-with-proper-weights","page":"Multiple trajectories","title":"Merge the results of several trajectories, with proper weights","text":"","category":"section"},{"location":"multiple/","page":"Multiple trajectories","title":"Multiple trajectories","text":"Of course, the resulting results vector will contain at each position the results of each calculation. 
To merge these results in a single result data structure, use:","category":"page"},{"location":"multiple/","page":"Multiple trajectories","title":"Multiple trajectories","text":"R = merge(results)","category":"page"},{"location":"multiple/","page":"Multiple trajectories","title":"Multiple trajectories","text":"The R structure generated contains the averaged results of all calculations, with weights proportional to the number of frames of each trajectory. That is, if the first trajectory has 2000 frames, and the second and third trajectories have 1000 frames each, the first trajectory will have a weight of 0.5 on the final results. The merge function can be used to merge previously merged results with new results as well.","category":"page"},{"location":"multiple/","page":"Multiple trajectories","title":"Multiple trajectories","text":"tip: Tip\nThe names of the files and weights are stored in the R.files and R.weights vectors of the results structure:julia> R.files\n3-element Array{String,1}:\n \"./traj1.xtc\"\n \"./traj2.xtc\"\n \"./traj3.xtc\"\n\njulia> R.weights\n3-element Array{Float64,1}:\n 0.5\n 0.25\n 0.25\nIt is not a bad idea to check if that is what you were expecting.","category":"page"},{"location":"save/#save","page":"Save and load","title":"Save and load results","text":"","category":"section"},{"location":"save/","page":"Save and load","title":"Save and load","text":"Three functions serve the purpose of saving and loading the results obtained with ComplexMixtures:","category":"page"},{"location":"save/#Save-data-to-recover-it-later","page":"Save and load","title":"Save data to recover it later","text":"","category":"section"},{"location":"save/","page":"Save and load","title":"Save and load","text":"save(results,\"results.json\")","category":"page"},{"location":"save/","page":"Save and load","title":"Save and load","text":"where results is the output data structure of the mddf() calculation, and results.json is the output file to be created. 
The file is written in JSON format, thus is not naturally human-readable.","category":"page"},{"location":"save/#Load-saved-data","page":"Save and load","title":"Load saved data","text":"","category":"section"},{"location":"save/","page":"Save and load","title":"Save and load","text":"results = load(\"results.json\")","category":"page"},{"location":"save/","page":"Save and load","title":"Save and load","text":"The load function reads the output of the save function above, and restores the results data structure.","category":"page"},{"location":"save/#Write-data-in-a-human-readable-format","page":"Save and load","title":"Write data in a human-readable format","text":"","category":"section"},{"location":"save/","page":"Save and load","title":"Save and load","text":"If you want the results to be written as simple ASCII tables such that you can read them with another analysis program, plotting tool, or just want to inspect the data visually, use:","category":"page"},{"location":"save/","page":"Save and load","title":"Save and load","text":"write(results,\"results.dat\")","category":"page"},{"location":"save/","page":"Save and load","title":"Save and load","text":"Three files will be created by this function:","category":"page"},{"location":"save/","page":"Save and load","title":"Save and load","text":"results.dat: Contains the main results, as the MDDF and KB-integral data.","category":"page"},{"location":"save/","page":"Save and load","title":"Save and load","text":"results-ATOM_CONTRIB_SOLVENT.dat: contains the contribution of each atom type of the solvent to the MDDF.","category":"page"},{"location":"save/","page":"Save and load","title":"Save and load","text":"results-ATOM_CONTRIB_SOLUTE.dat: contains the contribution of each atom type of the solute to the MDDF.","category":"page"},{"location":"save/","page":"Save and load","title":"Save and load","text":"Modules = [ComplexMixtures]\nPages = 
[\"tools/write.jl\"]","category":"page"},{"location":"save/#Base.write-Tuple{Result, String}","page":"Save and load","title":"Base.write","text":"write(\n R::Result, filename::String;\n solute_group_names::Vector{String} = R.solute.group_names,\n solvent_group_names::Vector{String} = R.solvent.group_names,\n)\n\nFunction to write the final results to output files as simple tables that are human-readable and easy to analyze with other software\n\nIf the solute and solvent group names are defined in R, the solute_group_names and solvent_group_names arguments are not necessary. If they are not defined, the user can pass the names of the groups as strings in the solute_group_names and solvent_group_names arguments.\n\n\n\n\n\n","category":"method"},{"location":"trajectory/#trajectories","page":"Loading the trajectory","title":"Loading trajectories","text":"","category":"section"},{"location":"trajectory/","page":"Loading the trajectory","title":"Loading the trajectory","text":"To initialize a trajectory file for computation, use the command","category":"page"},{"location":"trajectory/","page":"Loading the trajectory","title":"Loading the trajectory","text":"trajectory = Trajectory(\"trajectory.xtc\",solute,solvent)","category":"page"},{"location":"trajectory/","page":"Loading the trajectory","title":"Loading the trajectory","text":"where solute and solvent are defined with the AtomSelection function described before. This function opens the stream for reading frames, which are read one at a time when the coordinates are required for computing the MDDF.","category":"page"},{"location":"trajectory/","page":"Loading the trajectory","title":"Loading the trajectory","text":"The Trajectory function uses Chemfiles in the background, and thus the most common trajectory formats are supported, such as the ones produced with NAMD, Gromacs, LAMMPS, Amber, etc. 
","category":"page"},{"location":"trajectory/","page":"Loading the trajectory","title":"Loading the trajectory","text":"tip: Tip\nThe format of the trajectory file is automatically determined by Chemfiles from the extension of the file. However, it can be provided by the user with the format keyword, for example:trajectory = Trajectory(\"trajectory.xtc\",solute,solvent,format=\"xtc\")","category":"page"},{"location":"trajectory/#Reference-functions","page":"Loading the trajectory","title":"Reference functions","text":"","category":"section"},{"location":"trajectory/","page":"Loading the trajectory","title":"Loading the trajectory","text":"Modules = [ComplexMixtures]\nPages = [\"Trajectory.jl\"]","category":"page"},{"location":"trajectory/#ComplexMixtures.Trajectory","page":"Loading the trajectory","title":"ComplexMixtures.Trajectory","text":"Trajectory(filename::String, solute::AtomSelection, solvent::AtomSelection; format::String = \"\", chemfiles = false)\n\nTrajectory constructor data type. \n\nDefaults to reading with the Chemfiles infrastructure, except for DCD and PDB trajectory files, if the \"PDBTraj\" option is provided.\n\nSee memory issue (https://github.com/chemfiles/Chemfiles.jl/issues/44)\n\n\n\n\n\n","category":"type"},{"location":"quickguide/#Quick-Guide","page":"Quick Guide","title":"Quick Guide","text":"","category":"section"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"Of course, follow the installation instructions first. 
A complete working example is shown below, and in the section that follows each command is described in detail.","category":"page"},{"location":"quickguide/#Basic-example","page":"Quick Guide","title":"Basic example","text":"","category":"section"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"Here we show the input file required for the study of the solvation of a protein by the TMAO solvent, which is a molecule with 14 atoms. 
The protein is assumed to be at infinite dilution in the simulation. The trajectory of the simulation is in DCD format in this example, which is the default output of NAMD and CHARMM simulation packages.","category":"page"},{"location":"quickguide/#Input-files","page":"Quick Guide","title":"Input files","text":"","category":"section"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"The files necessary to run this would be:","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"system.pdb: a PDB file of the complete simulated system.\ntrajectory.dcd: the simulation trajectory, here exemplified in the DCD format.\nscript.jl: the Julia script, described below.","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"These files are not provided for this example. For complete running examples, please check our examples section.","category":"page"},{"location":"quickguide/#The-Julia-script","page":"Quick Guide","title":"The Julia script","text":"","category":"section"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"using Markdown\ncode = Markdown.parse(\"\"\"\n\\`\\`\\`julia\n$(read(\"./assets/scripts/basic/script.jl\", String))\n\\`\\`\\`\n\"\"\")","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"Given that this code is saved into a file named script.jl, it can be run within the Julia REPL with:","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"julia> include(\"script.jl\")","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"or directly with:","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"julia -t auto script.jl","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"where -t auto will launch julia with 
multi-threading. It is highly recommended to use multi-threading!","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"note: Note\nSome newer CPUs have \"fast\" and \"slow\" cores, designed for performance or energy savings. Thus using all cores, with -t auto, may not be the best strategy for optimal performance. Experimenting with different numbers of cores using -t N where N is the number of cores used is always necessary for tuning performance.","category":"page"},{"location":"quickguide/#Detailed-description-of-the-example","page":"Quick Guide","title":"Detailed description of the example","text":"","category":"section"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"Start julia and load the ComplexMixtures package, using:","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"using ComplexMixtures","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"And here we will use the PDBTools package to obtain the selections of the solute and solvent molecules: ","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"using PDBTools","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"(see Set solute and solvent for details).","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"The fastest way to understand how to use this package is through an example. ","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"Let us consider a system of three components: a protein, water, and a cosolvent: TMAO (trimethylamine-N-oxide), which is a common osmolyte known to stabilize protein structures. A picture of this system is shown below, with the protein in blue, water, and TMAO molecules. 
The system was constructed with Packmol and the figure was produced with VMD.","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"
\n\n
","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"We want to study the interactions of the protein with TMAO in this example. The computation of the MDDF is performed by defining the solute and solvent selections, and running the calculation on the trajectory.","category":"page"},{"location":"quickguide/#Define-the-protein-as-the-solute","page":"Quick Guide","title":"Define the protein as the solute","text":"","category":"section"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"To define the protein as the solute, we will use the PDBTools package, which provides a handy selection syntax. First, read the PDB file using ","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"atoms = readPDB(\"./system.pdb\")","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"Then, let us select the protein atoms (here we are using the PDBTools.select function):","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"protein = select(atoms, \"protein\")","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"And, finally, let us use the AtomSelection function to set up the structure required by the MDDF calculation:","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"solute = AtomSelection(protein, nmols=1)","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"note: Note\nIt is necessary to indicate how many molecules the selection contains (in this case, nmols=1), so that ComplexMixtures knows that the solute is to be considered as a single structure. 
In this case there is no ambiguity, but if the solute was a micelle, for example, this option would let ComplexMixtures know that one wants to consider the micelle as a single structure.","category":"page"},{"location":"quickguide/#Define-TMAO-the-solvent-to-be-considered","page":"Quick Guide","title":"Define TMAO the solvent to be considered","text":"","category":"section"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"Equivalently, the solvent is set up with:","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"tmao = select(atoms, \"resname TMAO\")\nsolvent = AtomSelection(tmao, natomspermol=14)","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"note: Note\nHere we opted to provide the number of atoms of a TMAO molecule (with the natomspermol keyword). This is generally more practical for small molecules than to provide the number of molecules.","category":"page"},{"location":"quickguide/#Set-the-Trajectory-structure","page":"Quick Guide","title":"Set the Trajectory structure","text":"","category":"section"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"The solute and solvent data structures are then fed into the Trajectory data structure, together with the trajectory file name, with:","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"trajectory = Trajectory(\"trajectory.dcd\", solute, solvent)","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"In this case, the trajectory is of NAMD \"DCD\" format. All formats supported by Chemfiles are automatically recognized. 
","category":"page"},{"location":"quickguide/#Finally,-run-the-computation-and-get-the-results:","page":"Quick Guide","title":"Finally, run the computation and get the results:","text":"","category":"section"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"If default options are used (as the bin size of the histograms, read all frames without skipping any), just run the mddf with:","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"results = mddf(trajectory)\n","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"Some optional parameters for the computation are available in the Options section. Depending on the number of atoms and trajectory length, this can take a while. Computing a MDDF is much more expensive than computing a regular radial distribution function, because the normalization requires the generation of an ideal distribution of the molecules in the system. ","category":"page"},{"location":"quickguide/#The-results-data-structure-obtained","page":"Quick Guide","title":"The results data structure obtained","text":"","category":"section"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"The results data structure contains all the results of the MDDF calculation, including:","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"results.d : Vector containing the distances to the solute. 
","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"results.mddf : Vector containing the minimum-distance distribution function at each distance.","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"That means, for example, that ","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"plot(results.d, results.mddf, xlabel=\"d / Å\", ylabel=\"mddf(d)\") \n","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"results in the expected plot of the MDDF of TMAO as a function of the distance to the protein:","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"
\n\n
","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"The Kirkwood-Buff integral corresponding to that distribution is provided in the results.kb vector, and can be also directly plotted with ","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"plot(results.d, results.kb, xlabel=\"d / Å\", ylabel=\"KB(d) / L / mol\") ","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"to obtain:","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"
\n\n
","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"See the Atomic and group contributions section for a detailed account on how to obtain a molecular picture of the solvation by splitting the MDDF in the contributions of each type of atom of the solvent, each type of residue of the protein, etc.","category":"page"},{"location":"quickguide/#Save-the-results","page":"Quick Guide","title":"Save the results","text":"","category":"section"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"The results can be saved into a file (with JSON format) with:","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"save(results, \"./results.json\")","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"And these results can be loaded afterwards with:","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"load(\"./results.json\")","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"Alternatively, a human-readable set of output files can be obtained to be analyzed in other software (or plotted with alternative tools), with","category":"page"},{"location":"quickguide/","page":"Quick Guide","title":"Quick Guide","text":"write(results,\"./results.dat\")","category":"page"},{"location":"selection/#selections","page":"Set solute and solvent","title":"Solute and solvent selections","text":"","category":"section"},{"location":"selection/","page":"Set solute and solvent","title":"Set solute and solvent","text":"The solute and solvent are defined by selecting subsets of atoms from the system. These subsets are defined by the AtomSelection data structures. 
","category":"page"},{"location":"selection/","page":"Set solute and solvent","title":"Set solute and solvent","text":"To construct an AtomSelection data structure, one needs to provide, at least, the (1-based) indices of the atoms that belong to the selection, and either the number of atoms of each molecule or the number of molecules in the selection.","category":"page"},{"location":"selection/#Using-the-PDBTools-package","page":"Set solute and solvent","title":"Using the PDBTools package","text":"","category":"section"},{"location":"selection/","page":"Set solute and solvent","title":"Set solute and solvent","text":"The PDBTools package helps the construction of the solute and solvent data structures, by providing a convenient selection syntax. Additionally, it sets up the names of the atoms of the system in the data structure, which can be used to retrieve atom and group contributions to MDDFs and coordination numbers. ","category":"page"},{"location":"selection/","page":"Set solute and solvent","title":"Set solute and solvent","text":"For example, here we define a protein of a system as the solute:","category":"page"},{"location":"selection/","page":"Set solute and solvent","title":"Set solute and solvent","text":"julia> using ComplexMixtures, PDBTools\n\njulia> atoms = readPDB(ComplexMixtures.Testing.pdbfile);\n\njulia> protein = select(atoms, \"protein\");\n\njulia> solute = AtomSelection(protein, nmols=1)\nAtomSelection \n 1463 atoms belonging to 1 molecule(s).\n Atoms per molecule: 1463\n Number of groups: 1463 ","category":"page"},{"location":"selection/","page":"Set solute and solvent","title":"Set solute and solvent","text":"We need to inform the AtomSelection function about the number of atoms of each molecule (using natomspermol=3, for example), or the number of molecules (using nmols=1000, for example), such that the atoms belonging to each molecule can be determined without ambiguity. 
","category":"page"},{"location":"selection/","page":"Set solute and solvent","title":"Set solute and solvent","text":"Now, we define the solvent of the system as the water molecules:","category":"page"},{"location":"selection/","page":"Set solute and solvent","title":"Set solute and solvent","text":"julia> water = select(atoms, \"water\"); \n\njulia> solvent = AtomSelection(water, natomspermol=3)\nAtomSelection \n 58014 atoms belonging to 19338 molecule(s).\n Atoms per molecule: 3\n Number of groups: 3","category":"page"},{"location":"selection/#Using-VMD","page":"Set solute and solvent","title":"Using VMD","text":"","category":"section"},{"location":"selection/","page":"Set solute and solvent","title":"Set solute and solvent","text":"VMD is a very popular and powerful package for visualization of simulations. It contains a very versatile library to read topologies and trajectory files, and a powerful selection syntax. The PDBTools.jl (v1.0 or greater) package provides a simple wrapper to VMD that allows using the same syntax at it supports.","category":"page"},{"location":"selection/","page":"Set solute and solvent","title":"Set solute and solvent","text":"For example, the solute can be defined with: ","category":"page"},{"location":"selection/","page":"Set solute and solvent","title":"Set solute and solvent","text":"using ComplexMixtures, PDBTools\n\nindices, names = select_with_vmd(\"./system.gro\", \"protein\", vmd=\"/usr/bin/vmd\")\n\nsolute = AtomSelection(indices, names, nmols=1)","category":"page"},{"location":"selection/","page":"Set solute and solvent","title":"Set solute and solvent","text":"The main advantage here is that all the file types that VMD supports are supported. 
But VMD needs to be installed and is run in the background, and it takes a few seconds to be executed.","category":"page"},{"location":"selection/","page":"Set solute and solvent","title":"Set solute and solvent","text":"The select_with_vmd function also accepts an optional keyword parameter srcload, which can be used to load custom scripts within vmd before setting the selection. This allows the definition of tcl scripts with custom selection macros, for instance. The usage would be: ","category":"page"},{"location":"selection/","page":"Set solute and solvent","title":"Set solute and solvent","text":"using PDBTools\n\nsel = select_with_vmd(\n \"file.pdb\", \n \"resname MYRES\"; \n srcload = [ \"mymacros1.tcl\", \"mymacros2.tcl\" ]\n)","category":"page"},{"location":"selection/","page":"Set solute and solvent","title":"Set solute and solvent","text":"Which corresponds to sourcing each of the macro files in VMD before defining the selection with the custom MYRES name.","category":"page"},{"location":"selection/","page":"Set solute and solvent","title":"Set solute and solvent","text":"warning: Warning\nVMD uses 0-based indexing and select_with_vmd adjusts that. However, if a selection is performed by index, as with index 1, VMD will select the second atom, and the output will be [2]. Selections by type, name, segment, residue name, etc, won't be a problem.","category":"page"},{"location":"selection/#predefinition-of-groups","page":"Set solute and solvent","title":"Predefinition of atom groups","text":"","category":"section"},{"location":"selection/","page":"Set solute and solvent","title":"Set solute and solvent","text":"Importantly, this should be only a concern for the solvation analysis of systems in which individual molecules are very large. This feature was introduced in version 2.0 of the package to support the study of small molecule distribution in virus structures, of millions of atoms. 
","category":"page"},{"location":"selection/","page":"Set solute and solvent","title":"Set solute and solvent","text":"By default, the contribution of each type of atom to the coordination number counts is stored, to allow the decomposition of the final MDDFs into any group contribution. However, when a structure, like a virus, has millions of atoms, storing the contribution of each atom becomes prohibitive in terms of memory. Thus, one may need to predefine the groups in which the contributions will be analyzed.","category":"page"},{"location":"selection/","page":"Set solute and solvent","title":"Set solute and solvent","text":"Here, we illustrate this feature by presselecting the acidic and basic residues of a protein:","category":"page"},{"location":"selection/","page":"Set solute and solvent","title":"Set solute and solvent","text":"julia> using ComplexMixtures, PDBTools\n\njulia> atoms = readPDB(ComplexMixtures.Testing.pdbfile);\n\njulia> protein = select(atoms, \"protein\");\n\njulia> acidic_residues = select(atoms, \"protein and acidic\");\n\njulia> basic_residues = select(atoms, \"protein and basic\");\n\njulia> solute = AtomSelection(\n protein, \n nmols=1,\n group_atom_indices = [ index.(acidic_residues), index.(basic_residues) ],\n group_names = [ \"acidic residues\", \"basic residues\" ]\n )\nAtomSelection \n 1463 atoms belonging to 1 molecule(s).\n Atoms per molecule: 1463\n Number of groups: 1463 ","category":"page"},{"location":"selection/","page":"Set solute and solvent","title":"Set solute and solvent","text":"In this example, then, the solute AtomSelection has two groups. The indices of the atoms of the groups are stored in the group_atom_indices vector and the group names in the group_names vector. 
The atom_group auxiliary function is the most practical way to retrieve the indices of the atoms of the group.","category":"page"},{"location":"selection/","page":"Set solute and solvent","title":"Set solute and solvent","text":"julia> atom_group(solute, \"acidic residues\")\n162-element Vector{Int64}:\n 24\n 25\n 26\n ⋮\n 1436\n 1437","category":"page"},{"location":"selection/","page":"Set solute and solvent","title":"Set solute and solvent","text":"With these group selections predefined, the contributions of these groups to the MDDF or coordination numbers can be retrieved directly from the result data structure with, for example:","category":"page"},{"location":"selection/","page":"Set solute and solvent","title":"Set solute and solvent","text":"julia> result = mddf(trajectory, solute, solvent);\n\njulia> acidic_residue_contributions = contributions(result, SoluteGroup(\"acidic residues\"))","category":"page"},{"location":"selection/#Reference-functions","page":"Set solute and solvent","title":"Reference functions","text":"","category":"section"},{"location":"selection/","page":"Set solute and solvent","title":"Set solute and solvent","text":"Modules = [ComplexMixtures]\nPages = [\"AtomSelection.jl\"]","category":"page"},{"location":"selection/#ComplexMixtures.AtomSelection","page":"Set solute and solvent","title":"ComplexMixtures.AtomSelection","text":"struct AtomSelection\n\nStructure that contains the information about the solute and solvent molecules.\n\nnmols::Int64\nnatomspermol::Int64\nindices::Vector{Int64}\ncustom_groups::Bool\ngroup_atom_indices::Vector{Vector{Int64}}\ngroup_names::Vector{String}\n\n\n\n\n\n","category":"type"},{"location":"selection/#ComplexMixtures.AtomSelection-Tuple","page":"Set solute and solvent","title":"ComplexMixtures.AtomSelection","text":"AtomSelection constructors\n\nThe AtomSelection structure carries the information of the molecules that are going to be used to compute the MDDF. 
The structure can be initialized in different ways:\n\nInitialize the structure providing a vector of PDBTools.Atom(s).\n\n AtomSelection(\n atoms::AbstractVector{<:PDBTools.Atom}; \n nmols::Int = 0, \n natomspermol::Int = 0,\n group_atom_indices::Union{Nothing,Vector{Vector{Int}}} = nothing,\n group_names::Vector{String} = String[]\n ) \n\nThe indices of the atoms will be retrived from the indices of the atoms as defined in the PDB file, thus the PDB file must correspond to the same system as that of the simulation. \n\nEither the number of molecules (nmols) or the number of atoms per molecule (natomspermol) must be provided.\n\nIf group_atom_indices is nothing or group_names is empty, the names of the groups will be retrieved from the atom names, and in the coordination numbers of each individual atom will be stored.\n\nExample\n\njulia> using ComplexMixtures, PDBTools\n\njulia> pdbfile = ComplexMixtures.Testing.pdbfile;\n\njulia> atoms = PDBTools.readPDB(pdbfile, \"resname TMAO\");\n\njulia> atsel = AtomSelection(atoms, natomspermol=14)\nAtomSelection \n 2534 atoms belonging to 181 molecule(s).\n Atoms per molecule: 14\n Number of groups: 14 \n\njulia> atom_group_name(atsel, 1)\n\"N\"\n\njulia> atom_group_name(atsel, 5)\n\"O1\"\n\njulia> length(atom_group_names(atsel))\n14\n\nLower level: initialize the structure providing the index of atoms and groups.\n\n AtomSelection(\n indices::Vector{Int};\n nmols::Int = 0,\n natomspermol::Int = 0,\n group_atom_indices::Union{Nothing,Vector{Vector{Int}}} = nothing,\n group_names::Vector{String} = String[]\n )\n\nConstruct an AtomSelection structure from the most low-level information: the index of atoms and groups.\n\nEither the number of molecules (nmols) or the number of atoms per molecule (natomspermol) must be provided.\n\nGroups of atoms can be defined by providing a vector of vectors of atom indices (group_atom_indices), and a vector of group names (group_names). 
If group_atom_indices is set to nothing, the coordination numbers of each individual atom will be stored.\n\nExamples\n\njulia> using ComplexMixtures\n\njulia> AtomSelection([1,2,3], nmols=1)\nAtomSelection \n 3 atoms belonging to 1 molecule(s).\n Atoms per molecule: 3\n Number of groups: 3\n\njulia> AtomSelection([1,2,3], natomspermol=1)\nAtomSelection \n 3 atoms belonging to 3 molecule(s).\n Atoms per molecule: 1\n Number of groups: 1\n\njulia> AtomSelection([1,2,3], natomspermol=1, group_atom_indices=[[1,2],[3]], group_names=[\"G1\", \"G2\"])\nAtomSelection \n 3 atoms belonging to 3 molecule(s).\n Atoms per molecule: 1\n Number of groups: 2 \n\n\n\n\n\n","category":"method"},{"location":"selection/#ComplexMixtures.SoluteGroup","page":"Set solute and solvent","title":"ComplexMixtures.SoluteGroup","text":"SoluteGroup and SolventGroup data structures.\n\nThese structures are used to select groups of atoms to extract their contributions from the MDDF results. \n\nMost typically, the groups are defined from a selection of atoms with the PDBTools package, or by providing directly the indices of the atoms in the structure. \n\nAlternatively, if the groups were predefined, the groups can be selected by group index or group name. \n\nThe possible constructors are:\n\nSoluteGroup(atoms::Vector{PDBTools.Atom})\nSoluteGroup(atom_indices::Vector{Int})\nSoluteGroup(atom_names::Vector{String})\nSoluteGroup(group_name::String)\nSoluteGroup(residue::PDBTools.Residue)\nSoluteGroup(atsel::AtomSelection)\n\nabove, each constructor can be replaced by SolventGroup. 
The resulting data structures are used as input parameters for the contributions function:\n\ncontributions(results::Result, group::Union{SoluteGroup, SolventGroup}; type=:mddf)\n\nSee the contributions help entry for additional information.\n\nExamples\n\nDefining solute groups with different input types:\n\njulia> using ComplexMixtures, PDBTools\n\njulia> atoms = PDBTools.readPDB(ComplexMixtures.Testing.pdbfile, \"protein\"); \n\njulia> SoluteGroup(atoms) # vector of PDBTools.Atom(s)\nSoluteGroup defined by:\n atom_indices: [ 1, 2, ..., 1462, 1463 ] - 1463 atoms\n\njulia> SoluteGroup(PDBTools.index.(atoms)) # vector of atom indices\nSoluteGroup defined by:\n atom_indices: [ 1, 2, ..., 1462, 1463 ] - 1463 atoms\n\njulia> SoluteGroup(PDBTools.name.(atoms)) # vector of atom names\nSoluteGroup defined by:\n atom_names: [ N, HT1, ..., HG22, HG23 ] - 1463 atoms\n \njulia> SoluteGroup(\"acidic residues\") # predefined group name\nSoluteGroup defined by:\n group_name: \"acidic residues\"\n\njulia> SoluteGroup(1) # predefined group index\nSoluteGroup defined by:\n group_index: 1\n\njulia> SoluteGroup(collect(eachresidue(atoms))[2]) # PDBTools.Residue(s)\nSoluteGroup defined by:\n atom_indices: [ 13, 14, ..., 22, 23 ] - 11 atoms\n\n\n\n\n\n\n","category":"type"},{"location":"selection/#ComplexMixtures.SolventGroup","page":"Set solute and solvent","title":"ComplexMixtures.SolventGroup","text":"SoluteGroup and SolventGroup data structures.\n\nThese structures are used to select groups of atoms to extract their contributions from the MDDF results. \n\nMost tipically, the groups are defined from a selection of atoms with the PDBTools package, or by providing directly the indices of teh atoms in the structure. \n\nAlternativelly, if the groups were predefined, the groups can be selected by group index or group name. 
\n\nThe possible constructors are:\n\nSoluteGroup(atoms::Vector{PDBTools.Atom})\nSoluteGroup(atom_indices::Vector{Int})\nSoluteGroup(atom_names::Vector{String})\nSoluteGroup(group_name::String)\nSoluteGroup(residue::PDBTools.Residue)\nSoluteGroup(atsel::AtomSelection)\n\nabove, each constructor can be replaced by SolventGroup. The resulting data structures are used as input parameters for the contributions function:\n\ncontributions(results::Result, group::Union{SoluteGroup, SolventGroup}; type=:mddf)\n\nSee the contributions help entry for additional information.\n\nExamples\n\nDefining solute groups with different input types:\n\njulia> using ComplexMixtures, PDBTools\n\njulia> atoms = PDBTools.readPDB(ComplexMixtures.Testing.pdbfile, \"protein\"); \n\njulia> SoluteGroup(atoms) # vector of PDBTools.Atom(s)\nSoluteGroup defined by:\n atom_indices: [ 1, 2, ..., 1462, 1463 ] - 1463 atoms\n\njulia> SoluteGroup(PDBTools.index.(atoms)) # vector of atom indices\nSoluteGroup defined by:\n atom_indices: [ 1, 2, ..., 1462, 1463 ] - 1463 atoms\n\njulia> SoluteGroup(PDBTools.name.(atoms)) # vector of atom names\nSoluteGroup defined by:\n atom_names: [ N, HT1, ..., HG22, HG23 ] - 1463 atoms\n \njulia> SoluteGroup(\"acidic residues\") # predefined group name\nSoluteGroup defined by:\n group_name: \"acidic residues\"\n\njulia> SoluteGroup(1) # predefined group index\nSoluteGroup defined by:\n group_index: 1\n\njulia> SoluteGroup(collect(eachresidue(atoms))[2]) # PDBTools.Residue(s)\nSoluteGroup defined by:\n atom_indices: [ 13, 14, ..., 22, 23 ] - 11 atoms\n\n\n\n\n\n\n","category":"type"},{"location":"selection/#ComplexMixtures.atom_group-Tuple{AtomSelection, Int64}","page":"Set solute and solvent","title":"ComplexMixtures.atom_group","text":"atom_group(atsel::AtomSelection, i::Int)\natom_group(atsel::AtomSelection, groupname::String)\n\natom_group(atsel::AtomSelection, i::Int)\natom_group(atsel::AtomSelection, groupname::String)\n\nReturn the indices of the atoms that belong 
to a given group.\n\nExample\n\njulia> using ComplexMixtures\n\njulia> atsel = AtomSelection([1,2,3], natomspermol=1, group_atom_indices=[[1,2],[3]], group_names=[\"G1\", \"G2\"])\nAtomSelection \n 3 atoms belonging to 3 molecule(s).\n Atoms per molecule: 1\n Number of groups: 2\n\njulia> atom_group(atsel, 1)\n2-element Vector{Int64}:\n 1\n 2\n\njulia> atom_group(atsel, \"G2\")\n1-element Vector{Int64}:\n 3\n\njulia> atom_group_name(atsel, 1)\n\"G1\"\n\n\n\n\n\n","category":"method"},{"location":"selection/#ComplexMixtures.atom_group_name-Tuple{AtomSelection, Int64}","page":"Set solute and solvent","title":"ComplexMixtures.atom_group_name","text":"atom_group_name(atsel::AtomSelection, i::Int)\natom_group_names(atsel::AtomSelection)\n\nReturn the name of the group of atoms with index i. The atom_group_names function returns a vector with the names of all the groups.\n\nExample\n\njulia> using ComplexMixtures\n\njulia> atsel = AtomSelection([1,2,3], natomspermol=1, group_atom_indices=[[1,2],[3]], group_names=[\"G1\", \"G2\"])\nAtomSelection \n 3 atoms belonging to 3 molecule(s).\n Atoms per molecule: 1\n Number of groups: 2\n\njulia> atom_group_name(atsel, 1)\n\"G1\"\n\njulia> atom_group_names(atsel)\n2-element Vector{String}:\n \"G1\"\n \"G2\"\n\n\n\n\n\n","category":"method"},{"location":"selection/#ComplexMixtures.atom_group_names-Tuple{Any}","page":"Set solute and solvent","title":"ComplexMixtures.atom_group_names","text":"atom_group_name(atsel::AtomSelection, i::Int)\natom_group_names(atsel::AtomSelection)\n\nReturn the name of the group of atoms with index i. 
The atom_group_names function returns a vector with the names of all the groups.\n\nExample\n\njulia> using ComplexMixtures\n\njulia> atsel = AtomSelection([1,2,3], natomspermol=1, group_atom_indices=[[1,2],[3]], group_names=[\"G1\", \"G2\"])\nAtomSelection \n 3 atoms belonging to 3 molecule(s).\n Atoms per molecule: 1\n Number of groups: 2\n\njulia> atom_group_name(atsel, 1)\n\"G1\"\n\njulia> atom_group_names(atsel)\n2-element Vector{String}:\n \"G1\"\n \"G2\"\n\n\n\n\n\n","category":"method"},{"location":"example4/#Glycerol/water-mixture","page":"◦ Water/Glycerol mixture","title":"Glycerol/water mixture","text":"","category":"section"},{"location":"example4/","page":"◦ Water/Glycerol mixture","title":"◦ Water/Glycerol mixture","text":"This example illustrates the use of ComplexMixtures.jl to study the solution structure of a crowded (1:1 molar fraction) solution of glycerol in water. Here, we compute the distribution function and atomic contributions associated to the inter-species interactions (water-glycerol) and the glycerol-glycerol auto-correlation function. This example aims to illustrate how to obtain a detailed molecular picture of the solvation structures in an homogeneous mixture.","category":"page"},{"location":"example4/","page":"◦ Water/Glycerol mixture","title":"◦ Water/Glycerol mixture","text":"The system simulated consists of 1000 water molecules (red) and 1000 glycerol molecules (purple).","category":"page"},{"location":"example4/","page":"◦ Water/Glycerol mixture","title":"◦ Water/Glycerol mixture","text":"
\n\n
","category":"page"},{"location":"example4/#Index","page":"◦ Water/Glycerol mixture","title":"Index","text":"","category":"section"},{"location":"example4/","page":"◦ Water/Glycerol mixture","title":"◦ Water/Glycerol mixture","text":"Data, packages, and execution\nGlycerol-Glycerol and Water-Glycerol distribution functions\nGlycerol group contributions to MDDFs\n2D map of group contributions","category":"page"},{"location":"example4/#data-example4","page":"◦ Water/Glycerol mixture","title":"Data, packages, and execution","text":"","category":"section"},{"location":"example4/","page":"◦ Water/Glycerol mixture","title":"◦ Water/Glycerol mixture","text":"The files required to run this example are:","category":"page"},{"location":"example4/","page":"◦ Water/Glycerol mixture","title":"◦ Water/Glycerol mixture","text":"equilibrated.pdb: The PDB file of the complete system.\ntraj_Glyc.dcd: Trajectory file. This is a 200Mb file, necessary for running from scratch the calculations.","category":"page"},{"location":"example4/","page":"◦ Water/Glycerol mixture","title":"◦ Water/Glycerol mixture","text":"To run the scripts, we suggest the following procedure:","category":"page"},{"location":"example4/","page":"◦ Water/Glycerol mixture","title":"◦ Water/Glycerol mixture","text":"Create a directory, for example example4.\nCopy the required data files above to this directory.\nLaunch julia in that directory: activate the directory environment, and install the required packages. 
This means launching Julia and executing:\nimport Pkg \nPkg.activate(\".\")\nPkg.add([\"ComplexMixtures\", \"PDBTools\", \"Plots\", \"LaTeXStrings\", \"EasyFit\"])\nexit()\nCopy the code of each script into a file, and execute with:\njulia -t auto script.jl\nAlternatively (and perhaps preferably), copy line by line the content of the script into the Julia REPL, to follow each step of the calculation.","category":"page"},{"location":"example4/#glyc_mddf-example4","page":"◦ Water/Glycerol mixture","title":"Glycerol-Glycerol and Water-Glycerol distribution functions","text":"","category":"section"},{"location":"example4/","page":"◦ Water/Glycerol mixture","title":"◦ Water/Glycerol mixture","text":"The first and most simple analysis is the computation of the minimum-distance distribution functions between the components of the solution. In this example we focus on the distributions of the two components relative to the glycerol molecules. Thus, we display the glycerol auto-correlation function, and the water-glycerol correlation function in the first panel of the figure below. The second panel displays the KB integrals of the two components computed from each of these distributions.","category":"page"},{"location":"example4/","page":"◦ Water/Glycerol mixture","title":"◦ Water/Glycerol mixture","text":"
Complete example code: click here!","category":"page"},{"location":"example4/","page":"◦ Water/Glycerol mixture","title":"◦ Water/Glycerol mixture","text":"using Markdown\ncode = Markdown.parse(\"\"\"\n\\`\\`\\`julia\n$(read(\"./assets/scripts/example4/script1.jl\", String))\n\\`\\`\\`\n\"\"\")","category":"page"},{"location":"example4/","page":"◦ Water/Glycerol mixture","title":"◦ Water/Glycerol mixture","text":"

","category":"page"},{"location":"example4/","page":"◦ Water/Glycerol mixture","title":"◦ Water/Glycerol mixture","text":"(Image: )","category":"page"},{"location":"example4/","page":"◦ Water/Glycerol mixture","title":"◦ Water/Glycerol mixture","text":"Both water and glycerol form hydrogen bonds with (other) glycerol molecules, as indicated by the peaks at ~1.8 Å. The auto-correlation function of glycerol shows a more marked second peak corresponding to non-specific interactions, which (as we will show) are likely associated to interactions of its aliphatic groups.","category":"page"},{"location":"example4/","page":"◦ Water/Glycerol mixture","title":"◦ Water/Glycerol mixture","text":"The KB integrals in the second panel show similar values for water and glycerol, with the KB integral for water being slightly greater. This means that glycerol molecules are (slightly, if the result is considered reliable) preferentially hydrated from a macroscopic standpoint.","category":"page"},{"location":"example4/#glyc-groups-example4","page":"◦ Water/Glycerol mixture","title":"Glycerol group contributions to MDDFs","text":"","category":"section"},{"location":"example4/","page":"◦ Water/Glycerol mixture","title":"◦ Water/Glycerol mixture","text":"
Complete example code: click here!","category":"page"},{"location":"example4/","page":"◦ Water/Glycerol mixture","title":"◦ Water/Glycerol mixture","text":"using Markdown\ncode = Markdown.parse(\"\"\"\n\\`\\`\\`julia\n$(read(\"./assets/scripts/example4/script2.jl\", String))\n\\`\\`\\`\n\"\"\")","category":"page"},{"location":"example4/","page":"◦ Water/Glycerol mixture","title":"◦ Water/Glycerol mixture","text":"

","category":"page"},{"location":"example4/","page":"◦ Water/Glycerol mixture","title":"◦ Water/Glycerol mixture","text":"(Image: )","category":"page"},{"location":"example4/#map-example4","page":"◦ Water/Glycerol mixture","title":"2D map of group contributions","text":"","category":"section"},{"location":"example4/","page":"◦ Water/Glycerol mixture","title":"◦ Water/Glycerol mixture","text":"The above distributions can be split into the contributions of each glycerol chemical group. The 2D maps below display this decomposition.","category":"page"},{"location":"example4/","page":"◦ Water/Glycerol mixture","title":"◦ Water/Glycerol mixture","text":"
Complete example code: click here!","category":"page"},{"location":"example4/","page":"◦ Water/Glycerol mixture","title":"◦ Water/Glycerol mixture","text":"using Markdown\ncode = Markdown.parse(\"\"\"\n\\`\\`\\`julia\n$(read(\"./assets/scripts/example4/script3.jl\", String))\n\\`\\`\\`\n\"\"\")","category":"page"},{"location":"example4/","page":"◦ Water/Glycerol mixture","title":"◦ Water/Glycerol mixture","text":"

","category":"page"},{"location":"example4/","page":"◦ Water/Glycerol mixture","title":"◦ Water/Glycerol mixture","text":"(Image: )","category":"page"},{"location":"example4/","page":"◦ Water/Glycerol mixture","title":"◦ Water/Glycerol mixture","text":"The interesting result here is that the mathrmCH group of glycerol is protected from both solvents. There is a strong density augmentation at the vicinity of hydroxyl groups, and the second peak of the MDDFs is clearly associated to interactions with the mathrmCH_2 groups.","category":"page"},{"location":"example2/#Polyacrylamide-in-DMDF","page":"◦ Polyacrylamide in DMF","title":"Polyacrylamide in DMDF","text":"","category":"section"},{"location":"example2/","page":"◦ Polyacrylamide in DMF","title":"◦ Polyacrylamide in DMF","text":"In this example we illustrate how the solvation structure of a polymer can be studied with ComplexMixtures.jl. The system is a 5-mer segment of polyacrylamide (PAE - capped with methyl groups), solvated with dimethylformamide (DMF). The system is interesting because of the different functional groups and polarities involved in the interactions of DMF with PAE. A snapshot of the system is shown below.","category":"page"},{"location":"example2/","page":"◦ Polyacrylamide in DMF","title":"◦ Polyacrylamide in DMF","text":"
\n\n
","category":"page"},{"location":"example2/","page":"◦ Polyacrylamide in DMF","title":"◦ Polyacrylamide in DMF","text":"The structures of DMF and of the polyacrylamide segment are:","category":"page"},{"location":"example2/","page":"◦ Polyacrylamide in DMF","title":"◦ Polyacrylamide in DMF","text":"
\n\n\n\n\n\n\n\n\n
DMFPolyacrylamide
\n
","category":"page"},{"location":"example2/#Index","page":"◦ Polyacrylamide in DMF","title":"Index","text":"","category":"section"},{"location":"example2/","page":"◦ Polyacrylamide in DMF","title":"◦ Polyacrylamide in DMF","text":"Data, packages, and execution\nMDDF and KB integrals\nGroup contributions\n2D density map","category":"page"},{"location":"example2/#data-example2","page":"◦ Polyacrylamide in DMF","title":"Data, packages, and execution","text":"","category":"section"},{"location":"example2/","page":"◦ Polyacrylamide in DMF","title":"◦ Polyacrylamide in DMF","text":"The files required to run this example are:","category":"page"},{"location":"example2/","page":"◦ Polyacrylamide in DMF","title":"◦ Polyacrylamide in DMF","text":"equilibrated.pdb: The PDB file of the complete system.\ntraj_Polyacry.dcd: Trajectory file. This is a 275Mb file, necessary for running from scratch the calculations.","category":"page"},{"location":"example2/","page":"◦ Polyacrylamide in DMF","title":"◦ Polyacrylamide in DMF","text":"To run the scripts, we suggest the following procedure:","category":"page"},{"location":"example2/","page":"◦ Polyacrylamide in DMF","title":"◦ Polyacrylamide in DMF","text":"Create a directory, for example example2.\nCopy the required data files above to this directory.\nLaunch julia in that directory: activate the directory environment, and install the required packages. 
This means launching Julia and executing:\nimport Pkg \nPkg.activate(\".\")\nPkg.add([\"ComplexMixtures\", \"PDBTools\", \"Plots\", \"LaTeXStrings\", \"EasyFit\"])\nexit()\nCopy the code of each script into a file, and execute with:\njulia -t auto script.jl\nAlternatively (and perhaps preferably), copy line by line the content of the script into the Julia REPL, to follow each step of the calculation.","category":"page"},{"location":"example2/#mddf-example2","page":"◦ Polyacrylamide in DMF","title":"MDDF and KB integrals","text":"","category":"section"},{"location":"example2/","page":"◦ Polyacrylamide in DMF","title":"◦ Polyacrylamide in DMF","text":"Here we compute the minimum-distance distribution function, the Kirkwood-Buff integral, and the atomic contributions of the solvent to the density. This example illustrates the regular usage of ComplexMixtures, to compute the minimum distance distribution function, KB-integrals and group contributions. ","category":"page"},{"location":"example2/","page":"◦ Polyacrylamide in DMF","title":"◦ Polyacrylamide in DMF","text":"
Complete example code: click here!","category":"page"},{"location":"example2/","page":"◦ Polyacrylamide in DMF","title":"◦ Polyacrylamide in DMF","text":"using Markdown\ncode = Markdown.parse(\"\"\"\n\\`\\`\\`julia\n$(read(\"./assets/scripts/example2/script1.jl\", String))\n\\`\\`\\`\n\"\"\")","category":"page"},{"location":"example2/","page":"◦ Polyacrylamide in DMF","title":"◦ Polyacrylamide in DMF","text":"

","category":"page"},{"location":"example2/#Output","page":"◦ Polyacrylamide in DMF","title":"Output","text":"","category":"section"},{"location":"example2/","page":"◦ Polyacrylamide in DMF","title":"◦ Polyacrylamide in DMF","text":"The distribution of DMF molecules around polyacrylamide is shown below. There is a peak at ~2.5Angs, indicating favorable non-specific interactions between the solvent molecules and the polymer. The peak is followed by a dip and diffuse peaks at higher distances. Thus, the DMF molecules are structured around the polymer, but essentially only in the first solvation shell. ","category":"page"},{"location":"example2/","page":"◦ Polyacrylamide in DMF","title":"◦ Polyacrylamide in DMF","text":"(Image: )","category":"page"},{"location":"example2/","page":"◦ Polyacrylamide in DMF","title":"◦ Polyacrylamide in DMF","text":"The KB integral in a bicomponent mixture converges to the (negative of the) apparent molar volume of the solute. It is negative, indicating that the accumulation of DMF in the first solvation shell of the polymer is not enough to compensate the excluded volume of the solute. ","category":"page"},{"location":"example2/#groups-example2","page":"◦ Polyacrylamide in DMF","title":"Group contributions","text":"","category":"section"},{"location":"example2/","page":"◦ Polyacrylamide in DMF","title":"◦ Polyacrylamide in DMF","text":"The MDDF can be decomposed into the contributions of the DMF chemical groups, and on the polyacrylamide chemical groups. In the first panel below we show the contributions of the DMF chemical groups to the distribution function.","category":"page"},{"location":"example2/","page":"◦ Polyacrylamide in DMF","title":"◦ Polyacrylamide in DMF","text":"
Complete example code: click here!","category":"page"},{"location":"example2/","page":"◦ Polyacrylamide in DMF","title":"◦ Polyacrylamide in DMF","text":"using Markdown\ncode = Markdown.parse(\"\"\"\n\\`\\`\\`julia\n$(read(\"./assets/scripts/example2/script2.jl\", String))\n\\`\\`\\`\n\"\"\")","category":"page"},{"location":"example2/","page":"◦ Polyacrylamide in DMF","title":"◦ Polyacrylamide in DMF","text":"

","category":"page"},{"location":"example2/#Output-2","page":"◦ Polyacrylamide in DMF","title":"Output","text":"","category":"section"},{"location":"example2/","page":"◦ Polyacrylamide in DMF","title":"◦ Polyacrylamide in DMF","text":"The decomposition reveals that specific interactions peaking at distances slightly smaller than 2 Å exist between the polymer and the carbonyl group of DMF. Thus, there are hydrogen bonds between the polymer and this group, which dominate the interactions between the solute and the solvent at short distances. The non-specific interactions peak at 2.5 Å and are composed of contributions of all DMF chemical groups, but particularly of the methyl groups.","category":"page"},{"location":"example2/","page":"◦ Polyacrylamide in DMF","title":"◦ Polyacrylamide in DMF","text":"(Image: )","category":"page"},{"location":"example2/","page":"◦ Polyacrylamide in DMF","title":"◦ Polyacrylamide in DMF","text":"The decomposition of the same MDDF in the contributions of the chemical groups of the polymer is clearly associated to the DMF contributions. The specific, hydrogen-bonding, interactions, are associated to the polymer amine groups. The amine groups also contribute to the non-specific interactions at greater distances, but these are a sum of the contributions of all polymer groups, polar or aliphatic.","category":"page"},{"location":"example2/#2Dmap-example2","page":"◦ Polyacrylamide in DMF","title":"2D density map","text":"","category":"section"},{"location":"example2/","page":"◦ Polyacrylamide in DMF","title":"◦ Polyacrylamide in DMF","text":"We can decompose the MDDF into the contributions of each portion of the polymer chain. The map below displays the contributions of each chemical group of the polymer, now split into the mers of the polymer, to the MDDF.","category":"page"},{"location":"example2/","page":"◦ Polyacrylamide in DMF","title":"◦ Polyacrylamide in DMF","text":"
Complete example code: click here!","category":"page"},{"location":"example2/","page":"◦ Polyacrylamide in DMF","title":"◦ Polyacrylamide in DMF","text":"using Markdown\ncode = Markdown.parse(\"\"\"\n\\`\\`\\`julia\n$(read(\"./assets/scripts/example2/script3.jl\", String))\n\\`\\`\\`\n\"\"\")","category":"page"},{"location":"example2/","page":"◦ Polyacrylamide in DMF","title":"◦ Polyacrylamide in DMF","text":"

","category":"page"},{"location":"example2/#Output-3","page":"◦ Polyacrylamide in DMF","title":"Output","text":"","category":"section"},{"location":"example2/","page":"◦ Polyacrylamide in DMF","title":"◦ Polyacrylamide in DMF","text":"The terminal methyl groups interact strongly with DMF, and strong local density augmentations are visible in particular on the amine groups. These occur at less than 2.0 Å and are characteristic of hydrogen-bond interactions. Interestingly, the DMF molecules are excluded from the aliphatic and carbonyl groups of the polymer, relative to the other groups.","category":"page"},{"location":"example2/","page":"◦ Polyacrylamide in DMF","title":"◦ Polyacrylamide in DMF","text":"Finally, it is noticeable that the central mer is more weakly solvated by DMF than the mers approaching the extremes of the polymer chain. This is likely a result of the partial folding of the polymer, which protects the central mers from the solvent in a fraction of the polymer configurations.","category":"page"},{"location":"example2/","page":"◦ Polyacrylamide in DMF","title":"◦ Polyacrylamide in DMF","text":"
\n\n
","category":"page"},{"location":"example2/#References","page":"◦ Polyacrylamide in DMF","title":"References","text":"","category":"section"},{"location":"example2/","page":"◦ Polyacrylamide in DMF","title":"◦ Polyacrylamide in DMF","text":"Molecules built with JSME: B. Bienfait and P. Ertl, JSME: a free molecule editor in JavaScript, Journal of Cheminformatics 5:24 (2013) http://biomodel.uah.es/en/DIY/JSME/draw.en.htm","category":"page"},{"location":"example2/","page":"◦ Polyacrylamide in DMF","title":"◦ Polyacrylamide in DMF","text":"The system was built with Packmol.","category":"page"},{"location":"example2/","page":"◦ Polyacrylamide in DMF","title":"◦ Polyacrylamide in DMF","text":"The simulations were performed with NAMD, with CHARMM36 parameters. ","category":"page"},{"location":"example3/#POPC-membrane-in-water/ethanol","page":"◦ POPC membrane in water/ethanol","title":"POPC membrane in water/ethanol","text":"","category":"section"},{"location":"example3/","page":"◦ POPC membrane in water/ethanol","title":"◦ POPC membrane in water/ethanol","text":"In this example ComplexMixtures.jl is used to study the interactions of a POPC membrane with a mixture of 20%(mol/mol) ethanol in water. At this concentration ethanol destabilizes the membrane.","category":"page"},{"location":"example3/","page":"◦ POPC membrane in water/ethanol","title":"◦ POPC membrane in water/ethanol","text":"
\n\n
","category":"page"},{"location":"example3/","page":"◦ POPC membrane in water/ethanol","title":"◦ POPC membrane in water/ethanol","text":"System image: a POPC membrane (center) solvated by a mixture of water (purple) and ethanol (green). The system is composed by 59 POPC, 5000 water, and 1000 ethanol molecules. ","category":"page"},{"location":"example3/#Index","page":"◦ POPC membrane in water/ethanol","title":"Index","text":"","category":"section"},{"location":"example3/","page":"◦ POPC membrane in water/ethanol","title":"◦ POPC membrane in water/ethanol","text":"Data, packages, and execution\nMDDF and KB integrals\nGroup contributions\nInteraction of POPC groups with water\nInteraction of POPC groups with ethanol\nDensity map on POPC chains","category":"page"},{"location":"example3/#data-example3","page":"◦ POPC membrane in water/ethanol","title":"Data, packages, and execution","text":"","category":"section"},{"location":"example3/","page":"◦ POPC membrane in water/ethanol","title":"◦ POPC membrane in water/ethanol","text":"The files required to run this example are:","category":"page"},{"location":"example3/","page":"◦ POPC membrane in water/ethanol","title":"◦ POPC membrane in water/ethanol","text":"equilibrated.pdb: The PDB file of the complete system.\ntraj_POPC.dcd: Trajectory file. This is a 365Mb file, necessary for running from scratch the calculations.","category":"page"},{"location":"example3/","page":"◦ POPC membrane in water/ethanol","title":"◦ POPC membrane in water/ethanol","text":"To run the scripts, we suggest the following procedure:","category":"page"},{"location":"example3/","page":"◦ POPC membrane in water/ethanol","title":"◦ POPC membrane in water/ethanol","text":"Create a directory, for example example3.\nCopy the required data files above to this directory.\nLaunch julia in that directory: activate the directory environment, and install the required packages. 
This can be done by launching Julia and executing:\nimport Pkg \nPkg.activate(\".\")\nPkg.add([\"ComplexMixtures\", \"PDBTools\", \"Plots\", \"LaTeXStrings\", \"EasyFit\"])\nexit()\nCopy the code of each script into a file, and execute with:\njulia -t auto script.jl\nAlternatively (and perhaps preferably), copy line by line the content of the script into the Julia REPL, to follow each step of the calculation.","category":"page"},{"location":"example3/#mddf-example3","page":"◦ POPC membrane in water/ethanol","title":"MDDF and KB integrals","text":"","category":"section"},{"location":"example3/","page":"◦ POPC membrane in water/ethanol","title":"◦ POPC membrane in water/ethanol","text":"Here we show the distribution functions and KB integrals associated to the solvation of the membrane by water and ethanol. ","category":"page"},{"location":"example3/","page":"◦ POPC membrane in water/ethanol","title":"◦ POPC membrane in water/ethanol","text":"
Complete example code: click here!","category":"page"},{"location":"example3/","page":"◦ POPC membrane in water/ethanol","title":"◦ POPC membrane in water/ethanol","text":"using Markdown\ncode = Markdown.parse(\"\"\"\n\\`\\`\\`julia\n$(read(\"./assets/scripts/example3/script1.jl\", String))\n\\`\\`\\`\n\"\"\")","category":"page"},{"location":"example3/","page":"◦ POPC membrane in water/ethanol","title":"◦ POPC membrane in water/ethanol","text":"

","category":"page"},{"location":"example3/#Output","page":"◦ POPC membrane in water/ethanol","title":"Output","text":"","category":"section"},{"location":"example3/","page":"◦ POPC membrane in water/ethanol","title":"◦ POPC membrane in water/ethanol","text":"The distribution functions are shown in the first panel of the figure below, and the KB integrals are shown in the second panel.","category":"page"},{"location":"example3/","page":"◦ POPC membrane in water/ethanol","title":"◦ POPC membrane in water/ethanol","text":"(Image: )","category":"page"},{"location":"example3/","page":"◦ POPC membrane in water/ethanol","title":"◦ POPC membrane in water/ethanol","text":"Clearly, both water and ethanol accumulate on the proximity of the membrane. The distribution functions suggest that ethanol displays a greater local density augmentation, reaching concentrations roughly 4 times higher than bulk concentrations. Water has a peak at hydrogen-bonding distances (~1.8mathrmAA) and a secondary peak at 2.5mathrmAA.","category":"page"},{"location":"example3/","page":"◦ POPC membrane in water/ethanol","title":"◦ POPC membrane in water/ethanol","text":"Despite the fact that ethanol displays a greater relative density (relative to its own bulk concentration) at short distances, the KB integral of water turns out to be greater (more positive) than that of ethanol. This implies that the membrane is preferentially hydrated.","category":"page"},{"location":"example3/#groups1-example3","page":"◦ POPC membrane in water/ethanol","title":"Ethanol group contributions","text":"","category":"section"},{"location":"example3/","page":"◦ POPC membrane in water/ethanol","title":"◦ POPC membrane in water/ethanol","text":"The minimum-distance distribution function can be decomposed into the contributions of the ethanol molecule groups. ","category":"page"},{"location":"example3/","page":"◦ POPC membrane in water/ethanol","title":"◦ POPC membrane in water/ethanol","text":"
Complete example code: click here!","category":"page"},{"location":"example3/","page":"◦ POPC membrane in water/ethanol","title":"◦ POPC membrane in water/ethanol","text":"using Markdown\ncode = Markdown.parse(\"\"\"\n\\`\\`\\`julia\n$(read(\"./assets/scripts/example3/script2.jl\", String))\n\\`\\`\\`\n\"\"\")","category":"page"},{"location":"example3/","page":"◦ POPC membrane in water/ethanol","title":"◦ POPC membrane in water/ethanol","text":"

","category":"page"},{"location":"example3/","page":"◦ POPC membrane in water/ethanol","title":"◦ POPC membrane in water/ethanol","text":"In the figure below we show the contributions of the ethanol hydroxyl and aliphatic chain groups to the total MDDF.","category":"page"},{"location":"example3/","page":"◦ POPC membrane in water/ethanol","title":"◦ POPC membrane in water/ethanol","text":"(Image: https://raw.githubusercontent.com/m3g/ComplexMixturesExamples/main/POPC_in_Water-Ethanol/results/mddf_ethanol_groups.png)","category":"page"},{"location":"example3/","page":"◦ POPC membrane in water/ethanol","title":"◦ POPC membrane in water/ethanol","text":"As expected, the MDDF at hydrogen-bonding distances is composed by contributions of the ethanol hydroxyl group, and the non-specific interactions at ~2.5mathrmAA have a greater contribution of the aliphatic chain of the solvent molecules. It is interesting to explore the chemical complexity of POPC in what concerns these interactions.","category":"page"},{"location":"example3/#groups2-example3","page":"◦ POPC membrane in water/ethanol","title":"Interaction of POPC groups with water","text":"","category":"section"},{"location":"example3/","page":"◦ POPC membrane in water/ethanol","title":"◦ POPC membrane in water/ethanol","text":"The MDDF can also be decomposed into the contributions of the solute atoms and chemical groups. First, we show the contributions of the POPC chemical groups to the water-POPC distribution.","category":"page"},{"location":"example3/","page":"◦ POPC membrane in water/ethanol","title":"◦ POPC membrane in water/ethanol","text":"
Complete example code: click here!","category":"page"},{"location":"example3/","page":"◦ POPC membrane in water/ethanol","title":"◦ POPC membrane in water/ethanol","text":"using Markdown\ncode = Markdown.parse(\"\"\"\n\\`\\`\\`julia\n$(read(\"./assets/scripts/example3/script3.jl\", String))\n\\`\\`\\`\n\"\"\")","category":"page"},{"location":"example3/","page":"◦ POPC membrane in water/ethanol","title":"◦ POPC membrane in water/ethanol","text":"

","category":"page"},{"location":"example3/","page":"◦ POPC membrane in water/ethanol","title":"◦ POPC membrane in water/ethanol","text":"(Image: https://raw.githubusercontent.com/m3g/ComplexMixturesExamples/main/POPC_in_Water-Ethanol/results/mddf_popc_water_groups.png)","category":"page"},{"location":"example3/","page":"◦ POPC membrane in water/ethanol","title":"◦ POPC membrane in water/ethanol","text":"Not surprisingly, water interactions occur majoritarily with the Phosphate and Choline groups of POPC molecules, that is, with the polar head of the lipid. The interactions at hydrogen-bonding distances are dominated by the phosphate group, and non-specific interaction occur mostly with the choline group. Some water molecules penetrate the membrane and interact with the glycerol and aliphatic chains of POPC, but these contributions are clearly secondary.","category":"page"},{"location":"example3/#groups3-example3","page":"◦ POPC membrane in water/ethanol","title":"Interaction of POPC groups with ethanol","text":"","category":"section"},{"location":"example3/","page":"◦ POPC membrane in water/ethanol","title":"◦ POPC membrane in water/ethanol","text":"The interactions of ethanol molecules with the membrane are more interesting, because ethanol penetrates the membrane. Here we decompose the ethanol-POPC distribution function into the contributions of the POPC chemical groups.","category":"page"},{"location":"example3/","page":"◦ POPC membrane in water/ethanol","title":"◦ POPC membrane in water/ethanol","text":"
Complete example code: click here!","category":"page"},{"location":"example3/","page":"◦ POPC membrane in water/ethanol","title":"◦ POPC membrane in water/ethanol","text":"using Markdown\ncode = Markdown.parse(\"\"\"\n\\`\\`\\`julia\n$(read(\"./assets/scripts/example3/script4.jl\", String))\n\\`\\`\\`\n\"\"\")","category":"page"},{"location":"example3/","page":"◦ POPC membrane in water/ethanol","title":"◦ POPC membrane in water/ethanol","text":"

","category":"page"},{"location":"example3/","page":"◦ POPC membrane in water/ethanol","title":"◦ POPC membrane in water/ethanol","text":"(Image: https://raw.githubusercontent.com/m3g/ComplexMixturesExamples/main/POPC_in_Water-Ethanol/results/mddf_popc_ethanol_groups.png)","category":"page"},{"location":"example3/","page":"◦ POPC membrane in water/ethanol","title":"◦ POPC membrane in water/ethanol","text":"Ethanol molecules interact with the choline and phosphate groups of POPC molecules, as do water molecules. The contributions to the MDDF at hydrogen-bonding distances come essentially from ethanol-phosphate interactions.","category":"page"},{"location":"example3/","page":"◦ POPC membrane in water/ethanol","title":"◦ POPC membrane in water/ethanol","text":"However, ethanol molecules interact frequently with the glycerol and aliphatic chains of POPC. Interactions with the Oleoyl chain are slightly stronger than with the Palmitoyl chain. This means that ethanol penetrates the hydrophobic core of the membrane, displaying non-specific interactions with the lipids and with the glycerol group. These interactions are probably associated to the destabilizing role of ethanol in the membrane structure.","category":"page"},{"location":"example3/#map-example3","page":"◦ POPC membrane in water/ethanol","title":"Density map on POPC chains","text":"","category":"section"},{"location":"example3/","page":"◦ POPC membrane in water/ethanol","title":"◦ POPC membrane in water/ethanol","text":"The MDDFs can be decomposed at more granular level, in which each chemical group of the aliphatic chains of the POPC molecules are considered independently. This allows the study of the penetration of the ethanol molecules in the membrane. 
In the figure below, the carbonyl following the glycerol group of the POPC molecules is represented on the left, and going to the right the aliphatic chain groups are sequentially shown.","category":"page"},{"location":"example3/","page":"◦ POPC membrane in water/ethanol","title":"◦ POPC membrane in water/ethanol","text":"
Complete example code: click here!","category":"page"},{"location":"example3/","page":"◦ POPC membrane in water/ethanol","title":"◦ POPC membrane in water/ethanol","text":"using Markdown\ncode = Markdown.parse(\"\"\"\n\\`\\`\\`julia\n$(read(\"./assets/scripts/example3/script5.jl\", String))\n\\`\\`\\`\n\"\"\")","category":"page"},{"location":"example3/","page":"◦ POPC membrane in water/ethanol","title":"◦ POPC membrane in water/ethanol","text":"

","category":"page"},{"location":"example3/","page":"◦ POPC membrane in water/ethanol","title":"◦ POPC membrane in water/ethanol","text":"(Image: ./assets/scripts/example3/POPC_ethanol_chains.png)","category":"page"},{"location":"example3/","page":"◦ POPC membrane in water/ethanol","title":"◦ POPC membrane in water/ethanol","text":"Ethanol displays an important density augmentation at the vicinity of the carbonyl that follows the glycerol group, and accumulates on the proximity of the aliphatic chain. The density of ethanol decreases as one advances into the aliphatic chain, displaying a minimum around the insaturation in the Oleoyl chain. The terminal methyl group of both chains display a greater solvation by ethanol, suggesting the twisting of the aliphatic chain expose these terminal groups to membrane depth where ethanol is already abundant.","category":"page"},{"location":"example3/","page":"◦ POPC membrane in water/ethanol","title":"◦ POPC membrane in water/ethanol","text":"The equivalent maps for water are strikingly different, and show that water is excluded from the interior of the membrane:","category":"page"},{"location":"example3/","page":"◦ POPC membrane in water/ethanol","title":"◦ POPC membrane in water/ethanol","text":"(Image: ./assets/scripts/example3/POPC_water_chains.png)","category":"page"},{"location":"example3/#References","page":"◦ POPC membrane in water/ethanol","title":"References","text":"","category":"section"},{"location":"example3/","page":"◦ POPC membrane in water/ethanol","title":"◦ POPC membrane in water/ethanol","text":"Membrane built with the VMD membrane plugin. 
","category":"page"},{"location":"example3/","page":"◦ POPC membrane in water/ethanol","title":"◦ POPC membrane in water/ethanol","text":"Water and ethanol layers added with Packmol.","category":"page"},{"location":"example3/","page":"◦ POPC membrane in water/ethanol","title":"◦ POPC membrane in water/ethanol","text":"The simulations were performed with NAMD, with CHARMM36 parameters. ","category":"page"},{"location":"example3/","page":"◦ POPC membrane in water/ethanol","title":"◦ POPC membrane in water/ethanol","text":"Density of the ethanol-water mixture from: https://wissen.science-and-fun.de/chemistry/chemistry/density-tables/ethanol-water-mixtures/","category":"page"},{"location":"tools/#Tools","page":"Tools","title":"Tools","text":"","category":"section"},{"location":"tools/","page":"Tools","title":"Tools","text":"A set of examples of analyses that can be performed with ComplexMixtures is given in this site. A brief the description of the possible results is provided here. ","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"Some tools are provided to analyze the results:","category":"page"},{"location":"tools/#coordination_number","page":"Tools","title":"Coordination numbers","text":"","category":"section"},{"location":"tools/","page":"Tools","title":"Tools","text":"The function","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"coordination_number(R::Result, group::Union{SoluteGroup, SolventGroup})","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"computes the coordination number of a given group of atoms from the solute or solvent atomic contributions to the MDDF. 
Here, R is the result of the mddf calculation, and group is a SoluteGroup or SolventGroup object defining the desired set of atoms.","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"If no group is defined, the coordination number of the complete solute is returned, which is equivalent to the R.coordination_number field of the Result data structure:","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"coordination_number(R::Result) == R.coordination_number","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"note: Note\nThere are some systems for which the normalization of the distributions is not necessary or possible. It is still possible to compute the coordination numbers, by running, instead of mddf, the coordination_number function:coordination_number(trajectory::Trajectory, options::Options)This call will return a Result data structure, but with all fields that require normalization filled with zeros. In summary, this result data structure can be used to compute the coordination numbers, but not the MDDF, RDF, or KB integrals.","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"compat: Compat\nThe independent computation of coordination numbers was introduced in version 1.1.","category":"page"},{"location":"tools/#Example","page":"Tools","title":"Example","text":"","category":"section"},{"location":"tools/","page":"Tools","title":"Tools","text":"In the following example we compute the coordination number of the atoms of residue 50 (which belongs to the solute - a protein) with the solvent atoms of TMAO, as a function of the distance. 
The plot produced will show side by side the residue contribution to the MDDF and the corresponding coordination number.","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"using ComplexMixtures, PDBTools\nusing Plots, EasyFit\npdb = readPDB(\"test/data/NAMD/structure.pdb\")\nR = load(\"test/data/NAMD/protein_tmao.json\")\nsolute = AtomSelection(PDBTools.select(pdb, \"protein\"), nmols=1)\nresidue50 = PDBTools.select(pdb, \"residue 50\")\n# Compute the group contribution to the MDDF\nresidue50_contribution = contributions(R, SoluteGroup(residue50))\n# Now compute the coordination number\nresidue50_coordination = coordination_number(R, SoluteGroup(residue50))\n# Plot with twin y-axis\nplot(R.d, movavg(residue50_contribution,n=10).x,\n xaxis=\"distance / Å\", \n yaxis=\"MDDF contribution\", \n linewidth=2, label=nothing, color=1\n)\nplot!(twinx(),R.d, residue50_coordination, \n yaxis=\"Coordination number\", \n linewidth=2, label=nothing, color=2\n)\nplot!(title=\"Residue 50\", framestyle=:box, subplot=1)","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"With appropriate input data, this code produces:","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"
\n\n
","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"Modules = [ComplexMixtures]\nPages = [\"coordination_number.jl\"]","category":"page"},{"location":"tools/#ComplexMixtures.coordination_number","page":"Tools","title":"ComplexMixtures.coordination_number","text":"coordination_number(R::Result) = R.coordination_number\ncoordination_number(R::Result, s::Union{SoluteGroup,SolventGroup})\n\nComputes the coordination number of a given group of atoms of the solute or solvent\n\natomic contributions to the MDDF. If no group is defined (first call above), the coordination number of the whole solute or solvent is returned.\n\nIf the group_contributions to the mddf are computed previously with the contributions function, the result can be used to compute the coordination number by calling coordination_number(R::Result, group_contributions).\n\nOtherwise, the coordination number can be computed directly with the second call, where:\n\ns is the solute or solvent selection (type ComplexMixtures.AtomSelection)\n\natom_contributions is the R.solute_atom or R.solvent_atom arrays of the Result structure\n\nR is the Result structure,\n\nand the last argument is the selection of atoms from the solute to be considered, given as a list of indices, list of atom names, or a selection following the syntax of PDBTools, or vector of PDBTools.Atoms, or a PDBTools.Residue\n\nExamples\n\nIn the following example we compute the coordination number of the atoms of residue 50 (of the solute) with the solvent atoms of TMAO, as a function of the distance. Finally, we show the average number of TMAO molecules within 5 Angstroms of residue 50. 
The findlast(<(5), R.d) part of the code below returns the index of the last element of the R.d array that is smaller than 5 Angstroms.\n\nPrecomputing the group contributions Using the contributions function\n\nusing ComplexMixtures, PDBTools\npdb = readPDB(\"test/data/NAMD/structure.pdb\");\nR = load(\"test/data/NAMD/protein_tmao.json\");\nsolute = AtomSelection(PDBTools.select(pdb, \"protein\"), nmols=1);\nresidue50 = PDBTools.select(pdb, \"residue 50\");\n# Compute the group contributions to the MDDF\nresidue50_contribution = contributions(solute, R.solute_atom, residue50);\n# Now compute the coordination number\nresidue50_coordination = coordination_number(R, residue50_contribution)\n# Output the average number of TMAO molecules within 5 Angstroms of residue 50\nresidue50_coordination[findlast(<(5), R.d)]\n\nWithout precomputing the group_contribution\n\nusing ComplexMixtures, PDBTools\npdb = readPDB(\"test/data/NAMD/structure.pdb\");\nR = load(\"test/data/NAMD/protein_tmao.json\");\nsolute = AtomSelection(PDBTools.select(pdb, \"protein\"), nmols=1);\nresidue50 = PDBTools.select(pdb, \"residue 50\");\n# Compute the coordination number\nresidue50_coordination = coordination_number(solute, R.solute_atom, R, group)\n# Output the average number of TMAO molecules within 5 Angstroms of residue 50\nresidue50_coordination[findlast(<(5), R.d)]\n\n\n\n\n\n","category":"function"},{"location":"tools/#Computing-a-2D-density-map-around-a-macromolecule","page":"Tools","title":"Computing a 2D density map around a macromolecule","text":"","category":"section"},{"location":"tools/","page":"Tools","title":"Tools","text":"One nice way to visualize the accumulation or depletion of a solvent around a macromolecule (a protein, for example), is to obtain a 2D map of the density as a function of the distance from its surface. 
For example, in the figure below the density of a solute (here, Glycerol), in the neighborhood of a protein is shown:","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"
\n\n
","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"Here, one can see that Glycerol accumulates on Asp76 and on the proximity of hydrogen-bonding residues (Serine residues mostly). This figure was obtained by extracting from atomic contributions of the protein the contribution of each residue to the MDDF. Using PDBTools, this can be done with, for example: ","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"residues = collect(eachresidue(protein))\nresidue_contributions = zeros(length(R.d),length(residues))\nfor (i,residue) in pairs(residues)\n c = contributions(results, SoluteGroup(residue)) \n residue_contributions[:,i] .= c\nend","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"The above produces a matrix with a number of columns equal to the number of residues and a number of rows equal to the number of MDDF points. That matrix can be plotted as a contour map with adequate plotting software. A complete running example is provided here, producing the figure above. ","category":"page"},{"location":"tools/#Computing-a-3D-density-map-around-a-macromolecule","page":"Tools","title":"Computing a 3D density map around a macromolecule","text":"","category":"section"},{"location":"tools/","page":"Tools","title":"Tools","text":"Three-dimensional representations of the distribution functions can also be obtained from the MDDF results. These 3D representations are obtained from the fact that the MDDFs can be decomposed into the contributions of each solute atom, and that each point in space is closest to a single solute atom as well. Thus, each point in space can be associated to one solute atom, and the contribution of that atom to the MDDF at the corresponding distance can be obtained. 
","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"For example, the distribution function of a hydrogen-bonding liquid solvating a protein will display a characteristic peak at about 1.8Å. The MDDF at that distance can be decomposed into the contributions of all atoms of the protein which were found to form hydrogen bonds to the solvent. A 3D representation of these contributions can be obtained by computing, around a static protein (solute) structure, which are the regions in space which are closer to each atom of the protein. The position in space is then marked with the atom of the protein to which that region \"belongs\" and with the contribution of that atom to the MDDF at each distance within that region. A special function to compute this 3D distribution is provided here: grid3D. ","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"This is better illustrated by a graphical representation. In the figure below we see a 3D representation of the MDDF of Glycerol around a protein, computed from a simulation of this protein in a mixture of water and Glycerol. A complete set of files and a script to reproduce this example is available here. ","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"
\n\n
","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"In the figure on the left, the points in space around the protein are selected with the following properties: distance from the protein smaller than 2.0Å and relative contribution to the MDDF at the corresponding distance of at least 10% of the maximum contribution. Thus, we are selecting the regions of the protein corresponding to the most stable hydrogen-bonding interactions. The color of the points is the contribution to the MDDF, from blue to red. Thus, the most reddish-points corresponds to the regions where the most stable hydrogen bonds were formed. We have marked two regions here, on opposite sides of the protein, with arrows.","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"Clicking on those points we obtain which are the atoms of the protein contributing to the MDDF at that region. In particular, the arrow on the right points to the strongest red region, which corresponds to an Aspartic acid. These residues are shown explicitly under the density (represented as a transparent surface) on the figure in the center. ","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"The figure on the right displays, overlapped with the hydrogen-bonding residues, the most important contributions to the second peak of the distribution, corresponding to distances from the protein between 2.0 and 3.5Å. Notably, the regions involved are different from the ones forming hydrogen bonds, indicating that non-specific interactions with the protein (and not a second solvation shell) are responsible for the second peak. ","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"The call to grid3D in the last command will write an output a PDB file with the grid points, which loaded in a visualization software side-by-side with the protein structure, allows the production of the images shown. 
The grid.pdb file follows the regular PDB format, but the atoms are grid points. The identity of the atoms corresponds to the identity of the protein atom contributing to the MDDF at that point (the closest protein atom). The temperature-factor column (beta) contains the relative contribution of that atom to the MDDF at the corresponding distance, and the occupancy field contains the distance itself.","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"The output grid variable contains the same information of the PDB file, which can be analyzed with the tools of PDBTools if the user wants to.","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"Modules = [ComplexMixtures]\nPages = [\"tools/grid3D.jl\"]","category":"page"},{"location":"tools/#ComplexMixtures.grid3D","page":"Tools","title":"ComplexMixtures.grid3D","text":"grid3D(\n result::Result, atoms, output_file::Union{Nothing,String} = nothing; \n dmin=1.5, dmax=5.0, step=0.5\n)\n\nThis function builds the grid of the 3D density function and fills an array of mutable structures of type Atom, containing the position of the atoms of grid, the closest atom to that position, and distance. \n\nresult is a ComplexMixtures.Result object; atoms is a vector of PDBTools.Atoms with all the atoms of the system. output_file is the name of the file where the grid will be written. If nothing, the grid is only returned as a matrix. \n\ndmin and dmax define the range of distance where the density grid will be built, and step defines how fine the grid must be. 
Be aware that fine grids usually involve a very large number of points (hundreds of thousands).\n\nExample\n\njulia> using ComplexMixtures, PDBTools\n\njulia> atoms = readPDB(\"./system.pdb\");\n\njulia> R = ComplexMixtures.load(\"./results.json\");\n\njulia> grid = grid3D(R, atoms, \"grid.pdb\");\n\ngrid will contain a vector of Atoms with the information of the MDDF at each grid point, and the same data will be written in the grid.pdb file. This PDB file can be opened in VMD, for example, and contains in the beta field the contribution of each protein residue to the MDDF at each point in space relative to the protein, and in the occupancy field the distance to the protein. Examples of how this information can be visualized are provided in the user guide of ComplexMixtures. \n\n\n\n\n\n","category":"function"},{"location":"tools/#Computing-radial-distribution-functions","page":"Tools","title":"Computing radial distribution functions","text":"","category":"section"},{"location":"tools/","page":"Tools","title":"Tools","text":"The distributions returned by the mddf function (the mddf and rdf vectors), are normalized by the random reference state or using a site count based on the numerical integration of the volume corresponding to each minimum-distance to the solute. ","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"If, however, the solute is defined by a single atom (as the oxygen atom of water, for example), the numerical integration of the volume can be replaced by a simple analytical spherical shell volume, reducing noise. 
The ComplexMixtures.gr function returns the radial distribution function and the KB integral computed from the results, using this volume estimate: ","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"g, kb = ComplexMixtures.gr(R)","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"By default, the single-reference count (rdf_count) of the Result structure will be used to compute the radial distribution function. The function can be called with explicit control of all input parameters: ","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"g, kb = ComplexMixtures.gr(r,count,density,binstep)","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"where:","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"Parameter Definition Result structure output data to provide\nr Vector of distances The d vector\ncount Number of site counts at each r The rdf or mddf vectors\ndensity Bulk density The density.solvent_bulk or density.solvent densities.\nbinstep The histogram step The options.binstep\n ","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"Example:","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"...\nR = mddf(trajectory,options)\ng, kb = ComplexMixtures.gr(R.d,R.rdf_count,R.density.solvent_bulk,R.options.binstep)","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"Modules = [ComplexMixtures]\nPages = [\"gr.jl\"]","category":"page"},{"location":"tools/#ComplexMixtures.gr-Tuple{Result}","page":"Tools","title":"ComplexMixtures.gr","text":"gr(R::Result) = gr(R.d,R.rdf_count,R.density.solvent_bulk,R.files[1].options.binstep)\n\nIf a Result structure is provided without further details, use the rdf count and the bulk solvent density.\n\n\n\n\n\n","category":"method"},{"location":"tools/#ComplexMixtures.gr-Tuple{Vector{Float64}, Vector{Float64}, Float64, 
Float64}","page":"Tools","title":"ComplexMixtures.gr","text":"gr(r::Vector{Float64}, count::Vector{Float64}, density::Float64, binstep::Float64)\n\nComputes the radial distribution function from the count data and the density.\n\nThis is exactly a conventional g(r) if a single atom was chosen as the solute and solvent selections.\n\nReturns both the g(r) and the kb(r)\n\n\n\n\n\n","category":"method"},{"location":"tools/#Overview-of-the-solvent-and-solute-properties","page":"Tools","title":"Overview of the solvent and solute properties","text":"","category":"section"},{"location":"tools/","page":"Tools","title":"Tools","text":"The output to the REPL of the Result structure provides an overview of the properties of the solution. The data can be retrieved into a data structure using the overview function. Examples: ","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"...\njulia> results = mddf(trajectory)\n\njulia> results\n\n-------------------------------------------------------------------------------\n\n MDDF Overview: \n\n Solvent properties: \n ------------------- \n\n Simulation concentration: 1.5209006318095133 mol L⁻¹\n Molar volume: 657.5051512801567 cm³ mol⁻¹\n\n Concentration in bulk: 1.4918842545752287 mol L⁻¹\n Molar volume in bulk: 670.2932864484995 cm³ mol⁻¹ \n\n Solute properties: \n ------------------ \n\n Simulation Concentration: 1.5209006318095133 mol L⁻¹\n Estimated solute partial molar volume: 657.5051512801567 cm³ mol⁻¹\n\n Using dbulk = 20.0Å: \n Molar volume of the solute domain: 30292.570006549242 cm³ mol⁻¹\n\n Auto-correlation: true\n\n Trajectory files and weights: \n ./vinicius.xtc - w = 1.0\n\n Long range MDDF mean (expected 1.0): 1.1090804621839963 +/- 0.04298849642932878\n Long range RDF mean (expected 1.0): 1.15912932236198 +/- 
0.05735018864444404\n\n-------------------------------------------------------------------------------","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"In this case, since solute and solvent are equivalent and the system is homogeneous, the molar volumes and concentrations are similar. This is not the case if the molecules are different or if the solute is at infinite dilution (in which case the bulk solvent density might be different from the solvent density in the simulation). ","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"To retrieve the data of the overview structure use, for example:","category":"page"},{"location":"tools/","page":"Tools","title":"Tools","text":"julia> overview = overview(results);\n\njulia> overview.solute_molar_volume\n657.5051512801567","category":"page"},{"location":"updating_scripts/#updating-scripts","page":"Updating scripts","title":"Updating scripts from v1 to v2","text":"","category":"section"},{"location":"updating_scripts/","page":"Updating scripts","title":"Updating scripts","text":"The syntax changes necessary to update scripts from version 1.X to 2.X of the package are:","category":"page"},{"location":"updating_scripts/#Atom-selections","page":"Updating scripts","title":"Atom selections","text":"","category":"section"},{"location":"updating_scripts/","page":"Updating scripts","title":"Updating scripts","text":"The previous Selection structure was renamed to AtomSelection for clarity.","category":"page"},{"location":"updating_scripts/","page":"Updating scripts","title":"Updating scripts","text":"Before:","category":"page"},{"location":"updating_scripts/","page":"Updating scripts","title":"Updating scripts","text":"water = Selection(water; natomspermol=3)","category":"page"},{"location":"updating_scripts/","page":"Updating scripts","title":"Updating scripts","text":"Now:","category":"page"},{"location":"updating_scripts/","page":"Updating scripts","title":"Updating 
scripts","text":"water = AtomSelection(water; natomspermol=3)","category":"page"},{"location":"updating_scripts/#Group-contributions-syntax","page":"Updating scripts","title":"Group contributions syntax","text":"","category":"section"},{"location":"updating_scripts/","page":"Updating scripts","title":"Updating scripts","text":"The syntax for computing group contributions is improved. Previously, the contrib or contributions functions required three somewhat redundant parameters. ","category":"page"},{"location":"updating_scripts/","page":"Updating scripts","title":"Updating scripts","text":"Before:","category":"page"},{"location":"updating_scripts/","page":"Updating scripts","title":"Updating scripts","text":"The call to contributions required 3 parameters: the Selection structure, the matrix of contributions, and the indexes of the atoms for which the contributions were desired:","category":"page"},{"location":"updating_scripts/","page":"Updating scripts","title":"Updating scripts","text":"h_contributions = contributions(solvent, R.solvent_atom, h_indexes)","category":"page"},{"location":"updating_scripts/","page":"Updating scripts","title":"Updating scripts","text":"Now:","category":"page"},{"location":"updating_scripts/","page":"Updating scripts","title":"Updating scripts","text":"The contributions are extracted from the Result data structure, by providing either a SoluteGroup or SolventGroup object, which are set up with the group names, group indexes, atom names, or atom indexes:","category":"page"},{"location":"updating_scripts/","page":"Updating scripts","title":"Updating scripts","text":"h_contributions = contributions(R, SolventGroup(h_indexes))","category":"page"},{"location":"updating_scripts/#Frame-weights","page":"Updating scripts","title":"Frame weights","text":"","category":"section"},{"location":"updating_scripts/","page":"Updating scripts","title":"Updating scripts","text":"frame_weights is now an option of the mddf execution. 
That is, previously they were defined in the Options data structure, and now they are passed to the mddf function.","category":"page"},{"location":"updating_scripts/","page":"Updating scripts","title":"Updating scripts","text":"Before:","category":"page"},{"location":"updating_scripts/","page":"Updating scripts","title":"Updating scripts","text":"options = Options(frame_weights=[1.0, 2.0])\nresults = mddf(trajectory, options)","category":"page"},{"location":"updating_scripts/","page":"Updating scripts","title":"Updating scripts","text":"Now:","category":"page"},{"location":"updating_scripts/","page":"Updating scripts","title":"Updating scripts","text":"results = mddf(trajectory, options; frame_weights=[1.0, 2.0])","category":"page"},{"location":"examples/#examples","page":"Examples:","title":"Examples","text":"","category":"section"},{"location":"examples/#List-of-examples","page":"Examples:","title":"List of examples","text":"","category":"section"},{"location":"examples/","page":"Examples:","title":"Examples:","text":"Protein in water/glycerol\nPolyacrylamide in DMF\nPOPC membrane in water/ethanol\nGlycerol/water mixture","category":"page"},{"location":"examples/#How-to-run-these-examples","page":"Examples:","title":"How to run these examples","text":"","category":"section"},{"location":"examples/","page":"Examples:","title":"Examples:","text":"1 Download and install Julia","category":"page"},{"location":"examples/","page":"Examples:","title":"Examples:","text":"To run the scripts, we suggest the following procedure:","category":"page"},{"location":"examples/","page":"Examples:","title":"Examples:","text":"Create a directory, for example example1.\nCopy the required data files, indicated in each example.\nLaunch julia in that directory, activate the directory environment, and install the required packages. 
This is done by launching Julia and executing:\nimport Pkg \nPkg.activate(\".\")\nPkg.add([\"ComplexMixtures\", \"PDBTools\", \"Plots\", \"LaTeXStrings\", \"EasyFit\"])\nexit()\nCopy the code of each script into a file, and execute with:\njulia -t auto script.jl\nAlternatively (and perhaps preferably), copy line by line the content of the script into the Julia REPL, to follow each step of the calculation. For a more advanced Julia usage, we suggest the VSCode IDE with the Julia Language Support extension. ","category":"page"},{"location":"options/#options","page":"Options","title":"Options","text":"","category":"section"},{"location":"options/","page":"Options","title":"Options","text":"There are some options to control what exactly is going to be computed to obtain the MDDF. These options can be defined by the user and passed to the mddf function, using, for example: ","category":"page"},{"location":"options/","page":"Options","title":"Options","text":"options = Options(lastframe=1000)\nresults = mddf(trajectory,options)","category":"page"},{"location":"options/#Frame-ranges-and-histogram-properties","page":"Options","title":"Frame ranges and histogram properties","text":"","category":"section"},{"location":"options/","page":"Options","title":"Options","text":"These are common options that the regular user might want to set in their calculation.","category":"page"},{"location":"options/","page":"Options","title":"Options","text":"firstframe: Integer, first frame of the trajectory to be considered.","category":"page"},{"location":"options/","page":"Options","title":"Options","text":"lastframe: Integer, last frame of the trajectory to be considered.","category":"page"},{"location":"options/","page":"Options","title":"Options","text":"stride: Integer, consider every stride frames, that is, if stride=5 only one in five frames will be considered.","category":"page"},{"location":"options/","page":"Options","title":"Options","text":"binstep: Real, length of the bin step 
of the histograms, default = 0.02 Angstroms.","category":"page"},{"location":"options/","page":"Options","title":"Options","text":"dbulk: Real, distance from which the solution is to be considered as a bulk solution, that is, where the presence of the solute does not affect the structure of the solution anymore. This parameter is important in particular for systems with a single solute molecule (a protein, for example), where the density of the solvent in the box is not the bulk density of the solvent, which must be computed independently. Default: 10 Angstroms. ","category":"page"},{"location":"options/","page":"Options","title":"Options","text":"cutoff: Real, the maximum distance to be considered in the construction of histograms. Default: 10 Angstroms. ","category":"page"},{"location":"options/","page":"Options","title":"Options","text":"usecutoff: true/false: If true, the cutoff distance might be different from dbulk and the density of the solvent in bulk will be estimated from the density within dbulk and cutoff. If false, the density of the solvent is estimated from the density outside dbulk by exclusion. Default: false. ","category":"page"},{"location":"options/#Lower-level-options","page":"Options","title":"Lower level options","text":"","category":"section"},{"location":"options/","page":"Options","title":"Options","text":"These will probably never be set by the user, unless dealing with some special system (very large, or very low density system).","category":"page"},{"location":"options/","page":"Options","title":"Options","text":"irefatom: Integer, index of the reference atom in the solvent molecule used to compute the shell volumes and domain volumes in the Monte-Carlo volume estimates. The final rdf data is reported for this atom as well. By default, we choose the atom which is closer to the center of coordinates of the molecule, but any choice should be fine. 
","category":"page"},{"location":"options/","page":"Options","title":"Options","text":"n_random_samples: Integer, how many samples of random molecules are generated for each solvent molecule to compute the shell volumes and random MDDF counts. Default: 10. Increase this only if you have a short trajectory and want to obtain reproducible results for that short trajectory. For long trajectories (most desirable and common), this value can even be decreased to speed up the calculations. ","category":"page"},{"location":"options/","page":"Options","title":"Options","text":"seed: Seed for random number generator. If -1, the seed will be generated from the entropy of the system. If your results are dependent on the seed, it is probable that you do not have enough sampling. Mostly used for testing purposes. Two runs are only identical if ran with the same seed and in serial mode. ","category":"page"},{"location":"options/","page":"Options","title":"Options","text":"StableRNG (::Bool), defaults to false. Use a stable random number generator from the StableRNGs package, to produce identical runs on different architectures and Julia versions. Only used for testing. ","category":"page"},{"location":"options/","page":"Options","title":"Options","text":"nthreads: How many threads to use. By default, it will be the number of physical cores of the computer.","category":"page"},{"location":"options/","page":"Options","title":"Options","text":"lcell: Integer, the cell length of the linked-cell method (actually the cell length is cutoff/lcell). Default: 1. ","category":"page"},{"location":"options/","page":"Options","title":"Options","text":"GC: Bool, force garbage collection, to avoid memory overflow. Default: true. That this might be required is probably a result of something that can be vastly improved in memory management. 
This may slow down parallel runs significantly if the GC runs too often.","category":"page"},{"location":"options/","page":"Options","title":"Options","text":"GC_threshold: Float64, minimum fraction of the total memory of the system required to force a GC run. That is, if GC_threshold=0.1, which is the default, every time the free memory becomes less than or equal to 10% of the total memory available, a GC run occurs. ","category":"page"},{"location":"options/#Frame-statistical-reweighing","page":"Options","title":"Frame statistical reweighing","text":"","category":"section"},{"location":"options/","page":"Options","title":"Options","text":"compat: Compat\nFrame reweighing is available in ComplexMixtures 2.0.0 or greater.","category":"page"},{"location":"options/","page":"Options","title":"Options","text":"Most times the weights of each frame of the trajectory are the same, resulting from some standard MD simulation. If, for some reason, the frames have different statistical weights, the weights can be passed to the mddf function as an optional parameter frame_weights.","category":"page"},{"location":"options/","page":"Options","title":"Options","text":"For example:","category":"page"},{"location":"options/","page":"Options","title":"Options","text":"julia> results = mddf(trajectory, options; frame_weights=[0.0, 1.0, 2.0])","category":"page"},{"location":"options/","page":"Options","title":"Options","text":"The code above will assign a larger weight to the third frame of the trajectory. These weights are relative (meaning that [0.0, 2.0, 4.0] would produce the same result). What will happen under the hood is that the distance counts of the frames will be multiplied by each frame weight, and normalized for the sum of the weights.","category":"page"},{"location":"options/","page":"Options","title":"Options","text":"Important: The length of the frame_weights vector must be at least equal to the number of the last frame read from the trajectory. 
That is, if lastframe is not set, and all the frames will be read, the length of frame_weights must be equal to the length of the trajectory (the stride parameter will skip the information both of the frames and its weights). If lastframe is set, then the length of frame_weights must be at least lastframe (it can be greater, and further values will be ignored). Importantly, the indices of the elements in frame_weights are assumed to correspond to the indices of the frames in the original trajectory file.","category":"page"},{"location":"options/#Compute-coordination-number-only","page":"Options","title":"Compute coordination number only","text":"","category":"section"},{"location":"options/","page":"Options","title":"Options","text":"For some systems, it may be impossible, or too expensive, to compute the normalization of the minimum-distance distribution function. Nevertheless, the coordination number may still be interesting information to be retrieved from the simulations. To run the computation to compute coordination numbers only, do:","category":"page"},{"location":"options/","page":"Options","title":"Options","text":"julia> results = mddf(trajectory, options; coordination_number_only = true)","category":"page"},{"location":"options/","page":"Options","title":"Options","text":"note: Note\nWith coordination_number_only set to true, the arrays associated to MDDFs and KB integrals will be empty in the output data structure. 
","category":"page"},{"location":"options/","page":"Options","title":"Options","text":"Modules = [ComplexMixtures]\nPages = [\"Options.jl\"]","category":"page"},{"location":"options/#ComplexMixtures.Options","page":"Options","title":"ComplexMixtures.Options","text":"struct Options\n\nStructure that contains the detailed input options.\n\nfirstframe::Int64\nlastframe::Int64\nstride::Int64\nirefatom::Int64\nn_random_samples::Int64\nbinstep::Float64\ndbulk::Float64\ncutoff::Float64\nusecutoff::Bool\nlcell::Int64\nGC::Bool\nGC_threshold::Float64\nseed::Int64\nStableRNG::Bool\nnthreads::Int64\nsilent::Bool\n\n\n\n\n\n","category":"type"},{"location":"python/#python","page":"From Python","title":"From Python","text":"","category":"section"},{"location":"python/","page":"From Python","title":"From Python","text":"note: Note\nMost features of the package are available through this Python interface. However, some flexibility may be reduced and, also, the tuning of the plot appearance is left to the user, as it is expected that he/she is fluent with some tools within Python if choosing this interface. Python 3 or greater is required. Please report issues, incompatibilities, or any other difficulty in using the package and its interface.","category":"page"},{"location":"python/","page":"From Python","title":"From Python","text":"The following examples consider a system composed of a protein solvated by a mixture of water and glycerol, built with Packmol. The simulations were performed with NAMD with periodic boundary conditions and an NPT ensemble at room temperature and pressure. Molecular pictures were produced with VMD.","category":"page"},{"location":"python/","page":"From Python","title":"From Python","text":"
\n\n
","category":"page"},{"location":"python/","page":"From Python","title":"From Python","text":"Image of the system of the example: a protein solvated by a mixture of glycerol (green) and water, at a concentration of 50%vv.","category":"page"},{"location":"python/#Loading-the-ComplexMixtures.py-file","page":"From Python","title":"Loading the ComplexMixtures.py file","text":"","category":"section"},{"location":"python/","page":"From Python","title":"From Python","text":"The Python interface of ComplexMixtures is implemented in the ComplexMixtures.py file. Just download it from the link and save it in a known path.","category":"page"},{"location":"python/#Installing-juliacall","page":"From Python","title":"Installing juliacall","text":"","category":"section"},{"location":"python/","page":"From Python","title":"From Python","text":"juliacall is a package that allows calling Julia programs from Python. Install it with","category":"page"},{"location":"python/","page":"From Python","title":"From Python","text":"pip install juliacall","category":"page"},{"location":"python/#Installing-Julia-and-underlying-packages","page":"From Python","title":"Installing Julia and underlying packages","text":"","category":"section"},{"location":"python/","page":"From Python","title":"From Python","text":"Once juliacall is installed, from within Python, execute:","category":"page"},{"location":"python/","page":"From Python","title":"From Python","text":"import ComplexMixtures","category":"page"},{"location":"python/","page":"From Python","title":"From Python","text":"here we assume that the ComplexMixtures.py file is in the same directory where you launched Python.","category":"page"},{"location":"python/","page":"From Python","title":"From Python","text":"note: Note\nOn the first time you execute this command, the Julia executable and the required Julia packages (ComplexMixtures and PDBTools) will be downloaded and installed. 
At the end of the process quit Python (not really required, but we prefer to separate the installation from the use of the module). ","category":"page"},{"location":"python/#Example","page":"From Python","title":"Example","text":"","category":"section"},{"location":"python/#Index","page":"From Python","title":"Index","text":"","category":"section"},{"location":"python/","page":"From Python","title":"From Python","text":"Data, packages, and execution\nMinimum-Distance Distribution function\nMDDF and KB integrals\nAtomic contributions to the MDDF","category":"page"},{"location":"python/#data-pythonexample","page":"From Python","title":"Data, packages, and execution","text":"","category":"section"},{"location":"python/","page":"From Python","title":"From Python","text":"The files required to run this example are:","category":"page"},{"location":"python/","page":"From Python","title":"From Python","text":"system.pdb: The PDB file of the complete system.\nglyc50_sample.dcd: A 30Mb sample trajectory file. The full trajectory can also be used, but it is a 1GB file.","category":"page"},{"location":"python/","page":"From Python","title":"From Python","text":"To start, create a directory and copy the ComplexMixtures.py file to it. Navigate into this directory, and, to start, set the number of threads that Julia will use, to run the calculations in parallel. Typically, in bash, this means defining the following environment variable:","category":"page"},{"location":"python/","page":"From Python","title":"From Python","text":"export JULIA_NUM_THREADS=8","category":"page"},{"location":"python/","page":"From Python","title":"From Python","text":"where 8 is the number of CPU cores available in your computer. 
For further information about Julia multi-threading, and on setting this environment variable in other systems, please read this section of the Julia manual.","category":"page"},{"location":"python/","page":"From Python","title":"From Python","text":"Finally, each script can be executed with, for example:","category":"page"},{"location":"python/","page":"From Python","title":"From Python","text":"python3 script.py","category":"page"},{"location":"python/#script1-python","page":"From Python","title":"Minimum-Distance Distribution function","text":"","category":"section"},{"location":"python/","page":"From Python","title":"From Python","text":"
Complete example code: click here!","category":"page"},{"location":"python/","page":"From Python","title":"From Python","text":"using Markdown\ncode = Markdown.parse(\"\"\"\n\\`\\`\\`python\n$(read(\"./assets/scripts/python/script1.py\", String))\n\\`\\`\\`\n\"\"\")","category":"page"},{"location":"python/","page":"From Python","title":"From Python","text":"

","category":"page"},{"location":"python/","page":"From Python","title":"From Python","text":"Note that the example here follows an identical syntax to the Julia example, except that we qualify the name of the loaded module and implicitly load the PDBTools package.","category":"page"},{"location":"python/","page":"From Python","title":"From Python","text":"The script to compute the MDDFs as associated data from within python is, then:","category":"page"},{"location":"python/","page":"From Python","title":"From Python","text":"note: Note\nTo change the options of the calculation, set the Options structure accordingly and pass it as a parameter to mddf. For example:options = cm.Options(cutoff=10.)\nresults = cm.mddf(trajectory,options)The complete set of options available is described here.","category":"page"},{"location":"python/","page":"From Python","title":"From Python","text":"The trajectory that was loaded was for a toy-example. The complete trajectory is available here, but it is a 3GB file. The same procedure above was performed with that file and produced the results_Glyc50.json file, which is available in the Data directory here. We will continue with this file instead. ","category":"page"},{"location":"python/#python-plotting1","page":"From Python","title":"MDDF and KB integrals","text":"","category":"section"},{"location":"python/","page":"From Python","title":"From Python","text":"The following python script will produce the typical MDDF and KB integral plot, for the sample system. The noise in the figures is because the trajectory sample is small.","category":"page"},{"location":"python/","page":"From Python","title":"From Python","text":"
Complete example code: click here!","category":"page"},{"location":"python/","page":"From Python","title":"From Python","text":"using Markdown\ncode = Markdown.parse(\"\"\"\n\\`\\`\\`python\n$(read(\"./assets/scripts/python/script2.py\", String))\n\\`\\`\\`\n\"\"\")","category":"page"},{"location":"python/","page":"From Python","title":"From Python","text":"

","category":"page"},{"location":"python/","page":"From Python","title":"From Python","text":"(Image: )","category":"page"},{"location":"python/","page":"From Python","title":"From Python","text":"In the top plot, we see that glycerol and water display clear solvation shells around the protein, with glycerol having a greater peak. This accumulation leads to a greater (less negative) KB integral for glycerol than water, as shown in the second plot. This indicates that the protein is preferentially solvated by glycerol in this system (assuming that sampling is adequate in this small trajectory).","category":"page"},{"location":"python/#python-plotting2","page":"From Python","title":"Atomic contributions to the MDDF","text":"","category":"section"},{"location":"python/","page":"From Python","title":"From Python","text":"The following script produces a plot of the group contributions of Glycerol to the total MDDF function. The Glycerol MDDF is split into the contributions of the hydroxyl and aliphatic groups.","category":"page"},{"location":"python/","page":"From Python","title":"From Python","text":"
Complete example code: click here!","category":"page"},{"location":"python/","page":"From Python","title":"From Python","text":"using Markdown\ncode = Markdown.parse(\"\"\"\n\\`\\`\\`python\n$(read(\"./assets/scripts/python/script3.py\", String))\n\\`\\`\\`\n\"\"\")","category":"page"},{"location":"python/","page":"From Python","title":"From Python","text":"

","category":"page"},{"location":"python/","page":"From Python","title":"From Python","text":"(Image: )","category":"page"},{"location":"python/","page":"From Python","title":"From Python","text":"Despite the low sampling, it is clear that hydroxyl groups contribute to the greater peak of the distribution, at hydrogen-bonding distances, as expected. The contributions of the aliphatic groups to the MDDF occur at longer distances, associated to non-specific interactions. ","category":"page"},{"location":"python/","page":"From Python","title":"From Python","text":"note: Note\nThe syntax here diverges from the Julia-only examples by requiring the lists of names to be converted to Julia arrays, which happens by using the cm.list(python_list) function calls.","category":"page"},{"location":"#Introduction","page":"Introduction","title":"Introduction","text":"","category":"section"},{"location":"","page":"Introduction","title":"Introduction","text":"ComplexMixtures.jl is a package to study the solute and solvent interactions of mixtures of molecules of complex shape. Conventional radial distribution functions are not appropriate to represent the structure of a solvent around a solute with many atoms, and a variable, non-spherical shape. ","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"Typical solutes of complex shape are proteins, nucleic acids, and polymers in general. Smaller molecules like lipids, carbohydrates, etc, are also complex enough such that representing the structure of the solution of those molecules with distribution functions is not trivial.","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"Minimum-Distance Distribution Functions (MDDFs) are a very general and practical way to represent solute-solvent interactions for molecules with arbitrarily complex sizes and geometries. 
Briefly, instead of computing the density distribution function of a particular atom or the center-of-mass of the molecules, one computes the distribution function of the minimum-distance between any solute and solvent atoms. This provides a size and shape-independent distribution which is very natural to interpret in terms of molecular interactions. ","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"Additionally, the MDDFs can be decomposed into contributions of each type of atom (or groups of atoms) of the solute and solvent molecules, such that the profiles of the distributions can be interpreted in terms of the chemical nature of the species involved in the interactions at each distance. ","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"Finally, as with radial distribution functions, MDDFs can be used to compute Kirkwood-Buff integrals to connect the accumulation or depletion of the solvent components to thermodynamic properties, like protein structural stability, solubility, and others.","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"compat: Compat\nImportant: This manual refers to version 2 of ComplexMixtures.jl. There are syntax changes relative to the 1.X series, and analysis scripts written for the previous versions won't work. The list of changes necessary to update the scripts is described here.","category":"page"},{"location":"#Features","page":"Introduction","title":"Features","text":"","category":"section"},{"location":"","page":"Introduction","title":"Introduction","text":"Check out our examples, featuring the analysis of solvation structures for proteins, polymers, membrane, and complex solutions! The examples are also described in our featured article.","category":"page"},{"location":"#1.-Minimum-distance-distribution-functions:-understanding-solvation-at-a-molecular-level","page":"Introduction","title":"1. 
Minimum-distance distribution functions: understanding solvation at a molecular level","text":"","category":"section"},{"location":"","page":"Introduction","title":"Introduction","text":"This figure illustrates one of the main features of minimum-distance distribution functions, by showing the distribution of DMF molecules at the surface of a polyacrylamide molecule. The direct interactions are evident by the peak at hydrogen-bonding distances and, additionally, the contribution of each group of atoms of the DMF can be clearly distinguished by decomposing the total MDDF into atomic or chemical group contributions. ","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"
\n\n
\nMinimum distance distribution function and its decomposition into the chemical\ngroups of the solvent (top) and solute (bottom) molecules.

\n
","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"Decomposition of the total MDDF into the contributions of the solute atoms (in this case, a protein) is also possible. Any chemical group decomposition is possible. Here, we decompose the MDDF into the contribution of each protein residue. ","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"
\n\n
\nDensity map of a solvent in the vicinity of each protein residue. \n
","category":"page"},{"location":"#2.-Thermodynamic-interpretation-through-Kirkwood-Buff-theory","page":"Introduction","title":"2. Thermodynamic interpretation through Kirkwood-Buff theory","text":"","category":"section"},{"location":"","page":"Introduction","title":"Introduction","text":"Minimum-distance distribution functions can be used to compute Kirkwood-Buff integrals, and thus, thermodynamic parameters associated to solvation. ","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"Kirkwood-Buff integrals carry the information of the total accumulation or depletion of each solvent around a solute. For example, the figure below displays the KB integrals of an ionic liquid solvating different conformational states of a protein [link]. The figure illustrates that the solvation structures are dependent on the protein folding state. ","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"
\n\n
\nKirkwood-Buff integrals of an ionic liquid solvating a protein in different conformational states.

\n
","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"From differences in KB integrals among cosolvents, the Preferential Solvation parameter can be computed. This is an important parameter because it can be measured experimentally and is ultimately associated with the equilibrium thermodynamics of the solvation. In the following figure, we show that, for example, the preferential solvation of a protein in different folding states is dependent in a non-trivial way on the concentration of an ionic liquid in aqueous solutions. ","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"
\n\n
\nPreferential interaction parameters obtained for the solvation of a protein by ionic liquids.

\n
","category":"page"},{"location":"","page":"Introduction","title":"Introduction","text":"In particular, the plot shows that besides being preferentially excluded from the protein surface at high concentrations in the native state, suggesting protein folding stabilization, the interactions with the protein in the denatured states are stronger, leading to denaturation at all concentrations. ","category":"page"},{"location":"#References","page":"Introduction","title":"References","text":"","category":"section"},{"location":"","page":"Introduction","title":"Introduction","text":"L. Martínez, ComplexMixtures.jl: Investigating the structure of solutions of complex-shaped molecules from a solvent-shell perspective. J. Mol. Liq. 347, 117945, 2022. [Full Text]\nL. Martínez, S. Shimizu, Molecular interpretation of preferential interactions in protein solvation: a solvent-shell perspective by means of minimum-distance distribution functions. J. Chem. Theor. Comp. 13, 6358–6372, 2017. [Full Text]","category":"page"},{"location":"#See-also","page":"Introduction","title":"See also","text":"","category":"section"},{"location":"#Seminar","page":"Introduction","title":"Seminar","text":"","category":"section"},{"location":"","page":"Introduction","title":"Introduction","text":"Presentation about ComplexMixtures.jl and protein-solvent interactions: https://youtu.be/umSRjsITzyA","category":"page"},{"location":"#Applications","page":"Introduction","title":"Applications","text":"","category":"section"},{"location":"","page":"Introduction","title":"Introduction","text":"A. F. Pereira, V. Piccoli, L. Martínez, Trifluoroethanol direct interactions with protein backbones destabilize alpha-helices. J. Mol. Liq. 365, 120209, 2022. [Full Text]\nV. Piccoli, L. Martínez, Ionic liquid solvation of proteins in native and denatured states. J. Mol. Liq. 363, 119953, 2022. [Full Text]\nV. Piccoli, L. Martínez, Correlated counterion effects in the solvation of proteins by ionic-liquids. J. Mol. Liq. 
320, 114347, 2020. [Full Text]\nI. P. de Oliveira, L. Martínez, The shift in urea orientation at protein surfaces at low pH is compatible with a direct mechanism of protein denaturation. Phys. Chem. Chem. Phys. 22, 354-367, 2020. [Full Text]\nI. P. de Oliveira, L. Martínez, Molecular basis for competitive solvation of the Burkholderia cepacia lipase by sorbitol and urea. Phys. Chem. Chem. Phys. 18, 21797-21808, 2016. [Full Text]","category":"page"}] } diff --git a/dev/selection/index.html b/dev/selection/index.html index cc31985b..1d161bf4 100644 --- a/dev/selection/index.html +++ b/dev/selection/index.html @@ -1,11 +1,221 @@ -Set solute and solvent · ComplexMixtures.jl

Set the solute and solvent selections

The solute and solvent are defined in ComplexMixtures as lists (vectors) of the indexes of the atoms of the system. The solute and solvent information is stored in the Selection structure. For example, if the solute is a molecule formed by the first 5 atoms of the system, it would be defined as:

indexes = [ 1, 2, 3, 4, 5 ]
-solute = Selection(indexes,nmols=1)
Note

We need to inform the Selection function about the number of atoms of each molecule (using natomspermol=3, for example), or the number of molecules (using nmols=1000, for example), such that the atoms belonging to each molecule can be determined without ambiguity.

The atom names can be also provided such that some of the output files contain more information on the atomic contributions. In this case the syntax is:

indexes = [ 1, 2, 3, 4, 5 ]
-names = [ "H1", "H2", "H3", "H4", "C" ]
-solute = Selection(indexes,names,nmols=1)
Warning

The indexing in ComplexMixtures is 1-based. That means that the first atom of your structure file is in position 1 of the coordinates. Please be careful if using any selection tool to be sure that your selection is correct.

Using PDBTools

PDBTools is a package we developed to read and write PDB files, which provides a simple selection tool. It is installed as a dependency of ComplexMixtures. Given a PDB file of the simulated system, the solute can be defined as, for example,

using PDBTools
-atoms = PDBTools.readPDB("system.pdb")
-protein = PDBTools.select(atoms,"protein")
-solute = Selection(protein,nmols=1)

If the solvent is, for instance, water, the indexes of the water molecules can be obtained with:

water = PDBTools.select(atoms,"water")
-solvent = Selection(water,natomspermol=3)

or, alternatively, a more compact syntax can be used, for example:

water = PDBTools.select("system.pdb","resname TIP3P")
-solvent = Selection(water,natomspermol=3)

or even providing just the names of the input file and selection, which will run PDBTools in background:

solvent = Selection("sytem.pdb","water",water,natomspermol=3)
Warning

The selection syntax of PDBTools is somewhat limited. Verify if the selections correspond to the the desired sets of atoms every time.

Using VMD

VMD is a very popular and powerful package for visualization of simulations. It contains a very versatile library to read topologies and trajectory files, and a powerful selection syntax. We provide here a wrapper to VMD which allows using its capabilities.

For example, the solute can be defined with:

indexes, names = VMDselect("./system.gro","protein",vmd="/usr/bin/vmd")
-solute = Selection(indexes,names,nmols=1)

The main advantage here is that all the file types that VMD supports are supported. But VMD needs to be installed and is run in background, and it takes a few seconds.

The VMDSelect function also accepts an optional keyword parameter srcload, which can be used to load custom scripts within vmd before running setting the selection. This allows the definition of tcl scripts with custom selection macros, for instance. The usage would be:

sel = VMSelect("file.pdb", "resname MYRES"; srcload = [ "mymacros1.tcl", "mymacros2.tcl" ])

Which corresponds to sourceing each of the macro files in VMD before defining the selection with the custom MYRES name.

Compat

Custom script source loading in VMDSelect was introduced in ComplexMixtures version 1.3.0.

Warning

VMD uses 0-based indexing and VMDselect adjusts that. However, if a selection is performed by index, as with index 1, VMD will select the second atom, and the output will be [2]. Selections by type, name, segment, residue name, etc, won't be a problem.

+Set solute and solvent · ComplexMixtures.jl

Solute and solvent selections

The solute and solvent are defined by selecting subsets of atoms from the system. These subsets are defined by the AtomSelection data structures.

To construct an AtomSelection data structure, one needs to provide, at least, the (1-based) indices of the atoms that belong to the selection, and either the number of atoms of each molecule or the number of molecules in the selection.

Using the PDBTools package

The PDBTools package helps the construction of the solute and solvent data structures, by providing a convenient selection syntax. Additionally, it sets up the names of the atoms of the system in the data structure, which can be used to retrieve atom and group contributions to MDDFs and coordination numbers.

For example, here we define a protein of a system as the solute:

julia> using ComplexMixtures, PDBTools
+
+julia> atoms = readPDB(ComplexMixtures.Testing.pdbfile);
+
+julia> protein = select(atoms, "protein");
+
+julia> solute = AtomSelection(protein, nmols=1)
+AtomSelection 
+    1463 atoms belonging to 1 molecule(s).
+    Atoms per molecule: 1463
+    Number of groups: 1463 

We need to inform the AtomSelection function about the number of atoms of each molecule (using natomspermol=3, for example), or the number of molecules (using nmols=1000, for example), such that the atoms belonging to each molecule can be determined without ambiguity.

Now, we define the solvent of the system as the water molecules:

julia> water = select(atoms, "water"); 
+
+julia> solvent = AtomSelection(water, natomspermol=3)
+AtomSelection 
+    58014 atoms belonging to 19338 molecule(s).
+    Atoms per molecule: 3
+    Number of groups: 3

Using VMD

VMD is a very popular and powerful package for visualization of simulations. It contains a very versatile library to read topologies and trajectory files, and a powerful selection syntax. The PDBTools.jl (v1.0 or greater) package provides a simple wrapper to VMD that allows using the same syntax as it supports.

For example, the solute can be defined with:

using ComplexMixtures, PDBTools
+
+indices, names = select_with_vmd("./system.gro", "protein", vmd="/usr/bin/vmd")
+
+solute = AtomSelection(indices, names, nmols=1)

The main advantage here is that all the file types that VMD supports are supported. But VMD needs to be installed and is run in background, and it takes a few seconds to be executed.

The select_with_vmd function also accepts an optional keyword parameter srcload, which can be used to load custom scripts within vmd before setting the selection. This allows the definition of tcl scripts with custom selection macros, for instance. The usage would be:

using PDBTools
+
+sel = select_with_vmd(
+    "file.pdb", 
+    "resname MYRES"; 
+    srcload = [ "mymacros1.tcl", "mymacros2.tcl" ]
+)

Which corresponds to sourceing each of the macro files in VMD before defining the selection with the custom MYRES name.

Warning

VMD uses 0-based indexing and select_with_vmd adjusts that. However, if a selection is performed by index, as with index 1, VMD will select the second atom, and the output will be [2]. Selections by type, name, segment, residue name, etc, won't be a problem.

Predefinition of atom groups

Importantly, this should be only a concern for the solvation analysis of systems in which individual molecules are very large. This feature was introduced in version 2.0 of the package to support the study of small molecule distribution in virus structures, of millions of atoms.

By default, the contribution of each type of atom to the coordination number counts is stored, to allow the decomposition of the final MDDFs into any group contribution. However, when a structure, like a virus, has millions of atoms, storing the contribution of each atom becomes prohibitive in terms of memory. Thus, one may need to predefine the groups in which the contributions will be analyzed.

Here, we illustrate this feature by preselecting the acidic and basic residues of a protein:

julia> using ComplexMixtures, PDBTools
+
+julia> atoms = readPDB(ComplexMixtures.Testing.pdbfile);
+
+julia> protein = select(atoms, "protein");
+
+julia> acidic_residues = select(atoms, "protein and acidic");
+
+julia> basic_residues = select(atoms, "protein and basic");
+
+julia> solute = AtomSelection(
+        protein, 
+        nmols=1,
+        group_atom_indices = [ index.(acidic_residues), index.(basic_residues) ],
+        group_names = [ "acidic residues", "basic residues" ]
+    )
+AtomSelection 
+    1463 atoms belonging to 1 molecule(s).
+    Atoms per molecule: 1463
+    Number of groups: 2 

In this example, then, the solute AtomSelection has two groups. The indices of the atoms of the groups are stored in the group_atom_indices vector and the group names in the group_names vector. The atom_group auxiliary function is the most practical way to retrieve the indices of the atoms of the group.

julia> atom_group(solute, "acidic residues")
+162-element Vector{Int64}:
+   24
+   25
+   26
+    ⋮
+ 1436
+ 1437

With these group selections predefined, the contributions of these groups to the MDDF or coordination numbers can be retrieved directly from the result data structure with, for example:

julia> result = mddf(trajectory, solute, solvent);
+
+julia> acidic_residue_contributions = contributions(result, SoluteGroup("acidic residues"))

Reference functions

ComplexMixtures.AtomSelectionType
struct AtomSelection

Structure that contains the information about the solute and solvent molecules.

  • nmols::Int64

  • natomspermol::Int64

  • indices::Vector{Int64}

  • custom_groups::Bool

  • group_atom_indices::Vector{Vector{Int64}}

  • group_names::Vector{String}

source
ComplexMixtures.AtomSelectionMethod

AtomSelection constructors

The AtomSelection structure carries the information of the molecules that are going to be used to compute the MDDF. The structure can be initialized in different ways:

  1. Initialize the structure providing a vector of PDBTools.Atom(s).
    AtomSelection(
+        atoms::AbstractVector{<:PDBTools.Atom}; 
+        nmols::Int = 0, 
+        natomspermol::Int = 0,
+        group_atom_indices::Union{Nothing,Vector{Vector{Int}}} = nothing,
+        group_names::Vector{String} = String[]
+    ) 

The indices of the atoms will be retrieved from the indices of the atoms as defined in the PDB file, thus the PDB file must correspond to the same system as that of the simulation.

Either the number of molecules (nmols) or the number of atoms per molecule (natomspermol) must be provided.

If group_atom_indices is nothing or group_names is empty, the names of the groups will be retrieved from the atom names, and the coordination numbers of each individual atom will be stored.

Example

julia> using ComplexMixtures, PDBTools
+
+julia> pdbfile = ComplexMixtures.Testing.pdbfile;
+
+julia> atoms = PDBTools.readPDB(pdbfile, "resname TMAO");
+
+julia> atsel = AtomSelection(atoms, natomspermol=14)
+AtomSelection 
+    2534 atoms belonging to 181 molecule(s).
+    Atoms per molecule: 14
+    Number of groups: 14 
+
+julia> atom_group_name(atsel, 1)
+"N"
+
+julia> atom_group_name(atsel, 5)
+"O1"
+
+julia> length(atom_group_names(atsel))
+14
  1. Lower level: initialize the structure providing the index of atoms and groups.
    AtomSelection(
+        indices::Vector{Int};
+        nmols::Int = 0,
+        natomspermol::Int = 0,
+        group_atom_indices::Union{Nothing,Vector{Vector{Int}}} = nothing,
+        group_names::Vector{String} = String[]
+    )

Construct an AtomSelection structure from the most low-level information: the index of atoms and groups.

Either the number of molecules (nmols) or the number of atoms per molecule (natomspermol) must be provided.

Groups of atoms can be defined by providing a vector of vectors of atom indices (group_atom_indices), and a vector of group names (group_names). If group_atom_indices is set to nothing, the coordination numbers of each individual atom will be stored.

Examples

julia> using ComplexMixtures
+
+julia> AtomSelection([1,2,3], nmols=1)
+AtomSelection 
+    3 atoms belonging to 1 molecule(s).
+    Atoms per molecule: 3
+    Number of groups: 3
+
+julia> AtomSelection([1,2,3], natomspermol=1)
+AtomSelection 
+    3 atoms belonging to 3 molecule(s).
+    Atoms per molecule: 1
+    Number of groups: 1
+
+julia> AtomSelection([1,2,3], natomspermol=1, group_atom_indices=[[1,2],[3]], group_names=["G1", "G2"])
+AtomSelection 
+    3 atoms belonging to 3 molecule(s).
+    Atoms per molecule: 1
+    Number of groups: 2 
source
ComplexMixtures.SoluteGroupType

SoluteGroup and SolventGroup data structures.

These structures are used to select groups of atoms to extract their contributions from the MDDF results.

Most typically, the groups are defined from a selection of atoms with the PDBTools package, or by providing directly the indices of the atoms in the structure.

Alternatively, if the groups were predefined, the groups can be selected by group index or group name.

The possible constructors are:

SoluteGroup(atoms::Vector{PDBTools.Atom})
+SoluteGroup(atom_indices::Vector{Int})
+SoluteGroup(atom_names::Vector{String})
+SoluteGroup(group_name::String)
+SoluteGroup(residue::PDBTools.Residue)
+SoluteGroup(atsel::AtomSelection)

above, each constructor can be replaced by SolventGroup. The resulting data structures are used as input parameters for the contributions function:

contributions(results::Result, group::Union{SoluteGroup, SolventGroup}; type=:mddf)

See the contributions help entry for additional information.

Examples

Defining solute groups with different input types:

julia> using ComplexMixtures, PDBTools
+
+julia> atoms = PDBTools.readPDB(ComplexMixtures.Testing.pdbfile, "protein"); 
+
+julia> SoluteGroup(atoms) # vector of PDBTools.Atom(s)
+SoluteGroup defined by:
+    atom_indices: [ 1, 2, ..., 1462, 1463 ] - 1463 atoms
+
+julia> SoluteGroup(PDBTools.index.(atoms)) # vector of atom indices
+SoluteGroup defined by:
+    atom_indices: [ 1, 2, ..., 1462, 1463 ] - 1463 atoms
+
+julia> SoluteGroup(PDBTools.name.(atoms)) # vector of atom names
+SoluteGroup defined by:
+    atom_names: [ N, HT1, ..., HG22, HG23 ] - 1463 atoms
+ 
+julia> SoluteGroup("acidic residues") # predefined group name
+SoluteGroup defined by:
+    group_name: "acidic residues"
+
+julia> SoluteGroup(1) # predefined group index
+SoluteGroup defined by:
+    group_index: 1
+
+julia> SoluteGroup(collect(eachresidue(atoms))[2]) # PDBTools.Residue(s)
+SoluteGroup defined by:
+    atom_indices: [ 13, 14, ..., 22, 23 ] - 11 atoms
+
source
ComplexMixtures.SolventGroupType

SoluteGroup and SolventGroup data structures.

These structures are used to select groups of atoms to extract their contributions from the MDDF results.

Most typically, the groups are defined from a selection of atoms with the PDBTools package, or by providing directly the indices of the atoms in the structure.

Alternatively, if the groups were predefined, the groups can be selected by group index or group name.

The possible constructors are:

SoluteGroup(atoms::Vector{PDBTools.Atom})
+SoluteGroup(atom_indices::Vector{Int})
+SoluteGroup(atom_names::Vector{String})
+SoluteGroup(group_name::String)
+SoluteGroup(residue::PDBTools.Residue)
+SoluteGroup(atsel::AtomSelection)

above, each constructor can be replaced by SolventGroup. The resulting data structures are used as input parameters for the contributions function:

contributions(results::Result, group::Union{SoluteGroup, SolventGroup}; type=:mddf)

See the contributions help entry for additional information.

Examples

Defining solute groups with different input types:

julia> using ComplexMixtures, PDBTools
+
+julia> atoms = PDBTools.readPDB(ComplexMixtures.Testing.pdbfile, "protein"); 
+
+julia> SoluteGroup(atoms) # vector of PDBTools.Atom(s)
+SoluteGroup defined by:
+    atom_indices: [ 1, 2, ..., 1462, 1463 ] - 1463 atoms
+
+julia> SoluteGroup(PDBTools.index.(atoms)) # vector of atom indices
+SoluteGroup defined by:
+    atom_indices: [ 1, 2, ..., 1462, 1463 ] - 1463 atoms
+
+julia> SoluteGroup(PDBTools.name.(atoms)) # vector of atom names
+SoluteGroup defined by:
+    atom_names: [ N, HT1, ..., HG22, HG23 ] - 1463 atoms
+ 
+julia> SoluteGroup("acidic residues") # predefined group name
+SoluteGroup defined by:
+    group_name: "acidic residues"
+
+julia> SoluteGroup(1) # predefined group index
+SoluteGroup defined by:
+    group_index: 1
+
+julia> SoluteGroup(collect(eachresidue(atoms))[2]) # PDBTools.Residue(s)
+SoluteGroup defined by:
+    atom_indices: [ 13, 14, ..., 22, 23 ] - 11 atoms
+
source
ComplexMixtures.atom_groupMethod
atom_group(atsel::AtomSelection, i::Int)
+atom_group(atsel::AtomSelection, groupname::String)
+
+atom_group(atsel::AtomSelection, i::Int)
+atom_group(atsel::AtomSelection, groupname::String)

Return the indices of the atoms that belong to a given group.

Example

julia> using ComplexMixtures
+
+julia> atsel = AtomSelection([1,2,3], natomspermol=1, group_atom_indices=[[1,2],[3]], group_names=["G1", "G2"])
+AtomSelection 
+    3 atoms belonging to 3 molecule(s).
+    Atoms per molecule: 1
+    Number of groups: 2
+
+julia> atom_group(atsel, 1)
+2-element Vector{Int64}:
+ 1
+ 2
+
+julia> atom_group(atsel, "G2")
+1-element Vector{Int64}:
+ 3
+
+julia> atom_group_name(atsel, 1)
+"G1"
source
ComplexMixtures.atom_group_nameMethod
atom_group_name(atsel::AtomSelection, i::Int)
+atom_group_names(atsel::AtomSelection)

Return the name of the group of atoms with index i. The atom_group_names function returns a vector with the names of all the groups.

Example

julia> using ComplexMixtures
+
+julia> atsel = AtomSelection([1,2,3], natomspermol=1, group_atom_indices=[[1,2],[3]], group_names=["G1", "G2"])
+AtomSelection 
+    3 atoms belonging to 3 molecule(s).
+    Atoms per molecule: 1
+    Number of groups: 2
+
+julia> atom_group_name(atsel, 1)
+"G1"
+
+julia> atom_group_names(atsel)
+2-element Vector{String}:
+ "G1"
+ "G2"
source
ComplexMixtures.atom_group_namesMethod
atom_group_name(atsel::AtomSelection, i::Int)
+atom_group_names(atsel::AtomSelection)

Return the name of the group of atoms with index i. The atom_group_names function returns a vector with the names of all the groups.

Example

julia> using ComplexMixtures
+
+julia> atsel = AtomSelection([1,2,3], natomspermol=1, group_atom_indices=[[1,2],[3]], group_names=["G1", "G2"])
+AtomSelection 
+    3 atoms belonging to 3 molecule(s).
+    Atoms per molecule: 1
+    Number of groups: 2
+
+julia> atom_group_name(atsel, 1)
+"G1"
+
+julia> atom_group_names(atsel)
+2-element Vector{String}:
+ "G1"
+ "G2"
source
diff --git a/dev/tools/index.html b/dev/tools/index.html index 023a0da5..6e0de187 100644 --- a/dev/tools/index.html +++ b/dev/tools/index.html @@ -1,14 +1,14 @@ -Tools · ComplexMixtures.jl

Tools

A set of examples of analyses that can be performed with ComplexMixtures is given in this site. A brief the description of the possible results is provided here.

Some tools are provided to analyze the results:

Coordination numbers

The function

coordination_number(R::Result, group_contributions::Vector{Float64})

computes the coordination number of a given group of atoms from the solute or solvent atomic contributions to the MDDF. Here, R is the result of the mddf calculation, and group_contributions is the output of the contributions function for the desired set of atoms.

If no group is defined, the coordination number of the complete solute is returned, which is equivalent to the R.coordination_number field of the Result data structure:

coordination_number(R::Result) == R.coordination_number
Note

There are some systems for which the normalization of the distributions is not necessary or possible. It is still possible to compute the coordination numbers, by running, instead of mddf, the coordination_number function:

coordination_number(trajectory::Trajectory, options::Options)

This call will return Result data structure but with all fields requiring normalization with zeros. In summary, this result data structure can be used to compute the coordination numbers, but not the MDDF, RDF, or KB integrals.

Compat

The use independent computation of coordination numbers was introduced in version 1.1.

Example

In the following example we compute the coordination number of the atoms of residue 50 (which belongs to the solute - a protein) with the solvent atoms of TMAO, as a function of the distance. The plot produced will show side by side the residue contribution to the MDDF and the corresponding coordination number.

using ComplexMixtures, PDBTools
+Tools · ComplexMixtures.jl

Tools

A set of examples of analyses that can be performed with ComplexMixtures is given in this site. A brief description of the possible results is provided here.

Some tools are provided to analyze the results:

Coordination numbers

The function

coordination_number(R::Result, group::Union{SoluteGroup, SolventGroup})

computes the coordination number of a given group of atoms from the solute or solvent atomic contributions to the MDDF. Here, R is the result of the mddf calculation, and group is a SoluteGroup or SolventGroup that defines the desired set of atoms.

If no group is defined, the coordination number of the complete solute is returned, which is equivalent to the R.coordination_number field of the Result data structure:

coordination_number(R::Result) == R.coordination_number
Note

There are some systems for which the normalization of the distributions is not necessary or possible. It is still possible to compute the coordination numbers, by running, instead of mddf, the coordination_number function:

coordination_number(trajectory::Trajectory, options::Options)

This call will return a Result data structure but with all fields requiring normalization filled with zeros. In summary, this result data structure can be used to compute the coordination numbers, but not the MDDF, RDF, or KB integrals.

Compat

The independent computation of coordination numbers was introduced in version 1.1.

Example

In the following example we compute the coordination number of the atoms of residue 50 (which belongs to the solute - a protein) with the solvent atoms of TMAO, as a function of the distance. The plot produced will show side by side the residue contribution to the MDDF and the corresponding coordination number.

using ComplexMixtures, PDBTools
 using Plots, EasyFit
 pdb = readPDB("test/data/NAMD/structure.pdb")
 R = load("test/data/NAMD/protein_tmao.json")
-solute = Selection(PDBTools.select(pdb, "protein"), nmols=1)
+solute = AtomSelection(PDBTools.select(pdb, "protein"), nmols=1)
 residue50 = PDBTools.select(pdb, "residue 50")
 # Compute the group contribution to the MDDF
-residue50_contribution = contributions(solute, R.solute_atom, residue50)
+residue50_contribution = contributions(R, SoluteGroup(residue50))
 # Now compute the coordination number
-residue50_coordination = coordination_number(R, residue50_contribution)
+residue50_coordination = coordination_number(R, SoluteGroup(residue50))
 # Plot with twin y-axis
 plot(R.d, movavg(residue50_contribution,n=10).x,
     xaxis="distance / Å", 
@@ -21,36 +21,46 @@
 )
 plot!(title="Residue 50", framestyle=:box, subplot=1)

With appropriate input data, this code produces:

-

Computing a 2D density map around a macromolecule

One nice way to visualize the accumulation or depletion of a solvent around a macromolecule (a protein, for example), is to obtain a 2D map of the density as a function of the distance from its surface. For example, in the figure below the density of a solute (here, Glycerol), in the neighborhood of a protein is shown:

+
ComplexMixtures.coordination_numberFunction
coordination_number(R::Result) = R.coordination_number
+coordination_number(R::Result, s::Union{SoluteGroup,SolventGroup})

Computes the coordination number of a given group of atoms from the solute or solvent atomic contributions to the MDDF. If no group is defined (first call above), the coordination number of the whole solute or solvent is returned.

If the group_contributions to the mddf are computed previously with the contributions function, the result can be used to compute the coordination number by calling coordination_number(R::Result, group_contributions).

Otherwise, the coordination number can be computed directly with the second call, where:

s is the solute or solvent selection (type ComplexMixtures.AtomSelection)

atom_contributions is the R.solute_atom or R.solvent_atom arrays of the Result structure

R is the Result structure,

and the last argument is the selection of atoms from the solute to be considered, given as a list of indices, list of atom names, or a selection following the syntax of PDBTools, or vector of PDBTools.Atoms, or a PDBTools.Residue

Examples

In the following example we compute the coordination number of the atoms of residue 50 (of the solute) with the solvent atoms of TMAO, as a function of the distance. Finally, we show the average number of TMAO molecules within 5 Angstroms of residue 50. The findlast(<(5), R.d) part of the code below returns the index of the last element of the R.d array that is smaller than 5 Angstroms.

Precomputing the group contributions Using the contributions function

using ComplexMixtures, PDBTools
+pdb = readPDB("test/data/NAMD/structure.pdb");
+R = load("test/data/NAMD/protein_tmao.json");
+solute = AtomSelection(PDBTools.select(pdb, "protein"), nmols=1);
+residue50 = PDBTools.select(pdb, "residue 50");
+# Compute the group contributions to the MDDF
+residue50_contribution = contributions(solute, R.solute_atom, residue50);
+# Now compute the coordination number
+residue50_coordination = coordination_number(R, residue50_contribution)
+# Output the average number of TMAO molecules within 5 Angstroms of residue 50
+residue50_coordination[findlast(<(5), R.d)]

Without precomputing the group_contribution

using ComplexMixtures, PDBTools
+pdb = readPDB("test/data/NAMD/structure.pdb");
+R = load("test/data/NAMD/protein_tmao.json");
+solute = AtomSelection(PDBTools.select(pdb, "protein"), nmols=1);
+residue50 = PDBTools.select(pdb, "residue 50");
+# Compute the coordination number
+residue50_coordination = coordination_number(solute, R.solute_atom, R, group)
+# Output the average number of TMAO molecules within 5 Angstroms of residue 50
+residue50_coordination[findlast(<(5), R.d)]
source

Computing a 2D density map around a macromolecule

One nice way to visualize the accumulation or depletion of a solvent around a macromolecule (a protein, for example), is to obtain a 2D map of the density as a function of the distance from its surface. For example, in the figure below the density of a solute (here, Glycerol), in the neighborhood of a protein is shown:

Here, one can see that Glycerol accumulates on Asp76 and on the proximity of hydrogen-bonding residues (Serine residues mostly). This figure was obtained by extracting from atomic contributions of the protein the contribution of each residue to the MDDF. Using PDBTools, this can be done with, for example:

residues = collect(eachresidue(protein))
 residue_contributions = zeros(length(R.d),length(residues))
 for (i,residue) in pairs(residues)
-  c = contributions(solute,R.solute_atom,residue) 
+  c = contributions(R, SoluteGroup(residue)) 
   residue_contributions[:,i] .= c
-end

The above produces a matrix with a number of columns equal to the number of residues and a number of rows equal to the number of MDDF points. That matrix can be plotted as a contour map with adequate plotting software. A complete running example is provided here, producing the figure above.

Computing a 3D density map around a macromolecule

Three-dimensional representations of the distribution functions can also be obtained from the MDDF results. These 3D representations are obtained from the fact that the MDDFs can be decomposed into the contributions of each solute atom, and that each point in space is closest to a single solute atom as well. Thus, each point in space can be associated to one solute atom, and the contribution of that atom to the MDDF at the corresponding distance can be obtained.

For example, the distribution function of a hydrogen-bonding liquid solvating a protein will display a characteristic peak at about 1.8Å. The MDDF at that distance can be decomposed into the contributions of all atoms of the protein which were found to form hydrogen bonds to the solvent. A 3D representation of these contributions can be obtained by computing, around a static protein (solute) structure, which are the regions in space which are closer to each atom of the protein. The position in space is then marked with the atom of the protein to which that region "belongs" and with the contribution of that atom to the MDDF at each distance within that region. A special function to compute this 3D distribution is provided here: grid3D.

This is better illustrated by a graphical representation. In the figure below we see a 3D representation of the MDDF of Glycerol around a protein, computed from a simulation of this protein in a mixture of water and Glycerol. A complete set of files and a script to reproduce this example is available here.

+end

The above produces a matrix with a number of columns equal to the number of residues and a number of rows equal to the number of MDDF points. That matrix can be plotted as a contour map with adequate plotting software. A complete running example is provided here, producing the figure above.

Computing a 3D density map around a macromolecule

Three-dimensional representations of the distribution functions can also be obtained from the MDDF results. These 3D representations are obtained from the fact that the MDDFs can be decomposed into the contributions of each solute atom, and that each point in space is closest to a single solute atom as well. Thus, each point in space can be associated to one solute atom, and the contribution of that atom to the MDDF at the corresponding distance can be obtained.

For example, the distribution function of a hydrogen-bonding liquid solvating a protein will display a characteristic peak at about 1.8Å. The MDDF at that distance can be decomposed into the contributions of all atoms of the protein which were found to form hydrogen bonds to the solvent. A 3D representation of these contributions can be obtained by computing, around a static protein (solute) structure, which are the regions in space which are closer to each atom of the protein. The position in space is then marked with the atom of the protein to which that region "belongs" and with the contribution of that atom to the MDDF at each distance within that region. A special function to compute this 3D distribution is provided here: grid3D.

This is better illustrated by a graphical representation. In the figure below we see a 3D representation of the MDDF of Glycerol around a protein, computed from a simulation of this protein in a mixture of water and Glycerol. A complete set of files and a script to reproduce this example is available here.

-

In the figure on the left, the points in space around the protein are selected with the following properties: distance from the protein smaller than 2.0Å and relative contribution to the MDDF at the corresponding distance of at least 10% of the maximum contribution. Thus, we are selecting the regions of the protein corresponding to the most stable hydrogen-bonding interactions. The color of the points is the contribution to the MDDF, from blue to red. Thus, the most reddish-points corresponds to the regions where the most stable hydrogen bonds were formed. We have marked two regions here, on opposite sides of the protein, with arrows.

Clicking on those points we obtain which are the atoms of the protein contributing to the MDDF at that region. In particular, the arrow on the right points to the strongest red region, which corresponds to an Aspartic acid. These residues are shown explicitly under the density (represented as a transparent surface) on the figure in the center.

The figure on the right displays, overlapped with the hydrogen-bonding residues, the most important contributions to the second peak of the distribution, corresponding to distances from the protein between 2.0 and 3.5Å. Notably, the regions involved are different from the ones forming hydrogen bonds, indicating that non-specific interactions with the protein (and not a second solvation shell) are responsible for the second peak.

An example input file which produces the files required for producing these images is:

using ComplexMixtures, PDBTools
-
-# PDB file of the system simulated
-pdb = readPDB("../Data/system.pdb")
+

In the figure on the left, the points in space around the protein are selected with the following properties: distance from the protein smaller than 2.0Å and relative contribution to the MDDF at the corresponding distance of at least 10% of the maximum contribution. Thus, we are selecting the regions of the protein corresponding to the most stable hydrogen-bonding interactions. The color of the points is the contribution to the MDDF, from blue to red. Thus, the most reddish points correspond to the regions where the most stable hydrogen bonds were formed. We have marked two regions here, on opposite sides of the protein, with arrows.

Clicking on those points we obtain which are the atoms of the protein contributing to the MDDF at that region. In particular, the arrow on the right points to the strongest red region, which corresponds to an Aspartic acid. These residues are shown explicitly under the density (represented as a transparent surface) on the figure in the center.

The figure on the right displays, overlapped with the hydrogen-bonding residues, the most important contributions to the second peak of the distribution, corresponding to distances from the protein between 2.0 and 3.5Å. Notably, the regions involved are different from the ones forming hydrogen bonds, indicating that non-specific interactions with the protein (and not a second solvation shell) are responsible for the second peak.

The call to grid3D in the last command will write as output a PDB file with the grid points, which loaded in a visualization software side-by-side with the protein structure, allows the production of the images shown. The grid.pdb file contains a regular PDB format, but the atoms are grid points. The identity of the atoms corresponds to the identity of the protein atom contributing to the MDDF at that point (the closest protein atom). The temperature-factor column (beta) contains the relative contribution of that atom to the MDDF at the corresponding distance, and the occupancy field contains the distance itself.

The output grid variable contains the same information of the PDB file, which can be analyzed with the tools of PDBTools if the user wants to.

ComplexMixtures.grid3DFunction
grid3D(
+    result::Result, atoms, output_file::Union{Nothing,String} = nothing; 
+    dmin=1.5, dmax=5.0, step=0.5
+)

This function builds the grid of the 3D density function and fills an array of mutable structures of type Atom, containing the position of the atoms of the grid, the closest atom to that position, and the distance.

result is a ComplexMixtures.Result object; atoms is a vector of PDBTools.Atoms with all the atoms of the system; output_file is the name of the file where the grid will be written. If nothing, the grid is only returned as a matrix.

dmin and dmax define the range of distance where the density grid will be built, and step defines how fine the grid must be. Be aware that fine grids usually involve a very large number of points (hundreds of thousands).

Example

julia> using ComplexMixtures, PDBTools
 
-# Load results of a ComplexMixtures run
-R = load("../Data/results_glyc50.json")  
+julia> atoms = readPDB("./system.pdb");
 
-# Inform which is the solute
-protein = select(pdb,"protein")
-solute = Selection(protein,nmols=1)
+julia> R = ComplexMixtures.load("./results.json");
 
-# Compute the 3D density grid and output it to the PDB file
-grid = grid3D(
-    solute=solute,
-    solute_atoms=protein,
-    mddf_result=R,
-    output_file="grid.pdb"
-)

The call to grid3D in the last command will write as output a PDB file with the grid points, which loaded in a visualization software side-by-side with the protein structure, allows the production of the images shown. The grid.pdb file contains a regular PDB format, but the atoms are grid points. The identity of the atoms corresponds to the identity of the protein atom contributing to the MDDF at that point (the closest protein atom). The temperature-factor column (beta) contains the relative contribution of that atom to the MDDF at the corresponding distance, and the occupancy field contains the distance itself.

The output grid variable contains the same information of the PDB file, which can be analyzed with the tools of PDBTools if the user wants to.

Computing radial distribution functions

The distributions returned by the mddf function (the mddf and rdf vectors), are normalized by the random reference state or using a site count based on the numerical integration of the volume corresponding to each minimum-distance to the solute.

If, however, the solute is defined by a single atom (as the oxygen atom of water, for example), the numerical integration of the volume can be replaced by a simple analytical spherical shell volume, reducing noise. The ComplexMixtures.gr function returns the radial distribution function and the KB integral computed from the results, using this volume estimate:

g, kb = ComplexMixtures.gr(R)

By default, the single-reference count (rdf_count) of the Result structure will be used to compute the radial distribution function. The function can be called with explicit control of all input parameters:

g, kb = ComplexMixtures.gr(r,count,density,binstep)

where:

ParameterDefinitionResult structure output data to provide
rVector of distancesThe d vector
countNumber of site counts at each rThe rdf or mddf vectors
densityBulk densityThe density.solvent_bulk or density.solvent densities.
binstepThe histogram stepThe options.binstep

Example:

...
+julia> grid = grid3D(R, atoms, "grid.pdb");

grid will contain a vector of Atoms with the information of the MDDF at each grid point, and the same data will be written in the grid.pdb file. This PDB file can be opened in VMD, for example, and contains in the beta field the contribution of each protein residue to the MDDF at each point in space relative to the protein, and in the occupancy field the distance to the protein. Examples of how this information can be visualized are provided in the user guide of ComplexMixtures.

source

Computing radial distribution functions

The distributions returned by the mddf function (the mddf and rdf vectors), are normalized by the random reference state or using a site count based on the numerical integration of the volume corresponding to each minimum-distance to the solute.

If, however, the solute is defined by a single atom (as the oxygen atom of water, for example), the numerical integration of the volume can be replaced by a simple analytical spherical shell volume, reducing noise. The ComplexMixtures.gr function returns the radial distribution function and the KB integral computed from the results, using this volume estimate:

g, kb = ComplexMixtures.gr(R)

By default, the single-reference count (rdf_count) of the Result structure will be used to compute the radial distribution function. The function can be called with explicit control of all input parameters:

g, kb = ComplexMixtures.gr(r,count,density,binstep)

where:

ParameterDefinitionResult structure output data to provide
rVector of distancesThe d vector
countNumber of site counts at each rThe rdf or mddf vectors
densityBulk densityThe density.solvent_bulk or density.solvent densities.
binstepThe histogram stepThe options.binstep

Example:

...
 R = mddf(trajectory,options)
-g, kb = ComplexMixtures.gr(R.d,R.rdf_count,R.density.solvent_bulk,R.options.binstep)

Overview of the solvent and solute properties

The output to the REPL of the Result structure provides an overview of the properties of the solution. The data can be retrieved into a data structure using the overview function. Examples:

...
+g, kb = ComplexMixtures.gr(R.d,R.rdf_count,R.density.solvent_bulk,R.options.binstep)
ComplexMixtures.grMethod
gr(R::Result) = gr(R.d,R.rdf_count,R.density.solvent_bulk,R.files[1].options.binstep)

If a Result structure is provided without further details, use the rdf count and the bulk solvent density.

source
ComplexMixtures.grMethod
gr(r::Vector{Float64}, count::Vector{Float64}, density::Float64, binstep::Float64)

Computes the radial distribution function from the count data and the density.

This is exactly a conventional g(r) if a single atom was chosen as the solute and solvent selections.

Returns both the g(r) and the kb(r)

source

Overview of the solvent and solute properties

The output to the REPL of the Result structure provides an overview of the properties of the solution. The data can be retrieved into a data structure using the overview function. Examples:

...
 julia> results = mddf(trajectory)
 
 julia> results
@@ -88,4 +98,4 @@
 -------------------------------------------------------------------------------

In this case, since solute and solvent are equivalent and the system is homogeneous, the molar volumes and concentrations are similar. This is not the case if the molecules are different or if the solute is at infinite dilution (in which case the bulk solvent density might be different from the solvent density in the simulation).

To retrieve the data of the overview structure use, for example:

julia> overview = overview(results);
 
 julia> overview.solute_molar_volume
-657.5051512801567
+657.5051512801567 diff --git a/dev/trajectory/index.html b/dev/trajectory/index.html index 060a0fdf..a760cc4c 100644 --- a/dev/trajectory/index.html +++ b/dev/trajectory/index.html @@ -1,2 +1,2 @@ -Loading the trajectory · ComplexMixtures.jl

Loading trajectories

To initialize a trajectory file for computation, use the command

trajectory = Trajectory("trajectory.xtc",solute,solvent)

where solute and solvent are defined with the Selection function described before. This function opens the stream for reading frames, which are read one at a time when the coordinates are required for computing the MDDF.

The Trajectory function uses Chemfiles in background, and thus the most common trajectory formats are supported, as the ones produced with NAMD, Gromacs, LAMMPS, Amber, etc.

Tip

The format of the trajectory file is automatically determined by Chemfiles from the extension of the file. However, it can be provided by the user with the format keyword, for example:

trajectory = Trajectory("trajectory.xtc",solute,solvent,format="xtc")
+Loading the trajectory · ComplexMixtures.jl

Loading trajectories

To initialize a trajectory file for computation, use the command

trajectory = Trajectory("trajectory.xtc",solute,solvent)

where solute and solvent are defined with the AtomSelection function described before. This function opens the stream for reading frames, which are read one at a time when the coordinates are required for computing the MDDF.

The Trajectory function uses Chemfiles in background, and thus the most common trajectory formats are supported, as the ones produced with NAMD, Gromacs, LAMMPS, Amber, etc.

Tip

The format of the trajectory file is automatically determined by Chemfiles from the extension of the file. However, it can be provided by the user with the format keyword, for example:

trajectory = Trajectory("trajectory.xtc",solute,solvent,format="xtc")

Reference functions

ComplexMixtures.TrajectoryType
Trajectory(filename::String, solute::AtomSelection, solvent::AtomSelection; format::String = "", chemfiles = false)

Trajectory constructor data type.

Defaults to reading with the Chemfiles infrastructure, except for DCD and PDB trajectory files, if the "PDBTraj" option is provided.

See memory issue (https://github.com/chemfiles/Chemfiles.jl/issues/44)

source
diff --git a/dev/updating_scripts/index.html b/dev/updating_scripts/index.html new file mode 100644 index 00000000..1e6539fa --- /dev/null +++ b/dev/updating_scripts/index.html @@ -0,0 +1,3 @@ + +Updating scripts · ComplexMixtures.jl

Updating scripts from v1 to v2

The syntax changes necessary to update scripts from version 1.X to 2.X of the package are:

Atom selections

The previous Selection structure was renamed to AtomSelection for clarity.

  • Before:
water = Selection(water; natomspermol=3)
  • Now:
water = AtomSelection(water; natomspermol=3)

Group contributions syntax

The syntax for computing group contributions is improved. Previously, the contrib or contributions functions required three somewhat redundant parameters.

  • Before:

The call to contributions required 3 parameters: the Selection structure, the matrix of contributions, and the indexes of the atoms for which the contributions were desired:

h_contributions = contributions(solvent, R.solvent_atom, h_indexes)
  • Now:

The contributions are extracted from the Result data structure, by providing either a SoluteGroup or SolventGroup object, which are set up with the group names, group indexes, atom names, or atom indexes:

h_contributions = contributions(R, SolventGroup(h_indexes))

Frame weights

frame_weights is now an option of the mddf execution. That is, previously the frame weights were defined in the Options data structure, and now they are passed to the mddf function.

  • Before:
options = Options(frame_weights=[1.0, 2.0])
+results = mddf(trajectory, options)
  • Now:
results = mddf(trajectory, options; frame_weights=[1.0, 2.0])