diff --git a/LICENSE b/LICENSE index b6b2162c89..59b7714698 100644 --- a/LICENSE +++ b/LICENSE @@ -1765,6 +1765,33 @@ PyQCPROT OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +========================================================================== + +DSSP module code for protein secondary structure assignment + - analysis/dssp/pydssp_numpy.py + +MIT License + +Copyright (c) 2022 Shintaro Minami + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
+ ========================================================================== MDAnalysis logo (see doc/sphinx/source/logos) diff --git a/package/CHANGELOG b/package/CHANGELOG index e59f1b068d..0214c89ad3 100644 --- a/package/CHANGELOG +++ b/package/CHANGELOG @@ -39,6 +39,7 @@ Fixes * Fix groups.py doctests using sphinx directives (Issue #3925, PR #4374) Enhancements + * Add `analysis.dssp` module for protein secondary structure assignment, based on [pydssp](https://github.com/ShintaroMinami/PyDSSP) * Added a tqdm progress bar for `MDAnalysis.analysis.pca.PCA.transform()` (PR #4531) * Improved performance of PDBWriter (Issue #2785, PR #4472) diff --git a/package/MDAnalysis/analysis/dssp/__init__.py b/package/MDAnalysis/analysis/dssp/__init__.py new file mode 100644 index 0000000000..870cb65d63 --- /dev/null +++ b/package/MDAnalysis/analysis/dssp/__init__.py @@ -0,0 +1,7 @@ +__all__ = [ + "DSSP", + "assign", + "translate", +] + +from .dssp import DSSP, assign, translate diff --git a/package/MDAnalysis/analysis/dssp/dssp.py b/package/MDAnalysis/analysis/dssp/dssp.py new file mode 100644 index 0000000000..7d515926f2 --- /dev/null +++ b/package/MDAnalysis/analysis/dssp/dssp.py @@ -0,0 +1,427 @@ +""" +Secondary structure assignment (helix, sheet and loop) --- :mod:`MDAnalysis.analysis.dssp` +========================================================================================== + +:Author: Egor Marin +:Year: 2024 +:Copyright: LGPL v2.1+ + +.. versionadded:: 2.8.0 + +The module contains code to build a hydrogen bond contact map, +and use it to assign protein secondary structure (:class:`DSSP`). + +This module uses the python version of the original algorithm :footcite:p:`Kabsch1983`, +re-implemented by @ShintaroMinami and available under MIT license from +`ShintaroMinami/PyDSSP <https://github.com/ShintaroMinami/PyDSSP>`_. + + +.. 
Note:: + This implementation does not discriminate different types of + beta-sheets, as well as different types of helices, meaning you will get + :math:`3_{10}` helices and π-helices labelled as "helix" too. + + +.. rubric:: Using original `pydssp` +The default implementation uses the original *pydssp* (v.0.9.0) code, rewritten +without usage of the *einops* library and hence having no dependencies. If you want +to explicitly use *pydssp* (or its particular version), install it to your +current environment with ``python -m pip install pydssp``. Please note that the +way MDAnalysis uses *pydssp* does not support *pydssp* 's capability for batch +processing or its use of the *pytorch* library. + +When using this module in published work please cite :footcite:p:`Kabsch1983`. + +.. rubric:: References + +.. footbibliography:: + + +Example applications +-------------------- + +Assigning secondary structure of a PDB file +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +In this example we will simply print a string representing a protein's secondary +structure. + +.. code-block:: python + + from MDAnalysis.tests.datafiles import PDB + from MDAnalysis.analysis.dssp import DSSP + u = mda.Universe(PDB) + s = ''.join(DSSP(u).run().results.dssp[0]) + print(s) + + +Calculating average secondary structure of a trajectory +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +Here we take a trajectory and calculate its average secondary structure, i.e. +assign a secondary structure label 'X' to a residue if most of the frames in the +trajectory got assigned the 'X' label. + +.. 
code-block:: python + + from MDAnalysis.analysis.dssp import DSSP, translate + from MDAnalysisTests.datafiles import TPR, XTC + u = mda.Universe(TPR, XTC) + long_run = DSSP(u).run() + mean_secondary_structure = translate(long_run.results.dssp_ndarray.mean(axis=0)) + print(''.join(mean_secondary_structure)) + +Running this code produces :: + + '--EEEE----------HHHHHHH----EE----HHHHH------HHHHHHHHHH------HHHHHHHHHHH---------EEEE-----HHHHHHHHH------EEEEEE--HHHHHH----EE--------EE---E----------------------HHHHHHHHHHHHHHHHHHHHHHHHHHHH----EEEEE------HHHHHHHHH--' + +Find parts of the protein that maintain their secondary structure during simulation +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +In this example, we will find residue groups that maintain their secondary structure +along the simulation, and have some meaningful ('E' or 'H') secondary structure +during more than set `threshold` fraction of frames. We will call these residues +"persistent", for clarity, and label them according to the structure +that they maintain during the run: + +.. code-block:: python + + from MDAnalysis.analysis.dssp import DSSP, translate + from MDAnalysisTests.datafiles import TPR, XTC + u = mda.Universe(TPR, XTC) + threshold = 0.8 + + long_run = DSSP(u).run() + persistent_residues = translate( + long_run + .results + .dssp_ndarray + .mean(axis=0) > threshold + ) + print(''.join(persistent_residues)[:20]) + +Running this code produces :: + + '--EEEE----------HHHH' + + +Analysis classes +---------------- + +.. autoclass:: DSSP + :members: + :inherited-members: + + .. attribute:: results.dssp + + Contains the time series of the DSSP assignment as a + :class:`numpy.ndarray` array of shape ``(n_frames, n_residues)`` where each row + contains the assigned secondary structure character for each residue (whose + corresponding resid is stored in :attr:`results.resids`). 
The three characters + are ['H', 'E', '-'] and represent alpha-helix, sheet and loop, respectively. + + .. attribute:: results.dssp_ndarray + + Contains the one-hot encoding of the time series of the DSSP assignment + as a :class:`numpy.ndarray` Boolean array of shape ``(n_frames, n_residues, 3)`` + where for each residue the encoding is stored as ``(3,)`` shape + :class:`numpy.ndarray` of Booleans so that ``True`` at index 0 represents loop + ('-'), ``True`` at index 1 represents helix ('H'), and ``True`` at index 2 + represents sheet ('E'). + + .. SeeAlso:: :func:`translate` + + + .. attribute:: results.resids + + A :class:`numpy.ndarray` of length ``n_residues`` that contains the residue IDs + (resids) for the protein residues that were assigned a secondary structure. + + +Functions +--------- + +.. autofunction:: assign +.. autofunction:: translate +""" + +from typing import Union +import numpy as np +from MDAnalysis import Universe, AtomGroup + +from ..base import AnalysisBase +from ...due import due, Doi + +due.cite( + Doi("10.1002/bip.360221211"), + description="DSSP algorithm description", + path="MDAnalysis.analysis.dssp", + cite_module=True, +) + +del Doi + + +try: # pragma: no cover + from pydssp.pydssp_numpy import ( + assign, + _get_hydrogen_atom_position, + ) + + HAS_PYDSSP = True + +except ModuleNotFoundError: + HAS_PYDSSP = False + from .pydssp_numpy import ( + assign, + _get_hydrogen_atom_position, + ) + + +class DSSP(AnalysisBase): + """Assign secondary structure using the DSSP algorithm. + + Analyze a selection containing a protein and assign secondary structure + using the Kabsch-Sander algorithm :footcite:p:`Kabsch1983`. 
Only a subset + of secondary structure categories are implemented: + + - 'H' represents a generic helix (α-helix, π-helix or :math:`3_{10}` helix) + - 'E' represents 'extended strand', participating in beta-ladder (parallel + or antiparallel) + - '-' represents unordered part ("loop") + + The implementation was taken from the pydssp package (v. 0.9.0) + https://github.com/ShintaroMinami/PyDSSP by Shintaro Minami under the + MIT license. + + .. Warning:: + For DSSP to work properly, your atoms must represent a protein. The + hydrogen atom bound to the backbone nitrogen atom is matched by name + as given by the keyword argument `hydrogen_name`. There may only be + a single backbone nitrogen hydrogen atom per residue; the one exception + is proline, for which there should not exist any such hydrogens. + The default value of `hydrogen_name` should handle the common naming + conventions in the PDB and in force fields but if you encounter an error + or unusual results during your run, try to figure out how to select the + correct hydrogen atoms and report an issue in the MDAnalysis + `issue tracker <https://github.com/MDAnalysis/mdanalysis/issues>`_. + + Parameters + ---------- + atoms : Union[Universe, AtomGroup] + input Universe or AtomGroup. In both cases, only protein residues will + be chosen prior to the analysis via `select_atoms('protein')`. + Heavy atoms of the protein are then selected by name + `heavyatom_names`, and hydrogens are selected by name + `hydrogen_name`. + guess_hydrogens : bool, optional + whether you want to guess hydrogen positions, by default ``True``. + Guessing is made assuming perfect 120 degrees for all bonds that N + atom makes, and a N-H bond length of 1.01 A. + If ``guess_hydrogens`` is False, hydrogen atom positions on N atoms + will be parsed from the trajectory, except for the "hydrogen" atom + positions on PRO residues, and an N-terminal residue. 
+ heavyatom_names : tuple[str], default ("N", "CA", "C", "O O1 OT1") + selection names that will be used to select "N", "CA", "C" and "O" + atom coordinates for the secondary structure determination. The last + string contains multiple values for "O" to account for C-term residues. + hydrogen_name : str, default "H HN HT1 HT2 HT3" + This selection should only select a single hydrogen atom in each residue + (except proline), namely the one bound to the backbone nitrogen. + + .. Note:: + To work with different hydrogen-naming conventions by default, the + default selection is broad but if hydrogens are incorrectly selected + (e.g., a :exc:`ValueError` is raised) you must customize `hydrogen_name` + for your specific case. + + + Raises + ------ + ValueError + if ``guess_hydrogens`` is False but some non-PRO hydrogens are missing. + + Examples + -------- + + For example, you can assign secondary structure for a single PDB file: + + >>> from MDAnalysis.analysis.dssp import DSSP + >>> from MDAnalysisTests.datafiles import PDB + >>> import MDAnalysis as mda + >>> u = mda.Universe(PDB) + >>> run = DSSP(u).run() + >>> print("".join(run.results.dssp[0, :20])) + --EEEEE-----HHHHHHHH + + The :attr:`results.dssp` holds the time series of assigned secondary + structure, with one character for each residue. + + (Note that for displaying purposes we only print the first 20 residues + of frame 0 with ``run.results.dssp[0, :20]`` but one would typically look + at all residues ``run.results.dssp[0]``.) + + The :attr:`results.dssp_ndarray` attribute holds a + ``(n_frames, n_residues, 3)`` shape ndarray with a *one-hot encoding* + of *loop* '-' (index 0), *helix* 'H' (index 1), and *sheet* 'E' + (index 2), respectively for each frame of the trajectory. 
It can be + used to compute, for instance, the **average secondary structure**: + + >>> from MDAnalysis.analysis.dssp import translate, DSSP + >>> from MDAnalysisTests.datafiles import TPR, XTC + >>> u = mda.Universe(TPR, XTC) + >>> run = DSSP(u).run() + >>> mean_secondary_structure = translate(run.results.dssp_ndarray.mean(axis=0)) + >>> print(''.join(mean_secondary_structure)[:20]) + -EEEEEE------HHHHHHH + + + .. versionadded:: 2.8.0 + """ + + def __init__( + self, + atoms: Union[Universe, AtomGroup], + guess_hydrogens: bool = True, + *, + heavyatom_names: tuple[str, ...] = ("N", "CA", "C", "O O1 OT1"), + hydrogen_name: str = "H HN HT1 HT2 HT3", + ): + self._guess_hydrogens = guess_hydrogens + + ag: AtomGroup = atoms.select_atoms("protein") + super().__init__(ag.universe.trajectory) + + # define necessary selections + self._heavy_atoms: dict[str, "AtomGroup"] = { + t: ag.atoms[ + np.isin( + ag.names, t.split() + ) # need split() since `np.isin` takes an iterable as second argument + # and "N".split() -> ["N"] + ] + for t in heavyatom_names + } + self._hydrogens: list["AtomGroup"] = [ + res.atoms.select_atoms(f"name {hydrogen_name}") for res in ag.residues + ] + # can't do it the other way because I need missing values to exist + # so that I could fill them in later + if not self._guess_hydrogens: + # zip() assumes that _heavy_atoms and _hydrogens are ordered in the + # same way. This is true as long as the original AtomGroup ag is + # sorted. With the hard-coded protein selection for ag this is always + # true but if the protein selection above ever changes, make sure to sort first! + for calpha, hydrogen in zip( + self._heavy_atoms["CA"][1:], self._hydrogens[1:] + ): + if (calpha.resname != "PRO" and len(hydrogen) != 1) or ( + calpha.resname == "PRO" and hydrogen + ): + raise ValueError( + ( + f"Residue {calpha.residue} contains wrong number of hydrogens: " + "exactly 1 hydrogen is expected for non-PRO residues, and " + "zero hydrogens for PRO residues." 
+ ) + ) + + positions = [group.positions for group in self._heavy_atoms.values()] + if len(set(map(lambda arr: arr.shape[0], positions))) != 1: + raise ValueError( + ( + "Universe contains unequal numbers of (N,CA,C,O) atoms ('name' field)." + " Please select appropriate AtomGroup manually." + ) + ) + + def _prepare(self): + self.results.dssp_ndarray = [] + + def _get_coords(self) -> np.ndarray: + """Returns coordinates of (N,CA,C,O,H) atoms, as required by + :func:`get_hbond_map` and :func:`assign` functions. + + Returns + ------- + np.ndarray + coordinates of (N,CA,C,O,H) atoms + + Raises + ------ + ValueError + if input Universe contains different number of (N,CA,C,O) atoms + + """ + # NOTE: here we explicitly rely on the fact that `self._heavy_atoms` + # dictionary maintains order of the keys since python 3.7 + positions = [group.positions for group in self._heavy_atoms.values()] + coords = np.array(positions) + + if not self._guess_hydrogens: + guessed_h_coords = _get_hydrogen_atom_position(coords.swapaxes(0, 1)) + + h_coords = np.array( + [ + group.positions[0] if group else guessed_h_coords[idx - 1]  # guessed row k belongs to residue k + 1 + for idx, group in enumerate(self._hydrogens) + ] + ) + h_coords = np.expand_dims(h_coords, axis=0) + coords = np.vstack([coords, h_coords]) + + coords = coords.swapaxes(0, 1) + return coords + + def _single_frame(self): + coords = self._get_coords() + dssp = assign(coords) + self.results.dssp_ndarray.append(dssp) + + def _conclude(self): + self.results.dssp = translate(np.array(self.results.dssp_ndarray)) + self.results.dssp_ndarray = np.array(self.results.dssp_ndarray) + self.results.resids = self._heavy_atoms["CA"].resids + + +def translate(onehot: np.ndarray) -> np.ndarray: + """Translate a one-hot encoding summary into char-based secondary structure + assignment. + + One-hot encoding corresponds to C3 notation: + '-', 'H', 'E' are loop, helix and sheet, respectively. 
Input array must + have its last axis of shape 3: ``(n_residues, 3)`` or ``(n_frames, n_residues, 3)`` + + Examples + -------- + + .. code-block:: python + + from MDAnalysis.analysis.dssp import translate + import numpy as np + # encoding 'HE-' + onehot = np.array([[False, True, False], # 'H' + [False, False, True], # 'E' + [True, False, False]]) # '-' + ''.join(translate(onehot)) + print(''.join(translate(onehot))) + + Running this code produces :: + + HE- + + Parameters + ---------- + onehot : np.ndarray + input array of one-hot encoding in ('-', 'H', 'E') order + + Returns + ------- + np.ndarray + array of '-', 'H' and 'E' symbols with secondary structure + + + .. versionadded:: 2.8.0 + """ + C3_ALPHABET = np.array(["-", "H", "E"]) + index = np.argmax(onehot, axis=-1) + return C3_ALPHABET[index] diff --git a/package/MDAnalysis/analysis/dssp/pydssp_numpy.py b/package/MDAnalysis/analysis/dssp/pydssp_numpy.py new file mode 100644 index 0000000000..1ae8ac369e --- /dev/null +++ b/package/MDAnalysis/analysis/dssp/pydssp_numpy.py @@ -0,0 +1,269 @@ +""" +A re-implementation of DSSP algorithm :footcite:p:`Kabsch1983`, taken from +*pydssp* v.0.9.0 (https://github.com/ShintaroMinami/PyDSSP) by Shintaro Minami, +distributed under MIT license. + +Current implementation doesn't use `einops` as a dependency, instead directly +using `numpy` operations for axis rearrangement. However, this implementation +does not allow for batch computation, in contrast with `pydssp`, since it's +designed to be used in per-frame manner in protein trajectories. +""" + +import numpy as np + +CONST_Q1Q2 = 0.084 +CONST_F = 332 +DEFAULT_CUTOFF = -0.5 +DEFAULT_MARGIN = 1.0 + + +def _upsample(a: np.ndarray, window: int) -> np.ndarray: + """Performs array upsampling with given window along given axis. + + Example + ------- + .. 
code-block:: python + hbmap = np.arange(4*4).reshape(4,4) + print(hbmap) + # [[ 0 1 2 3] + # [ 4 5 6 7] + # [ 8 9 10 11] + # [12 13 14 15]] + + print(_upsample(hbmap, 3)) + # [[[[ 0 1 2] + # [ 4 5 6] + # [ 8 9 10]] + + # [[ 1 2 3] + # [ 5 6 7] + # [ 9 10 11]]] + + + # [[[ 4 5 6] + # [ 8 9 10] + # [12 13 14]] + + # [[ 5 6 7] + # [ 9 10 11] + # [13 14 15]]]] + + Parameters + ---------- + a : np.ndarray + input array + window : int + upsample window + + Returns + ------- + np.ndarray + unfolded array + """ + return _unfold(_unfold(a, window, -2), window, -2) + + +def _unfold(a: np.ndarray, window: int, axis: int): + "Helper function for 2D array upsampling" + idx = np.arange(window)[:, None] + np.arange(a.shape[axis] - window + 1)[None, :] + unfolded = np.take(a, idx, axis=axis) + return np.moveaxis(unfolded, axis - 1, -1) + + +def _get_hydrogen_atom_position(coord: np.ndarray) -> np.ndarray: + """Fills in hydrogen atoms positions if they are absent, under the + assumption that C-N-H and H-N-CA angles are perfect 120 degrees, + and N-H bond length is 1.01 A. + + Parameters + ---------- + coord : np.ndarray + input coordinates in Angstrom, shape (n_atoms, 4, 3), + where second axis corresponds to (N, CA, C, O) atom coordinates + + Returns + ------- + np.ndarray + coordinates of additional hydrogens, shape (n_atoms-1, 3) + + .. versionadded:: 2.8.0 + """ + # C_i, N_i, H_i and CA_{i+1} are all in the peptide bond plane + # we want to get C_{i+1} - N_{i} vectors and normalize them + # --------- + # v1 = vec(C_i, N_i) + # v2 = vec(CA_{i+1}, N_i) + # v3 = vec(N_i, H_i) = ? 
+ # we use the assumption that all the angles are 120 degrees, + # and |v3| = 1.01, hence + # we can derive v3 = (v1/|v1| + v2/|v2|)*|v3| + + # get v1 = vec(C_i, N_i) + vec_cn = coord[1:, 0] - coord[:-1, 2] + vec_cn = vec_cn / np.linalg.norm(vec_cn, axis=-1, keepdims=True) + + # get v2 = vec(CA_{i+1}, N_{i}) + vec_can = coord[1:, 0] - coord[1:, 1] + vec_can = vec_can / np.linalg.norm(vec_can, axis=-1, keepdims=True) + + vec_nh = vec_cn + vec_can + vec_nh = vec_nh / np.linalg.norm(vec_nh, axis=-1, keepdims=True) + + # vec_(0, H) = vec(0, N) + vec_nh + return coord[1:, 0] + 1.01 * vec_nh + + +def get_hbond_map( + coord: np.ndarray, + cutoff: float = DEFAULT_CUTOFF, + margin: float = DEFAULT_MARGIN, + return_e: bool = False, +) -> np.ndarray: + """Returns hydrogen bond map + + Parameters + ---------- + coord : np.ndarray + input coordinates in either (n, 4, 3) or (n, 5, 3) shape + (without or with hydrogens). If hydrogens are not present, then + ideal positions (see :func:_get_hydrogen_atom_positions) are used. + cutoff : float, optional + cutoff, by default DEFAULT_CUTOFF + margin : float, optional + margin, by default DEFAULT_MARGIN + return_e : bool, optional + if to return energy instead of hbond map, by default False + + Returns + ------- + np.ndarray + output hbond map or energy depending on return_e param + + + .. 
versionadded:: 2.8.0 + """ + n_atoms, n_atom_types, _ = coord.shape + assert n_atom_types in ( + 4, + 5, + ), "Number of atoms should be 4 (N,CA,C,O) or 5 (N,CA,C,O,H)" + + if n_atom_types == 4: + h_1 = _get_hydrogen_atom_position(coord) + elif n_atom_types == 5: + h_1 = coord[1:, 4] + coord = coord[:, :4] + else: # pragma: no cover + raise ValueError("Number of atoms should be 4 (N,CA,C,O) or 5 (N,CA,C,O,H)") + # after this: + # h.shape == (n_atoms, 3) + # coord.shape == (n_atoms, 4, 3) + + # distance matrix + n_1, c_0, o_0 = coord[1:, 0], coord[0:-1, 2], coord[0:-1, 3] + + n = n_atoms - 1 + cmap = np.tile(c_0, (n, 1, 1)) + omap = np.tile(o_0, (n, 1, 1)) + nmap = np.tile(n_1, (1, 1, n)).reshape(n, n, 3) + hmap = np.tile(h_1, (1, 1, n)).reshape(n, n, 3) + + d_on = np.linalg.norm(omap - nmap, axis=-1) + d_ch = np.linalg.norm(cmap - hmap, axis=-1) + d_oh = np.linalg.norm(omap - hmap, axis=-1) + d_cn = np.linalg.norm(cmap - nmap, axis=-1) + + # electrostatic interaction energy + # e[i, j] = e(CO_i) - e(NH_j) + e = np.pad( + CONST_Q1Q2 * (1.0 / d_on + 1.0 / d_ch - 1.0 / d_oh - 1.0 / d_cn) * CONST_F, + [[1, 0], [0, 1]], + ) + + if return_e: # pragma: no cover + return e + + # mask for local pairs (i,i), (i,i+1), (i,i+2) + local_mask = ~np.eye(n_atoms, dtype=bool) + local_mask *= ~np.diag(np.ones(n_atoms - 1, dtype=bool), k=-1) + local_mask *= ~np.diag(np.ones(n_atoms - 2, dtype=bool), k=-2) + # hydrogen bond map (continuous value extension of original definition) + hbond_map = np.clip(cutoff - margin - e, a_min=-margin, a_max=margin) + hbond_map = (np.sin(hbond_map / margin * np.pi / 2) + 1.0) / 2 + hbond_map = hbond_map * local_mask + + return hbond_map + + +def assign(coord: np.ndarray) -> np.ndarray: + """Assigns secondary structure for a given coordinate array, + either with or without assigned hydrogens + + Parameters + ---------- + coord : np.ndarray + input coordinates in either (n, 4, 3) or (n, 5, 3) shape, + without or with hydrogens, respectively. 
Second dimension `k` represents + (N, CA, C, O) atoms coordinates (if k=4), or (N, CA, C, O, H) coordinates + (when k=5). + + Returns + ------- + np.ndarray + output (n,) array with one-hot labels in C3 notation ('-', 'H', 'E'), + representing loop, helix and sheet, respectively. + + + .. versionadded:: 2.8.0 + """ + # get hydrogen bond map + hbmap = get_hbond_map(coord) + hbmap = np.swapaxes(hbmap, -1, -2) # convert into "i:C=O, j:N-H" form + + # identify turn 3, 4, 5 + turn3 = np.diagonal(hbmap, offset=3) > 0.0 + turn4 = np.diagonal(hbmap, offset=4) > 0.0 + turn5 = np.diagonal(hbmap, offset=5) > 0.0 + + # assignment of helical secondary structures + h3 = np.pad(turn3[:-1] * turn3[1:], [[1, 3]]) + h4 = np.pad(turn4[:-1] * turn4[1:], [[1, 4]]) + h5 = np.pad(turn5[:-1] * turn5[1:], [[1, 5]]) + + # helix4 first, as alpha helix + helix4 = h4 + np.roll(h4, 1, 0) + np.roll(h4, 2, 0) + np.roll(h4, 3, 0) + h3 = h3 * ~np.roll(helix4, -1, 0) * ~helix4 # helix4 is higher prioritized + h5 = h5 * ~np.roll(helix4, -1, 0) * ~helix4 # helix4 is higher prioritized + helix3 = h3 + np.roll(h3, 1, 0) + np.roll(h3, 2, 0) + helix5 = ( + h5 + + np.roll(h5, 1, 0) + + np.roll(h5, 2, 0) + + np.roll(h5, 3, 0) + + np.roll(h5, 4, 0) + ) + + # identify bridge + unfoldmap = _upsample(hbmap, 3) > 0.0 + unfoldmap_rev = np.swapaxes(unfoldmap, 0, 1) + + p_bridge = (unfoldmap[:, :, 0, 1] * unfoldmap_rev[:, :, 1, 2]) + ( + unfoldmap_rev[:, :, 0, 1] * unfoldmap[:, :, 1, 2] + ) + p_bridge = np.pad(p_bridge, [[1, 1], [1, 1]]) + + a_bridge = (unfoldmap[:, :, 1, 1] * unfoldmap_rev[:, :, 1, 1]) + ( + unfoldmap[:, :, 0, 2] * unfoldmap_rev[:, :, 0, 2] + ) + a_bridge = np.pad(a_bridge, [[1, 1], [1, 1]]) + + # ladder + ladder = (p_bridge + a_bridge).sum(-1) > 0.0 + + # H, E, L of C3 + helix = (helix3 + helix4 + helix5) > 0.0 + strand = ladder + loop = ~helix * ~strand + + onehot = np.stack([loop, helix, strand], axis=-1) + return onehot diff --git 
a/package/doc/sphinx/source/documentation_pages/analysis/dssp.rst b/package/doc/sphinx/source/documentation_pages/analysis/dssp.rst new file mode 100644 index 0000000000..f166fa61e4 --- /dev/null +++ b/package/doc/sphinx/source/documentation_pages/analysis/dssp.rst @@ -0,0 +1 @@ +.. automodule:: MDAnalysis.analysis.dssp.dssp \ No newline at end of file diff --git a/package/doc/sphinx/source/documentation_pages/analysis_modules.rst b/package/doc/sphinx/source/documentation_pages/analysis_modules.rst index 96ca0010dc..ee7f656875 100644 --- a/package/doc/sphinx/source/documentation_pages/analysis_modules.rst +++ b/package/doc/sphinx/source/documentation_pages/analysis_modules.rst @@ -136,6 +136,7 @@ Macromolecules analysis/gnm analysis/helix_analysis analysis/dihedrals + analysis/dssp Liquids ------- diff --git a/package/doc/sphinx/source/references.bib b/package/doc/sphinx/source/references.bib index d423e2486d..5e2167c986 100644 --- a/package/doc/sphinx/source/references.bib +++ b/package/doc/sphinx/source/references.bib @@ -771,6 +771,20 @@ @article{Kulke2022 doi = {10.1021/acs.jctc.2c00327} } +@article{Kabsch1983, +author = {Kabsch, Wolfgang and Sander, Christian}, +title = {Dictionary of protein secondary structure: Pattern recognition of hydrogen-bonded and geometrical features}, +journal = {Biopolymers}, +volume = {22}, +number = {12}, +pages = {2577-2637}, +doi = {https://doi.org/10.1002/bip.360221211}, +url = {https://onlinelibrary.wiley.com/doi/abs/10.1002/bip.360221211}, +eprint = {https://onlinelibrary.wiley.com/doi/pdf/10.1002/bip.360221211}, +abstract = {Abstract For a successful analysis of the relation between amino acid sequence and protein structure, an unambiguous and physically meaningful definition of secondary structure is essential. We have developed a set of simple and physically motivated criteria for secondary structure, programmed as a pattern-recognition process of hydrogen-bonded and geometrical features extracted from x-ray coordinates. 
Cooperative secondary structure is recognized as repeats of the elementary hydrogen-bonding patterns “turn” and “bridge.” Repeating turns are “helices,” repeating bridges are “ladders,” connected ladders are “sheets.” Geometric structure is defined in terms of the concepts torsion and curvature of differential geometry. Local chain “chirality” is the torsional handedness of four consecutive Cα positions and is positive for right-handed helices and negative for ideal twisted β-sheets. Curved pieces are defined as “bends.” Solvent “exposure” is given as the number of water molecules in possible contact with a residue. The end result is a compilation of the primary structure, including SS bonds, secondary structure, and solvent exposure of 62 different globular proteins. The presentation is in linear form: strip graphs for an overall view and strip tables for the details of each of 10.925 residues. The dictionary is also available in computer-readable form for protein structure prediction work.}, +year = {1983} +} + @article{Linke2018, title = {Fully Anisotropic Rotational Diffusion Tensor from Molecular Dynamics Simulations}, author = {Linke, Max and Köfinger, Jürgen and Hummer, Gerhard}, diff --git a/testsuite/MDAnalysisTests/analysis/test_dssp.py b/testsuite/MDAnalysisTests/analysis/test_dssp.py new file mode 100644 index 0000000000..a28dd458b5 --- /dev/null +++ b/testsuite/MDAnalysisTests/analysis/test_dssp.py @@ -0,0 +1,68 @@ +import pytest +import glob +import MDAnalysis as mda + +from MDAnalysis.analysis.dssp import DSSP, translate +from MDAnalysisTests.datafiles import DSSP as DSSP_FOLDER +from MDAnalysisTests.datafiles import TPR, XTC + + +# Files that match glob pattern '????.pdb.gz' and matching '????.pdb.dssp' files, +# containing the secondary structure assignment string, will be tested automatically. 
+@pytest.mark.parametrize("pdb_filename", glob.glob(f"{DSSP_FOLDER}/?????.pdb.gz")) +def test_file_guess_hydrogens(pdb_filename): + u = mda.Universe(pdb_filename) + with open(f"{pdb_filename.removesuffix('.gz')}.dssp", "r") as fin: + correct_answ = fin.read().strip().split()[0] + + run = DSSP(u, guess_hydrogens=True).run() + answ = "".join(run.results.dssp[0]) + assert answ == correct_answ + + +def test_trajectory(): + u = mda.Universe(TPR, XTC).select_atoms("protein").universe + run = DSSP(u).run(stop=10) + first_frame = "".join(run.results.dssp[0]) + last_frame = "".join(run.results.dssp[-1]) + avg_frame = "".join(translate(run.results.dssp_ndarray.mean(axis=0))) + + assert first_frame[:10] != last_frame[:10] == avg_frame[:10] == "-EEEEEE---" + protein = mda.Universe(TPR, XTC).select_atoms("protein") + run = DSSP(protein).run(stop=10) + + +def test_atomgroup(): + protein = mda.Universe(TPR, XTC).select_atoms("protein") + run = DSSP(protein).run(stop=10) + first_frame = "".join(run.results.dssp[0]) + last_frame = "".join(run.results.dssp[-1]) + avg_frame = "".join(translate(run.results.dssp_ndarray.mean(axis=0))) + + assert first_frame[:10] != last_frame[:10] == avg_frame[:10] == "-EEEEEE---" + + +def test_trajectory_with_hydrogens(): + u = mda.Universe(TPR, XTC).select_atoms("protein").universe + run = DSSP(u, guess_hydrogens=False).run(stop=10) + first_frame = "".join(run.results.dssp[0]) + last_frame = "".join(run.results.dssp[-1]) + avg_frame = "".join(translate(run.results.dssp_ndarray.mean(axis=0))) + + assert first_frame[:10] == last_frame[:10] == avg_frame[:10] == "-EEEEEE---" + + +@pytest.mark.parametrize("pdb_filename", glob.glob(f"{DSSP_FOLDER}/2xdgA.pdb.gz")) +def test_trajectory_without_hydrogen_fails(pdb_filename): + u = mda.Universe(pdb_filename) + with pytest.raises(ValueError): + DSSP(u, guess_hydrogens=False).run() + + +@pytest.mark.parametrize( + "pdb_filename", glob.glob(f"{DSSP_FOLDER}/1mr1D_failing.pdb.gz") +) +def 
test_trajectory_with_uneven_number_of_atoms_fails(pdb_filename): + u = mda.Universe(pdb_filename) + with pytest.raises(ValueError): + DSSP(u, guess_hydrogens=True).run() diff --git a/testsuite/MDAnalysisTests/data/dssp/1ahsA.pdb.dssp b/testsuite/MDAnalysisTests/data/dssp/1ahsA.pdb.dssp new file mode 100644 index 0000000000..9c622abc6b --- /dev/null +++ b/testsuite/MDAnalysisTests/data/dssp/1ahsA.pdb.dssp @@ -0,0 +1 @@ +----E-E-------E-------EEEEE-E--EEEEEE---EEEE-HHHH---------EEEEEEE---EE-----EE------EEEE--EEE-----EEE-----EEEEE-----EEEEEEE-EE- 1ahsA.pdb diff --git a/testsuite/MDAnalysisTests/data/dssp/1ahsA.pdb.gz b/testsuite/MDAnalysisTests/data/dssp/1ahsA.pdb.gz new file mode 100644 index 0000000000..0952c81a61 Binary files /dev/null and b/testsuite/MDAnalysisTests/data/dssp/1ahsA.pdb.gz differ diff --git a/testsuite/MDAnalysisTests/data/dssp/1bvyF.pdb.dssp b/testsuite/MDAnalysisTests/data/dssp/1bvyF.pdb.dssp new file mode 100644 index 0000000000..f62196a120 --- /dev/null +++ b/testsuite/MDAnalysisTests/data/dssp/1bvyF.pdb.dssp @@ -0,0 +1 @@ +---EEEEEE----HHHHHHHHHHHHHH------EEEEHHH---------EEEEEEE-E--E-----HHHHHHHH-----------EEEEEEE----HHH--HHHHHHHHHHH-------E--EEEE----HHHHHHHHHHHHHHHHHHH--- 1bvyF.pdb diff --git a/testsuite/MDAnalysisTests/data/dssp/1bvyF.pdb.gz b/testsuite/MDAnalysisTests/data/dssp/1bvyF.pdb.gz new file mode 100644 index 0000000000..a87c66cc08 Binary files /dev/null and b/testsuite/MDAnalysisTests/data/dssp/1bvyF.pdb.gz differ diff --git a/testsuite/MDAnalysisTests/data/dssp/1dx5I.pdb.dssp b/testsuite/MDAnalysisTests/data/dssp/1dx5I.pdb.dssp new file mode 100644 index 0000000000..fece7e5053 --- /dev/null +++ b/testsuite/MDAnalysisTests/data/dssp/1dx5I.pdb.dssp @@ -0,0 +1 @@ +-----HHHH-----EEEE-----EEEE----EEEE--E---EEE-----EEE-EEE-----EEE----EEEE---EEEE--HHHH------EEEE----EEEEE------EEEE---- 1dx5I.pdb diff --git a/testsuite/MDAnalysisTests/data/dssp/1dx5I.pdb.gz b/testsuite/MDAnalysisTests/data/dssp/1dx5I.pdb.gz new file mode 100644 index 
0000000000..67848cfb74 Binary files /dev/null and b/testsuite/MDAnalysisTests/data/dssp/1dx5I.pdb.gz differ diff --git a/testsuite/MDAnalysisTests/data/dssp/1eteA.pdb.dssp b/testsuite/MDAnalysisTests/data/dssp/1eteA.pdb.dssp new file mode 100644 index 0000000000..e7a7869083 --- /dev/null +++ b/testsuite/MDAnalysisTests/data/dssp/1eteA.pdb.dssp @@ -0,0 +1 @@ +--------------HHHHHHHHH-------EEEEE--E------HHHHHHHHHHHHHHHHHH---HHHHHHHHHHHHHHHHHHH-----------EEEEEHHHHHHHHHHHHHHHH--------HHH----E-- 1eteA.pdb diff --git a/testsuite/MDAnalysisTests/data/dssp/1eteA.pdb.gz b/testsuite/MDAnalysisTests/data/dssp/1eteA.pdb.gz new file mode 100644 index 0000000000..1a2749a1cc Binary files /dev/null and b/testsuite/MDAnalysisTests/data/dssp/1eteA.pdb.gz differ diff --git a/testsuite/MDAnalysisTests/data/dssp/1h4aX.pdb.dssp b/testsuite/MDAnalysisTests/data/dssp/1h4aX.pdb.dssp new file mode 100644 index 0000000000..4afa6a4fd1 --- /dev/null +++ b/testsuite/MDAnalysisTests/data/dssp/1h4aX.pdb.dssp @@ -0,0 +1 @@ +-EEEEE-E---E--EEEE---E-----------EEEE-E-EEEEE-E---E--EEEE--EEE--HHHH--------EEEEE------EEEEE-EHHHE--EEEE---E--HHH-----E--EEEE-E--EEEE-E---E--EEEE--EEE--HHHH-----E--EEEE----- 1h4aX.pdb diff --git a/testsuite/MDAnalysisTests/data/dssp/1h4aX.pdb.gz b/testsuite/MDAnalysisTests/data/dssp/1h4aX.pdb.gz new file mode 100644 index 0000000000..f5465f39b4 Binary files /dev/null and b/testsuite/MDAnalysisTests/data/dssp/1h4aX.pdb.gz differ diff --git a/testsuite/MDAnalysisTests/data/dssp/1i8nA.pdb.dssp b/testsuite/MDAnalysisTests/data/dssp/1i8nA.pdb.dssp new file mode 100644 index 0000000000..5326cbf72b --- /dev/null +++ b/testsuite/MDAnalysisTests/data/dssp/1i8nA.pdb.dssp @@ -0,0 +1 @@ +-EEE----EEEEEEE--E--HHHEEE-----HHHHHHHHHH--------EEEEE-----EEEE------HHH-E-----EEEEEEEE-- 1i8nA.pdb diff --git a/testsuite/MDAnalysisTests/data/dssp/1i8nA.pdb.gz b/testsuite/MDAnalysisTests/data/dssp/1i8nA.pdb.gz new file mode 100644 index 0000000000..2af1b729ad Binary files /dev/null and 
b/testsuite/MDAnalysisTests/data/dssp/1i8nA.pdb.gz differ diff --git a/testsuite/MDAnalysisTests/data/dssp/1lpbA.pdb.dssp b/testsuite/MDAnalysisTests/data/dssp/1lpbA.pdb.dssp new file mode 100644 index 0000000000..313ce7f605 --- /dev/null +++ b/testsuite/MDAnalysisTests/data/dssp/1lpbA.pdb.dssp @@ -0,0 +1 @@ +-----E-E--E---HHHE----EE--------EE---E----EEE------EE----E---EEEE----HHHHH----EEEEEE- 1lpbA.pdb diff --git a/testsuite/MDAnalysisTests/data/dssp/1lpbA.pdb.gz b/testsuite/MDAnalysisTests/data/dssp/1lpbA.pdb.gz new file mode 100644 index 0000000000..71ca46a6a9 Binary files /dev/null and b/testsuite/MDAnalysisTests/data/dssp/1lpbA.pdb.gz differ diff --git a/testsuite/MDAnalysisTests/data/dssp/1mr1D.pdb.dssp b/testsuite/MDAnalysisTests/data/dssp/1mr1D.pdb.dssp new file mode 100644 index 0000000000..92145899bb --- /dev/null +++ b/testsuite/MDAnalysisTests/data/dssp/1mr1D.pdb.dssp @@ -0,0 +1 @@ +-EEEEE-----EEEEE-HHH--------EEE-----EE-HHHH---------------------HHH--EE---------HHHHHHHHHHHH---- 1mr1D.pdb diff --git a/testsuite/MDAnalysisTests/data/dssp/1mr1D.pdb.gz b/testsuite/MDAnalysisTests/data/dssp/1mr1D.pdb.gz new file mode 100644 index 0000000000..2b547cd4cb Binary files /dev/null and b/testsuite/MDAnalysisTests/data/dssp/1mr1D.pdb.gz differ diff --git a/testsuite/MDAnalysisTests/data/dssp/1mr1D_failing.pdb.gz b/testsuite/MDAnalysisTests/data/dssp/1mr1D_failing.pdb.gz new file mode 100644 index 0000000000..25bd55a047 Binary files /dev/null and b/testsuite/MDAnalysisTests/data/dssp/1mr1D_failing.pdb.gz differ diff --git a/testsuite/MDAnalysisTests/data/dssp/1or4A.pdb.dssp b/testsuite/MDAnalysisTests/data/dssp/1or4A.pdb.dssp new file mode 100644 index 0000000000..c21242961c --- /dev/null +++ b/testsuite/MDAnalysisTests/data/dssp/1or4A.pdb.dssp @@ -0,0 +1 @@ +--------HHHH---EE--HHHHHHHHHHHH----HHHHHHHHHHHHHHHH-HHHHHHHHHHHH---HHHHHHHHHH--HHHHHHHHHHHHHHH----EEHHHHHHHHHHHHHHHHH---HHHHHHHHHHHHHHHHHHHHHH---HHHHHHHHHHHHHHHHHHHHHH-- 1or4A.pdb diff --git 
a/testsuite/MDAnalysisTests/data/dssp/1or4A.pdb.gz b/testsuite/MDAnalysisTests/data/dssp/1or4A.pdb.gz new file mode 100644 index 0000000000..a3d977ebf9 Binary files /dev/null and b/testsuite/MDAnalysisTests/data/dssp/1or4A.pdb.gz differ diff --git a/testsuite/MDAnalysisTests/data/dssp/1pdoA.pdb.dssp b/testsuite/MDAnalysisTests/data/dssp/1pdoA.pdb.dssp new file mode 100644 index 0000000000..b01ade20e8 --- /dev/null +++ b/testsuite/MDAnalysisTests/data/dssp/1pdoA.pdb.dssp @@ -0,0 +1 @@ +--EEEEE--E-HHHHHHHHHHHHH-----EEEE-E-----HHHHHHHHHHHH--------EEEEE-----HHHHHHHHHH-----EEEEE---HHHHHHHHHHH-----HHHHHHHHHHHHHH------ 1pdoA.pdb diff --git a/testsuite/MDAnalysisTests/data/dssp/1pdoA.pdb.gz b/testsuite/MDAnalysisTests/data/dssp/1pdoA.pdb.gz new file mode 100644 index 0000000000..5a73fd9350 Binary files /dev/null and b/testsuite/MDAnalysisTests/data/dssp/1pdoA.pdb.gz differ diff --git a/testsuite/MDAnalysisTests/data/dssp/1v7mV.pdb.dssp b/testsuite/MDAnalysisTests/data/dssp/1v7mV.pdb.dssp new file mode 100644 index 0000000000..9b0b0dc747 --- /dev/null +++ b/testsuite/MDAnalysisTests/data/dssp/1v7mV.pdb.dssp @@ -0,0 +1 @@ +----HHHHHHHHHHHHHHHHH-----------EEEE-------------HHHHHHHHHHHHHHHHHHHHHHH-------HHHHHHHHHHHHHHHHHHHHHHH---------EEEE--HHHHHHHHHHHHH--------------- 1v7mV.pdb diff --git a/testsuite/MDAnalysisTests/data/dssp/1v7mV.pdb.gz b/testsuite/MDAnalysisTests/data/dssp/1v7mV.pdb.gz new file mode 100644 index 0000000000..51096e8c91 Binary files /dev/null and b/testsuite/MDAnalysisTests/data/dssp/1v7mV.pdb.gz differ diff --git a/testsuite/MDAnalysisTests/data/dssp/1y1lA.pdb.dssp b/testsuite/MDAnalysisTests/data/dssp/1y1lA.pdb.dssp new file mode 100644 index 0000000000..f2b16de635 --- /dev/null +++ b/testsuite/MDAnalysisTests/data/dssp/1y1lA.pdb.dssp @@ -0,0 +1 @@ +-EEEEE-----HHHHHHHHHH-------EEEEE--------HHHHHHHH-----------EHHH--HHH--EEEEEE-------------EEEEE----------HHHHHHHHHHHHHHHH--- 1y1lA.pdb diff --git a/testsuite/MDAnalysisTests/data/dssp/1y1lA.pdb.gz 
b/testsuite/MDAnalysisTests/data/dssp/1y1lA.pdb.gz new file mode 100644 index 0000000000..c93f478ddd Binary files /dev/null and b/testsuite/MDAnalysisTests/data/dssp/1y1lA.pdb.gz differ diff --git a/testsuite/MDAnalysisTests/data/dssp/2a2lA.pdb.dssp b/testsuite/MDAnalysisTests/data/dssp/2a2lA.pdb.dssp new file mode 100644 index 0000000000..345dadf2a8 --- /dev/null +++ b/testsuite/MDAnalysisTests/data/dssp/2a2lA.pdb.dssp @@ -0,0 +1 @@ +-------------HHHHHHHHHHHHHHHHH-----EEEEE-----E-EEEE-----HHHHHHHHHHHHHHHH----HHH-HHH---------HHH-HHH-------EEE-E----E--EEEEE---HHHHHHHHHHHHHHHHH-- 2a2lA.pdb diff --git a/testsuite/MDAnalysisTests/data/dssp/2a2lA.pdb.gz b/testsuite/MDAnalysisTests/data/dssp/2a2lA.pdb.gz new file mode 100644 index 0000000000..9bc3692152 Binary files /dev/null and b/testsuite/MDAnalysisTests/data/dssp/2a2lA.pdb.gz differ diff --git a/testsuite/MDAnalysisTests/data/dssp/2cayA.pdb.dssp b/testsuite/MDAnalysisTests/data/dssp/2cayA.pdb.dssp new file mode 100644 index 0000000000..42d4862ecc --- /dev/null +++ b/testsuite/MDAnalysisTests/data/dssp/2cayA.pdb.dssp @@ -0,0 +1 @@ +-HHH--EE--E-----E-------E-EEE--EEEEE--EE------EEEEEE--EEEEEE---HHHH-EEEEHHHE-EEEEE--------EEEEEE---------EEEEE-----HHHHHHHHHHHHHHHH- 2cayA.pdb diff --git a/testsuite/MDAnalysisTests/data/dssp/2cayA.pdb.gz b/testsuite/MDAnalysisTests/data/dssp/2cayA.pdb.gz new file mode 100644 index 0000000000..c121620fee Binary files /dev/null and b/testsuite/MDAnalysisTests/data/dssp/2cayA.pdb.gz differ diff --git a/testsuite/MDAnalysisTests/data/dssp/2cviA.pdb.dssp b/testsuite/MDAnalysisTests/data/dssp/2cviA.pdb.dssp new file mode 100644 index 0000000000..3f7211afd3 --- /dev/null +++ b/testsuite/MDAnalysisTests/data/dssp/2cviA.pdb.dssp @@ -0,0 +1 @@ +-EEEEEEEEE----HHHHHHHHH----E-EEEE------EEEEEEE--HHHHHHHHH--HHH---E-EEEEEE---------- 2cviA.pdb diff --git a/testsuite/MDAnalysisTests/data/dssp/2cviA.pdb.gz b/testsuite/MDAnalysisTests/data/dssp/2cviA.pdb.gz new file mode 100644 index 0000000000..0475dda8fa 
Binary files /dev/null and b/testsuite/MDAnalysisTests/data/dssp/2cviA.pdb.gz differ diff --git a/testsuite/MDAnalysisTests/data/dssp/2fvvA.pdb.dssp b/testsuite/MDAnalysisTests/data/dssp/2fvvA.pdb.dssp new file mode 100644 index 0000000000..adb3d765ba --- /dev/null +++ b/testsuite/MDAnalysisTests/data/dssp/2fvvA.pdb.dssp @@ -0,0 +1 @@ +----E-----EEEEEEEEE------EEEEEE-------EEE-EEE------HHHHHHHHHHHHH-EEEEE--E-EEEEE----EEEEEEEEEE-EE----HHHHHH---EEEEEHHHHHHHH----HHHHH---- 2fvvA.pdb diff --git a/testsuite/MDAnalysisTests/data/dssp/2fvvA.pdb.gz b/testsuite/MDAnalysisTests/data/dssp/2fvvA.pdb.gz new file mode 100644 index 0000000000..092e7efe47 Binary files /dev/null and b/testsuite/MDAnalysisTests/data/dssp/2fvvA.pdb.gz differ diff --git a/testsuite/MDAnalysisTests/data/dssp/2gu3A.pdb.dssp b/testsuite/MDAnalysisTests/data/dssp/2gu3A.pdb.dssp new file mode 100644 index 0000000000..28d2750d42 --- /dev/null +++ b/testsuite/MDAnalysisTests/data/dssp/2gu3A.pdb.dssp @@ -0,0 +1 @@ +----HHHHHHHHHHH--E-EE-EEEEEE---EEEEEEEEE----EEEEEEE-------EEEEHHH---HHHHHHHHHH---E-EE-EEEEEEE--EEEEEEEEEE----EEEEEEE-----E-EEE-- 2gu3A.pdb diff --git a/testsuite/MDAnalysisTests/data/dssp/2gu3A.pdb.gz b/testsuite/MDAnalysisTests/data/dssp/2gu3A.pdb.gz new file mode 100644 index 0000000000..a1f83a8e84 Binary files /dev/null and b/testsuite/MDAnalysisTests/data/dssp/2gu3A.pdb.gz differ diff --git a/testsuite/MDAnalysisTests/data/dssp/2i39A.pdb.dssp b/testsuite/MDAnalysisTests/data/dssp/2i39A.pdb.dssp new file mode 100644 index 0000000000..cf4cb9fa85 --- /dev/null +++ b/testsuite/MDAnalysisTests/data/dssp/2i39A.pdb.dssp @@ -0,0 +1 @@ +--HHHHHHHHHHHHHH-------HHHHHHHHHHHH----HHHHHHHHHHHH---HHHHHHH------HHHHHHHHHHHHHHHHHH-------HHHHHHHHHHHHHHHHHHHHHHH-- 2i39A.pdb diff --git a/testsuite/MDAnalysisTests/data/dssp/2i39A.pdb.gz b/testsuite/MDAnalysisTests/data/dssp/2i39A.pdb.gz new file mode 100644 index 0000000000..d886d9b3d2 Binary files /dev/null and b/testsuite/MDAnalysisTests/data/dssp/2i39A.pdb.gz 
differ diff --git a/testsuite/MDAnalysisTests/data/dssp/2j49A.pdb.dssp b/testsuite/MDAnalysisTests/data/dssp/2j49A.pdb.dssp new file mode 100644 index 0000000000..4d6572a2c0 --- /dev/null +++ b/testsuite/MDAnalysisTests/data/dssp/2j49A.pdb.dssp @@ -0,0 +1 @@ +---HHHHHHHHHHHHH---HHHHHHHHHHHHHHHHHHHHHHHHH-HHHHHHHHHHHHHHHHHHHHHHHH-------HHHHHH-HHHHHHH---EEEEE-HHHHHHHHHHHH--HHHHHHHHHHHHHHHEEEEE- 2j49A.pdb diff --git a/testsuite/MDAnalysisTests/data/dssp/2j49A.pdb.gz b/testsuite/MDAnalysisTests/data/dssp/2j49A.pdb.gz new file mode 100644 index 0000000000..b3a2e5366c Binary files /dev/null and b/testsuite/MDAnalysisTests/data/dssp/2j49A.pdb.gz differ diff --git a/testsuite/MDAnalysisTests/data/dssp/2qdlA.pdb.dssp b/testsuite/MDAnalysisTests/data/dssp/2qdlA.pdb.dssp new file mode 100644 index 0000000000..840ea32a4e --- /dev/null +++ b/testsuite/MDAnalysisTests/data/dssp/2qdlA.pdb.dssp @@ -0,0 +1 @@ +----------EEEEEEEE--EEEEEEHHH--EEEE----E--------E-EEEEE--EEEEEEEHHHHH-----------EEEEEEE--EEEEEEE-EE-EEEEE-HHH-E---------HHHE-EEEEE--EEEEEE------EE--EE---- 2qdlA.pdb diff --git a/testsuite/MDAnalysisTests/data/dssp/2qdlA.pdb.gz b/testsuite/MDAnalysisTests/data/dssp/2qdlA.pdb.gz new file mode 100644 index 0000000000..fd459995fa Binary files /dev/null and b/testsuite/MDAnalysisTests/data/dssp/2qdlA.pdb.gz differ diff --git a/testsuite/MDAnalysisTests/data/dssp/2va0A.pdb.dssp b/testsuite/MDAnalysisTests/data/dssp/2va0A.pdb.dssp new file mode 100644 index 0000000000..778ec06573 --- /dev/null +++ b/testsuite/MDAnalysisTests/data/dssp/2va0A.pdb.dssp @@ -0,0 +1 @@ +-HHHHHHHHHHHHH----HHHHHHHHH---HHHH--EEEEE----E---------HHHHHHH------EEEEE----EEEEEEEE-----EEEEEEE-- 2va0A.pdb diff --git a/testsuite/MDAnalysisTests/data/dssp/2va0A.pdb.gz b/testsuite/MDAnalysisTests/data/dssp/2va0A.pdb.gz new file mode 100644 index 0000000000..631a0d6c88 Binary files /dev/null and b/testsuite/MDAnalysisTests/data/dssp/2va0A.pdb.gz differ diff --git a/testsuite/MDAnalysisTests/data/dssp/2xcjA.pdb.dssp 
b/testsuite/MDAnalysisTests/data/dssp/2xcjA.pdb.dssp new file mode 100644 index 0000000000..300b5635be --- /dev/null +++ b/testsuite/MDAnalysisTests/data/dssp/2xcjA.pdb.dssp @@ -0,0 +1 @@ +---HHHHHHHHHHH----HHHHHHHH---HHHHHHHHH------HHHHHHHHH-HHHH--HHHHHH---EHHH-E---HHH--- 2xcjA.pdb diff --git a/testsuite/MDAnalysisTests/data/dssp/2xcjA.pdb.gz b/testsuite/MDAnalysisTests/data/dssp/2xcjA.pdb.gz new file mode 100644 index 0000000000..e37e1a5630 Binary files /dev/null and b/testsuite/MDAnalysisTests/data/dssp/2xcjA.pdb.gz differ diff --git a/testsuite/MDAnalysisTests/data/dssp/2xdgA.pdb.dssp b/testsuite/MDAnalysisTests/data/dssp/2xdgA.pdb.dssp new file mode 100644 index 0000000000..d3ae94b2bb --- /dev/null +++ b/testsuite/MDAnalysisTests/data/dssp/2xdgA.pdb.dssp @@ -0,0 +1 @@ +-HHHHHHHHHHHH---------E--EE----EE--E---EEEEE---HHHHH-------EEEEEE--EE---E--HHHH----HHHH-- 2xdgA.pdb diff --git a/testsuite/MDAnalysisTests/data/dssp/2xdgA.pdb.gz b/testsuite/MDAnalysisTests/data/dssp/2xdgA.pdb.gz new file mode 100644 index 0000000000..3195df0cbf Binary files /dev/null and b/testsuite/MDAnalysisTests/data/dssp/2xdgA.pdb.gz differ diff --git a/testsuite/MDAnalysisTests/data/dssp/2xr6A.pdb.dssp b/testsuite/MDAnalysisTests/data/dssp/2xr6A.pdb.dssp new file mode 100644 index 0000000000..eb645f8d00 --- /dev/null +++ b/testsuite/MDAnalysisTests/data/dssp/2xr6A.pdb.dssp @@ -0,0 +1 @@ +------EEE--EEEEE----E-HHHHHHHHHH---EE-----HHHHHHHHHHHHH----EE--EE---E--E-EE----E--HHHHHHE------------EEEEE--EEEEE-----EEEEEEEE---- 2xr6A.pdb diff --git a/testsuite/MDAnalysisTests/data/dssp/2xr6A.pdb.gz b/testsuite/MDAnalysisTests/data/dssp/2xr6A.pdb.gz new file mode 100644 index 0000000000..f3fbfc6ddd Binary files /dev/null and b/testsuite/MDAnalysisTests/data/dssp/2xr6A.pdb.gz differ diff --git a/testsuite/MDAnalysisTests/data/dssp/3a4rA.pdb.dssp b/testsuite/MDAnalysisTests/data/dssp/3a4rA.pdb.dssp new file mode 100644 index 0000000000..dce6f719bf --- /dev/null +++ 
b/testsuite/MDAnalysisTests/data/dssp/3a4rA.pdb.dssp @@ -0,0 +1 @@ +-------EEEEEE------EEEEEE----EHHHHHHHHHHHH-------EEEE--EE-----EHHHH------EEEEE- 3a4rA.pdb diff --git a/testsuite/MDAnalysisTests/data/dssp/3a4rA.pdb.gz b/testsuite/MDAnalysisTests/data/dssp/3a4rA.pdb.gz new file mode 100644 index 0000000000..11952cb78f Binary files /dev/null and b/testsuite/MDAnalysisTests/data/dssp/3a4rA.pdb.gz differ diff --git a/testsuite/MDAnalysisTests/data/dssp/3aqgA.pdb.dssp b/testsuite/MDAnalysisTests/data/dssp/3aqgA.pdb.dssp new file mode 100644 index 0000000000..11f5369171 --- /dev/null +++ b/testsuite/MDAnalysisTests/data/dssp/3aqgA.pdb.dssp @@ -0,0 +1 @@ +-EE-----EEEE--------E-EEEEEE----E-EEEEEE---E---EE----EEEEEE------E-EEEEEE-----EEEEEE----EEEEE----EEEEEE-----EEE-EEEEEEE--EE-EEEEEEEE- 3aqgA.pdb diff --git a/testsuite/MDAnalysisTests/data/dssp/3aqgA.pdb.gz b/testsuite/MDAnalysisTests/data/dssp/3aqgA.pdb.gz new file mode 100644 index 0000000000..2f904d6e50 Binary files /dev/null and b/testsuite/MDAnalysisTests/data/dssp/3aqgA.pdb.gz differ diff --git a/testsuite/MDAnalysisTests/data/dssp/3e8mA.pdb.dssp b/testsuite/MDAnalysisTests/data/dssp/3e8mA.pdb.dssp new file mode 100644 index 0000000000..67a4b75293 --- /dev/null +++ b/testsuite/MDAnalysisTests/data/dssp/3e8mA.pdb.dssp @@ -0,0 +1 @@ +-----EEEE-----------EE-----EE-----HHHHHHHHHHH---EEEEE-----HHHHHHHHH----EEE-----HHHHHHHHHHHH---HHHEEEE---HHHHHHH----EEE------HHHH--------------HHHHHHHHH----HHHHHHH-- 3e8mA.pdb diff --git a/testsuite/MDAnalysisTests/data/dssp/3e8mA.pdb.gz b/testsuite/MDAnalysisTests/data/dssp/3e8mA.pdb.gz new file mode 100644 index 0000000000..f349037d04 Binary files /dev/null and b/testsuite/MDAnalysisTests/data/dssp/3e8mA.pdb.gz differ diff --git a/testsuite/MDAnalysisTests/data/dssp/3ejfA.pdb.dssp b/testsuite/MDAnalysisTests/data/dssp/3ejfA.pdb.dssp new file mode 100644 index 0000000000..bdb20626a5 --- /dev/null +++ b/testsuite/MDAnalysisTests/data/dssp/3ejfA.pdb.dssp @@ -0,0 +1 @@ 
+-----EEEEEE--HHHHHHHHHHH---EEEEEE---------HHHHHHHHHH-HHHHHHHHHHHHHH----EEEE------E-EEEEE---------HHHHHHHHHHH-------EEEEE----------HHHHHHHHHHHH-----EEEEEE--HHHHHHHHH-- 3ejfA.pdb diff --git a/testsuite/MDAnalysisTests/data/dssp/3ejfA.pdb.gz b/testsuite/MDAnalysisTests/data/dssp/3ejfA.pdb.gz new file mode 100644 index 0000000000..e4bb339f23 Binary files /dev/null and b/testsuite/MDAnalysisTests/data/dssp/3ejfA.pdb.gz differ diff --git a/testsuite/MDAnalysisTests/data/dssp/3fhkA.pdb.dssp b/testsuite/MDAnalysisTests/data/dssp/3fhkA.pdb.dssp new file mode 100644 index 0000000000..302e3184d7 --- /dev/null +++ b/testsuite/MDAnalysisTests/data/dssp/3fhkA.pdb.dssp @@ -0,0 +1 @@ +-HHHHHHHHHHHHHHHHHHHHHHH----EE---HHHHHHHHHH---EEEEEEE---HHHHH-HHHHHHHHHHH------EEEEEE----HHHHHHHH-----------EEEEEE--EE-EEE-HHH-----HHHHHHHHHHHHHHH- 3fhkA.pdb diff --git a/testsuite/MDAnalysisTests/data/dssp/3fhkA.pdb.gz b/testsuite/MDAnalysisTests/data/dssp/3fhkA.pdb.gz new file mode 100644 index 0000000000..8e8c9a9122 Binary files /dev/null and b/testsuite/MDAnalysisTests/data/dssp/3fhkA.pdb.gz differ diff --git a/testsuite/MDAnalysisTests/data/dssp/3gfsA.pdb.dssp b/testsuite/MDAnalysisTests/data/dssp/3gfsA.pdb.dssp new file mode 100644 index 0000000000..42f0f0a448 --- /dev/null +++ b/testsuite/MDAnalysisTests/data/dssp/3gfsA.pdb.dssp @@ -0,0 +1 @@ +-EEEE-------HHHHHHHHHHHH---EEEE------------HHHH--HHHHHHHHHHHH--EEEEEEEE----E-HHHHHHHH---HHHH---EEEEEEE-------HHHHHHHHHHHHH---EE---EEEE-HHHEE----EE-HHHHHHHHHHHHHHHHHHH- 3gfsA.pdb diff --git a/testsuite/MDAnalysisTests/data/dssp/3gfsA.pdb.gz b/testsuite/MDAnalysisTests/data/dssp/3gfsA.pdb.gz new file mode 100644 index 0000000000..4de47eb474 Binary files /dev/null and b/testsuite/MDAnalysisTests/data/dssp/3gfsA.pdb.gz differ diff --git a/testsuite/MDAnalysisTests/data/dssp/3gknA.pdb.dssp b/testsuite/MDAnalysisTests/data/dssp/3gknA.pdb.dssp new file mode 100644 index 0000000000..b548cf6be2 --- /dev/null +++ 
b/testsuite/MDAnalysisTests/data/dssp/3gknA.pdb.dssp @@ -0,0 +1 @@ +---------HHHHH--EE-----EE--HHH----EEEEE------HHHHHHHHHHHHHHHHHHH---EEEEEE---HHHHHHHHHHH----EEEE----HHHHH---EEEEEE--EEEEEE--EEEEE-----E-EEE-------HHHHHHHHHHHH-- 3gknA.pdb diff --git a/testsuite/MDAnalysisTests/data/dssp/3gknA.pdb.gz b/testsuite/MDAnalysisTests/data/dssp/3gknA.pdb.gz new file mode 100644 index 0000000000..78e780147a Binary files /dev/null and b/testsuite/MDAnalysisTests/data/dssp/3gknA.pdb.gz differ diff --git a/testsuite/MDAnalysisTests/data/dssp/3gwiA.pdb.dssp b/testsuite/MDAnalysisTests/data/dssp/3gwiA.pdb.dssp new file mode 100644 index 0000000000..8bdb7873f8 --- /dev/null +++ b/testsuite/MDAnalysisTests/data/dssp/3gwiA.pdb.dssp @@ -0,0 +1 @@ +--EE-EEE-----E-HHHHHHHHHHHHH------HHHHHHHH---HHHHHHHHHH-EE-EEE-------EEEEEEE-E--EEEEEEEE-HHHHH--E-EEEE--EEEE--HHHHHHHHHHHHHHHH---EEEEEEEEEEE-------HHH---EEE-EEEEEE- 3gwiA.pdb diff --git a/testsuite/MDAnalysisTests/data/dssp/3gwiA.pdb.gz b/testsuite/MDAnalysisTests/data/dssp/3gwiA.pdb.gz new file mode 100644 index 0000000000..93e7baa7cd Binary files /dev/null and b/testsuite/MDAnalysisTests/data/dssp/3gwiA.pdb.gz differ diff --git a/testsuite/MDAnalysisTests/data/dssp/3hklA.pdb.dssp b/testsuite/MDAnalysisTests/data/dssp/3hklA.pdb.dssp new file mode 100644 index 0000000000..f860477fd4 --- /dev/null +++ b/testsuite/MDAnalysisTests/data/dssp/3hklA.pdb.dssp @@ -0,0 +1 @@ +-EEEE----------------EEEE-----HHHHHHHHHHHHHHHH-------HHHHHHHHHHHH--EE-------E-EE-HHHHHHHHH------HHHHHHHHHHHHHH----------HHH----------EE------ 3hklA.pdb diff --git a/testsuite/MDAnalysisTests/data/dssp/3hklA.pdb.gz b/testsuite/MDAnalysisTests/data/dssp/3hklA.pdb.gz new file mode 100644 index 0000000000..cb3242dde6 Binary files /dev/null and b/testsuite/MDAnalysisTests/data/dssp/3hklA.pdb.gz differ diff --git a/testsuite/MDAnalysisTests/data/dssp/3ieyB.pdb.dssp b/testsuite/MDAnalysisTests/data/dssp/3ieyB.pdb.dssp new file mode 100644 index 0000000000..27ef17d682 --- /dev/null +++ 
b/testsuite/MDAnalysisTests/data/dssp/3ieyB.pdb.dssp @@ -0,0 +1 @@ +-EE---HHHH----EE----EE--HHHHHHHHHH--HHHHHH---HHHHHHHHHHH---HHHHHHHHHHHHH---EEEE----EEEE------------EEEEEEE------HHHHHHHHHHHHH---EEEEEEE-----EEEEEE------ 3ieyB.pdb diff --git a/testsuite/MDAnalysisTests/data/dssp/3ieyB.pdb.gz b/testsuite/MDAnalysisTests/data/dssp/3ieyB.pdb.gz new file mode 100644 index 0000000000..0bbbe4c150 Binary files /dev/null and b/testsuite/MDAnalysisTests/data/dssp/3ieyB.pdb.gz differ diff --git a/testsuite/MDAnalysisTests/data/dssp/3ii2A.pdb.dssp b/testsuite/MDAnalysisTests/data/dssp/3ii2A.pdb.dssp new file mode 100644 index 0000000000..3e38e36232 --- /dev/null +++ b/testsuite/MDAnalysisTests/data/dssp/3ii2A.pdb.dssp @@ -0,0 +1 @@ +--EEEEEEEEE---EEEEEEEEE----EE----EEE----EEEEEE--EEEEEEEEEEE----EEEEEEEEEEE-----EEEEE-EEEEEEHHHHHHHHH-----HHHHHHHH---------------------HHHHHHHHHHHHHHH- 3ii2A.pdb diff --git a/testsuite/MDAnalysisTests/data/dssp/3ii2A.pdb.gz b/testsuite/MDAnalysisTests/data/dssp/3ii2A.pdb.gz new file mode 100644 index 0000000000..12019b60b2 Binary files /dev/null and b/testsuite/MDAnalysisTests/data/dssp/3ii2A.pdb.gz differ diff --git a/testsuite/MDAnalysisTests/data/dssp/3k7pA.pdb.dssp b/testsuite/MDAnalysisTests/data/dssp/3k7pA.pdb.dssp new file mode 100644 index 0000000000..d7056ed3b5 --- /dev/null +++ b/testsuite/MDAnalysisTests/data/dssp/3k7pA.pdb.dssp @@ -0,0 +1 @@ +--EEEEEEE--HHHHHHHHHHHHHHHH---EEEEE----------HHHHHHHHHHHHH-----EEEEEE---HHHHHHHH------EEE---HHHHHHHHH-----EEEEE-----HHHHHHHHHHHHH------HHHHHHHHHHHHHHHH-- 3k7pA.pdb diff --git a/testsuite/MDAnalysisTests/data/dssp/3k7pA.pdb.gz b/testsuite/MDAnalysisTests/data/dssp/3k7pA.pdb.gz new file mode 100644 index 0000000000..f3debe4c8f Binary files /dev/null and b/testsuite/MDAnalysisTests/data/dssp/3k7pA.pdb.gz differ diff --git a/testsuite/MDAnalysisTests/data/dssp/3l4rA.pdb.dssp b/testsuite/MDAnalysisTests/data/dssp/3l4rA.pdb.dssp new file mode 100644 index 0000000000..0156c9d286 --- /dev/null +++ 
b/testsuite/MDAnalysisTests/data/dssp/3l4rA.pdb.dssp @@ -0,0 +1 @@ +-----HHHH-EE-EE-EEEE--HHH--------EEE-EEEEE--EEEEEEEEE----EEEEEEEEEE-----EEEEE---EEEEEE-EEE---EEEEEEEEEE--EEEEEEEEEE--HHH----HHHHHHHHHHH---HHHEEE------- 3l4rA.pdb diff --git a/testsuite/MDAnalysisTests/data/dssp/3l4rA.pdb.gz b/testsuite/MDAnalysisTests/data/dssp/3l4rA.pdb.gz new file mode 100644 index 0000000000..34e14832ef Binary files /dev/null and b/testsuite/MDAnalysisTests/data/dssp/3l4rA.pdb.gz differ diff --git a/testsuite/MDAnalysisTests/data/dssp/3lqcA.pdb.dssp b/testsuite/MDAnalysisTests/data/dssp/3lqcA.pdb.dssp new file mode 100644 index 0000000000..e22ad715ac --- /dev/null +++ b/testsuite/MDAnalysisTests/data/dssp/3lqcA.pdb.dssp @@ -0,0 +1 @@ +--HHHEEEEEE----HHHHHHHHH--HHHHHHHHH-----EEEEEEEE---EE--EEEEEEEE--EEEEEEE-------HHH-EE-E--EE---HHHHHH------EEEE-HHH--HHHH--EE-EEEEEEE---------EE-EEEEE-- 3lqcA.pdb diff --git a/testsuite/MDAnalysisTests/data/dssp/3lqcA.pdb.gz b/testsuite/MDAnalysisTests/data/dssp/3lqcA.pdb.gz new file mode 100644 index 0000000000..8238a06452 Binary files /dev/null and b/testsuite/MDAnalysisTests/data/dssp/3lqcA.pdb.gz differ diff --git a/testsuite/MDAnalysisTests/data/dssp/3nbkA.pdb.dssp b/testsuite/MDAnalysisTests/data/dssp/3nbkA.pdb.dssp new file mode 100644 index 0000000000..0aa476d4fd --- /dev/null +++ b/testsuite/MDAnalysisTests/data/dssp/3nbkA.pdb.dssp @@ -0,0 +1 @@ +---EEEEEE------HHHHHHHHHHHHH--EEEEEEE----------HHHHHHHHHH-------EEEEEE---HHHHHHH----EEEEEE-----HHHHHHHHHHHHHHH--EEEEEE--HHH----HHHHHHHHH-----HHH--HHHHHHHHHHH- 3nbkA.pdb diff --git a/testsuite/MDAnalysisTests/data/dssp/3nbkA.pdb.gz b/testsuite/MDAnalysisTests/data/dssp/3nbkA.pdb.gz new file mode 100644 index 0000000000..461d1b5f98 Binary files /dev/null and b/testsuite/MDAnalysisTests/data/dssp/3nbkA.pdb.gz differ diff --git a/testsuite/MDAnalysisTests/data/dssp/3nngA.pdb.dssp b/testsuite/MDAnalysisTests/data/dssp/3nngA.pdb.dssp new file mode 100644 index 0000000000..d1c270bc5a --- /dev/null +++ 
b/testsuite/MDAnalysisTests/data/dssp/3nngA.pdb.dssp @@ -0,0 +1 @@ +-----EEE------EEEE---HHH-HHHH-HHHHH-------EEEE-HHHH---EEEEE----E-E-EEEE-------HHHE--EEEEEEEE------EE----EE---------EEEE----E--E-EEEEEE-E---EEEE--EEEEEE-- 3nngA.pdb diff --git a/testsuite/MDAnalysisTests/data/dssp/3nngA.pdb.gz b/testsuite/MDAnalysisTests/data/dssp/3nngA.pdb.gz new file mode 100644 index 0000000000..b52289de3a Binary files /dev/null and b/testsuite/MDAnalysisTests/data/dssp/3nngA.pdb.gz differ diff --git a/testsuite/MDAnalysisTests/data/dssp/3ny7A.pdb.dssp b/testsuite/MDAnalysisTests/data/dssp/3ny7A.pdb.dssp new file mode 100644 index 0000000000..974b3ef6a4 --- /dev/null +++ b/testsuite/MDAnalysisTests/data/dssp/3ny7A.pdb.dssp @@ -0,0 +1 @@ +-----EEEE--------EEEEEEE--E-HHHHHHHHHHHH-------EEEEE--E---E-HHHHHHHHHHHHH-----EEEEE---HHHHHHHHH-----E---EEEE--HHHH---- 3ny7A.pdb diff --git a/testsuite/MDAnalysisTests/data/dssp/3ny7A.pdb.gz b/testsuite/MDAnalysisTests/data/dssp/3ny7A.pdb.gz new file mode 100644 index 0000000000..947c8dacc9 Binary files /dev/null and b/testsuite/MDAnalysisTests/data/dssp/3ny7A.pdb.gz differ diff --git a/testsuite/MDAnalysisTests/data/dssp/3nzmA.pdb.dssp b/testsuite/MDAnalysisTests/data/dssp/3nzmA.pdb.dssp new file mode 100644 index 0000000000..717697c682 --- /dev/null +++ b/testsuite/MDAnalysisTests/data/dssp/3nzmA.pdb.dssp @@ -0,0 +1 @@ +------EE---EEEE---EEEEHHHHHH-----EEEEE----EEEEEEE-EEEE--EEE-EEEEE----EEEE----EEEE----EEEHHHHHHH--EEEEE----------------EEHHHH--EEEEEE-EEEE--EEEEEEEEE-----EEE----EE- 3nzmA.pdb diff --git a/testsuite/MDAnalysisTests/data/dssp/3nzmA.pdb.gz b/testsuite/MDAnalysisTests/data/dssp/3nzmA.pdb.gz new file mode 100644 index 0000000000..dfee8baa7a Binary files /dev/null and b/testsuite/MDAnalysisTests/data/dssp/3nzmA.pdb.gz differ diff --git a/testsuite/MDAnalysisTests/data/dssp/3on9A.pdb.dssp b/testsuite/MDAnalysisTests/data/dssp/3on9A.pdb.dssp new file mode 100644 index 0000000000..d1aff0dd02 --- /dev/null +++ 
b/testsuite/MDAnalysisTests/data/dssp/3on9A.pdb.dssp @@ -0,0 +1 @@ +----EEEEEEEEE-------EEE---EEEEE---EEEEEEEE-----E-EEEEEEE--EEEEEEEE------------EEEEEEEE-----EEE------------EEEEEEE-----EEEEEEEEE-----HHH-EEEEEE------------------ 3on9A.pdb diff --git a/testsuite/MDAnalysisTests/data/dssp/3on9A.pdb.gz b/testsuite/MDAnalysisTests/data/dssp/3on9A.pdb.gz new file mode 100644 index 0000000000..96a9884e31 Binary files /dev/null and b/testsuite/MDAnalysisTests/data/dssp/3on9A.pdb.gz differ diff --git a/testsuite/MDAnalysisTests/data/dssp/3pivA.pdb.dssp b/testsuite/MDAnalysisTests/data/dssp/3pivA.pdb.dssp new file mode 100644 index 0000000000..194df27789 --- /dev/null +++ b/testsuite/MDAnalysisTests/data/dssp/3pivA.pdb.dssp @@ -0,0 +1 @@ +-HHH--HHHHHHHHHHHHHH-------------HHHHHHHH---HHHHHHHHHHHHHHHHHHH--HHH-------HHHHHHHHHHHHHHHHHHHHHHH---------HHHHHHHHHHHHHHHHHHH----HHHHHHHHHHHHHHHHHHHHHHHHH- 3pivA.pdb diff --git a/testsuite/MDAnalysisTests/data/dssp/3pivA.pdb.gz b/testsuite/MDAnalysisTests/data/dssp/3pivA.pdb.gz new file mode 100644 index 0000000000..d5aed111cf Binary files /dev/null and b/testsuite/MDAnalysisTests/data/dssp/3pivA.pdb.gz differ diff --git a/testsuite/MDAnalysisTests/data/dssp/3q4oA.pdb.dssp b/testsuite/MDAnalysisTests/data/dssp/3q4oA.pdb.dssp new file mode 100644 index 0000000000..c96bc74e4d --- /dev/null +++ b/testsuite/MDAnalysisTests/data/dssp/3q4oA.pdb.dssp @@ -0,0 +1 @@ +---HHHHHHHHHHHHHHHHHHHHHHHHHHHH--HHHHHHHHHHHHHHHHHHHHHHH-----------------HHHHHHHHHHHHHH---HHHHHHHHHHHHHHHHHHHHHHH-----HHHHHHHHHHHHHHHHHHHHHHHHHHH-----------HHHHHHHH----- 3q4oA.pdb diff --git a/testsuite/MDAnalysisTests/data/dssp/3q4oA.pdb.gz b/testsuite/MDAnalysisTests/data/dssp/3q4oA.pdb.gz new file mode 100644 index 0000000000..485f79a314 Binary files /dev/null and b/testsuite/MDAnalysisTests/data/dssp/3q4oA.pdb.gz differ diff --git a/testsuite/MDAnalysisTests/data/dssp/3so6A.pdb.dssp b/testsuite/MDAnalysisTests/data/dssp/3so6A.pdb.dssp new file mode 100644 index 
0000000000..92c1d70ffa --- /dev/null +++ b/testsuite/MDAnalysisTests/data/dssp/3so6A.pdb.dssp @@ -0,0 +1 @@ +---EEEEEEE-EEEE------HHHHHHHHHHHHHHHH------EEEEEEEE--EEEEEE-----E-EEEEHHHE-EEEE------EEEEEEE------EEEEEEE---HHHHHHHHHHHHHHHHHHHHHHH------ 3so6A.pdb diff --git a/testsuite/MDAnalysisTests/data/dssp/3so6A.pdb.gz b/testsuite/MDAnalysisTests/data/dssp/3so6A.pdb.gz new file mode 100644 index 0000000000..13a7ef3598 Binary files /dev/null and b/testsuite/MDAnalysisTests/data/dssp/3so6A.pdb.gz differ diff --git a/testsuite/MDAnalysisTests/data/dssp/3t5gB.pdb.dssp b/testsuite/MDAnalysisTests/data/dssp/3t5gB.pdb.dssp new file mode 100644 index 0000000000..39901f2873 --- /dev/null +++ b/testsuite/MDAnalysisTests/data/dssp/3t5gB.pdb.dssp @@ -0,0 +1 @@ +-HHHHHHHHH-EEE-EEEEEE-----E-EEE---------EEEEEE-HHHHH--EEEEEEEEEE---E--EEEEEEEEE--EE-EEEEEE-E-E---EEEEEEEEEE---HHH---HHHH---EEEEEEEEE--EE-EEEEEEEEE- 3t5gB.pdb diff --git a/testsuite/MDAnalysisTests/data/dssp/3t5gB.pdb.gz b/testsuite/MDAnalysisTests/data/dssp/3t5gB.pdb.gz new file mode 100644 index 0000000000..74caee64a0 Binary files /dev/null and b/testsuite/MDAnalysisTests/data/dssp/3t5gB.pdb.gz differ diff --git a/testsuite/MDAnalysisTests/data/dssp/3vjzA.pdb.dssp b/testsuite/MDAnalysisTests/data/dssp/3vjzA.pdb.dssp new file mode 100644 index 0000000000..44a0251d63 --- /dev/null +++ b/testsuite/MDAnalysisTests/data/dssp/3vjzA.pdb.dssp @@ -0,0 +1 @@ +--------HHHHH--HHHHHHHHHHHHHHHHHHHH--------HHHHHHHHHHHHHHHHHHHHHHHHHHH--HHHHH---HHHHHH-----HHHHHHHHHHHHHHHHHHHHHHHHHH---HHHHHHH-HHHHHHHHHHHHHHHHHHHHHHHHHHH-HHHHH--- 3vjzA.pdb diff --git a/testsuite/MDAnalysisTests/data/dssp/3vjzA.pdb.gz b/testsuite/MDAnalysisTests/data/dssp/3vjzA.pdb.gz new file mode 100644 index 0000000000..78289adf78 Binary files /dev/null and b/testsuite/MDAnalysisTests/data/dssp/3vjzA.pdb.gz differ diff --git a/testsuite/MDAnalysisTests/data/dssp/4dkcA.pdb.dssp b/testsuite/MDAnalysisTests/data/dssp/4dkcA.pdb.dssp new file mode 100644 index 
0000000000..db880de365 --- /dev/null +++ b/testsuite/MDAnalysisTests/data/dssp/4dkcA.pdb.dssp @@ -0,0 +1 @@ +-HHHHHHHHHHHH--HHHHHHHH---------EEE-HHH---HHHHHHHH-----HHHHHHHHHHHHHHHHHHHH---------HHHHHHHHHHHHHHHH--------HHHHHHHHHH-------EEE-HHHHHHHHHHHHHHHHHHHH-----HHH---- 4dkcA.pdb diff --git a/testsuite/MDAnalysisTests/data/dssp/4dkcA.pdb.gz b/testsuite/MDAnalysisTests/data/dssp/4dkcA.pdb.gz new file mode 100644 index 0000000000..1b8ac16fd6 Binary files /dev/null and b/testsuite/MDAnalysisTests/data/dssp/4dkcA.pdb.gz differ diff --git a/testsuite/MDAnalysisTests/data/dssp/4gcnA.pdb.dssp b/testsuite/MDAnalysisTests/data/dssp/4gcnA.pdb.dssp new file mode 100644 index 0000000000..f0b796b97b --- /dev/null +++ b/testsuite/MDAnalysisTests/data/dssp/4gcnA.pdb.dssp @@ -0,0 +1 @@ +--HHHHHHHHHHHHHHHHHH---HHHHHHHHHHHHHH----HHHHHHHHHHHHH---HHHHHHHHHHHHHHHHH----HHHHHHHHHHHHHHHHH---HHHHHHHHHHHHHH---HHHHHHHHHHH- 4gcnA.pdb diff --git a/testsuite/MDAnalysisTests/data/dssp/4gcnA.pdb.gz b/testsuite/MDAnalysisTests/data/dssp/4gcnA.pdb.gz new file mode 100644 index 0000000000..9f75931737 Binary files /dev/null and b/testsuite/MDAnalysisTests/data/dssp/4gcnA.pdb.gz differ diff --git a/testsuite/MDAnalysisTests/data/dssp/README.md b/testsuite/MDAnalysisTests/data/dssp/README.md new file mode 100644 index 0000000000..a826b22c24 --- /dev/null +++ b/testsuite/MDAnalysisTests/data/dssp/README.md @@ -0,0 +1 @@ +Files that match glob pattern '?????.pdb.gz' and matching '?????.pdb.dssp' files, containing the secondary structure assignment string, will be tested automatically. 
diff --git a/testsuite/MDAnalysisTests/datafiles.py b/testsuite/MDAnalysisTests/datafiles.py index 521a0b12b2..dda33cb10b 100644 --- a/testsuite/MDAnalysisTests/datafiles.py +++ b/testsuite/MDAnalysisTests/datafiles.py @@ -250,6 +250,7 @@ "PDB_charges", # PDB file with formal charges "SURFACE_PDB", # 111 FCC lattice topology for NSGrid bug #2345 "SURFACE_TRR", # full precision coordinates for NSGrid bug #2345 + "DSSP", # DSSP test suite ] from importlib import resources @@ -660,5 +661,8 @@ SURFACE_PDB = (_data_ref / 'surface.pdb.bz2').as_posix() SURFACE_TRR = (_data_ref / 'surface.trr').as_posix() +# DSSP testing: from https://github.com/ShintaroMinami/PyDSSP +DSSP = (_data_ref / 'dssp').as_posix() + # This should be the last line: clean up namespace del resources diff --git a/testsuite/MDAnalysisTests/utils/test_duecredit.py b/testsuite/MDAnalysisTests/utils/test_duecredit.py index 673b592bcc..adb32a5f50 100644 --- a/testsuite/MDAnalysisTests/utils/test_duecredit.py +++ b/testsuite/MDAnalysisTests/utils/test_duecredit.py @@ -72,7 +72,10 @@ def test_duecredit_collector_primary(self, module, path, citekey): "qcprot2"), ("MDAnalysis.analysis.encore", "MDAnalysis.analysis.encore", - "10.1371/journal.pcbi.1004415") + "10.1371/journal.pcbi.1004415"), + ("MDAnalysis.analysis.dssp", + "MDAnalysis.analysis.dssp", + "10.1002/bip.360221211") ]) def test_duecredit_collector_analysis_modules(self, module, path, citekey): importlib.import_module(module) diff --git a/testsuite/pyproject.toml b/testsuite/pyproject.toml index 62f311edb0..4af3b1d85c 100644 --- a/testsuite/pyproject.toml +++ b/testsuite/pyproject.toml @@ -128,6 +128,8 @@ MDAnalysisTests = [ "data/*.tng", "data/*.pdbx", "data/*.txt", + "data/dssp/*.pdb.gz", + "data/dssp/*.pdb.dssp", ] [tool.pytest.ini_options]