MDAnalysis · yuxuanzhuang · Oct 7, 2024 · Sep 27, 2024 · Sep 27, 2024 · Sep 27, 2024
diff --git a/package/CHANGELOG b/package/CHANGELOG
@@ -63,6 +63,7 @@ Enhancements
  * enables parallelization for analysis.bat.BAT (Issue #4663)
  * enable parallelization for analysis.dihedrals.{Dihedral,Ramachandran,Janin}
    (Issue #4673) 
+ * enables parallelization for analysis.dssp.dssp.DSSP (Issue #4674)
  * Improve error message for `AtomGroup.unwrap()` when bonds are not present.(Issue #4436, PR #4642)
  * Add `analysis.DSSP` module for protein secondary structure assignment, based on [pydssp](https://github.com/ShintaroMinami/PyDSSP)
  * Added a tqdm progress bar for `MDAnalysis.analysis.pca.PCA.transform()`

diff --git a/package/MDAnalysis/analysis/dssp/dssp.py b/package/MDAnalysis/analysis/dssp/dssp.py
@@ -148,7 +148,7 @@
 import numpy as np
 from MDAnalysis import Universe, AtomGroup
 
-from ..base import AnalysisBase
+from ..base import AnalysisBase, ResultsGroup
 from ...due import due, Doi
 
 due.cite(
@@ -196,7 +196,7 @@ class DSSP(AnalysisBase):
     .. Warning::
        For DSSP to work properly, your atoms must represent a protein. The
        hydrogen atom bound to the backbone nitrogen atom is matched by name
-       as given by the keyword argument `hydrogen_atom`. There may only be 
+       as given by the keyword argument `hydrogen_name`. There may only be
        a single backbone nitrogen hydrogen atom per residue; the one exception
        is proline, for which there should not exist any such hydrogens.
-       The default value of `hydrogen_atom` should handle the common naming
+       The default value of `hydrogen_name` should handle the common naming
@@ -229,8 +229,8 @@ class DSSP(AnalysisBase):
         (except proline), namely the one bound to the backbone nitrogen.
 
         .. Note::
-           To work with different hydrogen-naming conventions by default, the 
-           default selection is broad but if hydrogens are incorrectly selected 
+           To work with different hydrogen-naming conventions by default, the
+           default selection is broad but if hydrogens are incorrectly selected
            (e.g., a :exc:`ValueError` is raised) you must customize `hydrogen_name`
            for your specific case.
 
@@ -263,7 +263,7 @@ class DSSP(AnalysisBase):
     The :attr:`results.dssp_ndarray` attribute holds a
     ``(n_frames, n_residues, 3)`` shape ndarray with a *one-hot encoding*
     of *loop* '-' (index 0), *helix* 'H' (index 1), and *sheet* 'E'
-    (index 2), respectively for each frame of the trajectory. It can be 
+    (index 2), respectively for each frame of the trajectory. It can be
     used to compute, for instance, the **average secondary structure**:
 
     >>> from MDAnalysis.analysis.dssp import translate, DSSP
@@ -276,8 +276,24 @@ class DSSP(AnalysisBase):
 
 
     .. versionadded:: 2.8.0
+
+    .. versionchanged:: 2.8.0
+       Enabled **parallel execution** with the ``multiprocessing`` and ``dask``
+       backends; use the new method :meth:`get_supported_backends` to see all
+       supported backends.
     """
 
+    _analysis_algorithm_is_parallelizable = True
+
+    @classmethod
+    def get_supported_backends(cls):
+        """Return the names of the execution backends DSSP can run with.
+
+        The parallel ``multiprocessing`` and ``dask`` backends are listed
+        next to ``serial`` because the class is flagged as parallelizable.
+        """
+        return ("serial", "multiprocessing", "dask")
+
     def __init__(
         self,
         atoms: Union[Universe, AtomGroup],
@@ -382,6 +398,10 @@ def _conclude(self):
         self.results.dssp_ndarray = np.array(self.results.dssp_ndarray)
         self.results.resids = self._heavy_atoms["CA"].resids
 
+    def _get_aggregator(self):
+        flatten = ResultsGroup.flatten_sequence  # merges per-worker lists
+        return ResultsGroup(lookup={"dssp_ndarray": flatten})
+
 
 def translate(onehot: np.ndarray) -> np.ndarray:
     """Translate a one-hot encoding summary into char-based secondary structure

diff --git a/testsuite/MDAnalysisTests/analysis/conftest.py b/testsuite/MDAnalysisTests/analysis/conftest.py
@@ -10,6 +10,7 @@
 from MDAnalysis.analysis.dihedrals import Dihedral, Ramachandran, Janin
 from MDAnalysis.analysis.bat import BAT
 from MDAnalysis.analysis.gnm import GNMAnalysis
+from MDAnalysis.analysis.dssp.dssp import DSSP
 from MDAnalysis.lib.util import is_installed
 
 def params_for_cls(cls, exclude: list[str] = None):
@@ -122,3 +123,10 @@ def client_GNMAnalysis(request):
 @pytest.fixture(scope='module', params=params_for_cls(BAT))
 def client_BAT(request):
     return request.param
+
+
+# MDAnalysis.analysis.dssp.dssp
+
+@pytest.fixture(scope="module", params=params_for_cls(DSSP))
+def client_DSSP(request):
+    return request.param  # tests unpack it: DSSP(...).run(**client_DSSP)
diff --git a/testsuite/MDAnalysisTests/analysis/test_dssp.py b/testsuite/MDAnalysisTests/analysis/test_dssp.py
@@ -10,41 +10,41 @@
 # Files that match glob pattern '????.pdb.gz' and matching '????.pdb.dssp' files,
 # containing the secondary structure assignment string, will be tested automatically.
 @pytest.mark.parametrize("pdb_filename", glob.glob(f"{DSSP_FOLDER}/?????.pdb.gz"))
-def test_file_guess_hydrogens(pdb_filename):
+def test_file_guess_hydrogens(pdb_filename, client_DSSP):
     u = mda.Universe(pdb_filename)
     with open(f"{pdb_filename.rstrip('.gz')}.dssp", "r") as fin:
         correct_answ = fin.read().strip().split()[0]
 
-    run = DSSP(u, guess_hydrogens=True).run()
+    run = DSSP(u, guess_hydrogens=True).run(**client_DSSP)
     answ = "".join(run.results.dssp[0])
     assert answ == correct_answ
 
 
-def test_trajectory():
+def test_trajectory(client_DSSP):
     u = mda.Universe(TPR, XTC).select_atoms("protein").universe
-    run = DSSP(u).run(stop=10)
+    run = DSSP(u).run(stop=10, **client_DSSP)  # whole Universe as input
     first_frame = "".join(run.results.dssp[0])
     last_frame = "".join(run.results.dssp[-1])
     avg_frame = "".join(translate(run.results.dssp_ndarray.mean(axis=0)))
 
     assert first_frame[:10] != last_frame[:10] == avg_frame[:10] == "-EEEEEE---"
     protein = mda.Universe(TPR, XTC).select_atoms("protein")
-    run = DSSP(protein).run(stop=10)
+    run = DSSP(protein).run(stop=10, **client_DSSP)  # AtomGroup: smoke run
 
 
-def test_atomgroup():
+def test_atomgroup(client_DSSP):
     protein = mda.Universe(TPR, XTC).select_atoms("protein")
-    run = DSSP(protein).run(stop=10)
+    run = DSSP(protein).run(stop=10, **client_DSSP)  # AtomGroup as input
     first_frame = "".join(run.results.dssp[0])
     last_frame = "".join(run.results.dssp[-1])
     avg_frame = "".join(translate(run.results.dssp_ndarray.mean(axis=0)))
 
     assert first_frame[:10] != last_frame[:10] == avg_frame[:10] == "-EEEEEE---"
 
 
-def test_trajectory_with_hydrogens():
+def test_trajectory_with_hydrogens(client_DSSP):
     u = mda.Universe(TPR, XTC).select_atoms("protein").universe
-    run = DSSP(u, guess_hydrogens=False).run(stop=10)
+    run = DSSP(u, guess_hydrogens=False).run(**client_DSSP, stop=10)
     first_frame = "".join(run.results.dssp[0])
     last_frame = "".join(run.results.dssp[-1])
     avg_frame = "".join(translate(run.results.dssp_ndarray.mean(axis=0)))
@@ -53,28 +53,29 @@ def test_trajectory_with_hydrogens():
 
 
 @pytest.mark.parametrize("pdb_filename", glob.glob(f"{DSSP_FOLDER}/2xdgA.pdb.gz"))
-def test_trajectory_without_hydrogen_fails(pdb_filename):
+def test_trajectory_without_hydrogen_fails(pdb_filename, client_DSSP):
     u = mda.Universe(pdb_filename)
-    with pytest.raises(ValueError):
-        DSSP(u, guess_hydrogens=False).run()
+    with pytest.raises(ValueError):  # presumably: file has no H atoms
+        DSSP(u, guess_hydrogens=False).run(**client_DSSP)
 
 
 @pytest.mark.parametrize(
     "pdb_filename", glob.glob(f"{DSSP_FOLDER}/1mr1D_failing.pdb.gz")
 )
-def test_trajectory_with_uneven_number_of_atoms_fails(pdb_filename):
+def test_trajectory_with_uneven_number_of_atoms_fails(pdb_filename,
+                                                      client_DSSP):
     u = mda.Universe(pdb_filename)
-    with pytest.raises(ValueError):
-        DSSP(u, guess_hydrogens=True).run()
+    with pytest.raises(ValueError):  # presumably: unequal atom counts
+        DSSP(u, guess_hydrogens=True).run(**client_DSSP)
 
 
 @pytest.mark.parametrize(
     "pdb_filename", glob.glob(f"{DSSP_FOLDER}/wrong_hydrogens.pdb.gz")
 )
-def test_exception_raises_with_atom_index(pdb_filename):
+def test_exception_raises_with_atom_index(pdb_filename, client_DSSP):
     u = mda.Universe(pdb_filename)
     with pytest.raises(
         ValueError,
-        match="Residue <Residue SER, 298> contains*",
+        match="Residue <Residue SER, 298> contains*",  # regex: "*" repeats "s"
     ):
-        DSSP(u, guess_hydrogens=False).run()
+        DSSP(u, guess_hydrogens=False).run(**client_DSSP)