From 3e9934638ba2aba2cef308021fc71d1cbd2d6e32 Mon Sep 17 00:00:00 2001
From: Tim Holy <tim.holy@gmail.com>
Date: Wed, 21 Aug 2024 09:24:56 -0500
Subject: [PATCH 1/2] Switch to BioStructures for PDB/structure rep

BioStructures is poised to become the standard package for protein
structure I/O and representation. While MIToS is retained for the MSA
functionality and a few very useful data tables (vanderwaals radii and
atom-level amino acid traits), otherwise this is a complete switch. It's
somewhat surprising how few changes were needed to the tests (some of
which were essentially cleanups that could have happened earlier).
---
 NEWS.md             |   7 +++
 Project.toml        |   6 +--
 README.md           |   4 +-
 src/GPCRAnalysis.jl |  25 +++++++----
 src/align.jl        | 103 +++++++++++++++++++++++++-------------------
 src/alphafold.jl    |  12 +++---
 src/analyze.jl      |  73 ++++++++++++++-----------------
 src/deprecated.jl   |   4 --
 src/features.jl     |  73 ++++++++++++++++---------------
 src/forces.jl       |  12 +++---
 src/msa.jl          |  35 ++++++++++++---
 src/pocket.jl       |  20 +++++----
 src/properties.jl   |  40 ++++++++---------
 src/tmalign.jl      |  28 ++++++------
 src/utils.jl        |  32 +++++++++++---
 test/deprecated.jl  |  11 -----
 test/runtests.jl    |  56 ++++++++++++------------
 17 files changed, 302 insertions(+), 239 deletions(-)
 create mode 100644 NEWS.md
 delete mode 100644 src/deprecated.jl
 delete mode 100644 test/deprecated.jl

diff --git a/NEWS.md b/NEWS.md
new file mode 100644
index 0000000..9f593ad
--- /dev/null
+++ b/NEWS.md
@@ -0,0 +1,7 @@
+# v0.5.0
+
+Breaking changes:
+- BioStructures and MIToS implement different parsers for PDBFiles and different representations of structures. As of BioStructures v4, there is momentum for standardizing the Julia ecosystem around BioStructures. GPCRAnalysis version 0.5.0 switches from MIToS to Biostructures. `getchain` now returns the BioStructures representation, which requires users to switch to their API for further manipulations.
+
+  MIToS is still used for its Multiple Sequence Alignment (MSA) module.
+- The `align*` utilities now return a transformation, rather than the transformed structure. Users should use `applytransform!(chain, tform)` or `applytransform!(copy(chain), tform)` manually. An advantage of this approach is that it can also be used to align complexes based on a superposition computed from alignment of individual chains.
diff --git a/Project.toml b/Project.toml
index 2e530da..5a5e5bd 100644
--- a/Project.toml
+++ b/Project.toml
@@ -1,11 +1,11 @@
 name = "GPCRAnalysis"
 uuid = "c1d73f9e-d42a-418a-8d5b-c7b00ec0358f"
 authors = ["Tim Holy <tim.holy@gmail.com> and contributors"]
-version = "0.4.3"
+version = "0.5.0"
 
 [deps]
+BioStructures = "de9282ab-8554-53be-b2d6-f6c222edabfc"
 ColorTypes = "3da002f7-5984-5a60-b8a6-cbb66c0b333f"
-CoordinateTransformations = "150eb455-5306-5404-9cee-2592286d6298"
 Distances = "b4f34e82-e78d-54a5-968a-f98e89d6e8f7"
 Downloads = "f43a241f-c20a-4ad4-852c-f6b1247861c6"
 FixedPointNumbers = "53c48c17-4a7d-5ca2-90c5-79b7896eea93"
@@ -34,8 +34,8 @@ JuMP = "4076af6c-e467-56ae-b986-b466b2749572"
 GPCRAnalysisJuMPExt = ["JuMP", "HiGHS"]
 
 [compat]
+BioStructures = "4.2"
 ColorTypes = "0.11"
-CoordinateTransformations = "0.6"
 Distances = "0.10"
 Downloads = "1"
 FixedPointNumbers = "0.8"
diff --git a/README.md b/README.md
index f882163..d3dc1b8 100644
--- a/README.md
+++ b/README.md
@@ -5,4 +5,6 @@
 [![Coverage](https://codecov.io/gh/HolyLab/GPCRAnalysis.jl/branch/main/graph/badge.svg)](https://codecov.io/gh/HolyLab/GPCRAnalysis.jl)
 
 A suite for analyzing G-protein coupled receptors. Much of this is just a convenience wrapper and/or extension of
-[MIToS](https://github.com/diegozea/MIToS.jl), and some of this code may move there or elsewhere eventually.
+[BioStructures](https://github.com/BioJulia/BioStructures.jl) and [MIToS](https://github.com/diegozea/MIToS.jl).
+
+See NEWS.md for major changes.
diff --git a/src/GPCRAnalysis.jl b/src/GPCRAnalysis.jl
index 0f41f1d..fab9be1 100644
--- a/src/GPCRAnalysis.jl
+++ b/src/GPCRAnalysis.jl
@@ -4,10 +4,14 @@ using Downloads
 using Statistics
 using LinearAlgebra
 
-using MIToS
-using MIToS.Pfam
-using MIToS.MSA
-using MIToS.PDB
+using BioStructures
+
+using MIToS: MIToS, Pfam, MSA
+using MIToS.MSA: AbstractMultipleSequenceAlignment, AnnotatedAlignedSequence, AnnotatedMultipleSequenceAlignment,
+                 ReducedAlphabet, GAP, XAA
+using MIToS.MSA: getsequence, getannotsequence, getsequencemapping, getresidues, three2residue, sequencenames,
+                 filtersequences!, percentsimilarity
+using MIToS.PDB: vanderwaalsradius, ishydrophobic, isaromatic, iscationic, isanionic, ishbonddonor, ishbondacceptor
 
 using MultivariateStats
 using Distances
@@ -20,7 +24,6 @@ using TravelingSalesmanHeuristics
 
 using Interpolations
 using MutableConvexHulls
-using CoordinateTransformations
 using GaussianMixtureAlignment
 
 # For querying the Uniprot and Alphafold REST APIs
@@ -32,7 +35,11 @@ using GZip
 using FixedPointNumbers
 using ColorTypes
 
-export @res_str
+const ChainLike = Union{Chain, AbstractVector{<:AbstractResidue}}   # an iterable of residues
+const ResidueLike = Union{Residue, AbstractVector{<:AbstractAtom}}  # an iterable of atoms
+const StructureLike = Union{ChainLike, Model, MolecularStructure}
+
+# export @res_str
 
 export SequenceMapping, AccessionCode, MSACode, NWGapCosts
 export species, uniprotX, query_uniprot_accession, query_ebi_proteins, query_ncbi
@@ -40,8 +47,9 @@ export try_download_alphafold, query_alphafold_latest, download_alphafolds, alph
        findall_subseq, pLDDT, pLDDTcolor
 export align_to_axes, align_to_membrane, align_nw, align_ranges
 export filter_species!, filter_long!, sortperm_msa, chimerax_script
-export project_sequences, columnwise_entropy, align, residue_centroid, residue_centroid_matrix, alphacarbon_coordinates, alphacarbon_coordinates_matrix, mapclosest, chargelocations, positive_locations, negative_locations
-export StructAlign, residueindex, ismapped
+export project_sequences, columnwise_entropy, align, residue_centroid, residue_centroid_matrix, alphacarbon_coordinates,
+       alphacarbon_coordinates_matrix, mapclosest, chargelocations, positive_locations, negative_locations
+export StructAlign, residueindex, ismapped, skipnothing
 export BWScheme, lookupbw
 export aa_properties, aa_properties_zscored
 export sidechaincentroid, scvector, inward_tm_residues, inward_ecl_residues
@@ -61,6 +69,5 @@ include("chimerax.jl")
 include("pocket.jl")
 include("features.jl")
 include("forces.jl")
-include("deprecated.jl")
 
 end
diff --git a/src/align.jl b/src/align.jl
index e8aef64..ed099f3 100644
--- a/src/align.jl
+++ b/src/align.jl
@@ -1,13 +1,16 @@
 ## Minimum-distance alignment
 
 """
-    align(fixedpos::AbstractMatrix{Float64}, moving::AbstractVector{PDBResidue}, sm::SequenceMapping)
-    align(fixed::AbstractVector{PDBResidue}, moving::AbstractVector{PDBResidue}, sm::SequenceMapping)
-
-Return a rotated and shifted version of `moving` so that the α-carbons of residues `moving[sm]` have least
-mean square error deviation from positions `fixedpos` or those of residues `fixed`.
+    tform = align(fixedpos::AbstractMatrix{Float64}, moving::Chain, sm::SequenceMapping)
+    tform = align(fixed::Chain, moving::Chain, sm::SequenceMapping)
+
+Return a rotated and shifted version of `moving` so that the centroids of
+residues `moving[sm]` have least mean square error deviation from positions
+`fixedpos` or those of residues `fixed`. `fixed` or `fixedpos` should include
+just the residues or positions you want to align to, but `moving` should be an
+entire chain.
 """
-function align(fixedpos::AbstractMatrix{Float64}, moving::AbstractVector{PDBResidue}, sm::SequenceMapping; seqname=nothing, warn::Bool=true)
+function align(fixedpos::AbstractMatrix{Float64}, moving::StructureLike, sm::SequenceMapping; seqname=nothing, warn::Bool=true)
     length(sm) == size(fixedpos, 2) || throw(DimensionMismatch("reference has $(size(fixedpos, 2)) positions, but `sm` has $(length(sm))"))
     movres = moving[sm]
     keep = map(!isnothing, movres)
@@ -16,36 +19,33 @@ function align(fixedpos::AbstractMatrix{Float64}, moving::AbstractVector{PDBResi
     end
     movpos = residue_centroid_matrix(movres[keep])
     fixedpos = fixedpos[:,keep]
-    refmean = mean(fixedpos; dims=2)
-    movmean = mean(movpos; dims=2)
-    R = MIToS.PDB.kabsch((fixedpos .- refmean)', (movpos .- movmean)')
-    return change_coordinates(moving, (coordinatesmatrix(moving) .- movmean') * R .+ refmean')
+    return Transformation(movpos, fixedpos, sm[keep], findall(keep))
 end
-align(fixed::AbstractVector{PDBResidue}, moving::AbstractVector{PDBResidue}, sm::SequenceMapping; kwargs...) =
-    align(residue_centroid_matrix(fixed), moving, sm; kwargs...)
+align(fixed, moving::StructureLike, sm::SequenceMapping; kwargs...) = align(residue_centroid_matrix(fixed), moving, sm; kwargs...)
 
-function mapclosest(mapto::AbstractVector{PDBResidue}, mapfrom::AbstractVector{PDBResidue})
+function mapclosest(mapto, mapfrom)
     refcenters = residue_centroid_matrix(mapto)
     seqcenters = residue_centroid_matrix(mapfrom)
     D = pairwise(Euclidean(), refcenters, seqcenters; dims=2)
     assignment, _ = hungarian(D)
-    return collect(zip(assignment, [j == 0 ? convert(eltype(D), NaN) : D[i, j] for (i, j) in enumerate(assignment)]))
-    # d, m = findmin(D; dims=1)
-    # return [mi[1] => di for (mi, di) in zip(vec(m), vec(d))]
+    fillval = convert(eltype(D), NaN)
+    return collect(zip(assignment, [j == 0 ? fillval : D[i, j] for (i, j) in enumerate(assignment)]))
 end
 
 ## Align to membrane
 
 """
-    newchain = align_to_membrane(chain::AbstractVector{PDBResidue}, tms; extracellular=true)
+    tform = align_to_membrane(chain::ChainLike, tms; extracellular=true)
 
-Align the `chain` to the membrane, given the transmembrane segments `tms` as
-residue-indexes (e.g., `[37:61, 74:96, ...]`). `extracelluar` should be true if
-the N-terminus of the protein is extracellular (`chain[first(tms[1])]` is at the
-extracellular face), and false otherwise.
+Compute the rigid transformation `tform` needed to align `chain` to the
+membrane, given the transmembrane segments `tms` as residue-indexes (e.g.,
+`[37:61, 74:96, ...]`). `extracelluar` should be true if the N-terminus of the
+protein is extracellular (`chain[first(tms[1])]` is at the extracellular face),
+and false otherwise.
 
-`newchain` is oriented so that the center of the membrane is `z=0` and
-extracellular is positive.
+`applytransform!(chain, tform)` (or the `model` that includes `chain`) will
+re-orient `chain` so that the center of the membrane is `z=0` and extracellular
+is positive.
 
 The algorithm finds the membrane normal `u` by maximizing the ratio
 
@@ -53,13 +53,13 @@ The algorithm finds the membrane normal `u` by maximizing the ratio
   -----------------
     sumᵢ (u ⋅ δᵢ)²
 
-where `vᵢ` is a vector parallel to the `i`th TM helix, and `δᵢ` is a within-leaflet
-atomic displacement.
+where `vᵢ` is a vector parallel to the `i`th TM helix, and `δᵢ` is a
+within-leaflet atomic displacement.
 """
-function align_to_membrane(chain::AbstractVector{PDBResidue}, tms; extracellular::Bool=true)
+function align_to_membrane(chain::ChainLike, tms; extracellular::Bool=true)
     function leaflet_displacements(residues)
-        coords = reduce(vcat, [coordinatesmatrix(r) for r in residues])
-        meanpos = mean(coords, dims=1)
+        coords = reduce(hcat, [coordarray(r) for r in residues])
+        meanpos = mean(coords, dims=2)
         return coords .- meanpos
     end
 
@@ -79,33 +79,38 @@ function align_to_membrane(chain::AbstractVector{PDBResidue}, tms; extracellular
     # eigenvector corresponding to the smallest eigenvalue of the matrix.
     μtm = mean(tm_vectors)
     A = sum(u -> (Δu = u - μtm; Δu * Δu'), tm_vectors)
-    B = delta_i'*delta_i + delta_e'*delta_e
+    B = delta_i*delta_i' + delta_e*delta_e'
     λ, u = eigen(A, B)
     u = u[:, argmax(λ)]
     u = u / norm(u)
     # Find the projection by `u` of atom coordinates in each leaflet
-    proj_e = reduce(vcat, [coordinatesmatrix(r) for r in leaflet_e]) * u
-    proj_i = reduce(vcat, [coordinatesmatrix(r) for r in leaflet_i]) * u
+    proj_e = reduce(hcat, [coordarray(r) for r in leaflet_e])' * u
+    proj_i = reduce(hcat, [coordarray(r) for r in leaflet_i])' * u
     # Compute a rotation that aligns u to the z-axis
     t = [0, 0, mean(proj_e) > mean(proj_i) ? 1 : -1]   # extracellular is positive
     q = QuatRotation(1 + u'*t, cross(u, t)...)
     # Apply the rotation to the coordinates, and offset the z-coordinate to the mean of the leaflets
-    coords = coordinatesmatrix(chain)
-    μ = vec(mean(coords, dims=1))
-    newcoords = (q * (coords' .- μ))' .+ [0, 0, (median(proj_e) + median(proj_i))/2 - μ'*u]'
-    return change_coordinates(chain, newcoords)
+    ccoords = coordarray(chain)
+    μ = vec(mean(ccoords, dims=2))
+    return Transformation(μ, [0, 0, (median(proj_e) + median(proj_i))/2 - μ'*u], RotMatrix(q))
 end
 
+"""
+    tform = align_to_axes(strct)
+
+Compute the transformation needed to align the principle axes of inertia of
+`strct` with the coordinate axes.
+"""
 function align_to_axes(strct)
-    coords = coordinatesmatrix(strct)
-    coords = coords .- mean(coords, dims=1)
-    tform = GaussianMixtureAlignment.inertial_transforms(coords')[1]
-    tform = AffineMap(Matrix(tform.linear),[tform.translation...])
-    return change_coordinates(strct, tform(coords')')
+    scoords = coordarray(strct)
+    μ = mean(scoords, dims=2)
+    scoords = scoords .- μ
+    rotmtrx = GaussianMixtureAlignment.inertial_transforms(scoords)[1]
+    return Transformation(μ, [0, 0, 0], rotmtrx)
 end
 
-function collect_leaflet_residues(chain, tms, extracellular)
-    leaflet_e, leaflet_i = PDBResidue[], PDBResidue[]
+function collect_leaflet_residues(chain, tms, extracellular::Bool)
+    leaflet_e, leaflet_i = Residue[], Residue[]
     for r in tms
         start, stop = extrema(r)
         if extracellular
@@ -122,6 +127,11 @@ end
 
 ## Needleman-Wunsch alignment
 
+# This implements sequence alignment based on three-dimensional structure, where
+# the cost of aligning two residues is the Euclidean distance between their
+# centroids. The Needleman-Wunsch algorithm is used to find the optimal alignment
+# of two sequences.
+
 @enum NWParent NONE DIAG LEFT UP
 Base.show(io::IO, p::NWParent) = print(io, p == NONE ? 'X' :
                                            p == DIAG ? '↖' :
@@ -203,8 +213,11 @@ Find the optimal `ϕ` matching `seq1[ϕ[k][1]]` to `seq2[ϕ[k][2]]` for all
 `k`. `mode` controls the computation of pairwise matching penalties, and can be
 either `:distance` or `:distance_orientation`, where the latter adds any
 mismatch in sidechain orientation to the distance penalty.
+
+`seq1` and `seq2` must be aligned to each other in 3D space before calling this
+function. See [`align`](@ref).
 """
-function align_nw(seq1::AbstractVector{PDBResidue}, seq2::AbstractVector{PDBResidue}, gapcosts::NWGapCosts;
+function align_nw(seq1::ChainLike, seq2::ChainLike, gapcosts::NWGapCosts;
                   mode::Symbol=:distance_orientation)
     supported_modes = (:distance, :distance_orientation)
     mode ∈ supported_modes || throw(ArgumentError("supported modes are $(supported_modes), got $mode"))
@@ -227,7 +240,7 @@ Transfer `refranges`, a list of reside index spans in `seq2`, to `seq1`. `seq1`
 `seq2` must be spatially aligned, and the assignment is made by minimizing
 inter-chain distance subject to the constraint of preserving sequence order.
 """
-function align_ranges(seq1::AbstractVector{PDBResidue}, seq2::AbstractVector{PDBResidue}, refranges::AbstractVector{<:AbstractUnitRange}; kwargs...)
+function align_ranges(seq1::ChainLike, seq2::AbstractVector{<:AbstractResidue}, refranges::AbstractVector{<:AbstractUnitRange}; kwargs...)
     anchoridxs = sizehint!(Int[], length(refranges)*2)
     for r in refranges
         push!(anchoridxs, first(r), last(r))
@@ -237,6 +250,8 @@ function align_ranges(seq1::AbstractVector{PDBResidue}, seq2::AbstractVector{PDB
     @assert last.(ϕ) == eachindex(anchoridxs)
     return [ϕ[i][1]:ϕ[i+1][1] for i in 1:2:length(ϕ)]
 end
+align_ranges(seq1::ChainLike, seq2::Chain, refranges::AbstractVector{<:AbstractUnitRange}; kwargs...) =
+    align_ranges(seq1, collectresidues(seq2), refranges; kwargs...)
 
 function score_nw(D::AbstractMatrix, gapcosts::NWGapCosts)
     Base.require_one_based_indexing(D)
diff --git a/src/alphafold.jl b/src/alphafold.jl
index a644e55..ebd4dd8 100644
--- a/src/alphafold.jl
+++ b/src/alphafold.jl
@@ -89,7 +89,7 @@ end
 
 Attempt to download an [AlphaFold](https://alphafold.com/) structure.
 Returns `nothing` if no entry corresponding to `uniprotXname` exists; otherwise
-it return `path`, the pathname of the saved file.
+it returns `path`, the pathname of the saved file.
 
 In general, a better approach is to use [`download_alphafolds`](@ref) for multiple proteins,
 or [`query_alphafold_latest`](@ref) combined with `Downloads.download` for a single protein.
@@ -154,13 +154,13 @@ function pLDDTcolor(pLDDT::Real)
 end
 
 """
-    pLDDTcolor(r::PDBResidue)
+    pLDDTcolor(r::Residue)
     pLDDTcolor(score::Real)
 
 Return the color corresponding to the pLDDT score (a measure of confidence) of a residue.
 """
-pLDDTcolor(r::PDBResidue) = pLDDTcolor(pLDDT(r))
-pLDDTcolor(a::PDBAtom) = pLDDTcolor(pLDDT(a))
+pLDDTcolor(r::Residue) = pLDDTcolor(pLDDT(r))
+pLDDTcolor(a::Atom) = pLDDTcolor(pLDDT(a))
 
-pLDDT(r::PDBResidue) = pLDDT(only(unique(a -> a.B, r.atoms)))
-pLDDT(a::PDBAtom) = parse(Float64, a.B)
+pLDDT(r::Residue) = only(unique(pLDDT, r))
+pLDDT(a::Atom) = tempfactor(a)
diff --git a/src/analyze.jl b/src/analyze.jl
index c41e95c..b7d4a67 100644
--- a/src/analyze.jl
+++ b/src/analyze.jl
@@ -41,10 +41,8 @@ function columnwise_entropy(msa::AbstractMultipleSequenceAlignment, aacode=reduc
     return map(_entropy, eachcol(resnum))
 end
 
-MSA.Residue(r::PDBResidue) = three2residue(r.id.name)
-
 """
-    residue_centroid(r::PDBResidue)
+    residue_centroid(r::AbstractResidue)
 
 Compute the mean position of all atoms in `r` excluding hydrogen.
 
@@ -53,7 +51,7 @@ they incorporate the orientation of the side chain relative to the overall fold.
 
 See also [`residue_centroid_matrix`](@ref).
 """
-residue_centroid(r::PDBResidue) = mean(atom -> atom.coordinates, Iterators.filter(atom -> atom.element != "H", r.atoms))
+residue_centroid(r::AbstractResidue) = vec(mean(coordarray(r, heavyatomselector); dims=2))
 
 """
     residue_centroid_matrix(seq)
@@ -63,23 +61,21 @@ Return a matrix of all residue centroids as columns. See also [`residue_centroid
 residue_centroid_matrix(seq) = reduce(hcat, map(residue_centroid, seq))
 
 """
-    alphacarbon_coordinates(res::PDBResidue)
+    alphacarbon_coordinates(res::AbstractResidue)
 
 Return the coordinates of the α-carbon in `res`.
 """
-alphacarbon_coordinates(r) = r.atoms[findfirst(a -> a.atom === "CA", r.atoms)].coordinates
+alphacarbon_coordinates(r) = coords(firstmatch(calphaselector, r))
 
 """
     alphacarbon_coordinates_matrix(seq)
 
 Return a matrix of αC coordinates as columns across all residues. See also [`alphacarbon_coordinates`](@ref).
 """
-function alphacarbon_coordinates_matrix(seq)
-    return hcat([[r.atoms[findfirst(a -> a.atom === "CA", r.atoms)].coordinates...] for r in seq]...)
-end
+alphacarbon_coordinates_matrix(seq) = reduce(hcat, map(alphacarbon_coordinates, seq))
 
 """
-    chargelocations(pdb::AbstractVector{PDBResidue}; include_his::Bool=false)
+    chargelocations(chain::ChainLike; include_his::Bool=false)
 
 Return a list of potential charge locations in the protein structure. Each is a tuple `(position, residueindex, AAname)`.
 The positions are those of `N` (in positively-charged residues like Arg & Lys) or `O` (in negatively-charged residues like
@@ -88,43 +84,40 @@ the location of each potential charge will be listed (1 for Lys, 2 each for Arg,
 
 By default, histidine is not considered charged, but you can include it by setting `include_His=true`.
 """
-function chargelocations(pdb::AbstractVector{PDBResidue}; include_His::Bool=false)
-    argpat(atom) = startswith(atom.atom, "NH")  # arginine
-    lyspat(atom) = atom.atom == "NZ"            # lysine
-    asppat(atom) = startswith(atom.atom, "OD")  # aspartate
-    glupat(atom) = startswith(atom.atom, "OE")  # glutamate
-    hispat(atom) = startswith(atom.atom, r"N[DE]")  # histidine
-
-    locs = Tuple{Coordinates,Int,String}[]
-    for (i, res) in pairs(pdb)
-        name = res.id.name
+function chargelocations(chain::ChainLike; include_His::Bool=false)
+    argpat(atom) = startswith(atomname(atom), "NH")  # arginine
+    lyspat(atom) = atomname(atom) == "NZ"            # lysine
+    asppat(atom) = startswith(atomname(atom), "OD")  # aspartate
+    glupat(atom) = startswith(atomname(atom), "OE")  # glutamate
+    hispat(atom) = startswith(atomname(atom), r"N[DE]")  # histidine
+
+    locs = Tuple{typeof(coords(first(first(chain)))),Int,String}[]
+    for (i, res) in enumerate(chain)
+        name = resname(res)
         if name == "ARG"
-            idx = findfirst(argpat, res.atoms)
-            push!(locs, (res.atoms[idx].coordinates, i, name))
-            idx = findnext(argpat, res.atoms, idx+1)
-            push!(locs, (res.atoms[idx].coordinates, i, name))
+            for a in res
+                argpat(a) && push!(locs, (coords(a), i, name))
+            end
         elseif name == "LYS"
-            idx = findfirst(lyspat, res.atoms)
-            push!(locs, (res.atoms[idx].coordinates, i, name))
+            for a in res
+                lyspat(a) && push!(locs, (coords(a), i, name))
+            end
         elseif name == "ASP"
-            idx = findfirst(asppat, res.atoms)
-            push!(locs, (res.atoms[idx].coordinates, i, name))
-            idx = findnext(asppat, res.atoms, idx+1)
-            push!(locs, (res.atoms[idx].coordinates, i, name))
+            for a in res
+                asppat(a) && push!(locs, (coords(a), i, name))
+            end
         elseif name == "GLU"
-            idx = findfirst(glupat, res.atoms)
-            push!(locs, (res.atoms[idx].coordinates, i, name))
-            idx = findnext(glupat, res.atoms, idx+1)
-            push!(locs, (res.atoms[idx].coordinates, i, name))
+            for a in res
+                glupat(a) && push!(locs, (coords(a), i, name))
+            end
         elseif include_His && name == "HIS"
-            idx = findfirst(hispat, res.atoms)
-            push!(locs, (res.atoms[idx].coordinates, i, name))
-            idx = findnext(hispat, res.atoms, idx+1)
-            push!(locs, (res.atoms[idx].coordinates, i, name))
+            for a in res
+                hispat(a) && push!(locs, (coords(a), i, name))
+            end
         end
     end
     return locs
 end
 
-positive_locations(chargelocs::AbstractVector{Tuple{Coordinates,Int,String}}) = [loc[1] for loc in chargelocs if loc[3] ∈ ("ARG", "LYS", "HIS")]
-negative_locations(chargelocs::AbstractVector{Tuple{Coordinates,Int,String}}) = [loc[1] for loc in chargelocs if loc[3] ∈ ("ASP", "GLU")]
+positive_locations(chargelocs::AbstractVector{<:Tuple{AbstractVector,Int,String}}) = [loc[1] for loc in chargelocs if loc[3] ∈ ("ARG", "LYS", "HIS")]
+negative_locations(chargelocs::AbstractVector{<:Tuple{AbstractVector,Int,String}}) = [loc[1] for loc in chargelocs if loc[3] ∈ ("ASP", "GLU")]
diff --git a/src/deprecated.jl b/src/deprecated.jl
deleted file mode 100644
index ac878c5..0000000
--- a/src/deprecated.jl
+++ /dev/null
@@ -1,4 +0,0 @@
-function markers(modelnum::Integer, positions::AbstractVector{<:AbstractVector{<:Real}}, radius::Real, color)
-    Base.depwarn("`markers` is deprecated, use `marker.(modelnum, positions, radius, color)` instead", :markers)
-    return [marker(modelnum, pos, radius, color) for pos in positions]
-end
diff --git a/src/features.jl b/src/features.jl
index d2f6f0a..7b9bd8f 100644
--- a/src/features.jl
+++ b/src/features.jl
@@ -1,22 +1,23 @@
 const σdflt = (; hbond=1, ionic=5, aromatic=3)  # rough guesses for the interaction distance, in Å (separation is 2σ)
 
 equalvolumeradius(radii) = sum(radii.^3)^(1/3)
-equalvolumeradius(atoms::AbstractVector{PDBAtom}, r::PDBResidue) = equalvolumeradius([vanderwaalsradius[(r.id.name, a.atom)] for a in atoms])
-equalvolumeradius(atoms::AbstractVector{PDBAtom}, r::PDBResidue, ::Symbol) = equalvolumeradius(atoms, r)
+equalvolumeradius(atoms::ResidueLike, r::AbstractResidue) = equalvolumeradius([vanderwaalsradius[(resname(r), atomname(a))] for a in atoms])
+equalvolumeradius(atoms::ResidueLike, r::AbstractResidue, ::Symbol) = equalvolumeradius(atoms, r)
 
 function atomic_σfun(a, r, f::Symbol; σhbond=σdflt.hbond, σionic=σdflt.ionic, σaromatic=σdflt.aromatic)
-    f === :Steric && return vanderwaalsradius[(r.id.name, a.atom)]             # repulsive
-    f === :Hydrophobe && return 2 * vanderwaalsradius[(r.id.name, a.atom)]     # attractive
-    f ∈ (:Donor, :Acceptor) && return sqrt(σhbond^2 + vanderwaalsradius[(r.id.name, a.atom)]^2)
+    aname, rname = atomname(a), resname(r)
+    f === :Steric && return vanderwaalsradius[(rname, aname)]             # repulsive
+    f === :Hydrophobe && return 2 * vanderwaalsradius[(rname, aname)]     # attractive
+    f ∈ (:Donor, :Acceptor) && return sqrt(σhbond^2 + vanderwaalsradius[(rname, aname)]^2)
     f ∈ (:PosIonizable, :NegIonizable) && return σionic
     f === :Aromatic && return σaromatic # https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3530875/
     throw(ArgumentError("Unknown feature type: $f"))
 end
 
 function combined_σfun(atoms, r, f::Symbol; σhbond=σdflt.hbond, σionic=σdflt.ionic, σaromatic=σdflt.aromatic)
-    μ = mean(a -> a.coordinates, atoms)
+    μ = mean(a -> coords(a), atoms)
     length(atoms) == 1 && return equalvolumeradius(atoms, r)
-    σ = std([norm(a.coordinates .- μ) for a in atoms]) + equalvolumeradius(atoms, r)
+    σ = std([norm(coords(a) .- μ) for a in atoms]) + equalvolumeradius(atoms, r)
     f === :Hydrophobe && return σ + 3//2
     f === :Steric && return σ
     f ∈ (:Donor, :Acceptor) && return sqrt(σhbond^2 + σ^2)
@@ -26,7 +27,7 @@ function combined_σfun(atoms, r, f::Symbol; σhbond=σdflt.hbond, σionic=σdfl
 end
 
 function atomic_ϕfun(a, r, f::Symbol)
-    name = r.id.name
+    name = resname(r)
     if f === :PosIonizable
         name == "HIS" && return 0.1
         name == "ARG" && return 0.5  # arginine has two ionizable atoms, but the net charge is 1
@@ -39,7 +40,7 @@ function atomic_ϕfun(a, r, f::Symbol)
 end
 
 function combined_ϕfun(atoms, r, f::Symbol)
-    name = r.id.name
+    name = resname(r)
     if f === :PosIonizable
         name == "HIS" && return 0.1
         name == "ARG" && return 1.0
@@ -59,38 +60,39 @@ function add_feature!(mgmm::IsotropicMultiGMM, key, μ, σ, ϕ)
     push!(gmm, eltype(gmm)(vec(μ), σ, ϕ))
 end
 
-function add_features_from_atom!(mgmm::IsotropicMultiGMM, a::PDBAtom, r::PDBResidue, σfun, ϕfun)
+function add_features_from_atom!(mgmm::IsotropicMultiGMM, a::AbstractAtom, r::AbstractResidue, σfun, ϕfun)
     ϕ = ϕfun(a, r, :Steric)
-    !iszero(ϕ) && add_feature!(mgmm, :Steric, a.coordinates, σfun(a, r, :Steric), ϕ)
-    if ishydrophobic(a, r.id.name)
+    rname, acoords = resname(r), coords(a)
+    !iszero(ϕ) && add_feature!(mgmm, :Steric, acoords, σfun(a, r, :Steric), ϕ)
+    if ishydrophobic(a, rname)
         ϕ = ϕfun(a, r, :Hydrophobe)
-        !iszero(ϕ) && add_feature!(mgmm, :Hydrophobe, a.coordinates, σfun(a, r, :Hydrophobe), ϕ)
+        !iszero(ϕ) && add_feature!(mgmm, :Hydrophobe, acoords, σfun(a, r, :Hydrophobe), ϕ)
     end
-    if isaromatic(a, r.id.name)
+    if isaromatic(a, rname)
         ϕ = ϕfun(a, r, :Aromatic)
-        !iszero(ϕ) && add_feature!(mgmm, :Aromatic, a.coordinates, σfun(a, r, :Aromatic), ϕ)
+        !iszero(ϕ) && add_feature!(mgmm, :Aromatic, acoords, σfun(a, r, :Aromatic), ϕ)
     end
-    if iscationic(a, r.id.name)
+    if iscationic(a, rname)
         ϕ = ϕfun(a, r, :PosIonizable)
-        !iszero(ϕ) && add_feature!(mgmm, :PosIonizable, a.coordinates, σfun(a, r, :PosIonizable), ϕ)
+        !iszero(ϕ) && add_feature!(mgmm, :PosIonizable, acoords, σfun(a, r, :PosIonizable), ϕ)
     end
-    if isanionic(a, r.id.name)
+    if isanionic(a, rname)
         ϕ = ϕfun(a, r, :NegIonizable)
-        !iszero(ϕ) && add_feature!(mgmm, :NegIonizable, a.coordinates, σfun(a, r, :NegIonizable), ϕ)
+        !iszero(ϕ) && add_feature!(mgmm, :NegIonizable, acoords, σfun(a, r, :NegIonizable), ϕ)
     end
-    if ishbonddonor(a, r.id.name)
+    if ishbonddonor(a, rname)
         ϕ = ϕfun(a, r, :Donor)
-        !iszero(ϕ) && add_feature!(mgmm, :Donor, a.coordinates, σfun(a, r, :Donor), ϕ)
+        !iszero(ϕ) && add_feature!(mgmm, :Donor, acoords, σfun(a, r, :Donor), ϕ)
     end
-    if ishbondacceptor(a, r.id.name)
+    if ishbondacceptor(a, rname)
         ϕ = ϕfun(a, r, :Acceptor)
-        !iszero(ϕ) && add_feature!(mgmm, :Acceptor, a.coordinates, σfun(a, r, :Acceptor), ϕ)
+        !iszero(ϕ) && add_feature!(mgmm, :Acceptor, acoords, σfun(a, r, :Acceptor), ϕ)
     end
 end
 
 function add_features_from_atoms!(mgmm::IsotropicMultiGMM, key, atoms, r, σfun, ϕfun)
     isempty(atoms) && return
-    μ = mean(a -> a.coordinates, atoms)
+    μ = mean(coords, atoms)
     σ = σfun(atoms, r, key)
     ϕ = ϕfun(atoms, r, key)
     add_feature!(mgmm, key, μ, σ, ϕ)
@@ -98,30 +100,31 @@ end
 
 function add_features_from_residue!(
     mgmm::IsotropicMultiGMM,
-    r::PDBResidue;
+    r::AbstractResidue;
     combined = false,
     σfun = combined ? combined_σfun : atomic_σfun,
     ϕfun = combined ? combined_ϕfun : atomic_ϕfun
 )
     if !combined
-        for a in r.atoms
+        for a in r
             add_features_from_atom!(mgmm, a, r, σfun, ϕfun)
         end
     else
-        add_features_from_atoms!(mgmm, :Steric,       r.atoms, r, σfun, ϕfun)
-        add_features_from_atoms!(mgmm, :Hydrophobe,   filter(x -> ishydrophobic(x, r.id.name),   r.atoms), r, σfun, ϕfun)
-        add_features_from_atoms!(mgmm, :Aromatic,     filter(x -> isaromatic(x, r.id.name),      r.atoms), r, σfun, ϕfun)
-        add_features_from_atoms!(mgmm, :PosIonizable, filter(x -> iscationic(x, r.id.name),      r.atoms), r, σfun, ϕfun)
-        add_features_from_atoms!(mgmm, :NegIonizable, filter(x -> isanionic(x, r.id.name),       r.atoms), r, σfun, ϕfun)
-        add_features_from_atoms!(mgmm, :Donor,        filter(x -> ishbonddonor(x, r.id.name),    r.atoms), r, σfun, ϕfun)
-        add_features_from_atoms!(mgmm, :Acceptor,     filter(x -> ishbondacceptor(x, r.id.name), r.atoms), r, σfun, ϕfun)
+        rname = resname(r)
+        add_features_from_atoms!(mgmm, :Steric,       r, r, σfun, ϕfun)
+        add_features_from_atoms!(mgmm, :Hydrophobe,   collectatoms(r, x -> ishydrophobic(x, rname))  , r, σfun, ϕfun)
+        add_features_from_atoms!(mgmm, :Aromatic,     collectatoms(r, x -> isaromatic(x, rname))     , r, σfun, ϕfun)
+        add_features_from_atoms!(mgmm, :PosIonizable, collectatoms(r, x -> iscationic(x, rname))     , r, σfun, ϕfun)
+        add_features_from_atoms!(mgmm, :NegIonizable, collectatoms(r, x -> isanionic(x, rname))      , r, σfun, ϕfun)
+        add_features_from_atoms!(mgmm, :Donor,        collectatoms(r, x -> ishbonddonor(x, rname))   , r, σfun, ϕfun)
+        add_features_from_atoms!(mgmm, :Acceptor,     collectatoms(r, x -> ishbondacceptor(x, rname)), r, σfun, ϕfun)
     end
 end
 
 """
-    mgmm = features_from_structure(seq, idxs=1:length(seq); combined=false)
+    mgmm = features_from_structure(seq::ChainLike, idxs=1:length(seq); combined=false)
 
-Construct an `IsotropicMultiGMM` from a `PDBResidue` sequence `seq` by adding
+Construct an `IsotropicMultiGMM` from `seq` by adding
 features for each residue in `idxs`.
 
 `combined=true` causes all atoms sharing the same feature to be combined,
diff --git a/src/forces.jl b/src/forces.jl
index 6aea990..0070b5d 100644
--- a/src/forces.jl
+++ b/src/forces.jl
@@ -41,11 +41,11 @@ The output is a list of 3×4 matrices, one for each residue in `seq`.
 
 See also: [`optimize_weights`](@ref), [`forcedict`](@ref).
 """
-function forcecomponents(seq::AbstractVector{PDBResidue}, interactions::InteractionList, residueindexes=eachindex(seq); kwargs...)
+function forcecomponents(seq::AbstractVector{<:AbstractResidue}, interactions::InteractionList, residueindexes=eachindex(seq); kwargs...)
     # Create a MGMM for each residue
     mgmms = [IsotropicMultiGMM(Dict{Symbol,IsotropicGMM{3,Float64}}()) for _ in seq]
-    for i in eachindex(seq)
-        add_features_from_residue!(mgmms[i], seq[i]; kwargs...)
+    for (i, seqi) in enumerate(seq)
+        add_features_from_residue!(mgmms[i], seqi; kwargs...)
     end
     forces = forcecomponents(mgmms, interactions, residueindexes)
     # Project out the component of the force that would stretch or compress the covalent bonds between residues
@@ -53,14 +53,14 @@ function forcecomponents(seq::AbstractVector{PDBResidue}, interactions::Interact
         force = forces[ii]
         if i > firstindex(seq)
             # Get the alpha-carbon to nitrogen vector
-            v = alpha_nitrogen(seq[i-1]).coordinates - alpha_carbon(seq[i]).coordinates
+            v = coords(alpha_nitrogen(seq[i-1])) - coords(alpha_carbon(seq[i]))
             v /= norm(v)
             # Project out the component of the force that would stretch or compress the bond
             force -= v * (v' * force)
         end
         if i < lastindex(seq)
             # Get the nitrogen to alpha-carbon vector
-            v = alpha_carbon(seq[i+1]).coordinates - alpha_nitrogen(seq[i]).coordinates
+            v = coords(alpha_carbon(seq[i+1])) - coords(alpha_nitrogen(seq[i]))
             v /= norm(v)
             # Project out the component of the force that would stretch or compress the bond
             force -= v * (v' * force)
@@ -69,6 +69,8 @@ function forcecomponents(seq::AbstractVector{PDBResidue}, interactions::Interact
     end
     return forces
 end
+forcecomponents(seq, interactions::InteractionList, residueindexes=eachindex(seq); kwargs...) =
+    forcecomponents(collectresidues(seq), interactions, residueindexes; kwargs...)
 
 function forcecomponents(mgmms::AbstractVector{<:IsotropicMultiGMM{N}}, interactions::InteractionList, residueindexes=eachindex(mgmms)) where N
     GaussianMixtureAlignment.validate_interactions(Dict(interactions)) || throw(ArgumentError("interactions must appear in only one order, got $interactions"))
diff --git a/src/msa.jl b/src/msa.jl
index a9da0ec..35fb5c7 100644
--- a/src/msa.jl
+++ b/src/msa.jl
@@ -36,7 +36,7 @@ Remove all sequences from `msa` with fewer than `minres` matching residues.
 function filter_long!(msa::AbstractMultipleSequenceAlignment, minres::Real)
     # Get rid of short sequences
     nresidues = map(eachrow(msa)) do v
-        sum(!=(Residue('-')), v)
+        sum(!=(MSA.Residue('-')), v)
     end
     mask = nresidues .> minres
     filtersequences!(msa, mask)
@@ -77,6 +77,22 @@ end
 struct SequenceMapping <: AbstractVector{Int}
     seqmap::Vector{Int}
 end
+"""
+    sm = SequenceMapping([4, 5, 0, ...])
+    sm = SequenceMapping(seq::AnnotatedAlignedSequence)
+
+A `SequenceMapping` is a vector of indexes within a full sequence that map to a
+reference. Specifically, `sm[i]` is the index of the residue in the full
+sequence that maps to the `i`-th position in the reference. 0 is a placeholder
+for a position in the reference that has no mapping to the full sequence.
+
+# Example
+
+`SequenceMapping([4, 5, 0, ...])` indicates that:
+- the first position in the reference maps to the fourth residue in the full sequence,
+- the second position in the reference maps to the fifth residue in the full sequence, and
+- the third position in the reference lacks a corresponding residue in the full sequence.
+"""
 SequenceMapping(seq::AnnotatedAlignedSequence) = SequenceMapping(getsequencemapping(seq))
 
 Base.size(sm::SequenceMapping) = size(sm.seqmap)
@@ -85,11 +101,18 @@ Base.setindex!(sm::SequenceMapping, v, i::Int) = sm.seqmap[i] = v
 Base.similar(::SequenceMapping, ::Type{Int}, dims::Dims{1}) = SequenceMapping(Vector{Int}(undef, dims))
 
 # We restrict the following to `Vector` to prevent ambiguities
-Base.getindex(fullseqvec::Vector{T}, sm::SequenceMapping) where T = any(iszero, sm.seqmap) ?
-    Union{T,Nothing}[i == 0 ? nothing : fullseqvec[i] for i in sm.seqmap] :
-    T[fullseqvec[i] for i in sm.seqmap]
+Base.getindex(fullseqvec::Vector{T}, sm::SequenceMapping) where T =
+    Union{T,Nothing}[i == 0 ? nothing : fullseqvec[i] for i in sm.seqmap]
+
+Base.getindex(fullseq::Chain, sm::SequenceMapping) = getindex(collectresidues(fullseq), sm)
+Base.getindex(fullseq::Model, sm::SequenceMapping) = getindex(only(fullseq), sm)
+Base.getindex(fullseq::MolecularStructure, sm::SequenceMapping) = getindex(only(fullseq), sm)
 
 function Base.setindex!(fullseqvec::Vector, colvalues::AbstractVector, sm::SequenceMapping)
-    idxnz = sm.seqmap .> 0
-    fullseqvec[sm.seqmap[idxnz]] = colvalues[idxnz]
+    eachindex(colvalues) === eachindex(sm) || throw(DimensionMismatch("`colvalues` and `sm` must have the same indices"))
+    for (v, i) in zip(colvalues, sm.seqmap)
+        i == 0 && continue
+        fullseqvec[i] = v
+    end
+    return fullseqvec
 end
diff --git a/src/pocket.jl b/src/pocket.jl
index 61d870b..07aa945 100644
--- a/src/pocket.jl
+++ b/src/pocket.jl
@@ -17,12 +17,12 @@ function z2tmcoords(z, allitps)
     return [itpos2coord(p, tmitps) for (p,tmitps) in zip(itpos, allitps)]
 end
 
-function sidechaincentroid(r::PDBResidue) # ignoring mass for now
-    scatoms = filter(a -> a.atom ∉ ["N","CA","C","O"], r.atoms)
-    return length(scatoms) == 0 ? nothing : sum(a.coordinates for a in scatoms) / length(scatoms)
+function sidechaincentroid(r::AbstractResidue) # ignoring mass for now
+    scatoms = collectatoms(r, !backboneselector)
+    return length(scatoms) == 0 ? nothing : sum(coords(a) for a in scatoms) / length(scatoms)
 end
 
-function scvector(r::PDBResidue)
+function scvector(r::AbstractResidue)
     cacoord = alphacarbon_coordinates(r)
     sccoord = sidechaincentroid(r)
     return sccoord === nothing ? zero(cacoord) : sccoord - cacoord
@@ -33,7 +33,7 @@ function res_inside_hull(sccoord, tmcoords) # tmcoords is a list of 2D points in
     return insidehull(sccoord[1:2], h)
 end
 
-function tm_res_is_inward(r::PDBResidue, itps)
+function tm_res_is_inward(r::AbstractResidue, itps)
     scvec = scvector(r)
     all(iszero, scvec) && return false
     sccoord = alphacarbon_coordinates(r) .+ scvec
@@ -47,14 +47,14 @@ Return an array of boolean[] indicating which residues (of those specified by `t
 
 `tmidxs` is a vector (typically of length 7, with each entry corresponding to a transmembrane region) of ranges of residue indices.
 """
-function inward_tm_residues(seq, tmidxs)
+function inward_tm_residues(seq::AbstractVector{<:AbstractResidue}, tmidxs)
     tmcoords = [alphacarbon_coordinates_matrix(seq[tm]) for tm in tmidxs]
     itps = [[interpolate(tm[i,:], BSpline((i === 3 ? Linear() : Cubic()))) for i=1:3] for tm in tmcoords]
     return [[tm_res_is_inward(r, itps) for r in seq[tm]] for tm in tmidxs]
 end
+inward_tm_residues(seq::Chain, tmidxs) = inward_tm_residues(collectresidues(seq), tmidxs)
 
-
-function ecl_res_is_inward(r::PDBResidue, topcenter)
+function ecl_res_is_inward(r::AbstractResidue, topcenter)
     scvec = scvector(r)
     all(iszero, scvec) && return false
     accoord = alphacarbon_coordinates(r)
@@ -69,8 +69,10 @@ Return an array of boolean[] indicating which residues (of those specified by `e
 
 `eclidxs` is a vector (with each entry corresponding to an extracellular loop) of ranges of residue indices.
 """
-function inward_ecl_residues(seq, eclidxs; includes_amino_terminus::Bool=false)
+function inward_ecl_residues(seq::AbstractVector{<:AbstractResidue}, eclidxs; includes_amino_terminus::Bool=false)
     tm_top_idxs = filter(x -> 0 < x < length(seq), vcat([[ecl[1] - 1, ecl[end] + 1] for ecl in eclidxs[begin+includes_amino_terminus:end]]...))
     topcenter = mean(alphacarbon_coordinates_matrix(seq[tm_top_idxs]); dims=2)
     return [[ecl_res_is_inward(r, topcenter) for r in seq[ecl]] for ecl in eclidxs]
 end
+inward_ecl_residues(seq, eclidxs; includes_amino_terminus::Bool=false) =
+    inward_ecl_residues(collectresidues(seq), eclidxs; includes_amino_terminus)
diff --git a/src/properties.jl b/src/properties.jl
index ffd3b3f..4ef06e3 100644
--- a/src/properties.jl
+++ b/src/properties.jl
@@ -8,26 +8,26 @@ Base.getindex(v::AAProperties, i::Int) = i == 1 ? v.charge :
                                          i == 2 ? v.hydropathy :
                                          i == 3 ? v.volume : Base.throw_boundserror(v, i)
 const aa_properties = Dict(
-    Residue('A') => AAProperties(0, 1.8, 67),
-    Residue('R') => AAProperties(1, -4.5, 148),
-    Residue('N') => AAProperties(0, -3.5, 96),
-    Residue('D') => AAProperties(-1, -3.5, 91),
-    Residue('C') => AAProperties(0, 2.5, 86),
-    Residue('Q') => AAProperties(0, -3.5, 114),
-    Residue('E') => AAProperties(-1, -3.5, 109),
-    Residue('G') => AAProperties(0, -0.4, 48),
-    Residue('H') => AAProperties(0.1, -3.2, 118),   # at pH 7.4, His has a 10% chance of being charged
-    Residue('I') => AAProperties(0, 4.5, 124),
-    Residue('L') => AAProperties(0, 3.8, 124),
-    Residue('K') => AAProperties(1, -3.9, 135),
-    Residue('M') => AAProperties(0, 1.9, 124),
-    Residue('F') => AAProperties(0, 2.8, 135),
-    Residue('P') => AAProperties(0, -1.6, 90),
-    Residue('S') => AAProperties(0, -0.8, 73),
-    Residue('T') => AAProperties(0, -0.7, 93),
-    Residue('W') => AAProperties(0, -0.9, 163),
-    Residue('Y') => AAProperties(0, -1.3, 141),
-    Residue('V') => AAProperties(0, 4.2, 105),
+    MSA.Residue('A') => AAProperties(0, 1.8, 67),
+    MSA.Residue('R') => AAProperties(1, -4.5, 148),
+    MSA.Residue('N') => AAProperties(0, -3.5, 96),
+    MSA.Residue('D') => AAProperties(-1, -3.5, 91),
+    MSA.Residue('C') => AAProperties(0, 2.5, 86),
+    MSA.Residue('Q') => AAProperties(0, -3.5, 114),
+    MSA.Residue('E') => AAProperties(-1, -3.5, 109),
+    MSA.Residue('G') => AAProperties(0, -0.4, 48),
+    MSA.Residue('H') => AAProperties(0.1, -3.2, 118),   # at pH 7.4, His has a 10% chance of being charged
+    MSA.Residue('I') => AAProperties(0, 4.5, 124),
+    MSA.Residue('L') => AAProperties(0, 3.8, 124),
+    MSA.Residue('K') => AAProperties(1, -3.9, 135),
+    MSA.Residue('M') => AAProperties(0, 1.9, 124),
+    MSA.Residue('F') => AAProperties(0, 2.8, 135),
+    MSA.Residue('P') => AAProperties(0, -1.6, 90),
+    MSA.Residue('S') => AAProperties(0, -0.8, 73),
+    MSA.Residue('T') => AAProperties(0, -0.7, 93),
+    MSA.Residue('W') => AAProperties(0, -0.9, 163),
+    MSA.Residue('Y') => AAProperties(0, -1.3, 141),
+    MSA.Residue('V') => AAProperties(0, 4.2, 105),
 )
 
 # z-score the amino acid features using the full covariance matrix
diff --git a/src/tmalign.jl b/src/tmalign.jl
index e6bdb69..e8472eb 100644
--- a/src/tmalign.jl
+++ b/src/tmalign.jl
@@ -25,43 +25,47 @@ function parse_tma(filename)
         lines = lines[idx+1:end]
     end
     length(lines) == 3 || error("expecting 3 lines, got $(length(lines))")
-    seq1 = [Residue(c) for c in lines[1]]
-    seq2 = [Residue(c) for c in lines[3]]
+    seq1 = [MSA.Residue(c) for c in lines[1]]
+    seq2 = [MSA.Residue(c) for c in lines[3]]
     quality = [c for c in lines[2]]
     append!(quality, fill('-', length(seq1) - length(quality)))
     return seq1, seq2, quality
 end
 
 const alignsentinel = -1
-function MapAlign(s::AbstractVector{PDBResidue}, a::AbstractVector{Residue}, quality)
+function MapAlign(s::ChainLike, a::AbstractVector{MSA.Residue}, quality)
     s2a, a2s = fill(alignsentinel, length(s)), Int[]  # we only retain the high-quality matches
+    sj, state = iterate(s)
     j = 0
     for (r, q) in zip(a, quality)
-        r == Residue('-') && continue
+        r == MSA.Residue('-') && continue
         j += 1
-        r == three2residue(s[j].id.name) || error("at position $j, residue was $(s[j].id.name) but alignment was $r")
-        q == ':' || continue
-        push!(a2s, j)
-        s2a[j] = length(a2s)
+        r == three2residue(String(resname(sj))) || error("at position $j, residue was $(resname(sj)) but alignment was $r")
+        if q == ':'
+            push!(a2s, j)
+            s2a[j] = length(a2s)
+        end
+        ret = iterate(s, state)
+        sj, state = ret === nothing ? (nothing, nothing) : ret   # if we've reached s[end], continuing should be an error
     end
     return MapAlign(s2a, a2s)
 end
 
-function StructAlign(struct1::AbstractVector{PDBResidue}, struct2::AbstractVector{PDBResidue},
-                     align1::AbstractVector{Residue},     align2::AbstractVector{Residue},
+function StructAlign(struct1::ChainLike, struct2::ChainLike,
+                     align1::AbstractVector{MSA.Residue}, align2::AbstractVector{MSA.Residue},
                      quality)
     StructAlign(MapAlign(struct1, align1, quality), MapAlign(struct2, align2, quality))
 end
 
 """
-    StructAlign(struct1::AbstractVector{PDBResidue}, struct2::AbstractVector{PDBResidue}, filename::AbstractString)
+    StructAlign(struct1::ChainLike, struct2::ChainLike, filename::AbstractString)
 
 Create a structure-based alignment between `struct1` and `struct2`. `filename` is the name of the TM-align "results" file
 (e.g., https://zhanggroup.org//TM-align/example/873772.html).
 
 See also [`residueindex`](@ref).
 """
-StructAlign(struct1::AbstractVector{PDBResidue}, struct2::AbstractVector{PDBResidue}, filename::AbstractString) =
+StructAlign(struct1::ChainLike, struct2::ChainLike, filename::AbstractString) =
     StructAlign(struct1, struct2, parse_tma(filename)...)
 
 """
diff --git a/src/utils.jl b/src/utils.jl
index fbc4cdd..c99df90 100644
--- a/src/utils.jl
+++ b/src/utils.jl
@@ -38,16 +38,16 @@ function query_ncbi(id)
 end
 
 """
-    getchain(filename::AbstractString; model="1", chain="A")
+    getchain(filename::AbstractString; model=1, chain="A")
 
 Read a PDB file `filename` and extract the specified chain.
 """
-getchain(filename::AbstractString; model="1", chain="A") = read_file(filename, PDBFile; model, chain)
+getchain(filename::AbstractString; model=1, chain="A") = read(filename, PDBFormat)[model][chain]
 
-function validate_seq_residues(seq::AnnotatedAlignedSequence, chain::AbstractVector{PDBResidue})
+function validate_seq_residues(seq::AnnotatedAlignedSequence, chain)
     for (i, r) in zip(getsequencemapping(seq), seq)
         (r == GAP || r == XAA) && continue
-        res = three2residue(chain[i].id.name)
+        res = three2residue(String(resname(chain[i])))
         res == r || return false
     end
     return true
@@ -78,5 +78,25 @@ function findall_subseq(subseq, seq)
     return starts
 end
 
-alpha_nitrogen(r) = r.atoms[findfirst(a -> a.atom == "N", r.atoms)]
-alpha_carbon(r) = r.atoms[findfirst(a -> a.atom == "CA", r.atoms)]
\ No newline at end of file
+alpha_nitrogen(r) = firstmatch(a -> atomname(a) == "N", r)
+alpha_carbon(r) = firstmatch(a -> atomname(a) == "CA", r)
+
+function firstmatch(f, iter)
+    for x in iter
+        if f(x)
+            return x
+        end
+    end
+    return nothing
+end
+
+function skipnothing(v::AbstractVector{Union{T,Nothing}}) where T
+    return T[x for x in v if x !== nothing]
+end
+
+MIToS.PDB.ishydrophobic(a::AbstractAtom, rname::AbstractString) = (rname, atomname(a)) in MIToS.PDB._hydrophobic
+MIToS.PDB.isaromatic(a::AbstractAtom, rname::AbstractString) = (rname, atomname(a)) in MIToS.PDB._aromatic
+MIToS.PDB.iscationic(a::AbstractAtom, rname::AbstractString) = (rname, atomname(a)) in MIToS.PDB._cationic
+MIToS.PDB.isanionic(a::AbstractAtom, rname::AbstractString) = (rname, atomname(a)) in MIToS.PDB._anionic
+MIToS.PDB.ishbonddonor(a::AbstractAtom, rname::AbstractString) = (rname, atomname(a)) in keys(MIToS.PDB._hbond_donor)
+MIToS.PDB.ishbondacceptor(a::AbstractAtom, rname::AbstractString) = (rname, atomname(a)) in keys(MIToS.PDB._hbond_acceptor)
diff --git a/test/deprecated.jl b/test/deprecated.jl
deleted file mode 100644
index 34ce5b4..0000000
--- a/test/deprecated.jl
+++ /dev/null
@@ -1,11 +0,0 @@
-@testset "Deprecated" begin
-    @info "Testing deprecated functions (warnings are expected)"
-    @test only(GPCRAnalysis.markers(2, [Coordinates(5, 4, 3)], 0.5, "blue")) == "marker #2 position 5.0,4.0,3.0 radius 0.5 color blue"
-    dot = GPCRAnalysis.marker(2, Coordinates(5, 4, 3), 0.5, "blue")
-    tmpfile = tempname() * ".cxc"
-    chimerax_script(tmpfile, ["align_ABCD.pdb", "align_EFGH.pdb"], [[5, 10, 15], [7, 11]]; extras=[dot])
-    script = read(tmpfile, String)
-    @test !occursin("matchmaker #2-2 to #1", script)
-    @test occursin("marker #2 position 5.0,4.0,3.0 radius 0.5 color blue", script)
-    rm(tmpfile)
-end
diff --git a/test/runtests.jl b/test/runtests.jl
index d593adb..b697607 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -1,7 +1,8 @@
 using GPCRAnalysis
-using MIToS
-using MIToS.MSA
-using MIToS.PDB
+using MIToS: MSA, Pfam
+using MIToS.MSA: @res_str, three2residue, coverage, GappedAlphabet, filtersequences!, percentsimilarity,
+                 nsequences, sequencenames, getsequencemapping
+using BioStructures
 using GaussianMixtureAlignment
 using InvertedIndices
 using Statistics
@@ -56,9 +57,9 @@ using Test
     @testset "MSA" begin
         # The test file is copied from MIToS/test/data, with gratitude
         pf09645_sto = "PF09645_full.stockholm"
-        msa = MIToS.MSA.read_file(pf09645_sto, MIToS.Pfam.Stockholm)
-        @test MIToS.MSA.nsequences(filter_species!(deepcopy(msa), "ATV")) == 1
-        @test MIToS.MSA.nsequences(filter_long!(deepcopy(msa), 70)) == 3
+        msa = MSA.read_file(pf09645_sto, Pfam.Stockholm)
+        @test MSA.nsequences(filter_species!(deepcopy(msa), "ATV")) == 1
+        @test MSA.nsequences(filter_long!(deepcopy(msa), 70)) == 3
 
         idx = SequenceMapping([0, 4, 5, 0])
         seqvals = fill(NaN, 9)
@@ -80,7 +81,7 @@ using Test
 
         @test AccessionCode(msa, MSACode("Y070_ATV/2-70")) == AccessionCode("Q3V4T1")
         @test MSACode(msa, AccessionCode("Q3V4T1")) == MSACode("Y070_ATV/2-70")
-        @test msa[MSACode("Y070_ATV/2-70")][8] == msa[AccessionCode("Q3V4T1")][8] == Residue('V')
+        @test msa[MSACode("Y070_ATV/2-70")][8] == msa[AccessionCode("Q3V4T1")][8] == MSA.Residue('V')
     end
     @testset "ChimeraX" begin
         tmpfile = tempname() * ".cxc"
@@ -92,11 +93,11 @@ using Test
         @test occursin("show #2 :11", script)
         @test occursin("transparency #1-2 80 target c", script)
         @test occursin("matchmaker #2-2 to #1", script)
-        dot = GPCRAnalysis.marker(2, Coordinates(5, 4, 3), 0.5, "blue")
+        dot = GPCRAnalysis.marker(2, [5, 4, 3], 0.5, "blue")
         chimerax_script(tmpfile, ["ABCD.pdb", "EFGH.pdb"], [[5, 10, 15], [7, 11]]; align=false, extras=[dot])
         script = read(tmpfile, String)
         @test !occursin("matchmaker #2-2 to #1", script)
-        @test occursin("marker #2 position 5.0,4.0,3.0 radius 0.5 color blue", script)
+        @test occursin("marker #2 position 5,4,3 radius 0.5 color blue", script)
         rm(tmpfile)
     end
     @testset "Needleman-Wunsch" begin
@@ -137,7 +138,7 @@ using Test
         D = [1 0 1 2 3; 3 2 1 0 1; 4 3 2 1 0; 5 4 3 2 1]
         @test align_nw(D, gapcosts) == [(1, 2), (2, 3), (3, 4), (4, 5)]
 
-        opsd = read_file("AF-P15409-F1-model_v4.pdb", PDBFile)
+        opsd = getchain("AF-P15409-F1-model_v4.pdb")
         opsd_tms = [37:61, 74:96, 111:133, 153:173, 203:224, 253:274, 287:308]
         @test align_ranges(opsd, opsd, opsd_tms) == opsd_tms
 
@@ -153,8 +154,8 @@ using Test
         testscore(S, P, D, gapcosts)
     end
     @testset "Pocket residues and features" begin
-        opsd = read_file("AF-P15409-F1-model_v4.pdb", PDBFile)
-        opsdr = [three2residue(r.id.name) for r in opsd]
+        opsd = getchain("AF-P15409-F1-model_v4.pdb")
+        opsdr = [three2residue(String(resname(r))) for r in opsd]
         # These are not quite accurate, from https://www.uniprot.org/uniprotkb/P15409/entry#subcellular_location
         # the actual answer is
         #   opsd_tms = [37:61, 74:96, 111:133, 153:173, 203:224, 253:274, 287:308]
@@ -167,13 +168,13 @@ using Test
                     only(findall_subseq(res"AEKE", opsdr)):only(findall_subseq(res"YIFT", opsdr))+3,
                     only(findall_subseq(res"PIFM", opsdr)):only(findall_subseq(res"YIML", opsdr))+3,
         ]
-        opsd = align_to_membrane(opsd, opsd_tms)
+        applytransform!(opsd, align_to_membrane(opsd, opsd_tms))
         # Check the alignment
         leaflet_e, leaflet_i = GPCRAnalysis.collect_leaflet_residues(opsd, opsd_tms, true)
-        ce = reduce(vcat, [coordinatesmatrix(r) for r in leaflet_e])
-        ci = reduce(vcat, [coordinatesmatrix(r) for r in leaflet_i])
-        ze = median(ce[:,3])
-        zi = median(ci[:,3])
+        ce = reduce(hcat, [coordarray(r) for r in leaflet_e])
+        ci = reduce(hcat, [coordarray(r) for r in leaflet_i])
+        ze = median(ce[3,:])
+        zi = median(ci[3,:])
         @test ze > zi    # extracellular is positive
         @test abs(ze + zi) < 1e-6 * (ze - zi) # center of membrane is at z=0
 
@@ -277,7 +278,7 @@ using Test
         @test all(abs.(wF*ones(3)) .< 1e-7)   # gradient is zero at the minimum even under re-tuning
 
         interactions = [(:Steric, :Steric) => 1, (:Hydrophobe, :Hydrophobe) => -1, (:Donor, :Acceptor) => -1]  # drop the ionic
-        opsd = read_file("AF-P15409-F1-model_v4.pdb", PDBFile)
+        opsd = getchain("AF-P15409-F1-model_v4.pdb")
         opsd_tms = [37:61, 74:96, 111:133, 153:173, 203:224, 253:274, 287:308]
         tm_res = inward_tm_residues(opsd, opsd_tms)
         tm_idxs = reduce(vcat, [tm[idx] for (tm, idx) in zip(opsd_tms, tm_res)])
@@ -328,7 +329,7 @@ using Test
                 absfn = joinpath(dir, fn)
                 @test try_download_alphafold(uname, absfn; version=v) == absfn
                 @test isfile(absfn)
-                @test getchain(absfn) isa AbstractVector{MIToS.PDB.PDBResidue}
+                @test getchain(absfn) isa GPCRAnalysis.ChainLike
                 @test only(alphafoldfiles(dir)) == fn
             end
         end
@@ -340,7 +341,7 @@ using Test
                 if !isfile(pfampath)
                     Pfam.downloadpfam("PF03402"; filename=pfampath)
                 end
-                msa = MIToS.MSA.read_file(pfampath, MSA.Stockholm, generatemapping=true, useidcoordinates=true)
+                msa = MSA.read_file(pfampath, MSA.Stockholm, generatemapping=true, useidcoordinates=true)
                 filter_species!(msa, "MOUSE")
                 # Make small enough for a decent download test
                 filtersequences!(msa, coverage(msa) .>= 0.9)
@@ -364,18 +365,19 @@ using Test
                 @test @inferred(pLDDTcolor(first(c1))) isa AbstractRGB
                 conserved_residues = c1[SequenceMapping(getsequencemapping(msa, 1))[conserved_cols]]
                 badidx = findall(==(nothing), conserved_residues)
-                conserved_residues = convert(Vector{PDBResidue}, conserved_residues[Not(badidx)])
+                conserved_residues = convert(Vector{Residue}, conserved_residues[Not(badidx)])
                 conserved_cols = conserved_cols[Not(badidx)]
                 sm = SequenceMapping(getsequencemapping(msa, 2))
-                c2a = align(conserved_residues, c2, sm[conserved_cols])
-                @test rmsd(conserved_residues, c2a[sm[conserved_cols]]; superimposed=true) < rmsd(conserved_residues, c2[sm[conserved_cols]]; superimposed=true)
+                c2a = applytransform!(copy(c2), align(conserved_residues, c2, sm[conserved_cols]))
+                @test rmsd(conserved_residues, skipnothing(c2a[sm[conserved_cols]]); superimpose=false) <
+                      rmsd(conserved_residues, skipnothing(c2[sm[conserved_cols]]); superimpose=false)
                 mc = mapclosest(c1, c2a)
 
                 cloc = chargelocations(c1)
                 lpos = positive_locations(cloc)
-                @test !isempty(lpos) && all(item -> isa(item, Coordinates), lpos)
+                @test !isempty(lpos) && all(item -> isa(item, AbstractVector), lpos)
                 lneg = negative_locations(cloc)
-                @test !isempty(lneg) && all(item -> isa(item, Coordinates), lneg)
+                @test !isempty(lneg) && all(item -> isa(item, AbstractVector), lneg)
                 @test isempty(lpos ∩ lneg)
 
                 # Choose a sufficiently-divergent pair that structural alignment is nontrivial
@@ -401,7 +403,7 @@ using Test
                 smref = smref[keep]
                 conserved_residues = cref[smref]
                 smcmp = SequenceMapping(getsequencemapping(msa, idxcmp))
-                ccmpa = align(conserved_residues, ccmp, smcmp[conserved_cols])
+                ccmpa = applytransform!(copy(ccmp), align(conserved_residues, ccmp, smcmp[conserved_cols]))
                 mc = mapclosest(cref, ccmpa)
                 idxclose = first.(filter(item -> item[2] < 5.0, mc))   # TMAlign scores those closer than 5Å as a match
                 n = 0; for i in Iterators.drop(eachindex(idxclose), 1)
@@ -425,5 +427,3 @@ using Test
         end
     end
 end
-
-include("deprecated.jl")

From 32765a806bfcd78ba452231a98c2287cf0426498 Mon Sep 17 00:00:00 2001
From: Tim Holy <tim.holy@gmail.com>
Date: Thu, 22 Aug 2024 06:37:14 -0500
Subject: [PATCH 2/2] Add `align_closest` and rename `map_closest`

---
 src/GPCRAnalysis.jl |  4 ++--
 src/align.jl        | 45 ++++++++++++++++++++++++++++++++++++++++++++-
 test/runtests.jl    |  5 +++--
 3 files changed, 49 insertions(+), 5 deletions(-)

diff --git a/src/GPCRAnalysis.jl b/src/GPCRAnalysis.jl
index fab9be1..45b9be6 100644
--- a/src/GPCRAnalysis.jl
+++ b/src/GPCRAnalysis.jl
@@ -45,10 +45,10 @@ export SequenceMapping, AccessionCode, MSACode, NWGapCosts
 export species, uniprotX, query_uniprot_accession, query_ebi_proteins, query_ncbi
 export try_download_alphafold, query_alphafold_latest, download_alphafolds, alphafoldfile, alphafoldfiles, getchain,
        findall_subseq, pLDDT, pLDDTcolor
-export align_to_axes, align_to_membrane, align_nw, align_ranges
+export align_to_axes, align_to_membrane, align_nw, align_ranges, map_closest, align_closest
 export filter_species!, filter_long!, sortperm_msa, chimerax_script
 export project_sequences, columnwise_entropy, align, residue_centroid, residue_centroid_matrix, alphacarbon_coordinates,
-       alphacarbon_coordinates_matrix, mapclosest, chargelocations, positive_locations, negative_locations
+       alphacarbon_coordinates_matrix, chargelocations, positive_locations, negative_locations
 export StructAlign, residueindex, ismapped, skipnothing
 export BWScheme, lookupbw
 export aa_properties, aa_properties_zscored
diff --git a/src/align.jl b/src/align.jl
index ed099f3..327f25f 100644
--- a/src/align.jl
+++ b/src/align.jl
@@ -23,15 +23,58 @@ function align(fixedpos::AbstractMatrix{Float64}, moving::StructureLike, sm::Seq
 end
 align(fixed, moving::StructureLike, sm::SequenceMapping; kwargs...) = align(residue_centroid_matrix(fixed), moving, sm; kwargs...)
 
-function mapclosest(mapto, mapfrom)
+"""
+    mapping = map_closest(mapto::StructureLike, mapfrom::StructureLike)
+
+Return a vector `mapping[i] = (j, distij)`, matching the `i`th residue
+in `mapto` to the `j`th residue in `mapfrom` and reporting the
+distance between them. The mapping minimizes the sum of distances.
+`mapto` and `mapfrom` must already be aligned for this to be meaningful.
+
+If `mapfrom` is shorter than `mapto`, some `j`s will be 0, indicating
+a skipped residue in `mapto`.
+"""
+function map_closest(mapto::StructureLike, mapfrom::StructureLike)
     refcenters = residue_centroid_matrix(mapto)
     seqcenters = residue_centroid_matrix(mapfrom)
+    return map_closest(refcenters, seqcenters)
+end
+
+function map_closest(refcenters::AbstractMatrix, seqcenters::AbstractMatrix)
     D = pairwise(Euclidean(), refcenters, seqcenters; dims=2)
     assignment, _ = hungarian(D)
     fillval = convert(eltype(D), NaN)
     return collect(zip(assignment, [j == 0 ? fillval : D[i, j] for (i, j) in enumerate(assignment)]))
 end
 
+"""
+    tform = align_closest(mapto::StructLike, mapfrom::StructLike; Dthresh=5)
+    tform = align_closest(coordsto, coordsfrom; Dthresh=5)
+
+Return the rigid transformation best aligning `mapfrom` to `mapto`. The transformation is computed from
+residues matched by `map_closest`, using only residues closer than `Dthresh`.
+
+Because the mapping is determined by distance, this can only "tweak" an already-close alignment.
+"""
+align_closest(mapto, mapfrom; kwargs...) = align_closest(residue_centroid_matrix(mapto), mapfrom; kwargs...)
+align_closest(coordsto::AbstractMatrix, mapfrom; kwargs...) = align_closest(coordsto, residue_centroid_matrix(mapfrom); kwargs...)
+align_closest(coordsto::AbstractMatrix, coordsfrom::AbstractMatrix; kwargs...) =
+    align_closest(coordsto, coordsfrom, map_closest(coordsto, coordsfrom); kwargs...)
+
+align_closest(mapto, mapfrom, mapping; kwargs...) = align_closest(residue_centroid_matrix(mapto), mapfrom, mapping; kwargs...)
+align_closest(coordsto::AbstractMatrix, mapfrom, mapping; kwargs...) =
+    align_closest(coordsto, residue_centroid_matrix(mapfrom), mapping; kwargs...)
+function align_closest(coordsto::AbstractMatrix, coordsfrom::AbstractMatrix, mapping; Dthresh=5)
+    toidx, fromidx = Int[], Int[]
+    for (i, (j, d)) in pairs(mapping)
+        j == 0 && continue
+        d > Dthresh && continue
+        push!(toidx, i)
+        push!(fromidx, j)
+    end
+    return Transformation(coordsto[:,toidx], coordsfrom[:,fromidx], toidx, fromidx)
+end
+
 ## Align to membrane
 
 """
diff --git a/test/runtests.jl b/test/runtests.jl
index b697607..1db1fa7 100644
--- a/test/runtests.jl
+++ b/test/runtests.jl
@@ -371,7 +371,8 @@ using Test
                 c2a = applytransform!(copy(c2), align(conserved_residues, c2, sm[conserved_cols]))
                 @test rmsd(conserved_residues, skipnothing(c2a[sm[conserved_cols]]); superimpose=false) <
                       rmsd(conserved_residues, skipnothing(c2[sm[conserved_cols]]); superimpose=false)
-                mc = mapclosest(c1, c2a)
+                mc = map_closest(c1, c2a)
+                @test align_closest(c1, c2a) isa Transformation
 
                 cloc = chargelocations(c1)
                 lpos = positive_locations(cloc)
@@ -404,7 +405,7 @@ using Test
                 conserved_residues = cref[smref]
                 smcmp = SequenceMapping(getsequencemapping(msa, idxcmp))
                 ccmpa = applytransform!(copy(ccmp), align(conserved_residues, ccmp, smcmp[conserved_cols]))
-                mc = mapclosest(cref, ccmpa)
+                mc = map_closest(cref, ccmpa)
                 idxclose = first.(filter(item -> item[2] < 5.0, mc))   # TMAlign scores those closer than 5Å as a match
                 n = 0; for i in Iterators.drop(eachindex(idxclose), 1)
                      n += idxclose[i] < idxclose[i-1]