From 6a1d3c2cfd3ee77eaa6bdb6d660a5c8717d441ce Mon Sep 17 00:00:00 2001
From: Adam Gundry <adam@well-typed.com>
Date: Tue, 17 Aug 2021 21:26:42 +0100
Subject: [PATCH] WIP: migrate search engine to full-text-search package (#748)

---
 Distribution/Server/Features/Search.hs        |  18 +-
 Distribution/Server/Features/Search/BM25F.hs  | 191 --------
 .../Server/Features/Search/DocFeatVals.hs     |  26 --
 .../Server/Features/Search/DocIdSet.hs        | 201 ---------
 .../Server/Features/Search/DocTermIds.hs      |  64 ---
 .../Server/Features/Search/PkgSearch.hs       |  17 +-
 .../Server/Features/Search/SearchEngine.hs    | 405 -----------------
 .../Server/Features/Search/SearchIndex.hs     | 414 ------------------
 .../Server/Features/Search/TermBag.hs         |  76 ----
 Distribution/Server/Framework/MemSize.hs      |   7 +
 hackage-server.cabal                          |   8 +-
 11 files changed, 27 insertions(+), 1400 deletions(-)
 delete mode 100644 Distribution/Server/Features/Search/BM25F.hs
 delete mode 100644 Distribution/Server/Features/Search/DocFeatVals.hs
 delete mode 100644 Distribution/Server/Features/Search/DocIdSet.hs
 delete mode 100644 Distribution/Server/Features/Search/DocTermIds.hs
 delete mode 100644 Distribution/Server/Features/Search/SearchEngine.hs
 delete mode 100644 Distribution/Server/Features/Search/SearchIndex.hs
 delete mode 100644 Distribution/Server/Features/Search/TermBag.hs

diff --git a/Distribution/Server/Features/Search.hs b/Distribution/Server/Features/Search.hs
index f5f22987c..43b4fb406 100644
--- a/Distribution/Server/Features/Search.hs
+++ b/Distribution/Server/Features/Search.hs
@@ -7,7 +7,7 @@ module Distribution.Server.Features.Search (
     defaultSearchRankParameters,
     SearchEngine.SearchRankParameters(..),
     PkgDocField, PkgDocFeatures,
-    BM25F.Explanation(..),
+    SearchEngine.Explanation(..),
   ) where
 
 import Distribution.Server.Framework
@@ -17,9 +17,7 @@ import Distribution.Server.Features.Core
 import Distribution.Server.Features.PackageList
 
 import Distribution.Server.Features.Search.PkgSearch
-import Distribution.Server.Features.Search.SearchEngine (SearchRankParameters(..))
-import qualified Distribution.Server.Features.Search.SearchEngine as SearchEngine
-import qualified Distribution.Server.Features.Search.BM25F as BM25F
+import qualified Data.SearchEngine as SearchEngine
 import qualified Distribution.Server.Packages.PackageIndex as PackageIndex
 
 import Distribution.Server.Packages.Types
@@ -40,10 +38,10 @@ data SearchFeature = SearchFeature {
 
     searchPackages        :: forall m. MonadIO m => [String] -> m [PackageName],
     searchPackagesExplain :: forall m. MonadIO m
-                          => SearchRankParameters PkgDocField PkgDocFeatures
+                          => PkgSearchRankParameters
                           -> [String]
                           -> m (Maybe PackageName,
-                               [(BM25F.Explanation PkgDocField PkgDocFeatures T.Text
+                               [(SearchEngine.Explanation PkgDocField PkgDocFeatures T.Text
                                 ,PackageName)])
 }
 
@@ -62,7 +60,7 @@ initSearchFeature env@ServerEnv{serverTemplatesDir, serverTemplatesMode} = do
 
     searchEngineState <- newMemStateWHNF initialPkgSearchEngine
 
-    return $ \core@CoreFeature{..} list -> do
+    return $ \core list -> do
       let feature = searchFeature env core list
                                   searchEngineState templates
 
@@ -157,15 +155,15 @@ searchFeature ServerEnv{serverBaseURI} CoreFeature{..} ListFeature{getAllLists}
         return results
 
     searchPackagesExplain :: MonadIO m
-                          => SearchRankParameters PkgDocField PkgDocFeatures
+                          => PkgSearchRankParameters
                           -> [String]
-                          -> m (Maybe PackageName, [(BM25F.Explanation PkgDocField PkgDocFeatures T.Text, PackageName)])
+                          -> m (Maybe PackageName, [(SearchEngine.Explanation PkgDocField PkgDocFeatures T.Text, PackageName)])
     searchPackagesExplain params terms = do
         se <- readMemState searchEngineState
         let results = SearchEngine.queryExplain
                         (SearchEngine.setRankParams params se)
                         (map T.pack terms)
-        return results
+        return (Nothing, results) -- TODO: no exact match available from the full-text-search version of queryExplain
 
     handlerGetOpenSearch :: DynamicPath -> ServerPartE Response
     handlerGetOpenSearch _ = do
diff --git a/Distribution/Server/Features/Search/BM25F.hs b/Distribution/Server/Features/Search/BM25F.hs
deleted file mode 100644
index 1b4189dbf..000000000
--- a/Distribution/Server/Features/Search/BM25F.hs
+++ /dev/null
@@ -1,191 +0,0 @@
-{-# LANGUAGE RecordWildCards #-}
-
--- | See:
---
--- * \"The Probabilistic Relevance Framework: BM25 and Beyond\"
---   <www.soi.city.ac.uk/~ser/papers/foundations_bm25_review.pdf‎>
---
--- * \"An Introduction to Information Retrieval\"
---   <http://nlp.stanford.edu/IR-book/pdf/irbookonlinereading.pdf>
---
-module Distribution.Server.Features.Search.BM25F (
-    Context(..),
-    FeatureFunction(..),
-    Doc(..),
-    score,
-
-    Explanation(..),
-    explain,
-  ) where
-
-import Data.Ix
-
-data Context term field feature = Context {
-         numDocsTotal     :: !Int,
-         avgFieldLength   :: field -> Float,
-         numDocsWithTerm  :: term -> Int,
-         paramK1          :: !Float,
-         paramB           :: field -> Float,
-         -- consider minimum length to prevent massive B bonus?
-         fieldWeight      :: field -> Float,
-         featureWeight    :: feature -> Float,
-         featureFunction  :: feature -> FeatureFunction
-       }
-
-data Doc term field feature = Doc {
-         docFieldLength        :: field -> Int,
-         docFieldTermFrequency :: field -> term -> Int,
-         docFeatureValue       :: feature -> Float
-       }
-
-
--- | The BM25F score for a document for a given set of terms.
---
-score :: (Ix field, Bounded field, Ix feature, Bounded feature) =>
-         Context term field feature ->
-         Doc term field feature -> [term] -> Float
-score ctx doc terms =
-    sum (map (weightedTermScore    ctx doc) terms)
-  + sum (map (weightedNonTermScore ctx doc) features)
-
-  where
-    features = range (minBound, maxBound)
-
-
-weightedTermScore :: (Ix field, Bounded field) =>
-                     Context term field feature ->
-                     Doc term field feature -> term -> Float
-weightedTermScore ctx doc t =
-    weightIDF ctx t *     tf'
-                     / (k1 + tf')
-  where
-    tf' = weightedDocTermFrequency ctx doc t
-    k1  = paramK1 ctx
-
-
-weightIDF :: Context term field feature -> term -> Float
-weightIDF ctx t =
-    log ((n - n_t + 0.5) / (n_t + 0.5))
-  where
-    n   = fromIntegral (numDocsTotal ctx)
-    n_t = fromIntegral (numDocsWithTerm ctx t)
-
-
-weightedDocTermFrequency :: (Ix field, Bounded field) =>
-                            Context term field feature ->
-                            Doc term field feature -> term -> Float
-weightedDocTermFrequency ctx doc t =
-    sum [ w_f * tf_f / _B_f
-        | field <- range (minBound, maxBound)
-        , let w_f  = fieldWeight ctx field
-              tf_f = fromIntegral (docFieldTermFrequency doc field t)
-              _B_f = lengthNorm ctx doc field
-        ]
-
-
-lengthNorm :: Context term field feature ->
-              Doc term field feature -> field -> Float
-lengthNorm ctx doc field =
-    (1-b_f) + b_f * sl_f / avgsl_f
-  where
-    b_f     = paramB ctx field
-    sl_f    = fromIntegral (docFieldLength doc field)
-    avgsl_f = avgFieldLength ctx field
-
-
-weightedNonTermScore :: (Ix feature, Bounded feature) =>
-                        Context term field feature ->
-                        Doc term field feature -> feature -> Float
-weightedNonTermScore ctx doc feature =
-    w_f * _V_f f_f
-  where
-    w_f  = featureWeight ctx feature
-    _V_f = applyFeatureFunction (featureFunction ctx feature)
-    f_f  = docFeatureValue doc feature
-
-
-data FeatureFunction
-   = LogarithmicFunction   Float -- ^ @log (\lambda_i + f_i)@
-   | RationalFunction      Float -- ^ @f_i / (\lambda_i + f_i)@
-   | SigmoidFunction Float Float -- ^ @1 / (\lambda + exp(-(\lambda' * f_i))@
-
-applyFeatureFunction :: FeatureFunction -> (Float -> Float)
-applyFeatureFunction (LogarithmicFunction p1) = \fi -> log (p1 + fi)
-applyFeatureFunction (RationalFunction    p1) = \fi -> fi / (p1 + fi)
-applyFeatureFunction (SigmoidFunction  p1 p2) = \fi -> 1 / (p1 + exp (-fi * p2))
-
-
-------------------
--- Explanation
---
-
--- | A breakdown of the BM25F score, to explain somewhat how it relates to
--- the inputs, and so you can compare the scores of different documents.
---
-data Explanation field feature term = Explanation {
-       -- | The overall score is the sum of the 'termScores', 'positionScore'
-       -- and 'nonTermScore'
-       overallScore  :: Float,
-
-       -- | There is a score contribution from each query term. This is the
-       -- score for the term across all fields in the document (but see
-       -- 'termFieldScores').
-       termScores    :: [(term, Float)],
-{-
-       -- | There is a score contribution for positional information. Terms
-       -- appearing in the document close together give a bonus.
-       positionScore :: [(field, Float)],
--}
-       -- | The document can have an inate bonus score independent of the terms
-       -- in the query. For example this might be a popularity score.
-       nonTermScores :: [(feature, Float)],
-
-       -- | This does /not/ contribute to the 'overallScore'. It is an
-       -- indication of how the 'termScores' relates to per-field scores.
-       -- Note however that the term score for all fields is /not/ simply
-       -- sum of the per-field scores. The point of the BM25F scoring function
-       -- is that a linear combination of per-field scores is wrong, and BM25F
-       -- does a more cunning non-linear combination.
-       --
-       -- However, it is still useful as an indication to see scores for each
-       -- field for a term, to see how the compare.
-       --
-       termFieldScores :: [(term, [(field, Float)])]
-     }
-  deriving Show
-
-instance Functor (Explanation field feature) where
-  fmap f e@Explanation{..} =
-    e {
-      termScores      = [ (f t, s)  | (t, s)  <- termScores ],
-      termFieldScores = [ (f t, fs) | (t, fs) <- termFieldScores ]
-    }
-
-explain :: (Ix field, Bounded field, Ix feature, Bounded feature) =>
-           Context term field feature ->
-           Doc term field feature -> [term] -> Explanation field feature term
-explain ctx doc ts =
-    Explanation {..}
-  where
-    overallScore  = sum (map snd termScores)
---                  + sum (map snd positionScore)
-                  + sum (map snd nonTermScores)
-    termScores    = [ (t, weightedTermScore ctx doc t) | t <- ts ]
---    positionScore = [ (f, 0) | f <- range (minBound, maxBound) ]
-    nonTermScores = [ (feature, weightedNonTermScore ctx doc feature)
-                    | feature <- range (minBound, maxBound) ]
-
-    termFieldScores =
-      [ (t, fieldScores)
-      | t <- ts
-      , let fieldScores =
-              [ (f, weightedTermScore ctx' doc t)
-              | f <- range (minBound, maxBound)
-              , let ctx' = ctx { fieldWeight = fieldWeightOnly f }
-              ]
-      ]
-    fieldWeightOnly f f' | sameField f f' = fieldWeight ctx f'
-                         | otherwise      = 0
-
-    sameField f f' = index (minBound, maxBound) f
-                  == index (minBound, maxBound) f'
diff --git a/Distribution/Server/Features/Search/DocFeatVals.hs b/Distribution/Server/Features/Search/DocFeatVals.hs
deleted file mode 100644
index 096364276..000000000
--- a/Distribution/Server/Features/Search/DocFeatVals.hs
+++ /dev/null
@@ -1,26 +0,0 @@
-{-# LANGUAGE BangPatterns, GeneralizedNewtypeDeriving #-}
-module Distribution.Server.Features.Search.DocFeatVals (
-    DocFeatVals,
-    featureValue,
-    create,
-  ) where
-
-import Distribution.Server.Features.Search.DocTermIds (vecIndexIx, vecCreateIx)
-import Distribution.Server.Framework.MemSize
-import Data.Vector (Vector)
-import Data.Ix (Ix)
-
-
--- | Storage for the non-term feature values i a document.
---
-newtype DocFeatVals feature = DocFeatVals (Vector Float)
-  deriving (Show, MemSize)
-
-featureValue :: (Ix feature, Bounded feature) => DocFeatVals feature -> feature -> Float
-featureValue (DocFeatVals featVec) = vecIndexIx featVec
-
-create :: (Ix feature, Bounded feature) =>
-          (feature -> Float) -> DocFeatVals feature
-create docFeatVals =
-    DocFeatVals (vecCreateIx docFeatVals)
-
diff --git a/Distribution/Server/Features/Search/DocIdSet.hs b/Distribution/Server/Features/Search/DocIdSet.hs
deleted file mode 100644
index f2af19357..000000000
--- a/Distribution/Server/Features/Search/DocIdSet.hs
+++ /dev/null
@@ -1,201 +0,0 @@
-{-# LANGUAGE BangPatterns, GeneralizedNewtypeDeriving, TypeFamilies, MultiParamTypeClasses #-}
-
-module Distribution.Server.Features.Search.DocIdSet (
-    DocId,
-    DocIdSet,
-    null,
-    size,
-    empty,
-    singleton,
-    fromList,
-    toList,
-    toSet,
-    insert,
-    delete,
-    union,
-    invariant,
-  ) where
-
-import Distribution.Server.Framework.MemSize
-
-import Data.Word
-import qualified Data.Vector.Unboxed         as VU
-import qualified Data.Vector.Unboxed.Mutable as MVU
-import qualified Data.Vector.Generic.Base    as VG
-import qualified Data.Vector.Generic.Mutable as MVG
-import Control.Monad (liftM)
-import Control.Monad.ST
-import Data.Set (Set)
-import qualified Data.Set as Set
-
-import Prelude hiding (null)
-
---import Test.QuickCheck
---import qualified Data.List as List
-
-newtype DocId = DocId Word32
-  deriving (Eq, Ord, Show, Enum, Bounded, MemSize, VU.Unbox)
-
-newtype DocIdSet = DocIdSet (VU.Vector DocId)
-  deriving (Eq, Show)
-
-
-newtype instance MVU.MVector s DocId = MVU_DocId (MVU.MVector s Word32)
-newtype instance VU.Vector DocId = VU_DocId (VU.Vector Word32)
-
-instance VG.Vector VU.Vector DocId where
--- fixme, can i do a noop/coerce/unsafecoerce instead of these fmaps on newtypes?
--- or does it even matter?
-  basicUnsafeFreeze (MVU_DocId mvw)= VU_DocId `liftM` VG.basicUnsafeFreeze mvw
-  basicUnsafeThaw (VU_DocId vw) = MVU_DocId `liftM` VG.basicUnsafeThaw vw
-  basicLength (VU_DocId vw) = VG.basicLength vw
-  basicUnsafeSlice start end (VU_DocId vw) = VU_DocId $ VG.basicUnsafeSlice start end vw
-  basicUnsafeIndexM (VU_DocId vw) ix = DocId `liftM` VG.basicUnsafeIndexM vw ix
-
- -- basicLength, basicUnsafeSlice, basicOverlaps, basicUnsafeNew, basicUnsafeRead, basicUnsafeWrite
-instance MVG.MVector MVU.MVector DocId where
-  basicLength (MVU_DocId vw) = MVG.basicLength vw
-  basicUnsafeSlice start end (MVU_DocId vw) = MVU_DocId $ MVG.basicUnsafeSlice start end vw
-  basicOverlaps (MVU_DocId vw1) (MVU_DocId vw2) = MVG.basicOverlaps vw1 vw2
-  basicUnsafeNew sz = MVU_DocId `liftM` MVG.basicUnsafeNew sz
-  basicUnsafeRead (MVU_DocId vw) ix   = DocId `liftM` MVG.basicUnsafeRead vw ix
-  basicUnsafeWrite (MVU_DocId vw) ix (DocId word) = MVG.basicUnsafeWrite vw ix word
-  basicInitialize (MVU_DocId vw) = MVG.basicInitialize vw
-
--- represented as a sorted sequence of ids
-invariant :: DocIdSet -> Bool
-invariant (DocIdSet vec) =
-    strictlyAscending (VU.toList vec)
-  where
-    strictlyAscending (a:xs@(b:_)) = a < b && strictlyAscending xs
-    strictlyAscending _  = True
-
-
-size :: DocIdSet -> Int
-size (DocIdSet vec) = VU.length vec
-
-null :: DocIdSet -> Bool
-null (DocIdSet vec) = VU.null vec
-
-empty :: DocIdSet
-empty = DocIdSet VU.empty
-
-singleton :: DocId -> DocIdSet
-singleton = DocIdSet . VU.singleton
-
-fromList :: [DocId] -> DocIdSet
-fromList = DocIdSet . VU.fromList . Set.toAscList . Set.fromList
-
-toList ::  DocIdSet -> [DocId]
-toList (DocIdSet vec) = VU.toList vec
-
-toSet ::  DocIdSet -> Set DocId
-toSet (DocIdSet vec) = Set.fromDistinctAscList (VU.toList vec)
-
-insert :: DocId -> DocIdSet -> DocIdSet
-insert x (DocIdSet vec) =
-    case binarySearch vec 0 (VU.length vec - 1) x of
-      (_, True)  -> DocIdSet vec
-      (i, False) -> case VU.splitAt i vec of
-                      (before, after) ->
-                        DocIdSet (VU.concat [before, VU.singleton x, after])
-
-delete :: DocId -> DocIdSet -> DocIdSet
-delete x (DocIdSet vec) =
-    case binarySearch vec 0 (VU.length vec - 1) x of
-      (_, False) -> DocIdSet vec
-      (i, True)  -> case VU.splitAt i vec of
-                      (before, after) ->
-                        DocIdSet (before VU.++ VU.tail after)
-
-binarySearch :: VU.Vector DocId -> Int -> Int -> DocId -> (Int, Bool)
-binarySearch vec !a !b !key
-  | a > b     = (a, False)
-  | otherwise =
-    let mid = (a + b) `div` 2
-     in case compare key (vec VU.! mid) of
-          LT -> binarySearch vec a (mid-1) key
-          EQ -> (mid, True)
-          GT -> binarySearch vec (mid+1) b key
-
-union :: DocIdSet -> DocIdSet -> DocIdSet
-union x y | null x = y
-          | null y = x
-union (DocIdSet xs) (DocIdSet ys) =
-    DocIdSet (VU.create (MVU.new sizeBound >>= writeMerged xs ys))
-  where
-    sizeBound = VU.length xs + VU.length ys
-
-writeMerged :: VU.Vector DocId -> VU.Vector DocId ->
-              MVU.MVector s DocId -> ST s (MVU.MVector s DocId)
-writeMerged xs0 ys0 out = do
-    i <- go xs0 ys0 0
-    return $! MVU.take i out
-  where
-    go !xs !ys !i
-      | VU.null xs = do VU.copy (MVU.slice i (VU.length ys) out) ys;
-                         return (i + VU.length ys)
-      | VU.null ys = do VU.copy (MVU.slice i (VU.length xs) out) xs;
-                         return (i + VU.length xs)
-      | otherwise   = let x = VU.head xs; y = VU.head ys
-                      in case compare x y of
-                GT -> do MVU.write out i y
-                         go           xs  (VU.tail ys) (i+1)
-                EQ -> do MVU.write out i x
-                         go (VU.tail xs) (VU.tail ys) (i+1)
-                LT -> do MVU.write out i x
-                         go (VU.tail xs)           ys  (i+1)
-
-instance MemSize DocIdSet where
-  memSize (DocIdSet vec) = memSizeUVector 2 vec
-
-
--------------
--- tests
---
-{-
-instance Arbitrary DocIdSet where
-  arbitrary = fromList `fmap` (listOf arbitrary)
-
-instance Arbitrary DocId where
-  arbitrary = DocId `fmap` choose (0,15)
-
-
-prop_insert :: DocIdSet -> DocId -> Bool
-prop_insert dset x =
-    let dset' = insert x dset
-     in invariant dset && invariant dset'
-     && all (`member` dset') (x : toList dset)
-
-prop_delete :: DocIdSet -> DocId -> Bool
-prop_delete dset x =
-    let dset' = DocIdSet.delete x dset
-     in invariant dset && invariant dset'
-     && all (`member` dset') (List.delete x (toList dset))
-     && not (x `member` dset')
-
-prop_delete' :: DocIdSet -> Bool
-prop_delete' dset =
-    all (prop_delete dset) (toList dset)
-
-prop_union :: DocIdSet -> DocIdSet -> Bool
-prop_union dset1 dset2 =
-    let dset  = union dset1 dset2
-        dset' = fromList (List.union (toList dset1) (toList dset2))
-
-     in invariant dset && invariant dset'
-     && dset == dset'
-
-prop_union' :: DocIdSet -> DocIdSet -> Bool
-prop_union' dset1 dset2 =
-    let dset   = union dset1 dset2
-        dset'  = List.foldl' (\s i -> insert i s) dset1 (toList dset2)
-        dset'' = List.foldl' (\s i -> insert i s) dset2 (toList dset1)
-     in invariant dset && invariant dset' && invariant dset''
-     && dset == dset'
-     && dset' == dset''
-
-member :: DocId -> DocIdSet -> Bool
-member x (DocIdSet vec) =
-   x `List.elem` VU.toList vec
--}
diff --git a/Distribution/Server/Features/Search/DocTermIds.hs b/Distribution/Server/Features/Search/DocTermIds.hs
deleted file mode 100644
index b59edc579..000000000
--- a/Distribution/Server/Features/Search/DocTermIds.hs
+++ /dev/null
@@ -1,64 +0,0 @@
-{-# LANGUAGE BangPatterns, GeneralizedNewtypeDeriving, ScopedTypeVariables #-}
-module Distribution.Server.Features.Search.DocTermIds (
-    DocTermIds,
-    TermId,
-    fieldLength,
-    fieldTermCount,
-    fieldElems,
-    create,
-    vecIndexIx,
-    vecCreateIx,
-  ) where
-
-import Distribution.Server.Features.Search.TermBag (TermBag, TermId)
-import qualified Distribution.Server.Features.Search.TermBag as TermBag
-
-import Distribution.Server.Framework.MemSize
-
-import Data.Vector (Vector, (!))
-import qualified Data.Vector as Vec
-import Data.Ix (Ix)
-import qualified Data.Ix as Ix
-
-
--- | The 'TermId's for the 'Term's that occur in a document. Documents may have
--- multiple fields and the 'DocTerms' type holds them separately for each field.
---
-newtype DocTermIds field = DocTermIds (Vector TermBag)
-  deriving (Show, MemSize)
-
-getField :: (Ix field, Bounded field) => DocTermIds field -> field -> TermBag
-getField (DocTermIds fieldVec) = vecIndexIx fieldVec
-
-create :: (Ix field, Bounded field) =>
-          (field -> [TermId]) -> DocTermIds field
-create docTermIds =
-    DocTermIds (vecCreateIx (TermBag.fromList . docTermIds))
-
--- | The number of terms in a field within the document.
-fieldLength :: (Ix field, Bounded field) => DocTermIds field -> field -> Int
-fieldLength docterms field =
-    TermBag.size (getField docterms field)
-
--- | The frequency of a particular term in a field within the document.
-fieldTermCount :: (Ix field, Bounded field) => DocTermIds field -> field -> TermId -> Int
-fieldTermCount docterms field termid =
-    TermBag.termCount (getField docterms field) termid
-
-fieldElems :: (Ix field, Bounded field) => DocTermIds field -> field -> [TermId]
-fieldElems docterms field =
-    TermBag.elems (getField docterms field)
-
----------------------------------
--- Vector indexed by Ix Bounded
---
-
-vecIndexIx  :: forall ix a . (Ix ix, Bounded ix) => Vector a -> ix -> a
-vecIndexIx vec ix = vec ! Ix.index (minBound :: ix, maxBound :: ix) ix
-
-vecCreateIx :: forall ix a . (Ix ix, Bounded ix) => (ix -> a) -> Vector a
-vecCreateIx f = Vec.fromListN (Ix.rangeSize bounds)
-                  [ y | ix <- Ix.range bounds, let !y = f ix ]
-  where
-    bounds :: (ix, ix)
-    bounds = (minBound, maxBound)
diff --git a/Distribution/Server/Features/Search/PkgSearch.hs b/Distribution/Server/Features/Search/PkgSearch.hs
index 0e1fd8832..895c6f41c 100644
--- a/Distribution/Server/Features/Search/PkgSearch.hs
+++ b/Distribution/Server/Features/Search/PkgSearch.hs
@@ -2,16 +2,19 @@
 
 module Distribution.Server.Features.Search.PkgSearch (
     PkgSearchEngine,
+    PkgSearchRankParameters,
     initialPkgSearchEngine,
     defaultSearchRankParameters,
     PkgDocField(..),
     PkgDocFeatures,
   ) where
 
-import Distribution.Server.Features.Search.SearchEngine
+-- import Distribution.Server.Features.Search.SearchEngine
 import Distribution.Server.Features.Search.ExtractNameTerms
 import Distribution.Server.Features.Search.ExtractDescriptionTerms
 
+import Data.SearchEngine
+
 import Data.Ix
 import Data.Set (Set)
 import qualified Data.Set as Set
@@ -23,7 +26,6 @@ import Distribution.Package
 import Distribution.PackageDescription
 import Distribution.Utils.ShortText
 import Distribution.Text (display)
-import Data.Text (unpack)
 
 
 type PkgSearchEngine = SearchEngine
@@ -52,8 +54,7 @@ pkgSearchConfig =
       documentKey           = packageName . fst,
       extractDocumentTerms  = extractTokens . fst,
       transformQueryTerm    = normaliseQueryToken,
-      documentFeatureValue  = getFeatureValue,
-      makeKey               = mkPackageName . unpack
+      documentFeatureValue  = getFeatureValue
   }
   where
     extractTokens :: PackageDescription -> PkgDocField -> [Text]
@@ -74,7 +75,9 @@ pkgSearchConfig =
 
     getFeatureValue (_pkg, downloadcount) Downloads = fromIntegral downloadcount
 
-defaultSearchRankParameters :: SearchRankParameters PkgDocField PkgDocFeatures
+type PkgSearchRankParameters = SearchRankParameters PkgDocField PkgDocFeatures
+
+defaultSearchRankParameters :: PkgSearchRankParameters
 defaultSearchRankParameters =
     SearchRankParameters {
       paramK1,
@@ -83,7 +86,9 @@ defaultSearchRankParameters =
       paramFeatureWeights,
       paramFeatureFunctions,
       paramResultsetSoftLimit = 400,
-      paramResultsetHardLimit = 800
+      paramResultsetHardLimit = 800,
+      paramAutosuggestPrefilterLimit = 1000, -- TODO: what should these limits be?
+      paramAutosuggestPostfilterLimit = 1000
     }
   where
     paramK1 :: Float
diff --git a/Distribution/Server/Features/Search/SearchEngine.hs b/Distribution/Server/Features/Search/SearchEngine.hs
deleted file mode 100644
index f2cfa4078..000000000
--- a/Distribution/Server/Features/Search/SearchEngine.hs
+++ /dev/null
@@ -1,405 +0,0 @@
-{-# LANGUAGE BangPatterns, NamedFieldPuns, RecordWildCards #-}
-{-# LANGUAGE ScopedTypeVariables #-}
-
-module Distribution.Server.Features.Search.SearchEngine (
-    SearchEngine,
-    SearchConfig(..),
-    SearchRankParameters(..),
-    BM25F.FeatureFunction(..),
-    Term,
-    initSearchEngine,
-    insertDoc,
-    insertDocs,
-    deleteDoc,
-    query,
-
-    NoFeatures,
-    noFeatures,
-
-    queryExplain,
-    BM25F.Explanation(..),
-    setRankParams,
-
-    invariant,
-  ) where
-
-import Distribution.Server.Features.Search.SearchIndex (SearchIndex, Term, TermId)
-import qualified Distribution.Server.Features.Search.SearchIndex as SI
-import Distribution.Server.Features.Search.DocIdSet (DocIdSet, DocId)
-import qualified Distribution.Server.Features.Search.DocIdSet as DocIdSet
-import Distribution.Server.Features.Search.DocTermIds (DocTermIds)
-import qualified Distribution.Server.Features.Search.DocTermIds as DocTermIds
-import Distribution.Server.Features.Search.DocFeatVals (DocFeatVals)
-import qualified Distribution.Server.Features.Search.DocFeatVals as DocFeatVals
-import qualified Distribution.Server.Features.Search.BM25F as BM25F
-
-import Distribution.Server.Framework.MemSize
-
-import Data.Ix
-import Data.Array.Unboxed
-import Data.List
-import Data.Function
-import Data.Maybe
-
--------------------
--- Doc layer
---
--- That is, at the layer of documents, so covering the issues of:
---  - inserting/removing whole documents
---  - documents having multiple fields
---  - documents having multiple terms
---  - transformations (case-fold/normalisation/stemming) on the doc terms
---  - transformations on the search terms
---
-
-data SearchConfig doc key field feature = SearchConfig {
-       documentKey          :: doc -> key,
-       extractDocumentTerms :: doc -> field -> [Term],
-       transformQueryTerm   :: Term -> field -> Term,
-       documentFeatureValue :: doc -> feature -> Float,
-       makeKey :: Term -> key
-
-     }
-
-data SearchRankParameters field feature = SearchRankParameters {
-       paramK1                 :: !Float,
-       paramB                  :: field -> Float,
-       paramFieldWeights       :: field -> Float,
-       paramFeatureWeights     :: feature -> Float,
-       paramFeatureFunctions   :: feature -> BM25F.FeatureFunction,
-       paramResultsetSoftLimit :: !Int,
-       paramResultsetHardLimit :: !Int
-     }
-
-data SearchEngine doc key field feature = SearchEngine {
-       searchIndex      :: !(SearchIndex      key field feature),
-       searchConfig     :: !(SearchConfig doc key field feature),
-       searchRankParams :: !(SearchRankParameters field feature),
-
-       -- cached info
-       sumFieldLengths :: !(UArray field Int),
-       bm25Context     :: BM25F.Context TermId field feature
-     }
-
-initSearchEngine :: (Ix field, Bounded field, Ix feature, Bounded feature) =>
-                    SearchConfig doc key field feature ->
-                    SearchRankParameters field feature ->
-                    SearchEngine doc key field feature
-initSearchEngine config params =
-    cacheBM25Context
-      SearchEngine {
-        searchIndex      = SI.emptySearchIndex,
-        searchConfig     = config,
-        searchRankParams = params,
-        sumFieldLengths  = listArray (minBound, maxBound) (repeat 0),
-        -- FIXME this use of undefined bears explaining
-        bm25Context      = undefined
-      }
-
-setRankParams :: SearchRankParameters field feature ->
-                 SearchEngine doc key field feature ->
-                 SearchEngine doc key field feature
-setRankParams params@SearchRankParameters{..} se =
-    se {
-      searchRankParams = params,
-      bm25Context      = (bm25Context se) {
-        BM25F.paramK1         = paramK1,
-        BM25F.paramB          = paramB,
-        BM25F.fieldWeight     = paramFieldWeights,
-        BM25F.featureWeight   = paramFeatureWeights,
-        BM25F.featureFunction = paramFeatureFunctions
-      }
-    }
-
-invariant :: (Ord key, Ix field, Bounded field) =>
-             SearchEngine doc key field feature -> Bool
-invariant SearchEngine{searchIndex} =
-    SI.invariant searchIndex
--- && check caches
-
-cacheBM25Context :: Ix field =>
-                    SearchEngine doc key field feature ->
-                    SearchEngine doc key field feature
-cacheBM25Context
-    se@SearchEngine {
-      searchRankParams = SearchRankParameters{..},
-      searchIndex,
-      sumFieldLengths
-    }
-  = se { bm25Context = bm25Context' }
-  where
-    bm25Context' = BM25F.Context {
-      BM25F.numDocsTotal    = SI.docCount searchIndex,
-      BM25F.avgFieldLength  = \f -> fromIntegral (sumFieldLengths ! f)
-                                  / fromIntegral (SI.docCount searchIndex),
-      BM25F.numDocsWithTerm = DocIdSet.size . SI.lookupTermId searchIndex,
-      BM25F.paramK1         = paramK1,
-      BM25F.paramB          = paramB,
-      BM25F.fieldWeight     = paramFieldWeights,
-      BM25F.featureWeight   = paramFeatureWeights,
-      BM25F.featureFunction = paramFeatureFunctions
-    }
-
-updateCachedFieldLengths :: (Ix field, Bounded field) =>
-                            Maybe (DocTermIds field) -> Maybe (DocTermIds field) ->
-                            SearchEngine doc key field feature ->
-                            SearchEngine doc key field feature
-updateCachedFieldLengths Nothing (Just newDoc) se@SearchEngine{sumFieldLengths} =
-    se {
-      sumFieldLengths =
-        array (bounds sumFieldLengths)
-              [ (i, n + DocTermIds.fieldLength newDoc i)
-              | (i, n) <- assocs sumFieldLengths ]
-    }
-updateCachedFieldLengths (Just oldDoc) (Just newDoc) se@SearchEngine{sumFieldLengths} =
-    se {
-      sumFieldLengths =
-        array (bounds sumFieldLengths)
-              [ (i, n - DocTermIds.fieldLength oldDoc i
-                      + DocTermIds.fieldLength newDoc i)
-              | (i, n) <- assocs sumFieldLengths ]
-    }
-updateCachedFieldLengths (Just oldDoc) Nothing se@SearchEngine{sumFieldLengths} =
-    se {
-      sumFieldLengths =
-        array (bounds sumFieldLengths)
-              [ (i, n - DocTermIds.fieldLength oldDoc i)
-              | (i, n) <- assocs sumFieldLengths ]
-    }
-updateCachedFieldLengths Nothing Nothing se = se
-
-insertDocs :: (Ord key, Ix field, Bounded field, Ix feature, Bounded feature) =>
-              [doc] ->
-              SearchEngine doc key field feature ->
-              SearchEngine doc key field feature
-insertDocs docs se = foldl' (\se' doc -> insertDoc doc se') se docs
-
-insertDoc :: (Ord key, Ix field, Bounded field, Ix feature, Bounded feature) =>
-             doc ->
-             SearchEngine doc key field feature ->
-             SearchEngine doc key field feature
-insertDoc doc se@SearchEngine{ searchConfig = SearchConfig {
-                                 documentKey,
-                                 extractDocumentTerms,
-                                 documentFeatureValue
-                               }
-                             , searchIndex } =
-    let key = documentKey doc
-        searchIndex' = SI.insertDoc key (extractDocumentTerms doc)
-                                        (documentFeatureValue doc)
-                                        searchIndex
-        oldDoc       = SI.lookupDocKey searchIndex  key
-        newDoc       = SI.lookupDocKey searchIndex' key
-
-     in cacheBM25Context $
-        updateCachedFieldLengths oldDoc newDoc $
-          se { searchIndex = searchIndex' }
-
-deleteDoc :: (Ord key, Ix field, Bounded field) =>
-             key ->
-             SearchEngine doc key field feature ->
-             SearchEngine doc key field feature
-deleteDoc key se@SearchEngine{searchIndex} =
-    let searchIndex' = SI.deleteDoc key searchIndex
-        oldDoc       = SI.lookupDocKey searchIndex key
-
-     in cacheBM25Context $
-        updateCachedFieldLengths oldDoc Nothing $
-          se { searchIndex = searchIndex' }
-
-query :: (Ix field, Bounded field, Ix feature, Bounded feature, Ord key) =>
-         SearchEngine doc key field feature ->
-         [Term] -> [key]
-query se@SearchEngine{ searchIndex,
-                       searchConfig     = SearchConfig{transformQueryTerm, makeKey},
-                       searchRankParams = SearchRankParameters{..} }
-      terms =
-
-  let -- Start by transforming/normalising all the query terms.
-      -- This can be done differently for each field we search by.
-      lookupTerms :: [Term]
-      lookupTerms = [ term'
-                    | term  <- terms
-                    , let transformForField = transformQueryTerm term
-                    , term' <- nub [ transformForField field
-                                   | field <- range (minBound, maxBound) ]
-                    ]
-
-      -- Then we look up all the normalised terms in the index.
-      rawresults :: [Maybe (TermId, DocIdSet)]
-      rawresults = map (SI.lookupTerm searchIndex) lookupTerms
-
-      -- Check if there is one term then it exactly matches a package
-      exactMatch :: Maybe DocId
-      exactMatch = case terms of
-                     [] -> Nothing
-                     [x] -> SI.lookupDocKeyReal searchIndex (makeKey x)
-                     (_:_) -> Nothing
-
-      -- For the terms that occur in the index, this gives us the term's id
-      -- and the set of documents that the term occurs in.
-      termids   :: [TermId]
-      docidsets :: [DocIdSet]
-      (termids, docidsets) = unzip (catMaybes rawresults)
-
-      -- We looked up the documents that *any* of the term occur in (not all)
-      -- so this could be rather a lot of docs if the user uses a few common
-      -- terms. Scoring these result docs is a non-trivial cost so we want to
-      -- limit the number that we have to score. The standard trick is to
-      -- consider the doc sets in the order of size, smallest to biggest. Once
-      -- we have gone over a certain threshold of docs then don't bother with
-      -- the doc sets for the remaining terms. This tends to work because the
-      -- scoring gives lower weight to terms that occur in many documents.
-      unrankedResults :: DocIdSet
-      unrankedResults = pruneRelevantResults
-                          paramResultsetSoftLimit
-                          paramResultsetHardLimit
-                          docidsets
-
-      --TODO: technically this isn't quite correct. Because each field can
-      -- be normalised differently, we can end up with different termids for
-      -- the same original search term, and then we score those as if they
-      -- were different terms, which makes a difference when the term appears
-      -- in multiple fields (exactly the case BM25F is supposed to deal with).
-      -- What we ought to have instead is an Array (Int, field) TermId, and
-      -- make the scoring use the appropriate termid for each field, but to
-      -- consider them the "same" term.
-   in rankResults se exactMatch termids (DocIdSet.toList unrankedResults)
-
-rankResults :: forall field key feature doc .
-               (Ix field, Bounded field, Ix feature, Bounded feature) =>
-               SearchEngine doc key field feature -> Maybe DocId ->
-               [TermId] -> [DocId] -> [key]
-rankResults se@SearchEngine{searchIndex} exactMatch queryTerms docids =
-    maybe id prependExactMatch exactMatch (map snd
-    $ sortBy (flip compare `on` fst)
-      [ (relevanceScore se queryTerms doctermids docfeatvals, dockey)
-      | docid <- docids
-      , maybe True (/= docid) exactMatch
-      , let (dockey, doctermids, docfeatvals) = SI.lookupDocId searchIndex docid ])
-  where
-    prependExactMatch :: DocId -> [key] -> [key]
-    prependExactMatch docid keys = SI.lookupDocId' searchIndex docid : keys
-
-relevanceScore :: (Ix field, Bounded field, Ix feature, Bounded feature) =>
-                  SearchEngine doc key field feature ->
-                  [TermId] -> DocTermIds field -> DocFeatVals feature -> Float
-relevanceScore SearchEngine{bm25Context} queryTerms doctermids docfeatvals =
-    BM25F.score bm25Context doc queryTerms
-  where
-    doc = indexDocToBM25Doc doctermids docfeatvals
-
-indexDocToBM25Doc :: (Ix field, Bounded field, Ix feature, Bounded feature) =>
-                     DocTermIds field ->
-                     DocFeatVals feature ->
-                     BM25F.Doc TermId field feature
-indexDocToBM25Doc doctermids docfeatvals =
-    BM25F.Doc {
-      BM25F.docFieldLength        = DocTermIds.fieldLength    doctermids,
-      BM25F.docFieldTermFrequency = DocTermIds.fieldTermCount doctermids,
-      BM25F.docFeatureValue       = DocFeatVals.featureValue docfeatvals
-    }
-
-pruneRelevantResults :: Int -> Int -> [DocIdSet] -> DocIdSet
-pruneRelevantResults softLimit hardLimit =
-    -- Look at the docsets starting with the smallest ones. Smaller docsets
-    -- correspond to the rarer terms, which are the ones that score most highly.
-    go DocIdSet.empty . sortBy (compare `on` DocIdSet.size)
-  where
-    go !acc [] = acc
-    go !acc (d:ds)
-        -- If this is the first one, we add it anyway, otherwise we're in
-        -- danger of returning no results at all.
-      | DocIdSet.null acc = go d ds
-        -- We consider the size our docset would be if we add this extra one...
-        -- If it puts us over the hard limit then stop.
-      | size > hardLimit  = acc
-        -- If it puts us over soft limit then we add it and stop
-      | size > softLimit  = DocIdSet.union acc d
-        -- Otherwise we can add it and carry on to consider the remainder
-      | otherwise         = go (DocIdSet.union acc d) ds
-      where
-        size = DocIdSet.size acc + DocIdSet.size d
-
------------------------------
-
-queryExplain :: (Ix field, Bounded field, Ix feature, Bounded feature, Ord key) =>
-                SearchEngine doc key field feature ->
-                [Term] -> (Maybe key, [(BM25F.Explanation field feature Term, key)])
-queryExplain se@SearchEngine{ searchIndex,
-                              searchConfig     = SearchConfig{transformQueryTerm, makeKey},
-                              searchRankParams = SearchRankParameters{..} }
-      terms =
-
-  -- See 'query' above for explanation. Really we ought to combine them.
-  let lookupTerms :: [Term]
-      lookupTerms = [ term'
-                    | term  <- terms
-                    , let transformForField = transformQueryTerm term
-                    , term' <- nub [ transformForField field
-                                   | field <- range (minBound, maxBound) ]
-                    ]
-
-      exactMatch :: Maybe DocId
-      exactMatch = case terms of
-                     [] -> Nothing
-                     [x] -> SI.lookupDocKeyReal searchIndex (makeKey x)
-                     (_:_) -> Nothing
-
-      rawresults :: [Maybe (TermId, DocIdSet)]
-      rawresults = map (SI.lookupTerm searchIndex) lookupTerms
-
-      termids   :: [TermId]
-      docidsets :: [DocIdSet]
-      (termids, docidsets) = unzip (catMaybes rawresults)
-
-      unrankedResults :: DocIdSet
-      unrankedResults = pruneRelevantResults
-                          paramResultsetSoftLimit
-                          paramResultsetHardLimit
-                          docidsets
-
-   in ( fmap (SI.lookupDocId' searchIndex) exactMatch
-      , rankExplainResults se termids (DocIdSet.toList unrankedResults)
-      )
-
-rankExplainResults :: (Ix field, Bounded field, Ix feature, Bounded feature) =>
-                      SearchEngine doc key field feature ->
-                      [TermId] ->
-                      [DocId] ->
-                      [(BM25F.Explanation field feature Term, key)]
-rankExplainResults se@SearchEngine{searchIndex} queryTerms docids =
-    sortBy (flip compare `on` (BM25F.overallScore . fst))
-      [ (explainRelevanceScore se queryTerms doctermids docfeatvals, dockey)
-      | docid <- docids
-      , let (dockey, doctermids, docfeatvals) = SI.lookupDocId searchIndex docid ]
-
-explainRelevanceScore :: (Ix field, Bounded field, Ix feature, Bounded feature) =>
-                         SearchEngine doc key field feature ->
-                         [TermId] ->
-                         DocTermIds field ->
-                         DocFeatVals feature ->
-                         BM25F.Explanation field feature Term
-explainRelevanceScore SearchEngine{bm25Context, searchIndex}
-                      queryTerms doctermids docfeatvals =
-    fmap (SI.getTerm searchIndex) (BM25F.explain bm25Context doc queryTerms)
-  where
-    doc = indexDocToBM25Doc doctermids docfeatvals
-
------------------------------
-
-data NoFeatures = NoFeatures
-  deriving (Eq, Ord, Bounded)
-
-instance Ix NoFeatures where
-  range   _   = []
-  inRange _ _ = False
-  index   _ _ = -1
-
-noFeatures :: NoFeatures -> a
-noFeatures _ = error "noFeatures"
-
------------------------------
-
-instance MemSize key => MemSize (SearchEngine doc key field feature) where
-  memSize SearchEngine {searchIndex} = 25 + memSize searchIndex
diff --git a/Distribution/Server/Features/Search/SearchIndex.hs b/Distribution/Server/Features/Search/SearchIndex.hs
deleted file mode 100644
index cfb2291e9..000000000
--- a/Distribution/Server/Features/Search/SearchIndex.hs
+++ /dev/null
@@ -1,414 +0,0 @@
-{-# LANGUAGE BangPatterns, NamedFieldPuns #-}
-
-module Distribution.Server.Features.Search.SearchIndex (
-    SearchIndex,
-    Term,
-    TermId,
-    DocId,
-
-    emptySearchIndex,
-    insertDoc,
-    deleteDoc,
-
-    docCount,
-    lookupTerm,
-    lookupTermId,
-    lookupDocId,
-    lookupDocId',
-    lookupDocKey,
-    lookupDocKeyReal,
-
-    getTerm,
-
-    invariant,
-  ) where
-
-import Distribution.Server.Features.Search.DocIdSet (DocIdSet, DocId)
-import qualified Distribution.Server.Features.Search.DocIdSet as DocIdSet
-import Distribution.Server.Features.Search.DocTermIds (DocTermIds, TermId, vecIndexIx, vecCreateIx)
-import qualified Distribution.Server.Features.Search.DocTermIds as DocTermIds
-import Distribution.Server.Features.Search.DocFeatVals (DocFeatVals)
-import qualified Distribution.Server.Features.Search.DocFeatVals as DocFeatVals
-
-import Distribution.Server.Framework.MemSize
-
-import Data.Ix (Ix)
-import qualified Data.Ix as Ix
-import Data.Map (Map)
-import qualified Data.Map as Map
-import Data.IntMap (IntMap)
-import qualified Data.IntMap as IntMap
-import qualified Data.Set as Set
-import Data.Text (Text)
-import Data.List (foldl')
-
-import Control.Exception (assert)
-
-type Term = Text
-
--- | The search index is essentially a many-to-many mapping between documents
--- and terms. Each document contains many terms and each term occurs in many
--- documents. It is a bidirectional mapping as we need to support lookups in
--- both directions.
---
--- Documents are identified by a key (in Ord) while terms are text values.
--- Inside the index however we assign compact numeric ids to both documents and
--- terms. The advantage of this is a much more compact in-memory representation
--- and the disadvantage is greater complexity. In particular it means we have
--- to manage bidirectional mappings between document keys and ids, and between
--- terms and term ids.
---
--- So the mappings we maintain can be depicted as:
---
--- >  Term   <-- 1:1 -->   TermId
--- >                         ^
--- >                         |
--- >                     many:many
--- >                         |
--- >                         v
--- > DocKey  <-- 1:1 -->   DocId
---
--- For efficiency, these details are exposed in the interface. In particular
--- the mapping from TermId to many DocIds is exposed via a 'DocIdSet',
--- and the mapping from DocIds to TermIds is exposed via 'DocTermIds'.
---
-data SearchIndex key field feature = SearchIndex {
-       -- the indexes
-       termMap           :: !(Map Term TermInfo),
-       termIdMap         :: !(IntMap Term),
-       docIdMap          :: !(IntMap (DocInfo key field feature)),
-       docKeyMap         :: !(Map key DocId),
-
-       -- auto-increment key counters
-       nextTermId        :: TermId,
-       nextDocId         :: DocId
-     }
-  deriving Show
-
-data TermInfo = TermInfo !TermId !DocIdSet
-  deriving Show
-
-data DocInfo key field feature = DocInfo !key !(DocTermIds field)
-                                              !(DocFeatVals feature)
-  deriving Show
-
-
------------------------
--- SearchIndex basics
---
-
-emptySearchIndex :: SearchIndex key field feature
-emptySearchIndex =
-    SearchIndex
-      Map.empty
-      IntMap.empty
-      IntMap.empty
-      Map.empty
-      minBound
-      minBound
-
-checkInvariant :: (Ord key, Ix field, Bounded field) =>
-                  SearchIndex key field feature -> SearchIndex key field feature
-checkInvariant si = assert (invariant si) si
-
-invariant :: (Ord key, Ix field, Bounded field) =>
-             SearchIndex key field feature -> Bool
-invariant SearchIndex{termMap, termIdMap, docKeyMap, docIdMap} =
-      and [ IntMap.lookup (fromEnum termId) termIdMap == Just term
-          | (term, (TermInfo termId _)) <- Map.assocs termMap ]
-  &&  and [ case Map.lookup term termMap of
-              Just (TermInfo termId' _) -> toEnum termId == termId'
-              Nothing                   -> False
-          | (termId, term) <- IntMap.assocs termIdMap ]
-  &&  and [ case IntMap.lookup (fromEnum docId) docIdMap of
-              Just (DocInfo docKey' _ _) -> docKey == docKey'
-              Nothing                  -> False
-          | (docKey, docId) <- Map.assocs docKeyMap ]
-  &&  and [ Map.lookup docKey docKeyMap == Just (toEnum docId)
-          | (docId, DocInfo docKey _ _) <- IntMap.assocs docIdMap ]
-  &&  and [ DocIdSet.invariant docIdSet
-          | (_term, (TermInfo _ docIdSet)) <- Map.assocs termMap ]
-  &&  and [ any (\field -> DocTermIds.fieldTermCount docterms field termId > 0) fields
-          | (_term, (TermInfo termId docIdSet)) <- Map.assocs termMap
-          , docId <- DocIdSet.toList docIdSet
-          , let DocInfo _ docterms _ = docIdMap IntMap.! fromEnum docId ]
-  &&  and [ IntMap.member (fromEnum termid) termIdMap
-          | (_docId, DocInfo _ docTerms _) <- IntMap.assocs docIdMap
-          , field <- fields
-          , termid <- DocTermIds.fieldElems docTerms field ]
-  where
-    fields = Ix.range (minBound, maxBound)
-
-
--------------------
--- Lookups
---
-
-docCount :: SearchIndex key field feature -> Int
-docCount SearchIndex{docIdMap} = IntMap.size docIdMap
-
-lookupTerm :: SearchIndex key field feature -> Term -> Maybe (TermId, DocIdSet)
-lookupTerm SearchIndex{termMap} term =
-    case Map.lookup term termMap of
-      Nothing                         -> Nothing
-      Just (TermInfo termid docidset) -> Just (termid, docidset)
-
-lookupTermId :: SearchIndex key field feature -> TermId -> DocIdSet
-lookupTermId SearchIndex{termIdMap, termMap} termid =
-    case IntMap.lookup (fromEnum termid) termIdMap of
-      Nothing   -> error $ "lookupTermId: not found " ++ show termid
-      Just term ->
-        case Map.lookup term termMap of
-          Nothing                    -> error "lookupTermId: internal error"
-          Just (TermInfo _ docidset) -> docidset
-
-lookupDocId :: SearchIndex key field feature ->
-               DocId -> (key, DocTermIds field, DocFeatVals feature)
-lookupDocId SearchIndex{docIdMap} docid =
-    case IntMap.lookup (fromEnum docid) docIdMap of
-      Nothing                                   -> errNotFound
-      Just (DocInfo key doctermids docfeatvals) -> (key, doctermids, docfeatvals)
-  where
-    errNotFound = error $ "lookupDocId: not found " ++ show docid
-
-lookupDocId' :: SearchIndex key field feature -> DocId -> key
-lookupDocId' searchIndex docId = dockey
-  where
-    (dockey, _, _) = lookupDocId searchIndex docId
-
-lookupDocKey :: Ord key => SearchIndex key field feature -> key -> Maybe (DocTermIds field)
-lookupDocKey SearchIndex{docKeyMap, docIdMap} key =
-    case Map.lookup key docKeyMap of
-      Nothing    -> Nothing
-      Just docid ->
-        case IntMap.lookup (fromEnum docid) docIdMap of
-          Nothing                          -> error "lookupDocKey: internal error"
-          Just (DocInfo _ doctermids _) -> Just doctermids
-
-lookupDocKeyReal :: Ord key => SearchIndex key field feature -> key -> Maybe DocId
-lookupDocKeyReal SearchIndex{docKeyMap} key = Map.lookup key docKeyMap
-
-getTerm :: SearchIndex key field feature -> TermId -> Term
-getTerm SearchIndex{termIdMap} termId =
-    termIdMap IntMap.! fromEnum termId
-
-getTermId :: SearchIndex key field feature -> Term -> TermId
-getTermId SearchIndex{termMap} term =
-    case termMap Map.! term of TermInfo termid _ -> termid
-
-getDocTermIds :: SearchIndex key field feature -> DocId -> DocTermIds field
-getDocTermIds SearchIndex{docIdMap} docid =
-    case docIdMap IntMap.! fromEnum docid of
-      DocInfo _ doctermids _ -> doctermids
-
---------------------
--- Insert & delete
---
-
--- Procedure for adding a new doc...
--- (key, field -> [Term])
--- alloc docid for key
--- add term occurrences for docid (include rev map for termid)
--- construct indexdoc now that we have all the term -> termid entries
--- insert indexdoc
-
--- Procedure for updating a doc...
--- (key, field -> [Term])
--- find docid for key
--- lookup old terms for docid (using termid rev map)
--- calc term occurrences to add, term occurrences to delete
--- add new term occurrences, delete old term occurrences
--- construct indexdoc now that we have all the term -> termid entries
--- insert indexdoc
-
--- Procedure for deleting a doc...
--- (key, field -> [Term])
--- find docid for key
--- lookup old terms for docid (using termid rev map)
--- delete old term occurrences
--- delete indexdoc
-
--- | This is the representation for documents to be added to the index.
--- Documents may
---
-type DocTerms         field   = field   -> [Term]
-type DocFeatureValues feature = feature -> Float
-
-insertDoc :: (Ord key, Ix field, Bounded field, Ix feature, Bounded feature) =>
-              key -> DocTerms field -> DocFeatureValues feature ->
-              SearchIndex key field feature -> SearchIndex key field feature
-insertDoc key userDocTerms userDocFeats si@SearchIndex{docKeyMap}
-  | Just docid <- Map.lookup key docKeyMap
-  = -- Some older version of the doc is already present in the index,
-    -- So we keep its docid. Now have to update the doc itself
-    -- and update the terms by removing old ones and adding new ones.
-    let oldTermsIds   = getDocTermIds si docid
-        userDocTerms' = memoiseDocTerms userDocTerms
-        newTerms      = docTermSet userDocTerms'
-        oldTerms      = docTermIdsTermSet si oldTermsIds
-        -- We optimise for the typical case of significant overlap between
-        -- the terms in the old and new versions of the document.
-        delTerms      = oldTerms `Set.difference` newTerms
-        addTerms      = newTerms `Set.difference` oldTerms
-
-     -- Note: adding the doc relies on all the terms being in the termMap
-     -- already, so we first add all the term occurrences for the docid.
-     in checkInvariant
-      . insertDocIdToDocEntry docid key userDocTerms' userDocFeats
-      . insertTermToDocIdEntries (Set.toList addTerms) docid
-      . deleteTermToDocIdEntries (Set.toList delTerms) docid
-      $ si
-
-  | otherwise
-  = -- We're dealing with a new doc, so allocate a docid for the key
-    let (si', docid)  = allocFreshDocId si
-        userDocTerms' = memoiseDocTerms userDocTerms
-        addTerms      = docTermSet userDocTerms'
-
-     -- Note: adding the doc relies on all the terms being in the termMap
-     -- already, so we first add all the term occurrences for the docid.
-     in checkInvariant
-      . insertDocIdToDocEntry docid key userDocTerms' userDocFeats
-      . insertDocKeyToIdEntry key docid
-      . insertTermToDocIdEntries (Set.toList addTerms) docid
-      $ si'
-
-deleteDoc :: (Ord key, Ix field, Bounded field) =>
-             key ->
-             SearchIndex key field feature -> SearchIndex key field feature
-deleteDoc key si@SearchIndex{docKeyMap}
-  | Just docid <- Map.lookup key docKeyMap
-  = let oldTermsIds = getDocTermIds si docid
-        oldTerms    = docTermIdsTermSet si oldTermsIds
-     in checkInvariant
-      . deleteDocEntry docid key
-      . deleteTermToDocIdEntries (Set.toList oldTerms) docid
-      $ si
-
-  | otherwise = si
-
-
-----------------------------------
--- Insert & delete support utils
---
-
-
-memoiseDocTerms :: (Ix field, Bounded field) => DocTerms field -> DocTerms field
-memoiseDocTerms docTermsFn =
-    \field -> vecIndexIx vec field
-  where
-    vec = vecCreateIx docTermsFn
-
-docTermSet :: (Bounded t, Ix t) => DocTerms t -> Set.Set Term
-docTermSet docterms =
-    Set.unions [ Set.fromList (docterms field)
-               | field <- Ix.range (minBound, maxBound) ]
-
-docTermIdsTermSet :: (Bounded field, Ix field) =>
-                     SearchIndex key field feature ->
-                     DocTermIds field -> Set.Set Term
-docTermIdsTermSet si doctermids =
-    Set.unions [ Set.fromList terms
-               | field <- Ix.range (minBound, maxBound)
-               , let termids = DocTermIds.fieldElems doctermids field
-                     terms   = map (getTerm si) termids ]
-
---
--- The Term <-> DocId mapping
---
-
--- | Add an entry into the 'Term' to 'DocId' mapping.
-insertTermToDocIdEntry :: Term -> DocId ->
-                          SearchIndex key field feature ->
-                          SearchIndex key field feature
-insertTermToDocIdEntry term !docid si@SearchIndex{termMap, termIdMap, nextTermId} =
-    case Map.lookup term termMap of
-      Nothing ->
-        let !termInfo' = TermInfo nextTermId (DocIdSet.singleton docid)
-         in si { termMap    = Map.insert term termInfo' termMap
-               , termIdMap  = IntMap.insert (fromEnum nextTermId) term termIdMap
-               , nextTermId = succ nextTermId }
-
-      Just (TermInfo termId docIdSet) ->
-        let !termInfo' = TermInfo termId (DocIdSet.insert docid docIdSet)
-         in si { termMap = Map.insert term termInfo' termMap }
-
--- | Add multiple entries into the 'Term' to 'DocId' mapping: many terms that
--- map to the same document.
-insertTermToDocIdEntries :: [Term] -> DocId ->
-                            SearchIndex key field feature ->
-                            SearchIndex key field feature
-insertTermToDocIdEntries terms !docid si =
-    foldl' (\si' term -> insertTermToDocIdEntry term docid si') si terms
-
--- | Delete an entry from the 'Term' to 'DocId' mapping.
-deleteTermToDocIdEntry :: Term -> DocId ->
-                          SearchIndex key field feature ->
-                          SearchIndex key field feature
-deleteTermToDocIdEntry term !docid si@SearchIndex{termMap, termIdMap} =
-    case  Map.lookup term termMap of
-      Nothing -> si
-      Just (TermInfo termId docIdSet) ->
-        let docIdSet' = DocIdSet.delete docid docIdSet
-            termInfo' = TermInfo termId docIdSet'
-        in if DocIdSet.null docIdSet'
-            then si { termMap = Map.delete term termMap
-                    , termIdMap = IntMap.delete (fromEnum termId) termIdMap }
-            else si { termMap = Map.insert term termInfo' termMap }
-
--- | Delete multiple entries from the 'Term' to 'DocId' mapping: many terms
--- that map to the same document.
-deleteTermToDocIdEntries :: [Term] -> DocId ->
-                            SearchIndex key field feature ->
-                            SearchIndex key field feature
-deleteTermToDocIdEntries terms !docid si =
-    foldl' (\si' term -> deleteTermToDocIdEntry term docid si') si terms
-
---
--- The DocId <-> Doc mapping
---
-
-allocFreshDocId :: SearchIndex key field feature ->
-                  (SearchIndex key field feature, DocId)
-allocFreshDocId si@SearchIndex{nextDocId} =
-    let !si' = si { nextDocId = succ nextDocId }
-     in (si', nextDocId)
-
-insertDocKeyToIdEntry :: Ord key => key -> DocId ->
-                         SearchIndex key field feature ->
-                         SearchIndex key field feature
-insertDocKeyToIdEntry dockey !docid si@SearchIndex{docKeyMap} =
-    si { docKeyMap = Map.insert dockey docid docKeyMap }
-
-insertDocIdToDocEntry :: (Ix field, Bounded field,
-                          Ix feature, Bounded feature) =>
-                         DocId -> key ->
-                         DocTerms field ->
-                         DocFeatureValues feature ->
-                         SearchIndex key field feature ->
-                         SearchIndex key field feature
-insertDocIdToDocEntry !docid dockey userdocterms userdocfeats
-                       si@SearchIndex{docIdMap} =
-    let doctermids = DocTermIds.create (map (getTermId si) . userdocterms)
-        docfeatvals= DocFeatVals.create userdocfeats
-        !docinfo   = DocInfo dockey doctermids docfeatvals
-     in si { docIdMap  = IntMap.insert (fromEnum docid) docinfo docIdMap }
-
-deleteDocEntry :: Ord key => DocId -> key ->
-                  SearchIndex key field feature -> SearchIndex key field feature
-deleteDocEntry docid key si@SearchIndex{docIdMap, docKeyMap} =
-     si { docIdMap  = IntMap.delete (fromEnum docid) docIdMap
-        , docKeyMap = Map.delete key docKeyMap }
-
-
-----------------------
--- MemSize instances
-
-instance MemSize key => MemSize (SearchIndex key field feature) where
-  memSize (SearchIndex a b c d e f) = memSize6 a b c d e f
-
-instance MemSize TermInfo where
-  memSize (TermInfo a b) = memSize2 a b
-
-instance MemSize key => MemSize (DocInfo key field feature) where
-  memSize (DocInfo a b c) = memSize3 a b c
diff --git a/Distribution/Server/Features/Search/TermBag.hs b/Distribution/Server/Features/Search/TermBag.hs
deleted file mode 100644
index 325e739b9..000000000
--- a/Distribution/Server/Features/Search/TermBag.hs
+++ /dev/null
@@ -1,76 +0,0 @@
-{-# LANGUAGE BangPatterns, GeneralizedNewtypeDeriving #-}
-module Distribution.Server.Features.Search.TermBag (
-    TermId,
-    TermBag,
-    size,
-    fromList,
-    elems,
-    termCount,
-  ) where
-
-import Distribution.Server.Framework.MemSize
-
-import qualified Data.Vector.Unboxed as Vec
-import qualified Data.Map as Map
-import Data.Word (Word32)
-import Data.Bits
-
-newtype TermId = TermId Word32
-  deriving (Eq, Ord, Show, Enum, MemSize)
-
-instance Bounded TermId where
-  minBound = TermId 0
-  maxBound = TermId 0x00FFFFFF
-
-data TermBag = TermBag !Int !(Vec.Vector TermIdAndCount)
-  deriving Show
-
--- We sneakily stuff both the TermId and the bag count into one 32bit word
-type TermIdAndCount = Word32
-
--- Bottom 24 bits is the TermId, top 8 bits is the bag count
-termIdAndCount :: TermId -> Int -> TermIdAndCount
-termIdAndCount (TermId termid) freq =
-      (min (fromIntegral freq) 255 `shiftL` 24)
-  .|. (termid .&. 0x00FFFFFF)
-
-getTermId :: TermIdAndCount -> TermId
-getTermId word = TermId (word .&. 0x00FFFFFF)
-
-getTermCount :: TermIdAndCount -> Int
-getTermCount word = fromIntegral (word `shiftR` 24)
-
-
-size :: TermBag -> Int
-size (TermBag sz _) = sz
-
-elems :: TermBag -> [TermId]
-elems (TermBag _ vec) = map getTermId (Vec.toList vec)
-
-termCount :: TermBag -> TermId -> Int
-termCount (TermBag _ vec) =
-    binarySearch 0 (Vec.length vec - 1)
-  where
-    binarySearch :: Int -> Int -> TermId -> Int
-    binarySearch !a !b !key
-      | a > b     = 0
-      | otherwise =
-        let mid         = (a + b) `div` 2
-            tidAndCount = vec Vec.! mid
-         in case compare key (getTermId tidAndCount) of
-              LT -> binarySearch a (mid-1) key
-              EQ -> getTermCount tidAndCount
-              GT -> binarySearch (mid+1) b key
-
-fromList :: [TermId] -> TermBag
-fromList termids =
-    let bag = Map.fromListWith (+) [ (t, 1) | t <- termids ]
-        sz  = Map.foldl' (+) 0 bag
-        vec = Vec.fromListN (Map.size bag)
-                            [ termIdAndCount termid freq
-                            | (termid, freq) <- Map.toAscList bag ]
-     in TermBag sz vec
-
-instance MemSize TermBag where
-  memSize (TermBag _ vec) = 2 + memSizeUVector 2 vec
-
diff --git a/Distribution/Server/Framework/MemSize.hs b/Distribution/Server/Framework/MemSize.hs
index b1cc3371f..128f3e16d 100644
--- a/Distribution/Server/Framework/MemSize.hs
+++ b/Distribution/Server/Framework/MemSize.hs
@@ -28,6 +28,7 @@ import qualified Data.Array.Unboxed as A
 import qualified Data.Vector as V
 import qualified Data.Vector.Unboxed as V.U
 import qualified Data.Version as Ver
+import qualified Data.SearchEngine as SE
 
 import Distribution.Package  (PackageIdentifier(..), PackageName, unPackageName)
 import Distribution.PackageDescription (FlagName, unFlagName)
@@ -269,3 +270,9 @@ instance MemSize CompilerFlavor where
 
 instance MemSize CompilerId where
     memSize (CompilerId a b) = memSize2 a b
+
+
+instance MemSize key => MemSize (SE.SearchEngine doc key field feature) where
+  -- TODO: what can we do about MemSize for SearchEngine?
+  -- memSize SE.SearchEngine {searchIndex} = 25 + memSize searchIndex
+  memSize _ = memSize0
diff --git a/hackage-server.cabal b/hackage-server.cabal
index 88f593a18..22f428457 100644
--- a/hackage-server.cabal
+++ b/hackage-server.cabal
@@ -310,16 +310,9 @@ library lib-server
       Distribution.Server.Features.HaskellPlatform
       Distribution.Server.Features.HaskellPlatform.State
       Distribution.Server.Features.Search
-      Distribution.Server.Features.Search.BM25F
-      Distribution.Server.Features.Search.DocIdSet
-      Distribution.Server.Features.Search.DocTermIds
-      Distribution.Server.Features.Search.DocFeatVals
       Distribution.Server.Features.Search.ExtractDescriptionTerms
       Distribution.Server.Features.Search.ExtractNameTerms
       Distribution.Server.Features.Search.PkgSearch
-      Distribution.Server.Features.Search.SearchEngine
-      Distribution.Server.Features.Search.SearchIndex
-      Distribution.Server.Features.Search.TermBag
       Distribution.Server.Features.Sitemap.Functions
       Distribution.Server.Features.Votes
       Distribution.Server.Features.Votes.State
@@ -369,6 +362,7 @@ library lib-server
     , cryptohash-sha256     ^>= 0.11.100
     , csv                   ^>= 0.1
     , ed25519               ^>= 0.0.5
+    , full-text-search      ^>= 0.2.1.1
     , hackage-security      ^>= 0.6
     , hackage-security-HTTP ^>= 0.1.1
     , haddock-library       > 1.7 && < 2