diff --git a/bench/Bench/Data/List.purs b/bench/Bench/Data/List.purs index c40fe60..ab26c3e 100644 --- a/bench/Bench/Data/List.purs +++ b/bench/Bench/Data/List.purs @@ -3,7 +3,7 @@ module Bench.Data.List where import Prelude import Data.Foldable (maximum) import Data.Int (pow) -import Data.List (List(..), take, range, foldr, length) +import Data.List (List(..), take, range, foldr, length, nub) import Data.Maybe (fromMaybe) import Data.Traversable (traverse_) import Effect (Effect) @@ -12,8 +12,9 @@ import Performance.Minibench (bench) benchList :: Effect Unit benchList = do - benchLists "map" $ map (_ + 1) - benchLists "foldr" $ foldr add 0 + --benchLists "map" $ map (_ + 1) + --benchLists "foldr" $ foldr add 0 + benchLists "nub" nub where diff --git a/ord-set-bench.txt b/ord-set-bench.txt new file mode 100644 index 0000000..6ef3fb6 --- /dev/null +++ b/ord-set-bench.txt @@ -0,0 +1,83 @@ + +> @ bench /home/miles/projects/purescript/lists +> npm run bench:build && npm run bench:run + + +> @ bench:build /home/miles/projects/purescript/lists +> purs compile 'bench/**/*.purs' 'src/**/*.purs' 'bower_components/*/src/**/*.purs' + + +> @ bench:run /home/miles/projects/purescript/lists +> node --expose-gc -e 'require("./output/Bench.Main/index.js").main()' + +List +==== +--- +nub: list (0 elems) +mean = 1.07 μs +stddev = 3.20 μs +min = 602.00 ns +max = 101.06 μs +--- +nub: list (1 elems) +mean = 3.38 μs +stddev = 21.73 μs +min = 1.45 μs +max = 559.14 μs +--- +nub: list (10 elems) +mean = 26.75 μs +stddev = 80.87 μs +min = 3.45 μs +max = 1.37 ms +--- +nub: list (100 elems) +mean = 86.53 μs +stddev = 122.58 μs +min = 58.71 μs +max = 1.58 ms +--- +nub: list (1000 elems) +mean = 1.02 ms +stddev = 467.08 μs +min = 900.78 μs +max = 13.52 ms +--- +nub: list (10000 elems) +/home/miles/projects/purescript/lists/output/Data.List.Internal/index.js:146 + function $tco_loop(v, tree) { + ^ + +RangeError: Maximum call stack size exceeded + at $tco_loop (/home/miles/projects/purescript/lists/output/Data.List.Internal/index.js:146:27) + at /home/miles/projects/purescript/lists/output/Data.List.Internal/index.js:182:27 + at $tco_loop (/home/miles/projects/purescript/lists/output/Data.List.Internal/index.js:207:60) + at /home/miles/projects/purescript/lists/output/Data.List.Internal/index.js:229:39 + at $tco_loop (/home/miles/projects/purescript/lists/output/Data.List.Internal/index.js:244:48) + at /home/miles/projects/purescript/lists/output/Data.List.Internal/index.js:299:39 + at /home/miles/projects/purescript/lists/output/Data.List.Internal/index.js:304:51 + at /home/miles/projects/purescript/lists/output/Data.List/index.js:507:76 + at /home/miles/projects/purescript/lists/output/Data.List/index.js:511:73 + at /home/miles/projects/purescript/lists/output/Data.List/index.js:511:73 +npm ERR! code ELIFECYCLE +npm ERR! errno 1 +npm ERR! @ bench:run: `node --expose-gc -e 'require("./output/Bench.Main/index.js").main()'` +npm ERR! Exit status 1 +npm ERR! +npm ERR! Failed at the @ bench:run script. +npm ERR! This is probably not a problem with npm. There is likely additional logging output above. +npm WARN Local package.json exists, but node_modules missing, did you mean to install? + +npm ERR! A complete log of this run can be found in: +npm ERR! /home/miles/.npm/_logs/2021-01-13T05_09_41_932Z-debug.log +npm ERR! code ELIFECYCLE +npm ERR! errno 1 +npm ERR! @ bench: `npm run bench:build && npm run bench:run` +npm ERR! Exit status 1 +npm ERR! +npm ERR! Failed at the @ bench script. +npm ERR! This is probably not a problem with npm. There is likely additional logging output above. +npm WARN Local package.json exists, but node_modules missing, did you mean to install? + +npm ERR! A complete log of this run can be found in: +npm ERR! /home/miles/.npm/_logs/2021-01-13T05_09_41_949Z-debug.log diff --git a/src/Data/List.purs b/src/Data/List.purs index b5bcc79..b697202 100644 --- a/src/Data/List.purs +++ b/src/Data/List.purs @@ -106,8 +106,8 @@ import Control.Monad.Rec.Class (class MonadRec, Step(..), tailRecM, tailRecM2) import Data.Bifunctor (bimap) import Data.Foldable (class Foldable, foldr, any, foldl) import Data.Foldable (foldl, foldr, foldMap, fold, intercalate, elem, notElem, find, findMap, any, all) as Exports -import Data.Function (on) import Data.FunctorWithIndex (mapWithIndex) as FWI +import Data.List.Internal (emptySet, insertAndLookupBy) import Data.List.Types (List(..), (:)) import Data.List.Types (NonEmptyList(..)) as NEL import Data.Maybe (Maybe(..)) @@ -115,9 +115,8 @@ import Data.Newtype (class Newtype) import Data.NonEmpty ((:|)) import Data.Traversable (scanl, scanr) as Exports import Data.Traversable (sequence) -import Data.Tuple (Tuple(..), fst, snd) +import Data.Tuple (Tuple(..)) import Data.Unfoldable (class Unfoldable, unfoldr) - import Prim.TypeError (class Warn, Text) -- | Convert a list into any unfoldable structure. @@ -685,18 +684,14 @@ nub = nubBy compare -- | -- | Running time: `O(n log n)` nubBy :: forall a. (a -> a -> Ordering) -> List a -> List a -nubBy p = - -- Discard indices, just keep original values. - mapReverse snd - -- Sort by index to recover original order. - -- Use `flip` to sort in reverse order in anticipation of final `mapReverse`. - <<< sortBy (flip compare `on` fst) - -- Removing neighboring duplicates. - <<< nubByAdjacentReverse (\a b -> (p `on` snd) a b == EQ) - -- Sort by original values to cluster duplicates. - <<< sortBy (p `on` snd) - -- Add indices so we can recover original order after deduplicating. - <<< addIndexReverse +nubBy p = reverse <<< go emptySet Nil + where + go _ acc Nil = acc + go s acc (a : as) = + let { found, result: s' } = insertAndLookupBy p a s + in if found + then go s' acc as + else go s' (a : acc) as -- | Remove duplicate elements from a list. -- | Keeps the first occurrence of each element in the input list, @@ -845,62 +840,3 @@ transpose ((x : xs) : xss) = foldM :: forall m a b. Monad m => (b -> a -> m b) -> b -> List a -> m b foldM _ b Nil = pure b foldM f b (a : as) = f b a >>= \b' -> foldM f b' as - --------------------------------------------------------------------------------- --- Fast operations which also reverse the list --------------------------------- --------------------------------------------------------------------------------- - --- | Maps a function to each element in a list --- | and reverses the result, but faster than --- | running each separately. Equivalent to: --- | --- | ```purescript --- | \f l = map f l # reverse --- | ``` --- | --- | Running time: `O(n)` -mapReverse :: forall a b. (a -> b) -> List a -> List b -mapReverse f = go Nil - where - go :: List b -> List a -> List b - go acc Nil = acc - go acc (x : xs) = go (f x : acc) xs - --- | Converts each element to a Tuple containing its index, --- | and reverses the result, but faster than running separately. --- | Equivalent to: --- | --- | ```purescript --- | reverse <<< mapWithIndex Tuple --- | ``` --- | --- | Running time: `O(n)` -addIndexReverse :: forall a. List a -> List (Tuple Int a) -addIndexReverse = go 0 Nil - where - go :: Int -> List (Tuple Int a) -> List a -> List (Tuple Int a) - go i acc Nil = acc - go i acc (x : xs) = go (i + 1) ((Tuple i x) : acc) xs - --- | Removes neighboring duplicate items from a list --- | based on an equality predicate. --- | Keeps the LAST element if duplicates are encountered. --- | Returned list is reversed (this is to improve performance). --- | --- | ```purescript --- | nubByAdjacentReverse (on eq length) ([1]:[2]:[3,4]:Nil) == [3,4]:[2]:Nil` --- | ``` --- | --- | Running time: `O(n)` -nubByAdjacentReverse :: forall a. (a -> a -> Boolean) -> List a -> List a -nubByAdjacentReverse p = go Nil - where - go :: List a -> List a -> List a - -- empty output - go Nil (x : xs) = go (x : Nil) xs - -- checking for duplicates - go acc@(a : as) (x : xs) - | p a x = go (x : as) xs - | otherwise = go (x : acc) xs - -- empty input - go acc Nil = acc diff --git a/src/Data/List/Internal.purs b/src/Data/List/Internal.purs new file mode 100644 index 0000000..5f5c950 --- /dev/null +++ b/src/Data/List/Internal.purs @@ -0,0 +1,63 @@ +module Data.List.Internal (Set, emptySet, insertAndLookupBy) where + +import Prelude + +import Data.List.Types (List(..)) + +data Set k + = Leaf + | Two (Set k) k (Set k) + | Three (Set k) k (Set k) k (Set k) + +emptySet :: forall k. Set k +emptySet = Leaf + +data TreeContext k + = TwoLeft k (Set k) + | TwoRight (Set k) k + | ThreeLeft k (Set k) k (Set k) + | ThreeMiddle (Set k) k k (Set k) + | ThreeRight (Set k) k (Set k) k + +fromZipper :: forall k. List (TreeContext k) -> Set k -> Set k +fromZipper Nil tree = tree +fromZipper (Cons x ctx) tree = + case x of + TwoLeft k1 right -> fromZipper ctx (Two tree k1 right) + TwoRight left k1 -> fromZipper ctx (Two left k1 tree) + ThreeLeft k1 mid k2 right -> fromZipper ctx (Three tree k1 mid k2 right) + ThreeMiddle left k1 k2 right -> fromZipper ctx (Three left k1 tree k2 right) + ThreeRight left k1 mid k2 -> fromZipper ctx (Three left k1 mid k2 tree) + +data KickUp k = KickUp (Set k) k (Set k) + +-- | Insert or replace a key/value pair in a map +insertAndLookupBy :: forall k. (k -> k -> Ordering) -> k -> Set k -> { found :: Boolean, result :: Set k } +insertAndLookupBy comp k orig = down Nil orig + where + down :: List (TreeContext k) -> Set k -> { found :: Boolean, result :: Set k } + down ctx Leaf = { found: false, result: up ctx (KickUp Leaf k Leaf) } + down ctx (Two left k1 right) = + case comp k k1 of + EQ -> { found: true, result: orig } + LT -> down (Cons (TwoLeft k1 right) ctx) left + _ -> down (Cons (TwoRight left k1) ctx) right + down ctx (Three left k1 mid k2 right) = + case comp k k1 of + EQ -> { found: true, result: orig } + c1 -> + case c1, comp k k2 of + _ , EQ -> { found: true, result: orig } + LT, _ -> down (Cons (ThreeLeft k1 mid k2 right) ctx) left + GT, LT -> down (Cons (ThreeMiddle left k1 k2 right) ctx) mid + _ , _ -> down (Cons (ThreeRight left k1 mid k2) ctx) right + + up :: List (TreeContext k) -> KickUp k -> Set k + up Nil (KickUp left k' right) = Two left k' right + up (Cons x ctx) kup = + case x, kup of + TwoLeft k1 right, KickUp left k' mid -> fromZipper ctx (Three left k' mid k1 right) + TwoRight left k1, KickUp mid k' right -> fromZipper ctx (Three left k1 mid k' right) + ThreeLeft k1 c k2 d, KickUp a k' b -> up ctx (KickUp (Two a k' b) k1 (Two c k2 d)) + ThreeMiddle a k1 k2 d, KickUp b k' c -> up ctx (KickUp (Two a k1 b) k' (Two c k2 d)) + ThreeRight a k1 b k2, KickUp c k' d -> up ctx (KickUp (Two a k1 b) k2 (Two c k' d)) diff --git a/src/Data/List/Lazy.purs b/src/Data/List/Lazy.purs index 9e28a4d..3b6ff84 100644 --- a/src/Data/List/Lazy.purs +++ b/src/Data/List/Lazy.purs @@ -103,6 +103,7 @@ import Control.Monad.Rec.Class as Rec import Data.Foldable (class Foldable, foldr, any, foldl) import Data.Foldable (foldl, foldr, foldMap, fold, intercalate, elem, notElem, find, findMap, any, all) as Exports import Data.Lazy (defer) +import Data.List.Internal (emptySet, insertAndLookupBy) import Data.List.Lazy.Types (List(..), Step(..), step, nil, cons, (:)) import Data.List.Lazy.Types (NonEmptyList(..)) as NEL import Data.Maybe (Maybe(..), isNothing) @@ -590,6 +591,30 @@ partition f = foldr go {yes: nil, no: nil} -- Set-like operations --------------------------------------------------------- -------------------------------------------------------------------------------- +-- | Remove duplicate elements from a list. +-- | Keeps the first occurrence of each element in the input list, +-- | in the same order they appear in the input list. +-- | +-- | Running time: `O(n log n)` +nub :: forall a. Ord a => List a -> List a +nub = nubBy compare + +-- | Remove duplicate elements from a list based on the provided comparison function. +-- | Keeps the first occurrence of each element in the input list, +-- | in the same order they appear in the input list. +-- | +-- | Running time: `O(n log n)` +nubBy :: forall a. (a -> a -> Ordering) -> List a -> List a +nubBy p = go emptySet + where + go s (List l) = List (map (goStep s) l) + goStep _ Nil = Nil + goStep s (Cons a as) = + let { found, result: s' } = insertAndLookupBy p a s + in if found + then step (go s' as) + else Cons a (go s' as) + -- | Remove duplicate elements from a list. -- | -- | Running time: `O(n^2)`