Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use Set for nub ord #1

Closed
wants to merge 8 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 4 additions & 3 deletions bench/Bench/Data/List.purs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ module Bench.Data.List where
import Prelude
import Data.Foldable (maximum)
import Data.Int (pow)
import Data.List (List(..), take, range, foldr, length)
import Data.List (List(..), take, range, foldr, length, nub)
import Data.Maybe (fromMaybe)
import Data.Traversable (traverse_)
import Effect (Effect)
Expand All @@ -12,8 +12,9 @@ import Performance.Minibench (bench)

benchList :: Effect Unit
benchList = do
benchLists "map" $ map (_ + 1)
benchLists "foldr" $ foldr add 0
--benchLists "map" $ map (_ + 1)
--benchLists "foldr" $ foldr add 0
benchLists "nub" nub

where

Expand Down
83 changes: 83 additions & 0 deletions ord-set-bench.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@

> @ bench /home/miles/projects/purescript/lists
> npm run bench:build && npm run bench:run


> @ bench:build /home/miles/projects/purescript/lists
> purs compile 'bench/**/*.purs' 'src/**/*.purs' 'bower_components/*/src/**/*.purs'


> @ bench:run /home/miles/projects/purescript/lists
> node --expose-gc -e 'require("./output/Bench.Main/index.js").main()'

List
====
---
nub: list (0 elems)
mean = 1.07 μs
stddev = 3.20 μs
min = 602.00 ns
max = 101.06 μs
---
nub: list (1 elems)
mean = 3.38 μs
stddev = 21.73 μs
min = 1.45 μs
max = 559.14 μs
---
nub: list (10 elems)
mean = 26.75 μs
stddev = 80.87 μs
min = 3.45 μs
max = 1.37 ms
---
nub: list (100 elems)
mean = 86.53 μs
stddev = 122.58 μs
min = 58.71 μs
max = 1.58 ms
---
nub: list (1000 elems)
mean = 1.02 ms
stddev = 467.08 μs
min = 900.78 μs
max = 13.52 ms
---
nub: list (10000 elems)
/home/miles/projects/purescript/lists/output/Data.List.Internal/index.js:146
function $tco_loop(v, tree) {
^

RangeError: Maximum call stack size exceeded
at $tco_loop (/home/miles/projects/purescript/lists/output/Data.List.Internal/index.js:146:27)
at /home/miles/projects/purescript/lists/output/Data.List.Internal/index.js:182:27
at $tco_loop (/home/miles/projects/purescript/lists/output/Data.List.Internal/index.js:207:60)
at /home/miles/projects/purescript/lists/output/Data.List.Internal/index.js:229:39
at $tco_loop (/home/miles/projects/purescript/lists/output/Data.List.Internal/index.js:244:48)
at /home/miles/projects/purescript/lists/output/Data.List.Internal/index.js:299:39
at /home/miles/projects/purescript/lists/output/Data.List.Internal/index.js:304:51
at /home/miles/projects/purescript/lists/output/Data.List/index.js:507:76
at /home/miles/projects/purescript/lists/output/Data.List/index.js:511:73
at /home/miles/projects/purescript/lists/output/Data.List/index.js:511:73
npm ERR! code ELIFECYCLE
npm ERR! errno 1
npm ERR! @ bench:run: `node --expose-gc -e 'require("./output/Bench.Main/index.js").main()'`
npm ERR! Exit status 1
npm ERR!
npm ERR! Failed at the @ bench:run script.
npm ERR! This is probably not a problem with npm. There is likely additional logging output above.
npm WARN Local package.json exists, but node_modules missing, did you mean to install?

npm ERR! A complete log of this run can be found in:
npm ERR! /home/miles/.npm/_logs/2021-01-13T05_09_41_932Z-debug.log
npm ERR! code ELIFECYCLE
npm ERR! errno 1
npm ERR! @ bench: `npm run bench:build && npm run bench:run`
npm ERR! Exit status 1
npm ERR!
npm ERR! Failed at the @ bench script.
npm ERR! This is probably not a problem with npm. There is likely additional logging output above.
npm WARN Local package.json exists, but node_modules missing, did you mean to install?

npm ERR! A complete log of this run can be found in:
npm ERR! /home/miles/.npm/_logs/2021-01-13T05_09_41_949Z-debug.log
84 changes: 10 additions & 74 deletions src/Data/List.purs
Original file line number Diff line number Diff line change
Expand Up @@ -106,18 +106,17 @@ import Control.Monad.Rec.Class (class MonadRec, Step(..), tailRecM, tailRecM2)
import Data.Bifunctor (bimap)
import Data.Foldable (class Foldable, foldr, any, foldl)
import Data.Foldable (foldl, foldr, foldMap, fold, intercalate, elem, notElem, find, findMap, any, all) as Exports
import Data.Function (on)
import Data.FunctorWithIndex (mapWithIndex) as FWI
import Data.List.Internal (emptySet, insertAndLookupBy)
import Data.List.Types (List(..), (:))
import Data.List.Types (NonEmptyList(..)) as NEL
import Data.Maybe (Maybe(..))
import Data.Newtype (class Newtype)
import Data.NonEmpty ((:|))
import Data.Traversable (scanl, scanr) as Exports
import Data.Traversable (sequence)
import Data.Tuple (Tuple(..), fst, snd)
import Data.Tuple (Tuple(..))
import Data.Unfoldable (class Unfoldable, unfoldr)

import Prim.TypeError (class Warn, Text)

-- | Convert a list into any unfoldable structure.
Expand Down Expand Up @@ -685,18 +684,14 @@ nub = nubBy compare
-- |
-- | Running time: `O(n log n)`
nubBy :: forall a. (a -> a -> Ordering) -> List a -> List a
nubBy p =
-- Discard indices, just keep original values.
mapReverse snd
-- Sort by index to recover original order.
-- Use `flip` to sort in reverse order in anticipation of final `mapReverse`.
<<< sortBy (flip compare `on` fst)
-- Removing neighboring duplicates.
<<< nubByAdjacentReverse (\a b -> (p `on` snd) a b == EQ)
-- Sort by original values to cluster duplicates.
<<< sortBy (p `on` snd)
-- Add indices so we can recover original order after deduplicating.
<<< addIndexReverse
nubBy p = reverse <<< go emptySet Nil
where
go _ acc Nil = acc
go s acc (a : as) =
let { found, result: s' } = insertAndLookupBy p a s
in if found
then go s' acc as
else go s' (a : acc) as

-- | Remove duplicate elements from a list.
-- | Keeps the first occurrence of each element in the input list,
Expand Down Expand Up @@ -845,62 +840,3 @@ transpose ((x : xs) : xss) =
foldM :: forall m a b. Monad m => (b -> a -> m b) -> b -> List a -> m b
foldM _ b Nil = pure b
foldM f b (a : as) = f b a >>= \b' -> foldM f b' as

--------------------------------------------------------------------------------
-- Fast operations which also reverse the list ---------------------------------
--------------------------------------------------------------------------------

-- | Maps a function to each element in a list
-- | and reverses the result, but faster than
-- | running each separately. Equivalent to:
-- |
-- | ```purescript
-- | \f l = map f l # reverse
-- | ```
-- |
-- | Running time: `O(n)`
mapReverse :: forall a b. (a -> b) -> List a -> List b
mapReverse f = go Nil
where
go :: List b -> List a -> List b
go acc Nil = acc
go acc (x : xs) = go (f x : acc) xs

-- | Converts each element to a Tuple containing its index,
-- | and reverses the result, but faster than running separately.
-- | Equivalent to:
-- |
-- | ```purescript
-- | reverse <<< mapWithIndex Tuple
-- | ```
-- |
-- | Running time: `O(n)`
addIndexReverse :: forall a. List a -> List (Tuple Int a)
addIndexReverse = go 0 Nil
where
go :: Int -> List (Tuple Int a) -> List a -> List (Tuple Int a)
go i acc Nil = acc
go i acc (x : xs) = go (i + 1) ((Tuple i x) : acc) xs

-- | Removes neighboring duplicate items from a list
-- | based on an equality predicate.
-- | Keeps the LAST element if duplicates are encountered.
-- | Returned list is reversed (this is to improve performance).
-- |
-- | ```purescript
-- | nubByAdjacentReverse (on eq length) ([1]:[2]:[3,4]:Nil) == [3,4]:[2]:Nil`
-- | ```
-- |
-- | Running time: `O(n)`
nubByAdjacentReverse :: forall a. (a -> a -> Boolean) -> List a -> List a
nubByAdjacentReverse p = go Nil
where
go :: List a -> List a -> List a
-- empty output
go Nil (x : xs) = go (x : Nil) xs
-- checking for duplicates
go acc@(a : as) (x : xs)
| p a x = go (x : as) xs
| otherwise = go (x : acc) xs
-- empty input
go acc Nil = acc
63 changes: 63 additions & 0 deletions src/Data/List/Internal.purs
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
module Data.List.Internal (Set, emptySet, insertAndLookupBy) where

import Prelude

import Data.List.Types (List(..))

data Set k
= Leaf
| Two (Set k) k (Set k)
| Three (Set k) k (Set k) k (Set k)

emptySet :: forall k. Set k
emptySet = Leaf

data TreeContext k
= TwoLeft k (Set k)
| TwoRight (Set k) k
| ThreeLeft k (Set k) k (Set k)
| ThreeMiddle (Set k) k k (Set k)
| ThreeRight (Set k) k (Set k) k

fromZipper :: forall k. List (TreeContext k) -> Set k -> Set k
fromZipper Nil tree = tree
fromZipper (Cons x ctx) tree =
case x of
TwoLeft k1 right -> fromZipper ctx (Two tree k1 right)
TwoRight left k1 -> fromZipper ctx (Two left k1 tree)
ThreeLeft k1 mid k2 right -> fromZipper ctx (Three tree k1 mid k2 right)
ThreeMiddle left k1 k2 right -> fromZipper ctx (Three left k1 tree k2 right)
ThreeRight left k1 mid k2 -> fromZipper ctx (Three left k1 mid k2 tree)

data KickUp k = KickUp (Set k) k (Set k)

-- | Insert or replace a key/value pair in a map
insertAndLookupBy :: forall k. (k -> k -> Ordering) -> k -> Set k -> { found :: Boolean, result :: Set k }
insertAndLookupBy comp k orig = down Nil orig
where
down :: List (TreeContext k) -> Set k -> { found :: Boolean, result :: Set k }
down ctx Leaf = { found: false, result: up ctx (KickUp Leaf k Leaf) }
down ctx (Two left k1 right) =
case comp k k1 of
EQ -> { found: true, result: orig }
LT -> down (Cons (TwoLeft k1 right) ctx) left
_ -> down (Cons (TwoRight left k1) ctx) right
down ctx (Three left k1 mid k2 right) =
case comp k k1 of
EQ -> { found: true, result: orig }
c1 ->
case c1, comp k k2 of
_ , EQ -> { found: true, result: orig }
LT, _ -> down (Cons (ThreeLeft k1 mid k2 right) ctx) left
GT, LT -> down (Cons (ThreeMiddle left k1 k2 right) ctx) mid
_ , _ -> down (Cons (ThreeRight left k1 mid k2) ctx) right

up :: List (TreeContext k) -> KickUp k -> Set k
up Nil (KickUp left k' right) = Two left k' right
up (Cons x ctx) kup =
case x, kup of
TwoLeft k1 right, KickUp left k' mid -> fromZipper ctx (Three left k' mid k1 right)
TwoRight left k1, KickUp mid k' right -> fromZipper ctx (Three left k1 mid k' right)
ThreeLeft k1 c k2 d, KickUp a k' b -> up ctx (KickUp (Two a k' b) k1 (Two c k2 d))
ThreeMiddle a k1 k2 d, KickUp b k' c -> up ctx (KickUp (Two a k1 b) k' (Two c k2 d))
ThreeRight a k1 b k2, KickUp c k' d -> up ctx (KickUp (Two a k1 b) k2 (Two c k' d))
25 changes: 25 additions & 0 deletions src/Data/List/Lazy.purs
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ import Control.Monad.Rec.Class as Rec
import Data.Foldable (class Foldable, foldr, any, foldl)
import Data.Foldable (foldl, foldr, foldMap, fold, intercalate, elem, notElem, find, findMap, any, all) as Exports
import Data.Lazy (defer)
import Data.List.Internal (emptySet, insertAndLookupBy)
import Data.List.Lazy.Types (List(..), Step(..), step, nil, cons, (:))
import Data.List.Lazy.Types (NonEmptyList(..)) as NEL
import Data.Maybe (Maybe(..), isNothing)
Expand Down Expand Up @@ -590,6 +591,30 @@ partition f = foldr go {yes: nil, no: nil}
-- Set-like operations ---------------------------------------------------------
--------------------------------------------------------------------------------

-- | Remove duplicate elements from a list.
-- | Keeps the first occurrence of each element in the input list,
-- | in the same order they appear in the input list.
-- |
-- | Running time: `O(n log n)`
nub :: forall a. Ord a => List a -> List a
nub = nubBy compare

-- | Remove duplicate elements from a list based on the provided comparison function.
-- | Keeps the first occurrence of each element in the input list,
-- | in the same order they appear in the input list.
-- |
-- | Running time: `O(n log n)`
nubBy :: forall a. (a -> a -> Ordering) -> List a -> List a
nubBy p = go emptySet
where
go s (List l) = List (map (goStep s) l)
goStep _ Nil = Nil
goStep s (Cons a as) =
let { found, result: s' } = insertAndLookupBy p a s
in if found
then step (go s' as)
else Cons a (go s' as)

-- | Remove duplicate elements from a list.
-- |
-- | Running time: `O(n^2)`
Expand Down