From c28337176cc8057e50da7eaefb35fb20ab94104f Mon Sep 17 00:00:00 2001 From: Frank Mueller Date: Sat, 20 Aug 2022 17:17:39 +0200 Subject: [PATCH] Add sorting --- README.md | 4 +- go.mod | 5 +- go.sum | 2 + slices.go | 18 ++-- sort.go | 171 ++++++++++++++++++++++++++++++++++++ sort_test.go | 238 +++++++++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 429 insertions(+), 9 deletions(-) create mode 100644 sort.go create mode 100644 sort_test.go diff --git a/README.md b/README.md index a5907bf..f632b49 100644 --- a/README.md +++ b/README.md @@ -11,7 +11,9 @@ **Tideland Go Slices** provides a set of functions for the processing of types slices based on generics and higher-order functions. This processing contains tests, mappings, filterings, -concatings, deleting, filtering, folding and many more. +concatings, deleting, filtering, folding and many more. Unlike similar functions of the +Go standard library, e.g. Sort(), the functions do not modify the passed slice. All functions return +new slices, even if an operation would have no effect.
## Contributors diff --git a/go.mod b/go.mod index 52dc4bc..5e28378 100644 --- a/go.mod +++ b/go.mod @@ -2,4 +2,7 @@ module tideland.dev/go/slices go 1.19 -require tideland.dev/go/audit v0.7.0 +require ( + golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e + tideland.dev/go/audit v0.7.0 +) diff --git a/go.sum b/go.sum index 69189d7..098ee5d 100644 --- a/go.sum +++ b/go.sum @@ -1,2 +1,4 @@ +golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e h1:+WEEuIdZHnUeJJmEUjyYC2gfUMj69yZXw17EnHg/otA= +golang.org/x/exp v0.0.0-20220722155223-a9213eeb770e/go.mod h1:Kr81I6Kryrl9sr8s2FK3vxD90NdsKWRuOIl2O4CvYbA= tideland.dev/go/audit v0.7.0 h1:lr4LkNu7i5qLJuqQ6lUfnt0J09anZNfrdXdB1I9JlTs= tideland.dev/go/audit v0.7.0/go.mod h1:Jua+IB3KgAC7fbuZ1YHT7gKhwpiTOcn3Q7AOCQsrro8= diff --git a/slices.go b/slices.go index a84493e..d49b432 100644 --- a/slices.go +++ b/slices.go @@ -7,6 +7,10 @@ package slices // import "tideland.dev/go/slices" +//-------------------- +// IMPORTS +//-------------------- + //-------------------- // SLICES //-------------------- @@ -61,7 +65,7 @@ func DeleteAll[V comparable](dv V, ivs []V) []V { if ivs == nil { return nil } - var ovs []V = []V{} + ovs := []V{} for _, v := range ivs { if v != dv { ovs = append(ovs, v) @@ -75,7 +79,7 @@ func DeleteAllWith[V any](pred func(V) bool, ivs []V) []V { if ivs == nil { return nil } - var ovs []V = []V{} + ovs := []V{} for _, v := range ivs { if !pred(v) { ovs = append(ovs, v) @@ -192,7 +196,7 @@ func Split[V any](n int, ivs []V) ([]V, []V) { // SplitWith returns the values while pred() returns true as first and the rest // as second slice. 
func SplitWith[V any](pred func(V) bool, ivs []V) ([]V, []V) { - if ivs == nil || len(ivs) == 0 { + if len(ivs) == 0 { return nil, nil } n := -1 @@ -263,8 +267,8 @@ func Unique[V comparable](ivs []V) []V { if ivs == nil { return nil } - var ovs []V = []V{} - var isContained map[V]struct{} = map[V]struct{}{} + ovs := []V{} + isContained := map[V]struct{}{} for _, v := range ivs { if _, ok := isContained[v]; !ok { ovs = append(ovs, v) @@ -281,8 +285,8 @@ func UniqueWith[V any, C comparable](pred func(V) C, ivs []V) []V { if ivs == nil { return nil } - var ovs []V = []V{} - var isContained map[C]struct{} = map[C]struct{}{} + ovs := []V{} + isContained := map[C]struct{}{} for _, v := range ivs { cv := pred(v) if _, ok := isContained[cv]; !ok { diff --git a/sort.go b/sort.go new file mode 100644 index 0000000..e2cc067 --- /dev/null +++ b/sort.go @@ -0,0 +1,171 @@ +// Tideland Go Slices +// +// Copyright (C) 2022 Frank Mueller / Tideland / Oldenburg / Germany +// +// All rights reserved. Use of this source code is governed +// by the new BSD license. + +package slices // import "tideland.dev/go/slices" + +import ( + "runtime" + + "golang.org/x/exp/constraints" +) + +//-------------------- +// SORT +//-------------------- + +// Sort returns the values of ivs sorted in ascending order using a parallel quicksort. +// The values are compared with the < operator, so V has to fulfil constraints.Ordered. +func Sort[V constraints.Ordered](ivs []V) []V { + less := func(vs []V, i, j int) bool { + return vs[i] < vs[j] + } + + return SortWith(ivs, less) +} + +// SortWith returns the result of Copy(ivs) sorted with the passed less function. It +// compares the two values of vs at the indexes i and j and returns true if the value +// at i has to be sorted before the one at j. +func SortWith[V any](ivs []V, less func(vs []V, i, j int) bool) []V { + ovs := Copy(ivs) + + sort(ovs, less) + + return ovs +} + +// IsSorted returns true if a slice is sorted in ascending order; equal neighbours are allowed.
+func IsSorted[V constraints.Ordered](vs []V) bool { + for i := len(vs) - 1; i > 0; i-- { + if vs[i] < vs[i-1] { + return false + } + } + return true +} + +// IsSortedWith returns true if no value of vs is less than its predecessor +// using less as comparison function for the two values. +func IsSortedWith[V any](vs []V, less func(a, b V) bool) bool { + for i := len(vs) - 1; i > 0; i-- { + if less(vs[i], vs[i-1]) { + return false + } + } + return true +} + +//-------------------- +// PRIVATE +//-------------------- + +// sequentialThreshold is the range size (hi-lo) above which quick sort is used instead of insertion sort. +var sequentialThreshold = runtime.NumCPU()*4 - 1 + +// parallelThreshold is the range size (hi-lo) above which quick sort runs parallel instead of sequential. +var parallelThreshold = runtime.NumCPU()*2048 - 1 + +// swap exchanges the values at the indexes lo and hi of the slice vs. +func swap[V any](vs []V, lo, hi int) { + tmp := vs[lo] + vs[lo] = vs[hi] + vs[hi] = tmp +} + +// insertionSort sorts the values of vs between the indexes lo and hi (both included) in place. +func insertionSort[V any](vs []V, less func(vs []V, i, j int) bool, lo, hi int) { + for i := lo + 1; i < hi+1; i++ { + for j := i; j > lo && less(vs, j, j-1); j-- { + swap(vs, j, j-1) + } + } +} + +// median calculates the median based on Tukey's ninther, moves it to the middle index and returns that index. +func median[V any](vs []V, less func(vs []V, i, j int) bool, lo, hi int) int { + m := (lo + hi) / 2 + d := (hi - lo) / 8 + // Move the median of the three values at ml, mm, and mh to the index mm. + mot := func(ml, mm, mh int) { + if less(vs, mm, ml) { + swap(vs, mm, ml) + } + if less(vs, mh, mm) { + swap(vs, mh, mm) + } + if less(vs, mm, ml) { + swap(vs, mm, ml) + } + } + // Get low, middle, and high median, only done for ranges larger than 40. + if hi-lo > 40 { + mot(lo+d, lo, lo+2*d) + mot(m-d, m, m+d) + mot(hi-d, hi, hi-2*d) + } + // Get combined median of the values at lo, m, and hi. + mot(lo, m, hi) + return m +} + +// partition the data based on the median and return the bounds of the lower and the upper part.
+func partition[V any](vs []V, less func(vs []V, i, j int) bool, lo, hi int) (int, int) { + med := median(vs, less, lo, hi) + idx := lo + swap(vs, med, hi) + for i := lo; i < hi; i++ { + if less(vs, i, hi) { + swap(vs, i, idx) + idx++ + } + } + swap(vs, idx, hi) + return idx - 1, idx + 1 +} + +// sequentialQuickSort sorts the values between lo and hi recursively, small ranges by insertion sort. +func sequentialQuickSort[V any](vs []V, less func(vs []V, i, j int) bool, lo, hi int) { + if hi-lo > sequentialThreshold { + // Use sequential quicksort. + plo, phi := partition(vs, less, lo, hi) + sequentialQuickSort(vs, less, lo, plo) + sequentialQuickSort(vs, less, phi, hi) + } else { + // Use insertion sort. + insertionSort(vs, less, lo, hi) + } +} + +// parallelQuickSort sorts the values between lo and hi, large ranges in two goroutines, and signals its end via done. +func parallelQuickSort[V any](vs []V, less func(vs []V, i, j int) bool, lo, hi int, done chan struct{}) { + if hi-lo > parallelThreshold { + // Parallel QuickSort. + plo, phi := partition(vs, less, lo, hi) + partDone := make(chan struct{}) + go parallelQuickSort(vs, less, lo, plo, partDone) + go parallelQuickSort(vs, less, phi, hi, partDone) + // Wait for the end of both sorts. + <-partDone + <-partDone + } else { + // Sequential QuickSort. + sequentialQuickSort(vs, less, lo, hi) + } + // Signal that it's done. + done <- struct{}{} +} + +// sort starts the parallel quick sort for the whole slice and waits until it is done. +func sort[V any](vs []V, less func(vs []V, i, j int) bool) { + done := make(chan struct{}) + + go parallelQuickSort(vs, less, 0, len(vs)-1, done) + + <-done +} + +// EOF diff --git a/sort_test.go b/sort_test.go new file mode 100644 index 0000000..8bfc7d0 --- /dev/null +++ b/sort_test.go @@ -0,0 +1,238 @@ +// Tideland Go Testing - Unit Tests +// +// Copyright (C) 2022 Frank Mueller / Tideland / Oldenburg / Germany +// +// All rights reserved. Use of this source code is governed +// by the new BSD license.
+ +package slices_test // import "tideland.dev/go/slices" + +//-------------------- +// IMPORTS +//-------------------- + +import ( + "testing" + + "tideland.dev/go/audit/asserts" + "tideland.dev/go/audit/generators" + + "tideland.dev/go/slices" +) + +//-------------------- +// TESTS +//-------------------- + +// TestSort verifies Sort with unordered, ordered, reverse ordered, equal, empty, and nil int slices. +func TestSort(t *testing.T) { + assert := asserts.NewTesting(t, asserts.FailStop) + + tests := []struct { + descr string + values []int + out []int + }{ + { + descr: "Simple unordered slice", + values: []int{5, 7, 1, 3, 4, 2, 8, 6, 9}, + out: []int{1, 2, 3, 4, 5, 6, 7, 8, 9}, + }, { + descr: "Unordered double value slice", + values: []int{9, 5, 7, 3, 1, 3, 5, 4, 2, 8, 5, 6, 9}, + out: []int{1, 2, 3, 3, 4, 5, 5, 5, 6, 7, 8, 9, 9}, + }, { + descr: "Already ordered slice", + values: []int{1, 2, 3, 4, 5, 6, 7, 8, 9}, + out: []int{1, 2, 3, 4, 5, 6, 7, 8, 9}, + }, { + descr: "Reverse ordered slice", + values: []int{9, 8, 7, 6, 5, 4, 3, 2, 1}, + out: []int{1, 2, 3, 4, 5, 6, 7, 8, 9}, + }, { + descr: "Single value slice", + values: []int{1, 1, 1, 1, 1}, + out: []int{1, 1, 1, 1, 1}, + }, { + descr: "Empty slice", + values: []int{}, + out: []int{}, + }, { + descr: "Nil slice", + values: nil, + out: nil, + }, + } + + for _, test := range tests { + assert.Logf(test.descr) + assert.Equal(slices.Sort(test.values), test.out) + } +} + +// TestSortWith verifies SortWith with string slices and a less function comparing the string lengths.
+func TestSortWith(t *testing.T) { + assert := asserts.NewTesting(t, asserts.FailStop) + + less := func(vs []string, i, j int) bool { return len(vs[i]) < len(vs[j]) } + tests := []struct { + descr string + values []string + out []string + }{ + { + descr: "Simple unordered slice", + values: []string{"alpha", "beta", "phi", "epsilon", "lambda", "pi"}, + out: []string{"pi", "phi", "beta", "alpha", "lambda", "epsilon"}, + }, { + descr: "Unordered double value slice", + values: []string{"phi", "alpha", "beta", "phi", "epsilon", "beta", "lambda", "pi"}, + out: []string{"pi", "phi", "phi", "beta", "beta", "alpha", "lambda", "epsilon"}, + }, { + descr: "Already ordered slice", + values: []string{"pi", "phi", "beta", "alpha", "lambda", "epsilon"}, + out: []string{"pi", "phi", "beta", "alpha", "lambda", "epsilon"}, + }, { + descr: "Reverse ordered slice", + values: []string{"epsilon", "lambda", "alpha", "beta", "phi", "pi"}, + out: []string{"pi", "phi", "beta", "alpha", "lambda", "epsilon"}, + }, { + descr: "Single value slice", + values: []string{"alpha", "alpha", "alpha", "alpha", "alpha"}, + out: []string{"alpha", "alpha", "alpha", "alpha", "alpha"}, + }, { + descr: "Empty slice", + values: []string{}, + out: []string{}, + }, { + descr: "Nil slice", + values: nil, + out: nil, + }, + } + + for _, test := range tests { + assert.Logf(test.descr) + assert.Equal(slices.SortWith(test.values, less), test.out) + } +} + +// TestIsSorted verifies IsSorted with sorted and unsorted int slices, including empty and nil ones.
+func TestIsSorted(t *testing.T) { + assert := asserts.NewTesting(t, asserts.FailStop) + + tests := []struct { + descr string + values []int + out bool + }{ + { + descr: "Unordered slice", + values: []int{5, 7, 1, 3, 4, 2, 8, 6, 9}, + out: false, + }, { + descr: "Ordered slice", + values: []int{1, 2, 3, 4, 5, 6, 7, 8, 9}, + out: true, + }, { + descr: "Reverse ordered slice", + values: []int{9, 8, 7, 6, 5, 4, 3, 2, 1}, + out: false, + }, { + descr: "Single value slice", + values: []int{1, 1, 1, 1, 1}, + out: true, + }, { + descr: "Empty slice", + values: []int{}, + out: true, + }, { + descr: "Nil slice", + values: nil, + out: true, + }, + } + + for _, test := range tests { + assert.Logf(test.descr) + assert.Equal(slices.IsSorted(test.values), test.out) + } +} + +// TestIsSortedWith verifies IsSortedWith with string slices compared by their string lengths. +func TestIsSortedWith(t *testing.T) { + assert := asserts.NewTesting(t, asserts.FailStop) + + less := func(a, b string) bool { return len(a) < len(b) } + tests := []struct { + descr string + values []string + out bool + }{ + { + descr: "Unordered slice", + values: []string{"alpha", "beta", "phi", "epsilon", "lambda", "pi"}, + out: false, + }, { + descr: "Ordered slice", + values: []string{"pi", "phi", "beta", "alpha", "lambda", "epsilon"}, + out: true, + }, { + descr: "Reverse ordered slice", + values: []string{"epsilon", "lambda", "alpha", "beta", "phi", "pi"}, + out: false, + }, { + descr: "Single value slice", + values: []string{"alpha", "alpha", "alpha", "alpha", "alpha"}, + out: true, + }, { + descr: "Empty slice", + values: []string{}, + out: true, + }, { + descr: "Nil slice", + values: nil, + out: true, + }, + } + + for _, test := range tests { + assert.Logf(test.descr) + assert.Equal(slices.IsSortedWith(test.values, less), test.out) + } +} + +//-------------------- +// BENCHMARKS AND FUZZ TESTS +//-------------------- + +// BenchmarkSort runs a performance test on standard sorting. NOTE(review): Sort is called once, b.N is not used.
+func BenchmarkSort(b *testing.B) { + gen := generators.New(generators.FixedRand()) + vs := gen.Ints(0, 1000, 10000) + + slices.Sort(vs) +} + +// BenchmarkSortWith runs a performance test on sorting with comparator. NOTE(review): SortWith is called once, b.N is not used. +func BenchmarkSortWith(b *testing.B) { + gen := generators.New(generators.FixedRand()) + vs := gen.Words(10000) + less := func(vs []string, i, j int) bool { return len(vs[i]) < len(vs[j]) } + + slices.SortWith(vs, less) +} + +// FuzzSort runs a fuzz test on the standard sorting. NOTE(review): the fuzz argument i is not used by the fuzz target. +func FuzzSort(f *testing.F) { + gen := generators.New(generators.FixedRand()) + + f.Add(5) + f.Fuzz(func(t *testing.T, i int) { + vs := gen.Ints(0, 1000, 10000) + + slices.Sort(vs) + }) +} + +// EOF