From 4fc58fbd93401a893904e3ee91eca7b7babd22dd Mon Sep 17 00:00:00 2001 From: Karl Gaissmaier Date: Sat, 4 Feb 2023 23:45:05 +0100 Subject: [PATCH] wip --- treap.go | 53 +++++++++++++++++++++++++++++++++++++++++++++++++++ treap_test.go | 44 ++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 97 insertions(+) diff --git a/treap.go b/treap.go index 40bda0c..b489d12 100644 --- a/treap.go +++ b/treap.go @@ -11,6 +11,7 @@ package interval import ( "math/rand" + "sync" ) // node is the basic recursive data structure. @@ -55,6 +56,58 @@ func NewTree[T any](cmp func(a, b T) (ll, rr, lr, rl int), items ...T) Tree[T] { return t } +// NewTreeConcurrent, convenience function for initializing the interval tree for large inputs (> 100_000). +// A good value reference for jobs is the number of logical CPUs usable by the current process. +func NewTreeConcurrent[T any](jobs int, cmp func(a, b T) (ll, rr, lr, rl int), items ...T) Tree[T] { + if jobs <= 1 { + return NewTree[T](cmp, items...) + } + + l := len(items) + + chunkSize := l/jobs + 1 + if chunkSize < 10_000 { + chunkSize = 10_000 + } + + var wg sync.WaitGroup + var chunk []T + partialTrees := make(chan Tree[T]) + + // fan out + for ; l > 0; l = len(items) { + // partition input into chunks + switch { + case l > chunkSize: + chunk = items[:chunkSize] + items = items[chunkSize:] + default: // rest + chunk = items[:l] + items = nil + } + + wg.Add(1) + go func(chunk ...T) { + defer wg.Done() + partialTrees <- NewTree[T](cmp, chunk...) + }(chunk...) + } + + // wait and close chan + go func() { + wg.Wait() + close(partialTrees) + }() + + // fan in + t := NewTree[T](cmp) + for other := range partialTrees { + // fast union, immutable is false + t = t.Union(other, false, false) + } + return t +} + // makeNode, create new node with item and random priority. func (t *Tree[T]) makeNode(item T) *node[T] { n := new(node[T]) diff --git a/treap_test.go b/treap_test.go index 2b9102e..4e42424 100644 --- a/treap_test.go +++ b/treap_test.go @@ -75,6 +75,12 @@ func TestNewTree(t *testing.T) { t.Errorf("String() = %v, want \"\"", "") } + tree = interval.NewTreeConcurrent(0, cmpUintInterval) + + if tree.String() != "" { + t.Errorf("String() = %v, want \"\"", "") + } + w := new(strings.Builder) if err := tree.Fprint(w); err != nil { t.Fatal(err) @@ -155,6 +161,44 @@ func TestNewTree(t *testing.T) { } } +func TestNewTreeConcurrent(t *testing.T) { + t.Parallel() + + ivals := genUintIvals(100_000) + + tree1 := interval.NewTree(cmpUintInterval, ivals[0]) + tree2 := interval.NewTreeConcurrent(1, cmpUintInterval, ivals[0]) + + if !equalStatistics(tree1, tree2) { + t.Fatal("New() differs with NewConcurrent(), statistics differ") + } + + tree1 = interval.NewTree(cmpUintInterval, ivals[:2]...) + tree2 = interval.NewTreeConcurrent(2, cmpUintInterval, ivals[:2]...) + + if !equalStatistics(tree1, tree2) { + t.Fatal("New() differs with NewConcurrent(), statistics differ") + } + + tree1 = interval.NewTree(cmpUintInterval, ivals[:30_000]...) + tree2 = interval.NewTreeConcurrent(3, cmpUintInterval, ivals[:30_000]...) + + if !equalStatistics(tree1, tree2) { + t.Log(tree1.Statistics()) + t.Log(tree2.Statistics()) + t.Fatal("New() differs with NewConcurrent(), statistics differ") + } + + tree1 = interval.NewTree(cmpUintInterval, ivals...) + tree2 = interval.NewTreeConcurrent(4, cmpUintInterval, ivals...) + + if !equalStatistics(tree1, tree2) { + t.Log(tree1.Statistics()) + t.Log(tree2.Statistics()) + t.Fatal("New() differs with NewConcurrent(), statistics differ") + } +} + func TestTreeWithDups(t *testing.T) { t.Parallel()