From 07af721ae90176cb1f7cb9a9d6545bba73fe36a6 Mon Sep 17 00:00:00 2001 From: Daryle Walker Date: Thu, 11 Jul 2024 13:12:38 -0400 Subject: [PATCH 01/34] Add functions to merge sorted sequences Add initializers to RangeReplaceableCollection that merge two sorted sequence arguments. Add a function returning a lazy sequence that can merge two other sorted sequences. Add initializers to RangeReplaceableCollection that treat their two sorted sequence arguments as sets, then generates a result of the given set operation. These are included since they use the same base splicing code as straight mergers. Add support types for mergers and set operations. One describes the specific set operation requested, including a straight merger. The others are a sequence that lazily generates a set operation or merger, and its corresponding iterator. TODO: Add tests and a guide. --- .../Documentation.docc/Algorithms.md | 1 + .../Algorithms/Documentation.docc/Merging.md | 27 + Sources/Algorithms/MergeSorted.swift | 499 ++++++++++++++++++ 3 files changed, 527 insertions(+) create mode 100644 Sources/Algorithms/Documentation.docc/Merging.md create mode 100644 Sources/Algorithms/MergeSorted.swift diff --git a/Sources/Algorithms/Documentation.docc/Algorithms.md b/Sources/Algorithms/Documentation.docc/Algorithms.md index 3cfb2693..95a2aa70 100644 --- a/Sources/Algorithms/Documentation.docc/Algorithms.md +++ b/Sources/Algorithms/Documentation.docc/Algorithms.md @@ -40,3 +40,4 @@ Explore more chunking methods and the remainder of the Algorithms package, group - - - +- diff --git a/Sources/Algorithms/Documentation.docc/Merging.md b/Sources/Algorithms/Documentation.docc/Merging.md new file mode 100644 index 00000000..4469ed44 --- /dev/null +++ b/Sources/Algorithms/Documentation.docc/Merging.md @@ -0,0 +1,27 @@ +# Merging + +Merge two sorted sequences as a new sorted sequence. +Take two sorted sequences to be treated as sets, +then generate the result of applying a set operation. 
+ +## Topics + +### Merging Sorted Sequences + +- ``Swift/RangeReplaceableCollection/init(mergeSorted:and:retaining:sortedBy:)`` +- ``Swift/RangeReplaceableCollection/init(mergeSorted:and:retaining:)`` +- ``Swift/mergeSorted(_:_:retaining:areInIncreasingOrder:)`` +- ``Swift/mergeSorted(_:_:retaining:)`` + +### Applying Set Operations to Sorted Sequences + +``Swift/RangeReplaceableCollection/init(sorted:withoutElementsFrom:)`` +``Swift/RangeReplaceableCollection/init(exclusivesOfSorted:andExclusivesOf:)`` +``Swift/RangeReplaceableCollection/init(elementsOfSorted:sharedWith:)`` +``Swift/RangeReplaceableCollection/init(unionOfSorted:and:)`` + +### Supporting Types + +- ``MergerSubset`` +- ``MergeSortedSequence`` +- ``MergeSortedIterator`` diff --git a/Sources/Algorithms/MergeSorted.swift b/Sources/Algorithms/MergeSorted.swift new file mode 100644 index 00000000..e878ee3a --- /dev/null +++ b/Sources/Algorithms/MergeSorted.swift @@ -0,0 +1,499 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift Algorithms open source project +// +// Copyright (c) 2024 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// MARK: MergerSubset +//-------------------------------------------------------------------------===// + +/// Description of which elements of a merger were retained. +public enum MergerSubset: UInt, CaseIterable { + /// Keep no elements. + case none + /// Keep the elements of the first source that are not also in the second. + case firstWithoutSecond + /// Keep the elements of the second source that are not also in the first. 
+ case secondWithoutFirst + /// Keep the elements of both sources that are not present in the other. + case symmetricDifference + /// Keep the elements that are present in both sources. + case intersection + /// Keep only the elements from the first source. + case first + /// Keep only the elements from the second source. + case second + /// Keep all of the elements from both sources, consolidating shared ones. + case union + /// Keep all elements from both sources, including duplicates. + case sum = 0b1111 // `union` with an extra bit to distinguish. +} + +extension MergerSubset { + /// Whether the elements exclusive to the first source are emitted. + @inlinable + public var emitsExclusivesToFirst: Bool { rawValue & 0b001 != 0 } + /// Whether the elements exclusive to the second source are emitted. + @inlinable + public var emitsExclusivesToSecond: Bool { rawValue & 0b010 != 0 } + /// Whether the elements shared by both sources are emitted. + public var emitsSharedElements: Bool { rawValue & 0b100 != 0 } +} + +extension MergerSubset { + /// Create a filter specifying a full merge (duplicating the shared elements). + @inlinable + public init() { self = .sum } + /// Create a filter specifying which categories of elements are included in + /// the merger, with shared elements consolidated. + public init(keepExclusivesToFirst: Bool, keepExclusivesToSecond: Bool, keepSharedElements: Bool) { + self = switch (keepSharedElements, keepExclusivesToSecond, keepExclusivesToFirst) { + case (false, false, false): .none + case (false, false, true): .firstWithoutSecond + case (false, true, false): .secondWithoutFirst + case (false, true, true): .symmetricDifference + case ( true, false, false): .intersection + case ( true, false, true): .first + case ( true, true, false): .second + case ( true, true, true): .union + } + } +} + +extension MergerSubset { + /// Return the worst-case bounds with the given source lengths. 
+ /// + /// These not necessarily exclusive conditions can affect the result: + /// + /// - One or both of the sources is empty. + /// - The sources are identical. + /// - The sources have no elements in common. + /// - The shorter source is a subset of the longer one. + /// - The sources have just partial overlap. + /// + /// Both inputs must be nonnegative. + @usableFromInline + func expectedCountRange(given firstLength: Int, and secondLength: Int) -> ClosedRange { + /// Generate a range for a single value without repeating its expression. + func singleValueRange(_ v: Int) -> ClosedRange { return v...v } + + return switch self { + case .none: + singleValueRange(0) + case .firstWithoutSecond: + max(firstLength - secondLength, 0)...firstLength + case .secondWithoutFirst: + max(secondLength - firstLength, 0)...secondLength + case .symmetricDifference: + abs(firstLength - secondLength)...(firstLength + secondLength) + case .intersection: + 0...min(firstLength, secondLength) + case .first: + singleValueRange(firstLength) + case .second: + singleValueRange(secondLength) + case .union: + max(firstLength, secondLength)...(firstLength + secondLength) + case .sum: + singleValueRange(firstLength + secondLength) + } + } +} + +//===----------------------------------------------------------------------===// +// MARK: - RangeReplaceableCollection.init(mergeSorted:and:retaining:sortedBy:) +//-------------------------------------------------------------------------===// + +extension RangeReplaceableCollection { + /// Given two sequences that are both sorted according to the given predicate, + /// treat them as sets, and create the sorted result of the given set + /// operation. + /// + /// For simply merging the sequences, use `.sum` as the operation. + /// + /// - Precondition: Both `first` and `second` must be sorted according to + /// `areInIncreasingOrder`, and said predicate must be a strict weak ordering + /// over its arguments. Both `first` and `second` must be finite. 
+ /// + /// - Parameters: + /// - first: The first sequence spliced. + /// - second: The second sequence spliced. + /// - filter: The subset of the merged sequence to keep. If not given, + /// defaults to `.sum`. + /// - areInIncreasingOrder: The criteria for sorting. + /// + /// - Complexity: O(`n` + `m`) in space and time, where `n` and `m` are the + /// lengths of the sequence arguments. + public init( + mergeSorted first: T, + and second: U, + retaining filter: MergerSubset = .sum, + sortedBy areInIncreasingOrder: (Element, Element) throws -> Bool + ) rethrows + where T.Element == Element, U.Element == Element + { + self.init() + self.reserveCapacity( + filter.expectedCountRange(given: first.underestimatedCount, + and: second.underestimatedCount).lowerBound + ) + try withoutActuallyEscaping(areInIncreasingOrder) { + var iterator = MergeSortedIterator(first.makeIterator(), + second.makeIterator(), + filter: filter, + predicate: $0) + while let current = try iterator.throwingNext() { + self.append(current) + } + } + } +} + +extension RangeReplaceableCollection where Element: Comparable { + /// Given two sorted sequences, treat them as sets, and create the sorted + /// result of the given set operation. + /// + /// For simply merging the sequences, use `.sum` as the operation. + /// + /// - Precondition: Both `first` and `second` must be sorted, and both + /// must be finite. + /// + /// - Parameters: + /// - first: The first sequence spliced. + /// - second: The second sequence spliced. + /// - filter: The subset of the merged sequence to keep. If not given, + /// defaults to `.sum`. + /// + /// - Complexity: O(`n` + `m`) in space and time, where `n` and `m` are the + /// lengths of the sequence arguments. 
+ @inlinable + public init( + mergeSorted first: T, + and second: U, + retaining filter: MergerSubset = .sum + ) where T.Element == Element, U.Element == Element + { + self.init(mergeSorted: first, and: second, retaining: filter, sortedBy: <) + } +} + +extension RangeReplaceableCollection where Element: Comparable { + + /// Given two sorted sequences treated as sets, create a copy of + /// the first sequence without the elements that have a match in + /// the second sequence (*i.e.* set difference). + /// + /// - Precondition: Both `first` and `second` must be sorted, and both + /// must be finite. + /// + /// - Parameters: + /// - first: The first sequence spliced. + /// - second: The second sequence spliced. + /// + /// - Complexity: O(`n` + `m`) in space and time, where `n` and `m` are the + /// lengths of the sequence arguments. + @inlinable + public init( + sorted first: T, withoutElementsFrom second: U + ) where T.Element == Element, U.Element == Element { + self.init(mergeSorted: first, and: second, retaining: .firstWithoutSecond) + } + + /// Given two sorted sequences treated as sets, create a sorted sequence + /// composed of the unmatched elements from both arguments (*i.e.* symmetric + /// set difference). + /// + /// - Precondition: Both `first` and `second` must be sorted, and both + /// must be finite. + /// + /// - Parameters: + /// - first: The first sequence spliced. + /// - second: The second sequence spliced. + /// + /// - Complexity: O(`n` + `m`) in space and time, where `n` and `m` are the + /// lengths of the sequence arguments. 
+ @inlinable + public init( + exclusivesOfSorted first: T, andExclusivesOf second: U + ) where T.Element == Element, U.Element == Element { + self.init(mergeSorted: first, and: second, retaining: .symmetricDifference) + } + + /// Given two sorted sequences treated as sets, create a sorted sequence + /// composed of the elements that are present in both arguments, but + /// only one element per matched pair (*i.e.* set intersection). + /// + /// - Precondition: Both `first` and `second` must be sorted, and both + /// must be finite. + /// + /// - Parameters: + /// - first: The first sequence spliced. + /// - second: The second sequence spliced. + /// + /// - Complexity: O(`n` + `m`) in space and time, where `n` and `m` are the + /// lengths of the sequence arguments. + @inlinable + public init( + elementsOfSorted first: T, sharedWith second: U + ) where T.Element == Element, U.Element == Element { + self.init(mergeSorted: first, and: second, retaining: .intersection) + } + + /// Given two sorted sequences treated as sets, create a sorted sequence with + /// all the elements from both arguments, except matching pairs of + /// elements appear only once (*i.e.* set union). + /// + /// To retain both elements of a matched pair, use `.init(mergeSorted:and:)`. + /// + /// - Precondition: Both `first` and `second` must be sorted, and both + /// must be finite. + /// + /// - Parameters: + /// - first: The first sequence spliced. + /// - second: The second sequence spliced. + /// + /// - Complexity: O(`n` + `m`) in space and time, where `n` and `m` are the + /// lengths of the sequence arguments. 
+ @inlinable + public init( + unionOfSorted first: T, and second: U + ) where T.Element == Element, U.Element == Element { + self.init(mergeSorted: first, and: second, retaining: .union) + } +} + +//===----------------------------------------------------------------------===// +// MARK: - mergeSorted(_:_:retaining:sortedBy:) +//-------------------------------------------------------------------------===// + +/// Given two sequences that are both sorted according to the given predicate +/// and treated as sets, apply the given set operation, returning the result as +/// a lazy sequence also sorted by the same predicate. +/// +/// For simply merging the sequences, use `.sum` as the operation. +/// +/// - Precondition: Both `first` and `second` must be sorted according to +/// `areInIncreasingOrder`, and said predicate must be a strict weak ordering +/// over its arguments. +/// +/// - Parameters: +/// - first: The first sequence spliced. +/// - second: The second sequence spliced. +/// - filter: The subset of the merged sequence to keep. If not given, +/// defaults to `.sum`. +/// - areInIncreasingOrder: The criteria for sorting. +/// - Returns: A sequence that lazily generates the merged sequence subset. +/// +/// - Complexity: O(1). The actual iteration takes place in O(`n` + `m`), +/// where `n` and `m` are the lengths of the sequence arguments. +public func mergeSorted( + _ first: T, + _ second: U, + retaining filter: MergerSubset = .sum, + sortedBy areInIncreasingOrder: @escaping (T.Element, U.Element) -> Bool +) -> some Sequence & LazySequenceProtocol +where T.Element == U.Element { + return MergeSortedSequence( + merging: first.lazy, + and: second.lazy, + retaining: filter, + sortedBy: areInIncreasingOrder + ) +} + +/// Given two sorted sequences treated as sets, apply the given set operation, +/// returning the result as a sorted lazy sequence. +/// +/// For simply merging the sequences, use `.sum` as the operation. 
+/// +/// - Precondition: Both `first` and `second` must be sorted. +/// +/// - Parameters: +/// - first: The first sequence spliced. +/// - second: The second sequence spliced. +/// - filter: The subset of the merged sequence to keep. If not given, +/// defaults to `.sum`. +/// - Returns: A sequence that lazily generates the merged sequence subset. +/// +/// - Complexity: O(1). The actual iteration takes place in O(`n` + `m`), +/// where `n` and `m` are the lengths of the sequence arguments. +@inlinable +public func mergeSorted( + _ first: T, _ second: U, retaining filter: MergerSubset = .sum +) -> some Sequence & LazySequenceProtocol +where T.Element == U.Element, T.Element: Comparable { + return mergeSorted(first, second, retaining: filter, sortedBy: <) +} + +//===----------------------------------------------------------------------===// +// MARK: - MergeSortedSequence +//-------------------------------------------------------------------------===// + +/// A sequence that lazily vends the sorted result of a set operation upon +/// two sorted sequences treated as sets spliced together, using a predicate as +/// the sorting criteria for all three sequences involved. +public struct MergeSortedSequence +where First.Element == Second.Element +{ + /// The first source sequence. + let first: First + /// The second source sequence. + let second: Second + /// The subset of elements to retain. + let filter: MergerSubset + /// The sorting predicate. + let areInIncreasingOrder: (Element, Element) -> Bool + + /// Create a sequence using the two given sequences that are sorted according + /// to the given predicate, to vend the sources' elements combined while still + /// sorted according to the predicate, but keeping only the elements that + /// match the given set operation. 
+ init( + merging first: First, + and second: Second, + retaining filter: MergerSubset, + sortedBy areInIncreasingOrder: @escaping (Element, Element) -> Bool + ) { + self.first = first + self.second = second + self.filter = filter + self.areInIncreasingOrder = areInIncreasingOrder + } +} + +extension MergeSortedSequence: Sequence { + public func makeIterator() -> some IteratorProtocol { + return MergeSortedIterator( + first.makeIterator(), + second.makeIterator(), + filter: filter, + predicate: areInIncreasingOrder + ) + } + + public var underestimatedCount: Int { + filter.expectedCountRange( + given: first.underestimatedCount, + and: second.underestimatedCount + ).lowerBound + } +} + +extension MergeSortedSequence: LazySequenceProtocol {} + +//===----------------------------------------------------------------------===// +// MARK: - MergeSortedIterator +//-------------------------------------------------------------------------===// + +/// An iterator that applies a set operation on two virtual sequences, +/// both treated as sets sorted according to a predicate, spliced together to +/// vend a virtual sequence that is also sorted. +public struct MergeSortedIterator< + First: IteratorProtocol, + Second: IteratorProtocol +> where First.Element == Second.Element +{ + /// The first source of elements. + var firstSource: First? + /// The second source of elements. + var secondSource: Second? + /// The subset of elements to emit. + let filter: MergerSubset + /// The sorting predicate. + let areInIncreasingOrder: (Element, Element) throws -> Bool + + /// The latest element read from the first source. + var first: First.Element? + /// The latest element read from the second source. + var second: Second.Element? + /// Whether to keep on iterating. + var isFinished = false + + /// Create an iterator reading from two sources, comparing their respective + /// elements with the predicate, and emitting the given subset of the merged + /// sequence. 
+ init( + _ firstSource: First, + _ secondSource: Second, + filter: MergerSubset, + predicate: @escaping (Element, Element) throws -> Bool + ) { + // Only load the sources that are actually needed. + switch filter { + case .none: + break + case .first: + self.firstSource = firstSource + case .second: + self.secondSource = secondSource + default: + self.firstSource = firstSource + self.secondSource = secondSource + } + + // Other member initialization + self.filter = filter + self.areInIncreasingOrder = predicate + } +} + +extension MergeSortedIterator: IteratorProtocol { + /// Advance to the next element, if any. May throw. + mutating func throwingNext() throws -> First.Element? { + while !isFinished { + // Extract another element from a source if the previous one was purged. + first = first ?? firstSource?.next() + second = second ?? secondSource?.next() + + // Of the latest valid elements, purge the smaller (or both when they are + // equivalent). Return said element if the filter permits, search again + // otherwise. + switch (first, second) { + case let (latestFirst?, latestSecond?) where try areInIncreasingOrder(latestFirst, latestSecond): + defer { first = nil } + guard filter.emitsExclusivesToFirst else { continue } + + return latestFirst + case let (latestFirst?, latestSecond?) where try areInIncreasingOrder(latestSecond, latestFirst): + defer { second = nil } + guard filter.emitsExclusivesToSecond else { continue } + + return latestSecond + case let (latestFirst?, latestSecond?): + // Purge both of the equivalent elements... + defer { + first = nil + + // ...except when the second source's element is only deferred. + if filter != .sum { second = nil } + } + guard filter.emitsSharedElements else { continue } + + // This will not cause mixed-source emission when only the second + // source is being vended, because this case won't ever be reached. + return latestFirst + case (nil, let latestSecond?) 
where filter.emitsExclusivesToSecond: + second = nil + return latestSecond + case (let latestFirst?, nil) where filter.emitsExclusivesToFirst: + first = nil + return latestFirst + default: + // Either both sources are exhausted, or just one is while the remainder + // of the other won't be emitted. + isFinished = true + } + } + return nil + } + + public mutating func next() -> Second.Element? { + return try! throwingNext() + } +} From 154dc243c8dccae0843c089e2518901e650b26ad Mon Sep 17 00:00:00 2001 From: Daryle Walker Date: Thu, 11 Jul 2024 14:27:39 -0400 Subject: [PATCH 02/34] Correct function naming in documentation --- Sources/Algorithms/Documentation.docc/Merging.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Sources/Algorithms/Documentation.docc/Merging.md b/Sources/Algorithms/Documentation.docc/Merging.md index 4469ed44..e7e02d47 100644 --- a/Sources/Algorithms/Documentation.docc/Merging.md +++ b/Sources/Algorithms/Documentation.docc/Merging.md @@ -10,8 +10,8 @@ then generate the result of applying a set operation. - ``Swift/RangeReplaceableCollection/init(mergeSorted:and:retaining:sortedBy:)`` - ``Swift/RangeReplaceableCollection/init(mergeSorted:and:retaining:)`` -- ``Swift/mergeSorted(_:_:retaining:areInIncreasingOrder:)`` -- ``Swift/mergeSorted(_:_:retaining:)`` +- ``mergeSorted(_:_:retaining:areInIncreasingOrder:)`` +- ``mergeSorted(_:_:retaining:)`` ### Applying Set Operations to Sorted Sequences From 335dc2a854db1b3a7366507f5628e9ee06587806 Mon Sep 17 00:00:00 2001 From: Daryle Walker Date: Thu, 11 Jul 2024 14:32:37 -0400 Subject: [PATCH 03/34] Add functions to merge sorted partitions Add methods to MutableCollection that merge two sorted partitions within the same collection. TODO: Add tests and a guide. 
--- .../Algorithms/Documentation.docc/Merging.md | 2 + Sources/Algorithms/MergeSorted.swift | 71 ++++++++++++++++++- 2 files changed, 72 insertions(+), 1 deletion(-) diff --git a/Sources/Algorithms/Documentation.docc/Merging.md b/Sources/Algorithms/Documentation.docc/Merging.md index e7e02d47..5600e4cf 100644 --- a/Sources/Algorithms/Documentation.docc/Merging.md +++ b/Sources/Algorithms/Documentation.docc/Merging.md @@ -12,6 +12,8 @@ then generate the result of applying a set operation. - ``Swift/RangeReplaceableCollection/init(mergeSorted:and:retaining:)`` - ``mergeSorted(_:_:retaining:areInIncreasingOrder:)`` - ``mergeSorted(_:_:retaining:)`` +- ``Swift/MutableCollection/mergeSortedPartitions(across:sortedBy:)`` +- ``Swift/MutableCollection/mergeSortedPartitions(across:)`` ### Applying Set Operations to Sorted Sequences diff --git a/Sources/Algorithms/MergeSorted.swift b/Sources/Algorithms/MergeSorted.swift index e878ee3a..7f690fba 100644 --- a/Sources/Algorithms/MergeSorted.swift +++ b/Sources/Algorithms/MergeSorted.swift @@ -10,7 +10,76 @@ //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// -// MARK: MergerSubset +// MARK: MutableCollection.mergeSortedPartitions(across:sortedBy:) +//-------------------------------------------------------------------------===// + +extension MutableCollection { + /// Given a partition point, + /// where each side is sorted according to the given predicate, + /// rearrange the elements until a single sorted run is formed. + /// + /// Equivalent elements from a given partition have stable ordering in + /// the unified sequence. + /// + /// - Precondition: The `pivot` must be a valid index of this collection. + /// The partitions of `startIndex.. Bool + ) rethrows { + var duplicate = self + try withoutActuallyEscaping(areInIncreasingOrder) { + var iterator = MergeSortedIterator( + self[startIndex.. 
Date: Sat, 13 Jul 2024 15:18:16 -0400 Subject: [PATCH 04/34] Remove the specialized set-operation initializers The set-operation initializers made everything more convoluted versus using a subset parameter in the base initializer. --- Sources/Algorithms/MergeSorted.swift | 85 ---------------------------- 1 file changed, 85 deletions(-) diff --git a/Sources/Algorithms/MergeSorted.swift b/Sources/Algorithms/MergeSorted.swift index 7f690fba..a41ed93e 100644 --- a/Sources/Algorithms/MergeSorted.swift +++ b/Sources/Algorithms/MergeSorted.swift @@ -252,91 +252,6 @@ extension RangeReplaceableCollection where Element: Comparable { } } -extension RangeReplaceableCollection where Element: Comparable { - - /// Given two sorted sequences treated as sets, create a copy of - /// the first sequence without the elements that have a match in - /// the second sequence (*i.e.* set difference). - /// - /// - Precondition: Both `first` and `second` must be sorted, and both - /// must be finite. - /// - /// - Parameters: - /// - first: The first sequence spliced. - /// - second: The second sequence spliced. - /// - /// - Complexity: O(`n` + `m`) in space and time, where `n` and `m` are the - /// lengths of the sequence arguments. - @inlinable - public init( - sorted first: T, withoutElementsFrom second: U - ) where T.Element == Element, U.Element == Element { - self.init(mergeSorted: first, and: second, retaining: .firstWithoutSecond) - } - - /// Given two sorted sequences treated as sets, create a sorted sequence - /// composed of the unmatched elements from both arguments (*i.e.* symmetric - /// set difference). - /// - /// - Precondition: Both `first` and `second` must be sorted, and both - /// must be finite. - /// - /// - Parameters: - /// - first: The first sequence spliced. - /// - second: The second sequence spliced. - /// - /// - Complexity: O(`n` + `m`) in space and time, where `n` and `m` are the - /// lengths of the sequence arguments. 
- @inlinable - public init( - exclusivesOfSorted first: T, andExclusivesOf second: U - ) where T.Element == Element, U.Element == Element { - self.init(mergeSorted: first, and: second, retaining: .symmetricDifference) - } - - /// Given two sorted sequences treated as sets, create a sorted sequence - /// composed of the elements that are present in both arguments, but - /// only one element per matched pair (*i.e.* set intersection). - /// - /// - Precondition: Both `first` and `second` must be sorted, and both - /// must be finite. - /// - /// - Parameters: - /// - first: The first sequence spliced. - /// - second: The second sequence spliced. - /// - /// - Complexity: O(`n` + `m`) in space and time, where `n` and `m` are the - /// lengths of the sequence arguments. - @inlinable - public init( - elementsOfSorted first: T, sharedWith second: U - ) where T.Element == Element, U.Element == Element { - self.init(mergeSorted: first, and: second, retaining: .intersection) - } - - /// Given two sorted sequences treated as sets, create a sorted sequence with - /// all the elements from both arguments, except matching pairs of - /// elements appear only once (*i.e.* set union). - /// - /// To retain both elements of a matched pair, use `.init(mergeSorted:and:)`. - /// - /// - Precondition: Both `first` and `second` must be sorted, and both - /// must be finite. - /// - /// - Parameters: - /// - first: The first sequence spliced. - /// - second: The second sequence spliced. - /// - /// - Complexity: O(`n` + `m`) in space and time, where `n` and `m` are the - /// lengths of the sequence arguments. 
- @inlinable - public init( - unionOfSorted first: T, and second: U - ) where T.Element == Element, U.Element == Element { - self.init(mergeSorted: first, and: second, retaining: .union) - } -} - //===----------------------------------------------------------------------===// // MARK: - mergeSorted(_:_:retaining:sortedBy:) //-------------------------------------------------------------------------===// From 1e9a6117708a19bf7f913f0c2fb85fb20633a64f Mon Sep 17 00:00:00 2001 From: Daryle Walker Date: Sat, 13 Jul 2024 16:29:36 -0400 Subject: [PATCH 05/34] Anonymize an unused variable --- Sources/Algorithms/MergeSorted.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Sources/Algorithms/MergeSorted.swift b/Sources/Algorithms/MergeSorted.swift index a41ed93e..165ddbd0 100644 --- a/Sources/Algorithms/MergeSorted.swift +++ b/Sources/Algorithms/MergeSorted.swift @@ -449,7 +449,7 @@ extension MergeSortedIterator: IteratorProtocol { guard filter.emitsExclusivesToSecond else { continue } return latestSecond - case let (latestFirst?, latestSecond?): + case (let latestFirst?, _?): // Purge both of the equivalent elements... 
defer { first = nil From db85a4fbd4f6a2e7327c1799abdd3902c42f1c8a Mon Sep 17 00:00:00 2001 From: Daryle Walker Date: Sat, 13 Jul 2024 18:55:27 -0400 Subject: [PATCH 06/34] Make some opaque types explicit --- Sources/Algorithms/MergeSorted.swift | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/Sources/Algorithms/MergeSorted.swift b/Sources/Algorithms/MergeSorted.swift index 165ddbd0..1f56806f 100644 --- a/Sources/Algorithms/MergeSorted.swift +++ b/Sources/Algorithms/MergeSorted.swift @@ -281,7 +281,7 @@ public func mergeSorted( _ second: U, retaining filter: MergerSubset = .sum, sortedBy areInIncreasingOrder: @escaping (T.Element, U.Element) -> Bool -) -> some Sequence & LazySequenceProtocol +) -> MergeSortedSequence, LazySequence> where T.Element == U.Element { return MergeSortedSequence( merging: first.lazy, @@ -310,7 +310,7 @@ where T.Element == U.Element { @inlinable public func mergeSorted( _ first: T, _ second: U, retaining filter: MergerSubset = .sum -) -> some Sequence & LazySequenceProtocol +) -> MergeSortedSequence, LazySequence> where T.Element == U.Element, T.Element: Comparable { return mergeSorted(first, second, retaining: filter, sortedBy: <) } @@ -352,13 +352,10 @@ where First.Element == Second.Element } extension MergeSortedSequence: Sequence { - public func makeIterator() -> some IteratorProtocol { - return MergeSortedIterator( - first.makeIterator(), - second.makeIterator(), - filter: filter, - predicate: areInIncreasingOrder - ) + public func makeIterator() + -> MergeSortedIterator { + return .init(first.makeIterator(), second.makeIterator(), filter: filter, + predicate: areInIncreasingOrder) } public var underestimatedCount: Int { From 34dc401056e48b9952dbf2d32b804024c27cbaf3 Mon Sep 17 00:00:00 2001 From: Daryle Walker Date: Sat, 13 Jul 2024 21:08:46 -0400 Subject: [PATCH 07/34] Clean up code for eager mergers Change the code for the prime eager merger function to reuse the base sequence and iterator easier. 
This mandated that the merger sequence object had to accept throwing predicates. This change is internal and users cannot exploit it. --- Sources/Algorithms/MergeSorted.swift | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/Sources/Algorithms/MergeSorted.swift b/Sources/Algorithms/MergeSorted.swift index 1f56806f..faa3a36e 100644 --- a/Sources/Algorithms/MergeSorted.swift +++ b/Sources/Algorithms/MergeSorted.swift @@ -208,15 +208,12 @@ extension RangeReplaceableCollection { where T.Element == Element, U.Element == Element { self.init() - self.reserveCapacity( - filter.expectedCountRange(given: first.underestimatedCount, - and: second.underestimatedCount).lowerBound - ) try withoutActuallyEscaping(areInIncreasingOrder) { - var iterator = MergeSortedIterator(first.makeIterator(), - second.makeIterator(), - filter: filter, - predicate: $0) + let sequence = MergeSortedSequence(merging: first, and: second, + retaining: filter, sortedBy: $0) + self.reserveCapacity(sequence.underestimatedCount) + + var iterator = sequence.makeIterator() while let current = try iterator.throwingNext() { self.append(current) } @@ -332,7 +329,7 @@ where First.Element == Second.Element /// The subset of elements to retain. let filter: MergerSubset /// The sorting predicate. 
- let areInIncreasingOrder: (Element, Element) -> Bool + let areInIncreasingOrder: (Element, Element) throws -> Bool /// Create a sequence using the two given sequences that are sorted according /// to the given predicate, to vend the sources' elements combined while still @@ -342,7 +339,7 @@ where First.Element == Second.Element merging first: First, and second: Second, retaining filter: MergerSubset, - sortedBy areInIncreasingOrder: @escaping (Element, Element) -> Bool + sortedBy areInIncreasingOrder: @escaping (Element, Element) throws -> Bool ) { self.first = first self.second = second From b627b6bb2ba8f7dd062737f2bde6b98e30dbf616 Mon Sep 17 00:00:00 2001 From: Daryle Walker Date: Sat, 13 Jul 2024 21:35:48 -0400 Subject: [PATCH 08/34] Add tests for merging --- .../MergeSortedTests.swift | 167 ++++++++++++++++++ 1 file changed, 167 insertions(+) create mode 100644 Tests/SwiftAlgorithmsTests/MergeSortedTests.swift diff --git a/Tests/SwiftAlgorithmsTests/MergeSortedTests.swift b/Tests/SwiftAlgorithmsTests/MergeSortedTests.swift new file mode 100644 index 00000000..62fecb92 --- /dev/null +++ b/Tests/SwiftAlgorithmsTests/MergeSortedTests.swift @@ -0,0 +1,167 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift Algorithms open source project +// +// Copyright (c) 2024 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// +//===----------------------------------------------------------------------===// + +import XCTest +import Algorithms + +final class MergeSortedTests: XCTestCase { + /// Check the convenience initializers for `MergerSubset`. 
+ func testMergerSubsetInitializers() { + XCTAssertEqual(MergerSubset(), .sum) + + XCTAssertEqual( + MergerSubset(keepExclusivesToFirst: false, keepExclusivesToSecond: false, + keepSharedElements: false), + .none + ) + XCTAssertEqual( + MergerSubset(keepExclusivesToFirst: true, keepExclusivesToSecond: false, + keepSharedElements: false), + .firstWithoutSecond + ) + XCTAssertEqual( + MergerSubset(keepExclusivesToFirst: false, keepExclusivesToSecond: true, + keepSharedElements: false), + .secondWithoutFirst + ) + XCTAssertEqual( + MergerSubset(keepExclusivesToFirst: false, keepExclusivesToSecond: false, + keepSharedElements: true), + .intersection + ) + XCTAssertEqual( + MergerSubset(keepExclusivesToFirst: true, keepExclusivesToSecond: true, + keepSharedElements: false), + .symmetricDifference + ) + XCTAssertEqual( + MergerSubset(keepExclusivesToFirst: true, keepExclusivesToSecond: false, + keepSharedElements: true), + .first + ) + XCTAssertEqual( + MergerSubset(keepExclusivesToFirst: false, keepExclusivesToSecond: true, + keepSharedElements: true), + .second + ) + XCTAssertEqual( + MergerSubset(keepExclusivesToFirst: true, keepExclusivesToSecond: true, + keepSharedElements: true), + .union + ) + } + + /// Check the subset emission flags for `MergerSubset`. + func testMergerSubsetFlags() { + XCTAssertEqualSequences( + MergerSubset.allCases, + [.none, .firstWithoutSecond, .secondWithoutFirst, .symmetricDifference, + .intersection, .first, .second, .union, .sum] + ) + + XCTAssertEqualSequences( + MergerSubset.allCases.map(\.emitsExclusivesToFirst), + [false, true, false, true, false, true, false, true, true] + ) + XCTAssertEqualSequences( + MergerSubset.allCases.map(\.emitsExclusivesToSecond), + [false, false, true, true, false, false, true, true, true] + ) + XCTAssertEqualSequences( + MergerSubset.allCases.map(\.emitsSharedElements), + [false, false, false, false, true, true, true, true, true] + ) + } + + /// Check the lazily-generated merger/subset sequences. 
+ func testLazyMergers() { + let low = 0..<7, high = 3..<10 + XCTAssertEqualSequences(mergeSorted(low, high), + [0, 1, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 8, 9]) + XCTAssertLazySequence(mergeSorted(low, high)) + + let sequences = Dictionary(uniqueKeysWithValues: MergerSubset.allCases.map { + ($0, mergeSorted(low, high, retaining: $0)) + }) + XCTAssertEqualSequences(sequences[.none]!, EmptyCollection()) + XCTAssertEqualSequences(sequences[.firstWithoutSecond]!, 0..<3) + XCTAssertEqualSequences(sequences[.secondWithoutFirst]!, 7..<10) + XCTAssertEqualSequences(sequences[.symmetricDifference]!, [0, 1, 2, 7, 8, 9]) + XCTAssertEqualSequences(sequences[.intersection]!, 3..<7) + XCTAssertEqualSequences(sequences[.first]!, low) + XCTAssertEqualSequences(sequences[.second]!, high) + XCTAssertEqualSequences(sequences[.union]!, 0..<10) + XCTAssertEqualSequences(sequences[.sum]!, [0, 1, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 8, 9]) + + XCTAssertLessThanOrEqual(sequences[.none]!.underestimatedCount, 0) + XCTAssertLessThanOrEqual(sequences[.firstWithoutSecond]!.underestimatedCount, 3) + XCTAssertLessThanOrEqual(sequences[.secondWithoutFirst]!.underestimatedCount, 3) + XCTAssertLessThanOrEqual(sequences[.symmetricDifference]!.underestimatedCount, 6) + XCTAssertLessThanOrEqual(sequences[.intersection]!.underestimatedCount, 4) + XCTAssertLessThanOrEqual(sequences[.first]!.underestimatedCount, 7) + XCTAssertLessThanOrEqual(sequences[.second]!.underestimatedCount, 7) + XCTAssertLessThanOrEqual(sequences[.union]!.underestimatedCount, 7) + XCTAssertLessThanOrEqual(sequences[.sum]!.underestimatedCount, 14) + + // This exercises code missed by the `sequences` tests. 
+ let reversed = Dictionary(uniqueKeysWithValues: MergerSubset.allCases.map { + ($0, mergeSorted(high, low, retaining: $0)) + }) + XCTAssertEqualSequences(reversed[.none]!, EmptyCollection()) + XCTAssertEqualSequences(reversed[.firstWithoutSecond]!, 7..<10) + XCTAssertEqualSequences(reversed[.secondWithoutFirst]!, 0..<3) + XCTAssertEqualSequences(reversed[.symmetricDifference]!, [0, 1, 2, 7, 8, 9]) + XCTAssertEqualSequences(reversed[.intersection]!, 3..<7) + XCTAssertEqualSequences(reversed[.first]!, high) + XCTAssertEqualSequences(reversed[.second]!, low) + XCTAssertEqualSequences(reversed[.union]!, 0..<10) + XCTAssertEqualSequences(reversed[.sum]!, [0, 1, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 8, 9]) + } + + /// Check the eager merger/subset sequences. + func testEagerMergers() { + let low = 0..<7, high = 3..<10 + XCTAssertEqualSequences(Array(mergeSorted: low, and: high), + [0, 1, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 8, 9]) + + let sequences = Dictionary(uniqueKeysWithValues: MergerSubset.allCases.map { + ($0, Array(mergeSorted: low, and: high, retaining: $0)) + }) + XCTAssertEqualSequences(sequences[.none]!, EmptyCollection()) + XCTAssertEqualSequences(sequences[.firstWithoutSecond]!, 0..<3) + XCTAssertEqualSequences(sequences[.secondWithoutFirst]!, 7..<10) + XCTAssertEqualSequences(sequences[.symmetricDifference]!, [0, 1, 2, 7, 8, 9]) + XCTAssertEqualSequences(sequences[.intersection]!, 3..<7) + XCTAssertEqualSequences(sequences[.first]!, low) + XCTAssertEqualSequences(sequences[.second]!, high) + XCTAssertEqualSequences(sequences[.union]!, 0..<10) + XCTAssertEqualSequences(sequences[.sum]!, [0, 1, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 8, 9]) + } + + /// Check the more-memory version of merging two sorted partitions. + func testFastPartitionMerge() { + // Degenerate count of elements. 
+ var empty = EmptyCollection(), single = CollectionOfOne(1) + XCTAssertEqualSequences(empty, []) + XCTAssertEqualSequences(single, [1]) + empty.mergeSortedPartitions(across: empty.startIndex) + single.mergeSortedPartitions(across: single.startIndex) + XCTAssertEqualSequences(empty, []) + XCTAssertEqualSequences(single, [1]) + + // Each side has multiple elements. + let low = 0..<7, high = 3..<10, pivot = low.count + var multiple = Array(chain(low, high)) + XCTAssertEqualSequences(multiple, [0, 1, 2, 3, 4, 5, 6, 3, 4, 5, 6, 7, 8, 9]) + multiple.mergeSortedPartitions(across: pivot) + XCTAssertEqualSequences(multiple, [0, 1, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 8, 9]) + } +} From 482a03f6670d95d21d8ebcdc210830d5f5a4b8f3 Mon Sep 17 00:00:00 2001 From: Daryle Walker Date: Sat, 13 Jul 2024 21:52:15 -0400 Subject: [PATCH 09/34] Clean up some code; add sanity check --- Sources/Algorithms/MergeSorted.swift | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/Sources/Algorithms/MergeSorted.swift b/Sources/Algorithms/MergeSorted.swift index faa3a36e..54f7b9a9 100644 --- a/Sources/Algorithms/MergeSorted.swift +++ b/Sources/Algorithms/MergeSorted.swift @@ -39,18 +39,16 @@ extension MutableCollection { ) rethrows { var duplicate = self try withoutActuallyEscaping(areInIncreasingOrder) { - var iterator = MergeSortedIterator( - self[startIndex.. Date: Sat, 13 Jul 2024 23:43:32 -0400 Subject: [PATCH 10/34] Clarify the meaning of a function parameter --- Sources/Algorithms/MergeSorted.swift | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Sources/Algorithms/MergeSorted.swift b/Sources/Algorithms/MergeSorted.swift index 54f7b9a9..dc4b462f 100644 --- a/Sources/Algorithms/MergeSorted.swift +++ b/Sources/Algorithms/MergeSorted.swift @@ -27,8 +27,8 @@ extension MutableCollection { /// and said predicate must be a strict weak ordering. /// /// - Parameters: - /// - pivot: The index dividing the partitions. 
- /// May point to an element, or be at `endIndex`. + /// - pivot: The index of the first element of the second partition, + /// or `endIndex` if said partition is empty. /// - areInIncreasingOrder: The criteria for sorting. /// /// - Complexity: O(*n*) in space and time, where `n` is the length of @@ -65,8 +65,8 @@ extension MutableCollection where Element: Comparable { /// where the partitions of `startIndex.. Date: Mon, 15 Jul 2024 15:44:56 -0400 Subject: [PATCH 11/34] Touch up tenses and permissions Since the filter type's values model future changes, change the tense of its description from past to future. Correct some items' publicity, hiding internal details. --- Sources/Algorithms/MergeSorted.swift | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/Sources/Algorithms/MergeSorted.swift b/Sources/Algorithms/MergeSorted.swift index dc4b462f..d4ab300a 100644 --- a/Sources/Algorithms/MergeSorted.swift +++ b/Sources/Algorithms/MergeSorted.swift @@ -80,7 +80,7 @@ extension MutableCollection where Element: Comparable { // MARK: - MergerSubset //-------------------------------------------------------------------------===// -/// Description of which elements of a merger were retained. +/// Description of which elements of a merger will be retained. public enum MergerSubset: UInt, CaseIterable { /// Keep no elements. case none @@ -110,6 +110,7 @@ extension MergerSubset { @inlinable public var emitsExclusivesToSecond: Bool { rawValue & 0b010 != 0 } /// Whether the elements shared by both sources are emitted. + @inlinable public var emitsSharedElements: Bool { rawValue & 0b100 != 0 } } @@ -145,7 +146,7 @@ extension MergerSubset { /// - The sources have just partial overlap. /// /// Both inputs must be nonnegative. - @usableFromInline + fileprivate func expectedCountRange(given firstLength: Int, and secondLength: Int) -> ClosedRange { /// Generate a range for a single value without repeating its expression. 
func singleValueRange(_ v: Int) -> ClosedRange { return v...v } @@ -333,7 +334,7 @@ where First.Element == Second.Element /// to the given predicate, to vend the sources' elements combined while still /// sorted according to the predicate, but keeping only the elements that /// match the given set operation. - init( + fileprivate init( merging first: First, and second: Second, retaining filter: MergerSubset, @@ -385,16 +386,16 @@ public struct MergeSortedIterator< let areInIncreasingOrder: (Element, Element) throws -> Bool /// The latest element read from the first source. - var first: First.Element? + fileprivate var first: First.Element? /// The latest element read from the second source. - var second: Second.Element? + fileprivate var second: Second.Element? /// Whether to keep on iterating. - var isFinished = false + fileprivate var isFinished = false /// Create an iterator reading from two sources, comparing their respective /// elements with the predicate, and emitting the given subset of the merged /// sequence. - init( + fileprivate init( _ firstSource: First, _ secondSource: Second, filter: MergerSubset, @@ -421,7 +422,7 @@ public struct MergeSortedIterator< extension MergeSortedIterator: IteratorProtocol { /// Advance to the next element, if any. May throw. - mutating func throwingNext() throws -> First.Element? { + fileprivate mutating func throwingNext() throws -> First.Element? { while !isFinished { // Extract another element from a source if the previous one was purged. first = first ?? firstSource?.next() From eb3147394a633fe08468f5a20f49add1207baeb4 Mon Sep 17 00:00:00 2001 From: Daryle Walker Date: Fri, 19 Jul 2024 01:13:56 -0400 Subject: [PATCH 12/34] Add functions to merge sorted partitions in-place Add methods to MutableCollection that merge two sorted partitions within the same collection, but not requiring extra scratch space. 
--- Sources/Algorithms/MergeSorted.swift | 130 ++++++++++++++++++ .../MergeSortedTests.swift | 62 +++++++++ 2 files changed, 192 insertions(+) diff --git a/Sources/Algorithms/MergeSorted.swift b/Sources/Algorithms/MergeSorted.swift index d4ab300a..80b0ad3b 100644 --- a/Sources/Algorithms/MergeSorted.swift +++ b/Sources/Algorithms/MergeSorted.swift @@ -76,6 +76,136 @@ extension MutableCollection where Element: Comparable { } } +//===----------------------------------------------------------------------===// +// MARK: - MutableCollection.mergeSortedPartitionsInPlace(across:sortedBy:) +//-------------------------------------------------------------------------===// + +extension MutableCollection where Self: BidirectionalCollection { + /// Given a partition point, + /// where each side is sorted according to the given predicate, + /// rearrange the elements until a single sorted run is formed, + /// using minimal scratch memory. + /// + /// Equivalent elements from a given partition have stable ordering in + /// the unified sequence. + /// + /// - Precondition: The `pivot` must be a valid index of this collection. + /// The partitions of `startIndex.. Bool + ) rethrows { + // The pivot needs to be an interior element. + // (This therefore requires `self` to have a length of at least 2.) + guard pivot > startIndex, pivot < endIndex else { return } + + // Since each major partition is already sorted, we only need to swap the + // highest ranks of the starting partition with the lowest ranks of the + // trailing partition. + // + // - Zones: |--[1]--|--------[2]--------|------[3]------|---[4]---| + // - Before: ...[<=p], [x > p],... [>= x]; [p],... [<= x], [> x],... + // - After: ...[<=p], [p],... [<= x]; [x > p],... [>= x], [> x],... + // - Zones: |--[1]--|------[3]------|--------[2]--------|---[4]---| + // + // In other words: we're swapping the positions of zones [2] and [3]. + // + // Afterwards, the new starting partition of [1] and [3] ends up naturally + // sorted. 
However, the highest ranked element of [2] may rank higher than + // the lowest ranked element of [4], so the trailing partition ends up + // needing to call this function itself. + + // Find starting index of [2]. + let lowPivot: Index + do { + // Among the elements before the pivot, find the reverse-earliest that has + // at most an equivalent rank as the pivot element. + let pivotValue = self[pivot], searchSpace = self[.. pivotValue) → !(pivotValue < $0) + return try !areInIncreasingOrder(pivotValue, $0) + }), + beforeLowPivot < searchSpace.endIndex { + // In forward space, the element after the one just found will rank + // higher than the pivot element. + lowPivot = beforeLowPivot.base + + // There may be no prefix elements that outrank the pivot element. + // In other words, [2] is empty. + // (Therefore this collection is already globally sorted.) + guard lowPivot < pivot else { return } + } else { + // All the prefix elements rank higher than the pivot element. + // In other words, [1] is empty. + lowPivot = startIndex + } + } + + // Find the ending index of [3]. + let highPivot: Index + do { + // Find the earliest post-pivot element that ranks higher than the element + // from the previous step. If there isn't a match, i.e. [4] is empty, the + // entire post-pivot partition will be swapped. + let lowPivotValue = self[lowPivot] + highPivot = try self[pivot...].partitioningIndex { + try areInIncreasingOrder(lowPivotValue, $0) + } + } + // [3] starts with the pivot element, so it can never be empty. + + // Actually swap [2] and [3], then compare [2] and [4]. 
+ let exLowPivot = rotate(subrange: lowPivot..() + XCTAssertEqualSequences(empty, []) + empty.mergeSortedPartitionsInPlace(across: empty.startIndex) + XCTAssertEqualSequences(empty, []) + empty.mergeSortedPartitionsInPlace(across: empty.endIndex) + XCTAssertEqualSequences(empty, []) + + var single = CollectionOfOne(2) + XCTAssertEqualSequences(single, [2]) + single.mergeSortedPartitionsInPlace(across: single.startIndex) + XCTAssertEqualSequences(single, [2]) + single.mergeSortedPartitionsInPlace(across: single.endIndex) + XCTAssertEqualSequences(single, [2]) + + // No sub-partitions empty. + var sample1 = [0, 2, 4, 6, 8, 10, 1, 3, 5, 7, 9] + sample1.mergeSortedPartitionsInPlace(across: 6) + XCTAssertEqualSequences(sample1, 0...10) + + // No pre-pivot elements less than or equal to the pivot element. + var sample2 = [4, 6, 8, 3, 5, 7] + sample2.mergeSortedPartitionsInPlace(across: 3) + XCTAssertEqualSequences(sample2, 3...8) + + // No pre-pivot elements greater than the pivot element. + var sample3 = [3, 4, 5, 6, 7, 8] + sample3.mergeSortedPartitionsInPlace(across: 3) + XCTAssertEqualSequences(sample3, 3...8) + + // The greatest elements are in the pre-pivot partition. + var sample4 = [3, 7, 8, 9, 4, 5, 6] + sample4.mergeSortedPartitionsInPlace(across: 4) + XCTAssertEqualSequences(sample4, 3...9) + + /// An error type. + enum MyError: Error { + /// An error state. + case anError + } + + // Test throwing. 
+ var sample5 = [5, 3], counter = 0, limit = 1 + let compare: (Int, Int) throws -> Bool = { + guard counter < limit else { throw MyError.anError } + defer { counter += 1 } + + return $0 < $1 + } + XCTAssertThrowsError(try sample5.mergeSortedPartitionsInPlace(across: 1, sortedBy: compare)) + + sample5 = [2, 2, 4, 20, 3, 3, 5, 7] + counter = 0 ; limit = 6 + XCTAssertThrowsError(try sample5.mergeSortedPartitionsInPlace(across: 4, sortedBy: compare)) + XCTAssertEqualSequences(sample5, [2, 2, 4, 20, 3, 3, 5, 7]) + counter = 0 ; limit = .max + XCTAssertNoThrow(try sample5.mergeSortedPartitionsInPlace(across: 4, sortedBy: compare)) + XCTAssertEqualSequences(sample5, [2, 2, 3, 3, 4, 5, 7, 20]) + } } From 78857288745785c9c2fb2c619a898789c834b195 Mon Sep 17 00:00:00 2001 From: Daryle Walker Date: Fri, 19 Jul 2024 14:05:47 -0400 Subject: [PATCH 13/34] Update documentation file Remove references to deleted initializers. Add references to the in-place partition-merging functions. --- Sources/Algorithms/Documentation.docc/Merging.md | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/Sources/Algorithms/Documentation.docc/Merging.md b/Sources/Algorithms/Documentation.docc/Merging.md index 5600e4cf..79ea63b8 100644 --- a/Sources/Algorithms/Documentation.docc/Merging.md +++ b/Sources/Algorithms/Documentation.docc/Merging.md @@ -14,13 +14,8 @@ then generate the result of applying a set operation. 
- ``mergeSorted(_:_:retaining:)`` - ``Swift/MutableCollection/mergeSortedPartitions(across:sortedBy:)`` - ``Swift/MutableCollection/mergeSortedPartitions(across:)`` - -### Applying Set Operations to Sorted Sequences - -``Swift/RangeReplaceableCollection/init(sorted:withoutElementsFrom:)`` -``Swift/RangeReplaceableCollection/init(exclusivesOfSorted:andExclusivesOf:)`` -``Swift/RangeReplaceableCollection/init(elementsOfSorted:sharedWith:)`` -``Swift/RangeReplaceableCollection/init(unionOfSorted:and:)`` +- ``Swift/MutableCollection/mergeSortedPartitionsInPlace(across:sortedBy:)`` +- ``Swift/MutableCollection/mergeSortedPartitionsInPlace(across:)`` ### Supporting Types From c48271797457f8f64247ee7ef7bb18e23867c283 Mon Sep 17 00:00:00 2001 From: Daryle Walker Date: Fri, 19 Jul 2024 21:08:56 -0400 Subject: [PATCH 14/34] Add guide documentation Also include a test for the code that appears in the new Guide. --- Guides/MergeSorted.md | 143 ++++++++++++++++++ Guides/README.md | 2 + .../MergeSortedTests.swift | 22 +++ 3 files changed, 167 insertions(+) create mode 100644 Guides/MergeSorted.md diff --git a/Guides/MergeSorted.md b/Guides/MergeSorted.md new file mode 100644 index 00000000..9ca80abb --- /dev/null +++ b/Guides/MergeSorted.md @@ -0,0 +1,143 @@ +# Merge Sorted + +[[Source](https://github.com/apple/swift-algorithms/blob/main/Sources/Algorithms/MergeSorted.swift) | + [Tests](https://github.com/apple/swift-algorithms/blob/main/Tests/SwiftAlgorithmsTests/MergeSortedTests.swift)] + +Splice two sequences that use the same sorting criteria into a sequence that +is also sorted with that criteria. + +If the sequences are sorted with something besides the less-than operator (`<`), +then a predicate can be supplied: + +```swift +let merged = mergeSorted([10, 4, 0, 0, -3], [20, 6, 1, -1, -5], sortedBy: >) +print(Array(merged)) +// [20, 10, 6, 4, 1, 0, 0, -1, -3, -5] +``` + +Sorted sequences can be treated as (multi-)sets. 
+Due to being sorted, +distinguishing elements that are shared between sequences or +are exclusive to a sequence can be determined in a reasonable time frame. +Set operations take advantage of the categories of sharing, +so applying operations can be done in-line during the merging: + +```swift +let first = [0, 1, 1, 2, 5, 10], second = [-1, 0, 1, 2, 2, 7, 10, 20] +print(Array(mergeSorted(first, second, retaining: .union))) +print(Array(mergeSorted(first, second, retaining: .intersection))) +print(Array(mergeSorted(first, second, retaining: .secondWithoutFirst))) +print(Array(mergeSorted(first, second, retaining: .sum))) // Standard merge! +/* +[-1, 0, 1, 1, 2, 2, 5, 7, 10, 20] +[0, 1, 2, 10] +[-1, 2, 7, 20] +[-1, 0, 0, 1, 1, 1, 2, 2, 2, 5, 7, 10, 10, 20] +*/ +``` + +## Detailed Design + +The merging algorithm can be applied in three domains: + +- A free function taking the source sequences. +- An initializer for `RangeReplaceableCollection`, + that takes the source sequences and then + creates the result in-place. +- A function over a `MutableCollection`, + where the two sources are adjancent partitions of the collection. + +Each version can take a parameter for which set operation is to be applied. +This defaults to `.sum`, which results in a conventional merge. +There are variants without an ordering predicate needing to be supplied, +because they default to the less-than operator (`<`) as the predicate. + +```swift +// Free-function form. Also used for lazy evaluation. 
+ +public func mergeSorted(_ first: T, _ second: U, retaining filter: MergerSubset = .sum, sortedBy areInIncreasingOrder: @escaping (T.Element, U.Element) -> Bool) -> MergeSortedSequence, LazySequence> where T : Sequence, U : Sequence, T.Element == U.Element + +@inlinable public func mergeSorted(_ first: T, _ second: U, retaining filter: MergerSubset = .sum) -> MergeSortedSequence, LazySequence> where T : Sequence, U : Sequence, T.Element : Comparable, T.Element == U.Element + +// Initializer form. + +extension RangeReplaceableCollection { + public init(mergeSorted first: T, and second: U, retaining filter: MergerSubset = .sum, sortedBy areInIncreasingOrder: (Element, Element) throws -> Bool) rethrows where T : Sequence, U : Sequence, Self.Element == T.Element, T.Element == U.Element +} + +extension RangeReplaceableCollection where Self.Element : Comparable { + @inlinable public init(mergeSorted first: T, and second: U, retaining filter: MergerSubset = .sum) where T : Sequence, U : Sequence, Self.Element == T.Element, T.Element == U.Element +} + +// Two-partition merging, optimizing for speed. + +extension MutableCollection { + public mutating func mergeSortedPartitions(across pivot: Index, sortedBy areInIncreasingOrder: (Element, Element) throws -> Bool) rethrows +} + +extension MutableCollection where Self.Element : Comparable { + @inlinable public mutating func mergeSortedPartitions(across pivot: Index) +} + +// Two-partition merging, optimizing for space. + +extension MutableCollection where Self : BidirectionalCollection { + public mutating func mergeSortedPartitionsInPlace(across pivot: Index, sortedBy areInIncreasingOrder: (Element, Element) throws -> Bool) rethrows +} + +extension MutableCollection where Self : BidirectionalCollection, Self.Element : Comparable { + @inlinable public mutating func mergeSortedPartitionsInPlace(across pivot: Index) +} +``` + +Desired subsets are described by a new type. 
+ +```swift +public enum MergerSubset : UInt, CaseIterable { + case none, firstWithoutSecond, secondWithoutFirst, symmetricDifference, + intersection, first, second, union, + sum +} +``` + +Every set-operation combination is provided, although some are degenerate. + +Most of the merging functions use these support types: + +```swift +public struct MergeSortedSequence + : Sequence, LazySequenceProtocol +where First : Sequence, + Second : Sequence, + First.Element == Second.Element +{ /*...*/ } + +public struct MergeSortedIterator + : IteratorProtocol +where First : IteratorProtocol, + Second : IteratorProtocol, + First.Element == Second.Element +{ /*...*/ } +``` + +The merges via: + +- The free functions +- The initializers +- The speed-optimized partition-merge + +Operate in **O(** _n_ `+` _m_ **)** for both space and time, +where *n* and *m* are the lengths of the two operand sequences. +The space-optimized partition merge for a collection of length *n* operates in +**O(** 1 **)** for space, +with **O(** _n_ **)** for time when the collection is not random-access, +and *???* for random-access collections. + +### Naming + +Many merging functions use the word "merge" in their name. + +**[C++]:** Provides the `merge` and `inplace_merge` functions. +Set operations are provided by +the `set_union`, `set_intersection`, `set_difference`, and +`set_symmetric_difference` functions. diff --git a/Guides/README.md b/Guides/README.md index d4894882..b0fbbcf3 100644 --- a/Guides/README.md +++ b/Guides/README.md @@ -21,6 +21,7 @@ These guides describe the design and intention behind the APIs included in the ` - [`cycled()`, `cycled(times:)`](https://github.com/apple/swift-algorithms/blob/main/Guides/Cycle.md): Repeats the elements of a collection forever or a set number of times. 
- [`joined(by:)`](https://github.com/apple/swift-algorithms/blob/main/Guides/Joined.md): Concatenate sequences of sequences, using an element or sequence as a separator, or using a closure to generate each separator. - [`product(_:_:)`](https://github.com/apple/swift-algorithms/blob/main/Guides/Product.md): Iterates over all the pairs of two collections; equivalent to nested `for`-`in` loops. +- [`mergeSorted(_:_:sortedBy:)`](https://github.com/apple/swift-algorithms/blob/main/Guides/MergeSorted.md): Merge two sorted sequences together. #### Subsetting operations @@ -33,6 +34,7 @@ These guides describe the design and intention behind the APIs included in the ` - [`trimmingPrefix(while:)`, `trimmingSuffix(while)`, `trimming(while:)`](https://github.com/apple/swift-algorithms/blob/main/Guides/Trim.md): Returns a slice by trimming elements from a collection's start, end, or both. The mutating `trim...` methods trim a collection in place. - [`uniqued()`, `uniqued(on:)`](https://github.com/apple/swift-algorithms/blob/main/Guides/Unique.md): The unique elements of a collection, preserving their order. - [`minAndMax()`, `minAndMax(by:)`](https://github.com/apple/swift-algorithms/blob/main/Guides/MinMax.md): Returns the smallest and largest elements of a sequence. +- The `mergeSorted(_:_:retaining:sortedBy:)` function is a variant from the "Combining collections" section above that adds a parameter to apply a set operation in-line with the merge. 
#### Partial sorting diff --git a/Tests/SwiftAlgorithmsTests/MergeSortedTests.swift b/Tests/SwiftAlgorithmsTests/MergeSortedTests.swift index d0c3f9c6..1ad3fd0b 100644 --- a/Tests/SwiftAlgorithmsTests/MergeSortedTests.swift +++ b/Tests/SwiftAlgorithmsTests/MergeSortedTests.swift @@ -226,4 +226,26 @@ final class MergeSortedTests: XCTestCase { XCTAssertNoThrow(try sample5.mergeSortedPartitionsInPlace(across: 4, sortedBy: compare)) XCTAssertEqualSequences(sample5, [2, 2, 3, 3, 4, 5, 7, 20]) } + + /// Check the code from documentation. + func testSampleCode() { + // From the guide. + let guide1 = [10, 4, 0, 0, -3], guide2 = [20, 6, 1, -1, -5] + let mergedGuides = mergeSorted(guide1, guide2, sortedBy: >) + XCTAssertEqualSequences(mergedGuides, [20, 10, 6, 4, 1, 0, 0, -1, -3, -5]) + + let guide3 = [0, 1, 1, 2, 5, 10], guide4 = [-1, 0, 1, 2, 2, 7, 10, 20] + XCTAssertEqualSequences(mergeSorted(guide3, guide4, retaining: .union), + [-1, 0, 1, 1, 2, 2, 5, 7, 10, 20]) + XCTAssertEqualSequences(mergeSorted(guide3, guide4, retaining: .intersection), + [0, 1, 2, 10]) + XCTAssertEqualSequences(mergeSorted(guide3, guide4, retaining: .firstWithoutSecond), + [1, 5]) + XCTAssertEqualSequences(mergeSorted(guide3, guide4, retaining: .secondWithoutFirst), + [-1, 2, 7, 20]) + XCTAssertEqualSequences(mergeSorted(guide3, guide4, retaining: .symmetricDifference), + [-1, 1, 2, 5, 7, 20]) + XCTAssertEqualSequences(mergeSorted(guide3, guide4, retaining: .sum), + [-1, 0, 0, 1, 1, 1, 2, 2, 2, 5, 7, 10, 10, 20]) + } } From 7276f96fe611e78418c080c50dd87c9434308636 Mon Sep 17 00:00:00 2001 From: Daryle Walker Date: Sat, 20 Jul 2024 17:10:47 -0400 Subject: [PATCH 15/34] Refine text for the Guide --- Guides/MergeSorted.md | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/Guides/MergeSorted.md b/Guides/MergeSorted.md index 9ca80abb..e1d5013a 100644 --- a/Guides/MergeSorted.md +++ b/Guides/MergeSorted.md @@ -46,11 +46,18 @@ The merging algorithm can be 
applied in three domains: creates the result in-place. - A function over a `MutableCollection`, where the two sources are adjancent partitions of the collection. - -Each version can take a parameter for which set operation is to be applied. -This defaults to `.sum`, which results in a conventional merge. -There are variants without an ordering predicate needing to be supplied, -because they default to the less-than operator (`<`) as the predicate. + +The free-function and initializer forms can take an optional parameter, +that indicates which subset of the merge will be kept. +For instance, when using `.intersection`, only elements that appear in +both sources will be returned; any non-matches will be skipped over. +If a subset argument is not given, it defaults to `.sum`, +which represents a conventional merge. +The form for adjacent partitions cannot use subsetting, +always performing with a subset of `.sum`. +All of the forms take a parameter for the ordering predicate. +If the element type conforms to `Comparable`, +a predicate can be omitted to use a default of the less-than operator (`<`). ```swift // Free-function form. Also used for lazy evaluation. @@ -90,13 +97,15 @@ extension MutableCollection where Self : BidirectionalCollection, Self.Element : } ``` -Desired subsets are described by a new type. +Target subsets are described by a new type. ```swift public enum MergerSubset : UInt, CaseIterable { case none, firstWithoutSecond, secondWithoutFirst, symmetricDifference, intersection, first, second, union, sum + + //... } ``` @@ -127,11 +136,11 @@ The merges via: - The speed-optimized partition-merge Operate in **O(** _n_ `+` _m_ **)** for both space and time, -where *n* and *m* are the lengths of the two operand sequences. +where *n* and *m* are the lengths of the two operand sequences/partitions. 
The space-optimized partition merge for a collection of length *n* operates in **O(** 1 **)** for space, -with **O(** _n_ **)** for time when the collection is not random-access, -and *???* for random-access collections. +**O(** _n_ **)** for time when the collection is not random-access, +and *???* for time in random-access collections. ### Naming From c74b0d4c14a81f833842ebf48977a828fe586d6a Mon Sep 17 00:00:00 2001 From: Daryle Walker Date: Thu, 1 Aug 2024 22:01:18 -0400 Subject: [PATCH 16/34] Add documentation partition markers --- Tests/SwiftAlgorithmsTests/MergeSortedTests.swift | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/Tests/SwiftAlgorithmsTests/MergeSortedTests.swift b/Tests/SwiftAlgorithmsTests/MergeSortedTests.swift index 1ad3fd0b..68aea8de 100644 --- a/Tests/SwiftAlgorithmsTests/MergeSortedTests.swift +++ b/Tests/SwiftAlgorithmsTests/MergeSortedTests.swift @@ -13,6 +13,8 @@ import XCTest import Algorithms final class MergeSortedTests: XCTestCase { + // MARK: Support Types for Set-Operation Mergers + /// Check the convenience initializers for `MergerSubset`. func testMergerSubsetInitializers() { XCTAssertEqual(MergerSubset(), .sum) @@ -81,6 +83,8 @@ final class MergeSortedTests: XCTestCase { ) } + // MARK: - Set-Operation and Direct Mergers + /// Check the lazily-generated merger/subset sequences. func testLazyMergers() { let low = 0..<7, high = 3..<10 @@ -146,6 +150,8 @@ final class MergeSortedTests: XCTestCase { XCTAssertEqualSequences(sequences[.sum]!, [0, 1, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 8, 9]) } + // MARK: - Partition Mergers + /// Check the more-memory version of merging two sorted partitions. func testFastPartitionMerge() { // Degenerate count of elements. @@ -227,6 +233,8 @@ final class MergeSortedTests: XCTestCase { XCTAssertEqualSequences(sample5, [2, 2, 3, 3, 4, 5, 7, 20]) } + // MARK: - Sample Code + /// Check the code from documentation. func testSampleCode() { // From the guide. 
From d1ef470f403542214c43d5f6f93409f716b26761 Mon Sep 17 00:00:00 2001 From: Daryle Walker Date: Thu, 1 Aug 2024 22:18:41 -0400 Subject: [PATCH 17/34] Separate tests between regular and subset mergers --- .../MergeSortedTests.swift | 36 ++++++++++++------- 1 file changed, 23 insertions(+), 13 deletions(-) diff --git a/Tests/SwiftAlgorithmsTests/MergeSortedTests.swift b/Tests/SwiftAlgorithmsTests/MergeSortedTests.swift index 68aea8de..fef7a973 100644 --- a/Tests/SwiftAlgorithmsTests/MergeSortedTests.swift +++ b/Tests/SwiftAlgorithmsTests/MergeSortedTests.swift @@ -83,17 +83,15 @@ final class MergeSortedTests: XCTestCase { ) } - // MARK: - Set-Operation and Direct Mergers + // MARK: - Set-Operation Mergers - /// Check the lazily-generated merger/subset sequences. - func testLazyMergers() { + /// Check the lazily-generated subset sequences. + func testLazySetMergers() { let low = 0..<7, high = 3..<10 - XCTAssertEqualSequences(mergeSorted(low, high), - [0, 1, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 8, 9]) - XCTAssertLazySequence(mergeSorted(low, high)) - let sequences = Dictionary(uniqueKeysWithValues: MergerSubset.allCases.map { - ($0, mergeSorted(low, high, retaining: $0)) + let subsetResult = mergeSorted(low, high, retaining: $0) + XCTAssertLazySequence(subsetResult) + return ($0, subsetResult) }) XCTAssertEqualSequences(sequences[.none]!, EmptyCollection()) XCTAssertEqualSequences(sequences[.firstWithoutSecond]!, 0..<3) @@ -130,12 +128,9 @@ final class MergeSortedTests: XCTestCase { XCTAssertEqualSequences(reversed[.sum]!, [0, 1, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 8, 9]) } - /// Check the eager merger/subset sequences. - func testEagerMergers() { + /// Check the eagerly-generated subset sequences. 
+ func testEagerSetMergers() { let low = 0..<7, high = 3..<10 - XCTAssertEqualSequences(Array(mergeSorted: low, and: high), - [0, 1, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 8, 9]) - let sequences = Dictionary(uniqueKeysWithValues: MergerSubset.allCases.map { ($0, Array(mergeSorted: low, and: high, retaining: $0)) }) @@ -150,6 +145,21 @@ final class MergeSortedTests: XCTestCase { XCTAssertEqualSequences(sequences[.sum]!, [0, 1, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 8, 9]) } + // MARK: - Direct Mergers + + /// Check lazily-generated mergers. + func testLazyMergers() { + let low = 0..<7, high = 3..<10, result = mergeSorted(low, high) + XCTAssertEqualSequences(result, [0, 1, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 8, 9]) + XCTAssertLazySequence(result) + } + + /// Check eagerly-generated mergers. + func testEagerMergers() { + let low = 0..<7, high = 3..<10, result = Array(mergeSorted: low, and: high) + XCTAssertEqualSequences(result, [0, 1, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 8, 9]) + } + // MARK: - Partition Mergers /// Check the more-memory version of merging two sorted partitions. From f40015db72be4092d0a980ded6ada0807a97d1bf Mon Sep 17 00:00:00 2001 From: Daryle Walker Date: Thu, 1 Aug 2024 23:09:20 -0400 Subject: [PATCH 18/34] Separate the regular and set-operation merger functions --- Sources/Algorithms/MergeSorted.swift | 131 ++++++++++++++++++++++++--- 1 file changed, 119 insertions(+), 12 deletions(-) diff --git a/Sources/Algorithms/MergeSorted.swift b/Sources/Algorithms/MergeSorted.swift index 80b0ad3b..3efbd935 100644 --- a/Sources/Algorithms/MergeSorted.swift +++ b/Sources/Algorithms/MergeSorted.swift @@ -322,8 +322,7 @@ extension RangeReplaceableCollection { /// - Parameters: /// - first: The first sequence spliced. /// - second: The second sequence spliced. - /// - filter: The subset of the merged sequence to keep. If not given, - /// defaults to `.sum`. + /// - filter: The subset of the merged sequence to keep. /// - areInIncreasingOrder: The criteria for sorting. 
/// /// - Complexity: O(`n` + `m`) in space and time, where `n` and `m` are the @@ -331,7 +330,7 @@ extension RangeReplaceableCollection { public init( mergeSorted first: T, and second: U, - retaining filter: MergerSubset = .sum, + retaining filter: MergerSubset, sortedBy areInIncreasingOrder: (Element, Element) throws -> Bool ) rethrows where T.Element == Element, U.Element == Element @@ -362,8 +361,7 @@ extension RangeReplaceableCollection where Element: Comparable { /// - Parameters: /// - first: The first sequence spliced. /// - second: The second sequence spliced. - /// - filter: The subset of the merged sequence to keep. If not given, - /// defaults to `.sum`. + /// - filter: The subset of the merged sequence to keep. /// /// - Complexity: O(`n` + `m`) in space and time, where `n` and `m` are the /// lengths of the sequence arguments. @@ -371,7 +369,7 @@ extension RangeReplaceableCollection where Element: Comparable { public init( mergeSorted first: T, and second: U, - retaining filter: MergerSubset = .sum + retaining filter: MergerSubset ) where T.Element == Element, U.Element == Element { self.init(mergeSorted: first, and: second, retaining: filter, sortedBy: <) @@ -395,8 +393,7 @@ extension RangeReplaceableCollection where Element: Comparable { /// - Parameters: /// - first: The first sequence spliced. /// - second: The second sequence spliced. -/// - filter: The subset of the merged sequence to keep. If not given, -/// defaults to `.sum`. +/// - filter: The subset of the merged sequence to keep. /// - areInIncreasingOrder: The criteria for sorting. /// - Returns: A sequence that lazily generates the merged sequence subset. 
/// @@ -405,7 +402,7 @@ extension RangeReplaceableCollection where Element: Comparable { public func mergeSorted( _ first: T, _ second: U, - retaining filter: MergerSubset = .sum, + retaining filter: MergerSubset, sortedBy areInIncreasingOrder: @escaping (T.Element, U.Element) -> Bool ) -> MergeSortedSequence, LazySequence> where T.Element == U.Element { @@ -427,15 +424,14 @@ where T.Element == U.Element { /// - Parameters: /// - first: The first sequence spliced. /// - second: The second sequence spliced. -/// - filter: The subset of the merged sequence to keep. If not given, -/// defaults to `.sum`. +/// - filter: The subset of the merged sequence to keep. /// - Returns: A sequence that lazily generates the merged sequence subset. /// /// - Complexity: O(1). The actual iteration takes place in O(`n` + `m`), /// where `n` and `m` are the lengths of the sequence arguments. @inlinable public func mergeSorted( - _ first: T, _ second: U, retaining filter: MergerSubset = .sum + _ first: T, _ second: U, retaining filter: MergerSubset ) -> MergeSortedSequence, LazySequence> where T.Element == U.Element, T.Element: Comparable { return mergeSorted(first, second, retaining: filter, sortedBy: <) @@ -604,3 +600,114 @@ extension MergeSortedIterator: IteratorProtocol { return try! throwingNext() } } + +//===----------------------------------------------------------------------===// +// MARK: - RangeReplaceableCollection.init(mergeSorted:and:sortedBy:) +//-------------------------------------------------------------------------===// + +extension RangeReplaceableCollection { + /// Given two sequences that are both sorted according to the given predicate, + /// create their sorted merger. + /// + /// - Precondition: Both `first` and `second` must be sorted according to + /// `areInIncreasingOrder`, and said predicate must be a strict weak ordering + /// over its arguments. Both `first` and `second` must be finite. 
+ /// + /// - Parameters: + /// - first: The first sequence spliced. + /// - second: The second sequence spliced. + /// - areInIncreasingOrder: The criteria for sorting. + /// + /// - Complexity: O(`n` + `m`) in space and time, where `n` and `m` are the + /// lengths of the sequence arguments. + @inlinable + public init<T: Sequence, U: Sequence>( + mergeSorted first: T, + and second: U, + sortedBy areInIncreasingOrder: (Element, Element) throws -> Bool + ) rethrows + where T.Element == Element, U.Element == Element + { + try self.init(mergeSorted: first, and: second, retaining: .sum, sortedBy: areInIncreasingOrder) + } +} + +extension RangeReplaceableCollection where Element: Comparable { + /// Given two sorted sequences, create their sorted merger. + /// + /// - Precondition: Both `first` and `second` must be sorted, and both + /// must be finite. + /// + /// - Parameters: + /// - first: The first sequence spliced. + /// - second: The second sequence spliced. + /// + /// - Complexity: O(`n` + `m`) in space and time, where `n` and `m` are the + /// lengths of the sequence arguments. + @inlinable + public init<T: Sequence, U: Sequence>( + mergeSorted first: T, + and second: U + ) where T.Element == Element, U.Element == Element + { + self.init(mergeSorted: first, and: second, sortedBy: <) + } +} + +//===----------------------------------------------------------------------===// +// MARK: - mergeSorted(_:_:sortedBy:) +//-------------------------------------------------------------------------===// + +/// Given two sequences that are both sorted according to the given predicate, +/// return their sorted merger as a lazy sequence that is also sorted by the +/// same predicate. +/// +/// Every element of both sequences is kept, including equivalent ones. +/// +/// - Precondition: Both `first` and `second` must be sorted according to +/// `areInIncreasingOrder`, and said predicate must be a strict weak ordering +/// over its arguments.
+/// +/// - Parameters: +/// - first: The first sequence spliced. +/// - second: The second sequence spliced. +/// - areInIncreasingOrder: The criteria for sorting. It must be the same +/// predicate by which both `first` and `second` are already sorted. +/// - Returns: A sequence that lazily generates the merger of `first` and +/// `second`. +/// +/// - Complexity: O(1). The actual iteration takes place in O(`n` + `m`), +/// where `n` and `m` are the lengths of the sequence arguments. +@inlinable +public func mergeSorted<T: Sequence, U: Sequence>( + _ first: T, + _ second: U, + sortedBy areInIncreasingOrder: @escaping (T.Element, U.Element) -> Bool +) -> MergeSortedSequence<LazySequence<T>, LazySequence<U>> +where T.Element == U.Element { + return mergeSorted(first, second, retaining: .sum, sortedBy: areInIncreasingOrder) +} + +/// Given two sorted sequences, return their sorted merger as a lazy +/// sequence. +/// +/// Every element of both sequences is kept, including equivalent ones. The +/// elements are compared with the less-than operator (`<`). +/// +/// - Precondition: Both `first` and `second` must be sorted. +/// +/// - Parameters: +/// - first: The first sequence spliced. +/// - second: The second sequence spliced. +/// - Returns: A sequence that lazily generates the merger of `first` and +/// `second`. +/// +/// - Complexity: O(1). The actual iteration takes place in O(`n` + `m`), +/// where `n` and `m` are the lengths of the sequence arguments.
+@inlinable +public func mergeSorted( + _ first: T, _ second: U +) -> MergeSortedSequence, LazySequence> +where T.Element == U.Element, T.Element: Comparable { + return mergeSorted(first, second, sortedBy: <) +} From a9dc93b8e8fe11905274790a6641cdb0ff724d21 Mon Sep 17 00:00:00 2001 From: Daryle Walker Date: Fri, 2 Aug 2024 00:04:25 -0400 Subject: [PATCH 19/34] Move the set-operation merger functions to a new file --- Sources/Algorithms/MergeSorted.swift | 395 ---------------------- Sources/Algorithms/MergeSortedSets.swift | 405 +++++++++++++++++++++++ 2 files changed, 405 insertions(+), 395 deletions(-) create mode 100644 Sources/Algorithms/MergeSortedSets.swift diff --git a/Sources/Algorithms/MergeSorted.swift b/Sources/Algorithms/MergeSorted.swift index 3efbd935..4ad288e8 100644 --- a/Sources/Algorithms/MergeSorted.swift +++ b/Sources/Algorithms/MergeSorted.swift @@ -206,401 +206,6 @@ where Element: Comparable, Self: BidirectionalCollection { } } -//===----------------------------------------------------------------------===// -// MARK: - MergerSubset -//-------------------------------------------------------------------------===// - -/// Description of which elements of a merger will be retained. -public enum MergerSubset: UInt, CaseIterable { - /// Keep no elements. - case none - /// Keep the elements of the first source that are not also in the second. - case firstWithoutSecond - /// Keep the elements of the second source that are not also in the first. - case secondWithoutFirst - /// Keep the elements of both sources that are not present in the other. - case symmetricDifference - /// Keep the elements that are present in both sorces. - case intersection - /// Keep only the elements from the first source. - case first - /// Keep only the elements from the second source. - case second - /// Keep all of the elements from both sources, consolidating shared ones. - case union - /// Keep all elements from both sources, including duplicates. 
- case sum = 0b1111 // `union` with an extra bit to distinguish. -} - -extension MergerSubset { - /// Whether the elements exclusive to the first source are emitted. - @inlinable - public var emitsExclusivesToFirst: Bool { rawValue & 0b001 != 0 } - /// Whether the elements exclusive to the second source are emitted. - @inlinable - public var emitsExclusivesToSecond: Bool { rawValue & 0b010 != 0 } - /// Whether the elements shared by both sources are emitted. - @inlinable - public var emitsSharedElements: Bool { rawValue & 0b100 != 0 } -} - -extension MergerSubset { - /// Create a filter specifying a full merge (duplicating the shared elements). - @inlinable - public init() { self = .sum } - /// Create a filter specifying which categories of elements are included in - /// the merger, with shared elements consolidated. - public init(keepExclusivesToFirst: Bool, keepExclusivesToSecond: Bool, keepSharedElements: Bool) { - self = switch (keepSharedElements, keepExclusivesToSecond, keepExclusivesToFirst) { - case (false, false, false): .none - case (false, false, true): .firstWithoutSecond - case (false, true, false): .secondWithoutFirst - case (false, true, true): .symmetricDifference - case ( true, false, false): .intersection - case ( true, false, true): .first - case ( true, true, false): .second - case ( true, true, true): .union - } - } -} - -extension MergerSubset { - /// Return the worst-case bounds with the given source lengths. - /// - /// These non-necessarily exclusive conditions can affect the result: - /// - /// - One or both of the sources is empty. - /// - The sources are identical. - /// - The sources have no elements in common. - /// - The shorter source is a subset of the longer one. - /// - The sources have just partial overlap. - /// - /// Both inputs must be nonnegative. - fileprivate - func expectedCountRange(given firstLength: Int, and secondLength: Int) -> ClosedRange { - /// Generate a range for a single value without repeating its expression. 
- func singleValueRange(_ v: Int) -> ClosedRange { return v...v } - - return switch self { - case .none: - singleValueRange(0) - case .firstWithoutSecond: - max(firstLength - secondLength, 0)...firstLength - case .secondWithoutFirst: - max(secondLength - firstLength, 0)...secondLength - case .symmetricDifference: - abs(firstLength - secondLength)...(firstLength + secondLength) - case .intersection: - 0...min(firstLength, secondLength) - case .first: - singleValueRange(firstLength) - case .second: - singleValueRange(secondLength) - case .union: - max(firstLength, secondLength)...(firstLength + secondLength) - case .sum: - singleValueRange(firstLength + secondLength) - } - } -} - -//===----------------------------------------------------------------------===// -// MARK: - RangeReplaceableCollection.init(mergeSorted:and:retaining:sortedBy:) -//-------------------------------------------------------------------------===// - -extension RangeReplaceableCollection { - /// Given two sequences that are both sorted according to the given predicate, - /// treat them as sets, and create the sorted result of the given set - /// operation. - /// - /// For simply merging the sequences, use `.sum` as the operation. - /// - /// - Precondition: Both `first` and `second` must be sorted according to - /// `areInIncreasingOrder`, and said predicate must be a strict weak ordering - /// over its arguments. Both `first` and `second` must be finite. - /// - /// - Parameters: - /// - first: The first sequence spliced. - /// - second: The second sequence spliced. - /// - filter: The subset of the merged sequence to keep. - /// - areInIncreasingOrder: The criteria for sorting. - /// - /// - Complexity: O(`n` + `m`) in space and time, where `n` and `m` are the - /// lengths of the sequence arguments. 
- public init( - mergeSorted first: T, - and second: U, - retaining filter: MergerSubset, - sortedBy areInIncreasingOrder: (Element, Element) throws -> Bool - ) rethrows - where T.Element == Element, U.Element == Element - { - self.init() - try withoutActuallyEscaping(areInIncreasingOrder) { - let sequence = MergeSortedSequence(merging: first, and: second, - retaining: filter, sortedBy: $0) - self.reserveCapacity(sequence.underestimatedCount) - - var iterator = sequence.makeIterator() - while let current = try iterator.throwingNext() { - self.append(current) - } - } - } -} - -extension RangeReplaceableCollection where Element: Comparable { - /// Given two sorted sequences, treat them as sets, and create the sorted - /// result of the given set operation. - /// - /// For simply merging the sequences, use `.sum` as the operation. - /// - /// - Precondition: Both `first` and `second` must be sorted, and both - /// must be finite. - /// - /// - Parameters: - /// - first: The first sequence spliced. - /// - second: The second sequence spliced. - /// - filter: The subset of the merged sequence to keep. - /// - /// - Complexity: O(`n` + `m`) in space and time, where `n` and `m` are the - /// lengths of the sequence arguments. - @inlinable - public init( - mergeSorted first: T, - and second: U, - retaining filter: MergerSubset - ) where T.Element == Element, U.Element == Element - { - self.init(mergeSorted: first, and: second, retaining: filter, sortedBy: <) - } -} - -//===----------------------------------------------------------------------===// -// MARK: - mergeSorted(_:_:retaining:sortedBy:) -//-------------------------------------------------------------------------===// - -/// Given two sequences that are both sorted according to the given predicate -/// and treated as sets, apply the given set operation, returning the result as -/// a lazy sequence also sorted by the same predicate. -/// -/// For simply merging the sequences, use `.sum` as the operation. 
-/// -/// - Precondition: Both `first` and `second` must be sorted according to -/// `areInIncreasingOrder`, and said predicate must be a strict weak ordering -/// over its arguments. -/// -/// - Parameters: -/// - first: The first sequence spliced. -/// - second: The second sequence spliced. -/// - filter: The subset of the merged sequence to keep. -/// - areInIncreasingOrder: The criteria for sorting. -/// - Returns: A sequence that lazily generates the merged sequence subset. -/// -/// - Complexity: O(1). The actual iteration takes place in O(`n` + `m`), -/// where `n` and `m` are the lengths of the sequence arguments. -public func mergeSorted( - _ first: T, - _ second: U, - retaining filter: MergerSubset, - sortedBy areInIncreasingOrder: @escaping (T.Element, U.Element) -> Bool -) -> MergeSortedSequence, LazySequence> -where T.Element == U.Element { - return MergeSortedSequence( - merging: first.lazy, - and: second.lazy, - retaining: filter, - sortedBy: areInIncreasingOrder - ) -} - -/// Given two sorted sequences treated as sets, apply the given set operation, -/// returning the result as a sorted lazy sequence. -/// -/// For simply merging the sequences, use `.sum` as the operation. -/// -/// - Precondition: Both `first` and `second` must be sorted. -/// -/// - Parameters: -/// - first: The first sequence spliced. -/// - second: The second sequence spliced. -/// - filter: The subset of the merged sequence to keep. -/// - Returns: A sequence that lazily generates the merged sequence subset. -/// -/// - Complexity: O(1). The actual iteration takes place in O(`n` + `m`), -/// where `n` and `m` are the lengths of the sequence arguments. 
-@inlinable -public func mergeSorted( - _ first: T, _ second: U, retaining filter: MergerSubset -) -> MergeSortedSequence, LazySequence> -where T.Element == U.Element, T.Element: Comparable { - return mergeSorted(first, second, retaining: filter, sortedBy: <) -} - -//===----------------------------------------------------------------------===// -// MARK: - MergeSortedSequence -//-------------------------------------------------------------------------===// - -/// A sequence that lazily vends the sorted result of a set operation upon -/// two sorted sequences treated as sets spliced together, using a predicate as -/// the sorting criteria for all three sequences involved. -public struct MergeSortedSequence -where First.Element == Second.Element -{ - /// The first source sequence. - let first: First - /// The second source sequence. - let second: Second - /// The subset of elements to retain. - let filter: MergerSubset - /// The sorting predicate. - let areInIncreasingOrder: (Element, Element) throws -> Bool - - /// Create a sequence using the two given sequences that are sorted according - /// to the given predicate, to vend the sources' elements combined while still - /// sorted according to the predicate, but keeping only the elements that - /// match the given set operation. 
- fileprivate init( - merging first: First, - and second: Second, - retaining filter: MergerSubset, - sortedBy areInIncreasingOrder: @escaping (Element, Element) throws -> Bool - ) { - self.first = first - self.second = second - self.filter = filter - self.areInIncreasingOrder = areInIncreasingOrder - } -} - -extension MergeSortedSequence: Sequence { - public func makeIterator() - -> MergeSortedIterator { - return .init(first.makeIterator(), second.makeIterator(), filter: filter, - predicate: areInIncreasingOrder) - } - - public var underestimatedCount: Int { - filter.expectedCountRange( - given: first.underestimatedCount, - and: second.underestimatedCount - ).lowerBound - } -} - -extension MergeSortedSequence: LazySequenceProtocol {} - -//===----------------------------------------------------------------------===// -// MARK: - MergeSortedIterator -//-------------------------------------------------------------------------===// - -/// An iterator that applies a set operation on two virtual sequences, -/// both treated as sets sorted according a predicate, spliced together to -/// vend a virtual sequence that is also sorted. -public struct MergeSortedIterator< - First: IteratorProtocol, - Second: IteratorProtocol -> where First.Element == Second.Element -{ - /// The first source of elements. - var firstSource: First? - /// The second source of elements. - var secondSource: Second? - /// The subset of elements to emit. - let filter: MergerSubset - /// The sorting predicate. - let areInIncreasingOrder: (Element, Element) throws -> Bool - - /// The latest element read from the first source. - fileprivate var first: First.Element? - /// The latest element read from the second source. - fileprivate var second: Second.Element? - /// Whether to keep on iterating. - fileprivate var isFinished = false - - /// Create an iterator reading from two sources, comparing their respective - /// elements with the predicate, and emitting the given subset of the merged - /// sequence. 
- fileprivate init( - _ firstSource: First, - _ secondSource: Second, - filter: MergerSubset, - predicate: @escaping (Element, Element) throws -> Bool - ) { - // Only load the sources that are actually needed. - switch filter { - case .none: - break - case .first: - self.firstSource = firstSource - case .second: - self.secondSource = secondSource - default: - self.firstSource = firstSource - self.secondSource = secondSource - } - - // Other member initialization - self.filter = filter - self.areInIncreasingOrder = predicate - } -} - -extension MergeSortedIterator: IteratorProtocol { - /// Advance to the next element, if any. May throw. - fileprivate mutating func throwingNext() throws -> First.Element? { - while !isFinished { - // Extract another element from a source if the previous one was purged. - first = first ?? firstSource?.next() - second = second ?? secondSource?.next() - - // Of the latest valid elements, purge the smaller (or both when they are - // equivalent). Return said element if the filter permits, search again - // otherwise. - switch (first, second) { - case let (latestFirst?, latestSecond?) where try areInIncreasingOrder(latestFirst, latestSecond): - defer { first = nil } - guard filter.emitsExclusivesToFirst else { continue } - - return latestFirst - case let (latestFirst?, latestSecond?) where try areInIncreasingOrder(latestSecond, latestFirst): - defer { second = nil } - guard filter.emitsExclusivesToSecond else { continue } - - return latestSecond - case (let latestFirst?, _?): - // Purge both of the equivalent elements... - defer { - first = nil - - // ...except when the second source's element is only deferred. - if filter != .sum { second = nil } - } - guard filter.emitsSharedElements else { continue } - - // This will not cause mixed-source emmission when only the second - // source is being vended, because this case won't ever be reached. - return latestFirst - case (nil, let latestSecond?) 
where filter.emitsExclusivesToSecond: - second = nil - return latestSecond - case (let latestFirst?, nil) where filter.emitsExclusivesToFirst: - first = nil - return latestFirst - default: - // Either both sources are exhausted, or just one is while the remainder - // of the other won't be emitted. - isFinished = true - } - } - return nil - } - - public mutating func next() -> Second.Element? { - return try! throwingNext() - } -} - //===----------------------------------------------------------------------===// // MARK: - RangeReplaceableCollection.init(mergeSorted:and:sortedBy:) //-------------------------------------------------------------------------===// diff --git a/Sources/Algorithms/MergeSortedSets.swift b/Sources/Algorithms/MergeSortedSets.swift new file mode 100644 index 00000000..a753ace9 --- /dev/null +++ b/Sources/Algorithms/MergeSortedSets.swift @@ -0,0 +1,405 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift Algorithms open source project +// +// Copyright (c) 2024 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// MARK: MergerSubset +//-------------------------------------------------------------------------===// + +/// Description of which elements of a merger will be retained. +public enum MergerSubset: UInt, CaseIterable { + /// Keep no elements. + case none + /// Keep the elements of the first source that are not also in the second. + case firstWithoutSecond + /// Keep the elements of the second source that are not also in the first. + case secondWithoutFirst + /// Keep the elements of both sources that are not present in the other. 
+ case symmetricDifference + /// Keep the elements that are present in both sources. + case intersection + /// Keep only the elements from the first source. + case first + /// Keep only the elements from the second source. + case second + /// Keep all of the elements from both sources, consolidating shared ones. + case union + /// Keep all elements from both sources, including duplicates. + case sum = 0b1111 // `union` with an extra bit to distinguish. +} + +extension MergerSubset { + /// Whether the elements exclusive to the first source are emitted. + @inlinable + public var emitsExclusivesToFirst: Bool { rawValue & 0b001 != 0 } + /// Whether the elements exclusive to the second source are emitted. + @inlinable + public var emitsExclusivesToSecond: Bool { rawValue & 0b010 != 0 } + /// Whether the elements shared by both sources are emitted. + @inlinable + public var emitsSharedElements: Bool { rawValue & 0b100 != 0 } +} + +extension MergerSubset { + /// Create a filter specifying a full merge (duplicating the shared elements). + @inlinable + public init() { self = .sum } + /// Create a filter specifying which categories of elements are included in + /// the merger, with shared elements consolidated. + public init(keepExclusivesToFirst: Bool, keepExclusivesToSecond: Bool, keepSharedElements: Bool) { + self = switch (keepSharedElements, keepExclusivesToSecond, keepExclusivesToFirst) { + case (false, false, false): .none + case (false, false, true): .firstWithoutSecond + case (false, true, false): .secondWithoutFirst + case (false, true, true): .symmetricDifference + case ( true, false, false): .intersection + case ( true, false, true): .first + case ( true, true, false): .second + case ( true, true, true): .union + } + } +} + +extension MergerSubset { + /// Return the worst-case bounds with the given source lengths. + /// + /// These not necessarily exclusive conditions can affect the result: + /// + /// - One or both of the sources is empty.
+ /// - The sources are identical. + /// - The sources have no elements in common. + /// - The shorter source is a subset of the longer one. + /// - The sources have just partial overlap. + /// + /// Both inputs must be nonnegative. + fileprivate + func expectedCountRange(given firstLength: Int, and secondLength: Int) -> ClosedRange { + /// Generate a range for a single value without repeating its expression. + func singleValueRange(_ v: Int) -> ClosedRange { return v...v } + + return switch self { + case .none: + singleValueRange(0) + case .firstWithoutSecond: + max(firstLength - secondLength, 0)...firstLength + case .secondWithoutFirst: + max(secondLength - firstLength, 0)...secondLength + case .symmetricDifference: + abs(firstLength - secondLength)...(firstLength + secondLength) + case .intersection: + 0...min(firstLength, secondLength) + case .first: + singleValueRange(firstLength) + case .second: + singleValueRange(secondLength) + case .union: + max(firstLength, secondLength)...(firstLength + secondLength) + case .sum: + singleValueRange(firstLength + secondLength) + } + } +} + +//===----------------------------------------------------------------------===// +// MARK: - RangeReplaceableCollection.init(mergeSorted:and:retaining:sortedBy:) +//-------------------------------------------------------------------------===// + +extension RangeReplaceableCollection { + /// Given two sequences that are both sorted according to the given predicate, + /// treat them as sets, and create the sorted result of the given set + /// operation. + /// + /// For simply merging the sequences, use `.sum` as the operation. + /// + /// - Precondition: Both `first` and `second` must be sorted according to + /// `areInIncreasingOrder`, and said predicate must be a strict weak ordering + /// over its arguments. Both `first` and `second` must be finite. + /// + /// - Parameters: + /// - first: The first sequence spliced. + /// - second: The second sequence spliced. 
+ /// - filter: The subset of the merged sequence to keep. + /// - areInIncreasingOrder: The criteria for sorting. + /// + /// - Complexity: O(`n` + `m`) in space and time, where `n` and `m` are the + /// lengths of the sequence arguments. + public init( + mergeSorted first: T, + and second: U, + retaining filter: MergerSubset, + sortedBy areInIncreasingOrder: (Element, Element) throws -> Bool + ) rethrows + where T.Element == Element, U.Element == Element + { + self.init() + try withoutActuallyEscaping(areInIncreasingOrder) { + let sequence = MergeSortedSequence(merging: first, and: second, + retaining: filter, sortedBy: $0) + self.reserveCapacity(sequence.underestimatedCount) + + var iterator = sequence.makeIterator() + while let current = try iterator.throwingNext() { + self.append(current) + } + } + } +} + +extension RangeReplaceableCollection where Element: Comparable { + /// Given two sorted sequences, treat them as sets, and create the sorted + /// result of the given set operation. + /// + /// For simply merging the sequences, use `.sum` as the operation. + /// + /// - Precondition: Both `first` and `second` must be sorted, and both + /// must be finite. + /// + /// - Parameters: + /// - first: The first sequence spliced. + /// - second: The second sequence spliced. + /// - filter: The subset of the merged sequence to keep. + /// + /// - Complexity: O(`n` + `m`) in space and time, where `n` and `m` are the + /// lengths of the sequence arguments. 
+ @inlinable + public init( + mergeSorted first: T, + and second: U, + retaining filter: MergerSubset + ) where T.Element == Element, U.Element == Element + { + self.init(mergeSorted: first, and: second, retaining: filter, sortedBy: <) + } +} + +//===----------------------------------------------------------------------===// +// MARK: - mergeSorted(_:_:retaining:sortedBy:) +//-------------------------------------------------------------------------===// + +/// Given two sequences that are both sorted according to the given predicate +/// and treated as sets, apply the given set operation, returning the result as +/// a lazy sequence also sorted by the same predicate. +/// +/// For simply merging the sequences, use `.sum` as the operation. +/// +/// - Precondition: Both `first` and `second` must be sorted according to +/// `areInIncreasingOrder`, and said predicate must be a strict weak ordering +/// over its arguments. +/// +/// - Parameters: +/// - first: The first sequence spliced. +/// - second: The second sequence spliced. +/// - filter: The subset of the merged sequence to keep. +/// - areInIncreasingOrder: The criteria for sorting. +/// - Returns: A sequence that lazily generates the merged sequence subset. +/// +/// - Complexity: O(1). The actual iteration takes place in O(`n` + `m`), +/// where `n` and `m` are the lengths of the sequence arguments. +public func mergeSorted( + _ first: T, + _ second: U, + retaining filter: MergerSubset, + sortedBy areInIncreasingOrder: @escaping (T.Element, U.Element) -> Bool +) -> MergeSortedSequence, LazySequence> +where T.Element == U.Element { + return MergeSortedSequence( + merging: first.lazy, + and: second.lazy, + retaining: filter, + sortedBy: areInIncreasingOrder + ) +} + +/// Given two sorted sequences treated as sets, apply the given set operation, +/// returning the result as a sorted lazy sequence. +/// +/// For simply merging the sequences, use `.sum` as the operation. 
+/// +/// - Precondition: Both `first` and `second` must be sorted. +/// +/// - Parameters: +/// - first: The first sequence spliced. +/// - second: The second sequence spliced. +/// - filter: The subset of the merged sequence to keep. +/// - Returns: A sequence that lazily generates the merged sequence subset. +/// +/// - Complexity: O(1). The actual iteration takes place in O(`n` + `m`), +/// where `n` and `m` are the lengths of the sequence arguments. +@inlinable +public func mergeSorted( + _ first: T, _ second: U, retaining filter: MergerSubset +) -> MergeSortedSequence, LazySequence> +where T.Element == U.Element, T.Element: Comparable { + return mergeSorted(first, second, retaining: filter, sortedBy: <) +} + +//===----------------------------------------------------------------------===// +// MARK: - MergeSortedSequence +//-------------------------------------------------------------------------===// + +/// A sequence that lazily vends the sorted result of a set operation upon +/// two sorted sequences treated as sets spliced together, using a predicate as +/// the sorting criteria for all three sequences involved. +public struct MergeSortedSequence +where First.Element == Second.Element +{ + /// The first source sequence. + let first: First + /// The second source sequence. + let second: Second + /// The subset of elements to retain. + let filter: MergerSubset + /// The sorting predicate. + let areInIncreasingOrder: (Element, Element) throws -> Bool + + /// Create a sequence using the two given sequences that are sorted according + /// to the given predicate, to vend the sources' elements combined while still + /// sorted according to the predicate, but keeping only the elements that + /// match the given set operation. 
+  init(
+    merging first: First,
+    and second: Second,
+    retaining filter: MergerSubset,
+    sortedBy areInIncreasingOrder: @escaping (Element, Element) throws -> Bool
+  ) {
+    self.first = first
+    self.second = second
+    self.filter = filter
+    self.areInIncreasingOrder = areInIncreasingOrder
+  }
+}
+
+extension MergeSortedSequence: Sequence {
+  /// Creates an iterator that splices both sources together, emitting only
+  /// the elements selected by `filter`.
+  public func makeIterator()
+  -> MergeSortedIterator<First.Iterator, Second.Iterator> {
+    return .init(first.makeIterator(), second.makeIterator(), filter: filter,
+                 predicate: areInIncreasingOrder)
+  }
+
+  /// A value that is never greater than the number of elements vended.
+  ///
+  /// The sources only supply *underestimates* of their lengths, so a lower
+  /// bound computed from them is trustworthy only when that bound cannot
+  /// shrink as either source turns out to be longer than it reported.
+  public var underestimatedCount: Int {
+    switch filter {
+    case .firstWithoutSecond, .secondWithoutFirst, .symmetricDifference:
+      // Every additional element in the opposing source may cancel out one
+      // element here, so an underestimate of that source's length cannot
+      // bound the result from below.
+      return 0
+    case .none, .intersection, .first, .second, .union, .sum:
+      // These lower bounds never decrease as either length grows, so feeding
+      // in underestimates still produces a valid underestimate.
+      return filter.expectedCountRange(
+        given: first.underestimatedCount,
+        and: second.underestimatedCount
+      ).lowerBound
+    }
+  }
+}
+
+extension MergeSortedSequence: LazySequenceProtocol {}
+
+//===----------------------------------------------------------------------===//
+// MARK: - MergeSortedIterator
+//-------------------------------------------------------------------------===//
+
+/// An iterator that applies a set operation on two virtual sequences,
+/// both treated as sets sorted according to a predicate, spliced together to
+/// vend a virtual sequence that is also sorted.
+public struct MergeSortedIterator<
+  First: IteratorProtocol,
+  Second: IteratorProtocol
+> where First.Element == Second.Element
+{
+  /// The first source of elements.
+  var firstSource: First?
+  /// The second source of elements.
+  var secondSource: Second?
+  /// The subset of elements to emit.
+  let filter: MergerSubset
+  /// The sorting predicate.
+  let areInIncreasingOrder: (Element, Element) throws -> Bool
+
+  /// The latest element read from the first source.
+  fileprivate var first: First.Element?
+  /// The latest element read from the second source.
+  fileprivate var second: Second.Element?
+  /// Whether to keep on iterating.
+  fileprivate var isFinished = false
+
+  /// Create an iterator reading from two sources, comparing their respective
+  /// elements with the predicate, and emitting the given subset of the merged
+  /// sequence.
+ fileprivate init( + _ firstSource: First, + _ secondSource: Second, + filter: MergerSubset, + predicate: @escaping (Element, Element) throws -> Bool + ) { + // Only load the sources that are actually needed. + switch filter { + case .none: + break + case .first: + self.firstSource = firstSource + case .second: + self.secondSource = secondSource + default: + self.firstSource = firstSource + self.secondSource = secondSource + } + + // Other member initialization + self.filter = filter + self.areInIncreasingOrder = predicate + } +} + +extension MergeSortedIterator: IteratorProtocol { + /// Advance to the next element, if any. May throw. + mutating func throwingNext() throws -> First.Element? { + while !isFinished { + // Extract another element from a source if the previous one was purged. + first = first ?? firstSource?.next() + second = second ?? secondSource?.next() + + // Of the latest valid elements, purge the smaller (or both when they are + // equivalent). Return said element if the filter permits, search again + // otherwise. + switch (first, second) { + case let (latestFirst?, latestSecond?) where try areInIncreasingOrder(latestFirst, latestSecond): + defer { first = nil } + guard filter.emitsExclusivesToFirst else { continue } + + return latestFirst + case let (latestFirst?, latestSecond?) where try areInIncreasingOrder(latestSecond, latestFirst): + defer { second = nil } + guard filter.emitsExclusivesToSecond else { continue } + + return latestSecond + case (let latestFirst?, _?): + // Purge both of the equivalent elements... + defer { + first = nil + + // ...except when the second source's element is only deferred. + if filter != .sum { second = nil } + } + guard filter.emitsSharedElements else { continue } + + // This will not cause mixed-source emission when only the second + // source is being vended, because this case won't ever be reached. + return latestFirst + case (nil, let latestSecond?)
where filter.emitsExclusivesToSecond: + second = nil + return latestSecond + case (let latestFirst?, nil) where filter.emitsExclusivesToFirst: + first = nil + return latestFirst + default: + // Either both sources are exhausted, or just one is while the remainder + // of the other won't be emitted. + isFinished = true + } + } + return nil + } + + public mutating func next() -> Second.Element? { + return try! throwingNext() + } +} From 33f17fa43945f4e31f22794a81491e6005c9ce56 Mon Sep 17 00:00:00 2001 From: Daryle Walker Date: Fri, 2 Aug 2024 00:40:20 -0400 Subject: [PATCH 20/34] Rename some set-operation merger functions --- Sources/Algorithms/MergeSorted.swift | 8 ++--- Sources/Algorithms/MergeSortedSets.swift | 32 +++++++++---------- .../MergeSortedTests.swift | 16 +++++----- 3 files changed, 28 insertions(+), 28 deletions(-) diff --git a/Sources/Algorithms/MergeSorted.swift b/Sources/Algorithms/MergeSorted.swift index 4ad288e8..90cfc7ab 100644 --- a/Sources/Algorithms/MergeSorted.swift +++ b/Sources/Algorithms/MergeSorted.swift @@ -39,7 +39,7 @@ extension MutableCollection { ) rethrows { var duplicate = self try withoutActuallyEscaping(areInIncreasingOrder) { - let sequence = MergeSortedSequence(merging: self[startIndex..( _ first: T, _ second: U, sortedBy areInIncreasingOrder: @escaping (T.Element, U.Element) -> Bool -) -> MergeSortedSequence, LazySequence> +) -> MergeSortedSetsSequence, LazySequence> where T.Element == U.Element { - return mergeSorted(first, second, retaining: .sum, sortedBy: areInIncreasingOrder) + return mergeSortedSets(first, second, retaining: .sum, sortedBy: areInIncreasingOrder) } /// Given two sorted sequences treated as sets, apply the given set operation, @@ -312,7 +312,7 @@ where T.Element == U.Element { @inlinable public func mergeSorted( _ first: T, _ second: U -) -> MergeSortedSequence, LazySequence> +) -> MergeSortedSetsSequence, LazySequence> where T.Element == U.Element, T.Element: Comparable { return mergeSorted(first, second, 
sortedBy: <) } diff --git a/Sources/Algorithms/MergeSortedSets.swift b/Sources/Algorithms/MergeSortedSets.swift index a753ace9..4e98d7a3 100644 --- a/Sources/Algorithms/MergeSortedSets.swift +++ b/Sources/Algorithms/MergeSortedSets.swift @@ -140,7 +140,7 @@ extension RangeReplaceableCollection { { self.init() try withoutActuallyEscaping(areInIncreasingOrder) { - let sequence = MergeSortedSequence(merging: first, and: second, + let sequence = MergeSortedSetsSequence(merging: first, and: second, retaining: filter, sortedBy: $0) self.reserveCapacity(sequence.underestimatedCount) @@ -180,7 +180,7 @@ extension RangeReplaceableCollection where Element: Comparable { } //===----------------------------------------------------------------------===// -// MARK: - mergeSorted(_:_:retaining:sortedBy:) +// MARK: - mergeSortedSets(_:_:retaining:sortedBy:) //-------------------------------------------------------------------------===// /// Given two sequences that are both sorted according to the given predicate @@ -202,14 +202,14 @@ extension RangeReplaceableCollection where Element: Comparable { /// /// - Complexity: O(1). The actual iteration takes place in O(`n` + `m`), /// where `n` and `m` are the lengths of the sequence arguments. -public func mergeSorted( +public func mergeSortedSets( _ first: T, _ second: U, retaining filter: MergerSubset, sortedBy areInIncreasingOrder: @escaping (T.Element, U.Element) -> Bool -) -> MergeSortedSequence, LazySequence> +) -> MergeSortedSetsSequence, LazySequence> where T.Element == U.Element { - return MergeSortedSequence( + return MergeSortedSetsSequence( merging: first.lazy, and: second.lazy, retaining: filter, @@ -233,21 +233,21 @@ where T.Element == U.Element { /// - Complexity: O(1). The actual iteration takes place in O(`n` + `m`), /// where `n` and `m` are the lengths of the sequence arguments. 
@inlinable -public func mergeSorted( +public func mergeSortedSets( _ first: T, _ second: U, retaining filter: MergerSubset -) -> MergeSortedSequence, LazySequence> +) -> MergeSortedSetsSequence, LazySequence> where T.Element == U.Element, T.Element: Comparable { - return mergeSorted(first, second, retaining: filter, sortedBy: <) + return mergeSortedSets(first, second, retaining: filter, sortedBy: <) } //===----------------------------------------------------------------------===// -// MARK: - MergeSortedSequence +// MARK: - MergeSortedSetsSequence //-------------------------------------------------------------------------===// /// A sequence that lazily vends the sorted result of a set operation upon /// two sorted sequences treated as sets spliced together, using a predicate as /// the sorting criteria for all three sequences involved. -public struct MergeSortedSequence +public struct MergeSortedSetsSequence where First.Element == Second.Element { /// The first source sequence. @@ -276,9 +276,9 @@ where First.Element == Second.Element } } -extension MergeSortedSequence: Sequence { +extension MergeSortedSetsSequence: Sequence { public func makeIterator() - -> MergeSortedIterator { + -> MergeSortedSetsIterator { return .init(first.makeIterator(), second.makeIterator(), filter: filter, predicate: areInIncreasingOrder) } @@ -291,16 +291,16 @@ extension MergeSortedSequence: Sequence { } } -extension MergeSortedSequence: LazySequenceProtocol {} +extension MergeSortedSetsSequence: LazySequenceProtocol {} //===----------------------------------------------------------------------===// -// MARK: - MergeSortedIterator +// MARK: - MergeSortedSetsIterator //-------------------------------------------------------------------------===// /// An iterator that applies a set operation on two virtual sequences, /// both treated as sets sorted according a predicate, spliced together to /// vend a virtual sequence that is also sorted. 
-public struct MergeSortedIterator< +public struct MergeSortedSetsIterator< First: IteratorProtocol, Second: IteratorProtocol > where First.Element == Second.Element @@ -349,7 +349,7 @@ public struct MergeSortedIterator< } } -extension MergeSortedIterator: IteratorProtocol { +extension MergeSortedSetsIterator: IteratorProtocol { /// Advance to the next element, if any. May throw. mutating func throwingNext() throws -> First.Element? { while !isFinished { diff --git a/Tests/SwiftAlgorithmsTests/MergeSortedTests.swift b/Tests/SwiftAlgorithmsTests/MergeSortedTests.swift index fef7a973..48aeba1f 100644 --- a/Tests/SwiftAlgorithmsTests/MergeSortedTests.swift +++ b/Tests/SwiftAlgorithmsTests/MergeSortedTests.swift @@ -89,7 +89,7 @@ final class MergeSortedTests: XCTestCase { func testLazySetMergers() { let low = 0..<7, high = 3..<10 let sequences = Dictionary(uniqueKeysWithValues: MergerSubset.allCases.map { - let subsetResult = mergeSorted(low, high, retaining: $0) + let subsetResult = mergeSortedSets(low, high, retaining: $0) XCTAssertLazySequence(subsetResult) return ($0, subsetResult) }) @@ -115,7 +115,7 @@ final class MergeSortedTests: XCTestCase { // This exercises code missed by the `sequences` tests. 
let reversed = Dictionary(uniqueKeysWithValues: MergerSubset.allCases.map { - ($0, mergeSorted(high, low, retaining: $0)) + ($0, mergeSortedSets(high, low, retaining: $0)) }) XCTAssertEqualSequences(reversed[.none]!, EmptyCollection()) XCTAssertEqualSequences(reversed[.firstWithoutSecond]!, 7..<10) @@ -253,17 +253,17 @@ final class MergeSortedTests: XCTestCase { XCTAssertEqualSequences(mergedGuides, [20, 10, 6, 4, 1, 0, 0, -1, -3, -5]) let guide3 = [0, 1, 1, 2, 5, 10], guide4 = [-1, 0, 1, 2, 2, 7, 10, 20] - XCTAssertEqualSequences(mergeSorted(guide3, guide4, retaining: .union), + XCTAssertEqualSequences(mergeSortedSets(guide3, guide4, retaining: .union), [-1, 0, 1, 1, 2, 2, 5, 7, 10, 20]) - XCTAssertEqualSequences(mergeSorted(guide3, guide4, retaining: .intersection), + XCTAssertEqualSequences(mergeSortedSets(guide3, guide4, retaining: .intersection), [0, 1, 2, 10]) - XCTAssertEqualSequences(mergeSorted(guide3, guide4, retaining: .firstWithoutSecond), + XCTAssertEqualSequences(mergeSortedSets(guide3, guide4, retaining: .firstWithoutSecond), [1, 5]) - XCTAssertEqualSequences(mergeSorted(guide3, guide4, retaining: .secondWithoutFirst), + XCTAssertEqualSequences(mergeSortedSets(guide3, guide4, retaining: .secondWithoutFirst), [-1, 2, 7, 20]) - XCTAssertEqualSequences(mergeSorted(guide3, guide4, retaining: .symmetricDifference), + XCTAssertEqualSequences(mergeSortedSets(guide3, guide4, retaining: .symmetricDifference), [-1, 1, 2, 5, 7, 20]) - XCTAssertEqualSequences(mergeSorted(guide3, guide4, retaining: .sum), + XCTAssertEqualSequences(mergeSortedSets(guide3, guide4, retaining: .sum), [-1, 0, 0, 1, 1, 1, 2, 2, 2, 5, 7, 10, 10, 20]) } } From 9a0702df3f44131ff0435c3d7f7fade0c6c3ad5a Mon Sep 17 00:00:00 2001 From: Daryle Walker Date: Fri, 2 Aug 2024 01:28:02 -0400 Subject: [PATCH 21/34] Change the conformance for set-operation merges Change the set-operation merger sequence to make its lazy status conditional. 
--- Sources/Algorithms/MergeSorted.swift | 4 ++-- Sources/Algorithms/MergeSortedSets.swift | 16 +++++++++++----- .../SwiftAlgorithmsTests/MergeSortedTests.swift | 5 +---- 3 files changed, 14 insertions(+), 11 deletions(-) diff --git a/Sources/Algorithms/MergeSorted.swift b/Sources/Algorithms/MergeSorted.swift index 90cfc7ab..34259966 100644 --- a/Sources/Algorithms/MergeSorted.swift +++ b/Sources/Algorithms/MergeSorted.swift @@ -288,7 +288,7 @@ public func mergeSorted( _ first: T, _ second: U, sortedBy areInIncreasingOrder: @escaping (T.Element, U.Element) -> Bool -) -> MergeSortedSetsSequence, LazySequence> +) -> MergeSortedSetsSequence where T.Element == U.Element { return mergeSortedSets(first, second, retaining: .sum, sortedBy: areInIncreasingOrder) } @@ -312,7 +312,7 @@ where T.Element == U.Element { @inlinable public func mergeSorted( _ first: T, _ second: U -) -> MergeSortedSetsSequence, LazySequence> +) -> MergeSortedSetsSequence where T.Element == U.Element, T.Element: Comparable { return mergeSorted(first, second, sortedBy: <) } diff --git a/Sources/Algorithms/MergeSortedSets.swift b/Sources/Algorithms/MergeSortedSets.swift index 4e98d7a3..275622fc 100644 --- a/Sources/Algorithms/MergeSortedSets.swift +++ b/Sources/Algorithms/MergeSortedSets.swift @@ -207,11 +207,11 @@ public func mergeSortedSets( _ second: U, retaining filter: MergerSubset, sortedBy areInIncreasingOrder: @escaping (T.Element, U.Element) -> Bool -) -> MergeSortedSetsSequence, LazySequence> +) -> MergeSortedSetsSequence where T.Element == U.Element { return MergeSortedSetsSequence( - merging: first.lazy, - and: second.lazy, + merging: first, + and: second, retaining: filter, sortedBy: areInIncreasingOrder ) @@ -235,7 +235,7 @@ where T.Element == U.Element { @inlinable public func mergeSortedSets( _ first: T, _ second: U, retaining filter: MergerSubset -) -> MergeSortedSetsSequence, LazySequence> +) -> MergeSortedSetsSequence where T.Element == U.Element, T.Element: Comparable { return 
mergeSortedSets(first, second, retaining: filter, sortedBy: <) } @@ -291,7 +291,13 @@ extension MergeSortedSetsSequence: Sequence { } } -extension MergeSortedSetsSequence: LazySequenceProtocol {} +extension MergeSortedSetsSequence: LazySequenceProtocol +where First: LazySequenceProtocol, Second: LazySequenceProtocol +{ + public var elements: MergeSortedSetsSequence { + return Elements(merging: first.elements, and: second.elements, retaining: filter, sortedBy: areInIncreasingOrder) + } +} //===----------------------------------------------------------------------===// // MARK: - MergeSortedSetsIterator diff --git a/Tests/SwiftAlgorithmsTests/MergeSortedTests.swift b/Tests/SwiftAlgorithmsTests/MergeSortedTests.swift index 48aeba1f..ed77f462 100644 --- a/Tests/SwiftAlgorithmsTests/MergeSortedTests.swift +++ b/Tests/SwiftAlgorithmsTests/MergeSortedTests.swift @@ -89,9 +89,7 @@ final class MergeSortedTests: XCTestCase { func testLazySetMergers() { let low = 0..<7, high = 3..<10 let sequences = Dictionary(uniqueKeysWithValues: MergerSubset.allCases.map { - let subsetResult = mergeSortedSets(low, high, retaining: $0) - XCTAssertLazySequence(subsetResult) - return ($0, subsetResult) + return ($0, mergeSortedSets(low, high, retaining: $0)) }) XCTAssertEqualSequences(sequences[.none]!, EmptyCollection()) XCTAssertEqualSequences(sequences[.firstWithoutSecond]!, 0..<3) @@ -151,7 +149,6 @@ final class MergeSortedTests: XCTestCase { func testLazyMergers() { let low = 0..<7, high = 3..<10, result = mergeSorted(low, high) XCTAssertEqualSequences(result, [0, 1, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 8, 9]) - XCTAssertLazySequence(result) } /// Check eagerly-generated mergers. From b11af9846e6a1934c85bd3b11a44ae3fa6e3dee8 Mon Sep 17 00:00:00 2001 From: Daryle Walker Date: Sat, 3 Aug 2024 11:43:37 -0400 Subject: [PATCH 22/34] Update documentation for the merger free-functions Correct a copy-and-paste error, where the some original functions' decreased capabilities were not updated. 
(Those capabilities stayed in the functions' copies.) Mark all the functions above as not eagerly generating their results. These functions' results don't actually conform to being a lazy sequence unless both source sequences also are lazy. --- Sources/Algorithms/MergeSorted.swift | 20 +++++--------------- Sources/Algorithms/MergeSortedSets.swift | 8 ++++---- 2 files changed, 9 insertions(+), 19 deletions(-) diff --git a/Sources/Algorithms/MergeSorted.swift b/Sources/Algorithms/MergeSorted.swift index 34259966..ae03b781 100644 --- a/Sources/Algorithms/MergeSorted.swift +++ b/Sources/Algorithms/MergeSorted.swift @@ -263,11 +263,8 @@ extension RangeReplaceableCollection where Element: Comparable { // MARK: - mergeSorted(_:_:sortedBy:) //-------------------------------------------------------------------------===// -/// Given two sequences that are both sorted according to the given predicate -/// and treated as sets, apply the given set operation, returning the result as -/// a lazy sequence also sorted by the same predicate. -/// -/// For simply merging the sequences, use `.sum` as the operation. +/// Given two sequences that are both sorted according to the given predicate, +/// return their merger that is sorted by the predicate and vended lazily. /// /// - Precondition: Both `first` and `second` must be sorted according to /// `areInIncreasingOrder`, and said predicate must be a strict weak ordering @@ -276,10 +273,8 @@ extension RangeReplaceableCollection where Element: Comparable { /// - Parameters: /// - first: The first sequence spliced. /// - second: The second sequence spliced. -/// - filter: The subset of the merged sequence to keep. If not given, -/// defaults to `.sum`. /// - areInIncreasingOrder: The criteria for sorting. -/// - Returns: A sequence that lazily generates the merged sequence subset. +/// - Returns: The merged sequence. /// /// - Complexity: O(1). 
The actual iteration takes place in O(`n` + `m`), /// where `n` and `m` are the lengths of the sequence arguments. @@ -293,19 +288,14 @@ where T.Element == U.Element { return mergeSortedSets(first, second, retaining: .sum, sortedBy: areInIncreasingOrder) } -/// Given two sorted sequences treated as sets, apply the given set operation, -/// returning the result as a sorted lazy sequence. -/// -/// For simply merging the sequences, use `.sum` as the operation. +/// Given two sorted sequences, return their still-sorted merger, vended lazily. /// /// - Precondition: Both `first` and `second` must be sorted. /// /// - Parameters: /// - first: The first sequence spliced. /// - second: The second sequence spliced. -/// - filter: The subset of the merged sequence to keep. If not given, -/// defaults to `.sum`. -/// - Returns: A sequence that lazily generates the merged sequence subset. +/// - Returns: The merged sequence. /// /// - Complexity: O(1). The actual iteration takes place in O(`n` + `m`), /// where `n` and `m` are the lengths of the sequence arguments. diff --git a/Sources/Algorithms/MergeSortedSets.swift b/Sources/Algorithms/MergeSortedSets.swift index 275622fc..515ebd6e 100644 --- a/Sources/Algorithms/MergeSortedSets.swift +++ b/Sources/Algorithms/MergeSortedSets.swift @@ -185,7 +185,7 @@ extension RangeReplaceableCollection where Element: Comparable { /// Given two sequences that are both sorted according to the given predicate /// and treated as sets, apply the given set operation, returning the result as -/// a lazy sequence also sorted by the same predicate. +/// a sequence sorted by the predicate and that is vended lazily. /// /// For simply merging the sequences, use `.sum` as the operation. /// @@ -198,7 +198,7 @@ extension RangeReplaceableCollection where Element: Comparable { /// - second: The second sequence spliced. /// - filter: The subset of the merged sequence to keep. /// - areInIncreasingOrder: The criteria for sorting. 
-/// - Returns: A sequence that lazily generates the merged sequence subset. +/// - Returns: The merged sequence subset. /// /// - Complexity: O(1). The actual iteration takes place in O(`n` + `m`), /// where `n` and `m` are the lengths of the sequence arguments. @@ -218,7 +218,7 @@ where T.Element == U.Element { } /// Given two sorted sequences treated as sets, apply the given set operation, -/// returning the result as a sorted lazy sequence. +/// returning the result as a sorted sequence that vends lazily. /// /// For simply merging the sequences, use `.sum` as the operation. /// @@ -228,7 +228,7 @@ where T.Element == U.Element { /// - first: The first sequence spliced. /// - second: The second sequence spliced. /// - filter: The subset of the merged sequence to keep. -/// - Returns: A sequence that lazily generates the merged sequence subset. +/// - Returns: The merged sequence subset. /// /// - Complexity: O(1). The actual iteration takes place in O(`n` + `m`), /// where `n` and `m` are the lengths of the sequence arguments. From ea1a22a7d60e72f88f0e76df5c832105cdfac893 Mon Sep 17 00:00:00 2001 From: Daryle Walker Date: Sat, 3 Aug 2024 12:04:29 -0400 Subject: [PATCH 23/34] Update the general documentation Change the general list of functions and the descriptions of the merging functions to reflect the split between the merge-only and subsetting variants. --- Guides/MergeSorted.md | 27 ++++++++++++++++++--------- Guides/README.md | 2 +- 2 files changed, 19 insertions(+), 10 deletions(-) diff --git a/Guides/MergeSorted.md b/Guides/MergeSorted.md index e1d5013a..c11c0c33 100644 --- a/Guides/MergeSorted.md +++ b/Guides/MergeSorted.md @@ -20,14 +20,14 @@ Due to being sorted, distinguishing elements that are shared between sequences or are exclusive to a sequence can be determined in a resonable time frame. 
Set operations take advantage of the catagories of sharing, -so applying operations can be done in-line during the merging: +so applying operations can be done in-line during merging: ```swift let first = [0, 1, 1, 2, 5, 10], second = [-1, 0, 1, 2, 2, 7, 10, 20] -print(Array(mergeSorted(first, second, retaining: .union))) -print(Array(mergeSorted(first, second, retaining: .intersection))) -print(Array(mergeSorted(first, second, retaining: .secondWithoutFirst))) -print(Array(mergeSorted(first, second, retaining: .sum))) // Standard merge! +print(Array(mergeSortedSets(first, second, retaining: .union))) +print(Array(mergeSortedSets(first, second, retaining: .intersection))) +print(Array(mergeSortedSets(first, second, retaining: .secondWithoutFirst))) +print(Array(mergeSortedSets(first, second, retaining: .sum))) // Standard merge! /* [-1, 0, 1, 1, 2, 2, 5, 7, 10, 20] [0, 1, 2, 10] @@ -62,18 +62,27 @@ a predicate can be omitted to use a default of the less-than operator (`<`). ```swift // Free-function form. Also used for lazy evaluation. 
-public func mergeSorted(_ first: T, _ second: U, retaining filter: MergerSubset = .sum, sortedBy areInIncreasingOrder: @escaping (T.Element, U.Element) -> Bool) -> MergeSortedSequence, LazySequence> where T : Sequence, U : Sequence, T.Element == U.Element +@inlinable public func mergeSorted(_ first: T, _ second: U, sortedBy areInIncreasingOrder: @escaping (T.Element, U.Element) -> Bool) -> MergeSortedSetsSequence where T : Sequence, U : Sequence, T.Element == U.Element -@inlinable public func mergeSorted(_ first: T, _ second: U, retaining filter: MergerSubset = .sum) -> MergeSortedSequence, LazySequence> where T : Sequence, U : Sequence, T.Element : Comparable, T.Element == U.Element +@inlinable public func mergeSorted(_ first: T, _ second: U) -> MergeSortedSetsSequence where T : Sequence, U : Sequence, T.Element : Comparable, T.Element == U.Element + +public func mergeSortedSets(_ first: T, _ second: U, retaining filter: MergerSubset, sortedBy areInIncreasingOrder: @escaping (T.Element, U.Element) -> Bool) -> MergeSortedSetsSequence where T : Sequence, U : Sequence, T.Element == U.Element + +@inlinable public func mergeSortedSets(_ first: T, _ second: U, retaining filter: MergerSubset) -> MergeSortedSetsSequence where T : Sequence, U : Sequence, T.Element : Comparable, T.Element == U.Element // Initializer form. 
extension RangeReplaceableCollection { - public init(mergeSorted first: T, and second: U, retaining filter: MergerSubset = .sum, sortedBy areInIncreasingOrder: (Element, Element) throws -> Bool) rethrows where T : Sequence, U : Sequence, Self.Element == T.Element, T.Element == U.Element + @inlinable public init(mergeSorted first: T, and second: U, sortedBy areInIncreasingOrder: (Element, Element) throws -> Bool) rethrows where T : Sequence, U : Sequence, Self.Element == T.Element, T.Element == U.Element + + public init(mergeSorted first: T, and second: U, retaining filter: MergerSubset, sortedBy areInIncreasingOrder: (Element, Element) throws -> Bool) rethrows where T : Sequence, U : Sequence, Self.Element == T.Element, T.Element == U.Element } extension RangeReplaceableCollection where Self.Element : Comparable { - @inlinable public init(mergeSorted first: T, and second: U, retaining filter: MergerSubset = .sum) where T : Sequence, U : Sequence, Self.Element == T.Element, T.Element == U.Element + @inlinable public init(mergeSorted first: T, and second: U) where T : Sequence, U : Sequence, Self.Element == T.Element, T.Element == U.Element + + @inlinable public init(mergeSorted first: T, and second: U, retaining filter: MergerSubset) where T : Sequence, U : Sequence, Self.Element == T.Element, T.Element == U.Element + } // Two-partition merging, optimizing for speed. diff --git a/Guides/README.md b/Guides/README.md index b0fbbcf3..d2e87b10 100644 --- a/Guides/README.md +++ b/Guides/README.md @@ -34,7 +34,7 @@ These guides describe the design and intention behind the APIs included in the ` - [`trimmingPrefix(while:)`, `trimmingSuffix(while)`, `trimming(while:)`](https://github.com/apple/swift-algorithms/blob/main/Guides/Trim.md): Returns a slice by trimming elements from a collection's start, end, or both. The mutating `trim...` methods trim a collection in place. 
- [`uniqued()`, `uniqued(on:)`](https://github.com/apple/swift-algorithms/blob/main/Guides/Unique.md): The unique elements of a collection, preserving their order. - [`minAndMax()`, `minAndMax(by:)`](https://github.com/apple/swift-algorithms/blob/main/Guides/MinMax.md): Returns the smallest and largest elements of a sequence. -- The `mergeSorted(_:_:retaining:sortedBy:)` function is a variant from the "Combining collections" section above that adds a parameter to apply a set operation in-line with the merge. +- The `mergeSortedSets(_:_:retaining:sortedBy:)` function is a variant from the "Combining collections" section above that adds a parameter to apply a set operation in-line with the merge. #### Partial sorting From b3a50316b3ac261fdbe7698379c8eb9effe4db25 Mon Sep 17 00:00:00 2001 From: Daryle Walker Date: Sat, 3 Aug 2024 12:39:17 -0400 Subject: [PATCH 24/34] Update on a conformance made conditional --- Guides/MergeSorted.md | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/Guides/MergeSorted.md b/Guides/MergeSorted.md index c11c0c33..ef87f78f 100644 --- a/Guides/MergeSorted.md +++ b/Guides/MergeSorted.md @@ -124,12 +124,17 @@ Most of the merging functions use these support types: ```swift public struct MergeSortedSequence - : Sequence, LazySequenceProtocol + : Sequence where First : Sequence, Second : Sequence, First.Element == Second.Element { /*...*/ } +extension MergeSortedSetsSequence + : LazySequenceProtocol +where First : LazySequenceProtocol, Second : LazySequenceProtocol +{ /*...*/ } + public struct MergeSortedIterator : IteratorProtocol where First : IteratorProtocol, From d3fc6b8e85cedb68a7e90df6b470507a16f15f4b Mon Sep 17 00:00:00 2001 From: Daryle Walker Date: Sat, 3 Aug 2024 13:18:35 -0400 Subject: [PATCH 25/34] Add function/type summaries --- Guides/MergeSorted.md | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) diff --git a/Guides/MergeSorted.md b/Guides/MergeSorted.md index ef87f78f..79437577 100644 
--- a/Guides/MergeSorted.md +++ b/Guides/MergeSorted.md @@ -62,46 +62,57 @@ a predicate can be omitted to use a default of the less-than operator (`<`). ```swift // Free-function form. Also used for lazy evaluation. +/// Given two sequences that are both sorted according to the given predicate, return their merger that is sorted by the predicate and vended lazily. @inlinable public func mergeSorted(_ first: T, _ second: U, sortedBy areInIncreasingOrder: @escaping (T.Element, U.Element) -> Bool) -> MergeSortedSetsSequence where T : Sequence, U : Sequence, T.Element == U.Element +/// Given two sorted sequences, return their still-sorted merger, vended lazily. @inlinable public func mergeSorted(_ first: T, _ second: U) -> MergeSortedSetsSequence where T : Sequence, U : Sequence, T.Element : Comparable, T.Element == U.Element +/// Given two sequences that are both sorted according to the given predicate and treated as sets, apply the given set operation, returning the result as a sequence sorted by the predicate and that is vended lazily. public func mergeSortedSets(_ first: T, _ second: U, retaining filter: MergerSubset, sortedBy areInIncreasingOrder: @escaping (T.Element, U.Element) -> Bool) -> MergeSortedSetsSequence where T : Sequence, U : Sequence, T.Element == U.Element +/// Given two sorted sequences treated as sets, apply the given set operation, returning the result as a sorted sequence that vends lazily. @inlinable public func mergeSortedSets(_ first: T, _ second: U, retaining filter: MergerSubset) -> MergeSortedSetsSequence where T : Sequence, U : Sequence, T.Element : Comparable, T.Element == U.Element // Initializer form. extension RangeReplaceableCollection { + /// Given two sequences that are both sorted according to the given predicate, create their sorted merger. 
@inlinable public init(mergeSorted first: T, and second: U, sortedBy areInIncreasingOrder: (Element, Element) throws -> Bool) rethrows where T : Sequence, U : Sequence, Self.Element == T.Element, T.Element == U.Element + /// Given two sequences that are both sorted according to the given predicate, treat them as sets, and create the sorted result of the given set operation. public init(mergeSorted first: T, and second: U, retaining filter: MergerSubset, sortedBy areInIncreasingOrder: (Element, Element) throws -> Bool) rethrows where T : Sequence, U : Sequence, Self.Element == T.Element, T.Element == U.Element } extension RangeReplaceableCollection where Self.Element : Comparable { + /// Given two sorted sequences, create their sorted merger. @inlinable public init(mergeSorted first: T, and second: U) where T : Sequence, U : Sequence, Self.Element == T.Element, T.Element == U.Element + /// Given two sorted sequences, treat them as sets, and create the sorted result of the given set operation. @inlinable public init(mergeSorted first: T, and second: U, retaining filter: MergerSubset) where T : Sequence, U : Sequence, Self.Element == T.Element, T.Element == U.Element - } // Two-partition merging, optimizing for speed. extension MutableCollection { + /// Given a partition point, where each side is sorted according to the given predicate, rearrange the elements until a single sorted run is formed. public mutating func mergeSortedPartitions(across pivot: Index, sortedBy areInIncreasingOrder: (Element, Element) throws -> Bool) rethrows } extension MutableCollection where Self.Element : Comparable { + /// Given a partition point, where each side is sorted, rearrange the elements until a single sorted run is formed. @inlinable public mutating func mergeSortedPartitions(across pivot: Index) } // Two-partition merging, optimizing for space. 
extension MutableCollection where Self : BidirectionalCollection { + /// Given a partition point, where each side is sorted according to the given predicate, rearrange the elements until a single sorted run is formed, using minimal scratch memory. public mutating func mergeSortedPartitionsInPlace(across pivot: Index, sortedBy areInIncreasingOrder: (Element, Element) throws -> Bool) rethrows } extension MutableCollection where Self : BidirectionalCollection, Self.Element : Comparable { + /// Given a partition point, where each side is sorted, rearrange the elements until a single sorted run is formed, using minimal scratch memory. @inlinable public mutating func mergeSortedPartitionsInPlace(across pivot: Index) } ``` @@ -109,6 +120,7 @@ extension MutableCollection where Self : BidirectionalCollection, Self.Element : Target subsets are described by a new type. ```swift +/// Description of which elements of a merger will be retained. public enum MergerSubset : UInt, CaseIterable { case none, firstWithoutSecond, secondWithoutFirst, symmetricDifference, intersection, first, second, union, @@ -123,6 +135,7 @@ Every set-operation combination is provided, although some are degenerate. Most of the merging functions use these support types: ```swift +/// A sequence that lazily vends the sorted result of a set operation upon two sorted sequences treated as sets spliced together, using a predicate as the sorting criteria for all three sequences involved. public struct MergeSortedSequence : Sequence where First : Sequence, @@ -135,6 +148,7 @@ extension MergeSortedSetsSequence where First : LazySequenceProtocol, Second : LazySequenceProtocol { /*...*/ } +/// An iterator that applies a set operation on two virtual sequences, both treated as sets sorted according a predicate, spliced together to vend a virtual sequence that is also sorted. 
public struct MergeSortedIterator : IteratorProtocol where First : IteratorProtocol, From 02838f0c3d15889413b2553d8a31f3f8c55f3282 Mon Sep 17 00:00:00 2001 From: Daryle Walker Date: Sat, 3 Aug 2024 13:29:59 -0400 Subject: [PATCH 26/34] Update documentation over the full/subset split Change the detailed design to accommodate for the split between the full versus subsetting merger functions. --- Guides/MergeSorted.md | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/Guides/MergeSorted.md b/Guides/MergeSorted.md index 79437577..48a119bd 100644 --- a/Guides/MergeSorted.md +++ b/Guides/MergeSorted.md @@ -40,21 +40,20 @@ print(Array(mergeSortedSets(first, second, retaining: .sum))) // Standard merge The merging algorithm can be applied in three domains: -- A free function taking the source sequences. -- An initializer for `RangeReplaceableCollection`, - that takes the source sequences and then - creates the result in-place. -- A function over a `MutableCollection`, +- Free functions taking the source sequences. +- Initializers for `RangeReplaceableCollection`, + that take the source sequences and then + create the result in-place. +- Functions over a `MutableCollection`, where the two sources are adjancent partitions of the collection. -The free-function and initializer forms can take an optional parameter, -that indicates which subset of the merge will be kept. -For instance, when using `.intersection`, only elements that appear in -both sources will be returned, any non-matches will be skipped over. -If a subset argument is not given, it defaults to `.sum`, -which represents a conventional merge. -The form for adjancent partitions cannot use subsetting, -always performing with a subset of `.sum`. +The free-function and initializer forms have variants that +take an extra parameter, +which represent which subset of the merger will be kept. 
+For instance, +using `.intersection` makes the resulting merger contain only the elements that +appear in both sources, +skipping any elements that appear in exactly one source. All of the forms take a parameter for the ordering predicate. If the element type conforms to `Comparable`, a predicate can be omitted to use a default of the less-than operator (`<`). From 59b2d4c9e18de3f2ed943f1c4f218ef3c3de81e2 Mon Sep 17 00:00:00 2001 From: Daryle Walker Date: Sat, 3 Aug 2024 22:32:56 -0400 Subject: [PATCH 27/34] Correct (lack of) copy & paste error --- Guides/MergeSorted.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Guides/MergeSorted.md b/Guides/MergeSorted.md index 48a119bd..91c3b4fc 100644 --- a/Guides/MergeSorted.md +++ b/Guides/MergeSorted.md @@ -135,7 +135,7 @@ Most of the merging functions use these support types: ```swift /// A sequence that lazily vends the sorted result of a set operation upon two sorted sequences treated as sets spliced together, using a predicate as the sorting criteria for all three sequences involved. -public struct MergeSortedSequence +public struct MergeSortedSetsSequence : Sequence where First : Sequence, Second : Sequence, @@ -148,7 +148,7 @@ where First : LazySequenceProtocol, Second : LazySequenceProtocol { /*...*/ } /// An iterator that applies a set operation on two virtual sequences, both treated as sets sorted according a predicate, spliced together to vend a virtual sequence that is also sorted. 
-public struct MergeSortedIterator +public struct MergeSortedSetsIterator : IteratorProtocol where First : IteratorProtocol, Second : IteratorProtocol, From d539b1f8552094137fba4815d6cf113faface23b Mon Sep 17 00:00:00 2001 From: Daryle Walker Date: Sat, 3 Aug 2024 22:41:07 -0400 Subject: [PATCH 28/34] Update the merge summary on the full/subset split --- Sources/Algorithms/Documentation.docc/Merging.md | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/Sources/Algorithms/Documentation.docc/Merging.md b/Sources/Algorithms/Documentation.docc/Merging.md index 79ea63b8..49169184 100644 --- a/Sources/Algorithms/Documentation.docc/Merging.md +++ b/Sources/Algorithms/Documentation.docc/Merging.md @@ -8,10 +8,14 @@ then generate the result of applying a set operation. ### Merging Sorted Sequences +- ``Swift/RangeReplaceableCollection/init(mergeSorted:and:sortedBy:)`` +- ``Swift/RangeReplaceableCollection/init(mergeSorted:and:)`` - ``Swift/RangeReplaceableCollection/init(mergeSorted:and:retaining:sortedBy:)`` - ``Swift/RangeReplaceableCollection/init(mergeSorted:and:retaining:)`` -- ``mergeSorted(_:_:retaining:areInIncreasingOrder:)`` -- ``mergeSorted(_:_:retaining:)`` +- ``mergeSorted(_:_:sortedBy:)`` +- ``mergeSorted(_:_:)`` +- ``mergeSortedSets(_:_:retaining:sortedBy:)`` +- ``mergeSortedSets(_:_:retaining:)`` - ``Swift/MutableCollection/mergeSortedPartitions(across:sortedBy:)`` - ``Swift/MutableCollection/mergeSortedPartitions(across:)`` - ``Swift/MutableCollection/mergeSortedPartitionsInPlace(across:sortedBy:)`` @@ -20,5 +24,5 @@ then generate the result of applying a set operation. 
### Supporting Types - ``MergerSubset`` -- ``MergeSortedSequence`` -- ``MergeSortedIterator`` +- ``MergeSortedSetsSequence`` +- ``MergeSortedSetsIterator`` From b3eeabbd29200c466552432b2a03be752029c8ea Mon Sep 17 00:00:00 2001 From: Daryle Walker Date: Sun, 22 Sep 2024 22:00:35 -0400 Subject: [PATCH 29/34] Add preliminary filter-less merger sequence --- Sources/Algorithms/MergeSorted.swift | 115 ++++++++++++++++++ .../MergeSortedTests.swift | 8 ++ 2 files changed, 123 insertions(+) diff --git a/Sources/Algorithms/MergeSorted.swift b/Sources/Algorithms/MergeSorted.swift index ae03b781..37f37f6d 100644 --- a/Sources/Algorithms/MergeSorted.swift +++ b/Sources/Algorithms/MergeSorted.swift @@ -306,3 +306,118 @@ public func mergeSorted( where T.Element == U.Element, T.Element: Comparable { return mergeSorted(first, second, sortedBy: <) } + +//===----------------------------------------------------------------------===// +// MARK: - MergeSortedSequence +//-------------------------------------------------------------------------===// + +/// A sequence taking some sequences, +/// all sorted along a predicate, +/// that vends the spliced-together merged sequence, +/// where said sequence is also sorted. +/// +/// - TODO: When Swift supports same-element requirements for +/// variadic generics, change this type's generic pattern to +/// accept any number of source iterators. +@available(macOS 13.0.0, *) +public struct MergeSortedSequence +where First.Element == Second.Element +{ + /// The sorting criterion. + let areInIncreasingOrder: (Element, Element) throws -> Bool + /// The first source sequence. + let first: First + /// The second source sequence. 
+ let second: Second + + public + init( + _ first: First, + _ second: Second, + sortedBy areInIncreasingOrder: @escaping (Element, Element) throws -> Bool + ) { + self.first = first + self.second = second + self.areInIncreasingOrder = areInIncreasingOrder + } +} + +@available(macOS 13.0.0, *) +extension MergeSortedSequence: Sequence { + public func makeIterator() -> MergeSortedIterator { + return .init(first.makeIterator(), second.makeIterator(), + areInIncreasingOrder: areInIncreasingOrder) + } + + public var underestimatedCount: Int { + let result = first.underestimatedCount + .addingReportingOverflow(second.underestimatedCount) + return result.overflow ? .max : result.partialValue + } +} + +@available(macOS 13.0.0, *) +extension MergeSortedSequence: LazySequenceProtocol +where First: LazySequenceProtocol, Second: LazySequenceProtocol +{ + public var elements: MergeSortedSequence { + .init(first.elements, second.elements, sortedBy: areInIncreasingOrder) + } +} + +//===----------------------------------------------------------------------===// +// MARK: - MergeSortedIterator +//-------------------------------------------------------------------------===// + +/// An iterator taking some virtual sequences, +/// all sorted along a predicate, +/// that vends the spliced-together virtual sequence merger, +/// where said sequence is also sorted. +@available(macOS 13.0.0, *) +public struct MergeSortedIterator { + /// The sorting criterion. + let areInIncreasingOrder: (Element, Element) throws -> Bool + /// The sources to splice together. + var sources: [(latest: Element?, source: any IteratorProtocol)] + + /// Create an iterator that reads from the two given sources and + /// vends their merger, + /// assuming all three virtual sequences are sorted according to + /// the given predicate. + /// + /// - TODO: When Swift supports same-element requirements for + /// variadic generics, change this initializer to accept any number of + /// source iterators. 
+ init, U: IteratorProtocol>( + _ first: T, + _ second: U, + areInIncreasingOrder: @escaping (Element, Element) throws -> Bool + ) { + self.areInIncreasingOrder = areInIncreasingOrder + self.sources = [(nil, first), (nil, second)] + } +} + +@available(macOS 13.0.0, *) +extension MergeSortedIterator: IteratorProtocol { + /// Advance to the next element, if any. May throw. + @usableFromInline + mutating func throwingNext() throws -> Element? { + for index in sources.indices { + sources[index].latest = sources[index].latest + ?? sources[index].source.next() + } + sources.removeAll { $0.latest == nil } + guard let indexOfSmallest = try sources.indices.min(by: { + try areInIncreasingOrder(sources[$0].latest!, sources[$1].latest!) + }) else { return nil } + defer { sources[indexOfSmallest].latest = nil } + + return sources[indexOfSmallest].latest + } + + @inlinable + public mutating func next() -> Element? { + return try! throwingNext() + } +} diff --git a/Tests/SwiftAlgorithmsTests/MergeSortedTests.swift b/Tests/SwiftAlgorithmsTests/MergeSortedTests.swift index ed77f462..31386834 100644 --- a/Tests/SwiftAlgorithmsTests/MergeSortedTests.swift +++ b/Tests/SwiftAlgorithmsTests/MergeSortedTests.swift @@ -157,6 +157,14 @@ final class MergeSortedTests: XCTestCase { XCTAssertEqualSequences(result, [0, 1, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 8, 9]) } + /// Test mergers for any number of arguments (one day). + @available(macOS 13.0.0, *) + func testMoreMergers() { + let low = 0..<7, high = 3..<10, + result = MergeSortedSequence(low, high, sortedBy: <) + XCTAssertEqualSequences(result, [0, 1, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 8, 9]) + } + // MARK: - Partition Mergers /// Check the more-memory version of merging two sorted partitions. From 1ee97e0f3c27ee133be853e33278bd1a4c8834f1 Mon Sep 17 00:00:00 2001 From: Daryle Walker Date: Wed, 25 Sep 2024 15:34:50 -0400 Subject: [PATCH 30/34] Move partition-based merging to separate files Remove the quick partition-merge functions. 
Move the lower-memory partition-merge functions to their own file. Rename the functions, lessening the amount of "sort" in the labeling. Replace the general throw markers with exact-error markers. Make the corresponding changes to the tests. --- Guides/MergeSorted.md | 32 ++- Guides/README.md | 1 + .../Algorithms/Documentation.docc/Merging.md | 2 - Sources/Algorithms/MergePartitions.swift | 178 ++++++++++++++++ Sources/Algorithms/MergeSorted.swift | 199 +----------------- .../MergePartitionsTests.swift | 91 ++++++++ .../MergeSortedTests.swift | 83 -------- 7 files changed, 284 insertions(+), 302 deletions(-) create mode 100644 Sources/Algorithms/MergePartitions.swift create mode 100644 Tests/SwiftAlgorithmsTests/MergePartitionsTests.swift diff --git a/Guides/MergeSorted.md b/Guides/MergeSorted.md index 91c3b4fc..870a3c2b 100644 --- a/Guides/MergeSorted.md +++ b/Guides/MergeSorted.md @@ -91,28 +91,23 @@ extension RangeReplaceableCollection where Self.Element : Comparable { @inlinable public init(mergeSorted first: T, and second: U, retaining filter: MergerSubset) where T : Sequence, U : Sequence, Self.Element == T.Element, T.Element == U.Element } -// Two-partition merging, optimizing for speed. - -extension MutableCollection { - /// Given a partition point, where each side is sorted according to the given predicate, rearrange the elements until a single sorted run is formed. - public mutating func mergeSortedPartitions(across pivot: Index, sortedBy areInIncreasingOrder: (Element, Element) throws -> Bool) rethrows -} - -extension MutableCollection where Self.Element : Comparable { - /// Given a partition point, where each side is sorted, rearrange the elements until a single sorted run is formed. - @inlinable public mutating func mergeSortedPartitions(across pivot: Index) -} - -// Two-partition merging, optimizing for space. +// Two-partition merging. 
extension MutableCollection where Self : BidirectionalCollection { - /// Given a partition point, where each side is sorted according to the given predicate, rearrange the elements until a single sorted run is formed, using minimal scratch memory. - public mutating func mergeSortedPartitionsInPlace(across pivot: Index, sortedBy areInIncreasingOrder: (Element, Element) throws -> Bool) rethrows + /// Assuming that both this collection's slice before the given index and + /// the slice at and past that index are both sorted according to + /// the given predicate, + /// rearrange the slices' elements until the collection as + /// a whole is sorted according to the predicate. + public mutating func mergePartitions(across pivot: Index, sortedBy areInIncreasingOrder: (Element, Element) throws(Fault) -> Bool) throws(Fault) where Fault : Error } extension MutableCollection where Self : BidirectionalCollection, Self.Element : Comparable { - /// Given a partition point, where each side is sorted, rearrange the elements until a single sorted run is formed, using minimal scratch memory. - @inlinable public mutating func mergeSortedPartitionsInPlace(across pivot: Index) + /// Assuming that both this collection's slice before the given index and + /// the slice at and past that index are both sorted, + /// rearrange the slices' elements until the collection as + /// a whole is sorted. + @inlinable public mutating func mergePartitions(across pivot: Index) } ``` @@ -160,11 +155,10 @@ The merges via: - The free functions - The initializers -- The speed-optimized partition-merge Operate in **O(** _n_ `+` _m_ **)** for both space and time, where *n* and *m* are the lengths of the two operand sequences/partitions. 
-The space-optimized partition merge for a collection of length *n* operates in +A partition merge for a collection of length *n* operates in **O(** 1 **)** for space, **O(** _n_ **)** for time when the collection is not random-access, and *???* for time in random-access collections. diff --git a/Guides/README.md b/Guides/README.md index d2e87b10..0dc3c9d9 100644 --- a/Guides/README.md +++ b/Guides/README.md @@ -12,6 +12,7 @@ These guides describe the design and intention behind the APIs included in the ` #### Mutating algorithms +- [`mergePartitions(across:)`, `mergePartitions(across:sortedBy:)`](https://github.com/apple/swift-algorithms/blob/main/Guides/MergedSorted.md): In-place merger of sorted partitions. - [`rotate(toStartAt:)`, `rotate(subrange:toStartAt:)`](https://github.com/apple/swift-algorithms/blob/main/Guides/Rotate.md): In-place rotation of elements. - [`stablePartition(by:)`, `stablePartition(subrange:by:)`](https://github.com/apple/swift-algorithms/blob/main/Guides/Partition.md): A partition that preserves the relative order of the resulting prefix and suffix. diff --git a/Sources/Algorithms/Documentation.docc/Merging.md b/Sources/Algorithms/Documentation.docc/Merging.md index 49169184..266c9f48 100644 --- a/Sources/Algorithms/Documentation.docc/Merging.md +++ b/Sources/Algorithms/Documentation.docc/Merging.md @@ -18,8 +18,6 @@ then generate the result of applying a set operation. 
- ``mergeSortedSets(_:_:retaining:)`` - ``Swift/MutableCollection/mergeSortedPartitions(across:sortedBy:)`` - ``Swift/MutableCollection/mergeSortedPartitions(across:)`` -- ``Swift/MutableCollection/mergeSortedPartitionsInPlace(across:sortedBy:)`` -- ``Swift/MutableCollection/mergeSortedPartitionsInPlace(across:)`` ### Supporting Types diff --git a/Sources/Algorithms/MergePartitions.swift b/Sources/Algorithms/MergePartitions.swift new file mode 100644 index 00000000..b067c789 --- /dev/null +++ b/Sources/Algorithms/MergePartitions.swift @@ -0,0 +1,178 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift Algorithms open source project +// +// Copyright (c) 2024 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// +//===----------------------------------------------------------------------===// + +extension MutableCollection where Self: BidirectionalCollection { + /// Assuming that both this collection's slice before the given index and + /// the slice at and past that index are both sorted according to + /// the given predicate, + /// rearrange the slices' elements until the collection as + /// a whole is sorted according to the predicate. + /// + /// Equivalent elements retain their relative order. + /// + /// It may be faster to use a global `merge` function with the partitions and + /// the sorting predicate as the arguments and then copy the + /// sorted result back. + /// + /// - Precondition: The `pivot` must be a valid index of this collection. + /// The partitions of `startIndex..( + across pivot: Index, + sortedBy areInIncreasingOrder: (Element, Element) throws(Fault) -> Bool + ) throws(Fault) { + // The pivot needs to be an interior element. + // (This therefore requires `self` to have a length of at least 2.) 
+ guard pivot > startIndex, pivot < endIndex else { return } + + // Since each major partition is already sorted, we only need to swap the + // highest ranks of the leading partition with the lowest ranks of the + // trailing partition. + // + // - Zones: |--[1]--|--------[2]--------|------[3]------|---[4]---| + // - Before: ...[<=p], [x > p],... [>= x]; [p],... [<= x], [> x],... + // - After: ...[<=p], [p],... [<= x]; [x > p],... [>= x], [> x],... + // - Zones: |--[1]--|------[3]------|--------[2]--------|---[4]---| + // + // In other words: we're swapping the positions of zones [2] and [3]. + // + // Afterwards, the new leading partition of [1] and [3] ends up naturally + // sorted. However, the highest ranked element of [2] may outrank + // the lowest ranked element of [4], so the trailing partition ends up + // needing to call this function itself. + + // Find starting index of [2]. + let lowPivot: Index + do { + // Among the elements before the pivot, find the reverse-earliest that has + // at most an equivalent rank as the pivot element. + let pivotValue = self[pivot], searchSpace = self[.. Bool { + // e <= pivotValue → !(e > pivotValue) → !(pivotValue < e) + return try !areInIncreasingOrder(pivotValue, e) + } + if case let beforeLowPivot = try searchSpace.pi(where: atMostPivotValue), + beforeLowPivot < searchSpace.endIndex { + // In forward space, the element after the one just found will rank + // higher than the pivot element. + lowPivot = beforeLowPivot.base + + // There may be no prefix elements that outrank the pivot element. + // In other words, [2] is empty. + // (Therefore this collection is already globally sorted.) + guard lowPivot < pivot else { return } + } else { + // All the prefix elements rank higher than the pivot element. + // In other words, [1] is empty. + lowPivot = startIndex + } + } + + // Find the ending index of [3]. 
+ let highPivot: Index + do { + // Find the earliest post-pivot element that ranks higher than the element + // from the previous step. If there isn't a match, i.e. [4] is empty, the + // entire post-pivot partition will be swapped. + let lowPivotValue = self[lowPivot] + func moreThanLowPivotValue(_ e: Element) throws(Fault) -> Bool { + return try areInIncreasingOrder(lowPivotValue, e) + } + highPivot = try self[pivot...].pi(where: moreThanLowPivotValue) + + // [3] starts with the pivot element, so it can never be empty. + } + + // Actually swap [2] and [3], then recur into [2] + [4]. + let exLowPivot = rotate(subrange: lowPivot..( + where in2nd: (Element) throws(Fault) -> Bool + ) throws(Fault) -> Index { + var n = count + var l = startIndex + + while n > 0 { + let half = n / 2 + let mid = index(l, offsetBy: half) + if try in2nd(self[mid]) { + n = half + } else { + l = index(after: mid) + n -= half + 1 + } + } + return l + } +} diff --git a/Sources/Algorithms/MergeSorted.swift b/Sources/Algorithms/MergeSorted.swift index 37f37f6d..c4def73c 100644 --- a/Sources/Algorithms/MergeSorted.swift +++ b/Sources/Algorithms/MergeSorted.swift @@ -10,204 +10,7 @@ //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// -// MARK: MutableCollection.mergeSortedPartitions(across:sortedBy:) -//-------------------------------------------------------------------------===// - -extension MutableCollection { - /// Given a partition point, - /// where each side is sorted according to the given predicate, - /// rearrange the elements until a single sorted run is formed. - /// - /// Equivalent elements from a given partition have stable ordering in - /// the unified sequence. - /// - /// - Precondition: The `pivot` must be a valid index of this collection. - /// The partitions of `startIndex.. 
Bool - ) rethrows { - var duplicate = self - try withoutActuallyEscaping(areInIncreasingOrder) { - let sequence = MergeSortedSetsSequence(merging: self[startIndex.. Bool - ) rethrows { - // The pivot needs to be an interior element. - // (This therefore requires `self` to have a length of at least 2.) - guard pivot > startIndex, pivot < endIndex else { return } - - // Since each major partition is already sorted, we only need to swap the - // highest ranks of the starting partition with the lowest ranks of the - // trailing partition. - // - // - Zones: |--[1]--|--------[2]--------|------[3]------|---[4]---| - // - Before: ...[<=p], [x > p],... [>= x]; [p],... [<= x], [> x],... - // - After: ...[<=p], [p],... [<= x]; [x > p],... [>= x], [> x],... - // - Zones: |--[1]--|------[3]------|--------[2]--------|---[4]---| - // - // In other words: we're swapping the positions of zones [2] and [3]. - // - // Afterwards, the new starting partition of [1] and [3] ends up naturally - // sorted. However, the highest ranked element of [2] may rank higher than - // the lowest ranked element of [4], so the trailing partition ends up - // needing to call this function itself. - - // Find starting index of [2]. - let lowPivot: Index - do { - // Among the elements before the pivot, find the reverse-earliest that has - // at most an equivalent rank as the pivot element. - let pivotValue = self[pivot], searchSpace = self[.. pivotValue) → !(pivotValue < $0) - return try !areInIncreasingOrder(pivotValue, $0) - }), - beforeLowPivot < searchSpace.endIndex { - // In forward space, the element after the one just found will rank - // higher than the pivot element. - lowPivot = beforeLowPivot.base - - // There may be no prefix elements that outrank the pivot element. - // In other words, [2] is empty. - // (Therefore this collection is already globally sorted.) - guard lowPivot < pivot else { return } - } else { - // All the prefix elements rank higher than the pivot element. 
- // In other words, [1] is empty. - lowPivot = startIndex - } - } - - // Find the ending index of [3]. - let highPivot: Index - do { - // Find the earliest post-pivot element that ranks higher than the element - // from the previous step. If there isn't a match, i.e. [4] is empty, the - // entire post-pivot partition will be swapped. - let lowPivotValue = self[lowPivot] - highPivot = try self[pivot...].partitioningIndex { - try areInIncreasingOrder(lowPivotValue, $0) - } - } - // [3] starts with the pivot element, so it can never be empty. - - // Actually swap [2] and [3], then compare [2] and [4]. - let exLowPivot = rotate(subrange: lowPivot..() + XCTAssertEqualSequences(empty, []) + empty.mergePartitions(across: empty.startIndex) + XCTAssertEqualSequences(empty, []) + empty.mergePartitions(across: empty.endIndex) + XCTAssertEqualSequences(empty, []) + + var single = CollectionOfOne(2) + XCTAssertEqualSequences(single, [2]) + single.mergePartitions(across: single.startIndex) + XCTAssertEqualSequences(single, [2]) + single.mergePartitions(across: single.endIndex) + XCTAssertEqualSequences(single, [2]) + } + + /// Check the regular merging cases. + func testNonThrowingCases() { + // No sub-partitions empty. + var sample1 = [0, 2, 4, 6, 8, 10, 1, 3, 5, 7, 9] + sample1.mergePartitions(across: 6) + XCTAssertEqualSequences(sample1, 0...10) + + // No pre-pivot elements less than or equal to the pivot element. + var sample2 = [4, 6, 8, 3, 5, 7] + sample2.mergePartitions(across: 3) + XCTAssertEqualSequences(sample2, 3...8) + + // No pre-pivot elements greater than the pivot element. + var sample3 = [3, 4, 5, 6, 7, 8] + sample3.mergePartitions(across: 3) + XCTAssertEqualSequences(sample3, 3...8) + + // The greatest elements are in the pre-pivot partition. + var sample4 = [3, 7, 8, 9, 4, 5, 6] + sample4.mergePartitions(across: 4) + XCTAssertEqualSequences(sample4, 3...9) + } + + /// Check what happens when the predicate throws. 
+ func testThrowingCases() { + /// An error type. + enum MyError: Error { + /// An error state. + case anError + } + + // Test throwing. + var sample5 = [5, 3], counter = 0, limit = 1 + let compare: (Int, Int) throws -> Bool = { + guard counter < limit else { throw MyError.anError } + defer { counter += 1 } + + return $0 < $1 + } + XCTAssertThrowsError(try sample5.mergePartitions(across: 1, + sortedBy: compare)) + + // Interrupted comparisons. + sample5 = [2, 2, 4, 20, 3, 3, 5, 7] + counter = 0 ; limit = 6 + XCTAssertThrowsError(try sample5.mergePartitions(across: 4, + sortedBy: compare)) + XCTAssertEqualSequences(sample5, [2, 2, 4, 20, 3, 3, 5, 7]) + + // No interruptions. + counter = 0 ; limit = .max + XCTAssertNoThrow(try sample5.mergePartitions(across: 4, sortedBy: compare)) + XCTAssertEqualSequences(sample5, [2, 2, 3, 3, 4, 5, 7, 20]) + } + + // MARK: - Sample Code + + // To be determined... +} diff --git a/Tests/SwiftAlgorithmsTests/MergeSortedTests.swift b/Tests/SwiftAlgorithmsTests/MergeSortedTests.swift index 31386834..bc895603 100644 --- a/Tests/SwiftAlgorithmsTests/MergeSortedTests.swift +++ b/Tests/SwiftAlgorithmsTests/MergeSortedTests.swift @@ -165,89 +165,6 @@ final class MergeSortedTests: XCTestCase { XCTAssertEqualSequences(result, [0, 1, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 8, 9]) } - // MARK: - Partition Mergers - - /// Check the more-memory version of merging two sorted partitions. - func testFastPartitionMerge() { - // Degenerate count of elements. - var empty = EmptyCollection(), single = CollectionOfOne(1) - XCTAssertEqualSequences(empty, []) - XCTAssertEqualSequences(single, [1]) - empty.mergeSortedPartitions(across: empty.startIndex) - single.mergeSortedPartitions(across: single.startIndex) - XCTAssertEqualSequences(empty, []) - XCTAssertEqualSequences(single, [1]) - - // Each side has multiple elements. 
- let low = 0..<7, high = 3..<10, pivot = low.count - var multiple = Array(chain(low, high)) - XCTAssertEqualSequences(multiple, [0, 1, 2, 3, 4, 5, 6, 3, 4, 5, 6, 7, 8, 9]) - multiple.mergeSortedPartitions(across: pivot) - XCTAssertEqualSequences(multiple, [0, 1, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 8, 9]) - } - - /// Check the in-place version of merging two sorted partitions. - func testSlowPartitionMerge() { - // Degenerate cases. - var empty = EmptyCollection() - XCTAssertEqualSequences(empty, []) - empty.mergeSortedPartitionsInPlace(across: empty.startIndex) - XCTAssertEqualSequences(empty, []) - empty.mergeSortedPartitionsInPlace(across: empty.endIndex) - XCTAssertEqualSequences(empty, []) - - var single = CollectionOfOne(2) - XCTAssertEqualSequences(single, [2]) - single.mergeSortedPartitionsInPlace(across: single.startIndex) - XCTAssertEqualSequences(single, [2]) - single.mergeSortedPartitionsInPlace(across: single.endIndex) - XCTAssertEqualSequences(single, [2]) - - // No sub-partitions empty. - var sample1 = [0, 2, 4, 6, 8, 10, 1, 3, 5, 7, 9] - sample1.mergeSortedPartitionsInPlace(across: 6) - XCTAssertEqualSequences(sample1, 0...10) - - // No pre-pivot elements less than or equal to the pivot element. - var sample2 = [4, 6, 8, 3, 5, 7] - sample2.mergeSortedPartitionsInPlace(across: 3) - XCTAssertEqualSequences(sample2, 3...8) - - // No pre-pivot elements greater than the pivot element. - var sample3 = [3, 4, 5, 6, 7, 8] - sample3.mergeSortedPartitionsInPlace(across: 3) - XCTAssertEqualSequences(sample3, 3...8) - - // The greatest elements are in the pre-pivot partition. - var sample4 = [3, 7, 8, 9, 4, 5, 6] - sample4.mergeSortedPartitionsInPlace(across: 4) - XCTAssertEqualSequences(sample4, 3...9) - - /// An error type. - enum MyError: Error { - /// An error state. - case anError - } - - // Test throwing. 
- var sample5 = [5, 3], counter = 0, limit = 1 - let compare: (Int, Int) throws -> Bool = { - guard counter < limit else { throw MyError.anError } - defer { counter += 1 } - - return $0 < $1 - } - XCTAssertThrowsError(try sample5.mergeSortedPartitionsInPlace(across: 1, sortedBy: compare)) - - sample5 = [2, 2, 4, 20, 3, 3, 5, 7] - counter = 0 ; limit = 6 - XCTAssertThrowsError(try sample5.mergeSortedPartitionsInPlace(across: 4, sortedBy: compare)) - XCTAssertEqualSequences(sample5, [2, 2, 4, 20, 3, 3, 5, 7]) - counter = 0 ; limit = .max - XCTAssertNoThrow(try sample5.mergeSortedPartitionsInPlace(across: 4, sortedBy: compare)) - XCTAssertEqualSequences(sample5, [2, 2, 3, 3, 4, 5, 7, 20]) - } - // MARK: - Sample Code /// Check the code from documentation. From 6caf3405c2b2c93188a3576b7f8b59ad19a1e988 Mon Sep 17 00:00:00 2001 From: Daryle Walker Date: Wed, 25 Sep 2024 16:15:34 -0400 Subject: [PATCH 31/34] Redo merging two sequences Remove initializer-based merging. Replace and rename all the free functions and their support sequence/iterator types. Adjust the documentation and testing to match. Remove filter-less merging. Rename all the (lazy) free functions and their support sequence and iterator types. Replace the merging initializers with (eager) free functions. 
--- Guides/Merge.md | 196 ++++++++ Guides/MergeSorted.md | 173 ------- Guides/README.md | 3 +- .../Algorithms/Documentation.docc/Merging.md | 20 +- Sources/Algorithms/Merge.swift | 426 ++++++++++++++++++ Sources/Algorithms/MergeSorted.swift | 226 ---------- Sources/Algorithms/MergeSortedSets.swift | 411 ----------------- .../MergeSortedTests.swift | 191 -------- Tests/SwiftAlgorithmsTests/MergeTests.swift | 187 ++++++++ 9 files changed, 818 insertions(+), 1015 deletions(-) create mode 100644 Guides/Merge.md delete mode 100644 Guides/MergeSorted.md create mode 100644 Sources/Algorithms/Merge.swift delete mode 100644 Sources/Algorithms/MergeSorted.swift delete mode 100644 Sources/Algorithms/MergeSortedSets.swift delete mode 100644 Tests/SwiftAlgorithmsTests/MergeSortedTests.swift create mode 100644 Tests/SwiftAlgorithmsTests/MergeTests.swift diff --git a/Guides/Merge.md b/Guides/Merge.md new file mode 100644 index 00000000..8f066821 --- /dev/null +++ b/Guides/Merge.md @@ -0,0 +1,196 @@ +# Merge + +- Between Partitions: + [[Source](https://github.com/apple/swift-algorithms/blob/main/Sources/Algorithms/MergePartitions.swift) | + [Tests](https://github.com/apple/swift-algorithms/blob/main/Tests/SwiftAlgorithmsTests/MergePartitionsTests.swift)] +- Between Arbitrary Sequences: + [[Source](https://github.com/apple/swift-algorithms/blob/main/Sources/Algorithms/Merge.swift) | + [Tests](https://github.com/apple/swift-algorithms/blob/main/Tests/SwiftAlgorithmsTests/MergeTests.swift)] + +Splice two sequences that use the same sorting criteria into a sequence that +is also sorted with that criteria. + +If the sequences are sorted with something besides the less-than operator (`<`), +then a predicate can be supplied: + +```swift +let merged = merge([10, 4, 0, 0, -3], [20, 6, 1, -1, -5], keeping: .sum, sortedBy: >) +print(Array(merged)) +// [20, 10, 6, 4, 1, 0, 0, -1, -3, -5] +``` + +Sorted sequences can be treated as (multi-)sets. 
+Due to being sorted, +distinguishing elements that are shared between sequences or +are exclusive to a sequence can be determined in a reasonable time frame. +Set operations take advantage of the categories of sharing, +so applying operations can be done in-line during merging: + +```swift +let first = [0, 1, 1, 2, 5, 10], second = [-1, 0, 1, 2, 2, 7, 10, 20] +print(merge(first, second, into: Array.self, keeping: .union)) +print(merge(first, second, into: Array.self, keeping: .intersection)) +print(merge(first, second, into: Array.self, keeping: .secondWithoutFirst)) +print(merge(first, second, into: Array.self, keeping: .sum)) // Standard merge! +/* +[-1, 0, 1, 1, 2, 2, 5, 7, 10, 20] +[0, 1, 2, 10] +[-1, 2, 7, 20] +[-1, 0, 0, 1, 1, 1, 2, 2, 2, 5, 7, 10, 10, 20] +*/ +``` + +## Detailed Design + +The merging algorithm can be applied in two domains: + +- Free functions taking the source sequences. +- Functions over a `MutableCollection & BidirectionalCollection`, + where the two sources are adjacent partitions of the collection. + +Besides the optional ordering predicate, +the partition-merging methods' other parameter is the index to the +first element of the second partition, +or `endIndex` if that partition is empty. + +Besides the optional ordering predicate, +the free functions take the two operand sequences and the desired set operation +(intersection, union, symmetric difference, *etc.*). +Use `.sum` for a conventional merge. +Half of those functions take an extra parameter taking a reference to +a collection type. +These functions create an object of that type and eagerly fill it with the +result of the merger. +The functions without that parameter return a special sequence that lazily +generates the result of the merger. + +```swift +// Merging two adjacent partitions.
+ +extension MutableCollection where Self : BidirectionalCollection { + /// Assuming that both this collection's slice before the given index and + /// the slice at and past that index are both sorted according to + /// the given predicate, + /// rearrange the slices' elements until the collection as + /// a whole is sorted according to the predicate. + public mutating func mergePartitions( + across pivot: Index, + sortedBy areInIncreasingOrder: (Element, Element) throws(Fault) -> Bool + ) throws(Fault) where Fault : Error +} + +extension MutableCollection where Self : BidirectionalCollection, Self.Element : Comparable { + /// Assuming that both this collection's slice before the given index and + /// the slice at and past that index are both sorted, + /// rearrange the slices' elements until the collection as + /// a whole is sorted. + public mutating func mergePartitions(across pivot: Index) +} + +// Merging two sequences with free functions, applying a set operation. +// Has lazy and eager variants. + +/// Given two sequences treated as (multi)sets, both sorted according to +/// a given predicate, +/// return a sequence that lazily vends the also-sorted result of applying a +/// given set operation to the sequence operands. +public func merge( + _ first: First, _ second: Second, keeping filter: MergerSubset, + sortedBy areInIncreasingOrder: @escaping (First.Element, Second.Element) -> Bool +) -> MergedSequence +where First : Sequence, Second : Sequence, First.Element == Second.Element + +/// Given two sequences treated as (multi)sets, both sorted according to +/// a given predicate, +/// eagerly apply a given set operation to the sequences then copy the +/// also-sorted result into a collection of a given type. 
+public func merge( + _ first: First, _ second: Second, into type: Result.Type, keeping filter: MergerSubset, + sortedBy areInIncreasingOrder: (First.Element, Second.Element) throws(Fault) -> Bool +) throws(Fault) -> Result +where First : Sequence, Second : Sequence, Result : RangeReplaceableCollection, + Fault : Error, First.Element == Second.Element, Second.Element == Result.Element + +/// Given two sorted sequences treated as (multi)sets, +/// return a sequence that lazily vends the also-sorted result of applying a +/// given set operation to the sequence operands. +public func merge( + _ first: First, _ second: Second, keeping filter: MergerSubset +) -> MergedSequence +where First : Sequence, Second : Sequence, First.Element : Comparable, + First.Element == Second.Element + +/// Given two sorted sequences treated as (multi)sets, +/// eagerly apply a given set operation to the sequences then copy the +/// also-sorted result into a collection of a given type. +public func merge( + _ first: First, _ second: Second, into type: Result.Type, keeping filter: MergerSubset +) -> Result +where First : Sequence, Second : Sequence, Result : RangeReplaceableCollection, + First.Element : Comparable, First.Element == Second.Element, + Second.Element == Result.Element +``` + +Target subsets are described by a new type. + +```swift +/// Description of which elements of a merger will be retained. +public enum MergerSubset : UInt, CaseIterable +{ + case none, firstWithoutSecond, secondWithoutFirst, symmetricDifference, + intersection, first, second, union, + sum + + //... +} +``` + +Every set-operation combination is provided, although some are degenerate. + +The merging free-functions use these support types: + +```swift +/// A sequence that reads from two sequences treated as (multi)sets, +/// where both sequences' elements are sorted according to some predicate, +/// and emits a sorted merger, +/// excluding any elements barred by a set operation. 
+public struct MergedSequence + : Sequence, LazySequenceProtocol + where First : Sequence, Second : Sequence, Fault : Error, + First.Element == Second.Element +{ + //... +} + +/// An iterator that reads from two virtual sequences treated as (multi)sets, +/// where both sequences' elements are sorted according to some predicate, +/// and emits a sorted merger, +/// excluding any elements barred by a set operation. +public struct MergingIterator + : IteratorProtocol + where First : IteratorProtocol, Second : IteratorProtocol, Fault : Error, + First.Element == Second.Element +{ + //... +} +``` + +The partition merger operates **O(** 1 **)** in space; +for time it works at _???_ for random-access collections and +_???_ for bidirectional collections. + +The eager merging free functions operate at **O(** _n_ `+` _m_ **)** in +space and time, +where *n* and *m* are the lengths of the source sequences. +The lazy merging free functions operate at **O(** 1 **)** in space and time. +Actually generating the entire merged sequence will take +**O(** _n_ `+` _m_ **)** over distributed time. + +### Naming + +Many merging functions use the word "merge" in their name. + +**[C++]:** Provides the `merge` and `inplace_merge` functions. +Set operations are provided by +the `set_union`, `set_intersection`, `set_difference`, and +`set_symmetric_difference` functions. diff --git a/Guides/MergeSorted.md b/Guides/MergeSorted.md deleted file mode 100644 index 870a3c2b..00000000 --- a/Guides/MergeSorted.md +++ /dev/null @@ -1,173 +0,0 @@ -# Merge Sorted - -[[Source](https://github.com/apple/swift-algorithms/blob/main/Sources/Algorithms/MergeSorted.swift) | - [Tests](https://github.com/apple/swift-algorithms/blob/main/Tests/SwiftAlgorithmsTests/MergeSortedTests.swift)] - -Splice two sequences that use the same sorting criteria into a sequence that -is also sorted with that criteria. 
- -If the sequences are sorted with something besides the less-than operator (`<`), -then a predicate can be supplied: - -```swift -let merged = mergeSorted([10, 4, 0, 0, -3], [20, 6, 1, -1, -5], sortedBy: >) -print(Array(merged)) -// [20, 10, 6, 4, 1, 0, 0, -1, -3, -5] -``` - -Sorted sequences can be treated as (multi-)sets. -Due to being sorted, -distinguishing elements that are shared between sequences or -are exclusive to a sequence can be determined in a resonable time frame. -Set operations take advantage of the catagories of sharing, -so applying operations can be done in-line during merging: - -```swift -let first = [0, 1, 1, 2, 5, 10], second = [-1, 0, 1, 2, 2, 7, 10, 20] -print(Array(mergeSortedSets(first, second, retaining: .union))) -print(Array(mergeSortedSets(first, second, retaining: .intersection))) -print(Array(mergeSortedSets(first, second, retaining: .secondWithoutFirst))) -print(Array(mergeSortedSets(first, second, retaining: .sum))) // Standard merge! -/* -[-1, 0, 1, 1, 2, 2, 5, 7, 10, 20] -[0, 1, 2, 10] -[-1, 2, 7, 20] -[-1, 0, 0, 1, 1, 1, 2, 2, 2, 5, 7, 10, 10, 20] -*/ -``` - -## Detailed Design - -The merging algorithm can be applied in three domains: - -- Free functions taking the source sequences. -- Initializers for `RangeReplaceableCollection`, - that take the source sequences and then - create the result in-place. -- Functions over a `MutableCollection`, - where the two sources are adjancent partitions of the collection. - -The free-function and initializer forms have variants that -take an extra parameter, -which represent which subset of the merger will be kept. -For instance, -using `.intersection` makes the resulting merger contain only the elements that -appear in both sources, -skipping any elements that appear in exactly one source. -All of the forms take a parameter for the ordering predicate. -If the element type conforms to `Comparable`, -a predicate can be omitted to use a default of the less-than operator (`<`). 
- -```swift -// Free-function form. Also used for lazy evaluation. - -/// Given two sequences that are both sorted according to the given predicate, return their merger that is sorted by the predicate and vended lazily. -@inlinable public func mergeSorted(_ first: T, _ second: U, sortedBy areInIncreasingOrder: @escaping (T.Element, U.Element) -> Bool) -> MergeSortedSetsSequence where T : Sequence, U : Sequence, T.Element == U.Element - -/// Given two sorted sequences, return their still-sorted merger, vended lazily. -@inlinable public func mergeSorted(_ first: T, _ second: U) -> MergeSortedSetsSequence where T : Sequence, U : Sequence, T.Element : Comparable, T.Element == U.Element - -/// Given two sequences that are both sorted according to the given predicate and treated as sets, apply the given set operation, returning the result as a sequence sorted by the predicate and that is vended lazily. -public func mergeSortedSets(_ first: T, _ second: U, retaining filter: MergerSubset, sortedBy areInIncreasingOrder: @escaping (T.Element, U.Element) -> Bool) -> MergeSortedSetsSequence where T : Sequence, U : Sequence, T.Element == U.Element - -/// Given two sorted sequences treated as sets, apply the given set operation, returning the result as a sorted sequence that vends lazily. -@inlinable public func mergeSortedSets(_ first: T, _ second: U, retaining filter: MergerSubset) -> MergeSortedSetsSequence where T : Sequence, U : Sequence, T.Element : Comparable, T.Element == U.Element - -// Initializer form. - -extension RangeReplaceableCollection { - /// Given two sequences that are both sorted according to the given predicate, create their sorted merger. 
- @inlinable public init(mergeSorted first: T, and second: U, sortedBy areInIncreasingOrder: (Element, Element) throws -> Bool) rethrows where T : Sequence, U : Sequence, Self.Element == T.Element, T.Element == U.Element - - /// Given two sequences that are both sorted according to the given predicate, treat them as sets, and create the sorted result of the given set operation. - public init(mergeSorted first: T, and second: U, retaining filter: MergerSubset, sortedBy areInIncreasingOrder: (Element, Element) throws -> Bool) rethrows where T : Sequence, U : Sequence, Self.Element == T.Element, T.Element == U.Element -} - -extension RangeReplaceableCollection where Self.Element : Comparable { - /// Given two sorted sequences, create their sorted merger. - @inlinable public init(mergeSorted first: T, and second: U) where T : Sequence, U : Sequence, Self.Element == T.Element, T.Element == U.Element - - /// Given two sorted sequences, treat them as sets, and create the sorted result of the given set operation. - @inlinable public init(mergeSorted first: T, and second: U, retaining filter: MergerSubset) where T : Sequence, U : Sequence, Self.Element == T.Element, T.Element == U.Element -} - -// Two-partition merging. - -extension MutableCollection where Self : BidirectionalCollection { - /// Assuming that both this collection's slice before the given index and - /// the slice at and past that index are both sorted according to - /// the given predicate, - /// rearrange the slices' elements until the collection as - /// a whole is sorted according to the predicate. 
- public mutating func mergePartitions(across pivot: Index, sortedBy areInIncreasingOrder: (Element, Element) throws(Fault) -> Bool) throws(Fault) where Fault : Error -} - -extension MutableCollection where Self : BidirectionalCollection, Self.Element : Comparable { - /// Assuming that both this collection's slice before the given index and - /// the slice at and past that index are both sorted, - /// rearrange the slices' elements until the collection as - /// a whole is sorted. - @inlinable public mutating func mergePartitions(across pivot: Index) -} -``` - -Target subsets are described by a new type. - -```swift -/// Description of which elements of a merger will be retained. -public enum MergerSubset : UInt, CaseIterable { - case none, firstWithoutSecond, secondWithoutFirst, symmetricDifference, - intersection, first, second, union, - sum - - //... -} -``` - -Every set-operation combination is provided, although some are degenerate. - -Most of the merging functions use these support types: - -```swift -/// A sequence that lazily vends the sorted result of a set operation upon two sorted sequences treated as sets spliced together, using a predicate as the sorting criteria for all three sequences involved. -public struct MergeSortedSetsSequence - : Sequence -where First : Sequence, - Second : Sequence, - First.Element == Second.Element -{ /*...*/ } - -extension MergeSortedSetsSequence - : LazySequenceProtocol -where First : LazySequenceProtocol, Second : LazySequenceProtocol -{ /*...*/ } - -/// An iterator that applies a set operation on two virtual sequences, both treated as sets sorted according a predicate, spliced together to vend a virtual sequence that is also sorted. 
-public struct MergeSortedSetsIterator - : IteratorProtocol -where First : IteratorProtocol, - Second : IteratorProtocol, - First.Element == Second.Element -{ /*...*/ } -``` - -The merges via: - -- The free functions -- The initializers - -Operate in **O(** _n_ `+` _m_ **)** for both space and time, -where *n* and *m* are the lengths of the two operand sequences/partitions. -A partition merge for a collection of length *n* operates in -**O(** 1 **)** for space, -**O(** _n_ **)** for time when the collection is not random-access, -and *???* for time in random-access collections. - -### Naming - -Many merging functions use the word "merge" in their name. - -**[C++]:** Provides the `merge` and `inplace_merge` functions. -Set operations are provided by -the `set_union`, `set_intersection`, `set_difference`, and -`set_symmetric_difference` functions. diff --git a/Guides/README.md b/Guides/README.md index 0dc3c9d9..423ad45a 100644 --- a/Guides/README.md +++ b/Guides/README.md @@ -21,8 +21,8 @@ These guides describe the design and intention behind the APIs included in the ` - [`chain(_:_:)`](https://github.com/apple/swift-algorithms/blob/main/Guides/Chain.md): Concatenates two collections with the same element type. - [`cycled()`, `cycled(times:)`](https://github.com/apple/swift-algorithms/blob/main/Guides/Cycle.md): Repeats the elements of a collection forever or a set number of times. - [`joined(by:)`](https://github.com/apple/swift-algorithms/blob/main/Guides/Joined.md): Concatenate sequences of sequences, using an element or sequence as a separator, or using a closure to generate each separator. +- [`merge(_:_:keeping:sortedBy:)`, `merge(_:_:into:keeping:sortedBy:)`, `merge(_:_:keeping:)`, `merge(_:_:into:keeping:)`](https://github.com/apple/swift-algorithms/blob/main/Guides/Merge.md): Merge two sorted sequences together. 
- [`product(_:_:)`](https://github.com/apple/swift-algorithms/blob/main/Guides/Product.md): Iterates over all the pairs of two collections; equivalent to nested `for`-`in` loops. -- [`mergeSorted(_:_:sortedBy:)`](https://github.com/apple/swift-algorithms/blob/main/Guides/MergedSorted.md): Merge two sorted sequences together. #### Subsetting operations @@ -35,7 +35,6 @@ These guides describe the design and intention behind the APIs included in the ` - [`trimmingPrefix(while:)`, `trimmingSuffix(while)`, `trimming(while:)`](https://github.com/apple/swift-algorithms/blob/main/Guides/Trim.md): Returns a slice by trimming elements from a collection's start, end, or both. The mutating `trim...` methods trim a collection in place. - [`uniqued()`, `uniqued(on:)`](https://github.com/apple/swift-algorithms/blob/main/Guides/Unique.md): The unique elements of a collection, preserving their order. - [`minAndMax()`, `minAndMax(by:)`](https://github.com/apple/swift-algorithms/blob/main/Guides/MinMax.md): Returns the smallest and largest elements of a sequence. -- The `mergeSortedSets(_:_:retaining:sortedBy:)` function is a variant from the "Combining collections" section above that adds a parameter to apply a set operation in-line with the merge. #### Partial sorting diff --git a/Sources/Algorithms/Documentation.docc/Merging.md b/Sources/Algorithms/Documentation.docc/Merging.md index 266c9f48..497d8105 100644 --- a/Sources/Algorithms/Documentation.docc/Merging.md +++ b/Sources/Algorithms/Documentation.docc/Merging.md @@ -8,19 +8,15 @@ then generate the result of applying a set operation. 
### Merging Sorted Sequences -- ``Swift/RangeReplaceableCollection/init(mergeSorted:and:sortedBy:)`` -- ``Swift/RangeReplaceableCollection/init(mergeSorted:and:)`` -- ``Swift/RangeReplaceableCollection/init(mergeSorted:and:retaining:sortedBy:)`` -- ``Swift/RangeReplaceableCollection/init(mergeSorted:and:retaining:)`` -- ``mergeSorted(_:_:sortedBy:)`` -- ``mergeSorted(_:_:)`` -- ``mergeSortedSets(_:_:retaining:sortedBy:)`` -- ``mergeSortedSets(_:_:retaining:)`` -- ``Swift/MutableCollection/mergeSortedPartitions(across:sortedBy:)`` -- ``Swift/MutableCollection/mergeSortedPartitions(across:)`` +- ``merge(_:_:keeping:sortedBy:)`` +- ``merge(_:_:into:keeping:sortedBy:)`` +- ``merge(_:_:keeping:)`` +- ``merge(_:_:into:keeping:)`` +- ``Swift/MutableCollection/mergePartitions(across:sortedBy:)`` +- ``Swift/MutableCollection/mergePartitions(across:)`` ### Supporting Types - ``MergerSubset`` -- ``MergeSortedSetsSequence`` -- ``MergeSortedSetsIterator`` +- ``MergedSequence`` +- ``MergingIterator`` diff --git a/Sources/Algorithms/Merge.swift b/Sources/Algorithms/Merge.swift new file mode 100644 index 00000000..49dbf56b --- /dev/null +++ b/Sources/Algorithms/Merge.swift @@ -0,0 +1,426 @@ +//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift Algorithms open source project +// +// Copyright (c) 2024 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// MARK: MergerSubset +//-------------------------------------------------------------------------===// + +/// Description of which elements of a merger will be retained. +public enum MergerSubset: UInt, CaseIterable { + /// Keep no elements.
+ case none + /// Keep the elements of the first source that are not also in the second. + case firstWithoutSecond + /// Keep the elements of the second source that are not also in the first. + case secondWithoutFirst + /// Keep the elements of both sources that are not present in the other. + case symmetricDifference + /// Keep the elements that are present in both sources. + case intersection + /// Keep only the elements from the first source. + case first + /// Keep only the elements from the second source. + case second + /// Keep all of the elements from both sources, consolidating shared ones. + case union + /// Keep all elements from both sources, including duplicates. + case sum = 0b1111 // `union` with an extra bit to distinguish. +} + +extension MergerSubset { + /// Whether the elements exclusive to the first source are emitted. + @inlinable + public var emitsExclusivesToFirst: Bool { rawValue & 0b001 != 0 } + /// Whether the elements exclusive to the second source are emitted. + @inlinable + public var emitsExclusivesToSecond: Bool { rawValue & 0b010 != 0 } + /// Whether the elements shared by both sources are emitted. + @inlinable + public var emitsSharedElements: Bool { rawValue & 0b100 != 0 } +} + +extension MergerSubset { + /// Create a filter specifying a full merge (duplicating the shared elements). + @inlinable + public init() { self = .sum } + /// Create a filter specifying which categories of elements are included in + /// the merger, with shared elements consolidated.
+ public init(keepExclusivesToFirst: Bool, keepExclusivesToSecond: Bool, + keepSharedElements: Bool) { + self = switch (keepSharedElements, keepExclusivesToSecond, + keepExclusivesToFirst) { + case (false, false, false): .none + case (false, false, true): .firstWithoutSecond + case (false, true, false): .secondWithoutFirst + case (false, true, true): .symmetricDifference + case ( true, false, false): .intersection + case ( true, false, true): .first + case ( true, true, false): .second + case ( true, true, true): .union + } + } +} + +extension MergerSubset { + /// Return the worst-case bounds with the given source lengths. + /// + /// These non-necessarily exclusive conditions can affect the result: + /// + /// - One or both of the sources is empty. + /// - The sources are identical. + /// - The sources have no elements in common. + /// - The shorter source is a subset of the longer one. + /// - The sources have just partial overlap. + /// + /// Both inputs must be nonnegative. + fileprivate + func expectedCountRange(given firstLength: Int, and secondLength: Int) + -> ClosedRange { + /// Generate a range for a single value without repeating its expression. 
+ func singleValueRange(_ v: Int) -> ClosedRange { return v...v } + + return switch self { + case .none: + singleValueRange(0) + case .firstWithoutSecond: + max(firstLength - secondLength, 0)...firstLength + case .secondWithoutFirst: + max(secondLength - firstLength, 0)...secondLength + case .symmetricDifference: + abs(firstLength - secondLength)...(firstLength + secondLength) + case .intersection: + 0...min(firstLength, secondLength) + case .first: + singleValueRange(firstLength) + case .second: + singleValueRange(secondLength) + case .union: + max(firstLength, secondLength)...(firstLength + secondLength) + case .sum: + singleValueRange(firstLength + secondLength) + } + } +} + +//===----------------------------------------------------------------------===// +// MARK: - Merging functions +//-------------------------------------------------------------------------===// + +/// Given two sequences treated as (multi)sets, both sorted according to +/// a given predicate, +/// return a sequence that lazily vends the also-sorted result of applying a +/// given set operation to the sequence operands. +/// +/// For simply merging the sequences, use `.sum` as the operation. +/// +/// - Precondition: Both `first` and `second` must be sorted according to +/// `areInIncreasingOrder`. +/// Said predicate must model a strict weak ordering over its arguments. +/// +/// - Parameters: +/// - first: The first sequence to merge. +/// - second: The second sequence to merge. +/// - filter: The subset of the merged sequence to keep. +/// - areInIncreasingOrder: The function expressing the sorting criterion. +/// - Returns: A lazy sequence for the resulting merge. +/// +/// - Complexity: O(1). 
+public func merge( + _ first: First, + _ second: Second, + keeping filter: MergerSubset, + sortedBy areInIncreasingOrder: @escaping (First.Element, Second.Element) + -> Bool +) -> MergedSequence +where First.Element == Second.Element { + return .init(first, second, keeping: filter, sortedBy: areInIncreasingOrder) +} + +/// Given two sequences treated as (multi)sets, both sorted according to +/// a given predicate, +/// eagerly apply a given set operation to the sequences then copy the +/// also-sorted result into a collection of a given type. +/// +/// For simply merging the sequences, use `.sum` as the operation. +/// +/// - Precondition: Both `first` and `second` must be sorted according to +/// `areInIncreasingOrder`. +/// Said predicate must model a strict weak ordering over its arguments. +/// Both `first` and `second` must be finite. +/// +/// - Parameters: +/// - first: The first sequence to merge. +/// - second: The second sequence to merge. +/// - type: A marker specifying the type of collection for +/// storing the result. +/// - filter: The subset of the merged sequence to keep. +/// - areInIncreasingOrder: The function expressing the sorting criterion. +/// - Returns: The resulting merge stored in a collection of the given `type`. +/// +/// - Complexity: O(*n* + *m*), +/// where *n* and *m* are the lengths of `first` and `second`.
+public func merge( + _ first: First, + _ second: Second, + into type: Result.Type, + keeping filter: MergerSubset, + sortedBy areInIncreasingOrder: (First.Element, Second.Element) throws(Fault) + -> Bool +) throws(Fault) -> Result +where First.Element == Second.Element, Second.Element == Result.Element { + func makeResult( + compare: @escaping (First.Element, Second.Element) throws(Fault) -> Bool + ) throws(Fault) -> Result { + var result = Result() + let sequence = MergedSequence(first, second, keeping: filter, + sortedBy: compare) + var iterator = sequence.makeIterator() + result.reserveCapacity(sequence.underestimatedCount) + while let element = try iterator.throwingNext() { + result.append(element) + } + return result + } + + return try withoutActuallyEscaping(areInIncreasingOrder, + do: makeResult(compare:)) +} + +/// Given two sorted sequences treated as (multi)sets, +/// return a sequence that lazily vends the also-sorted result of applying a +/// given set operation to the sequence operands. +/// +/// For simply merging the sequences, use `.sum` as the operation. +/// +/// - Precondition: Both `first` and `second` must be sorted. +/// +/// - Parameters: +/// - first: The first sequence to merge. +/// - second: The second sequence to merge. +/// - filter: The subset of the merged sequence to keep. +/// - Returns: A lazy sequence for the resulting merge. +/// +/// - Complexity: O(1). +@inlinable +public func merge( + _ first: First, + _ second: Second, + keeping filter: MergerSubset +) -> MergedSequence +where First.Element == Second.Element, Second.Element: Comparable { + return merge(first, second, keeping: filter, sortedBy: <) +} + +/// Given two sorted sequences treated as (multi)sets, +/// eagerly apply a given set operation to the sequences then copy the +/// also-sorted result into a collection of a given type. +/// +/// For simply merging the sequences, use `.sum` as the operation. +/// +/// - Precondition: Both `first` and `second` must be sorted. 
+/// Both `first` and `second` must be finite. +/// +/// - Parameters: +/// - first: The first sequence to merge. +/// - second: The second sequence to merge. +/// - type: A marker specifying the type of collection for +/// storing the result. +/// - filter: The subset of the merged sequence to keep. +/// - Returns: The resulting merge stored in a collection of the given `type`. +/// +/// - Complexity: O(*n* + *m*), +/// where *n* and *m* are the lengths of `first` and `second`. +@inlinable +public func merge( + _ first: First, + _ second: Second, + into type: Result.Type, + keeping filter: MergerSubset +) -> Result +where First.Element == Second.Element, Second.Element == Result.Element, + Result.Element: Comparable { + return merge(first, second, into: Result.self, keeping: filter, sortedBy: <) +} + +//===----------------------------------------------------------------------===// +// MARK: - MergedSequence +//-------------------------------------------------------------------------===// + +/// A sequence that reads from two sequences treated as (multi)sets, +/// where both sequences' elements are sorted according to some predicate, +/// and emits a sorted merger, +/// excluding any elements barred by a set operation. +public struct MergedSequence< + First: Sequence, + Second: Sequence, + Fault: Error +> where First.Element == Second.Element { + /// The elements for the first operand. + let base1: First + /// The elements for the second operand. + let base2: Second + /// The set operation to apply to the operands. + let filter: MergerSubset + /// The predicate with the sorting criterion. + let areInIncreasingOrder: (Element, Element) throws(Fault) -> Bool + + /// Create a sequence that reads from the two given sequences, + /// which will vend their merger after applying the given set operation, + /// where both the base sequences and this sequence emit their + /// elements sorted according to the given predicate.
+ init( + _ base1: First, + _ base2: Second, + keeping filter: MergerSubset, + sortedBy areInIncreasingOrder: @escaping (Element, Element) + throws(Fault) -> Bool + ) { + self.base1 = base1 + self.base2 = base2 + self.filter = filter + self.areInIncreasingOrder = areInIncreasingOrder + } +} + +extension MergedSequence: Sequence { + public func makeIterator( + ) -> MergingIterator { + return .init(base1.makeIterator(), base2.makeIterator(), + keeping: filter, sortedBy: areInIncreasingOrder) + } + + public var underestimatedCount: Int { + filter.expectedCountRange( + given: base1.underestimatedCount, + and: base2.underestimatedCount + ).lowerBound + } +} + +extension MergedSequence: LazySequenceProtocol {} + +//===----------------------------------------------------------------------===// +// MARK: - MergingIterator +//-------------------------------------------------------------------------===// + +/// An iterator that reads from two virtual sequences treated as (multi)sets, +/// where both sequences' elements are sorted according to some predicate, +/// and emits a sorted merger, +/// excluding any elements barred by a set operation. +public struct MergingIterator< + First: IteratorProtocol, + Second: IteratorProtocol, + Fault: Error +> where First.Element == Second.Element { + /// The elements for the first operand. + var base1: First? + /// The elements for the second operand. + var base2: Second? + /// The set operation to apply to the operands. + let filter: MergerSubset + /// The predicate with the sorting criterion. + let areInIncreasingOrder: (Element, Element) throws(Fault) -> Bool + + /// The latest element read from `base1`. + fileprivate var latest1: First.Element? + /// The latest element read from `base2`. + fileprivate var latest2: Second.Element? + /// Whether to continue iterating. 
+ fileprivate var isFinished = false + + /// Create an iterator that reads from the two given iterators, + /// which will vend their merger after applying the given set operation, + /// where both the base iterators and this iterator emit their + /// elements sorted according to the given predicate. + init( + _ base1: First, + _ base2: Second, + keeping filter: MergerSubset, + sortedBy areInIncreasingOrder: @escaping (Element, Element) + throws(Fault) -> Bool + ) { + // Don't keep operand iterators that aren't needed. + switch filter { + case .none: + break + case .first: + self.base1 = base1 + case .second: + self.base2 = base2 + default: + self.base1 = base1 + self.base2 = base2 + } + + // The other members. + self.filter = filter + self.areInIncreasingOrder = areInIncreasingOrder + } +} + +extension MergingIterator: IteratorProtocol { + /// Advance to the next element, if any. May throw. + fileprivate mutating func throwingNext() throws(Fault) -> First.Element? { + while !isFinished { + // Extract another element from a source if the previous one was purged. + latest1 = latest1 ?? base1?.next() + latest2 = latest2 ?? base2?.next() + + // Of the latest valid elements, purge the smaller (or both when they are + // equivalent). Return said element if the filter permits, search again + // otherwise. + switch (latest1, latest2) { + case let (latestFirst?, latestSecond?) + where try areInIncreasingOrder(latestFirst, latestSecond): + defer { latest1 = nil } + guard filter.emitsExclusivesToFirst else { continue } + + return latestFirst + case let (latestFirst?, latestSecond?) + where try areInIncreasingOrder(latestSecond, latestFirst): + defer { latest2 = nil } + guard filter.emitsExclusivesToSecond else { continue } + + return latestSecond + case (let latestFirst?, _?): + // Purge both of the equivalent elements... + defer { + latest1 = nil + + // ...except when the second source's element is only deferred. 
+ if filter != .sum { latest2 = nil } + } + guard filter.emitsSharedElements else { continue } + + // This will not cause mixed-source emission when only the second + // source is being vended, because this case won't ever be reached. + return latestFirst + case (nil, let latestSecond?) where filter.emitsExclusivesToSecond: + latest2 = nil + return latestSecond + case (let latestFirst?, nil) where filter.emitsExclusivesToFirst: + latest1 = nil + return latestFirst + default: + // Either both sources are exhausted, or just one is while the remainder + // of the other won't be emitted. + isFinished = true + } + } + return nil + } + + public mutating func next() -> Second.Element? { + return try! throwingNext() + } +} diff --git a/Sources/Algorithms/MergeSorted.swift b/Sources/Algorithms/MergeSorted.swift deleted file mode 100644 index c4def73c..00000000 --- a/Sources/Algorithms/MergeSorted.swift +++ /dev/null @@ -1,226 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// This source file is part of the Swift Algorithms open source project -// -// Copyright (c) 2024 Apple Inc. and the Swift project authors -// Licensed under Apache License v2.0 with Runtime Library Exception -// -// See https://swift.org/LICENSE.txt for license information -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// MARK: RangeReplaceableCollection.init(mergeSorted:and:sortedBy:) -//-------------------------------------------------------------------------===// - -extension RangeReplaceableCollection { - /// Given two sequences that are both sorted according to the given predicate, - /// create their sorted merger. - /// - /// - Precondition: Both `first` and `second` must be sorted according to - /// `areInIncreasingOrder`, and said predicate must be a strict weak ordering - /// over its arguments. 
Both `first` and `second` must be finite. - /// - /// - Parameters: - /// - first: The first sequence spliced. - /// - second: The second sequence spliced. - /// - areInIncreasingOrder: The criteria for sorting. - /// - /// - Complexity: O(`n` + `m`) in space and time, where `n` and `m` are the - /// lengths of the sequence arguments. - @inlinable - public init( - mergeSorted first: T, - and second: U, - sortedBy areInIncreasingOrder: (Element, Element) throws -> Bool - ) rethrows - where T.Element == Element, U.Element == Element - { - try self.init(mergeSorted: first, and: second, retaining: .sum, sortedBy: areInIncreasingOrder) - } -} - -extension RangeReplaceableCollection where Element: Comparable { - /// Given two sorted sequences, create their sorted merger. - /// - /// - Precondition: Both `first` and `second` must be sorted, and both - /// must be finite. - /// - /// - Parameters: - /// - first: The first sequence spliced. - /// - second: The second sequence spliced. - /// - /// - Complexity: O(`n` + `m`) in space and time, where `n` and `m` are the - /// lengths of the sequence arguments. - @inlinable - public init( - mergeSorted first: T, - and second: U - ) where T.Element == Element, U.Element == Element - { - self.init(mergeSorted: first, and: second, sortedBy: <) - } -} - -//===----------------------------------------------------------------------===// -// MARK: - mergeSorted(_:_:sortedBy:) -//-------------------------------------------------------------------------===// - -/// Given two sequences that are both sorted according to the given predicate, -/// return their merger that is sorted by the predicate and vended lazily. -/// -/// - Precondition: Both `first` and `second` must be sorted according to -/// `areInIncreasingOrder`, and said predicate must be a strict weak ordering -/// over its arguments. -/// -/// - Parameters: -/// - first: The first sequence spliced. -/// - second: The second sequence spliced. 
-/// - areInIncreasingOrder: The criteria for sorting. -/// - Returns: The merged sequence. -/// -/// - Complexity: O(1). The actual iteration takes place in O(`n` + `m`), -/// where `n` and `m` are the lengths of the sequence arguments. -@inlinable -public func mergeSorted( - _ first: T, - _ second: U, - sortedBy areInIncreasingOrder: @escaping (T.Element, U.Element) -> Bool -) -> MergeSortedSetsSequence -where T.Element == U.Element { - return mergeSortedSets(first, second, retaining: .sum, sortedBy: areInIncreasingOrder) -} - -/// Given two sorted sequences, return their still-sorted merger, vended lazily. -/// -/// - Precondition: Both `first` and `second` must be sorted. -/// -/// - Parameters: -/// - first: The first sequence spliced. -/// - second: The second sequence spliced. -/// - Returns: The merged sequence. -/// -/// - Complexity: O(1). The actual iteration takes place in O(`n` + `m`), -/// where `n` and `m` are the lengths of the sequence arguments. -@inlinable -public func mergeSorted( - _ first: T, _ second: U -) -> MergeSortedSetsSequence -where T.Element == U.Element, T.Element: Comparable { - return mergeSorted(first, second, sortedBy: <) -} - -//===----------------------------------------------------------------------===// -// MARK: - MergeSortedSequence -//-------------------------------------------------------------------------===// - -/// A sequence taking some sequences, -/// all sorted along a predicate, -/// that vends the spliced-together merged sequence, -/// where said sequence is also sorted. -/// -/// - TODO: When Swift supports same-element requirements for -/// variadic generics, change this type's generic pattern to -/// accept any number of source iterators. -@available(macOS 13.0.0, *) -public struct MergeSortedSequence -where First.Element == Second.Element -{ - /// The sorting criterion. - let areInIncreasingOrder: (Element, Element) throws -> Bool - /// The first source sequence. 
- let first: First - /// The second source sequence. - let second: Second - - public - init( - _ first: First, - _ second: Second, - sortedBy areInIncreasingOrder: @escaping (Element, Element) throws -> Bool - ) { - self.first = first - self.second = second - self.areInIncreasingOrder = areInIncreasingOrder - } -} - -@available(macOS 13.0.0, *) -extension MergeSortedSequence: Sequence { - public func makeIterator() -> MergeSortedIterator { - return .init(first.makeIterator(), second.makeIterator(), - areInIncreasingOrder: areInIncreasingOrder) - } - - public var underestimatedCount: Int { - let result = first.underestimatedCount - .addingReportingOverflow(second.underestimatedCount) - return result.overflow ? .max : result.partialValue - } -} - -@available(macOS 13.0.0, *) -extension MergeSortedSequence: LazySequenceProtocol -where First: LazySequenceProtocol, Second: LazySequenceProtocol -{ - public var elements: MergeSortedSequence { - .init(first.elements, second.elements, sortedBy: areInIncreasingOrder) - } -} - -//===----------------------------------------------------------------------===// -// MARK: - MergeSortedIterator -//-------------------------------------------------------------------------===// - -/// An iterator taking some virtual sequences, -/// all sorted along a predicate, -/// that vends the spliced-together virtual sequence merger, -/// where said sequence is also sorted. -@available(macOS 13.0.0, *) -public struct MergeSortedIterator { - /// The sorting criterion. - let areInIncreasingOrder: (Element, Element) throws -> Bool - /// The sources to splice together. - var sources: [(latest: Element?, source: any IteratorProtocol)] - - /// Create an iterator that reads from the two given sources and - /// vends their merger, - /// assuming all three virtual sequences are sorted according to - /// the given predicate. 
- /// - /// - TODO: When Swift supports same-element requirements for - /// variadic generics, change this initializer to accept any number of - /// source iterators. - init, U: IteratorProtocol>( - _ first: T, - _ second: U, - areInIncreasingOrder: @escaping (Element, Element) throws -> Bool - ) { - self.areInIncreasingOrder = areInIncreasingOrder - self.sources = [(nil, first), (nil, second)] - } -} - -@available(macOS 13.0.0, *) -extension MergeSortedIterator: IteratorProtocol { - /// Advance to the next element, if any. May throw. - @usableFromInline - mutating func throwingNext() throws -> Element? { - for index in sources.indices { - sources[index].latest = sources[index].latest - ?? sources[index].source.next() - } - sources.removeAll { $0.latest == nil } - guard let indexOfSmallest = try sources.indices.min(by: { - try areInIncreasingOrder(sources[$0].latest!, sources[$1].latest!) - }) else { return nil } - defer { sources[indexOfSmallest].latest = nil } - - return sources[indexOfSmallest].latest - } - - @inlinable - public mutating func next() -> Element? { - return try! throwingNext() - } -} diff --git a/Sources/Algorithms/MergeSortedSets.swift b/Sources/Algorithms/MergeSortedSets.swift deleted file mode 100644 index 515ebd6e..00000000 --- a/Sources/Algorithms/MergeSortedSets.swift +++ /dev/null @@ -1,411 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// This source file is part of the Swift Algorithms open source project -// -// Copyright (c) 2024 Apple Inc. 
and the Swift project authors -// Licensed under Apache License v2.0 with Runtime Library Exception -// -// See https://swift.org/LICENSE.txt for license information -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -// MARK: MergerSubset -//-------------------------------------------------------------------------===// - -/// Description of which elements of a merger will be retained. -public enum MergerSubset: UInt, CaseIterable { - /// Keep no elements. - case none - /// Keep the elements of the first source that are not also in the second. - case firstWithoutSecond - /// Keep the elements of the second source that are not also in the first. - case secondWithoutFirst - /// Keep the elements of both sources that are not present in the other. - case symmetricDifference - /// Keep the elements that are present in both sorces. - case intersection - /// Keep only the elements from the first source. - case first - /// Keep only the elements from the second source. - case second - /// Keep all of the elements from both sources, consolidating shared ones. - case union - /// Keep all elements from both sources, including duplicates. - case sum = 0b1111 // `union` with an extra bit to distinguish. -} - -extension MergerSubset { - /// Whether the elements exclusive to the first source are emitted. - @inlinable - public var emitsExclusivesToFirst: Bool { rawValue & 0b001 != 0 } - /// Whether the elements exclusive to the second source are emitted. - @inlinable - public var emitsExclusivesToSecond: Bool { rawValue & 0b010 != 0 } - /// Whether the elements shared by both sources are emitted. - @inlinable - public var emitsSharedElements: Bool { rawValue & 0b100 != 0 } -} - -extension MergerSubset { - /// Create a filter specifying a full merge (duplicating the shared elements). 
- @inlinable - public init() { self = .sum } - /// Create a filter specifying which categories of elements are included in - /// the merger, with shared elements consolidated. - public init(keepExclusivesToFirst: Bool, keepExclusivesToSecond: Bool, keepSharedElements: Bool) { - self = switch (keepSharedElements, keepExclusivesToSecond, keepExclusivesToFirst) { - case (false, false, false): .none - case (false, false, true): .firstWithoutSecond - case (false, true, false): .secondWithoutFirst - case (false, true, true): .symmetricDifference - case ( true, false, false): .intersection - case ( true, false, true): .first - case ( true, true, false): .second - case ( true, true, true): .union - } - } -} - -extension MergerSubset { - /// Return the worst-case bounds with the given source lengths. - /// - /// These non-necessarily exclusive conditions can affect the result: - /// - /// - One or both of the sources is empty. - /// - The sources are identical. - /// - The sources have no elements in common. - /// - The shorter source is a subset of the longer one. - /// - The sources have just partial overlap. - /// - /// Both inputs must be nonnegative. - fileprivate - func expectedCountRange(given firstLength: Int, and secondLength: Int) -> ClosedRange { - /// Generate a range for a single value without repeating its expression. 
- func singleValueRange(_ v: Int) -> ClosedRange { return v...v } - - return switch self { - case .none: - singleValueRange(0) - case .firstWithoutSecond: - max(firstLength - secondLength, 0)...firstLength - case .secondWithoutFirst: - max(secondLength - firstLength, 0)...secondLength - case .symmetricDifference: - abs(firstLength - secondLength)...(firstLength + secondLength) - case .intersection: - 0...min(firstLength, secondLength) - case .first: - singleValueRange(firstLength) - case .second: - singleValueRange(secondLength) - case .union: - max(firstLength, secondLength)...(firstLength + secondLength) - case .sum: - singleValueRange(firstLength + secondLength) - } - } -} - -//===----------------------------------------------------------------------===// -// MARK: - RangeReplaceableCollection.init(mergeSorted:and:retaining:sortedBy:) -//-------------------------------------------------------------------------===// - -extension RangeReplaceableCollection { - /// Given two sequences that are both sorted according to the given predicate, - /// treat them as sets, and create the sorted result of the given set - /// operation. - /// - /// For simply merging the sequences, use `.sum` as the operation. - /// - /// - Precondition: Both `first` and `second` must be sorted according to - /// `areInIncreasingOrder`, and said predicate must be a strict weak ordering - /// over its arguments. Both `first` and `second` must be finite. - /// - /// - Parameters: - /// - first: The first sequence spliced. - /// - second: The second sequence spliced. - /// - filter: The subset of the merged sequence to keep. - /// - areInIncreasingOrder: The criteria for sorting. - /// - /// - Complexity: O(`n` + `m`) in space and time, where `n` and `m` are the - /// lengths of the sequence arguments. 
- public init( - mergeSorted first: T, - and second: U, - retaining filter: MergerSubset, - sortedBy areInIncreasingOrder: (Element, Element) throws -> Bool - ) rethrows - where T.Element == Element, U.Element == Element - { - self.init() - try withoutActuallyEscaping(areInIncreasingOrder) { - let sequence = MergeSortedSetsSequence(merging: first, and: second, - retaining: filter, sortedBy: $0) - self.reserveCapacity(sequence.underestimatedCount) - - var iterator = sequence.makeIterator() - while let current = try iterator.throwingNext() { - self.append(current) - } - } - } -} - -extension RangeReplaceableCollection where Element: Comparable { - /// Given two sorted sequences, treat them as sets, and create the sorted - /// result of the given set operation. - /// - /// For simply merging the sequences, use `.sum` as the operation. - /// - /// - Precondition: Both `first` and `second` must be sorted, and both - /// must be finite. - /// - /// - Parameters: - /// - first: The first sequence spliced. - /// - second: The second sequence spliced. - /// - filter: The subset of the merged sequence to keep. - /// - /// - Complexity: O(`n` + `m`) in space and time, where `n` and `m` are the - /// lengths of the sequence arguments. - @inlinable - public init( - mergeSorted first: T, - and second: U, - retaining filter: MergerSubset - ) where T.Element == Element, U.Element == Element - { - self.init(mergeSorted: first, and: second, retaining: filter, sortedBy: <) - } -} - -//===----------------------------------------------------------------------===// -// MARK: - mergeSortedSets(_:_:retaining:sortedBy:) -//-------------------------------------------------------------------------===// - -/// Given two sequences that are both sorted according to the given predicate -/// and treated as sets, apply the given set operation, returning the result as -/// a sequence sorted by the predicate and that is vended lazily. 
-/// -/// For simply merging the sequences, use `.sum` as the operation. -/// -/// - Precondition: Both `first` and `second` must be sorted according to -/// `areInIncreasingOrder`, and said predicate must be a strict weak ordering -/// over its arguments. -/// -/// - Parameters: -/// - first: The first sequence spliced. -/// - second: The second sequence spliced. -/// - filter: The subset of the merged sequence to keep. -/// - areInIncreasingOrder: The criteria for sorting. -/// - Returns: The merged sequence subset. -/// -/// - Complexity: O(1). The actual iteration takes place in O(`n` + `m`), -/// where `n` and `m` are the lengths of the sequence arguments. -public func mergeSortedSets( - _ first: T, - _ second: U, - retaining filter: MergerSubset, - sortedBy areInIncreasingOrder: @escaping (T.Element, U.Element) -> Bool -) -> MergeSortedSetsSequence -where T.Element == U.Element { - return MergeSortedSetsSequence( - merging: first, - and: second, - retaining: filter, - sortedBy: areInIncreasingOrder - ) -} - -/// Given two sorted sequences treated as sets, apply the given set operation, -/// returning the result as a sorted sequence that vends lazily. -/// -/// For simply merging the sequences, use `.sum` as the operation. -/// -/// - Precondition: Both `first` and `second` must be sorted. -/// -/// - Parameters: -/// - first: The first sequence spliced. -/// - second: The second sequence spliced. -/// - filter: The subset of the merged sequence to keep. -/// - Returns: The merged sequence subset. -/// -/// - Complexity: O(1). The actual iteration takes place in O(`n` + `m`), -/// where `n` and `m` are the lengths of the sequence arguments. 
-@inlinable -public func mergeSortedSets( - _ first: T, _ second: U, retaining filter: MergerSubset -) -> MergeSortedSetsSequence -where T.Element == U.Element, T.Element: Comparable { - return mergeSortedSets(first, second, retaining: filter, sortedBy: <) -} - -//===----------------------------------------------------------------------===// -// MARK: - MergeSortedSetsSequence -//-------------------------------------------------------------------------===// - -/// A sequence that lazily vends the sorted result of a set operation upon -/// two sorted sequences treated as sets spliced together, using a predicate as -/// the sorting criteria for all three sequences involved. -public struct MergeSortedSetsSequence -where First.Element == Second.Element -{ - /// The first source sequence. - let first: First - /// The second source sequence. - let second: Second - /// The subset of elements to retain. - let filter: MergerSubset - /// The sorting predicate. - let areInIncreasingOrder: (Element, Element) throws -> Bool - - /// Create a sequence using the two given sequences that are sorted according - /// to the given predicate, to vend the sources' elements combined while still - /// sorted according to the predicate, but keeping only the elements that - /// match the given set operation. 
- init( - merging first: First, - and second: Second, - retaining filter: MergerSubset, - sortedBy areInIncreasingOrder: @escaping (Element, Element) throws -> Bool - ) { - self.first = first - self.second = second - self.filter = filter - self.areInIncreasingOrder = areInIncreasingOrder - } -} - -extension MergeSortedSetsSequence: Sequence { - public func makeIterator() - -> MergeSortedSetsIterator { - return .init(first.makeIterator(), second.makeIterator(), filter: filter, - predicate: areInIncreasingOrder) - } - - public var underestimatedCount: Int { - filter.expectedCountRange( - given: first.underestimatedCount, - and: second.underestimatedCount - ).lowerBound - } -} - -extension MergeSortedSetsSequence: LazySequenceProtocol -where First: LazySequenceProtocol, Second: LazySequenceProtocol -{ - public var elements: MergeSortedSetsSequence { - return Elements(merging: first.elements, and: second.elements, retaining: filter, sortedBy: areInIncreasingOrder) - } -} - -//===----------------------------------------------------------------------===// -// MARK: - MergeSortedSetsIterator -//-------------------------------------------------------------------------===// - -/// An iterator that applies a set operation on two virtual sequences, -/// both treated as sets sorted according a predicate, spliced together to -/// vend a virtual sequence that is also sorted. -public struct MergeSortedSetsIterator< - First: IteratorProtocol, - Second: IteratorProtocol -> where First.Element == Second.Element -{ - /// The first source of elements. - var firstSource: First? - /// The second source of elements. - var secondSource: Second? - /// The subset of elements to emit. - let filter: MergerSubset - /// The sorting predicate. - let areInIncreasingOrder: (Element, Element) throws -> Bool - - /// The latest element read from the first source. - fileprivate var first: First.Element? - /// The latest element read from the second source. - fileprivate var second: Second.Element? 
- /// Whether to keep on iterating. - fileprivate var isFinished = false - - /// Create an iterator reading from two sources, comparing their respective - /// elements with the predicate, and emitting the given subset of the merged - /// sequence. - fileprivate init( - _ firstSource: First, - _ secondSource: Second, - filter: MergerSubset, - predicate: @escaping (Element, Element) throws -> Bool - ) { - // Only load the sources that are actually needed. - switch filter { - case .none: - break - case .first: - self.firstSource = firstSource - case .second: - self.secondSource = secondSource - default: - self.firstSource = firstSource - self.secondSource = secondSource - } - - // Other member initialization - self.filter = filter - self.areInIncreasingOrder = predicate - } -} - -extension MergeSortedSetsIterator: IteratorProtocol { - /// Advance to the next element, if any. May throw. - mutating func throwingNext() throws -> First.Element? { - while !isFinished { - // Extract another element from a source if the previous one was purged. - first = first ?? firstSource?.next() - second = second ?? secondSource?.next() - - // Of the latest valid elements, purge the smaller (or both when they are - // equivalent). Return said element if the filter permits, search again - // otherwise. - switch (first, second) { - case let (latestFirst?, latestSecond?) where try areInIncreasingOrder(latestFirst, latestSecond): - defer { first = nil } - guard filter.emitsExclusivesToFirst else { continue } - - return latestFirst - case let (latestFirst?, latestSecond?) where try areInIncreasingOrder(latestSecond, latestFirst): - defer { second = nil } - guard filter.emitsExclusivesToSecond else { continue } - - return latestSecond - case (let latestFirst?, _?): - // Purge both of the equivalent elements... - defer { - first = nil - - // ...except when the second source's element is only deferred. 
- if filter != .sum { second = nil } - } - guard filter.emitsSharedElements else { continue } - - // This will not cause mixed-source emmission when only the second - // source is being vended, because this case won't ever be reached. - return latestFirst - case (nil, let latestSecond?) where filter.emitsExclusivesToSecond: - second = nil - return latestSecond - case (let latestFirst?, nil) where filter.emitsExclusivesToFirst: - first = nil - return latestFirst - default: - // Either both sources are exhausted, or just one is while the remainder - // of the other won't be emitted. - isFinished = true - } - } - return nil - } - - public mutating func next() -> Second.Element? { - return try! throwingNext() - } -} diff --git a/Tests/SwiftAlgorithmsTests/MergeSortedTests.swift b/Tests/SwiftAlgorithmsTests/MergeSortedTests.swift deleted file mode 100644 index bc895603..00000000 --- a/Tests/SwiftAlgorithmsTests/MergeSortedTests.swift +++ /dev/null @@ -1,191 +0,0 @@ -//===----------------------------------------------------------------------===// -// -// This source file is part of the Swift Algorithms open source project -// -// Copyright (c) 2024 Apple Inc. and the Swift project authors -// Licensed under Apache License v2.0 with Runtime Library Exception -// -// See https://swift.org/LICENSE.txt for license information -// -//===----------------------------------------------------------------------===// - -import XCTest -import Algorithms - -final class MergeSortedTests: XCTestCase { - // MARK: Support Types for Set-Operation Mergers - - /// Check the convenience initializers for `MergerSubset`. 
- func testMergerSubsetInitializers() { - XCTAssertEqual(MergerSubset(), .sum) - - XCTAssertEqual( - MergerSubset(keepExclusivesToFirst: false, keepExclusivesToSecond: false, - keepSharedElements: false), - .none - ) - XCTAssertEqual( - MergerSubset(keepExclusivesToFirst: true, keepExclusivesToSecond: false, - keepSharedElements: false), - .firstWithoutSecond - ) - XCTAssertEqual( - MergerSubset(keepExclusivesToFirst: false, keepExclusivesToSecond: true, - keepSharedElements: false), - .secondWithoutFirst - ) - XCTAssertEqual( - MergerSubset(keepExclusivesToFirst: false, keepExclusivesToSecond: false, - keepSharedElements: true), - .intersection - ) - XCTAssertEqual( - MergerSubset(keepExclusivesToFirst: true, keepExclusivesToSecond: true, - keepSharedElements: false), - .symmetricDifference - ) - XCTAssertEqual( - MergerSubset(keepExclusivesToFirst: true, keepExclusivesToSecond: false, - keepSharedElements: true), - .first - ) - XCTAssertEqual( - MergerSubset(keepExclusivesToFirst: false, keepExclusivesToSecond: true, - keepSharedElements: true), - .second - ) - XCTAssertEqual( - MergerSubset(keepExclusivesToFirst: true, keepExclusivesToSecond: true, - keepSharedElements: true), - .union - ) - } - - /// Check the subset emission flags for `MergerSubset`. 
- func testMergerSubsetFlags() { - XCTAssertEqualSequences( - MergerSubset.allCases, - [.none, .firstWithoutSecond, .secondWithoutFirst, .symmetricDifference, - .intersection, .first, .second, .union, .sum] - ) - - XCTAssertEqualSequences( - MergerSubset.allCases.map(\.emitsExclusivesToFirst), - [false, true, false, true, false, true, false, true, true] - ) - XCTAssertEqualSequences( - MergerSubset.allCases.map(\.emitsExclusivesToSecond), - [false, false, true, true, false, false, true, true, true] - ) - XCTAssertEqualSequences( - MergerSubset.allCases.map(\.emitsSharedElements), - [false, false, false, false, true, true, true, true, true] - ) - } - - // MARK: - Set-Operation Mergers - - /// Check the lazily-generated subset sequences. - func testLazySetMergers() { - let low = 0..<7, high = 3..<10 - let sequences = Dictionary(uniqueKeysWithValues: MergerSubset.allCases.map { - return ($0, mergeSortedSets(low, high, retaining: $0)) - }) - XCTAssertEqualSequences(sequences[.none]!, EmptyCollection()) - XCTAssertEqualSequences(sequences[.firstWithoutSecond]!, 0..<3) - XCTAssertEqualSequences(sequences[.secondWithoutFirst]!, 7..<10) - XCTAssertEqualSequences(sequences[.symmetricDifference]!, [0, 1, 2, 7, 8, 9]) - XCTAssertEqualSequences(sequences[.intersection]!, 3..<7) - XCTAssertEqualSequences(sequences[.first]!, low) - XCTAssertEqualSequences(sequences[.second]!, high) - XCTAssertEqualSequences(sequences[.union]!, 0..<10) - XCTAssertEqualSequences(sequences[.sum]!, [0, 1, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 8, 9]) - - XCTAssertLessThanOrEqual(sequences[.none]!.underestimatedCount, 0) - XCTAssertLessThanOrEqual(sequences[.firstWithoutSecond]!.underestimatedCount, 3) - XCTAssertLessThanOrEqual(sequences[.secondWithoutFirst]!.underestimatedCount, 3) - XCTAssertLessThanOrEqual(sequences[.symmetricDifference]!.underestimatedCount, 6) - XCTAssertLessThanOrEqual(sequences[.intersection]!.underestimatedCount, 4) - XCTAssertLessThanOrEqual(sequences[.first]!.underestimatedCount, 
7) - XCTAssertLessThanOrEqual(sequences[.second]!.underestimatedCount, 7) - XCTAssertLessThanOrEqual(sequences[.union]!.underestimatedCount, 7) - XCTAssertLessThanOrEqual(sequences[.sum]!.underestimatedCount, 14) - - // This exercises code missed by the `sequences` tests. - let reversed = Dictionary(uniqueKeysWithValues: MergerSubset.allCases.map { - ($0, mergeSortedSets(high, low, retaining: $0)) - }) - XCTAssertEqualSequences(reversed[.none]!, EmptyCollection()) - XCTAssertEqualSequences(reversed[.firstWithoutSecond]!, 7..<10) - XCTAssertEqualSequences(reversed[.secondWithoutFirst]!, 0..<3) - XCTAssertEqualSequences(reversed[.symmetricDifference]!, [0, 1, 2, 7, 8, 9]) - XCTAssertEqualSequences(reversed[.intersection]!, 3..<7) - XCTAssertEqualSequences(reversed[.first]!, high) - XCTAssertEqualSequences(reversed[.second]!, low) - XCTAssertEqualSequences(reversed[.union]!, 0..<10) - XCTAssertEqualSequences(reversed[.sum]!, [0, 1, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 8, 9]) - } - - /// Check the eagerly-generated subset sequences. - func testEagerSetMergers() { - let low = 0..<7, high = 3..<10 - let sequences = Dictionary(uniqueKeysWithValues: MergerSubset.allCases.map { - ($0, Array(mergeSorted: low, and: high, retaining: $0)) - }) - XCTAssertEqualSequences(sequences[.none]!, EmptyCollection()) - XCTAssertEqualSequences(sequences[.firstWithoutSecond]!, 0..<3) - XCTAssertEqualSequences(sequences[.secondWithoutFirst]!, 7..<10) - XCTAssertEqualSequences(sequences[.symmetricDifference]!, [0, 1, 2, 7, 8, 9]) - XCTAssertEqualSequences(sequences[.intersection]!, 3..<7) - XCTAssertEqualSequences(sequences[.first]!, low) - XCTAssertEqualSequences(sequences[.second]!, high) - XCTAssertEqualSequences(sequences[.union]!, 0..<10) - XCTAssertEqualSequences(sequences[.sum]!, [0, 1, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 8, 9]) - } - - // MARK: - Direct Mergers - - /// Check lazily-generated mergers. 
- func testLazyMergers() { - let low = 0..<7, high = 3..<10, result = mergeSorted(low, high) - XCTAssertEqualSequences(result, [0, 1, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 8, 9]) - } - - /// Check eagerly-generated mergers. - func testEagerMergers() { - let low = 0..<7, high = 3..<10, result = Array(mergeSorted: low, and: high) - XCTAssertEqualSequences(result, [0, 1, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 8, 9]) - } - - /// Test mergers for any number of arguments (one day). - @available(macOS 13.0.0, *) - func testMoreMergers() { - let low = 0..<7, high = 3..<10, - result = MergeSortedSequence(low, high, sortedBy: <) - XCTAssertEqualSequences(result, [0, 1, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 8, 9]) - } - - // MARK: - Sample Code - - /// Check the code from documentation. - func testSampleCode() { - // From the guide. - let guide1 = [10, 4, 0, 0, -3], guide2 = [20, 6, 1, -1, -5] - let mergedGuides = mergeSorted(guide1, guide2, sortedBy: >) - XCTAssertEqualSequences(mergedGuides, [20, 10, 6, 4, 1, 0, 0, -1, -3, -5]) - - let guide3 = [0, 1, 1, 2, 5, 10], guide4 = [-1, 0, 1, 2, 2, 7, 10, 20] - XCTAssertEqualSequences(mergeSortedSets(guide3, guide4, retaining: .union), - [-1, 0, 1, 1, 2, 2, 5, 7, 10, 20]) - XCTAssertEqualSequences(mergeSortedSets(guide3, guide4, retaining: .intersection), - [0, 1, 2, 10]) - XCTAssertEqualSequences(mergeSortedSets(guide3, guide4, retaining: .firstWithoutSecond), - [1, 5]) - XCTAssertEqualSequences(mergeSortedSets(guide3, guide4, retaining: .secondWithoutFirst), - [-1, 2, 7, 20]) - XCTAssertEqualSequences(mergeSortedSets(guide3, guide4, retaining: .symmetricDifference), - [-1, 1, 2, 5, 7, 20]) - XCTAssertEqualSequences(mergeSortedSets(guide3, guide4, retaining: .sum), - [-1, 0, 0, 1, 1, 1, 2, 2, 2, 5, 7, 10, 10, 20]) - } -} diff --git a/Tests/SwiftAlgorithmsTests/MergeTests.swift b/Tests/SwiftAlgorithmsTests/MergeTests.swift new file mode 100644 index 00000000..347b4e0c --- /dev/null +++ b/Tests/SwiftAlgorithmsTests/MergeTests.swift @@ -0,0 +1,187 @@ 
+//===----------------------------------------------------------------------===// +// +// This source file is part of the Swift Algorithms open source project +// +// Copyright (c) 2024 Apple Inc. and the Swift project authors +// Licensed under Apache License v2.0 with Runtime Library Exception +// +// See https://swift.org/LICENSE.txt for license information +// +//===----------------------------------------------------------------------===// + +import XCTest +import Algorithms + +final class MergeTests: XCTestCase { + // MARK: Support Types for Set-Operation Mergers + + /// Check the convenience initializers for `MergerSubset`. + func testMergerSubsetInitializers() { + XCTAssertEqual(MergerSubset(), .sum) + + XCTAssertEqual( + MergerSubset(keepExclusivesToFirst: false, keepExclusivesToSecond: false, + keepSharedElements: false), + .none + ) + XCTAssertEqual( + MergerSubset(keepExclusivesToFirst: true, keepExclusivesToSecond: false, + keepSharedElements: false), + .firstWithoutSecond + ) + XCTAssertEqual( + MergerSubset(keepExclusivesToFirst: false, keepExclusivesToSecond: true, + keepSharedElements: false), + .secondWithoutFirst + ) + XCTAssertEqual( + MergerSubset(keepExclusivesToFirst: false, keepExclusivesToSecond: false, + keepSharedElements: true), + .intersection + ) + XCTAssertEqual( + MergerSubset(keepExclusivesToFirst: true, keepExclusivesToSecond: true, + keepSharedElements: false), + .symmetricDifference + ) + XCTAssertEqual( + MergerSubset(keepExclusivesToFirst: true, keepExclusivesToSecond: false, + keepSharedElements: true), + .first + ) + XCTAssertEqual( + MergerSubset(keepExclusivesToFirst: false, keepExclusivesToSecond: true, + keepSharedElements: true), + .second + ) + XCTAssertEqual( + MergerSubset(keepExclusivesToFirst: true, keepExclusivesToSecond: true, + keepSharedElements: true), + .union + ) + } + + /// Check the subset emission flags for `MergerSubset`. 
+ func testMergerSubsetFlags() { + XCTAssertEqualSequences( + MergerSubset.allCases, + [.none, .firstWithoutSecond, .secondWithoutFirst, .symmetricDifference, + .intersection, .first, .second, .union, .sum] + ) + + XCTAssertEqualSequences( + MergerSubset.allCases.map(\.emitsExclusivesToFirst), + [false, true, false, true, false, true, false, true, true] + ) + XCTAssertEqualSequences( + MergerSubset.allCases.map(\.emitsExclusivesToSecond), + [false, false, true, true, false, false, true, true, true] + ) + XCTAssertEqualSequences( + MergerSubset.allCases.map(\.emitsSharedElements), + [false, false, false, false, true, true, true, true, true] + ) + } + + // MARK: - Set-Operation Mergers + + /// Test subset sequence results, no matter if lazy or eager generation. + func mergerTests( + converter: (Range, Range, MergerSubset) -> U + ) where U.Element == Int { + let first = 0..<7, second = 3..<10 + let expectedNone = EmptyCollection(), expectedFirstOnly = 0..<3, + expectedSecondOnly = 7..<10, expectedDiff = [0, 1, 2, 7, 8, 9], + expectedIntersection = 3..<7, expectedFirst = first, + expectedSecond = second, expectedUnion = 0..<10, + expectedSum = [0, 1, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7, 8, 9] + do { + let sequences = Dictionary(uniqueKeysWithValues: MergerSubset.allCases.map { + return ($0, converter(first, second, $0)) + }) + XCTAssertEqualSequences(sequences[.none]!, expectedNone) + XCTAssertEqualSequences(sequences[.firstWithoutSecond]!, expectedFirstOnly) + XCTAssertEqualSequences(sequences[.secondWithoutFirst]!, expectedSecondOnly) + XCTAssertEqualSequences(sequences[.symmetricDifference]!, expectedDiff) + XCTAssertEqualSequences(sequences[.intersection]!, expectedIntersection) + XCTAssertEqualSequences(sequences[.first]!, expectedFirst) + XCTAssertEqualSequences(sequences[.second]!, expectedSecond) + XCTAssertEqualSequences(sequences[.union]!, expectedUnion) + XCTAssertEqualSequences(sequences[.sum]!, expectedSum) + + 
XCTAssertLessThanOrEqual(sequences[.none]!.underestimatedCount, + expectedNone.count) + XCTAssertLessThanOrEqual(sequences[.firstWithoutSecond]!.underestimatedCount, + expectedFirstOnly.count) + XCTAssertLessThanOrEqual(sequences[.secondWithoutFirst]!.underestimatedCount, + expectedSecondOnly.count) + XCTAssertLessThanOrEqual(sequences[.symmetricDifference]!.underestimatedCount, + expectedDiff.count) + XCTAssertLessThanOrEqual(sequences[.intersection]!.underestimatedCount, + expectedIntersection.count) + XCTAssertLessThanOrEqual(sequences[.first]!.underestimatedCount, + expectedFirst.count) + XCTAssertLessThanOrEqual(sequences[.second]!.underestimatedCount, + expectedSecond.count) + XCTAssertLessThanOrEqual(sequences[.union]!.underestimatedCount, + expectedUnion.count) + XCTAssertLessThanOrEqual(sequences[.sum]!.underestimatedCount, + expectedSum.count) + } + + do { + // This exercises code missed by the `sequences` tests. + let flipped = Dictionary(uniqueKeysWithValues: MergerSubset.allCases.map { + return ($0, converter(second, first, $0)) + }) + XCTAssertEqualSequences(flipped[.none]!, expectedNone) + XCTAssertEqualSequences(flipped[.firstWithoutSecond]!, expectedSecondOnly) + XCTAssertEqualSequences(flipped[.secondWithoutFirst]!, expectedFirstOnly) + XCTAssertEqualSequences(flipped[.symmetricDifference]!, expectedDiff) + XCTAssertEqualSequences(flipped[.intersection]!, expectedIntersection) + XCTAssertEqualSequences(flipped[.first]!, expectedSecond) + XCTAssertEqualSequences(flipped[.second]!, expectedFirst) + XCTAssertEqualSequences(flipped[.union]!, expectedUnion) + XCTAssertEqualSequences(flipped[.sum]!, expectedSum) + } + + } + + /// Check the lazily-generated subset sequences. + func testLazySetMergers() { + mergerTests(converter: { merge($0, $1, keeping: $2) }) + } + + /// Check the eagerly-generated subset sequences. 
+ func testEagerSetMergers() { + mergerTests(converter: { merge($0, $1, into: Array.self, keeping: $2) }) + } + + // MARK: - Sample Code + + /// Check the code from documentation. + func testSampleCode() { + // From the guide. + do { + let merged = merge([10, 4, 0, 0, -3], [20, 6, 1, -1, -5], keeping: .sum, + sortedBy: >) + XCTAssertEqualSequences(merged, [20, 10, 6, 4, 1, 0, 0, -1, -3, -5]) + } + + do { + let first = [0, 1, 1, 2, 5, 10], second = [-1, 0, 1, 2, 2, 7, 10, 20] + XCTAssertEqualSequences(merge(first, second, into: Array.self, + keeping: .union), + [-1, 0, 1, 1, 2, 2, 5, 7, 10, 20]) + XCTAssertEqualSequences(merge(first, second, into: Array.self, + keeping: .intersection), + [0, 1, 2, 10]) + XCTAssertEqualSequences(merge(first, second, into: Array.self, + keeping: .secondWithoutFirst), + [-1, 2, 7, 20]) + XCTAssertEqualSequences(merge(first, second, into: Array.self, + keeping: .sum), + [-1, 0, 0, 1, 1, 1, 2, 2, 2, 5, 7, 10, 10, 20]) + } + } +} From 188649a8e5d5056c7d7aa036ccd9cedf08f23686 Mon Sep 17 00:00:00 2001 From: Daryle Walker Date: Wed, 25 Sep 2024 17:02:25 -0400 Subject: [PATCH 32/34] Regroup free functions over eager vs lazy --- Guides/Merge.md | 18 +++---- .../Algorithms/Documentation.docc/Merging.md | 2 +- Sources/Algorithms/Merge.swift | 50 +++++++++---------- 3 files changed, 35 insertions(+), 35 deletions(-) diff --git a/Guides/Merge.md b/Guides/Merge.md index 8f066821..ed3716ed 100644 --- a/Guides/Merge.md +++ b/Guides/Merge.md @@ -100,6 +100,15 @@ public func merge( ) -> MergedSequence where First : Sequence, Second : Sequence, First.Element == Second.Element +/// Given two sorted sequences treated as (multi)sets, +/// return a sequence that lazily vends the also-sorted result of applying a +/// given set operation to the sequence operands. 
+public func merge( + _ first: First, _ second: Second, keeping filter: MergerSubset +) -> MergedSequence +where First : Sequence, Second : Sequence, First.Element : Comparable, + First.Element == Second.Element + /// Given two sequences treated as (multi)sets, both sorted according to /// a given predicate, /// eagerly apply a given set operation to the sequences then copy the @@ -111,15 +120,6 @@ public func merge( where First : Sequence, Second : Sequence, Result : RangeReplaceableCollection, Fault : Error, First.Element == Second.Element, Second.Element == Result.Element -/// Given two sorted sequences treated as (multi)sets, -/// return a sequence that lazily vends the also-sorted result of applying a -/// given set operation to the sequence operands. -public func merge( - _ first: First, _ second: Second, keeping filter: MergerSubset -) -> MergedSequence -where First : Sequence, Second : Sequence, First.Element : Comparable, - First.Element == Second.Element - /// Given two sorted sequences treated as (multi)sets, /// eagerly apply a given set operation to the sequences then copy the /// also-sorted result into a collection of a given type. diff --git a/Sources/Algorithms/Documentation.docc/Merging.md b/Sources/Algorithms/Documentation.docc/Merging.md index 497d8105..62eab085 100644 --- a/Sources/Algorithms/Documentation.docc/Merging.md +++ b/Sources/Algorithms/Documentation.docc/Merging.md @@ -9,8 +9,8 @@ then generate the result of applying a set operation. 
### Merging Sorted Sequences - ``merge(_:_:keeping:sortedBy)`` -- ``merge(_:_:into:keeping:sortedBy)`` - ``merge(_:_:keeping:)`` +- ``merge(_:_:into:keeping:sortedBy)`` - ``merge(_:_:into:keeping:)`` - ``Swift/MutableCollection/mergePartitions(across:sortedBy:)`` - ``Swift/MutableCollection/mergePartitions(across:)`` diff --git a/Sources/Algorithms/Merge.swift b/Sources/Algorithms/Merge.swift index 49dbf56b..5d45107b 100644 --- a/Sources/Algorithms/Merge.swift +++ b/Sources/Algorithms/Merge.swift @@ -144,6 +144,31 @@ where First.Element == Second.Element { return .init(first, second, keeping: filter, sortedBy: areInIncreasingOrder) } +/// Given two sorted sequences treated as (multi)sets, +/// return a sequence that lazily vends the also-sorted result of applying a +/// given set operation to the sequence operands. +/// +/// For simply merging the sequences, use `.sum` as the operation. +/// +/// - Precondition: Both `first` and `second` must be sorted. +/// +/// - Parameters: +/// - first: The first sequence to merge. +/// - second: The second sequence to merge. +/// - filter: The subset of the merged sequence to keep. +/// - Returns: A lazy sequence for the resulting merge. +/// +/// - Complexity: O(1). +@inlinable +public func merge( + _ first: First, + _ second: Second, + keeping filter: MergerSubset +) -> MergedSequence +where First.Element == Second.Element, Second.Element: Comparable { + return merge(first, second, keeping: filter, sortedBy: <) +} + /// Given two sequences treated as (multi)sets, both sorted according to /// a given predicate, /// eagerly apply a given set operation to the sequences then copy the @@ -195,31 +220,6 @@ where First.Element == Second.Element, Second.Element == Result.Element { do: makeResult(compare:)) } -/// Given two sorted sequences treated as (multi)sets, -/// return a sequence that lazily vends the also-sorted result of applying a -/// given set operation to the sequence operands. 
-/// -/// For simply merging the sequences, use `.sum` as the operation. -/// -/// - Precondition: Both `first` and `second` must be sorted. -/// -/// - Parameters: -/// - first: The first sequence to merge. -/// - second: The second sequence to merge. -/// - filter: The subset of the merged sequence to keep. -/// - Returns: A lazy sequence for the resulting merge. -/// -/// - Complexity: O(1). -@inlinable -public func merge( - _ first: First, - _ second: Second, - keeping filter: MergerSubset -) -> MergedSequence -where First.Element == Second.Element, Second.Element: Comparable { - return merge(first, second, keeping: filter, sortedBy: <) -} - /// Given two sorted sequences treated as (multi)sets, /// eagerly apply a given set operation to the sequences then copy the /// also-sorted result into a collection of a given type. From bd7411d46d2fe444667c94a60c4b840023f8784c Mon Sep 17 00:00:00 2001 From: Daryle Walker Date: Wed, 25 Sep 2024 17:44:58 -0400 Subject: [PATCH 33/34] Lock eager mergers to always return arrays Remove the parameter to specify the return type of eager mergers; they now are locked to being of the standard Array type. As this gives the eager merging functions the same name as the lazy versions, the lazy versions have been renamed to avoid ambiguity. 
--- Guides/Merge.md | 47 +++++++------- Guides/README.md | 2 +- .../Algorithms/Documentation.docc/Merging.md | 4 +- Sources/Algorithms/Merge.swift | 64 +++++++++++++------ Tests/SwiftAlgorithmsTests/MergeTests.swift | 20 +++--- 5 files changed, 79 insertions(+), 58 deletions(-) diff --git a/Guides/Merge.md b/Guides/Merge.md index ed3716ed..77bb4386 100644 --- a/Guides/Merge.md +++ b/Guides/Merge.md @@ -14,7 +14,7 @@ If the sequences are sorted with something besides the less-than operator (`<`), then a predicate can be supplied: ```swift -let merged = merge([10, 4, 0, 0, -3], [20, 6, 1, -1, -5], keeping: .sum, sortedBy: >) +let merged = lazilyMerge([10, 4, 0, 0, -3], [20, 6, 1, -1, -5], keeping: .sum, sortedBy: >) print(Array(merged)) // [20, 10, 6, 4, 1, 0, 0, -1, -3, -5] ``` @@ -28,10 +28,10 @@ so applying operations can be done in-line during merging: ```swift let first = [0, 1, 1, 2, 5, 10], second = [-1, 0, 1, 2, 2, 7, 10, 20] -print(merge(first, second, into: Array.self, keeping: .union)) -print(merge(first, second, into: Array.self, keeping: .intersection)) -print(merge(first, second, into: Array.self, keeping: .secondWithoutFirst)) -print(merge(first, second, into: Array.self, keeping: .sum)) // Standard merge! +print(merge(first, second, keeping: .union)) +print(merge(first, second, keeping: .intersection)) +print(merge(first, second, keeping: .secondWithoutFirst)) +print(merge(first, second, keeping: .sum)) // Standard merge! /* [-1, 0, 1, 1, 2, 2, 5, 7, 10, 20] [0, 1, 2, 10] @@ -94,7 +94,7 @@ extension MutableCollection where Self : BidirectionalCollection, Self.Element : /// a given predicate, /// return a sequence that lazily vends the also-sorted result of applying a /// given set operation to the sequence operands. 
-public func merge( +public func lazilyMerge( _ first: First, _ second: Second, keeping filter: MergerSubset, sortedBy areInIncreasingOrder: @escaping (First.Element, Second.Element) -> Bool ) -> MergedSequence @@ -103,32 +103,29 @@ where First : Sequence, Second : Sequence, First.Element == Second.Element /// Given two sorted sequences treated as (multi)sets, /// return a sequence that lazily vends the also-sorted result of applying a /// given set operation to the sequence operands. -public func merge( +public func lazilyMerge( _ first: First, _ second: Second, keeping filter: MergerSubset ) -> MergedSequence where First : Sequence, Second : Sequence, First.Element : Comparable, First.Element == Second.Element -/// Given two sequences treated as (multi)sets, both sorted according to -/// a given predicate, -/// eagerly apply a given set operation to the sequences then copy the -/// also-sorted result into a collection of a given type. -public func merge( - _ first: First, _ second: Second, into type: Result.Type, keeping filter: MergerSubset, +/// Returns a sorted array containing the result of the given set operation +/// applied to the given sorted sequences, +/// where sorting is determined by the given predicate. +public func merge( + _ first: First, _ second: Second, keeping filter: MergerSubset, sortedBy areInIncreasingOrder: (First.Element, Second.Element) throws(Fault) -> Bool -) throws(Fault) -> Result -where First : Sequence, Second : Sequence, Result : RangeReplaceableCollection, - Fault : Error, First.Element == Second.Element, Second.Element == Result.Element +) throws(Fault) -> [Second.Element] +where First : Sequence, Second : Sequence, + Fault : Error, First.Element == Second.Element -/// Given two sorted sequences treated as (multi)sets, -/// eagerly apply a given set operation to the sequences then copy the -/// also-sorted result into a collection of a given type. 
-public func merge( - _ first: First, _ second: Second, into type: Result.Type, keeping filter: MergerSubset -) -> Result -where First : Sequence, Second : Sequence, Result : RangeReplaceableCollection, - First.Element : Comparable, First.Element == Second.Element, - Second.Element == Result.Element +/// Returns a sorted array containing the result of the given set operation +/// applied to the given sorted sequences. +public func merge( + _ first: First, _ second: Second, keeping filter: MergerSubset +) -> [Second.Element] +where First : Sequence, Second : Sequence, + First.Element : Comparable, First.Element == Second.Element ``` Target subsets are described by a new type. diff --git a/Guides/README.md b/Guides/README.md index 423ad45a..fbb8b35d 100644 --- a/Guides/README.md +++ b/Guides/README.md @@ -21,7 +21,7 @@ These guides describe the design and intention behind the APIs included in the ` - [`chain(_:_:)`](https://github.com/apple/swift-algorithms/blob/main/Guides/Chain.md): Concatenates two collections with the same element type. - [`cycled()`, `cycled(times:)`](https://github.com/apple/swift-algorithms/blob/main/Guides/Cycle.md): Repeats the elements of a collection forever or a set number of times. - [`joined(by:)`](https://github.com/apple/swift-algorithms/blob/main/Guides/Joined.md): Concatenate sequences of sequences, using an element or sequence as a separator, or using a closure to generate each separator. -- [`merge(_:_:keeping:sortedBy:)`, `merge(_:_:into:keeping:sortedBy:)`, `merge(_:_:keeping:)`, `merge(_:_:into:keeping:)`](https://github.com/apple/swift-algorithms/blob/main/Guides/Merge.md): Merge two sorted sequences together. +- [`lazilyMerge(_:_:keeping:sortedBy:)`, `lazilyMerge(_:_:keeping:)`, `merge(_:_:keeping:sortedBy:)`, `merge(_:_:keeping:)`](https://github.com/apple/swift-algorithms/blob/main/Guides/Merge.md): Merge two sorted sequences together. 
- [`product(_:_:)`](https://github.com/apple/swift-algorithms/blob/main/Guides/Product.md): Iterates over all the pairs of two collections; equivalent to nested `for`-`in` loops. #### Subsetting operations diff --git a/Sources/Algorithms/Documentation.docc/Merging.md b/Sources/Algorithms/Documentation.docc/Merging.md index 62eab085..ad72b7a0 100644 --- a/Sources/Algorithms/Documentation.docc/Merging.md +++ b/Sources/Algorithms/Documentation.docc/Merging.md @@ -8,10 +8,10 @@ then generate the result of applying a set operation. ### Merging Sorted Sequences +- ``lazilyMerge(_:_:keeping:sortedBy)`` +- ``lazilyMerge(_:_:keeping:)`` - ``merge(_:_:keeping:sortedBy)`` - ``merge(_:_:keeping:)`` -- ``merge(_:_:into:keeping:sortedBy)`` -- ``merge(_:_:into:keeping:)`` - ``Swift/MutableCollection/mergePartitions(across:sortedBy:)`` - ``Swift/MutableCollection/mergePartitions(across:)`` diff --git a/Sources/Algorithms/Merge.swift b/Sources/Algorithms/Merge.swift index 5d45107b..d30ecbbc 100644 --- a/Sources/Algorithms/Merge.swift +++ b/Sources/Algorithms/Merge.swift @@ -133,7 +133,7 @@ extension MergerSubset { /// - Returns: A lazy sequence for the resulting merge. /// /// - Complexity: O(1). -public func merge( +public func lazilyMerge( _ first: First, _ second: Second, keeping filter: MergerSubset, @@ -160,13 +160,13 @@ where First.Element == Second.Element { /// /// - Complexity: O(1). @inlinable -public func merge( +public func lazilyMerge( _ first: First, _ second: Second, keeping filter: MergerSubset ) -> MergedSequence where First.Element == Second.Element, Second.Element: Comparable { - return merge(first, second, keeping: filter, sortedBy: <) + return lazilyMerge(first, second, keeping: filter, sortedBy: <) } /// Given two sequences treated as (multi)sets, both sorted according to @@ -192,8 +192,9 @@ where First.Element == Second.Element, Second.Element: Comparable { /// /// - Complexity:O(`n` + `m`), /// where *n* and *m* are the lengths of `first` and `second`. 
-public func merge( +@usableFromInline +func merge( _ first: First, _ second: Second, into type: Result.Type, @@ -220,9 +221,41 @@ where First.Element == Second.Element, Second.Element == Result.Element { do: makeResult(compare:)) } -/// Given two sorted sequences treated as (multi)sets, -/// eagerly apply a given set operation to the sequences then copy the -/// also-sorted result into a collection of a given type. +/// Returns a sorted array containing the result of the given set operation +/// applied to the given sorted sequences, +/// where sorting is determined by the given predicate. +/// +/// For simply merging the sequences, use `.sum` as the operation. +/// +/// - Precondition: Both `first` and `second` must be sorted according to +/// `areInIncreasingOrder`. +/// Said predicate must model a strict weak ordering over its arguments. +/// Both `first` and `second` must be finite. +/// +/// - Parameters: +/// - first: The first sequence to merge. +/// - second: The second sequence to merge. +/// - filter: The subset of the merged sequence to keep. +/// - areInIncreasingOrder: The function expressing the sorting criterion. +/// - Returns: The resulting merge stored in an array. +/// +/// - Complexity:O(`n` + `m`), +/// where *n* and *m* are the lengths of `first` and `second`. +@inlinable +public func merge( + _ first: First, + _ second: Second, + keeping filter: MergerSubset, + sortedBy areInIncreasingOrder: (First.Element, Second.Element) throws(Fault) + -> Bool +) throws(Fault) -> [Second.Element] +where First.Element == Second.Element { + return try merge(first, second, into: Array.self, keeping: filter, + sortedBy: areInIncreasingOrder) +} + +/// Returns a sorted array containing the result of the given set operation +/// applied to the given sorted sequences. /// /// For simply merging the sequences, use `.sum` as the operation. 
/// @@ -232,24 +265,19 @@ where First.Element == Second.Element, Second.Element == Result.Element { /// - Parameters: /// - first: The first sequence to merge. /// - second: The second sequence to merge. -/// - type: A marker specifying the type of collection for -/// storing the result. /// - filter: The subset of the merged sequence to keep. -/// - Returns: The resulting merge stored in a collection of the given `type`. +/// - Returns: The resulting merge stored in an array. /// /// - Complexity:O(`n` + `m`), /// where *n* and *m* are the lengths of `first` and `second`. @inlinable -public func merge( +public func merge( _ first: First, _ second: Second, - into type: Result.Type, keeping filter: MergerSubset -) -> Result -where First.Element == Second.Element, Second.Element == Result.Element, - Result.Element: Comparable { - return merge(first, second, into: Result.self, keeping: filter, sortedBy: <) +) -> [Second.Element] +where First.Element == Second.Element, First.Element: Comparable { + return merge(first, second, keeping: filter, sortedBy: <) } //===----------------------------------------------------------------------===// diff --git a/Tests/SwiftAlgorithmsTests/MergeTests.swift b/Tests/SwiftAlgorithmsTests/MergeTests.swift index 347b4e0c..0dac0c6a 100644 --- a/Tests/SwiftAlgorithmsTests/MergeTests.swift +++ b/Tests/SwiftAlgorithmsTests/MergeTests.swift @@ -149,12 +149,12 @@ final class MergeTests: XCTestCase { /// Check the lazily-generated subset sequences. func testLazySetMergers() { - mergerTests(converter: { merge($0, $1, keeping: $2) }) + mergerTests(converter: { lazilyMerge($0, $1, keeping: $2) }) } /// Check the eagerly-generated subset sequences. func testEagerSetMergers() { - mergerTests(converter: { merge($0, $1, into: Array.self, keeping: $2) }) + mergerTests(converter: { merge($0, $1, keeping: $2) }) } // MARK: - Sample Code @@ -163,24 +163,20 @@ final class MergeTests: XCTestCase { func testSampleCode() { // From the guide. 
do { - let merged = merge([10, 4, 0, 0, -3], [20, 6, 1, -1, -5], keeping: .sum, - sortedBy: >) + let merged = lazilyMerge([10, 4, 0, 0, -3], [20, 6, 1, -1, -5], + keeping: .sum, sortedBy: >) XCTAssertEqualSequences(merged, [20, 10, 6, 4, 1, 0, 0, -1, -3, -5]) } do { let first = [0, 1, 1, 2, 5, 10], second = [-1, 0, 1, 2, 2, 7, 10, 20] - XCTAssertEqualSequences(merge(first, second, into: Array.self, - keeping: .union), + XCTAssertEqualSequences(merge(first, second, keeping: .union), [-1, 0, 1, 1, 2, 2, 5, 7, 10, 20]) - XCTAssertEqualSequences(merge(first, second, into: Array.self, - keeping: .intersection), + XCTAssertEqualSequences(merge(first, second, keeping: .intersection), [0, 1, 2, 10]) - XCTAssertEqualSequences(merge(first, second, into: Array.self, - keeping: .secondWithoutFirst), + XCTAssertEqualSequences(merge(first, second, keeping: .secondWithoutFirst), [-1, 2, 7, 20]) - XCTAssertEqualSequences(merge(first, second, into: Array.self, - keeping: .sum), + XCTAssertEqualSequences(merge(first, second, keeping: .sum), [-1, 0, 0, 1, 1, 1, 2, 2, 2, 5, 7, 10, 10, 20]) } } From aa2a5d0d866586bb9a3feb2b77c4c5ec41cb28c7 Mon Sep 17 00:00:00 2001 From: Daryle Walker Date: Wed, 25 Sep 2024 18:12:06 -0400 Subject: [PATCH 34/34] Remove the error type in a public declaration The sequence to merge other sequences needs to specify the ordering predicate's error type. But public use of that name never needs that error type. Rename the merging sequence to a un-public name. Add an alias from that un-public name to the old public name but removing the error type. 
--- Guides/Merge.md | 4 ++-- Sources/Algorithms/Merge.swift | 22 +++++++++++++++------- 2 files changed, 17 insertions(+), 9 deletions(-) diff --git a/Guides/Merge.md b/Guides/Merge.md index 77bb4386..b11c9398 100644 --- a/Guides/Merge.md +++ b/Guides/Merge.md @@ -97,7 +97,7 @@ extension MutableCollection where Self : BidirectionalCollection, Self.Element : public func lazilyMerge( _ first: First, _ second: Second, keeping filter: MergerSubset, sortedBy areInIncreasingOrder: @escaping (First.Element, Second.Element) -> Bool -) -> MergedSequence +) -> MergedSequence where First : Sequence, Second : Sequence, First.Element == Second.Element /// Given two sorted sequences treated as (multi)sets, @@ -105,7 +105,7 @@ where First : Sequence, Second : Sequence, First.Element == Second.Element /// given set operation to the sequence operands. public func lazilyMerge( _ first: First, _ second: Second, keeping filter: MergerSubset -) -> MergedSequence +) -> MergedSequence where First : Sequence, Second : Sequence, First.Element : Comparable, First.Element == Second.Element diff --git a/Sources/Algorithms/Merge.swift b/Sources/Algorithms/Merge.swift index d30ecbbc..f32a897c 100644 --- a/Sources/Algorithms/Merge.swift +++ b/Sources/Algorithms/Merge.swift @@ -139,7 +139,7 @@ public func lazilyMerge( keeping filter: MergerSubset, sortedBy areInIncreasingOrder: @escaping (First.Element, Second.Element) -> Bool -) -> MergedSequence +) -> MergedSequence where First.Element == Second.Element { return .init(first, second, keeping: filter, sortedBy: areInIncreasingOrder) } @@ -164,7 +164,7 @@ public func lazilyMerge( _ first: First, _ second: Second, keeping filter: MergerSubset -) -> MergedSequence +) -> MergedSequence where First.Element == Second.Element, Second.Element: Comparable { return lazilyMerge(first, second, keeping: filter, sortedBy: <) } @@ -207,8 +207,8 @@ where First.Element == Second.Element, Second.Element == Result.Element { compare: @escaping (First.Element, 
Second.Element) throws(Fault) -> Bool ) throws(Fault) -> Result { var result = Result() - let sequence = MergedSequence(first, second, keeping: filter, - sortedBy: compare) + let sequence = _MergedSequence(first, second, keeping: filter, + sortedBy: compare) var iterator = sequence.makeIterator() result.reserveCapacity(sequence.underestimatedCount) while let element = try iterator.throwingNext() { @@ -288,7 +288,15 @@ where First.Element == Second.Element, First.Element: Comparable { /// where both sequences' elements are sorted according to some predicate, /// and emits a sorted merger, /// excluding any elements barred by a set operation. -public struct MergedSequence< +public typealias MergedSequence + = _MergedSequence + where First: Sequence, Second: Sequence, First.Element == Second.Element + +/// The implementation for `MergedSequence`. +/// The public face of that type does not need an otherwise +/// unused error type declared, +/// so this type is needed to provide a way to hide the (`Never`) error type. +public struct _MergedSequence< First: Sequence, Second: Sequence, Fault: Error @@ -320,7 +328,7 @@ public struct MergedSequence< } } -extension MergedSequence: Sequence { +extension _MergedSequence: Sequence { public func makeIterator( ) -> MergingIterator { return .init(base1.makeIterator(), base2.makeIterator(), @@ -335,7 +343,7 @@ extension MergedSequence: Sequence { } } -extension MergedSequence: LazySequenceProtocol {} +extension _MergedSequence: LazySequenceProtocol {} //===----------------------------------------------------------------------===// // MARK: - MergingIterator