diff --git a/icu4c/source/data/brkitr/rules/line.txt b/icu4c/source/data/brkitr/rules/line.txt
index 9f3e44984eae..e43e70b36311 100644
--- a/icu4c/source/data/brkitr/rules/line.txt
+++ b/icu4c/source/data/brkitr/rules/line.txt
@@ -297,7 +297,7 @@ $LB20NonBreaks = [$LB18NonBreaks - $CB];
 #             and then to default UAX #14 behaviour (UTC-179-C32).
 #
 ^($HY | $HH) $CM* $ALPlus;
-$GL ($HY | $HH) $CM* $ALPlus; 
+$GL $CM* ($HY | $HH) $CM* $ALPlus;
 # Non-breaking CB from LB8a:
 $CB $CM* $ZWJ ($HY | $HH) $CM* $ALPlus;
 # Non-breaking SP from LB14:
diff --git a/icu4c/source/data/brkitr/rules/line_cj.txt b/icu4c/source/data/brkitr/rules/line_cj.txt
index fc615f55db21..793163898e00 100644
--- a/icu4c/source/data/brkitr/rules/line_cj.txt
+++ b/icu4c/source/data/brkitr/rules/line_cj.txt
@@ -298,7 +298,7 @@ $LB20NonBreaks = [$LB18NonBreaks - $CB];
 #             and then to default UAX #14 behaviour (UTC-179-C32).
 #
 ^($HY | $HH) $CM* $ALPlus;
-$GL ($HY | $HH) $CM* $ALPlus; 
+$GL $CM* ($HY | $HH) $CM* $ALPlus;
 # Non-breaking CB from LB8a:
 $CB $CM* $ZWJ ($HY | $HH) $CM* $ALPlus;
 # Non-breaking SP from LB14:
diff --git a/icu4c/source/data/brkitr/rules/line_loose.txt b/icu4c/source/data/brkitr/rules/line_loose.txt
index 2bb9be5845f8..9ff4e17eb3a5 100644
--- a/icu4c/source/data/brkitr/rules/line_loose.txt
+++ b/icu4c/source/data/brkitr/rules/line_loose.txt
@@ -306,7 +306,7 @@ $LB20NonBreaks = [$LB18NonBreaks - $CB];
 #             and then to default UAX #14 behaviour (UTC-179-C32).
 #
 ^($HY | $HH) $CM* $ALPlus;
-$GL ($HY | $HH) $CM* $ALPlus; 
+$GL $CM* ($HY | $HH) $CM* $ALPlus;
 # Non-breaking CB from LB8a:
 $CB $CM* $ZWJ ($HY | $HH) $CM* $ALPlus;
 # Non-breaking SP from LB14:
diff --git a/icu4c/source/data/brkitr/rules/line_loose_cj.txt b/icu4c/source/data/brkitr/rules/line_loose_cj.txt
index 15715a225123..428d225f16d9 100644
--- a/icu4c/source/data/brkitr/rules/line_loose_cj.txt
+++ b/icu4c/source/data/brkitr/rules/line_loose_cj.txt
@@ -318,7 +318,7 @@ $LB20NonBreaks = [$LB18NonBreaks - $CB];
 #             and then to default UAX #14 behaviour (UTC-179-C32).
 #
 ^($HY | $HH) $CM* $ALPlus;
-$GL ($HY | $HH) $CM* $ALPlus; 
+$GL $CM* ($HY | $HH) $CM* $ALPlus;
 # Non-breaking CB from LB8a:
 $CB $CM* $ZWJ ($HY | $HH) $CM* $ALPlus;
 # Non-breaking SP from LB14:
diff --git a/icu4c/source/data/brkitr/rules/line_loose_phrase_cj.txt b/icu4c/source/data/brkitr/rules/line_loose_phrase_cj.txt
index 87ab33b48a1c..2edf4b3bc33a 100644
--- a/icu4c/source/data/brkitr/rules/line_loose_phrase_cj.txt
+++ b/icu4c/source/data/brkitr/rules/line_loose_phrase_cj.txt
@@ -331,7 +331,7 @@ $LB20NonBreaks = [$LB18NonBreaks - $CB];
 #             and then to default UAX #14 behaviour (UTC-179-C32).
 #
 ^($HY | $HH) $CM* $ALPlus;
-$GL ($HY | $HH) $CM* $ALPlus; 
+$GL $CM* ($HY | $HH) $CM* $ALPlus;
 # Non-breaking CB from LB8a:
 $CB $CM* $ZWJ ($HY | $HH) $CM* $ALPlus;
 # Non-breaking SP from LB14:
diff --git a/icu4c/source/data/brkitr/rules/line_normal.txt b/icu4c/source/data/brkitr/rules/line_normal.txt
index c41280c28d1c..bf6dee8c05cd 100644
--- a/icu4c/source/data/brkitr/rules/line_normal.txt
+++ b/icu4c/source/data/brkitr/rules/line_normal.txt
@@ -299,7 +299,7 @@ $LB20NonBreaks = [$LB18NonBreaks - $CB];
 #             and then to default UAX #14 behaviour (UTC-179-C32).
 #
 ^($HY | $HH) $CM* $ALPlus;
-$GL ($HY | $HH) $CM* $ALPlus; 
+$GL $CM* ($HY | $HH) $CM* $ALPlus;
 # Non-breaking CB from LB8a:
 $CB $CM* $ZWJ ($HY | $HH) $CM* $ALPlus;
 # Non-breaking SP from LB14:
diff --git a/icu4c/source/data/brkitr/rules/line_normal_cj.txt b/icu4c/source/data/brkitr/rules/line_normal_cj.txt
index 31dd65854cb1..f596454621d0 100644
--- a/icu4c/source/data/brkitr/rules/line_normal_cj.txt
+++ b/icu4c/source/data/brkitr/rules/line_normal_cj.txt
@@ -304,7 +304,7 @@ $LB20NonBreaks = [$LB18NonBreaks - $CB];
 #             and then to default UAX #14 behaviour (UTC-179-C32).
 #
 ^($HY | $HH) $CM* $ALPlus;
-$GL ($HY | $HH) $CM* $ALPlus; 
+$GL $CM* ($HY | $HH) $CM* $ALPlus;
 # Non-breaking CB from LB8a:
 $CB $CM* $ZWJ ($HY | $HH) $CM* $ALPlus;
 # Non-breaking SP from LB14:
diff --git a/icu4c/source/data/brkitr/rules/line_normal_phrase_cj.txt b/icu4c/source/data/brkitr/rules/line_normal_phrase_cj.txt
index 85d771fcdbf9..e0bbd00025f9 100644
--- a/icu4c/source/data/brkitr/rules/line_normal_phrase_cj.txt
+++ b/icu4c/source/data/brkitr/rules/line_normal_phrase_cj.txt
@@ -317,7 +317,7 @@ $LB20NonBreaks = [$LB18NonBreaks - $CB];
 #             and then to default UAX #14 behaviour (UTC-179-C32).
 #
 ^($HY | $HH) $CM* $ALPlus;
-$GL ($HY | $HH) $CM* $ALPlus; 
+$GL $CM* ($HY | $HH) $CM* $ALPlus;
 # Non-breaking CB from LB8a:
 $CB $CM* $ZWJ ($HY | $HH) $CM* $ALPlus;
 # Non-breaking SP from LB14:
diff --git a/icu4c/source/data/brkitr/rules/line_phrase_cj.txt b/icu4c/source/data/brkitr/rules/line_phrase_cj.txt
index 41e05bf4963f..14b118789e7c 100644
--- a/icu4c/source/data/brkitr/rules/line_phrase_cj.txt
+++ b/icu4c/source/data/brkitr/rules/line_phrase_cj.txt
@@ -310,7 +310,7 @@ $LB20NonBreaks = [$LB18NonBreaks - $CB];
 #             and then to default UAX #14 behaviour (UTC-179-C32).
 #
 ^($HY | $HH) $CM* $ALPlus;
-$GL ($HY | $HH) $CM* $ALPlus; 
+$GL $CM* ($HY | $HH) $CM* $ALPlus;
 # Non-breaking CB from LB8a:
 $CB $CM* $ZWJ ($HY | $HH) $CM* $ALPlus;
 # Non-breaking SP from LB14:
diff --git a/icu4c/source/test/intltest/rbbitst.cpp b/icu4c/source/test/intltest/rbbitst.cpp
index c043a0a5d838..e94f76b2ce2e 100644
--- a/icu4c/source/test/intltest/rbbitst.cpp
+++ b/icu4c/source/test/intltest/rbbitst.cpp
@@ -1494,104 +1494,6 @@ void RBBITest::checkUnicodeTestCase(const char *testFileName, int lineNumber,
 
 
 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
-//---------------------------------------------------------------------------------------
-//
-//   class RBBIMonkeyKind
-//
-//      Monkey Test for Break Iteration
-//      Abstract interface class.   Concrete derived classes independently
-//      implement the break rules for different iterator types.
-//
-//      The Monkey Test itself uses doesn't know which type of break iterator it is
-//      testing, but works purely in terms of the interface defined here.
-//
-//---------------------------------------------------------------------------------------
-class RBBIMonkeyKind {
-public:
-    // Return a UVector of UnicodeSets, representing the character classes used
-    //   for this type of iterator.
-    virtual const std::vector<UnicodeSet>& charClasses() = 0;
-
-    // Set the test text on which subsequent calls to next() will operate
-    virtual  void      setText(const UnicodeString &s) = 0;
-
-    // Find the next break position, starting from the prev break position, or from zero.
-    // Return -1 after reaching end of string.
-    virtual  int32_t   next(int32_t i) = 0;
-
-    // Name of each character class, parallel with charClasses. Used for debugging output
-    // of characters.
-    virtual  std::vector<std::string>&     characterClassNames();
-
-    void setAppliedRule(int32_t position, const char* value);
-
-    std::string getAppliedRule(int32_t position);
-
-    virtual ~RBBIMonkeyKind();
-    UErrorCode deferredStatus;
-
-    std::string classNameFromCodepoint(const UChar32 c);
-    unsigned int maxClassNameSize();
-
- protected:
-     RBBIMonkeyKind();
-     std::vector<std::string> classNames;
-     std::vector<std::string> appliedRules;
-
-    // Clear `appliedRules` and fill it with empty strings in the size of test text.
-    void prepareAppliedRules(int32_t size );
-
- private:
-
-};
-
-RBBIMonkeyKind::RBBIMonkeyKind() {
-    deferredStatus = U_ZERO_ERROR;
-}
-
-RBBIMonkeyKind::~RBBIMonkeyKind() {
-}
-
-std::vector<std::string>& RBBIMonkeyKind::characterClassNames() {
-    return classNames;
-}
-
-void RBBIMonkeyKind::prepareAppliedRules(int32_t size) {
-    // Remove all the information in the `appliedRules`.
-    appliedRules.clear();
-    appliedRules.resize(size + 1);
-}
-
-void RBBIMonkeyKind::setAppliedRule(int32_t position, const char* value) {
-    appliedRules[position] = value;
-}
-
-std::string RBBIMonkeyKind::getAppliedRule(int32_t position){
-    return appliedRules[position];
-}
-
-std::string RBBIMonkeyKind::classNameFromCodepoint(const UChar32 c) {
-    // Simply iterate through charClasses to find character's class
-    for (std::size_t aClassNum = 0; aClassNum < charClasses().size(); aClassNum++) {
-        const UnicodeSet& classSet = charClasses()[aClassNum];
-        if (classSet.contains(c)) {
-            return classNames[aClassNum];
-        }
-    }
-    U_ASSERT(false);  // This should not happen.
-    return "bad class name";
-}
-
-unsigned int RBBIMonkeyKind::maxClassNameSize() {
-    unsigned int maxSize = 0;
-    for (std::size_t aClassNum = 0; aClassNum < charClasses().size(); aClassNum++) {
-        auto aClassNumSize = static_cast<unsigned int>(classNames[aClassNum].size());
-        if (aClassNumSize > maxSize) {
-            maxSize = aClassNumSize;
-        }
-    }
-    return maxSize;
-}
 
 namespace {
 
@@ -1697,7 +1599,7 @@ class RemapRule : public SegmentationRule {
                     break;
                 }
                 if (resolved[i].appliedRule != nullptr &&
-                        resolved[i].appliedRule->resolution() == BREAK) {
+                    resolved[i].appliedRule->resolution() == BREAK) {
                     printf("Replacement rule at remapped indices %d sqq. spans a break",
                            matcher->start(status));
                     std::terminate();
@@ -1705,7 +1607,34 @@ class RemapRule : public SegmentationRule {
                 resolved[i].appliedRule = this;
                 resolved[i].indexInRemapped.reset();
             }
+            // While replacing, we need to check that we are not creating
+            // surrogate pairs.  Since appendReplacement performs two
+            // concatenations (the unreplaced segment and the replacement), we
+            // need to check in two places: whether the unreplaced segment
+            // starts with a trailing surrogate that ends up after a leading
+            // surrogate, and whether the replacement starts with a trailing
+            // surrogate that ends up after a leading surrogate.
+            // We break the pair by replacing one of the surrogates with U+FFFF,
+            // which has the same properties for all but line breaking, and the
+            // same behaviour in line breaking (lb=SG and lb=XX are both treated
+            // as lb=AL).
+            std::optional<int32_t> trailingLead;
+            if (result.length() > 0 && U16_IS_LEAD(result[result.length() - 1])) {
+                trailingLead = result.length() - 1;
+            }
+
             matcher->appendReplacement(result, replacement_, status);
+
+            if (trailingLead && *trailingLead + 1 < result.length() &&
+                U16_IS_TRAIL(result[*trailingLead + 1])) {
+                result.setCharAt(*trailingLead, u'\uFFFF');
+            }
+
+            if (matcher->start(status) + offset > 0 &&
+                U16_IS_LEAD(result[matcher->start(status) + offset - 1]) &&
+                U16_IS_TRAIL(result[matcher->start(status) + offset])) {
+                result.setCharAt(matcher->start(status) + offset, u'\uFFFF');
+            }
             offset = result.length() - *resolved[i].indexInRemapped;
         }
         for (; i < static_cast<int32_t>(resolved.size()); ++i) {
@@ -1714,7 +1643,17 @@ class RemapRule : public SegmentationRule {
             }
             *resolved[i].indexInRemapped += offset;
         }
+
+        std::optional<int32_t> trailingLead;
+        if (result.length() > 0 && U16_IS_LEAD(result[result.length() - 1])) {
+            trailingLead = result.length() - 1;
+        }
         matcher->appendTail(result);
+        if (trailingLead && *trailingLead + 1 < result.length() &&
+            U16_IS_TRAIL(result[*trailingLead + 1])) {
+            result.setCharAt(*trailingLead, u'\uFFFF');
+        }
+
         if (resolved.back().indexInRemapped != result.length()) {
             std::string indices;
             for (const auto r : resolved) {
@@ -1850,1018 +1789,412 @@ class RegexRule : public SegmentationRule {
     const Resolution resolution_;
 };
 
-}  // namespace
+} // namespace
 
-//----------------------------------------------------------------------------------------
+//---------------------------------------------------------------------------------------
 //
-//   Random Numbers.  We need a long cycle length since we run overnight tests over
-//                    millions of strings involving 1000 random generations per string
-//                    (a 32-bit LCG will not do!), we want and a reasonably small state
-//                    so that we can output it to reproduce failures.
+//   class RBBIMonkeyKind
 //
-//---------------------------------------------------------------------------------------
-namespace {
-
-using RandomNumberGenerator = std::ranlux48;
-constexpr RandomNumberGenerator::result_type defaultSeed = std::ranlux48_base::default_seed;
-static RandomNumberGenerator randomNumberGenerator;
-
-RandomNumberGenerator deserialize(const std::string& state) {
-    RandomNumberGenerator result;
-    std::stringstream(state) >> result;
-    return result;
-}
-
-std::string serialize(const RandomNumberGenerator& generator) {
-    std::stringstream result;
-    result << generator;
-    return result.str();
-}
-
-}  // namespace
-
-//------------------------------------------------------------------------------------------
+//      Monkey Test for Break Iteration
+//      Abstract interface class.   Concrete derived classes independently
+//      implement the break rules for different iterator types.
 //
-//   class RBBICharMonkey      Character (Grapheme Cluster) specific implementation
-//                             of RBBIMonkeyKind.
+//      The Monkey Test itself doesn't know which type of break iterator it is
+//      testing, but works purely in terms of the interface defined here.
 //
-//------------------------------------------------------------------------------------------
-class RBBICharMonkey: public RBBIMonkeyKind {
-public:
-    RBBICharMonkey();
-    virtual          ~RBBICharMonkey();
-    virtual const std::vector<UnicodeSet>& charClasses() override;
-    virtual  void     setText(const UnicodeString &s) override;
-    virtual  int32_t  next(int32_t i) override;
-private:
-    std::vector<UnicodeSet> sets;
-
-    UnicodeSet  *fCRLFSet;
-    UnicodeSet  *fControlSet;
-    UnicodeSet  *fExtendSet;
-    UnicodeSet  *fZWJSet;
-    UnicodeSet  *fRegionalIndicatorSet;
-    UnicodeSet  *fPrependSet;
-    UnicodeSet  *fSpacingSet;
-    UnicodeSet  *fLSet;
-    UnicodeSet  *fVSet;
-    UnicodeSet  *fTSet;
-    UnicodeSet  *fLVSet;
-    UnicodeSet  *fLVTSet;
-    UnicodeSet  *fHangulSet;
-    UnicodeSet  *fExtendedPictSet;
-    UnicodeSet  *fInCBLinkerSet;
-    UnicodeSet  *fInCBConsonantSet;
-    UnicodeSet  *fInCBExtendSet;
-    UnicodeSet  *fAnySet;
-
-    const UnicodeString *fText;
-};
-
-
-RBBICharMonkey::RBBICharMonkey() {
-    UErrorCode  status = U_ZERO_ERROR;
-
-    fText = nullptr;
-
-    fCRLFSet    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\r\\n]"), status);
-    fControlSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[[\\p{Grapheme_Cluster_Break = Control}]]"), status);
-    fExtendSet  = new UnicodeSet(UNICODE_STRING_SIMPLE("[[\\p{Grapheme_Cluster_Break = Extend}]]"), status);
-    fZWJSet     = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = ZWJ}]"), status);
-    fRegionalIndicatorSet =
-                  new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = Regional_Indicator}]"), status);
-    fPrependSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = Prepend}]"), status);
-    fSpacingSet = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = SpacingMark}]"), status);
-    fLSet       = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = L}]"), status);
-    fVSet       = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = V}]"), status);
-    fTSet       = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = T}]"), status);
-    fLVSet      = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = LV}]"), status);
-    fLVTSet     = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Grapheme_Cluster_Break = LVT}]"), status);
-    fHangulSet  = new UnicodeSet();
-    fHangulSet->addAll(*fLSet);
-    fHangulSet->addAll(*fVSet);
-    fHangulSet->addAll(*fTSet);
-    fHangulSet->addAll(*fLVSet);
-    fHangulSet->addAll(*fLVTSet);
-
-    fExtendedPictSet  = new UnicodeSet(u"[:Extended_Pictographic:]", status);
-    fInCBLinkerSet        = new UnicodeSet(u"[\\p{InCB=Linker}]", status);
-    fInCBConsonantSet = new UnicodeSet(u"[\\p{InCB=Consonant}]", status);
-    fInCBExtendSet     = new UnicodeSet(u"[\\p{InCB=Extend}]", status);
-    fAnySet           = new UnicodeSet(0, 0x10ffff);
-
-    // Create sets of characters, and add the names of the above character sets.
-    // In each new ICU release, add new names corresponding to the sets above.
-
-    // Important: Keep class names the same as the class contents.
-    // TODO(egg): Use logic similar to line breaking.
-    sets.emplace_back(*fCRLFSet); classNames.emplace_back("CRLF");
-    sets.emplace_back(*fControlSet); classNames.emplace_back("Control");
-    sets.emplace_back(*fExtendSet); classNames.emplace_back("Extended");
-    sets.emplace_back(*fRegionalIndicatorSet); classNames.emplace_back("RegionalIndicator");
-    if (!fPrependSet->isEmpty()) {
-        sets.emplace_back(*fPrependSet); classNames.emplace_back("Prepend");
-    }
-    sets.emplace_back(*fSpacingSet); classNames.emplace_back("Spacing");
-    sets.emplace_back(*fHangulSet); classNames.emplace_back("Hangul");
-    sets.emplace_back(*fZWJSet); classNames.emplace_back("ZWJ");
-    sets.emplace_back(*fExtendedPictSet); classNames.emplace_back("ExtendedPict");
-    sets.emplace_back(*fInCBLinkerSet); classNames.emplace_back("InCB=Linker");
-    sets.emplace_back(*fInCBConsonantSet); classNames.emplace_back("InCB=Consonant");
-    sets.emplace_back(*fInCBExtendSet); classNames.emplace_back("InCB=Extend");
-    sets.emplace_back(*fAnySet); classNames.emplace_back("Any");
-
-    if (U_FAILURE(status)) {
-        deferredStatus = status;
-    }
-}
-
-
-void RBBICharMonkey::setText(const UnicodeString &s) {
-    fText = &s;
-    prepareAppliedRules(s.length());
-}
-
-
-
-int32_t RBBICharMonkey::next(int32_t prevPos) {
-    int    p0, p1, p2, p3;    // Indices of the significant code points around the
-                              //   break position being tested.  The candidate break
-                              //   location is before p2.
-
-    int     breakPos = -1;
-
-    UChar32 c0, c1, c2, c3;   // The code points at p0, p1, p2 & p3.
-    UChar32 cBase;            // for (X Extend*) patterns, the X character.
-
-    if (U_FAILURE(deferredStatus)) {
-        return -1;
-    }
-
-    // Previous break at end of string.  return DONE.
-    if (prevPos >= fText->length()) {
-        return -1;
-    }
-
-    p0 = p1 = p2 = p3 = prevPos;
-    c3 =  fText->char32At(prevPos);
-    c0 = c1 = c2 = cBase = 0;
-    (void)p0;   // suppress set but not used warning.
-    (void)c0;
-
-    // Loop runs once per "significant" character position in the input text.
-    for (;;) {
-        // Move all of the positions forward in the input string.
-        p0 = p1;  c0 = c1;
-        p1 = p2;  c1 = c2;
-        p2 = p3;  c2 = c3;
-
-        // Advance p3 by one codepoint
-        p3 = fText->moveIndex32(p3, 1);
-        c3 = fText->char32At(p3);
-
-        if (p1 == p2) {
-            // Still warming up the loop.  (won't work with zero length strings, but we don't care)
-            continue;
-        }
-
-        if (p2 == fText->length()) {
-            setAppliedRule(p2, "End of String");
-            break;
-        }
-
-        //     No Extend or Format characters may appear between the CR and LF,
-        //     which requires the additional check for p2 immediately following p1.
-        //
-        if (c1==0x0D && c2==0x0A && p1==(p2-1)) {
-          setAppliedRule(p2, "GB3   CR x LF");
-          continue;
-        }
-
-        if (fControlSet->contains(c1) ||
-            c1 == 0x0D ||
-            c1 == 0x0A)  {
-          setAppliedRule(p2, "GB4   ( Control | CR | LF ) <break>");
-          break;
-        }
-
-        if (fControlSet->contains(c2) ||
-            c2 == 0x0D ||
-            c2 == 0x0A)  {
-            setAppliedRule(p2, "GB5   <break>  ( Control | CR | LF )");
-            break;
-        }
-
-        if (fLSet->contains(c1) &&
-               (fLSet->contains(c2)  ||
-                fVSet->contains(c2)  ||
-                fLVSet->contains(c2) ||
-                fLVTSet->contains(c2))) {
-            setAppliedRule(p2, "GB6   L x ( L | V | LV | LVT )");
-            continue;
-        }
-
-        if ((fLVSet->contains(c1) || fVSet->contains(c1)) &&
-            (fVSet->contains(c2) || fTSet->contains(c2)))  {
-            setAppliedRule(p2, "GB7    ( LV | V )  x  ( V | T )");
-            continue;
-        }
-
-        if ((fLVTSet->contains(c1) || fTSet->contains(c1)) &&
-            fTSet->contains(c2))  {
-            setAppliedRule(p2, "GB8   ( LVT | T)  x T");
-            continue;
-        }
-
-        if (fExtendSet->contains(c2) || fZWJSet->contains(c2))  {
-            if (!fExtendSet->contains(c1)) {
-                cBase = c1;
-            }
-            setAppliedRule(p2, "GB9   x (Extend | ZWJ)");
-            continue;
-        }
-
-        if (fSpacingSet->contains(c2)) {
-            setAppliedRule(p2, "GB9a  x  SpacingMark");
-            continue;
-        }
-
-        if (fPrependSet->contains(c1)) {
-            setAppliedRule(p2, "GB9b  Prepend x");
-            continue;
-        }
-
-        if (fInCBConsonantSet->contains(c2)) {
-            int pi = p1;
-            bool sawVirama = false;
-            while (pi > 0 && (fInCBExtendSet->contains(fText->char32At(pi)) ||
-                              fInCBLinkerSet->contains(fText->char32At(pi)))) {
-                if (fInCBLinkerSet->contains(fText->char32At(pi))) {
-                    sawVirama = true;
-                }
-                pi = fText->moveIndex32(pi, -1);
-            }
-            if (sawVirama && fInCBConsonantSet->contains(fText->char32At(pi))) {
-                setAppliedRule(
-                    p2, R"(GB9c \p{InCB=Consonant} [ \p{InCB=Extend} \p{InCB=Linker} ]* \p{InCB=Linker} [ \p{InCB=Extend} \p{InCB=Linker} ]*	x	\p{InCB=Consonant})");
-                continue;
-            }
-        }
+//---------------------------------------------------------------------------------------
+class RBBIMonkeyKind {
+  public:
+    // Return a vector of UnicodeSets, representing the character classes used
+    //   for this type of iterator.
+    const std::vector<UnicodeSet> &charClasses();
 
-        if (fExtendedPictSet->contains(cBase) && fZWJSet->contains(c1) && fExtendedPictSet->contains(c2)) {
-          setAppliedRule(p2, "GB11  Extended_Pictographic Extend * ZWJ x Extended_Pictographic");
-          continue;
-        }
+    const UnicodeSet &dictionarySet() const;
 
-        //                   Note: The first if condition is a little tricky. We only need to force
-        //                      a break if there are three or more contiguous RIs. If there are
-        //                      only two, a break following will occur via other rules, and will include
-        //                      any trailing extend characters, which is needed behavior.
-        if (fRegionalIndicatorSet->contains(c0) && fRegionalIndicatorSet->contains(c1)
-                && fRegionalIndicatorSet->contains(c2)) {
-          setAppliedRule(p2, "GB12-13  Regional_Indicator x Regional_Indicator");
-          break;
-        }
-        if (fRegionalIndicatorSet->contains(c1) && fRegionalIndicatorSet->contains(c2)) {
-          setAppliedRule(p2, "GB12-13  Regional_Indicator x Regional_Indicator");
-          continue;
-        }
+    // Set the test text on which subsequent calls to next() will operate
+    void setText(const UnicodeString &s);
 
-        setAppliedRule(p2, "GB999 Any <break> Any");
-        break;
-    }
+    // Find the next break position, starting from the prev break position, or from zero.
+    // Return -1 after reaching end of string.
+    int32_t next(int32_t i);
 
-    breakPos = p2;
-    return breakPos;
-}
+    // Name of each character class, parallel with charClasses. Used for debugging output
+    // of characters.
+    std::vector<std::string> &characterClassNames();
 
+    void setAppliedRule(int32_t position, const char *value);
 
+    std::string getAppliedRule(int32_t position);
 
-const std::vector<UnicodeSet>& RBBICharMonkey::charClasses() {
-    return sets;
-}
+    virtual ~RBBIMonkeyKind();
+    UErrorCode deferredStatus;
 
-RBBICharMonkey::~RBBICharMonkey() {
-    delete fCRLFSet;
-    delete fControlSet;
-    delete fExtendSet;
-    delete fRegionalIndicatorSet;
-    delete fPrependSet;
-    delete fSpacingSet;
-    delete fLSet;
-    delete fVSet;
-    delete fTSet;
-    delete fLVSet;
-    delete fLVTSet;
-    delete fHangulSet;
-    delete fAnySet;
-    delete fZWJSet;
-    delete fExtendedPictSet;
-    delete fInCBLinkerSet;
-    delete fInCBConsonantSet;
-    delete fInCBExtendSet;
-}
+    std::string classNameFromCodepoint(const UChar32 c);
+    unsigned int maxClassNameSize();
 
-//------------------------------------------------------------------------------------------
-//
-//   class RBBIWordMonkey      Word Break specific implementation
-//                             of RBBIMonkeyKind.
-//
-//------------------------------------------------------------------------------------------
-class RBBIWordMonkey: public RBBIMonkeyKind {
-public:
-    RBBIWordMonkey();
-    virtual          ~RBBIWordMonkey();
-    virtual const std::vector<UnicodeSet>& charClasses() override;
-    virtual  void     setText(const UnicodeString &s) override;
-    virtual int32_t   next(int32_t i) override;
-private:
+  protected:
+    RBBIMonkeyKind();
+    std::vector<std::string> classNames;
     std::vector<UnicodeSet> sets;
+    std::vector<std::unique_ptr<SegmentationRule>> rules;
+    UnicodeSet dictionarySet_;
 
-    UnicodeSet  *fCRSet;
-    UnicodeSet  *fLFSet;
-    UnicodeSet  *fNewlineSet;
-    UnicodeSet  *fRegionalIndicatorSet;
-    UnicodeSet  *fKatakanaSet;
-    UnicodeSet  *fHebrew_LetterSet;
-    UnicodeSet  *fALetterSet;
-    UnicodeSet  *fSingle_QuoteSet;
-    UnicodeSet  *fDouble_QuoteSet;
-    UnicodeSet  *fMidNumLetSet;
-    UnicodeSet  *fMidLetterSet;
-    UnicodeSet  *fMidNumSet;
-    UnicodeSet  *fNumericSet;
-    UnicodeSet  *fFormatSet;
-    UnicodeSet  *fOtherSet = nullptr;
-    UnicodeSet  *fExtendSet;
-    UnicodeSet  *fExtendNumLetSet;
-    UnicodeSet  *fWSegSpaceSet;
-    UnicodeSet  *fDictionarySet = nullptr;
-    UnicodeSet  *fZWJSet;
-    UnicodeSet  *fExtendedPictSet;
-
-    const UnicodeString  *fText;
-};
-
-
-RBBIWordMonkey::RBBIWordMonkey()
-{
-    UErrorCode  status = U_ZERO_ERROR;
-
-    fCRSet            = new UnicodeSet(u"[\\p{Word_Break = CR}]",           status);
-    fLFSet            = new UnicodeSet(u"[\\p{Word_Break = LF}]",           status);
-    fNewlineSet       = new UnicodeSet(u"[\\p{Word_Break = Newline}]",      status);
-    fKatakanaSet      = new UnicodeSet(u"[\\p{Word_Break = Katakana}]",     status);
-    fRegionalIndicatorSet =  new UnicodeSet(u"[\\p{Word_Break = Regional_Indicator}]", status);
-    fHebrew_LetterSet = new UnicodeSet(u"[\\p{Word_Break = Hebrew_Letter}]", status);
-    fALetterSet       = new UnicodeSet(u"[\\p{Word_Break = ALetter}]", status);
-    fSingle_QuoteSet  = new UnicodeSet(u"[\\p{Word_Break = Single_Quote}]",    status);
-    fDouble_QuoteSet  = new UnicodeSet(u"[\\p{Word_Break = Double_Quote}]",    status);
-    fMidNumLetSet     = new UnicodeSet(u"[\\p{Word_Break = MidNumLet}]",    status);
-    fMidLetterSet     = new UnicodeSet(u"[\\p{Word_Break = MidLetter}]",    status);
-    fMidNumSet        = new UnicodeSet(u"[\\p{Word_Break = MidNum}]",       status);
-    fNumericSet       = new UnicodeSet(u"[\\p{Word_Break = Numeric}]", status);
-    fFormatSet        = new UnicodeSet(u"[\\p{Word_Break = Format}]",       status);
-    fExtendNumLetSet  = new UnicodeSet(u"[\\p{Word_Break = ExtendNumLet}]", status);
-    // There are some sc=Hani characters with WB=Extend.
-    // The break rules need to pick one or the other because
-    // Extend overlapping with something else is messy.
-    // For Unicode 13, we chose to keep U+16FF0 & U+16FF1
-    // in $Han (for $dictionary) and out of $Extend.
-    fExtendSet        = new UnicodeSet(u"[\\p{Word_Break = Extend}-[:Hani:]]", status);
-    fWSegSpaceSet     = new UnicodeSet(u"[\\p{Word_Break = WSegSpace}]",    status);
-
-    fZWJSet           = new UnicodeSet(u"[\\p{Word_Break = ZWJ}]",          status);
-    fExtendedPictSet  = new UnicodeSet(u"[:Extended_Pictographic:]", status);
-    if(U_FAILURE(status)) {
-        IntlTest::gTest->errln("%s:%d %s", __FILE__, __LINE__, u_errorName(status));
-        deferredStatus = status;
-        return;
-    }
-
-    fDictionarySet = new UnicodeSet(u"[[\\uac00-\\ud7a3][:Han:][:Hiragana:]]", status);
-    fDictionarySet->addAll(*fKatakanaSet);
-    fDictionarySet->addAll(UnicodeSet(u"[\\p{LineBreak = Complex_Context}]", status));
-
-    fALetterSet->removeAll(*fDictionarySet);
-
-    fOtherSet        = new UnicodeSet();
-    if(U_FAILURE(status)) {
-        IntlTest::gTest->errln("%s:%d %s", __FILE__, __LINE__, u_errorName(status));
-        deferredStatus = status;
-        return;
-    }
+    // Clear `appliedRules` and fill it with empty strings in the size of test text.
+    void prepareAppliedRules(int32_t size);
 
-    fOtherSet->complement();
-    fOtherSet->removeAll(*fCRSet);
-    fOtherSet->removeAll(*fLFSet);
-    fOtherSet->removeAll(*fNewlineSet);
-    fOtherSet->removeAll(*fKatakanaSet);
-    fOtherSet->removeAll(*fHebrew_LetterSet);
-    fOtherSet->removeAll(*fALetterSet);
-    fOtherSet->removeAll(*fSingle_QuoteSet);
-    fOtherSet->removeAll(*fDouble_QuoteSet);
-    fOtherSet->removeAll(*fMidLetterSet);
-    fOtherSet->removeAll(*fMidNumSet);
-    fOtherSet->removeAll(*fNumericSet);
-    fOtherSet->removeAll(*fExtendNumLetSet);
-    fOtherSet->removeAll(*fWSegSpaceSet);
-    fOtherSet->removeAll(*fFormatSet);
-    fOtherSet->removeAll(*fExtendSet);
-    fOtherSet->removeAll(*fRegionalIndicatorSet);
-    fOtherSet->removeAll(*fZWJSet);
-    fOtherSet->removeAll(*fExtendedPictSet);
-
-    // Inhibit dictionary characters from being tested at all.
-    fOtherSet->removeAll(*fDictionarySet);
-
-    // Add classes and their names
-    sets.emplace_back(*fCRSet); classNames.emplace_back("CR");
-    sets.emplace_back(*fLFSet); classNames.emplace_back("LF");
-    sets.emplace_back(*fNewlineSet); classNames.emplace_back("Newline");
-    sets.emplace_back(*fRegionalIndicatorSet); classNames.emplace_back("RegionalIndicator");
-    sets.emplace_back(*fHebrew_LetterSet); classNames.emplace_back("Hebrew");
-    sets.emplace_back(*fALetterSet); classNames.emplace_back("ALetter");
-    sets.emplace_back(*fSingle_QuoteSet); classNames.emplace_back("Single Quote");
-    sets.emplace_back(*fDouble_QuoteSet); classNames.emplace_back("Double Quote");
-    // Omit Katakana from fSets, which omits Katakana characters
-    // from the test data. They are all in the dictionary set,
-    // which this (old, to be retired) monkey test cannot handle.
-    //sets.emplace_back(*fKatakanaSet);
-
-    sets.emplace_back(*fMidLetterSet); classNames.emplace_back("MidLetter");
-    sets.emplace_back(*fMidNumLetSet); classNames.emplace_back("MidNumLet");
-    sets.emplace_back(*fMidNumSet); classNames.emplace_back("MidNum");
-    sets.emplace_back(*fNumericSet); classNames.emplace_back("Numeric");
-    sets.emplace_back(*fFormatSet); classNames.emplace_back("Format");
-    sets.emplace_back(*fExtendSet); classNames.emplace_back("Extend");
-    sets.emplace_back(*fOtherSet); classNames.emplace_back("Other");
-    sets.emplace_back(*fExtendNumLetSet); classNames.emplace_back("ExtendNumLet");
-    sets.emplace_back(*fWSegSpaceSet); classNames.emplace_back("WSegSpace");
-
-    sets.emplace_back(*fZWJSet); classNames.emplace_back("ZWJ");
-    sets.emplace_back(*fExtendedPictSet); classNames.emplace_back("ExtendedPict");
+  private:
+    std::vector<std::string> appliedRules;
+    UnicodeString text;
+    std::vector<SegmentationRule::BreakContext> resolved;
+};
 
-    if (U_FAILURE(status)) {
-        deferredStatus = status;
-    }
+RBBIMonkeyKind::RBBIMonkeyKind() {  // Base constructor: only resets the deferred error status.
+    deferredStatus = U_ZERO_ERROR;  // Start with no deferred error recorded.
 }
 
-void RBBIWordMonkey::setText(const UnicodeString &s) {
-    fText       = &s;
-    prepareAppliedRules(s.length());
+RBBIMonkeyKind::~RBBIMonkeyKind() {  // Empty body: no explicit cleanup is performed here.
 }
 
+const std::vector<UnicodeSet> &RBBIMonkeyKind::charClasses() {  // Read-only view of the `sets` member.
+   return sets; }
 
-int32_t RBBIWordMonkey::next(int32_t prevPos) {
-    int    p0, p1, p2, p3;    // Indices of the significant code points around the
-                              //   break position being tested.  The candidate break
-                              //   location is before p2.
-
-    int     breakPos = -1;
-
-    UChar32 c0, c1, c2, c3;   // The code points at p0, p1, p2 & p3.
-
-    if (U_FAILURE(deferredStatus)) {
-        return -1;
-    }
-
-    // Prev break at end of string.  return DONE.
-    if (prevPos >= fText->length()) {
-        return -1;
-    }
-    p0 = p1 = p2 = p3 = prevPos;
-    c3 =  fText->char32At(prevPos);
-    c0 = c1 = c2 = 0;
-    (void)p0;       // Suppress set but not used warning.
-
-    // Loop runs once per "significant" character position in the input text.
-    for (;;) {
-        // Move all of the positions forward in the input string.
-        p0 = p1;  c0 = c1;
-        p1 = p2;  c1 = c2;
-        p2 = p3;  c2 = c3;
+const UnicodeSet &RBBIMonkeyKind::dictionarySet() const {  // Read-only access to `dictionarySet_`.
+    return dictionarySet_;
+}
 
-        // Advance p3 by    X(Extend | Format)*   Rule 4
-        //    But do not advance over Extend & Format following a new line. (Unicode 5.1 change)
-        do {
-            p3 = fText->moveIndex32(p3, 1);
-            c3 = fText->char32At(p3);
-            if (fCRSet->contains(c2) || fLFSet->contains(c2) || fNewlineSet->contains(c2)) {
-               break;
+void RBBIMonkeyKind::setText(const UnicodeString &s) {  // Resolves every boundary of `s` via `rules`.
+    text = s;
+    prepareAppliedRules(s.length());
+    UnicodeString remapped = s;  // Working copy handed to the rules; `s` itself is const.
+    resolved.clear();
+    resolved.reserve(s.length() + 1);
+    for (int i = 0; i < s.length() + 1; ++i) {  // One context per index in [0, length].
+        resolved.emplace_back(i);
+    }
+    for (const auto &rule : rules) {  // Rules are applied in their order within `rules`.
+        rule->apply(remapped, resolved);
+    }
+    for (std::size_t i = 0; i < resolved.size(); ++i) {
+        if (resolved[i].appliedRule == nullptr) {  // No rule claimed this index.
+            if (i > 0 && U16_IS_LEAD(s[i-1]) && U16_IS_TRAIL(s[i])) {
+                continue;  // Between the two halves of a surrogate pair: tolerated as unresolved.
             }
+            fprintf(stderr, "Failed to resolve at %zu between U+%04X and U+%04X ", i,
+                    s.char32At(i - 1), s.char32At(i));
+            if (resolved[i].indexInRemapped.has_value()) {
+                const auto remappedIndex = *resolved[i].indexInRemapped;
+                fprintf(stderr, "which is remapped %zu between U+%04X and U+%04X", remappedIndex,
+                        remapped.char32At(remappedIndex - 1), remapped.char32At(remappedIndex));
+            }
+            std::terminate();  // stderr is unbuffered, so the report above is already written out.
+        } else {
+            setAppliedRule(i, resolved[i].appliedRule->name().c_str());
         }
-        while (fFormatSet->contains(c3) || fExtendSet->contains(c3) || fZWJSet->contains(c3));
-
-
-        if (p1 == p2) {
-            // Still warming up the loop.  (won't work with zero length strings, but we don't care)
-            continue;
-        }
-
-        if (p2 == fText->length()) {
-            // Reached end of string.  Always a break position.
-            break;
-        }
-
-        //     No Extend or Format characters may appear between the CR and LF,
-        //     which requires the additional check for p2 immediately following p1.
-        //
-        if (c1==0x0D && c2==0x0A) {
-          setAppliedRule(p2, "WB3   CR x LF");
-          continue;
-        }
-
-        if (fCRSet->contains(c1) || fLFSet->contains(c1) || fNewlineSet->contains(c1)) {
-            setAppliedRule(p2, "WB3a  Break before and after newlines (including CR and LF)");
-            break;
-        }
-        if (fCRSet->contains(c2) || fLFSet->contains(c2) || fNewlineSet->contains(c2)) {
-            setAppliedRule(p2, "WB3a  Break before and after newlines (including CR and LF)");
-            break;
-        }
-
-        //              Not ignoring extend chars, so peek into input text to
-        //              get the potential ZWJ, the character immediately preceding c2.
-        //              Sloppy UChar32 indexing: p2-1 may reference trail half
-        //              but char32At will get the full code point.
-        if (fZWJSet->contains(fText->char32At(p2 - 1)) && fExtendedPictSet->contains(c2)){
-            setAppliedRule(p2, "WB3c  ZWJ x Extended_Pictographic");
-            continue;
-        }
-
-        if (fWSegSpaceSet->contains(fText->char32At(p2-1)) && fWSegSpaceSet->contains(c2)) {
-            setAppliedRule(p2, "WB3d  Keep horizontal whitespace together.");
-            continue;
-        }
-
-        if ((fALetterSet->contains(c1) || fHebrew_LetterSet->contains(c1)) &&
-            (fALetterSet->contains(c2) || fHebrew_LetterSet->contains(c2)))  {
-            setAppliedRule(p2, "WB4   (ALetter | Hebrew_Letter) x (ALetter | Hebrew_Letter)");
-            continue;
-        }
-
-        if ( (fALetterSet->contains(c1) || fHebrew_LetterSet->contains(c1))   &&
-             (fMidLetterSet->contains(c2) || fMidNumLetSet->contains(c2) || fSingle_QuoteSet->contains(c2)) &&
-             (fALetterSet->contains(c3) || fHebrew_LetterSet->contains(c3))) {
-            setAppliedRule(p2,
-                           "WB6   (ALetter | Hebrew_Letter)  x  (MidLetter | MidNumLet | Single_Quote) (ALetter _Letter)");
-            continue;
-        }
-
-        if ((fALetterSet->contains(c0) || fHebrew_LetterSet->contains(c0)) &&
-            (fMidLetterSet->contains(c1) || fMidNumLetSet->contains(c1) || fSingle_QuoteSet->contains(c1)) &&
-            (fALetterSet->contains(c2) || fHebrew_LetterSet->contains(c2))) {
-            setAppliedRule(p2,
-                           "WB7   (ALetter | Hebrew_Letter) (MidLetter | MidNumLet | Single_Quote)  x  (ALetter | Hebrew_Letter)");
-            continue;
-        }
-
-        if (fHebrew_LetterSet->contains(c1) && fSingle_QuoteSet->contains(c2)) {
-            setAppliedRule(p2, "WB7a  Hebrew_Letter x Single_Quote");
-            continue;
-        }
-
-          if (fHebrew_LetterSet->contains(c1) && fDouble_QuoteSet->contains(c2) && fHebrew_LetterSet->contains(c3)) {
-            setAppliedRule(p2, "WB7b  Hebrew_Letter x Double_Quote Hebrew_Letter");
-            continue;
-        }
-
-        if (fHebrew_LetterSet->contains(c0) && fDouble_QuoteSet->contains(c1) && fHebrew_LetterSet->contains(c2)) {
-            setAppliedRule(p2, "WB7c  Hebrew_Letter Double_Quote x Hebrew_Letter");
-            continue;
-        }
-
-        if (fNumericSet->contains(c1) &&
-            fNumericSet->contains(c2)) {
-            setAppliedRule(p2, "WB8   Numeric x Numeric");
-            continue;
-        }
-
-        if ((fALetterSet->contains(c1) || fHebrew_LetterSet->contains(c1)) &&
-            fNumericSet->contains(c2)) {
-            setAppliedRule(p2, "WB9   (ALetter | Hebrew_Letter) x Numeric");
-            continue;
-        }
-
-        if (fNumericSet->contains(c1) &&
-            (fALetterSet->contains(c2) || fHebrew_LetterSet->contains(c2)))  {
-            setAppliedRule(p2, "WB10   Numeric x (ALetter | Hebrew_Letter)");
-            continue;
-        }
-
-          if (fNumericSet->contains(c0) &&
-            (fMidNumSet->contains(c1) || fMidNumLetSet->contains(c1) || fSingle_QuoteSet->contains(c1))  &&
-            fNumericSet->contains(c2)) {
-            setAppliedRule(p2, "WB11  Numeric (MidNum | MidNumLet | Single_Quote)  x  Numeric");
-            continue;
-        }
+    }
+}
 
-        if (fNumericSet->contains(c1) &&
-            (fMidNumSet->contains(c2) || fMidNumLetSet->contains(c2) || fSingle_QuoteSet->contains(c2))  &&
-            fNumericSet->contains(c3)) {
-            setAppliedRule(p2, "WB12  Numeric x (MidNum | MidNumLet | SingleQuote) Numeric");
-            continue;
+int32_t RBBIMonkeyKind::next(int32_t startPos) {
+    for (std::size_t pos = startPos + 1; pos < resolved.size(); ++pos) {  // Scan strictly after startPos.
+        const auto &rule = resolved[pos].appliedRule;
+        if (rule != nullptr && rule->resolution() == SegmentationRule::BREAK) {
+            return pos;
         }
+    }
+    return -1;  // No index after startPos was resolved as a break.
+}
 
-        //            Note: matches UAX 29 rules, but doesn't come into play for ICU because
-        //                  all Katakana are handled by the dictionary breaker.
-        if (fKatakanaSet->contains(c1) &&
-            fKatakanaSet->contains(c2))  {
-            setAppliedRule(p2, "WB13  Katakana x Katakana");
-            continue;
-        }
+std::vector<std::string> &RBBIMonkeyKind::characterClassNames() {
+    return classNames;  // Mutable reference to the member, not a copy.
+}
 
-        if ((fALetterSet->contains(c1) || fHebrew_LetterSet->contains(c1) ||fNumericSet->contains(c1) ||
-             fKatakanaSet->contains(c1) || fExtendNumLetSet->contains(c1)) &&
-             fExtendNumLetSet->contains(c2)) {
-            setAppliedRule(p2,
-                           "WB13a (ALetter | Hebrew_Letter | Numeric | KataKana | ExtendNumLet) x ExtendNumLet");
-            continue;
-        }
+void RBBIMonkeyKind::prepareAppliedRules(int32_t size) {
+    // Drop whatever was recorded before and leave `size + 1` empty entries,
+    // i.e. one slot per index in [0, size].
+    appliedRules.assign(size + 1, std::string());
+}
 
-        if (fExtendNumLetSet->contains(c1) &&
-                (fALetterSet->contains(c2) || fHebrew_LetterSet->contains(c2) ||
-                 fNumericSet->contains(c2) || fKatakanaSet->contains(c2)))  {
-            setAppliedRule(p2, "WB13b ExtendNumLet x (ALetter | Hebrew_Letter | Numeric | Katakana)");
-            continue;
-        }
+void RBBIMonkeyKind::setAppliedRule(int32_t position, const char* value) {
+    appliedRules[position] = value;  // Unchecked index: `position` must be within `appliedRules`.
+}
 
-        if (fRegionalIndicatorSet->contains(c0) && fRegionalIndicatorSet->contains(c1)) {
-            setAppliedRule(p2, "WB15 - WB17   Group pairs of Regional Indicators.");
-            break;
-        }
-        if (fRegionalIndicatorSet->contains(c1) && fRegionalIndicatorSet->contains(c2)) {
-            setAppliedRule(p2, "WB15 - WB17   Group pairs of Regional Indicators.");
-            continue;
-        }
+std::string RBBIMonkeyKind::getAppliedRule(int32_t position){
+    return appliedRules[position];  // Returned by value (a copy); the index is not range-checked.
+}
 
-        setAppliedRule(p2, "WB999");
-        break;
+std::string RBBIMonkeyKind::classNameFromCodepoint(const UChar32 c) {
+    // sets[k] and classNames[k] describe the same class; report the first set holding `c`.
+    for (std::size_t k = 0; k < sets.size(); ++k) {
+        if (!sets[k].contains(c)) {
+            continue;
+        }
+        return classNames[k];
     }
+    U_ASSERT(false);  // Reaching this point means no class contains `c`.
+    return "bad class name";
+}
 
-    breakPos = p2;
-    return breakPos;
+unsigned int RBBIMonkeyKind::maxClassNameSize() {
+    unsigned int widest = 0;  // Stays 0 when there are no class names.
+    for (const std::string &className : classNames) {
+        const auto width = static_cast<unsigned int>(className.size());
+        if (widest < width) {
+            widest = width;
+        }
+    }
+    return widest;
 }
 
+//----------------------------------------------------------------------------------------
+//
+//   Random Numbers.  We need a long cycle length since we run overnight tests over
+//                    millions of strings involving 1000 random generations per string
+//                    (a 32-bit LCG will not do!), and we want a reasonably small state
+//                    so that we can output it to reproduce failures.
+//
+//---------------------------------------------------------------------------------------
+namespace {
 
-const std::vector<UnicodeSet>& RBBIWordMonkey::charClasses() {
-    return sets;
-}
+using RandomNumberGenerator = std::ranlux48;  // Engine type used by the helpers in this namespace.
+constexpr RandomNumberGenerator::result_type defaultSeed = std::ranlux48_base::default_seed;
+static RandomNumberGenerator randomNumberGenerator;  // File-local, default-constructed instance.
 
-RBBIWordMonkey::~RBBIWordMonkey() {
-    delete fCRSet;
-    delete fLFSet;
-    delete fNewlineSet;
-    delete fKatakanaSet;
-    delete fHebrew_LetterSet;
-    delete fALetterSet;
-    delete fSingle_QuoteSet;
-    delete fDouble_QuoteSet;
-    delete fMidNumLetSet;
-    delete fMidLetterSet;
-    delete fMidNumSet;
-    delete fNumericSet;
-    delete fFormatSet;
-    delete fExtendSet;
-    delete fExtendNumLetSet;
-    delete fWSegSpaceSet;
-    delete fRegionalIndicatorSet;
-    delete fDictionarySet;
-    delete fOtherSet;
-    delete fZWJSet;
-    delete fExtendedPictSet;
+RandomNumberGenerator deserialize(const std::string& state) {
+    RandomNumberGenerator result;
+    std::stringstream(state) >> result;  // Read the engine state from its textual form.
+    return result;
 }
 
+std::string serialize(const RandomNumberGenerator& generator) {
+    std::stringstream result;
+    result << generator;  // Write the engine state in its textual form.
+    return result.str();
+}
 
-
+}  // namespace
 
 //------------------------------------------------------------------------------------------
 //
-//   class RBBISentMonkey      Sentence Break specific implementation
+//   class RBBICharMonkey      Character (Grapheme Cluster) specific implementation
 //                             of RBBIMonkeyKind.
 //
 //------------------------------------------------------------------------------------------
-class RBBISentMonkey: public RBBIMonkeyKind {
+class RBBICharMonkey: public RBBIMonkeyKind {
 public:
-    RBBISentMonkey();
-    virtual          ~RBBISentMonkey();
-    virtual const std::vector<UnicodeSet>& charClasses() override;
-    virtual  void     setText(const UnicodeString &s) override;
-    virtual int32_t   next(int32_t i) override;
-private:
-    int               moveBack(int posFrom);
-    int               moveForward(int posFrom);
-    UChar32           cAt(int pos);
-
-    std::vector<UnicodeSet> sets;
-
-    UnicodeSet  *fSepSet;
-    UnicodeSet  *fFormatSet;
-    UnicodeSet  *fSpSet;
-    UnicodeSet  *fLowerSet;
-    UnicodeSet  *fUpperSet;
-    UnicodeSet  *fOLetterSet;
-    UnicodeSet  *fNumericSet;
-    UnicodeSet  *fATermSet;
-    UnicodeSet  *fSContinueSet;
-    UnicodeSet  *fSTermSet;
-    UnicodeSet  *fCloseSet;
-    UnicodeSet  *fOtherSet;
-    UnicodeSet  *fExtendSet;
-
-    const UnicodeString  *fText;
+    RBBICharMonkey();  // Fills in the rules and character classes; failures go to deferredStatus.
 };
 
-RBBISentMonkey::RBBISentMonkey()
-{
-    UErrorCode  status = U_ZERO_ERROR;
-
-    //  Separator Set Note:  Beginning with Unicode 5.1, CR and LF were removed from the separator
-    //                       set and made into character classes of their own.  For the monkey impl,
-    //                       they remain in SEP, since Sep always appears with CR and LF in the rules.
-    fSepSet          = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Sep} \\u000a \\u000d]"),     status);
-    fFormatSet       = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Format}]"),    status);
-    fSpSet           = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Sp}]"),        status);
-    fLowerSet        = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Lower}]"),     status);
-    fUpperSet        = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Upper}]"),     status);
-    fOLetterSet      = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = OLetter}]"),   status);
-    fNumericSet      = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Numeric}]"),   status);
-    fATermSet        = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = ATerm}]"),     status);
-    fSContinueSet    = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = SContinue}]"), status);
-    fSTermSet        = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = STerm}]"),     status);
-    fCloseSet        = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Close}]"),     status);
-    fExtendSet       = new UnicodeSet(UNICODE_STRING_SIMPLE("[\\p{Sentence_Break = Extend}]"),    status);
-    fOtherSet        = new UnicodeSet();
-
-    if(U_FAILURE(status)) {
-      deferredStatus = status;
-      return;
-    }
-
-    fOtherSet->complement();
-    fOtherSet->removeAll(*fSepSet);
-    fOtherSet->removeAll(*fFormatSet);
-    fOtherSet->removeAll(*fSpSet);
-    fOtherSet->removeAll(*fLowerSet);
-    fOtherSet->removeAll(*fUpperSet);
-    fOtherSet->removeAll(*fOLetterSet);
-    fOtherSet->removeAll(*fNumericSet);
-    fOtherSet->removeAll(*fATermSet);
-    fOtherSet->removeAll(*fSContinueSet);
-    fOtherSet->removeAll(*fSTermSet);
-    fOtherSet->removeAll(*fCloseSet);
-    fOtherSet->removeAll(*fExtendSet);
-
-    sets.emplace_back(*fSepSet); classNames.emplace_back("Sep");
-    sets.emplace_back(*fFormatSet); classNames.emplace_back("Format");
-    sets.emplace_back(*fSpSet); classNames.emplace_back("Sp");
-    sets.emplace_back(*fLowerSet); classNames.emplace_back("Lower");
-    sets.emplace_back(*fUpperSet); classNames.emplace_back("Upper");
-    sets.emplace_back(*fOLetterSet); classNames.emplace_back("OLetter");
-    sets.emplace_back(*fNumericSet); classNames.emplace_back("Numeric");
-    sets.emplace_back(*fATermSet); classNames.emplace_back("ATerm");
-    sets.emplace_back(*fSContinueSet); classNames.emplace_back("SContinue");
-    sets.emplace_back(*fSTermSet); classNames.emplace_back("STerm");
-    sets.emplace_back(*fCloseSet); classNames.emplace_back("Close");
-    sets.emplace_back(*fOtherSet); classNames.emplace_back("Other");
-    sets.emplace_back(*fExtendSet); classNames.emplace_back("Extend");
-
-    if (U_FAILURE(status)) {
-        deferredStatus = status;
-    }
-}
-
-
 
-void RBBISentMonkey::setText(const UnicodeString &s) {
-    fText       = &s;
-    prepareAppliedRules(s.length());
-}
+RBBICharMonkey::RBBICharMonkey() {
+    UErrorCode status = U_ZERO_ERROR;  // Shared by all UnicodeSet constructions below.
 
-const std::vector<UnicodeSet>& RBBISentMonkey::charClasses() {
-    return sets;
-}
+    std::list<std::pair<std::string, UnicodeSet>> partition;  // (class name, set) pairs, in order.
 
-//  moveBack()   Find the "significant" code point preceding the index i.
-//               Skips over ($Extend | $Format)* .
-//
-int RBBISentMonkey::moveBack(int i) {
-    if (i <= 0) {
-        return -1;
-    }
-    UChar32   c;
-    int32_t   j = i;
-    do {
-        j = fText->moveIndex32(j, -1);
-        c = fText->char32At(j);
-    }
-    while (j>0 &&(fFormatSet->contains(c) || fExtendSet->contains(c)));
-    return j;
+    // These two could be part of the rules.
+    rules.push_back(std::make_unique<RegexRule>(uR"(GB1 sot ÷ Any)", uR"(^)", u'÷', uR"()"));
+    // Note that /$/ matches ( BK | CR | LF | NL ) eot, so we use (?!.) instead.
+    // The generated rules use the same (?!.).
+    rules.push_back(std::make_unique<RegexRule>(uR"(GB2 Any ÷ eot)", uR"()", u'÷', uR"((?!.))"));
 
- }
+    // --- NOLI ME TANGERE ---
+    // Generated by GenerateBreakTest.java in the Unicode tools.
+    partition.emplace_back("CR", UnicodeSet(uR"([\p{Grapheme_Cluster_Break=CR}])", status));
+    partition.emplace_back("LF", UnicodeSet(uR"([\p{Grapheme_Cluster_Break=LF}])", status));
+    partition.emplace_back("Control", UnicodeSet(uR"([\p{Grapheme_Cluster_Break=Control}])", status));
+    partition.emplace_back("Extend_ConjunctLinker", UnicodeSet(uR"([\p{Grapheme_Cluster_Break=Extend}&\p{Indic_Conjunct_Break=Linker}])", status));
+    partition.emplace_back("Extend_ConjunctExtendermConjunctLinker", UnicodeSet(uR"([\p{Grapheme_Cluster_Break=Extend}&[\p{Indic_Conjunct_Break=Linker}\p{Indic_Conjunct_Break=Extend}]-\p{Indic_Conjunct_Break=Linker}])", status));
+    partition.emplace_back("ExtendmConjunctLinkermConjunctExtender", UnicodeSet(uR"([\p{Grapheme_Cluster_Break=Extend}-\p{Indic_Conjunct_Break=Linker}-[\p{Indic_Conjunct_Break=Linker}\p{Indic_Conjunct_Break=Extend}]])", status));
+    partition.emplace_back("ZWJ", UnicodeSet(uR"([\p{Grapheme_Cluster_Break=ZWJ}])", status));
+    partition.emplace_back("RI", UnicodeSet(uR"([\p{Grapheme_Cluster_Break=Regional_Indicator}])", status));
+    partition.emplace_back("Prepend", UnicodeSet(uR"([\p{Grapheme_Cluster_Break=Prepend}])", status));
+    partition.emplace_back("SpacingMark", UnicodeSet(uR"([\p{Grapheme_Cluster_Break=SpacingMark}])", status));
+    partition.emplace_back("L", UnicodeSet(uR"([\p{Grapheme_Cluster_Break=L}])", status));
+    partition.emplace_back("V", UnicodeSet(uR"([\p{Grapheme_Cluster_Break=V}])", status));
+    partition.emplace_back("T", UnicodeSet(uR"([\p{Grapheme_Cluster_Break=T}])", status));
+    partition.emplace_back("LV", UnicodeSet(uR"([\p{Grapheme_Cluster_Break=LV}])", status));
+    partition.emplace_back("LVT", UnicodeSet(uR"([\p{Grapheme_Cluster_Break=LVT}])", status));
+    partition.emplace_back("LinkingConsonant", UnicodeSet(uR"([\p{Indic_Conjunct_Break=Consonant}])", status));
+    partition.emplace_back("ExtPict", UnicodeSet(uR"([\p{Extended_Pictographic}])", status));
+    partition.emplace_back("XXmLinkingConsonantmExtPict", UnicodeSet(uR"([\p{Grapheme_Cluster_Break=Other}-\p{Indic_Conjunct_Break=Consonant}-\p{Extended_Pictographic}])", status));
+
+    rules.push_back(std::make_unique<RegexRule>(uR"($CR × $LF)", uR"(\p{Grapheme_Cluster_Break=CR})", u'×', uR"(\p{Grapheme_Cluster_Break=LF})"));
+    rules.push_back(std::make_unique<RegexRule>(uR"(( $Control | $CR | $LF ) ÷)", uR"(( \p{Grapheme_Cluster_Break=Control} | \p{Grapheme_Cluster_Break=CR} | \p{Grapheme_Cluster_Break=LF} ))", u'÷', uR"()"));
+    rules.push_back(std::make_unique<RegexRule>(uR"(÷ ( $Control | $CR | $LF ))", uR"()", u'÷', uR"(( \p{Grapheme_Cluster_Break=Control} | \p{Grapheme_Cluster_Break=CR} | \p{Grapheme_Cluster_Break=LF} ))"));
+    rules.push_back(std::make_unique<RegexRule>(uR"($L × ( $L | $V | $LV | $LVT ))", uR"(\p{Grapheme_Cluster_Break=L})", u'×', uR"(( \p{Grapheme_Cluster_Break=L} | \p{Grapheme_Cluster_Break=V} | \p{Grapheme_Cluster_Break=LV} | \p{Grapheme_Cluster_Break=LVT} ))"));
+    rules.push_back(std::make_unique<RegexRule>(uR"(( $LV | $V ) × ( $V | $T ))", uR"(( \p{Grapheme_Cluster_Break=LV} | \p{Grapheme_Cluster_Break=V} ))", u'×', uR"(( \p{Grapheme_Cluster_Break=V} | \p{Grapheme_Cluster_Break=T} ))"));
+    rules.push_back(std::make_unique<RegexRule>(uR"(( $LVT | $T) × $T)", uR"(( \p{Grapheme_Cluster_Break=LVT} | \p{Grapheme_Cluster_Break=T}))", u'×', uR"(\p{Grapheme_Cluster_Break=T})"));
+    rules.push_back(std::make_unique<RegexRule>(uR"(× ($Extend | $ZWJ))", uR"()", u'×', uR"((\p{Grapheme_Cluster_Break=Extend} | \p{Grapheme_Cluster_Break=ZWJ}))"));
+    rules.push_back(std::make_unique<RegexRule>(uR"(× $SpacingMark)", uR"()", u'×', uR"(\p{Grapheme_Cluster_Break=SpacingMark})"));
+    rules.push_back(std::make_unique<RegexRule>(uR"($Prepend ×)", uR"(\p{Grapheme_Cluster_Break=Prepend})", u'×', uR"()"));
+    rules.push_back(std::make_unique<RegexRule>(uR"($LinkingConsonant $ConjunctExtender* $ConjunctLinker $ConjunctExtender* × $LinkingConsonant)", uR"(\p{Indic_Conjunct_Break=Consonant} [\p{Indic_Conjunct_Break=Linker}\p{Indic_Conjunct_Break=Extend}]* \p{Indic_Conjunct_Break=Linker} [\p{Indic_Conjunct_Break=Linker}\p{Indic_Conjunct_Break=Extend}]*)", u'×', uR"(\p{Indic_Conjunct_Break=Consonant})"));
+    rules.push_back(std::make_unique<RegexRule>(uR"($ExtPict $Extend* $ZWJ × $ExtPict)", uR"(\p{Extended_Pictographic} \p{Grapheme_Cluster_Break=Extend}* \p{Grapheme_Cluster_Break=ZWJ})", u'×', uR"(\p{Extended_Pictographic})"));
+    rules.push_back(std::make_unique<RegexRule>(uR"(^ ($RI $RI)* $RI × $RI)", uR"(^ (\p{Grapheme_Cluster_Break=Regional_Indicator} \p{Grapheme_Cluster_Break=Regional_Indicator})* \p{Grapheme_Cluster_Break=Regional_Indicator})", u'×', uR"(\p{Grapheme_Cluster_Break=Regional_Indicator})"));
+    rules.push_back(std::make_unique<RegexRule>(uR"([^$RI] ($RI $RI)* $RI × $RI)", uR"([^\p{Grapheme_Cluster_Break=Regional_Indicator}] (\p{Grapheme_Cluster_Break=Regional_Indicator} \p{Grapheme_Cluster_Break=Regional_Indicator})* \p{Grapheme_Cluster_Break=Regional_Indicator})", u'×', uR"(\p{Grapheme_Cluster_Break=Regional_Indicator})"));
+    // --- End of generated code. ---
 
+    // TODO(egg): This could just as well be part of the rules…
+    rules.push_back(std::make_unique<RegexRule>(uR"(ALL ÷ / ÷ ALL)", uR"()", u'÷', uR"()"));
 
-int RBBISentMonkey::moveForward(int i) {
-    if (i>=fText->length()) {
-        return fText->length();
-    }
-    UChar32   c;
-    int32_t   j = i;
-    do {
-        j = fText->moveIndex32(j, 1);
-        c = cAt(j);
+    for (const auto &[name, set] : partition) {  // Keep sets[k] and classNames[k] in step.
+        sets.push_back(set);
+        classNames.push_back(name);
     }
-    while (fFormatSet->contains(c) || fExtendSet->contains(c));
-    return j;
-}
 
-UChar32 RBBISentMonkey::cAt(int pos) {
-    if (pos<0 || pos>=fText->length()) {
-        return -1;
-    } else {
-        return fText->char32At(pos);
+    if (U_FAILURE(status)) {  // Record the failure for later instead of reporting it here.
+        deferredStatus = status;
     }
 }
 
-int32_t RBBISentMonkey::next(int32_t prevPos) {
-    int    p0, p1, p2, p3;    // Indices of the significant code points around the
-                              //   break position being tested.  The candidate break
-                              //   location is before p2.
-
-    int     breakPos = -1;
-
-    UChar32 c0, c1, c2, c3;   // The code points at p0, p1, p2 & p3.
-    UChar32 c;
-
-    if (U_FAILURE(deferredStatus)) {
-        return -1;
-    }
-
-    // Prev break at end of string.  return DONE.
-    if (prevPos >= fText->length()) {
-        return -1;
-    }
-    p0 = p1 = p2 = p3 = prevPos;
-    c3 =  fText->char32At(prevPos);
-    c0 = c1 = c2 = 0;
-    (void)p0;     // Suppress set but not used warning.
-
-    // Loop runs once per "significant" character position in the input text.
-    for (;;) {
-        // Move all of the positions forward in the input string.
-        p0 = p1;  c0 = c1;
-        p1 = p2;  c1 = c2;
-        p2 = p3;  c2 = c3;
-
-        // Advance p3 by    X(Extend | Format)*   Rule 4
-        p3 = moveForward(p3);
-        c3 = cAt(p3);
-
-        if (c1==0x0d && c2==0x0a && p2==(p1+1)) {
-            setAppliedRule(p2, "SB3   CR x LF");
-            continue;
-        }
+//------------------------------------------------------------------------------------------
+//
+//   class RBBIWordMonkey      Word Break specific implementation
+//                             of RBBIMonkeyKind.
+//
+//------------------------------------------------------------------------------------------
+class RBBIWordMonkey: public RBBIMonkeyKind {
+public:
+    RBBIWordMonkey();  // Defines dictionarySet_ and the word break rules.
+};
 
-        if (fSepSet->contains(c1)) {
-            p2 = p1+1;   // Separators don't combine with Extend or Format.
 
-            setAppliedRule(p2, "SB4   Sep  <break>");
-            break;
-        }
+RBBIWordMonkey::RBBIWordMonkey()
+{
+    UErrorCode  status = U_ZERO_ERROR;
 
-        if (p2 >= fText->length()) {
-            // Reached end of string.  Always a break position.
-            setAppliedRule(p2, "SB4   Sep  <break>");
-            break;
-        }
+    std::list<std::pair<std::string, UnicodeSet>> partition;
 
-        if (p2 == prevPos) {
-            // Still warming up the loop.  (won't work with zero length strings, but we don't care)
-            setAppliedRule(p2, "SB4   Sep  <break>");
-            continue;
-        }
+    dictionarySet_ = UnicodeSet(uR"([[\uac00-\ud7a3][:Han:][:Hiragana:]])", status);
+    dictionarySet_.addAll(UnicodeSet(uR"([\p{Word_Break = Katakana}])", status));
+    dictionarySet_.addAll(UnicodeSet(uR"([\p{LineBreak = Complex_Context}])", status));
 
-        if (fATermSet->contains(c1) &&  fNumericSet->contains(c2))  {
-            setAppliedRule(p2, "SB6   ATerm x Numeric");
-            continue;
-        }
+    // These two could be part of the rules.
+    rules.push_back(std::make_unique<RegexRule>(uR"(WB1 sot ÷ Any)", uR"(^)", u'÷', uR"()"));
+    // Note that /$/ matches ( BK | CR | LF | NL ) eot, so we use (?!.) instead.
+    // The generated rules use the same (?!.).
+    rules.push_back(std::make_unique<RegexRule>(uR"(WB2 Any ÷ eot)", uR"()", u'÷', uR"((?!.))"));
 
-          if ((fUpperSet->contains(c0) || fLowerSet->contains(c0)) &&
-                fATermSet->contains(c1) && fUpperSet->contains(c2)) {
-            setAppliedRule(p2, "SB7   (Upper | Lower) ATerm  x  Uppper");
-            continue;
-        }
+    // --- NOLI ME TANGERE ---
+    // Generated by GenerateBreakTest.java in the Unicode tools.
+    partition.emplace_back("CR", UnicodeSet(uR"([\p{Word_Break=CR}])", status));
+    partition.emplace_back("LF", UnicodeSet(uR"([\p{Word_Break=LF}])", status));
+    partition.emplace_back("Newline", UnicodeSet(uR"([\p{Word_Break=Newline}])", status));
+    partition.emplace_back("Extend", UnicodeSet(uR"([\p{Word_Break=Extend}])", status));
+    partition.emplace_back("Format", UnicodeSet(uR"([[\p{Word_Break=Format}]])", status));
+    partition.emplace_back("Katakana", UnicodeSet(uR"([\p{Word_Break=Katakana}])", status));
+    partition.emplace_back("ALetter_ExtPict", UnicodeSet(uR"([\p{Word_Break=ALetter}&\p{Extended_Pictographic}])", status));
+    partition.emplace_back("ALettermExtPict", UnicodeSet(uR"([\p{Word_Break=ALetter}-\p{Extended_Pictographic}])", status));
+    partition.emplace_back("MidLetter", UnicodeSet(uR"([\p{Word_Break=MidLetter}])", status));
+    partition.emplace_back("MidNum", UnicodeSet(uR"([\p{Word_Break=MidNum}])", status));
+    partition.emplace_back("MidNumLet", UnicodeSet(uR"([\p{Word_Break=MidNumLet}])", status));
+    partition.emplace_back("Numeric", UnicodeSet(uR"([\p{Word_Break=Numeric}])", status));
+    partition.emplace_back("ExtendNumLet", UnicodeSet(uR"([\p{Word_Break=ExtendNumLet}])", status));
+    partition.emplace_back("RI", UnicodeSet(uR"([\p{Word_Break=Regional_Indicator}])", status));
+    partition.emplace_back("Hebrew_Letter", UnicodeSet(uR"([\p{Word_Break=Hebrew_Letter}])", status));
+    partition.emplace_back("Double_Quote", UnicodeSet(uR"([\p{Word_Break=Double_Quote}])", status));
+    partition.emplace_back("Single_Quote", UnicodeSet(uR"([\p{Word_Break=Single_Quote}])", status));
+    partition.emplace_back("ZWJ", UnicodeSet(uR"([\p{Word_Break=ZWJ}])", status));
+    partition.emplace_back("ExtPictmALetter", UnicodeSet(uR"([\p{Extended_Pictographic}-\p{Word_Break=ALetter}])", status));
+    partition.emplace_back("WSegSpace", UnicodeSet(uR"([\p{Word_Break=WSegSpace}])", status));
+    partition.emplace_back("XXmExtPict", UnicodeSet(uR"([\p{Word_Break=Other}-\p{Extended_Pictographic}])", status));
+
+    rules.push_back(std::make_unique<RegexRule>(uR"($CR × $LF)", uR"(\p{Word_Break=CR})", u'×', uR"(\p{Word_Break=LF})"));
+    rules.push_back(std::make_unique<RegexRule>(uR"(($Newline | $CR | $LF) ÷)", uR"((\p{Word_Break=Newline} | \p{Word_Break=CR} | \p{Word_Break=LF}))", u'÷', uR"()"));
+    rules.push_back(std::make_unique<RegexRule>(uR"(÷ ($Newline | $CR | $LF))", uR"()", u'÷', uR"((\p{Word_Break=Newline} | \p{Word_Break=CR} | \p{Word_Break=LF}))"));
+    rules.push_back(std::make_unique<RegexRule>(uR"($ZWJ × $ExtPict)", uR"(\p{Word_Break=ZWJ})", u'×', uR"(\p{Extended_Pictographic})"));
+    rules.push_back(std::make_unique<RegexRule>(uR"($WSegSpace × $WSegSpace)", uR"(\p{Word_Break=WSegSpace})", u'×', uR"(\p{Word_Break=WSegSpace})"));
+    rules.push_back(std::make_unique<RemapRule>(uR"((?<X>[^$CR $LF $Newline]) ($Extend | $Format | $ZWJ)* → ${X})", uR"((?<X>[^\p{Word_Break=CR} \p{Word_Break=LF} \p{Word_Break=Newline}]) (\p{Word_Break=Extend} | [\p{Word_Break=Format}] | \p{Word_Break=ZWJ})*)", uR"(${X})"));
+    rules.push_back(std::make_unique<RegexRule>(uR"($AHLetter × $AHLetter)", uR"([\p{Word_Break=ALetter} \p{Word_Break=Hebrew_Letter}])", u'×', uR"([\p{Word_Break=ALetter} \p{Word_Break=Hebrew_Letter}])"));
+    rules.push_back(std::make_unique<RegexRule>(uR"($AHLetter × ($MidLetter | $MidNumLetQ) $AHLetter)", uR"([\p{Word_Break=ALetter} \p{Word_Break=Hebrew_Letter}])", u'×', uR"((\p{Word_Break=MidLetter} | [\p{Word_Break=MidNumLet} \p{Word_Break=Single_Quote}]) [\p{Word_Break=ALetter} \p{Word_Break=Hebrew_Letter}])"));
+    rules.push_back(std::make_unique<RegexRule>(uR"($AHLetter ($MidLetter | $MidNumLetQ) × $AHLetter)", uR"([\p{Word_Break=ALetter} \p{Word_Break=Hebrew_Letter}] (\p{Word_Break=MidLetter} | [\p{Word_Break=MidNumLet} \p{Word_Break=Single_Quote}]))", u'×', uR"([\p{Word_Break=ALetter} \p{Word_Break=Hebrew_Letter}])"));
+    rules.push_back(std::make_unique<RegexRule>(uR"($Hebrew_Letter × $Single_Quote)", uR"(\p{Word_Break=Hebrew_Letter})", u'×', uR"(\p{Word_Break=Single_Quote})"));
+    rules.push_back(std::make_unique<RegexRule>(uR"($Hebrew_Letter × $Double_Quote $Hebrew_Letter)", uR"(\p{Word_Break=Hebrew_Letter})", u'×', uR"(\p{Word_Break=Double_Quote} \p{Word_Break=Hebrew_Letter})"));
+    rules.push_back(std::make_unique<RegexRule>(uR"($Hebrew_Letter $Double_Quote × $Hebrew_Letter)", uR"(\p{Word_Break=Hebrew_Letter} \p{Word_Break=Double_Quote})", u'×', uR"(\p{Word_Break=Hebrew_Letter})"));
+    rules.push_back(std::make_unique<RegexRule>(uR"($Numeric × $Numeric)", uR"(\p{Word_Break=Numeric})", u'×', uR"(\p{Word_Break=Numeric})"));
+    rules.push_back(std::make_unique<RegexRule>(uR"($AHLetter × $Numeric)", uR"([\p{Word_Break=ALetter} \p{Word_Break=Hebrew_Letter}])", u'×', uR"(\p{Word_Break=Numeric})"));
+    rules.push_back(std::make_unique<RegexRule>(uR"($Numeric × $AHLetter)", uR"(\p{Word_Break=Numeric})", u'×', uR"([\p{Word_Break=ALetter} \p{Word_Break=Hebrew_Letter}])"));
+    rules.push_back(std::make_unique<RegexRule>(uR"($Numeric ($MidNum | $MidNumLetQ) × $Numeric)", uR"(\p{Word_Break=Numeric} (\p{Word_Break=MidNum} | [\p{Word_Break=MidNumLet} \p{Word_Break=Single_Quote}]))", u'×', uR"(\p{Word_Break=Numeric})"));
+    rules.push_back(std::make_unique<RegexRule>(uR"($Numeric × ($MidNum | $MidNumLetQ) $Numeric)", uR"(\p{Word_Break=Numeric})", u'×', uR"((\p{Word_Break=MidNum} | [\p{Word_Break=MidNumLet} \p{Word_Break=Single_Quote}]) \p{Word_Break=Numeric})"));
+    rules.push_back(std::make_unique<RegexRule>(uR"($Katakana × $Katakana)", uR"(\p{Word_Break=Katakana})", u'×', uR"(\p{Word_Break=Katakana})"));
+    rules.push_back(std::make_unique<RegexRule>(uR"(($AHLetter | $Numeric | $Katakana | $ExtendNumLet) × $ExtendNumLet)", uR"(([\p{Word_Break=ALetter} \p{Word_Break=Hebrew_Letter}] | \p{Word_Break=Numeric} | \p{Word_Break=Katakana} | \p{Word_Break=ExtendNumLet}))", u'×', uR"(\p{Word_Break=ExtendNumLet})"));
+    rules.push_back(std::make_unique<RegexRule>(uR"($ExtendNumLet × ($AHLetter | $Numeric | $Katakana))", uR"(\p{Word_Break=ExtendNumLet})", u'×', uR"(([\p{Word_Break=ALetter} \p{Word_Break=Hebrew_Letter}] | \p{Word_Break=Numeric} | \p{Word_Break=Katakana}))"));
+    rules.push_back(std::make_unique<RegexRule>(uR"(^ ($RI $RI)* $RI × $RI)", uR"(^ (\p{Word_Break=Regional_Indicator} \p{Word_Break=Regional_Indicator})* \p{Word_Break=Regional_Indicator})", u'×', uR"(\p{Word_Break=Regional_Indicator})"));
+    rules.push_back(std::make_unique<RegexRule>(uR"([^$RI] ($RI $RI)* $RI × $RI)", uR"([^\p{Word_Break=Regional_Indicator}] (\p{Word_Break=Regional_Indicator} \p{Word_Break=Regional_Indicator})* \p{Word_Break=Regional_Indicator})", u'×', uR"(\p{Word_Break=Regional_Indicator})"));
+    // --- End of generated code. ---
 
-        //           Note:  STerm | ATerm are added to the negated part of the expression by a
-        //                  note to the Unicode 5.0 documents.
-        int p8 = p1;
-        while (fSpSet->contains(cAt(p8))) {
-            p8 = moveBack(p8);
-        }
-        while (fCloseSet->contains(cAt(p8))) {
-            p8 = moveBack(p8);
-        }
-        if (fATermSet->contains(cAt(p8))) {
-            p8=p2;
-            for (;;) {
-                c = cAt(p8);
-                if (c==-1 || fOLetterSet->contains(c) || fUpperSet->contains(c) ||
-                    fLowerSet->contains(c) || fSepSet->contains(c) ||
-                    fATermSet->contains(c) || fSTermSet->contains(c))  {
+    // TODO(egg): This could just as well be part of the rules…
+    rules.push_back(std::make_unique<RegexRule>(uR"(ALL ÷ / ÷ ALL)", uR"()", u'÷', uR"()"));
 
-                    setAppliedRule(p2,
-                                   "SB8   ATerm Close* Sp*  x  (not (OLettter | Upper | Lower | Sep | STerm | ATerm))* ");
-                    break;
-                }
-                p8 = moveForward(p8);
-            }
-            if (fLowerSet->contains(cAt(p8))) {
+    for (const auto &[name, set] : partition) {
+        sets.push_back(set);
+        classNames.push_back(name);
+    }
 
-                setAppliedRule(p2,
-                               "SB8   ATerm Close* Sp*  x  (not (OLettter | Upper | Lower | Sep | STerm | ATerm))* ");
-                continue;
-            }
-        }
+    if (U_FAILURE(status)) {
+        deferredStatus = status;
+    }
+}
 
-        if (fSContinueSet->contains(c2) || fSTermSet->contains(c2) || fATermSet->contains(c2)) {
-            p8 = p1;
-            while (fSpSet->contains(cAt(p8))) {
-                p8 = moveBack(p8);
-            }
-            while (fCloseSet->contains(cAt(p8))) {
-                p8 = moveBack(p8);
-            }
-            c = cAt(p8);
-            if (fSTermSet->contains(c) || fATermSet->contains(c)) {
-                setAppliedRule(p2, "SB8a  (STerm | ATerm) Close* Sp* x (SContinue | STerm | ATerm)");
-                continue;
-            }
-        }
+//------------------------------------------------------------------------------------------
+//
+//   class RBBISentMonkey      Sentence Break specific implementation
+//                             of RBBIMonkeyKind.
+//
+//------------------------------------------------------------------------------------------
+class RBBISentMonkey: public RBBIMonkeyKind {
+public:
+    RBBISentMonkey();
+};
 
-        int p9 = p1;
-        while (fCloseSet->contains(cAt(p9))) {
-            p9 = moveBack(p9);
-        }
-        c = cAt(p9);
-        if ((fSTermSet->contains(c) || fATermSet->contains(c))) {
-            if (fCloseSet->contains(c2) || fSpSet->contains(c2) || fSepSet->contains(c2)) {
+RBBISentMonkey::RBBISentMonkey()
+{
+    UErrorCode status = U_ZERO_ERROR;
 
-                setAppliedRule(p2, "SB9  (STerm | ATerm) Close*  x  (Close | Sp | Sep | CR | LF)");
-                continue;
-            }
-        }
+    std::list<std::pair<std::string, UnicodeSet>> partition;
 
-        int p10 = p1;
-        while (fSpSet->contains(cAt(p10))) {
-            p10 = moveBack(p10);
-        }
-        while (fCloseSet->contains(cAt(p10))) {
-            p10 = moveBack(p10);
-        }
-        if (fSTermSet->contains(cAt(p10)) || fATermSet->contains(cAt(p10))) {
-            if (fSpSet->contains(c2) || fSepSet->contains(c2)) {
-                setAppliedRule(p2, "SB10  (Sterm | ATerm) Close* Sp*  x  (Sp | Sep | CR | LF)");
-                continue;
-            }
-        }
+    // These two could be part of the rules.
+    rules.push_back(std::make_unique<RegexRule>(uR"(SB1 sot ÷ Any)", uR"(^)", u'÷', uR"()"));
+    // Note that /$/ matches not only eot but also before ( BK | CR | LF | NL ) eot,
+    // so we use (?!.) instead.  The generated rules use the same (?!.).
+    rules.push_back(std::make_unique<RegexRule>(uR"(SB2 Any ÷ eot)", uR"()", u'÷', uR"((?!.))"));
 
-        int p11 = p1;
-        if (fSepSet->contains(cAt(p11))) {
-            p11 = moveBack(p11);
-        }
-        while (fSpSet->contains(cAt(p11))) {
-            p11 = moveBack(p11);
-        }
-        while (fCloseSet->contains(cAt(p11))) {
-            p11 = moveBack(p11);
-        }
-        if (fSTermSet->contains(cAt(p11)) || fATermSet->contains(cAt(p11))) {
-          setAppliedRule(p2, "SB11  (STerm | ATerm) Close* Sp* (Sep | CR | LF)?  <break>");
-            break;
-        }
+    // --- NOLI ME TANGERE ---
+    // Generated by GenerateBreakTest.java in the Unicode tools.
+    partition.emplace_back("CR", UnicodeSet(uR"([\p{Sentence_Break=CR}])", status));
+    partition.emplace_back("LF", UnicodeSet(uR"([\p{Sentence_Break=LF}])", status));
+    partition.emplace_back("Extend", UnicodeSet(uR"([\p{Sentence_Break=Extend}])", status));
+    partition.emplace_back("Format", UnicodeSet(uR"([\p{Sentence_Break=Format}])", status));
+    partition.emplace_back("Sep", UnicodeSet(uR"([\p{Sentence_Break=Sep}])", status));
+    partition.emplace_back("Sp", UnicodeSet(uR"([\p{Sentence_Break=Sp}])", status));
+    partition.emplace_back("Lower", UnicodeSet(uR"([\p{Sentence_Break=Lower}])", status));
+    partition.emplace_back("Upper", UnicodeSet(uR"([\p{Sentence_Break=Upper}])", status));
+    partition.emplace_back("OLetter", UnicodeSet(uR"([\p{Sentence_Break=OLetter}])", status));
+    partition.emplace_back("Numeric", UnicodeSet(uR"([\p{Sentence_Break=Numeric}])", status));
+    partition.emplace_back("ATerm", UnicodeSet(uR"([\p{Sentence_Break=ATerm}])", status));
+    partition.emplace_back("STerm", UnicodeSet(uR"([\p{Sentence_Break=STerm}])", status));
+    partition.emplace_back("Close", UnicodeSet(uR"([\p{Sentence_Break=Close}])", status));
+    partition.emplace_back("SContinue", UnicodeSet(uR"([\p{Sentence_Break=SContinue}])", status));
+    partition.emplace_back("XX", UnicodeSet(uR"([\p{Sentence_Break=Other}])", status));
+
+    rules.push_back(std::make_unique<RegexRule>(uR"($CR × $LF)", uR"(\p{Sentence_Break=CR})", u'×', uR"(\p{Sentence_Break=LF})"));
+    rules.push_back(std::make_unique<RegexRule>(uR"($ParaSep ÷)", uR"([\p{Sentence_Break=Sep} \p{Sentence_Break=CR} \p{Sentence_Break=LF}])", u'÷', uR"()"));
+    rules.push_back(std::make_unique<RemapRule>(uR"((?<X>[^$ParaSep]) ( $Extend | $Format )* → ${X})", uR"((?<X>[^[\p{Sentence_Break=Sep} \p{Sentence_Break=CR} \p{Sentence_Break=LF}]]) ( \p{Sentence_Break=Extend} | \p{Sentence_Break=Format} )*)", uR"(${X})"));
+    rules.push_back(std::make_unique<RegexRule>(uR"($ATerm × $Numeric)", uR"(\p{Sentence_Break=ATerm})", u'×', uR"(\p{Sentence_Break=Numeric})"));
+    rules.push_back(std::make_unique<RegexRule>(uR"(($Upper | $Lower) $ATerm × $Upper)", uR"((\p{Sentence_Break=Upper} | \p{Sentence_Break=Lower}) \p{Sentence_Break=ATerm})", u'×', uR"(\p{Sentence_Break=Upper})"));
+    rules.push_back(std::make_unique<RegexRule>(uR"($ATerm $Close* $Sp* × [^ $OLetter $Upper $Lower $ParaSep $SATerm]* $Lower)", uR"(\p{Sentence_Break=ATerm} \p{Sentence_Break=Close}* \p{Sentence_Break=Sp}*)", u'×', uR"([^ \p{Sentence_Break=OLetter} \p{Sentence_Break=Upper} \p{Sentence_Break=Lower} [\p{Sentence_Break=Sep} \p{Sentence_Break=CR} \p{Sentence_Break=LF}] [\p{Sentence_Break=STerm} \p{Sentence_Break=ATerm}]]* \p{Sentence_Break=Lower})"));
+    rules.push_back(std::make_unique<RegexRule>(uR"($SATerm $Close* $Sp* × ($SContinue | $SATerm))", uR"([\p{Sentence_Break=STerm} \p{Sentence_Break=ATerm}] \p{Sentence_Break=Close}* \p{Sentence_Break=Sp}*)", u'×', uR"((\p{Sentence_Break=SContinue} | [\p{Sentence_Break=STerm} \p{Sentence_Break=ATerm}]))"));
+    rules.push_back(std::make_unique<RegexRule>(uR"($SATerm $Close* × ( $Close | $Sp | $ParaSep ))", uR"([\p{Sentence_Break=STerm} \p{Sentence_Break=ATerm}] \p{Sentence_Break=Close}*)", u'×', uR"(( \p{Sentence_Break=Close} | \p{Sentence_Break=Sp} | [\p{Sentence_Break=Sep} \p{Sentence_Break=CR} \p{Sentence_Break=LF}] ))"));
+    rules.push_back(std::make_unique<RegexRule>(uR"($SATerm $Close* $Sp* × ( $Sp | $ParaSep ))", uR"([\p{Sentence_Break=STerm} \p{Sentence_Break=ATerm}] \p{Sentence_Break=Close}* \p{Sentence_Break=Sp}*)", u'×', uR"(( \p{Sentence_Break=Sp} | [\p{Sentence_Break=Sep} \p{Sentence_Break=CR} \p{Sentence_Break=LF}] ))"));
+    rules.push_back(std::make_unique<RegexRule>(uR"($SATerm $Close* $Sp* $ParaSep? ÷)", uR"([\p{Sentence_Break=STerm} \p{Sentence_Break=ATerm}] \p{Sentence_Break=Close}* \p{Sentence_Break=Sp}* [\p{Sentence_Break=Sep} \p{Sentence_Break=CR} \p{Sentence_Break=LF}]?)", u'÷', uR"()"));
+    rules.push_back(std::make_unique<RegexRule>(uR"(× $Any)", uR"()", u'×', uR"(.)"));
+    // --- End of generated code. ---
 
-        setAppliedRule(p2, "SB12  Any x Any");
+    for (const auto &[name, set] : partition) {
+        sets.push_back(set);
+        classNames.push_back(name);
     }
 
-    breakPos = p2;
-    return breakPos;
-}
-
-RBBISentMonkey::~RBBISentMonkey() {
-    delete fSepSet;
-    delete fFormatSet;
-    delete fSpSet;
-    delete fLowerSet;
-    delete fUpperSet;
-    delete fOLetterSet;
-    delete fNumericSet;
-    delete fATermSet;
-    delete fSContinueSet;
-    delete fSTermSet;
-    delete fCloseSet;
-    delete fOtherSet;
-    delete fExtendSet;
+    if (U_FAILURE(status)) {
+        deferredStatus = status;
+    }
 }
 
 //-------------------------------------------------------------------------------------------
@@ -2873,25 +2206,11 @@ RBBISentMonkey::~RBBISentMonkey() {
 class RBBILineMonkey: public RBBIMonkeyKind {
 public:
     RBBILineMonkey();
-    virtual          ~RBBILineMonkey();
-    virtual const std::vector<UnicodeSet>& charClasses() override;
-    virtual  void     setText(const UnicodeString &s) override;
-    virtual  int32_t  next(int32_t i) override;
 private:
-    std::vector<UnicodeSet> sets;
-    std::vector<std::unique_ptr<SegmentationRule>> rules;
-    std::vector<SegmentationRule::BreakContext> resolved;
-
-    BreakIterator        *fCharBI;
-    const UnicodeString  *fText;
 };
 
 RBBILineMonkey::RBBILineMonkey() :
-    RBBIMonkeyKind(),
-
-    fCharBI(nullptr),
-    fText(nullptr)
-
+    RBBIMonkeyKind()
 {
     if (U_FAILURE(deferredStatus)) {
         return;
@@ -2906,20 +2225,11 @@ RBBILineMonkey::RBBILineMonkey() :
 
     std::list<std::pair<std::string, UnicodeSet>> partition;
 
-    // TODO(egg): The following two workarounds for what seems to be ICU bugs;
-    // with UREGEX_DOTALL (but not UREGEX_MULTILINE):
-    // 1. /.*\u000A/ does not match CR LF;
-    // 2. /$/ matches ( BK | CR | LF | NL ) eot.
-    rules.push_back(std::make_unique<RegexRule>(uR"(CR LF ÷)", uR"(\u000D\u000A)", u'÷', uR"()"));
-    rules.push_back(std::make_unique<RegexRule>(
-        uR"([^ BK CR LF NL ] × [ BK CR LF NL ] eot)",
-        uR"([^ \p{lb=BK} \p{lb=CR} \p{lb=LF} \p{lb=NL} ])",
-        u'×',
-        uR"([ \p{lb=BK} \p{lb=CR} \p{lb=LF} \p{lb=NL} ] $)"));
-
     rules.push_back(std::make_unique<RegexRule>(uR"(sot ÷ contra LB2)", uR"(^)", u'÷', uR"()"));
     // This one could be part of the rules.
-    rules.push_back(std::make_unique<RegexRule>(uR"(LB3 ÷ eot)", uR"()", u'÷', uR"($)"));
+    // Note that /$/ matches not only eot but also before ( BK | CR | LF | NL ) eot,
+    // so we use (?!.) instead.  The generated rules use the same (?!.).
+    rules.push_back(std::make_unique<RegexRule>(uR"(LB3 ÷ eot)", uR"()", u'÷', uR"((?!.))"));
 
     // --- NOLI ME TANGERE ---
     // Generated by GenerateBreakTest.java in the Unicode tools.
@@ -3015,7 +2325,7 @@ RBBILineMonkey::RBBILineMonkey() :
     rules.push_back(std::make_unique<RegexRule>(uR"(× $CP)", uR"()", u'×', uR"(\p{Line_Break=CP})"));
     rules.push_back(std::make_unique<RegexRule>(uR"(× $SY)", uR"()", u'×', uR"(\p{Line_Break=Break_Symbols})"));
     rules.push_back(std::make_unique<RegexRule>(uR"($OP $SP* ×)", uR"(\p{Line_Break=Open_Punctuation} \p{Line_Break=Space}*)", u'×', uR"()"));
-    rules.push_back(std::make_unique<RegexRule>(uR"(( $sot | $BK | $CR | $LF | $NL | $OP | $QU | $GL | $SP | $ZW ) $QU_Pi $SP* ×)", uR"(( ^ | \p{Line_Break=Mandatory_Break} | \p{Line_Break=Carriage_Return} | \p{Line_Break=Line_Feed} | \p{Line_Break=Next_Line} | \p{Line_Break=Open_Punctuation} | \p{Line_Break=Quotation} | \p{Line_Break=Glue} | \p{Line_Break=Space} | \p{Line_Break=ZWSpace} ) [\p{Line_Break=Quotation} && \p{gc=Pi}] \p{Line_Break=Space}*)", u'×', uR"()"));
+    rules.push_back(std::make_unique<RegexRule>(uR"(( $BK | $CR | $LF | $NL | $OP | $QU | $GL | $SP | $ZW | $sot ) $QU_Pi $SP* ×)", uR"(( \p{Line_Break=Mandatory_Break} | \p{Line_Break=Carriage_Return} | \p{Line_Break=Line_Feed} | \p{Line_Break=Next_Line} | \p{Line_Break=Open_Punctuation} | \p{Line_Break=Quotation} | \p{Line_Break=Glue} | \p{Line_Break=Space} | \p{Line_Break=ZWSpace} | ^ ) [\p{Line_Break=Quotation} && \p{gc=Pi}] \p{Line_Break=Space}*)", u'×', uR"()"));
     rules.push_back(std::make_unique<RegexRule>(uR"(× $QU_Pf ( $SP | $GL | $WJ | $CL | $QU | $CP | $EX | $IS | $SY | $BK | $CR | $LF | $NL | $ZW | $eot ))", uR"()", u'×', uR"([\p{Line_Break=Quotation} && \p{gc=Pf}] ( \p{Line_Break=Space} | \p{Line_Break=Glue} | \p{Line_Break=Word_Joiner} | \p{Line_Break=Close_Punctuation} | \p{Line_Break=Quotation} | \p{Line_Break=CP} | \p{Line_Break=Exclamation} | \p{Line_Break=Infix_Numeric} | \p{Line_Break=Break_Symbols} | \p{Line_Break=Mandatory_Break} | \p{Line_Break=Carriage_Return} | \p{Line_Break=Line_Feed} | \p{Line_Break=Next_Line} | \p{Line_Break=ZWSpace} | (?!.) ))"));
     rules.push_back(std::make_unique<RegexRule>(uR"($SP ÷ $IS $NU)", uR"(\p{Line_Break=Space})", u'÷', uR"(\p{Line_Break=Infix_Numeric} \p{Line_Break=Numeric})"));
     rules.push_back(std::make_unique<RegexRule>(uR"(× $IS)", uR"()", u'×', uR"(\p{Line_Break=Infix_Numeric})"));
@@ -3027,10 +2337,10 @@ RBBILineMonkey::RBBILineMonkey() :
     rules.push_back(std::make_unique<RegexRule>(uR"([^$EastAsian] × $QU)", uR"([^[\p{ea=F}\p{ea=W}\p{ea=H}]])", u'×', uR"(\p{Line_Break=Quotation})"));
     rules.push_back(std::make_unique<RegexRule>(uR"(× $QU ( [^$EastAsian] | $eot ))", uR"()", u'×', uR"(\p{Line_Break=Quotation} ( [^[\p{ea=F}\p{ea=W}\p{ea=H}]] | (?!.) ))"));
     rules.push_back(std::make_unique<RegexRule>(uR"($QU × [^$EastAsian])", uR"(\p{Line_Break=Quotation})", u'×', uR"([^[\p{ea=F}\p{ea=W}\p{ea=H}]])"));
-    rules.push_back(std::make_unique<RegexRule>(uR"(( $sot | [^$EastAsian] ) $QU ×)", uR"(( ^ | [^[\p{ea=F}\p{ea=W}\p{ea=H}]] ) \p{Line_Break=Quotation})", u'×', uR"()"));
+    rules.push_back(std::make_unique<RegexRule>(uR"(( [^$EastAsian] | $sot ) $QU ×)", uR"(( [^[\p{ea=F}\p{ea=W}\p{ea=H}]] | ^ ) \p{Line_Break=Quotation})", u'×', uR"()"));
     rules.push_back(std::make_unique<RegexRule>(uR"(÷ $CB)", uR"()", u'÷', uR"(\p{Line_Break=Contingent_Break})"));
     rules.push_back(std::make_unique<RegexRule>(uR"($CB ÷)", uR"(\p{Line_Break=Contingent_Break})", u'÷', uR"()"));
-    rules.push_back(std::make_unique<RegexRule>(uR"(( $sot | $BK | $CR | $LF | $NL | $SP | $ZW | $CB | $GL ) ( $HY | $Hyphen ) × $AL)", uR"(( ^ | \p{Line_Break=Mandatory_Break} | \p{Line_Break=Carriage_Return} | \p{Line_Break=Line_Feed} | \p{Line_Break=Next_Line} | \p{Line_Break=Space} | \p{Line_Break=ZWSpace} | \p{Line_Break=Contingent_Break} | \p{Line_Break=Glue} ) ( \p{Line_Break=Hyphen} | [\u2010] ))", u'×', uR"([\p{Line_Break=Ambiguous} \p{Line_Break=Alphabetic} \p{Line_Break=Surrogate} \p{Line_Break=Unknown} [\p{Line_Break=Complex_Context}--\p{gc=Mn}--\p{gc=Mc}]])"));
+    rules.push_back(std::make_unique<RegexRule>(uR"(( $BK | $CR | $LF | $NL | $SP | $ZW | $CB | $GL | $sot ) ( $HY | $Hyphen ) × $AL)", uR"(( \p{Line_Break=Mandatory_Break} | \p{Line_Break=Carriage_Return} | \p{Line_Break=Line_Feed} | \p{Line_Break=Next_Line} | \p{Line_Break=Space} | \p{Line_Break=ZWSpace} | \p{Line_Break=Contingent_Break} | \p{Line_Break=Glue} | ^ ) ( \p{Line_Break=Hyphen} | [\u2010] ))", u'×', uR"([\p{Line_Break=Ambiguous} \p{Line_Break=Alphabetic} \p{Line_Break=Surrogate} \p{Line_Break=Unknown} [\p{Line_Break=Complex_Context}--\p{gc=Mn}--\p{gc=Mc}]])"));
     rules.push_back(std::make_unique<RegexRule>(uR"(× $BA)", uR"()", u'×', uR"(\p{Line_Break=Break_After})"));
     rules.push_back(std::make_unique<RegexRule>(uR"(× $HY)", uR"()", u'×', uR"(\p{Line_Break=Hyphen})"));
     rules.push_back(std::make_unique<RegexRule>(uR"(× $NS)", uR"()", u'×', uR"([\p{Line_Break=Nonstarter} \p{Line_Break=Conditional_Japanese_Starter}])"));
@@ -3080,82 +2390,22 @@ RBBILineMonkey::RBBILineMonkey() :
     // --- End of generated code. ---
 
 
+
     // TODO(egg): This could just as well be part of the rules…
     rules.push_back(std::make_unique<RegexRule>(uR"(ALL ÷ / ÷ ALL)",
                                                 uR"()", u'÷',
                                                 uR"()"));
 
-    const UnicodeSet lbSA(uR"(\p{lb=SA})", status);
-    for (auto it = partition.begin(); it != partition.end();) {
-        if (lbSA.containsAll(it->second)) {
-            it = partition.erase(it);
-        } else {
-            ++it;
-        }
-    }
+    dictionarySet_ = UnicodeSet(uR"(\p{lb=SA})", status);
 
     for (const auto &[name, set] : partition) {
         sets.push_back(set);
         classNames.push_back(name);
     }
 
-    fCharBI = BreakIterator::createCharacterInstance(Locale::getEnglish(), status);
-
     if (U_FAILURE(status)) {
         deferredStatus = status;
     }
-
-}
-
-void RBBILineMonkey::setText(const UnicodeString &s) {
-    fText       = &s;
-    fCharBI->setText(s);
-    prepareAppliedRules(s.length());
-    UnicodeString remapped = s;
-    resolved.clear();
-    resolved.reserve(s.length() + 1);
-    for (int i = 0; i < s.length() + 1; ++i) {
-        resolved.emplace_back(i);
-    }
-    for (const auto& rule : rules) {
-        rule->apply(remapped, resolved);
-    }
-    for (std::size_t i = 0; i < resolved.size(); ++i) {
-        if (resolved[i].appliedRule == nullptr) {
-            printf("Failed to resolve at %zu" , i);
-            std::terminate();
-        } else {
-            setAppliedRule(i, resolved[i].appliedRule->name().c_str());
-        }
-    }
-}
-
-int32_t RBBILineMonkey::next(int32_t startPos) {
-    for (std::size_t i = startPos + 1; i < resolved.size(); ++i) {
-        if (resolved[i].appliedRule != nullptr &&
-                resolved[i].appliedRule->resolution() == SegmentationRule::BREAK) {
-            return i;
-        }
-    }
-    return -1;
-}
-
-
-const std::vector<UnicodeSet>& RBBILineMonkey::charClasses() {
-    return sets;
-}
-
-
-RBBILineMonkey::~RBBILineMonkey() {
-    constexpr bool debuggingOldMonkeyPerformance = false;
-    if (debuggingOldMonkeyPerformance) {
-        for (auto const &rule : rules) {
-          puts((rule->name() + " : " + std::to_string(rule->timeSpent() / std::chrono::milliseconds(1)) +
-                " ms").c_str());
-        }
-    }
-
-    delete fCharBI;
 }
 
 
@@ -3343,6 +2593,16 @@ void RBBITest::TestWordBreaks()
         UnicodeString ustr = CharsToUnicodeString(strlist[loop]);
         // RBBICharMonkey monkey;
         RBBIWordMonkey monkey;
+        if (monkey.dictionarySet().containsSome(ustr)) {
+            // Some of these twenty-year-old random examples depend on the
+            // monkey tests not looking across dictionary/non-dictionary
+            // boundaries for context when applying the rules.
+            // The monkeys are not designed to work with dictionary characters,
+            // so this behaviour is out of scope for testing against the
+            // monkeys.
+            logKnownIssue("ICU-22984");
+            continue;
+        }
 
         int expected[50];
         int expectedcount = 0;
@@ -3847,6 +3107,9 @@ void RBBITest::RunMonkey(BreakIterator *bi, RBBIMonkeyKind &mk, const char *name
                 errln("%s:%d c < 0", __FILE__, __LINE__);
                 break;
             }
+            if (mk.dictionarySet().contains(c)) {
+              continue;
+            }
             if (scalarsOnly && U16_IS_SURROGATE(c)) {
               continue;
             }
diff --git a/icu4c/source/test/testdata/break_rules/line.txt b/icu4c/source/test/testdata/break_rules/line.txt
index 9f85b7917139..e2154abf6309 100644
--- a/icu4c/source/test/testdata/break_rules/line.txt
+++ b/icu4c/source/test/testdata/break_rules/line.txt
@@ -176,7 +176,7 @@ LB11.2:      SP WJ;
 LB11.3:      WJ CM* [^CM];
 
 # Needs to apply before LB12, because the new monkeys are not greedy.
-LB20a.2:   GL (HY | HH) CM* AL;
+LB20a.2:   GL CM* (HY | HH) CM* AL;
 LB12:      GL CM* [^CM];
 
 LB12a:       [^SP BA HY] CM* GL;
diff --git a/icu4c/source/test/testdata/break_rules/line_cj.txt b/icu4c/source/test/testdata/break_rules/line_cj.txt
index 7aad76ecf107..bb0a6880ea29 100644
--- a/icu4c/source/test/testdata/break_rules/line_cj.txt
+++ b/icu4c/source/test/testdata/break_rules/line_cj.txt
@@ -180,7 +180,7 @@ LB11.2:      SP WJ;
 LB11.3:      WJ CM* [^CM];
 
 # Needs to apply before LB12, because the new monkeys are not greedy.
-LB20a.2:   GL (HY | HH) CM* AL;
+LB20a.2:   GL CM* (HY | HH) CM* AL;
 LB12:      GL CM* [^CM];
 
 LB12a:       [^SP BA HY] CM* GL;
diff --git a/icu4c/source/test/testdata/break_rules/line_loose.txt b/icu4c/source/test/testdata/break_rules/line_loose.txt
index 72e7563c9274..f9152060bf2d 100644
--- a/icu4c/source/test/testdata/break_rules/line_loose.txt
+++ b/icu4c/source/test/testdata/break_rules/line_loose.txt
@@ -181,7 +181,7 @@ LB11.2:      SP WJ;
 LB11.3:      WJ CM* [^CM];
 
 # Needs to apply before LB12, because the new monkeys are not greedy.
-LB20a.2:   GL (HY | HH) CM* AL;
+LB20a.2:   GL CM* (HY | HH) CM* AL;
 LB12:      GL CM* [^CM];
 
 LB12a:       [^SP BA HY] CM* GL;
diff --git a/icu4c/source/test/testdata/break_rules/line_loose_cj.txt b/icu4c/source/test/testdata/break_rules/line_loose_cj.txt
index 99d01874d1fb..b04236532bbd 100644
--- a/icu4c/source/test/testdata/break_rules/line_loose_cj.txt
+++ b/icu4c/source/test/testdata/break_rules/line_loose_cj.txt
@@ -200,7 +200,7 @@ LB11.2:      SP WJ;
 LB11.3:      WJ CM* [^CM];
 
 # Needs to apply before LB12, because the new monkeys are not greedy.
-LB20a.2:   GL (HY | HH) CM* AL;
+LB20a.2:   GL CM* (HY | HH) CM* AL;
 LB12:      GL CM* [^CM];
 
 LB12a:       [^SP BA BAX HY] CM* GL;
diff --git a/icu4c/source/test/testdata/break_rules/line_normal.txt b/icu4c/source/test/testdata/break_rules/line_normal.txt
index 211298539797..c7c518d5b68b 100644
--- a/icu4c/source/test/testdata/break_rules/line_normal.txt
+++ b/icu4c/source/test/testdata/break_rules/line_normal.txt
@@ -182,7 +182,7 @@ LB11.2:      SP WJ;
 LB11.3:      WJ CM* [^CM];
 
 # Needs to apply before LB12, because the new monkeys are not greedy.
-LB20a.2:   GL (HY | HH) CM* AL;
+LB20a.2:   GL CM* (HY | HH) CM* AL;
 LB12:      GL CM* [^CM];
 
 LB12a:       [^SP BA HY] CM* GL;
diff --git a/icu4c/source/test/testdata/break_rules/line_normal_cj.txt b/icu4c/source/test/testdata/break_rules/line_normal_cj.txt
index 2061f9170848..cfa9c7968e1b 100644
--- a/icu4c/source/test/testdata/break_rules/line_normal_cj.txt
+++ b/icu4c/source/test/testdata/break_rules/line_normal_cj.txt
@@ -186,7 +186,7 @@ LB11.2:      SP WJ;
 LB11.3:      WJ CM* [^CM];
 
 # Needs to apply before LB12, because the new monkeys are not greedy.
-LB20a.2:   GL (HY | HH) CM* AL;
+LB20a.2:   GL CM* (HY | HH) CM* AL;
 LB12:      GL CM* [^CM];
 
 LB12a:       [^SP BA HY] CM* GL;
diff --git a/icu4c/source/test/testdata/rbbitst.txt b/icu4c/source/test/testdata/rbbitst.txt
index 1c7fe9975699..781ce068be7b 100644
--- a/icu4c/source/test/testdata/rbbitst.txt
+++ b/icu4c/source/test/testdata/rbbitst.txt
@@ -2214,3 +2214,7 @@ Bangkok)•</data>
 <data>•« Complex »« chaining » •</data>
 <data>•« .618 »•</data>  # Interaction with the ICU tailoring to break before such numbers.
 
+# A hyphen that follows a non-breaking space carrying combining marks (GL CM*)
+# is treated as word-initial; by LB20a there is no break opportunity after
+# it.  ICU 76 handled that case incorrectly (ICU-22986).
+<data>• ̄-k•</data>
\ No newline at end of file
diff --git a/icu4j/main/core/src/main/resources/com/ibm/icu/impl/data/icudata/brkitr/line.brk b/icu4j/main/core/src/main/resources/com/ibm/icu/impl/data/icudata/brkitr/line.brk
index 8d0172d055cb..ab4a491c49bd 100644
Binary files a/icu4j/main/core/src/main/resources/com/ibm/icu/impl/data/icudata/brkitr/line.brk and b/icu4j/main/core/src/main/resources/com/ibm/icu/impl/data/icudata/brkitr/line.brk differ
diff --git a/icu4j/main/core/src/main/resources/com/ibm/icu/impl/data/icudata/brkitr/line_cj.brk b/icu4j/main/core/src/main/resources/com/ibm/icu/impl/data/icudata/brkitr/line_cj.brk
index dbbbc0dfbae1..ca6e43ba4247 100644
Binary files a/icu4j/main/core/src/main/resources/com/ibm/icu/impl/data/icudata/brkitr/line_cj.brk and b/icu4j/main/core/src/main/resources/com/ibm/icu/impl/data/icudata/brkitr/line_cj.brk differ
diff --git a/icu4j/main/core/src/main/resources/com/ibm/icu/impl/data/icudata/brkitr/line_loose.brk b/icu4j/main/core/src/main/resources/com/ibm/icu/impl/data/icudata/brkitr/line_loose.brk
index 9f77680c2835..31c737abf247 100644
Binary files a/icu4j/main/core/src/main/resources/com/ibm/icu/impl/data/icudata/brkitr/line_loose.brk and b/icu4j/main/core/src/main/resources/com/ibm/icu/impl/data/icudata/brkitr/line_loose.brk differ
diff --git a/icu4j/main/core/src/main/resources/com/ibm/icu/impl/data/icudata/brkitr/line_loose_cj.brk b/icu4j/main/core/src/main/resources/com/ibm/icu/impl/data/icudata/brkitr/line_loose_cj.brk
index 4199ddeda1cf..0608fb2f4de9 100644
Binary files a/icu4j/main/core/src/main/resources/com/ibm/icu/impl/data/icudata/brkitr/line_loose_cj.brk and b/icu4j/main/core/src/main/resources/com/ibm/icu/impl/data/icudata/brkitr/line_loose_cj.brk differ
diff --git a/icu4j/main/core/src/main/resources/com/ibm/icu/impl/data/icudata/brkitr/line_loose_phrase_cj.brk b/icu4j/main/core/src/main/resources/com/ibm/icu/impl/data/icudata/brkitr/line_loose_phrase_cj.brk
index bebfe7285a2f..bba43a75fc6c 100644
Binary files a/icu4j/main/core/src/main/resources/com/ibm/icu/impl/data/icudata/brkitr/line_loose_phrase_cj.brk and b/icu4j/main/core/src/main/resources/com/ibm/icu/impl/data/icudata/brkitr/line_loose_phrase_cj.brk differ
diff --git a/icu4j/main/core/src/main/resources/com/ibm/icu/impl/data/icudata/brkitr/line_normal.brk b/icu4j/main/core/src/main/resources/com/ibm/icu/impl/data/icudata/brkitr/line_normal.brk
index 0229e2cb2f2d..371a487782a9 100644
Binary files a/icu4j/main/core/src/main/resources/com/ibm/icu/impl/data/icudata/brkitr/line_normal.brk and b/icu4j/main/core/src/main/resources/com/ibm/icu/impl/data/icudata/brkitr/line_normal.brk differ
diff --git a/icu4j/main/core/src/main/resources/com/ibm/icu/impl/data/icudata/brkitr/line_normal_cj.brk b/icu4j/main/core/src/main/resources/com/ibm/icu/impl/data/icudata/brkitr/line_normal_cj.brk
index 9b13706bfb58..47f829ce0340 100644
Binary files a/icu4j/main/core/src/main/resources/com/ibm/icu/impl/data/icudata/brkitr/line_normal_cj.brk and b/icu4j/main/core/src/main/resources/com/ibm/icu/impl/data/icudata/brkitr/line_normal_cj.brk differ
diff --git a/icu4j/main/core/src/main/resources/com/ibm/icu/impl/data/icudata/brkitr/line_normal_phrase_cj.brk b/icu4j/main/core/src/main/resources/com/ibm/icu/impl/data/icudata/brkitr/line_normal_phrase_cj.brk
index 7cbc69987714..49fa8e0416b3 100644
Binary files a/icu4j/main/core/src/main/resources/com/ibm/icu/impl/data/icudata/brkitr/line_normal_phrase_cj.brk and b/icu4j/main/core/src/main/resources/com/ibm/icu/impl/data/icudata/brkitr/line_normal_phrase_cj.brk differ
diff --git a/icu4j/main/core/src/main/resources/com/ibm/icu/impl/data/icudata/brkitr/line_phrase_cj.brk b/icu4j/main/core/src/main/resources/com/ibm/icu/impl/data/icudata/brkitr/line_phrase_cj.brk
index b9f1fa48e7d7..3920cf5e5dba 100644
Binary files a/icu4j/main/core/src/main/resources/com/ibm/icu/impl/data/icudata/brkitr/line_phrase_cj.brk and b/icu4j/main/core/src/main/resources/com/ibm/icu/impl/data/icudata/brkitr/line_phrase_cj.brk differ
diff --git a/icu4j/main/core/src/test/java/com/ibm/icu/dev/test/rbbi/RBBITestMonkey.java b/icu4j/main/core/src/test/java/com/ibm/icu/dev/test/rbbi/RBBITestMonkey.java
index 56a4801bea29..5b3489e4369e 100644
--- a/icu4j/main/core/src/test/java/com/ibm/icu/dev/test/rbbi/RBBITestMonkey.java
+++ b/icu4j/main/core/src/test/java/com/ibm/icu/dev/test/rbbi/RBBITestMonkey.java
@@ -19,10 +19,12 @@
 import org.junit.runners.JUnit4;
 
 // Monkey testing of RuleBasedBreakIterator.
-//    The old, original monkey test. TODO: remove
+//    The old monkey test, now using regexes generated by the Unicode tools.
 //    The new monkey test is class RBBIMonkeyTest.
 
 import com.ibm.icu.dev.test.CoreTestFmwk;
+import com.ibm.icu.dev.test.rbbi.SegmentationRule.BreakContext;
+import com.ibm.icu.dev.test.rbbi.SegmentationRule.Resolution;
 import com.ibm.icu.lang.UCharacter;
 import com.ibm.icu.lang.UProperty;
 import com.ibm.icu.text.BreakIterator;
@@ -676,166 +678,15 @@ int   next(int prevPos) {
 
 
     static class RBBILineMonkey extends RBBIMonkeyKind {
-        // UnicodeSets for each of the Line Breaking character classes.
-        // Order matches that of Unicode UAX 14, Table 1, which makes it a little easier
-        // to verify that they are all accounted for.
-
-        // XUnicodeSet is like UnicodeSet, except that the method contains(int codePoint) does not
-        // throw exceptions on out-of-range codePoints. This matches ICU4C behavior.
-        // The LineMonkey test (ported from ICU4C) relies on this behavior, it uses a value of -1
-        // to represent a non-codepoint that is not included in any of the property sets.
-        // This happens for rule 30a.
-        class XUnicodeSet extends UnicodeSet {
-            XUnicodeSet(String pattern) { super(pattern); }
-            XUnicodeSet() { super(); }
-            @Override
-            public boolean contains(int codePoint) {
-                return codePoint < UnicodeSet.MIN_VALUE || codePoint > UnicodeSet.MAX_VALUE ?
-                        false : super.contains(codePoint);
-            }
-        }
-
-        // Declare these variables as XUnicodeSet, not merely as UnicodeSet,
-        // so that when we copy a new declaration from C++ (where only UnicodeSet exists),
-        // the missing 'X' prefix is visible;
-        // and when the prefix is there and we copy a new initializer we get a compiler error.
-        // (Otherwise we rely on the caller catching the IAE from using codePoint=-1
-        // and failing with a message that tells us what to do.)
-        XUnicodeSet fBK;
-        XUnicodeSet fCR;
-        XUnicodeSet fLF;
-        XUnicodeSet fCM;
-        XUnicodeSet fNL;
-        XUnicodeSet fSG;
-        XUnicodeSet fWJ;
-        XUnicodeSet fZW;
-        XUnicodeSet fGL;
-        XUnicodeSet fSP;
-        XUnicodeSet fB2;
-        XUnicodeSet fBA;
-        XUnicodeSet fBB;
-        XUnicodeSet fHH;
-        XUnicodeSet fHY;
-        XUnicodeSet fCB;
-        XUnicodeSet fCL;
-        XUnicodeSet fCP;
-        XUnicodeSet fEX;
-        XUnicodeSet fIN;
-        XUnicodeSet fNS;
-        XUnicodeSet fOP;
-        XUnicodeSet fQU;
-        XUnicodeSet fIS;
-        XUnicodeSet fNU;
-        XUnicodeSet fPO;
-        XUnicodeSet fPR;
-        XUnicodeSet fSY;
-        XUnicodeSet fAI;
-        XUnicodeSet fAL;
-        XUnicodeSet fCJ;
-        XUnicodeSet fH2;
-        XUnicodeSet fH3;
-        XUnicodeSet fHL;
-        XUnicodeSet fID;
-        XUnicodeSet fJL;
-        XUnicodeSet fJV;
-        XUnicodeSet fJT;
-        XUnicodeSet fRI;
-        XUnicodeSet fXX;
-        XUnicodeSet fEB;
-        XUnicodeSet fEM;
-        XUnicodeSet fZWJ;
-        XUnicodeSet fOP30;
-        XUnicodeSet fCP30;
-        XUnicodeSet fExtPictUnassigned;
-        XUnicodeSet fAK;
-        XUnicodeSet fAP;
-        XUnicodeSet fAS;
-        XUnicodeSet fVF;
-        XUnicodeSet fVI;
-        XUnicodeSet fPi;
-        XUnicodeSet fPf;
-        XUnicodeSet feaFWH;
+        List<SegmentationRule> rules;
+        SegmentationRule.BreakContext[] resolved;
 
         StringBuffer  fText;
-        int           fOrigPositions;
 
         RBBILineMonkey()
         {
             fCharProperty  = UProperty.LINE_BREAK;
 
-            fBK    = new XUnicodeSet("[\\p{Line_Break=BK}]");
-            fCR    = new XUnicodeSet("[\\p{Line_break=CR}]");
-            fLF    = new XUnicodeSet("[\\p{Line_break=LF}]");
-            fCM    = new XUnicodeSet("[\\p{Line_break=CM}]");
-            fNL    = new XUnicodeSet("[\\p{Line_break=NL}]");
-            fSG    = new XUnicodeSet("[\\ud800-\\udfff]");
-            fWJ    = new XUnicodeSet("[\\p{Line_break=WJ}]");
-            fZW    = new XUnicodeSet("[\\p{Line_break=ZW}]");
-            fGL    = new XUnicodeSet("[\\p{Line_break=GL}]");
-            fSP    = new XUnicodeSet("[\\p{Line_break=SP}]");
-            fB2    = new XUnicodeSet("[\\p{Line_break=B2}]");
-            fBA    = new XUnicodeSet("[\\p{Line_break=BA}]");
-            fBB    = new XUnicodeSet("[\\p{Line_break=BB}]");
-            fHH    = new XUnicodeSet();
-            fHY    = new XUnicodeSet("[\\p{Line_break=HY}]");
-            fCB    = new XUnicodeSet("[\\p{Line_break=CB}]");
-            fCL    = new XUnicodeSet("[\\p{Line_break=CL}]");
-            fCP    = new XUnicodeSet("[\\p{Line_break=CP}]");
-            fEX    = new XUnicodeSet("[\\p{Line_break=EX}]");
-            fIN    = new XUnicodeSet("[\\p{Line_break=IN}]");
-            fNS    = new XUnicodeSet("[\\p{Line_break=NS}]");
-            fOP    = new XUnicodeSet("[\\p{Line_break=OP}]");
-            fQU    = new XUnicodeSet("[\\p{Line_break=QU}]");
-            fIS    = new XUnicodeSet("[\\p{Line_break=IS}]");
-            fNU    = new XUnicodeSet("[\\p{Line_break=NU}]");
-            fPO    = new XUnicodeSet("[\\p{Line_break=PO}]");
-            fPR    = new XUnicodeSet("[\\p{Line_break=PR}]");
-            fSY    = new XUnicodeSet("[\\p{Line_break=SY}]");
-            fAI    = new XUnicodeSet("[\\p{Line_break=AI}]");
-            fAL    = new XUnicodeSet("[\\p{Line_break=AL}]");
-            fCJ    = new XUnicodeSet("[\\p{Line_break=CJ}]");
-            fH2    = new XUnicodeSet("[\\p{Line_break=H2}]");
-            fH3    = new XUnicodeSet("[\\p{Line_break=H3}]");
-            fHL    = new XUnicodeSet("[\\p{Line_break=HL}]");
-            fID    = new XUnicodeSet("[\\p{Line_break=ID}]");
-            fJL    = new XUnicodeSet("[\\p{Line_break=JL}]");
-            fJV    = new XUnicodeSet("[\\p{Line_break=JV}]");
-            fJT    = new XUnicodeSet("[\\p{Line_break=JT}]");
-            fRI    = new XUnicodeSet("[\\p{Line_break=RI}]");
-            fXX    = new XUnicodeSet("[\\p{Line_break=XX}]");
-            fEB    = new XUnicodeSet("[\\p{Line_break=EB}]");
-            fEM    = new XUnicodeSet("[\\p{Line_break=EM}]");
-            fZWJ   = new XUnicodeSet("[\\p{Line_break=ZWJ}]");
-            fOP30  = new XUnicodeSet("[\\p{Line_break=OP}-[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]");
-            fCP30  = new XUnicodeSet("[\\p{Line_break=CP}-[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]");
-            fExtPictUnassigned = new XUnicodeSet("[\\p{Extended_Pictographic}&\\p{Cn}]");
-            fAK = new XUnicodeSet("[\\p{Line_Break=AK}]");
-            fAP = new XUnicodeSet("[\\p{Line_Break=AP}]");
-            fAS = new XUnicodeSet("[\\p{Line_Break=AS}]");
-            fVF = new XUnicodeSet("[\\p{Line_Break=VF}]");
-            fVI = new XUnicodeSet("[\\p{Line_Break=VI}]");
-
-            fPi = new XUnicodeSet("[\\p{Pi}]");
-            fPf = new XUnicodeSet("[\\p{Pf}]");
-
-            feaFWH = new XUnicodeSet("[\\p{ea=F}\\p{ea=W}\\p{ea=H}]");
-
-            // Remove dictionary characters.
-            // The monkey test reference implementation of line break does not replicate the dictionary behavior,
-            // so dictionary characters are omitted from the monkey test data.
-            @SuppressWarnings("unused")
-            UnicodeSet dictionarySet = new UnicodeSet(
-                    "[[:LineBreak = Complex_Context:] & [[:Script = Thai:][:Script = Lao:][:Script = Khmer:] [:script = Myanmar:]]]");
-
-            fAL.addAll(fXX);     // Default behavior for XX is identical to AL
-            fAL.addAll(fAI);     // Default behavior for AI is identical to AL
-            fAL.addAll(fSG);     // Default behavior for SG (unpaired surrogates) is AL
-
-            fNS.addAll(fCJ);     // Default behavior for CJ is identical to NS.
-            fCM.addAll(fZWJ);    // ZWJ behaves as a CM.
-
-            fHH.add('\u2010');   // Hyphen, '‐'
-
             class NamedSet {
                 String name;
                 UnicodeSet set;
@@ -847,40 +698,181 @@ class NamedSet {
                     this(name, new UnicodeSet(pattern));
                 }
             };
-
-            final List<NamedSet> interestingSets = new ArrayList<>();
-            interestingSets.add(new NamedSet("eastAsian", "[\\p{ea=F}\\p{ea=W}\\p{ea=H}]"));
-            interestingSets.add(new NamedSet("Pi", "\\p{Pi}"));
-            interestingSets.add(new NamedSet("Pf",  "\\p{Pf}"));
-            interestingSets.add(new NamedSet("DOTTEDC.",  "[◌]"));
-            interestingSets.add(new NamedSet("HYPHEN",  "[\\u2010]"));
-            interestingSets.add(new NamedSet("ExtPictCn",  "[\\p{Extended_Pictographic}&\\p{Cn}]"));
-            final List<NamedSet> partition = new ArrayList<>();
-            for (int lb = 0; lb < UCharacter.LineBreak.COUNT; ++lb) {
-                final String lbValueShortName =
-                    UCharacter.getPropertyValueName(UProperty.LINE_BREAK, lb, UProperty.NameChoice.SHORT);
-                if (lbValueShortName.equals("SA")) {
+            List<NamedSet> partition = new ArrayList<>();
+            rules = new ArrayList<>();
+
+            rules.add(new RegexRule("sot ÷ contra LB2", "^", Resolution.BREAK, ""));
+            // This one could be part of the rules.
+            // Note that /$/ matches ( BK | CR | LF | NL ) eot, so we use (?!.) instead.
+            // The generated rules use the same (?!.).
+            rules.add(new RegexRule("LB3 ÷ eot", "", Resolution.BREAK, "(?!.)"));
+
+            // --- NOLI ME TANGERE ---
+            // Generated by GenerateBreakTest.java in the Unicode tools.
+            partition.add(new NamedSet("AI_EastAsian", new UnicodeSet("[\\p{Line_Break=Ambiguous}&[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
+            partition.add(new NamedSet("AImEastAsian", new UnicodeSet("[\\p{Line_Break=Ambiguous}-[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
+            partition.add(new NamedSet("AK", new UnicodeSet("[\\p{Line_Break=Aksara}]")));
+            partition.add(new NamedSet("ALorig_EastAsian", new UnicodeSet("[\\p{Line_Break=Alphabetic}&[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
+            partition.add(new NamedSet("ALorig_DottedCircle", new UnicodeSet("[\\p{Line_Break=Alphabetic}&[◌]]")));
+            partition.add(new NamedSet("ALorigmEastAsianmDottedCircle", new UnicodeSet("[\\p{Line_Break=Alphabetic}-[\\p{ea=F}\\p{ea=W}\\p{ea=H}]-[◌]]")));
+            partition.add(new NamedSet("AP", new UnicodeSet("[\\p{Line_Break=Aksara_Prebase}]")));
+            partition.add(new NamedSet("AS", new UnicodeSet("[\\p{Line_Break=Aksara_Start}]")));
+            partition.add(new NamedSet("B2", new UnicodeSet("[\\p{Line_Break=Break_Both}]")));
+            partition.add(new NamedSet("BA_EastAsian", new UnicodeSet("[\\p{Line_Break=Break_After}&[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
+            partition.add(new NamedSet("BA_Hyphen", new UnicodeSet("[\\p{Line_Break=Break_After}&[\\u2010]]")));
+            partition.add(new NamedSet("BAmEastAsianmHyphen", new UnicodeSet("[\\p{Line_Break=Break_After}-[\\p{ea=F}\\p{ea=W}\\p{ea=H}]-[\\u2010]]")));
+            partition.add(new NamedSet("BB", new UnicodeSet("[\\p{Line_Break=Break_Before}]")));
+            partition.add(new NamedSet("BK", new UnicodeSet("[\\p{Line_Break=Mandatory_Break}]")));
+            partition.add(new NamedSet("CB", new UnicodeSet("[\\p{Line_Break=Contingent_Break}]")));
+            partition.add(new NamedSet("CL_EastAsian", new UnicodeSet("[\\p{Line_Break=Close_Punctuation}&[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
+            partition.add(new NamedSet("CLmEastAsian", new UnicodeSet("[\\p{Line_Break=Close_Punctuation}-[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
+            partition.add(new NamedSet("CP", new UnicodeSet("[\\p{Line_Break=CP}]")));
+            partition.add(new NamedSet("CMorig_EastAsian", new UnicodeSet("[\\p{Line_Break=Combining_Mark}&[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
+            partition.add(new NamedSet("CMorigmEastAsian", new UnicodeSet("[\\p{Line_Break=Combining_Mark}-[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
+            partition.add(new NamedSet("CR", new UnicodeSet("[\\p{Line_Break=Carriage_Return}]")));
+            partition.add(new NamedSet("EX_EastAsian", new UnicodeSet("[\\p{Line_Break=Exclamation}&[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
+            partition.add(new NamedSet("EXmEastAsian", new UnicodeSet("[\\p{Line_Break=Exclamation}-[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
+            partition.add(new NamedSet("GL_EastAsian", new UnicodeSet("[\\p{Line_Break=Glue}&[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
+            partition.add(new NamedSet("GLmEastAsian", new UnicodeSet("[\\p{Line_Break=Glue}-[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
+            partition.add(new NamedSet("H2", new UnicodeSet("[\\p{Line_Break=H2}]")));
+            partition.add(new NamedSet("H3", new UnicodeSet("[\\p{Line_Break=H3}]")));
+            partition.add(new NamedSet("HL", new UnicodeSet("[\\p{Line_Break=HL}]")));
+            partition.add(new NamedSet("HY", new UnicodeSet("[\\p{Line_Break=Hyphen}]")));
+            partition.add(new NamedSet("ID_EastAsian", new UnicodeSet("[\\p{Line_Break=Ideographic}&[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
+            partition.add(new NamedSet("ID_ExtPictUnassigned", new UnicodeSet("[\\p{Line_Break=Ideographic}&[\\p{Extended_Pictographic}&\\p{gc=Cn}]]")));
+            partition.add(new NamedSet("IDmEastAsianmExtPictUnassigned", new UnicodeSet("[\\p{Line_Break=Ideographic}-[\\p{ea=F}\\p{ea=W}\\p{ea=H}]-[\\p{Extended_Pictographic}&\\p{gc=Cn}]]")));
+            partition.add(new NamedSet("IN_EastAsian", new UnicodeSet("[\\p{Line_Break=Inseparable}&[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
+            partition.add(new NamedSet("INmEastAsian", new UnicodeSet("[\\p{Line_Break=Inseparable}-[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
+            partition.add(new NamedSet("IS", new UnicodeSet("[\\p{Line_Break=Infix_Numeric}]")));
+            partition.add(new NamedSet("JL", new UnicodeSet("[\\p{Line_Break=JL}]")));
+            partition.add(new NamedSet("JT", new UnicodeSet("[\\p{Line_Break=JT}]")));
+            partition.add(new NamedSet("JV", new UnicodeSet("[\\p{Line_Break=JV}]")));
+            partition.add(new NamedSet("LF", new UnicodeSet("[\\p{Line_Break=Line_Feed}]")));
+            partition.add(new NamedSet("NL", new UnicodeSet("[\\p{Line_Break=Next_Line}]")));
+            partition.add(new NamedSet("NSorig_EastAsian", new UnicodeSet("[\\p{Line_Break=Nonstarter}&[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
+            partition.add(new NamedSet("NSorigmEastAsian", new UnicodeSet("[\\p{Line_Break=Nonstarter}-[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
+            partition.add(new NamedSet("NU", new UnicodeSet("[\\p{Line_Break=Numeric}]")));
+            partition.add(new NamedSet("OP_EastAsian", new UnicodeSet("[\\p{Line_Break=Open_Punctuation}&[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
+            partition.add(new NamedSet("OPmEastAsian", new UnicodeSet("[\\p{Line_Break=Open_Punctuation}-[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
+            partition.add(new NamedSet("PO_EastAsian", new UnicodeSet("[\\p{Line_Break=Postfix_Numeric}&[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
+            partition.add(new NamedSet("POmEastAsian", new UnicodeSet("[\\p{Line_Break=Postfix_Numeric}-[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
+            partition.add(new NamedSet("PR_EastAsian", new UnicodeSet("[\\p{Line_Break=Prefix_Numeric}&[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
+            partition.add(new NamedSet("PRmEastAsian", new UnicodeSet("[\\p{Line_Break=Prefix_Numeric}-[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
+            partition.add(new NamedSet("QU_Pi", new UnicodeSet("[\\p{Line_Break=Quotation}&\\p{gc=Pi}]")));
+            partition.add(new NamedSet("QU_Pf", new UnicodeSet("[\\p{Line_Break=Quotation}&\\p{gc=Pf}]")));
+            partition.add(new NamedSet("QUmPimPf", new UnicodeSet("[\\p{Line_Break=Quotation}-\\p{gc=Pi}-\\p{gc=Pf}]")));
+            partition.add(new NamedSet("SA_Mn", new UnicodeSet("[[\\p{Line_Break=Complex_Context}&\\p{gc=Mn}]]")));
+            partition.add(new NamedSet("SA_Mc", new UnicodeSet("[[\\p{Line_Break=Complex_Context}&\\p{gc=Mc}]]")));
+            partition.add(new NamedSet("SAmMnmMc", new UnicodeSet("[[\\p{Line_Break=Complex_Context}-\\p{gc=Mn}-\\p{gc=Mc}]]")));
+            partition.add(new NamedSet("SG", new UnicodeSet("[\\p{Line_Break=Surrogate}]")));
+            partition.add(new NamedSet("SP", new UnicodeSet("[\\p{Line_Break=Space}]")));
+            partition.add(new NamedSet("SY", new UnicodeSet("[\\p{Line_Break=Break_Symbols}]")));
+            partition.add(new NamedSet("VF", new UnicodeSet("[\\p{Line_Break=Virama_Final}]")));
+            partition.add(new NamedSet("VI", new UnicodeSet("[\\p{Line_Break=Virama}]")));
+            partition.add(new NamedSet("WJ", new UnicodeSet("[\\p{Line_Break=Word_Joiner}]")));
+            partition.add(new NamedSet("XX_ExtPictUnassigned", new UnicodeSet("[\\p{Line_Break=Unknown}&[\\p{Extended_Pictographic}&\\p{gc=Cn}]]")));
+            partition.add(new NamedSet("XXmExtPictUnassigned", new UnicodeSet("[\\p{Line_Break=Unknown}-[\\p{Extended_Pictographic}&\\p{gc=Cn}]]")));
+            partition.add(new NamedSet("ZW", new UnicodeSet("[\\p{Line_Break=ZWSpace}]")));
+            partition.add(new NamedSet("CJ", new UnicodeSet("[\\p{Line_Break=Conditional_Japanese_Starter}]")));
+            partition.add(new NamedSet("RI", new UnicodeSet("[\\p{Line_Break=Regional_Indicator}]")));
+            partition.add(new NamedSet("EB_EastAsian", new UnicodeSet("[\\p{Line_Break=E_Base}&[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
+            partition.add(new NamedSet("EBmEastAsian", new UnicodeSet("[\\p{Line_Break=E_Base}-[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]")));
+            partition.add(new NamedSet("EM", new UnicodeSet("[\\p{Line_Break=E_Modifier}]")));
+            partition.add(new NamedSet("ZWJ", new UnicodeSet("[\\p{Line_Break=ZWJ}]")));
+
+            rules.add(new RegexRule("$BK ÷", "\\p{Line_Break=Mandatory_Break}", Resolution.BREAK, ""));
+            rules.add(new RegexRule("$CR × $LF", "\\p{Line_Break=Carriage_Return}", Resolution.NO_BREAK, "\\p{Line_Break=Line_Feed}"));
+            rules.add(new RegexRule("$CR ÷", "\\p{Line_Break=Carriage_Return}", Resolution.BREAK, ""));
+            rules.add(new RegexRule("$LF ÷", "\\p{Line_Break=Line_Feed}", Resolution.BREAK, ""));
+            rules.add(new RegexRule("$NL ÷", "\\p{Line_Break=Next_Line}", Resolution.BREAK, ""));
+            rules.add(new RegexRule("× ( $BK | $CR | $LF | $NL )", "", Resolution.NO_BREAK, "( \\p{Line_Break=Mandatory_Break} | \\p{Line_Break=Carriage_Return} | \\p{Line_Break=Line_Feed} | \\p{Line_Break=Next_Line} )"));
+            rules.add(new RegexRule("× $SP", "", Resolution.NO_BREAK, "\\p{Line_Break=Space}"));
+            rules.add(new RegexRule("× $ZW", "", Resolution.NO_BREAK, "\\p{Line_Break=ZWSpace}"));
+            rules.add(new RegexRule("$ZW $SP* ÷", "\\p{Line_Break=ZWSpace} \\p{Line_Break=Space}*", Resolution.BREAK, ""));
+            rules.add(new RegexRule("$ZWJ ×", "\\p{Line_Break=ZWJ}", Resolution.NO_BREAK, ""));
+            rules.add(new RemapRule("(?<X>[^$BK $CR $LF $NL $SP $ZW]) ( $CM | $ZWJ )* → ${X}", "(?<X>[^\\p{Line_Break=Mandatory_Break} \\p{Line_Break=Carriage_Return} \\p{Line_Break=Line_Feed} \\p{Line_Break=Next_Line} \\p{Line_Break=Space} \\p{Line_Break=ZWSpace}]) ( [\\p{Line_Break=Combining_Mark} [\\p{Line_Break=Complex_Context}&\\p{gc=Mn}] [\\p{Line_Break=Complex_Context}&\\p{gc=Mc}]] | \\p{Line_Break=ZWJ} )*", "${X}"));
+            rules.add(new RemapRule("( $CM | $ZWJ ) → A", "( [\\p{Line_Break=Combining_Mark} [\\p{Line_Break=Complex_Context}&\\p{gc=Mn}] [\\p{Line_Break=Complex_Context}&\\p{gc=Mc}]] | \\p{Line_Break=ZWJ} )", "A"));
+            rules.add(new RegexRule("× $WJ", "", Resolution.NO_BREAK, "\\p{Line_Break=Word_Joiner}"));
+            rules.add(new RegexRule("$WJ ×", "\\p{Line_Break=Word_Joiner}", Resolution.NO_BREAK, ""));
+            rules.add(new RegexRule("$GL ×", "\\p{Line_Break=Glue}", Resolution.NO_BREAK, ""));
+            rules.add(new RegexRule("[^ $SP $BA $HY] × $GL", "[^ \\p{Line_Break=Space} \\p{Line_Break=Break_After} \\p{Line_Break=Hyphen}]", Resolution.NO_BREAK, "\\p{Line_Break=Glue}"));
+            rules.add(new RegexRule("× $EX", "", Resolution.NO_BREAK, "\\p{Line_Break=Exclamation}"));
+            rules.add(new RegexRule("× $CL", "", Resolution.NO_BREAK, "\\p{Line_Break=Close_Punctuation}"));
+            rules.add(new RegexRule("× $CP", "", Resolution.NO_BREAK, "\\p{Line_Break=CP}"));
+            rules.add(new RegexRule("× $SY", "", Resolution.NO_BREAK, "\\p{Line_Break=Break_Symbols}"));
+            rules.add(new RegexRule("$OP $SP* ×", "\\p{Line_Break=Open_Punctuation} \\p{Line_Break=Space}*", Resolution.NO_BREAK, ""));
+            rules.add(new RegexRule("( $BK | $CR | $LF | $NL | $OP | $QU | $GL | $SP | $ZW | $sot ) $QU_Pi $SP* ×", "( \\p{Line_Break=Mandatory_Break} | \\p{Line_Break=Carriage_Return} | \\p{Line_Break=Line_Feed} | \\p{Line_Break=Next_Line} | \\p{Line_Break=Open_Punctuation} | \\p{Line_Break=Quotation} | \\p{Line_Break=Glue} | \\p{Line_Break=Space} | \\p{Line_Break=ZWSpace} | ^ ) [\\p{Line_Break=Quotation} & \\p{gc=Pi}] \\p{Line_Break=Space}*", Resolution.NO_BREAK, ""));
+            rules.add(new RegexRule("× $QU_Pf ( $SP | $GL | $WJ | $CL | $QU | $CP | $EX | $IS | $SY | $BK | $CR | $LF | $NL | $ZW | $eot )", "", Resolution.NO_BREAK, "[\\p{Line_Break=Quotation} & \\p{gc=Pf}] ( \\p{Line_Break=Space} | \\p{Line_Break=Glue} | \\p{Line_Break=Word_Joiner} | \\p{Line_Break=Close_Punctuation} | \\p{Line_Break=Quotation} | \\p{Line_Break=CP} | \\p{Line_Break=Exclamation} | \\p{Line_Break=Infix_Numeric} | \\p{Line_Break=Break_Symbols} | \\p{Line_Break=Mandatory_Break} | \\p{Line_Break=Carriage_Return} | \\p{Line_Break=Line_Feed} | \\p{Line_Break=Next_Line} | \\p{Line_Break=ZWSpace} | (?!.) )"));
+            rules.add(new RegexRule("$SP ÷ $IS $NU", "\\p{Line_Break=Space}", Resolution.BREAK, "\\p{Line_Break=Infix_Numeric} \\p{Line_Break=Numeric}"));
+            rules.add(new RegexRule("× $IS", "", Resolution.NO_BREAK, "\\p{Line_Break=Infix_Numeric}"));
+            rules.add(new RegexRule("($CL | $CP) $SP* × $NS", "(\\p{Line_Break=Close_Punctuation} | \\p{Line_Break=CP}) \\p{Line_Break=Space}*", Resolution.NO_BREAK, "[\\p{Line_Break=Nonstarter} \\p{Line_Break=Conditional_Japanese_Starter}]"));
+            rules.add(new RegexRule("$B2 $SP* × $B2", "\\p{Line_Break=Break_Both} \\p{Line_Break=Space}*", Resolution.NO_BREAK, "\\p{Line_Break=Break_Both}"));
+            rules.add(new RegexRule("$SP ÷", "\\p{Line_Break=Space}", Resolution.BREAK, ""));
+            rules.add(new RegexRule("× $QUmPi", "", Resolution.NO_BREAK, "[\\p{Line_Break=Quotation} - \\p{gc=Pi}]"));
+            rules.add(new RegexRule("$QUmPf ×", "[\\p{Line_Break=Quotation} - \\p{gc=Pf}]", Resolution.NO_BREAK, ""));
+            rules.add(new RegexRule("[^$EastAsian] × $QU", "[^[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]", Resolution.NO_BREAK, "\\p{Line_Break=Quotation}"));
+            rules.add(new RegexRule("× $QU ( [^$EastAsian] | $eot )", "", Resolution.NO_BREAK, "\\p{Line_Break=Quotation} ( [^[\\p{ea=F}\\p{ea=W}\\p{ea=H}]] | (?!.) )"));
+            rules.add(new RegexRule("$QU × [^$EastAsian]", "\\p{Line_Break=Quotation}", Resolution.NO_BREAK, "[^[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]"));
+            rules.add(new RegexRule("( [^$EastAsian] | $sot ) $QU ×", "( [^[\\p{ea=F}\\p{ea=W}\\p{ea=H}]] | ^ ) \\p{Line_Break=Quotation}", Resolution.NO_BREAK, ""));
+            rules.add(new RegexRule("÷ $CB", "", Resolution.BREAK, "\\p{Line_Break=Contingent_Break}"));
+            rules.add(new RegexRule("$CB ÷", "\\p{Line_Break=Contingent_Break}", Resolution.BREAK, ""));
+            rules.add(new RegexRule("( $BK | $CR | $LF | $NL | $SP | $ZW | $CB | $GL | $sot ) ( $HY | $Hyphen ) × $AL", "( \\p{Line_Break=Mandatory_Break} | \\p{Line_Break=Carriage_Return} | \\p{Line_Break=Line_Feed} | \\p{Line_Break=Next_Line} | \\p{Line_Break=Space} | \\p{Line_Break=ZWSpace} | \\p{Line_Break=Contingent_Break} | \\p{Line_Break=Glue} | ^ ) ( \\p{Line_Break=Hyphen} | [\\u2010] )", Resolution.NO_BREAK, "[\\p{Line_Break=Ambiguous} \\p{Line_Break=Alphabetic} \\p{Line_Break=Surrogate} \\p{Line_Break=Unknown} [\\p{Line_Break=Complex_Context}-\\p{gc=Mn}-\\p{gc=Mc}]]"));
+            rules.add(new RegexRule("× $BA", "", Resolution.NO_BREAK, "\\p{Line_Break=Break_After}"));
+            rules.add(new RegexRule("× $HY", "", Resolution.NO_BREAK, "\\p{Line_Break=Hyphen}"));
+            rules.add(new RegexRule("× $NS", "", Resolution.NO_BREAK, "[\\p{Line_Break=Nonstarter} \\p{Line_Break=Conditional_Japanese_Starter}]"));
+            rules.add(new RegexRule("$BB ×", "\\p{Line_Break=Break_Before}", Resolution.NO_BREAK, ""));
+            rules.add(new RegexRule("$HL ($HY | $NonEastAsianBA) × [^$HL]", "\\p{Line_Break=HL} (\\p{Line_Break=Hyphen} | [\\p{Line_Break=Break_After} & [^[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]])", Resolution.NO_BREAK, "[^\\p{Line_Break=HL}]"));
+            rules.add(new RegexRule("$SY × $HL", "\\p{Line_Break=Break_Symbols}", Resolution.NO_BREAK, "\\p{Line_Break=HL}"));
+            rules.add(new RegexRule("× $IN", "", Resolution.NO_BREAK, "\\p{Line_Break=Inseparable}"));
+            rules.add(new RegexRule("($AL | $HL) × $NU", "([\\p{Line_Break=Ambiguous} \\p{Line_Break=Alphabetic} \\p{Line_Break=Surrogate} \\p{Line_Break=Unknown} [\\p{Line_Break=Complex_Context}-\\p{gc=Mn}-\\p{gc=Mc}]] | \\p{Line_Break=HL})", Resolution.NO_BREAK, "\\p{Line_Break=Numeric}"));
+            rules.add(new RegexRule("$NU × ($AL | $HL)", "\\p{Line_Break=Numeric}", Resolution.NO_BREAK, "([\\p{Line_Break=Ambiguous} \\p{Line_Break=Alphabetic} \\p{Line_Break=Surrogate} \\p{Line_Break=Unknown} [\\p{Line_Break=Complex_Context}-\\p{gc=Mn}-\\p{gc=Mc}]] | \\p{Line_Break=HL})"));
+            rules.add(new RegexRule("$PR × ($ID | $EB | $EM)", "\\p{Line_Break=Prefix_Numeric}", Resolution.NO_BREAK, "(\\p{Line_Break=Ideographic} | \\p{Line_Break=E_Base} | \\p{Line_Break=E_Modifier})"));
+            rules.add(new RegexRule("($ID | $EB | $EM) × $PO", "(\\p{Line_Break=Ideographic} | \\p{Line_Break=E_Base} | \\p{Line_Break=E_Modifier})", Resolution.NO_BREAK, "\\p{Line_Break=Postfix_Numeric}"));
+            rules.add(new RegexRule("($PR | $PO) × ($AL | $HL)", "(\\p{Line_Break=Prefix_Numeric} | \\p{Line_Break=Postfix_Numeric})", Resolution.NO_BREAK, "([\\p{Line_Break=Ambiguous} \\p{Line_Break=Alphabetic} \\p{Line_Break=Surrogate} \\p{Line_Break=Unknown} [\\p{Line_Break=Complex_Context}-\\p{gc=Mn}-\\p{gc=Mc}]] | \\p{Line_Break=HL})"));
+            rules.add(new RegexRule("($AL | $HL) × ($PR | $PO)", "([\\p{Line_Break=Ambiguous} \\p{Line_Break=Alphabetic} \\p{Line_Break=Surrogate} \\p{Line_Break=Unknown} [\\p{Line_Break=Complex_Context}-\\p{gc=Mn}-\\p{gc=Mc}]] | \\p{Line_Break=HL})", Resolution.NO_BREAK, "(\\p{Line_Break=Prefix_Numeric} | \\p{Line_Break=Postfix_Numeric})"));
+            rules.add(new RegexRule("$NU ( $SY | $IS )* $CL × $PO", "\\p{Line_Break=Numeric} ( \\p{Line_Break=Break_Symbols} | \\p{Line_Break=Infix_Numeric} )* \\p{Line_Break=Close_Punctuation}", Resolution.NO_BREAK, "\\p{Line_Break=Postfix_Numeric}"));
+            rules.add(new RegexRule("$NU ( $SY | $IS )* $CP × $PO", "\\p{Line_Break=Numeric} ( \\p{Line_Break=Break_Symbols} | \\p{Line_Break=Infix_Numeric} )* \\p{Line_Break=CP}", Resolution.NO_BREAK, "\\p{Line_Break=Postfix_Numeric}"));
+            rules.add(new RegexRule("$NU ( $SY | $IS )* $CL × $PR", "\\p{Line_Break=Numeric} ( \\p{Line_Break=Break_Symbols} | \\p{Line_Break=Infix_Numeric} )* \\p{Line_Break=Close_Punctuation}", Resolution.NO_BREAK, "\\p{Line_Break=Prefix_Numeric}"));
+            rules.add(new RegexRule("$NU ( $SY | $IS )* $CP × $PR", "\\p{Line_Break=Numeric} ( \\p{Line_Break=Break_Symbols} | \\p{Line_Break=Infix_Numeric} )* \\p{Line_Break=CP}", Resolution.NO_BREAK, "\\p{Line_Break=Prefix_Numeric}"));
+            rules.add(new RegexRule("$NU ( $SY | $IS )* × $PO", "\\p{Line_Break=Numeric} ( \\p{Line_Break=Break_Symbols} | \\p{Line_Break=Infix_Numeric} )*", Resolution.NO_BREAK, "\\p{Line_Break=Postfix_Numeric}"));
+            rules.add(new RegexRule("$NU ( $SY | $IS )* × $PR", "\\p{Line_Break=Numeric} ( \\p{Line_Break=Break_Symbols} | \\p{Line_Break=Infix_Numeric} )*", Resolution.NO_BREAK, "\\p{Line_Break=Prefix_Numeric}"));
+            rules.add(new RegexRule("$PO × $OP $NU", "\\p{Line_Break=Postfix_Numeric}", Resolution.NO_BREAK, "\\p{Line_Break=Open_Punctuation} \\p{Line_Break=Numeric}"));
+            rules.add(new RegexRule("$PO × $OP $IS $NU", "\\p{Line_Break=Postfix_Numeric}", Resolution.NO_BREAK, "\\p{Line_Break=Open_Punctuation} \\p{Line_Break=Infix_Numeric} \\p{Line_Break=Numeric}"));
+            rules.add(new RegexRule("$PO × $NU", "\\p{Line_Break=Postfix_Numeric}", Resolution.NO_BREAK, "\\p{Line_Break=Numeric}"));
+            rules.add(new RegexRule("$PR × $OP $NU", "\\p{Line_Break=Prefix_Numeric}", Resolution.NO_BREAK, "\\p{Line_Break=Open_Punctuation} \\p{Line_Break=Numeric}"));
+            rules.add(new RegexRule("$PR × $OP $IS $NU", "\\p{Line_Break=Prefix_Numeric}", Resolution.NO_BREAK, "\\p{Line_Break=Open_Punctuation} \\p{Line_Break=Infix_Numeric} \\p{Line_Break=Numeric}"));
+            rules.add(new RegexRule("$PR × $NU", "\\p{Line_Break=Prefix_Numeric}", Resolution.NO_BREAK, "\\p{Line_Break=Numeric}"));
+            rules.add(new RegexRule("$HY × $NU", "\\p{Line_Break=Hyphen}", Resolution.NO_BREAK, "\\p{Line_Break=Numeric}"));
+            rules.add(new RegexRule("$IS × $NU", "\\p{Line_Break=Infix_Numeric}", Resolution.NO_BREAK, "\\p{Line_Break=Numeric}"));
+            rules.add(new RegexRule("$NU ( $SY | $IS )* × $NU", "\\p{Line_Break=Numeric} ( \\p{Line_Break=Break_Symbols} | \\p{Line_Break=Infix_Numeric} )*", Resolution.NO_BREAK, "\\p{Line_Break=Numeric}"));
+            rules.add(new RegexRule("$JL × $JL | $JV | $H2 | $H3", "\\p{Line_Break=JL}", Resolution.NO_BREAK, "\\p{Line_Break=JL} | \\p{Line_Break=JV} | \\p{Line_Break=H2} | \\p{Line_Break=H3}"));
+            rules.add(new RegexRule("$JV | $H2 × $JV | $JT", "\\p{Line_Break=JV} | \\p{Line_Break=H2}", Resolution.NO_BREAK, "\\p{Line_Break=JV} | \\p{Line_Break=JT}"));
+            rules.add(new RegexRule("$JT | $H3 × $JT", "\\p{Line_Break=JT} | \\p{Line_Break=H3}", Resolution.NO_BREAK, "\\p{Line_Break=JT}"));
+            rules.add(new RegexRule("$JL | $JV | $JT | $H2 | $H3 × $PO", "\\p{Line_Break=JL} | \\p{Line_Break=JV} | \\p{Line_Break=JT} | \\p{Line_Break=H2} | \\p{Line_Break=H3}", Resolution.NO_BREAK, "\\p{Line_Break=Postfix_Numeric}"));
+            rules.add(new RegexRule("$PR × $JL | $JV | $JT | $H2 | $H3", "\\p{Line_Break=Prefix_Numeric}", Resolution.NO_BREAK, "\\p{Line_Break=JL} | \\p{Line_Break=JV} | \\p{Line_Break=JT} | \\p{Line_Break=H2} | \\p{Line_Break=H3}"));
+            rules.add(new RegexRule("($AL | $HL) × ($AL | $HL)", "([\\p{Line_Break=Ambiguous} \\p{Line_Break=Alphabetic} \\p{Line_Break=Surrogate} \\p{Line_Break=Unknown} [\\p{Line_Break=Complex_Context}-\\p{gc=Mn}-\\p{gc=Mc}]] | \\p{Line_Break=HL})", Resolution.NO_BREAK, "([\\p{Line_Break=Ambiguous} \\p{Line_Break=Alphabetic} \\p{Line_Break=Surrogate} \\p{Line_Break=Unknown} [\\p{Line_Break=Complex_Context}-\\p{gc=Mn}-\\p{gc=Mc}]] | \\p{Line_Break=HL})"));
+            rules.add(new RegexRule("$AP × ($AK | $DottedCircle | $AS)", "\\p{Line_Break=Aksara_Prebase}", Resolution.NO_BREAK, "(\\p{Line_Break=Aksara} | [◌] | \\p{Line_Break=Aksara_Start})"));
+            rules.add(new RegexRule("($AK | $DottedCircle | $AS) × ($VF | $VI)", "(\\p{Line_Break=Aksara} | [◌] | \\p{Line_Break=Aksara_Start})", Resolution.NO_BREAK, "(\\p{Line_Break=Virama_Final} | \\p{Line_Break=Virama})"));
+            rules.add(new RegexRule("($AK | $DottedCircle | $AS) $VI × ($AK | $DottedCircle)", "(\\p{Line_Break=Aksara} | [◌] | \\p{Line_Break=Aksara_Start}) \\p{Line_Break=Virama}", Resolution.NO_BREAK, "(\\p{Line_Break=Aksara} | [◌])"));
+            rules.add(new RegexRule("($AK | $DottedCircle | $AS) × ($AK | $DottedCircle | $AS) $VF", "(\\p{Line_Break=Aksara} | [◌] | \\p{Line_Break=Aksara_Start})", Resolution.NO_BREAK, "(\\p{Line_Break=Aksara} | [◌] | \\p{Line_Break=Aksara_Start}) \\p{Line_Break=Virama_Final}"));
+            rules.add(new RegexRule("$IS × ($AL | $HL)", "\\p{Line_Break=Infix_Numeric}", Resolution.NO_BREAK, "([\\p{Line_Break=Ambiguous} \\p{Line_Break=Alphabetic} \\p{Line_Break=Surrogate} \\p{Line_Break=Unknown} [\\p{Line_Break=Complex_Context}-\\p{gc=Mn}-\\p{gc=Mc}]] | \\p{Line_Break=HL})"));
+            rules.add(new RegexRule("($AL | $HL | $NU) × $OPmEastAsian", "([\\p{Line_Break=Ambiguous} \\p{Line_Break=Alphabetic} \\p{Line_Break=Surrogate} \\p{Line_Break=Unknown} [\\p{Line_Break=Complex_Context}-\\p{gc=Mn}-\\p{gc=Mc}]] | \\p{Line_Break=HL} | \\p{Line_Break=Numeric})", Resolution.NO_BREAK, "[\\p{Line_Break=Open_Punctuation}-[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]"));
+            rules.add(new RegexRule("$CPmEastAsian × ($AL | $HL | $NU)", "[\\p{Line_Break=CP}-[\\p{ea=F}\\p{ea=W}\\p{ea=H}]]", Resolution.NO_BREAK, "([\\p{Line_Break=Ambiguous} \\p{Line_Break=Alphabetic} \\p{Line_Break=Surrogate} \\p{Line_Break=Unknown} [\\p{Line_Break=Complex_Context}-\\p{gc=Mn}-\\p{gc=Mc}]] | \\p{Line_Break=HL} | \\p{Line_Break=Numeric})"));
+            rules.add(new RegexRule("$sot ($RI $RI)* $RI × $RI", "^ (\\p{Line_Break=Regional_Indicator} \\p{Line_Break=Regional_Indicator})* \\p{Line_Break=Regional_Indicator}", Resolution.NO_BREAK, "\\p{Line_Break=Regional_Indicator}"));
+            rules.add(new RegexRule("[^$RI] ($RI $RI)* $RI × $RI", "[^\\p{Line_Break=Regional_Indicator}] (\\p{Line_Break=Regional_Indicator} \\p{Line_Break=Regional_Indicator})* \\p{Line_Break=Regional_Indicator}", Resolution.NO_BREAK, "\\p{Line_Break=Regional_Indicator}"));
+            rules.add(new RegexRule("$RI ÷ $RI", "\\p{Line_Break=Regional_Indicator}", Resolution.BREAK, "\\p{Line_Break=Regional_Indicator}"));
+            rules.add(new RegexRule("$EB × $EM", "\\p{Line_Break=E_Base}", Resolution.NO_BREAK, "\\p{Line_Break=E_Modifier}"));
+            rules.add(new RegexRule("$ExtPictUnassigned × $EM", "[\\p{Extended_Pictographic}&\\p{gc=Cn}]", Resolution.NO_BREAK, "\\p{Line_Break=E_Modifier}"));
+            // --- End of generated code. ---
+
+            // TODO(egg): This could just as well be part of the rules…
+            rules.add(new RegexRule("(ALL ÷ / ÷ ALL)", "", Resolution.BREAK, ""));
+
+            final UnicodeSet lbSA = new UnicodeSet("\\p{lb=SA}");
+            for (final NamedSet part : partition) {
+                if (lbSA.containsAll(part.set)) {
                     continue;
                 }
-                partition.add(new NamedSet(lbValueShortName, "\\p{lb=" + lbValueShortName + "}"));
-            }
-            for (final NamedSet refinement : interestingSets) {
-                for (int i = 0; i < partition.size();) {
-                    final String name = partition.get(i).name;
-                    final UnicodeSet set = partition.get(i).set;
-                    final UnicodeSet intersection = new UnicodeSet(set).retainAll(refinement.set);
-                    final UnicodeSet complement = new UnicodeSet(set).removeAll(refinement.set);
-                    if (!intersection.isEmpty() && !complement.isEmpty()) {
-                        partition.add(i, new NamedSet(name, complement));
-                        partition.add(i + 1, new NamedSet(name + "&" + refinement.name, intersection));
-                        partition.remove(i + 2);
-                        i += 2;
-                    } else {
-                        ++i;
-                    }
-                }
-            }
-            for (final NamedSet part : partition) {
                 fSets.add(part.set);
                 fClassNames.add(part.name);
             }
@@ -890,877 +882,32 @@ class NamedSet {
         void setText(StringBuffer s) {
             fText       = s;
             prepareAppliedRules(s.length());
-        }
-
-
-
-
-        @Override
-        int next(int startPos) {
-            int    pos;       //  Index of the char following a potential break position
-            int    thisChar;  //  Character at above position "pos"
-
-            int    prevPos;   //  Index of the char preceding a potential break position
-            int    prevChar;  //  Character at above position.  Note that prevChar
-            //                //  and thisChar may not be adjacent because combining
-            //                //  characters between them will be ignored.
-
-            int    prevPosX2;
-            int    prevCharX2; //  Character before prevChar, more context for LB 21a
-
-            int    nextPos;   //  Index of the next character following pos.
-            //                //  Usually skips over combining marks.
-            int    tPos;      //  temp value.
-            int    matchVals[]  = null;       // Number  Expression Match Results
-
-
-            if (startPos >= fText.length()) {
-                return -1;
+            StringBuilder remapped = new StringBuilder(s.toString());
+            resolved = new BreakContext[s.length() + 1];
+            for (int i = 0; i < resolved.length; ++i) {
+                resolved[i] = new BreakContext(i);
             }
-
-
-            // Initial values for loop.  Loop will run the first time without finding breaks,
-            //                           while the invalid values shift out and the "this" and
-            //                           "prev" positions are filled in with good values.
-            pos      = prevPos   = prevPosX2  = -1;    // Invalid value, serves as flag for initial loop iteration.
-            thisChar = prevChar  = prevCharX2 =  0;
-            nextPos  = startPos;
-
-
-            // Loop runs once per position in the test text, until a break position
-            //  is found.  In each iteration, we are testing for a possible break
-            //  just preceding the character at index "pos".  The character preceding
-            //  this char is at position "prevPos"; because of combining sequences,
-            //  "prevPos" can be arbitrarily far before "pos".
-            for (;;) {
-                // Advance to the next position to be tested.
-                prevPosX2  = prevPos;
-                prevCharX2 = prevChar;
-                prevPos   = pos;
-                prevChar  = thisChar;
-                pos       = nextPos;
-                nextPos   = moveIndex32(fText, pos, 1);
-
-                if (pos >= fText.length()) {
-                    setAppliedRule(pos, "LB 2   Break at end of text");
-                    break;
-                }
-
-                //             We do this rule out-of-order because the adjustment does
-                //             not effect the way that rules LB 3 through LB 6 match,
-                //             and doing it here rather than after LB 6 is substantially
-                //             simpler when combining sequences do occur.
-
-
-                // LB 9         Keep combining sequences together.
-                //              advance over any CM class chars at "pos",
-                //              result is "nextPos" for the following loop iteration.
-                thisChar  = UTF16.charAt(fText, pos);
-                if (!(fSP.contains(thisChar) || fBK.contains(thisChar) || thisChar==0x0d ||
-                        thisChar==0x0a || fNL.contains(thisChar) || fZW.contains(thisChar) )) {
-                    for (;;) {
-                        if (nextPos == fText.length()) {
-                            break;
-                        }
-                        int nextChar = UTF16.charAt(fText, nextPos);
-                        if (!fCM.contains(nextChar)) {
-                            break;
-                        }
-                        nextPos = moveIndex32(fText, nextPos, 1);
-                    }
-                }
-
-                // LB 9 Treat X CM* as if it were X
-                //        No explicit action required.
-
-                // LB 10     Treat any remaining combining mark as lb=AL, ea=Na
-                if (fCM.contains(thisChar)) {
-                    thisChar = 'A';
-                }
-
-
-                // If the loop is still warming up - if we haven't shifted the initial
-                //   -1 positions out of prevPos yet - loop back to advance the
-                //    position in the input without any further looking for breaks.
-                if (prevPos == -1) {
-                    setAppliedRule(pos, "LB 9   adjust for combining sequences.");
-                    continue;
-                }
-
-                if (fBK.contains(prevChar)) {
-                    setAppliedRule(pos, "LB 4   Always break after hard line breaks");
-                    break;
-                }
-
-                if (fCR.contains(prevChar) && fLF.contains(thisChar)) {
-                    setAppliedRule(pos, "LB 5   Break after CR, LF, NL, but not inside CR LF");
-                    continue;
-                }
-                if  (fCR.contains(prevChar) ||
-                        fLF.contains(prevChar) ||
-                        fNL.contains(prevChar))  {
-                    setAppliedRule(pos, "LB 5   Break after CR, LF, NL, but not inside CR LF");
-                    break;
-                }
-
-                if (fBK.contains(thisChar) || fCR.contains(thisChar) ||
-                        fLF.contains(thisChar) || fNL.contains(thisChar) ) {
-                    setAppliedRule(pos, "LB 6   Don't break before hard line breaks");
-                    continue;
-                }
-
-
-                if (fSP.contains(thisChar)) {
-                    setAppliedRule(pos, "LB 7   Don't break before spaces or zero-width space");
-                    continue;
-                }
-
-                if (fZW.contains(thisChar)) {
-                    setAppliedRule(pos, "LB 7   Don't break before spaces or zero-width space");
-                    continue;
-                }
-
-                //       ZW SP* ÷
-                //       Scan backwards from prevChar for SP* ZW
-                tPos = prevPos;
-                while (tPos > 0 && fSP.contains(UTF16.charAt(fText, tPos))) {
-                    tPos = moveIndex32(fText, tPos, -1);
-                }
-                if (fZW.contains(UTF16.charAt(fText, tPos))) {
-                    setAppliedRule(pos, "LB 8   Break after zero width space");
-                    break;
-                }
-
-                //       The monkey test's way of ignoring combining characters doesn't work
-                //       for this rule. ZWJ is also a CM. Need to get the actual character
-                //       preceding "thisChar", not ignoring combining marks, possibly ZWJ.
-                {
-                    int prevC = fText.codePointBefore(pos);
-                    if (fZWJ.contains(prevC)) {
-                        setAppliedRule(pos, "LB 8a  ZWJ x");
-                        continue;
-                    }
-                }
-
-                // appliedRule: "LB 9, 10"; //  Already done, at top of loop.";
-
-
-                //    x  WJ
-                //    WJ  x
-                if (fWJ.contains(thisChar) || fWJ.contains(prevChar)) {
-                    setAppliedRule(pos, "LB 11  Do not break before or after WORD JOINER and related characters.");
-                    continue;
-                }
-
-
-                if (fGL.contains(prevChar)) {
-                    setAppliedRule(pos, "LB 12  GL  x");
-                    continue;
-                }
-
-                if (!(fSP.contains(prevChar) ||
-                        fBA.contains(prevChar) ||
-                        fHY.contains(prevChar)     ) && fGL.contains(thisChar)) {
-                    setAppliedRule(pos, "LB 12a [^SP BA HY] x GL");
-                    continue;
-                }
-
-                if (fCL.contains(thisChar) ||
-                        fCP.contains(thisChar) ||
-                        fEX.contains(thisChar) ||
-                        fSY.contains(thisChar)) {
-                    setAppliedRule(pos, "LB 13  Don't break before closings");
-                    continue;
-                }
-
-                //       Scan backwards, checking for this sequence.
-                //       The OP char could include combining marks, so we actually check for
-                //           OP CM* SP* x
-                tPos = prevPos;
-                if (fSP.contains(prevChar)) {
-                    while (tPos > 0 && fSP.contains(UTF16.charAt(fText, tPos))) {
-                        tPos=moveIndex32(fText, tPos, -1);
-                    }
-                }
-                while (tPos > 0 && fCM.contains(UTF16.charAt(fText, tPos))) {
-                    tPos=moveIndex32(fText, tPos, -1);
-                }
-                if (fOP.contains(UTF16.charAt(fText, tPos))) {
-                    setAppliedRule(pos, "LB 14  Don't break after OP SP*");
-                    continue;
-                }
-
-                // Same as LB 14, scan backward for
-                // (sot | BK | CR | LF | NL | OP CM*| QU CM* | GL CM* | SP) [\p{Pi}&QU] CM* SP*.
-                tPos = prevPos;
-                // SP* (with the aforementioned Twist).
-                if (fSP.contains(prevChar)) {
-                    while (tPos > 0 && fSP.contains(UTF16.charAt(fText, tPos))) {
-                        tPos = moveIndex32(fText, tPos, -1);
-                    }
-                }
-                // CM*.
-                while (tPos > 0 && fCM.contains(UTF16.charAt(fText, tPos))) {
-                    tPos = moveIndex32(fText, tPos, -1);
-                }
-                // [\p{Pi}&QU].
-                if (fPi.contains(UTF16.charAt(fText, tPos)) && fQU.contains(UTF16.charAt(fText, tPos))) {
-                    if (tPos == 0) {
-                        setAppliedRule(pos, "LB 15a sot [\\p{Pi}&QU] SP* ×");
-                        continue;
-                    } else {
-                        tPos = moveIndex32(fText, tPos, -1);
-                        if (fBK.contains(UTF16.charAt(fText, tPos)) || fCR.contains(UTF16.charAt(fText, tPos)) ||
-                            fLF.contains(UTF16.charAt(fText, tPos)) || fNL.contains(UTF16.charAt(fText, tPos)) ||
-                            fSP.contains(UTF16.charAt(fText, tPos)) || fZW.contains(UTF16.charAt(fText, tPos))) {
-                            setAppliedRule(pos, "LB 15a (BK | CR | LF | NL | SP | ZW) [\\p{Pi}&QU] SP* ×");
-                            continue;
-                        }
-                    }
-                    // CM*.
-                    while (tPos > 0 && fCM.contains(UTF16.charAt(fText, tPos))) {
-                        tPos = moveIndex32(fText, tPos, -1);
-                    }
-                    if (fOP.contains(UTF16.charAt(fText, tPos)) || fQU.contains(UTF16.charAt(fText, tPos)) ||
-                        fGL.contains(UTF16.charAt(fText, tPos))) {
-                        setAppliedRule(pos, "LB 15a (OP | QU | GL) [\\p{Pi}&QU] SP* ×");
-                        continue;
-                    }
-                }
-
-                if (fPf.contains(thisChar) && fQU.contains(thisChar)) {
-                    int nextChar = (nextPos < fText.length())? UTF16.charAt(fText, nextPos): 0;
-                    if (nextPos == fText.length() || fSP.contains(nextChar) || fGL.contains(nextChar) ||
-                        fWJ.contains(nextChar) || fCL.contains(nextChar) || fQU.contains(nextChar) ||
-                        fCP.contains(nextChar) || fEX.contains(nextChar) || fIS.contains(nextChar) ||
-                        fSY.contains(nextChar) || fBK.contains(nextChar) || fCR.contains(nextChar) ||
-                        fLF.contains(nextChar) || fNL.contains(nextChar) || fZW.contains(nextChar)) {
-                        setAppliedRule(pos, "LB 15b × [\\p{Pf}&QU] ( SP | GL | WJ | CL | QU | CP | EX | IS | SY | BK | CR | LF | NL | ZW | eot)");
-                        continue;
-                    }
-                }
-
-                if (nextPos < fText.length()) {
-                    int nextChar = fText.codePointAt(nextPos);
-                    if (fSP.contains(prevChar) && fIS.contains(thisChar) && fNU.contains(nextChar)) {
-                        setAppliedRule(pos, "LB 15c Break before an IS that begins a number and follows a space");
-                        break;
-                    }
-                }
-
-                if (fIS.contains(thisChar)) {
-                    setAppliedRule(pos, "LB 15d Do not break before numeric separators, even after spaces");
-                    continue;
-                }
-
-                if (fNS.contains(thisChar)) {
-                    tPos = prevPos;
-                    while (tPos > 0 && fSP.contains(UTF16.charAt(fText, tPos))) {
-                        tPos = moveIndex32(fText, tPos, -1);
-                    }
-                    while (tPos > 0 && fCM.contains(UTF16.charAt(fText, tPos))) {
-                        tPos = moveIndex32(fText, tPos, -1);
-                    }
-                    if (fCL.contains(UTF16.charAt(fText, tPos)) || fCP.contains(UTF16.charAt(fText, tPos))) {
-                        setAppliedRule(pos, "LB 16  (CL | CP) SP* x NS");
-                        continue;
-                    }
-                }
-
-
-                if (fB2.contains(thisChar)) {
-                    tPos = prevPos;
-                    while (tPos > 0 && fSP.contains(UTF16.charAt(fText, tPos))) {
-                        tPos = moveIndex32(fText, tPos, -1);
-                    }
-                    while (tPos > 0 && fCM.contains(UTF16.charAt(fText, tPos))) {
-                        tPos = moveIndex32(fText, tPos, -1);
-                    }
-                    if (fB2.contains(UTF16.charAt(fText, tPos))) {
-                        setAppliedRule(pos, "LB 17  B2 SP* x B2");
-                        continue;
-                    }
-                }
-
-                if (fSP.contains(prevChar)) {
-                    setAppliedRule(pos, "LB 18  break after space");
-                    break;
-                }
-
-                // LB 19
-                // × [QU-\p{Pi}]
-                if (fQU.contains(thisChar) && !fPi.contains(thisChar)) {
-                        setAppliedRule(pos, "LB 19 × [QU-\\p{Pi}]");
-                    continue;
-                }
-                // [QU-\p{Pf}] ×
-                if (fQU.contains(prevChar) && !fPf.contains(prevChar)) {
-                    setAppliedRule(pos, "LB 19 [QU-\\p{Pf}] ×");
-                    continue;
-                }
-
-                // LB 19a
-                // [^\p{ea=F}\p{ea=W}\p{ea=H}] × QU
-                if (!feaFWH.contains(prevChar) && fQU.contains(thisChar)) {
-                    setAppliedRule(pos, "LB 19a [^\\p{ea=F}\\p{ea=W}\\p{ea=H}] × QU");
-                    continue;
-                }
-                // × QU ( [^\p{ea=F}\p{ea=W}\p{ea=H}] | eot )
-                if (fQU.contains(thisChar)) {
-                    if (nextPos < fText.length()) {
-                        int nextChar = fText.codePointAt(nextPos);
-                        if (!feaFWH.contains(nextChar)) {
-                            setAppliedRule(pos, "LB 19a × QU [^\\p{ea=F}\\p{ea=W}\\p{ea=H}]");
-                            continue;
-                        }
-                    } else {
-                        setAppliedRule(pos, "LB 19 × QU eot");
-                        continue;
-                    }
-                }
-                // QU × [^\p{ea=F}\p{ea=W}\p{ea=H}]
-                if (fQU.contains(prevChar) && !feaFWH.contains(thisChar)) {
-                    setAppliedRule(pos, "LB 19a QU × [^\\p{ea=F}\\p{ea=W}\\p{ea=H}]");
-                    continue;
-                }
-                // ( sot | [^\p{ea=F}\p{ea=W}\p{ea=H}] ) QU ×
-                if (fQU.contains(prevChar)) {
-                    if (prevPos == 0) {
-                        setAppliedRule(pos, "LB 19a sot QU ×");
-                        continue;
-                    }
-                    // prevPosX2 is -1 if there was a break, and prevCharX2 is 0; but the UAX #14 rules can
-                    // look through breaks.
-                    int breakObliviousPrevPosX2 = moveIndex32(fText, prevPos, -1);
-                    while (fCM.contains(fText.codePointAt(breakObliviousPrevPosX2))) {
-                        if (breakObliviousPrevPosX2 == 0) {
-                            break;
-                        }
-                        int beforeCM = moveIndex32(fText, breakObliviousPrevPosX2, -1);
-                        if (fBK.contains(fText.codePointAt(beforeCM)) ||
-                            fCR.contains(fText.codePointAt(beforeCM)) ||
-                            fLF.contains(fText.codePointAt(beforeCM)) ||
-                            fNL.contains(fText.codePointAt(beforeCM)) ||
-                            fSP.contains(fText.codePointAt(beforeCM)) ||
-                            fZW.contains(fText.codePointAt(beforeCM))) {
-                            break;
-                        }
-                        breakObliviousPrevPosX2 = beforeCM;
-                    }
-                    if (!feaFWH.contains(fText.codePointAt(breakObliviousPrevPosX2)) ||
-                        fCM.contains(fText.codePointAt(breakObliviousPrevPosX2))) {
-                        setAppliedRule(pos, "LB 19a [^\\p{ea=F}\\p{ea=W}\\p{ea=H}] QU ×");
-                        continue;
-                    }
-                }
-
-                if (fCB.contains(thisChar) || fCB.contains(prevChar)) {
-                    setAppliedRule(pos, "LB 20  Break around a CB");
-                    break;
-                }
-
-                // Don't break between Hyphens and letters if a break or a space precedes the hyphen.
-                // Formerly this was a Finnish tailoring.
-                // (sot | BK | CR | LF | NL | SP | ZW | CB | GL) ( HY | [\u2010] ) × AL
-                if (fAL.contains(thisChar) && (fHY.contains(prevChar) || fHH.contains(prevChar))) {
-                    // sot ( HY | [\u2010] ) × AL.
-                    if (prevPos == 0) {
-                        setAppliedRule(pos, "LB 20a");
-                        continue;
-                    }
-                    // prevPosX2 is -1 if there was a break; but the UAX #14 rules can
-                    // look through breaks.
-                    int breakObliviousPrevPosX2 = moveIndex32(fText, prevPos, -1);
-                    if (fBK.contains(fText.codePointAt(breakObliviousPrevPosX2)) ||
-                        fCR.contains(fText.codePointAt(breakObliviousPrevPosX2)) ||
-                        fLF.contains(fText.codePointAt(breakObliviousPrevPosX2)) ||
-                        fNL.contains(fText.codePointAt(breakObliviousPrevPosX2)) ||
-                        fSP.contains(fText.codePointAt(breakObliviousPrevPosX2)) ||
-                        fGL.contains(fText.codePointAt(breakObliviousPrevPosX2)) ||
-                        fZW.contains(fText.codePointAt(breakObliviousPrevPosX2))) {
-                        setAppliedRule(pos, "LB 20a");
-                        continue;
-                    }
-                    while (breakObliviousPrevPosX2 > 0 &&
-                            fCM.contains(fText.codePointAt(breakObliviousPrevPosX2))) {
-                        breakObliviousPrevPosX2 = moveIndex32(fText, breakObliviousPrevPosX2, -1);
-                    }
-                    if (fCB.contains(fText.codePointAt(breakObliviousPrevPosX2))) {
-                        setAppliedRule(pos, "LB 20a");
-                        continue;
-                    }
-                }
-
-                if (fBA.contains(thisChar) ||
-                        fHY.contains(thisChar) ||
-                        fNS.contains(thisChar) ||
-                        fBB.contains(prevChar) )   {
-                    setAppliedRule(pos, "LB 21");
-                    continue;
-                }
-
-                if (fHL.contains(prevCharX2) &&
-                    (fHY.contains(prevChar) ||
-                     (fBA.contains(prevChar) && !feaFWH.contains(prevChar))) &&
-                    !fHL.contains(thisChar)) {
-                    setAppliedRule(pos, "LB 21a HL (HY | BA) x [^HL]");
-                    continue;
-                }
-
-                if (fSY.contains(prevChar) && fHL.contains(thisChar)) {
-                    setAppliedRule(pos, "LB 21b SY x HL");
-                    continue;
-                }
-
-                if (fIN.contains(thisChar)) {
-                    setAppliedRule(pos, "LB 22");
-                    continue;
-                }
-
-                //          (AL | HL) x NU
-                //          NU x (AL | HL)
-                if ((fAL.contains(prevChar) || fHL.contains(prevChar)) && fNU.contains(thisChar)) {
-                    setAppliedRule(pos, "LB 23");
-                    continue;
-                }
-                if (fNU.contains(prevChar) && (fAL.contains(thisChar) || fHL.contains(thisChar))) {
-                    setAppliedRule(pos, "LB 23");
-                    continue;
-                }
-
-                // Do not break between numeric prefixes and ideographs, or between ideographs and numeric postfixes.
-                //      PR x (ID | EB | EM)
-                //     (ID | EB | EM) x PO
-                if (fPR.contains(prevChar) &&
-                        (fID.contains(thisChar) || fEB.contains(thisChar) || fEM.contains(thisChar)))  {
-                    setAppliedRule(pos, "LB 23a");
-                    continue;
-                }
-                if ((fID.contains(prevChar) || fEB.contains(prevChar) || fEM.contains(prevChar)) &&
-                        fPO.contains(thisChar)) {
-                    setAppliedRule(pos, "LB 23a");
-                    continue;
-                }
-
-                // Do not break between prefix and letters or ideographs.
-                //         (PR | PO) x (AL | HL)
-                //         (AL | HL) x (PR | PO)
-                if ((fPR.contains(prevChar) || fPO.contains(prevChar)) &&
-                        (fAL.contains(thisChar) || fHL.contains(thisChar))) {
-                    setAppliedRule(pos, "LB 24  no break between prefix and letters or ideographs");
-                    continue;
-                }
-                if ((fAL.contains(prevChar) || fHL.contains(prevChar)) &&
-                        (fPR.contains(thisChar) || fPO.contains(thisChar))) {
-                    setAppliedRule(pos, "LB 24  no break between prefix and letters or ideographs");
-                    continue;
-                }
-
-                boolean continueToNextPosition = false;
-                // LB 25.
-                for (XUnicodeSet[] pair : new XUnicodeSet[][]{
-                         new XUnicodeSet[]{fCL, fPO}, // 1. NU (SY | IS)* CL × PO
-                         new XUnicodeSet[]{fCP, fPO}, // 2. NU (SY | IS)* CP × PO
-                         new XUnicodeSet[]{fCL, fPR}, // 3. NU (SY | IS)* CL × PR
-                         new XUnicodeSet[]{fCP, fPR}, // 4. NU (SY | IS)* CP × PR
-                     }) {
-                    XUnicodeSet left = pair[0];
-                    XUnicodeSet right = pair[1];
-                    if (left.contains(prevChar) && right.contains(thisChar)) {
-                        // Check for the NU (SY | IS)* part.
-                        boolean leftHandSideMatches = false;
-                        tPos = moveIndex32(fText, prevPos, -1);
-                        for (;;) {
-                            while (tPos > 0 && fCM.contains(fText.codePointAt(tPos))) {
-                                tPos = moveIndex32(fText, tPos, -1);
-                            }
-                            final int tChar = fText.codePointAt(tPos);
-                            if (fSY.contains(tChar) || fIS.contains(tChar)) {
-                                if (tPos == 0) {
-                                    leftHandSideMatches = false;
-                                    break;
-                                }
-                                tPos = moveIndex32(fText, tPos, -1);
-                            } else if (fNU.contains(tChar)) {
-                                leftHandSideMatches = true;
-                                break;
-                            } else {
-                                leftHandSideMatches = false;
-                                break;
-                            }
-                        }
-                        if (leftHandSideMatches) {
-                            setAppliedRule(pos, "LB 25/1..4");
-                            continueToNextPosition = true;
-                            break;
-                        }
-                    }
-                }
-                if (continueToNextPosition) {
-                    continue;
-                }
-                // 5. NU (SY | IS)* × PO
-                // 6. NU (SY | IS)* × PR
-                // 13. NU (SY | IS)* × NU
-                boolean leftHandSideMatches;
-                tPos = prevPos;
-                for (;;) {
-                    while (tPos > 0 && fCM.contains(fText.codePointAt(tPos))) {
-                        tPos = moveIndex32(fText, tPos, -1);
-                    }
-                    final int tChar = fText.codePointAt(tPos);
-                    if (fSY.contains(tChar) || fIS.contains(tChar)) {
-                        if (tPos == 0) {
-                            leftHandSideMatches = false;
-                            break;
-                        }
-                        tPos = moveIndex32(fText, tPos, -1);
-                    } else if (fNU.contains(tChar)) {
-                        leftHandSideMatches = true;
-                        break;
-                    } else {
-                        leftHandSideMatches = false;
-                        break;
-                    }
-                }
-                if (leftHandSideMatches &&
-                    (fPO.contains(thisChar) || fPR.contains(thisChar) || fNU.contains(thisChar))) {
-                    setAppliedRule(pos, "LB 25/5,6,13,14");
-                    continue;
-                }
-                if (nextPos < fText.length()) {
-                    final int nextChar = fText.codePointAt(nextPos);
-                    // 7. PO × OP NU
-                    if (fPO.contains(prevChar) && fOP.contains(thisChar) && fNU.contains(nextChar)) {
-                        setAppliedRule(pos, "LB 25/7");
-                        continue;
-                    }
-                    // 9. PR × OP NU
-                    if (fPR.contains(prevChar) && fOP.contains(thisChar) && fNU.contains(nextChar)) {
-                        setAppliedRule(pos, "LB 25/9");
-                        continue;
-                    }
-                    int nextPosX2 = moveIndex32(fText, nextPos, 1);
-                    while (nextPosX2 < fText.length() && fCM.contains(fText.codePointAt(nextPosX2))) {
-                        nextPosX2 = moveIndex32(fText, nextPosX2, 1);
-                    }
-        
-                    if (nextPosX2 < fText.length()) {
-                        final int nextCharX2 = fText.codePointAt(nextPosX2);
-                        // 7bis. PO × OP IS NU
-                        if (fPO.contains(prevChar) && fOP.contains(thisChar) && fIS.contains(nextChar) &&
-                            fNU.contains(nextCharX2)) {
-                            setAppliedRule(pos, "LB 25/7bis");
-                            continue;
-                        }
-                        // 9bis. PR × OP IS NU
-                        if (fPR.contains(prevChar) && fOP.contains(thisChar) && fIS.contains(nextChar) &&
-                            fNU.contains(nextCharX2)) {
-                            setAppliedRule(pos, "LB 25/9bis");
-                            continue;
-                        }
-                    }
-                }
-                for (XUnicodeSet[] pair : new XUnicodeSet[][]{
-                         new XUnicodeSet[]{fPO, fNU}, // 8. PO × NU
-                         new XUnicodeSet[]{fPR, fNU}, // 10. PR × NU
-                         new XUnicodeSet[]{fHY, fNU}, // 11. HY × NU
-                         new XUnicodeSet[]{fIS, fNU}, // 12. IS × NU
-                     }) {
-                    XUnicodeSet left = pair[0];
-                    XUnicodeSet right = pair[1];
-                    if (left.contains(prevChar) && right.contains(thisChar)) {
-                        continueToNextPosition = true;
-                        break;
-                    }
-                }
-                if (continueToNextPosition) {
-                  continue;
-                }        
-
-                if (fJL.contains(prevChar) && (fJL.contains(thisChar) ||
-                        fJV.contains(thisChar) ||
-                        fH2.contains(thisChar) ||
-                        fH3.contains(thisChar))) {
-                    setAppliedRule(pos, "LB 26  Do not break a Korean syllable.");
-                    continue;
-                }
-
-                if ((fJV.contains(prevChar) || fH2.contains(prevChar))  &&
-                        (fJV.contains(thisChar) || fJT.contains(thisChar))) {
-                    setAppliedRule(pos, "LB 26  Do not break a Korean syllable.");
-                    continue;
-                }
-
-                if ((fJT.contains(prevChar) || fH3.contains(prevChar)) &&
-                        fJT.contains(thisChar)) {
-                    setAppliedRule(pos, "LB 26  Do not break a Korean syllable.");
-                    continue;
-                }
-
-                if ((fJL.contains(prevChar) || fJV.contains(prevChar) ||
-                        fJT.contains(prevChar) || fH2.contains(prevChar) || fH3.contains(prevChar)) &&
-                        fPO.contains(thisChar)) {
-                    setAppliedRule(pos, "LB 27  Treat a Korean Syllable Block the same as ID.");
-                    continue;
-                }
-                if (fPR.contains(prevChar) && (fJL.contains(thisChar) || fJV.contains(thisChar) ||
-                        fJT.contains(thisChar) || fH2.contains(thisChar) || fH3.contains(thisChar))) {
-                    setAppliedRule(pos, "LB 27  Treat a Korean Syllable Block the same as ID.");
-                    continue;
-                }
-
-
-
-                if ((fAL.contains(prevChar) || fHL.contains(prevChar)) && (fAL.contains(thisChar) || fHL.contains(thisChar))) {
-                    setAppliedRule(pos, "LB 28  Do not break between alphabetics");
-                    continue;
-                }
-
-                if (fAP.contains(prevChar) &&
-                    (fAK.contains(thisChar) || thisChar == '◌' || fAS.contains(thisChar))) {
-                    setAppliedRule(pos, "LB 28a.1  AP x (AK | ◌ | AS)");
-                    continue;
-                }
-
-                if ((fAK.contains(prevChar) || prevChar == '◌' || fAS.contains(prevChar)) &&
-                    (fVF.contains(thisChar) || fVI.contains(thisChar))) {
-                    setAppliedRule(pos, "LB 28a.2  (AK | ◌ | AS) x (VF | VI)");
-                    continue;
-                }
-
-                if ((fAK.contains(prevCharX2) || prevCharX2 == '◌' || fAS.contains(prevCharX2)) &&
-                    fVI.contains(prevChar) &&
-                    (fAK.contains(thisChar) || thisChar == '◌')) {
-                    setAppliedRule(pos, "LB 28a.3  (AK | ◌ | AS) VI x (AK | ◌)");
-                    continue;
-                }
-
-                if (nextPos < fText.length()) {
-                    // note: UnicodeString::char32At(length) returns ffff, not distinguishable
-                    //       from a legit ffff noncharacter. So test length separately.
-                    int nextChar = UTF16.charAt(fText, nextPos);
-                    if ((fAK.contains(prevChar) || prevChar == '◌' || fAS.contains(prevChar)) &&
-                        (fAK.contains(thisChar) || thisChar == '◌' || fAS.contains(thisChar)) &&
-                        fVF.contains(nextChar)) {
-                        setAppliedRule(pos, "LB 28a.4  (AK | ◌ | AS) x (AK | ◌ | AS) VF");
-                        continue;
-                    }
-                }
-
-                if (fIS.contains(prevChar) && (fAL.contains(thisChar) || fHL.contains(thisChar))) {
-                    setAppliedRule(pos, "LB 29  Do not break between numeric punctuation and alphabetics");
-                    continue;
-                }
-
-                //          (AL | NU) x OP
-                //          CP x (AL | NU)
-                if ((fAL.contains(prevChar) || fHL.contains(prevChar) || fNU.contains(prevChar)) &&
-                        fOP30.contains(thisChar)) {
-                    setAppliedRule(pos, "LB 30  Do not break between letters, numbers, or ordinary symbols and opening or closing punctuation.");
-                    continue;
-                }
-                if (fCP30.contains(prevChar) &&
-                        (fAL.contains(thisChar) || fHL.contains(thisChar) || fNU.contains(thisChar))) {
-                    setAppliedRule(pos, "LB 30  Do not break between letters, numbers, or ordinary symbols and opening or closing punctuation.");
-                    continue;
-                }
-
-                //             RI RI  ÷  RI
-                //                RI  x  RI
-                if (fRI.contains(prevCharX2) && fRI.contains(prevChar) && fRI.contains(thisChar)) {
-                    setAppliedRule(pos, "LB 30a Break between pairs of Regional Indicators.");
-                    break;
-                }
-                if (fRI.contains(prevChar) && fRI.contains(thisChar)) {
-                    // Two Regional Indicators have been paired.
-                    // Over-write the trailing one (thisChar) to prevent it from forming another pair with a
-                    // following RI. This is a hack.
-                    thisChar = -1;
-                    setAppliedRule(pos, "LB 30a Break between pairs of Regional Indicators.");
-                    continue;
-                }
-
-                // LB30b Do not break between an emoji base (or potential emoji) and an emoji modifier.
-                if (fEB.contains(prevChar) && fEM.contains(thisChar)) {
-                    setAppliedRule(pos, "LB 30b Emoji Base x Emoji Modifier");
-                    continue;
-                }
-
-                if (fExtPictUnassigned.contains(prevChar) && fEM.contains(thisChar)) {
-                    setAppliedRule(pos, "LB30b    [\\p{Extended_Pictographic}&\\p{Cn}] × EM");
-                    continue;
+            for (final SegmentationRule rule : rules) {
+                rule.apply(remapped, resolved);
+            }
+            for (int i = 0; i < resolved.length; ++i) {
+                if (resolved[i].appliedRule == null) {
+                    throw new IllegalArgumentException("Failed to resolve at " + i);
                 }
-
-                // LB 31    Break everywhere else
-                setAppliedRule(pos, "LB 31 Break everywhere else");
-                break;
+                setAppliedRule(i, resolved[i].appliedRule.name());
             }
-
-            return pos;
         }
 
-
-
-        // Match the following regular expression in the input text.
-        //    ((PR | PO) CM*)? ((OP | HY) CM*)? (IS CM*)? NU CM* ((NU | IS | SY) CM*) * ((CL | CP) CM*)?  (PR | PO) CM*)?
-        //      0    0   1       4    4    4      5  5              7    7    7    7      9    9    9     11   11    (match states)
-        //  retVals array  [0]  index of the start of the match, or -1 if no match
-        //                 [1]  index of first char following the match.
-        //  Can not use Java regex because need supplementary character support,
-        //     and because Unicode char properties version must be the same as in
-        //     the version of ICU being tested.
-        private int[] LBNumberCheck(StringBuffer s, int startIdx, int[] retVals) {
-            if (retVals == null) {
-                retVals = new int[2];
-            }
-            retVals[0]     = -1;  // Indicates no match.
-            int matchState = 0;
-            int idx        = startIdx;
-
-            matchLoop: for (idx = startIdx; idx<s.length(); idx = moveIndex32(s, idx, 1)){
-                int c = UTF16.charAt(s, idx);
-                int cLBType = UCharacter.getIntPropertyValue(c, UProperty.LINE_BREAK);
-                switch (matchState) {
-                case 0:
-                    if (cLBType == UCharacter.LineBreak.PREFIX_NUMERIC ||
-                    cLBType == UCharacter.LineBreak.POSTFIX_NUMERIC) {
-                        matchState = 1;
-                        break;
-                    }
-                    if (cLBType == UCharacter.LineBreak.OPEN_PUNCTUATION) {
-                        matchState = 4;
-                        break;
-                    }
-                    if (cLBType == UCharacter.LineBreak.HYPHEN) {
-                        matchState = 4;
-                        break;
-                    }
-                    if (cLBType == UCharacter.LineBreak.INFIX_NUMERIC) {
-                        matchState = 5;
-                        break;
-                    }
-                    if (cLBType == UCharacter.LineBreak.NUMERIC) {
-                        matchState = 7;
-                        break;
-                    }
-                    break matchLoop;   /* No Match  */
-
-                case 1:
-                    if (cLBType == UCharacter.LineBreak.COMBINING_MARK || cLBType == UCharacter.LineBreak.ZWJ) {
-                        matchState = 1;
-                        break;
-                    }
-                    if (cLBType == UCharacter.LineBreak.OPEN_PUNCTUATION) {
-                        matchState = 4;
-                        break;
-                    }
-                    if (cLBType == UCharacter.LineBreak.HYPHEN) {
-                        matchState = 4;
-                        break;
-                    }
-                    if (cLBType == UCharacter.LineBreak.INFIX_NUMERIC) {
-                        matchState = 5;
-                        break;
-                    }
-                    if (cLBType == UCharacter.LineBreak.NUMERIC) {
-                        matchState = 7;
-                        break;
-                    }
-                    break matchLoop;   /* No Match  */
-
-                case 4:
-                    if (cLBType == UCharacter.LineBreak.COMBINING_MARK || cLBType == UCharacter.LineBreak.ZWJ) {
-                        matchState = 4;
-                        break;
-                    }
-                    if (cLBType == UCharacter.LineBreak.INFIX_NUMERIC) {
-                        matchState = 5;
-                        break;
-                    }
-                    if (cLBType == UCharacter.LineBreak.NUMERIC) {
-                        matchState = 7;
-                        break;
-                    }
-                    break matchLoop;   /* No Match  */
-
-                case 5:
-                    if (cLBType == UCharacter.LineBreak.COMBINING_MARK || cLBType == UCharacter.LineBreak.ZWJ) {
-                        matchState = 5;
-                        break;
-                    }
-                    if (cLBType == UCharacter.LineBreak.NUMERIC) {
-                        matchState = 7;
-                        break;
-                    }
-                    break matchLoop;   /* No Match  */
-
-
-                case 7:
-                    if (cLBType == UCharacter.LineBreak.COMBINING_MARK || cLBType == UCharacter.LineBreak.ZWJ) {
-                        matchState = 7;
-                        break;
-                    }
-                    if (cLBType == UCharacter.LineBreak.NUMERIC) {
-                        matchState = 7;
-                        break;
-                    }
-                    if (cLBType == UCharacter.LineBreak.INFIX_NUMERIC) {
-                        matchState = 7;
-                        break;
-                    }
-                    if (cLBType == UCharacter.LineBreak.BREAK_SYMBOLS) {
-                        matchState = 7;
-                        break;
-                    }
-                    if (cLBType == UCharacter.LineBreak.CLOSE_PUNCTUATION) {
-                        matchState = 9;
-                        break;
-                    }
-                    if (cLBType == UCharacter.LineBreak.CLOSE_PARENTHESIS) {
-                        matchState = 9;
-                        break;
-                    }
-                    if (cLBType == UCharacter.LineBreak.POSTFIX_NUMERIC) {
-                        matchState = 11;
-                        break;
-                    }
-                    if (cLBType == UCharacter.LineBreak.PREFIX_NUMERIC) {
-                        matchState = 11;
-                        break;
-                    }
-
-                    break matchLoop;    // Match Complete.
-                case 9:
-                    if (cLBType == UCharacter.LineBreak.COMBINING_MARK || cLBType == UCharacter.LineBreak.ZWJ) {
-                        matchState = 9;
-                        break;
-                    }
-                    if (cLBType == UCharacter.LineBreak.POSTFIX_NUMERIC) {
-                        matchState = 11;
-                        break;
-                    }
-                    if (cLBType == UCharacter.LineBreak.PREFIX_NUMERIC) {
-                        matchState = 11;
-                        break;
-                    }
-                    break matchLoop;    // Match Complete.
-                case 11:
-                    if (cLBType == UCharacter.LineBreak.COMBINING_MARK || cLBType == UCharacter.LineBreak.ZWJ) {
-                        matchState = 11;
-                        break;
-                    }
-                    break matchLoop;    // Match Complete.
+        @Override
+        int next(int startPos) {  // Returns the first index after startPos whose applied rule resolves to BREAK.
+            for (int i = startPos + 1; i < resolved.length; ++i) {
+                if (resolved[i].appliedRule.resolution() == Resolution.BREAK) {
+                    return i;
                 }
             }
-            if (matchState >= 7) {
-                retVals[0] = startIdx;
-                retVals[1] = idx;
-            }
-            return retVals;
+            return -1;  // No BREAK position between startPos + 1 and resolved.length - 1.
         }
 
-
         @Override
         List  charClasses() {
             return fSets;
@@ -2136,6 +1283,9 @@ static int  nextCP(StringBuffer s, int i) {
     }
 
 
+
+        
+
     /**
      * random number generator.  Not using Java's built-in Randoms for two reasons:
      *    1.  Using this code allows obtaining the same sequences as those from the ICU4C monkey test.
@@ -2151,6 +1301,9 @@ private static int  m_rand()
         return (m_seed >>> 16) % 32768;
     }
 
+    private static final String[] monkeys = {  // Progress tics printed by RunMonkey when numIterations == -1.
+        "🙈", "🙉", "🙊", "🐵", "🐒" };
+
     // Helper function for formatting error output.
     //   Append a string into a fixed-size field in a StringBuffer.
     //   Blank-pad the string if it is shorter than the field.
@@ -2214,6 +1367,7 @@ void RunMonkey(BreakIterator  bi, RBBIMonkeyKind mk, String name, int  seed, int
         boolean[]        precedingBreaks  = new boolean[TESTSTRINGLEN*2 + 1];
         int              i;
         int              loopCount        = 0;
+        int              errorCount       = 0;
         boolean          printTestData    = false;
         boolean          printBreaksFromBI = false;
 
@@ -2253,16 +1407,13 @@ void RunMonkey(BreakIterator  bi, RBBIMonkeyKind mk, String name, int  seed, int
 
         // For minimizing width of class name output.
         int classNameSize = mk.maxClassNameSize();
-
-        int  dotsOnLine = 0;
         while (loopCount < numIterations || numIterations == -1) {
             if (numIterations == -1 && loopCount % 10 == 0) {
                 // If test is running in an infinite loop, display a periodic tic so
                 //   we can tell that it is making progress.
-                System.out.print(".");
-                if (dotsOnLine++ >= 80){
-                    System.out.println();
-                    dotsOnLine = 0;
+                System.out.print(monkeys[m_rand() % monkeys.length]);
+                if (loopCount % 1_000_000 == 0) {  // Summary once per million iterations, matching the unit in the message.
+                    System.out.println("\nTested " + loopCount / 1_000_000 + " million random strings with " + errorCount + " errors");
                 }
             }
             // Save current random number seed, so that we can recreate the random numbers
@@ -2423,6 +1574,7 @@ void RunMonkey(BreakIterator  bi, RBBIMonkeyKind mk, String name, int  seed, int
                 }
 
                 if (errorType != null) {
+                    ++errorCount;
                     // Format a range of the test text that includes the failure as
                     //  a data item that can be included in the rbbi test data file.
 
@@ -2465,12 +1617,14 @@ void RunMonkey(BreakIterator  bi, RBBIMonkeyKind mk, String name, int  seed, int
                     buffer.append("\n")
                         .append((expectedBreaks[i] ? "Break expected but not found." : "Break found but not expected."))
                         .append(
-                            String.format(" at index %d. Parameters to reproduce: @\"type=%s  seed=%d  loop=1\"\n",
+                            String.format(" at index %d. Parameters to reproduce: -Dtest=RBBITestMonkey#Test%sMonkey -Dseed=%d -Dloop=1\n",
                               i, name, seed));
 
                     int c;  // Char from test data
                     for (ci = startContext;  ci <= endContext && ci != -1;  ci = nextCP(testText, ci)) {
-
+                        if (ci == testText.length()) {
+                            break;  // TODO(egg): The index dance above seems wrong.
+                        }
                         c = testText.codePointAt(ci);
                         buffer.append((ci == i) ? " --→" : "    ")
                             .append(String.format(" %3d : ", ci))
@@ -2514,7 +1668,7 @@ public void TestCharMonkey() {
 
         RBBICharMonkey  m = new RBBICharMonkey();
         BreakIterator   bi = BreakIterator.getCharacterInstance(Locale.US);
-        RunMonkey(bi, m, "char", seed, loopCount);
+        RunMonkey(bi, m, "Char", seed, loopCount);
     }
 
     @Test
@@ -2525,7 +1679,7 @@ public void TestWordMonkey() {
         logln("Word Break Monkey Test");
         RBBIWordMonkey  m = new RBBIWordMonkey();
         BreakIterator   bi = BreakIterator.getWordInstance(Locale.US);
-        RunMonkey(bi, m, "word", seed, loopCount);
+        RunMonkey(bi, m, "Word", seed, loopCount);
     }
 
     @Test
@@ -2537,7 +1691,7 @@ public void TestLineMonkey() {
         RBBILineMonkey  m = new RBBILineMonkey();
         BreakIterator   bi = BreakIterator.getLineInstance(Locale.US);
         try {
-            RunMonkey(bi, m, "line", seed, loopCount);
+            RunMonkey(bi, m, "Line", seed, loopCount);
         } catch(IllegalArgumentException e) {
             if (e.getMessage().equals("Invalid code point U+-000001")) {
                 // Looks like you used class UnicodeSet instead of class XUnicodeSet
@@ -2558,7 +1712,7 @@ public void TestSentMonkey() {
         logln("Sentence Break Monkey Test");
         RBBISentenceMonkey  m = new RBBISentenceMonkey();
         BreakIterator   bi = BreakIterator.getSentenceInstance(Locale.US);
-        RunMonkey(bi, m, "sent", seed, loopCount);
+        RunMonkey(bi, m, "Sent", seed, loopCount);
     }
     //
     //  Round-trip monkey tests.
@@ -2579,7 +1733,7 @@ public void TestRTCharMonkey() {
         BreakIterator   bi = BreakIterator.getCharacterInstance(Locale.US);
         String rules = bi.toString();
         BreakIterator rtbi = new RuleBasedBreakIterator(rules);
-        RunMonkey(rtbi, m, "char", seed, loopCount);
+        RunMonkey(rtbi, m, "RTChar", seed, loopCount);
     }
 
     @Test
@@ -2592,7 +1746,7 @@ public void TestRTWordMonkey() {
         BreakIterator   bi = BreakIterator.getWordInstance(Locale.US);
         String rules = bi.toString();
         BreakIterator rtbi = new RuleBasedBreakIterator(rules);
-        RunMonkey(rtbi, m, "word", seed, loopCount);
+        RunMonkey(rtbi, m, "RTWord", seed, loopCount);
     }
 
     @Test
@@ -2606,7 +1760,7 @@ public void TestRTLineMonkey() {
         String rules = bi.toString();
         BreakIterator rtbi = new RuleBasedBreakIterator(rules);
         try {
-            RunMonkey(rtbi, m, "line", seed, loopCount);
+            RunMonkey(rtbi, m, "RTLine", seed, loopCount);
         } catch(IllegalArgumentException e) {
             if (e.getMessage().equals("Invalid code point U+-000001")) {
                 // Looks like you used class UnicodeSet instead of class XUnicodeSet
@@ -2629,6 +1783,6 @@ public void TestRTSentMonkey() {
         BreakIterator   bi = BreakIterator.getSentenceInstance(Locale.US);
         String rules = bi.toString();
         BreakIterator rtbi = new RuleBasedBreakIterator(rules);
-        RunMonkey(rtbi, m, "sent", seed, loopCount);
+        RunMonkey(rtbi, m, "RTSent", seed, loopCount);
     }
 }
diff --git a/icu4j/main/core/src/test/java/com/ibm/icu/dev/test/rbbi/RegexRule.java b/icu4j/main/core/src/test/java/com/ibm/icu/dev/test/rbbi/RegexRule.java
new file mode 100644
index 000000000000..cbe59071f08b
--- /dev/null
+++ b/icu4j/main/core/src/test/java/com/ibm/icu/dev/test/rbbi/RegexRule.java
@@ -0,0 +1,109 @@
+// © 2024 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+package com.ibm.icu.dev.test.rbbi;
+
+import java.util.Arrays;
+import java.util.Optional;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * A regex rule expressed as in UAXes #14 and #29.
+ *
+ * The rule consists of two regexes, for the context before and after a position
+ * in the remapped text, and of a resolution (break or not) that applies to the
+ * corresponding position in the original string if both match. Positions that
+ * already have an applied rule are left unchanged by {@code apply}.
+ * Both regexes are passed through {@code expandUnicodeSets} before compilation.
+ */
+class RegexRule extends SegmentationRule {
+    RegexRule(String name, String before, Resolution resolution,
+            String after) {
+        super(name);
+        resolution_ = resolution;
+        before_ = Pattern.compile(expandUnicodeSets(before), Pattern.COMMENTS | Pattern.DOTALL);
+        endsWithBefore_ = Pattern.compile(  // Used with matches(): /before/ must end exactly at the region end.
+                ".*(" + expandUnicodeSets(before) + ")", Pattern.COMMENTS | Pattern.DOTALL);
+        after_ = Pattern.compile(expandUnicodeSets(after), Pattern.COMMENTS | Pattern.DOTALL);
+    }
+
+    @Override
+    void apply(StringBuilder remapped, BreakContext[] resolved) {
+        // The unicodetools implementation simply tries, for each index, to
+        // match the string up to the index against /.*(before)/ (with
+        // `matches`) and the beginning of the string after the index against
+        // /after/ (with `lookingAt`), but that is very slow, especially for
+        // nonempty /before/. While the old monkeys are not a production
+        // implementation, we still do not want them to be too slow, since we
+        // need to test millions of sample strings. Instead we search for
+        // /before/ and /after/, and check resulting candidates. This speeds
+        // things up by a factor of ~40.
+        // We need to be careful about greedy matching: The first position where
+        // the rule matches may be before the end of the first /before/ match.
+        // However, it is both:
+        // 1. within a /before/ match or at its bounds,
+        // 2. at the beginning of an /after/ match.
+        // Further, the /before/ context of the rule matches within the
+        // aforementioned /before/ match. Note that we need to look for
+        // overlapping matches, thus calls to `find` are always preceded by a
+        // reset via `region`.
+        final Matcher beforeSearch = before_.matcher(remapped);
+        final Matcher afterSearch = after_.matcher(remapped);
+        beforeSearch.useAnchoringBounds(false);
+        afterSearch.useAnchoringBounds(false);
+        if (beforeSearch.find() && afterSearch.find()) {
+            for (;;) {
+                if (afterSearch.start() < beforeSearch.start()) {  // /after/ starts before the /before/ match: advance /after/.
+                    afterSearch.region(beforeSearch.start(), remapped.length());
+                    if (!afterSearch.find()) {
+                        break;
+                    }
+                } else if (afterSearch.start() > beforeSearch.end()) {  // /after/ starts past the /before/ match: advance /before/.
+                    if (beforeSearch.start() == remapped.length()) {
+                        break;
+                    }
+                    beforeSearch.region(remapped.offsetByCodePoints(beforeSearch.start(), 1),
+                            remapped.length());
+                    if (!beforeSearch.find()) {
+                        break;
+                    }
+                } else {  // Candidate: /after/ starts within the /before/ match or at its bounds.
+                    final Optional<BreakContext> position = Arrays.stream(resolved)
+                            .filter(r -> r.indexInRemapped != null && r.indexInRemapped == afterSearch.start())
+                            .findFirst();
+                    if (!position.isPresent()) {
+                        throw new IllegalArgumentException(("Rule " + name() +
+                                " found a break at a position which does not correspond to an index in " +
+                                "the original string"));
+                    }
+                    if (position.get().appliedRule == null &&  // Only resolve positions no earlier rule has resolved.
+                            endsWithBefore_.matcher(remapped)
+                                    .useAnchoringBounds(false)
+                                    .region(beforeSearch.start(), afterSearch.start())
+                                    .matches()) {
+                        position.get().appliedRule = this;
+                    }
+                    if (afterSearch.start() == remapped.length()) {
+                        break;
+                    }
+                    afterSearch.region(remapped.offsetByCodePoints(afterSearch.start(), 1),
+                            remapped.length());
+                    if (!afterSearch.find()) {
+                        break;
+                    }
+                }
+            }
+        }
+    }
+
+    @Override
+    Resolution resolution() {
+        return resolution_;
+    }
+
+    private final Pattern before_;
+    private final Pattern endsWithBefore_;
+    private final Pattern after_;
+    private final Resolution resolution_;
+}
\ No newline at end of file
diff --git a/icu4j/main/core/src/test/java/com/ibm/icu/dev/test/rbbi/RemapRule.java b/icu4j/main/core/src/test/java/com/ibm/icu/dev/test/rbbi/RemapRule.java
new file mode 100644
index 000000000000..e4bc8e79913b
--- /dev/null
+++ b/icu4j/main/core/src/test/java/com/ibm/icu/dev/test/rbbi/RemapRule.java
@@ -0,0 +1,167 @@
+// © 2024 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+package com.ibm.icu.dev.test.rbbi;
+
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+/**
+ * A remap rule expressed as in UAXes #14 and #29.
+ *
+ * A remap rule performs a normal regex replacement on the remapped string.
+ * This replacement may use capturing groups. Any positions in the original
+ * string that correspond to positions within the replaced text are resolved to
+ * NO_BREAK by this rule; {@code apply} throws IllegalArgumentException if such a
+ * position had already been resolved to BREAK.
+ */
+public class RemapRule extends SegmentationRule {
+    RemapRule(String name, String pattern, String replacement) {
+        super(name);
+        replacement_ = replacement;
+        pattern_ = Pattern.compile(expandUnicodeSets(pattern), Pattern.COMMENTS | Pattern.DOTALL);
+    }
+
+    @Override
+    void apply(StringBuilder remapped, BreakContext[] resolved) {
+        // This one has to be a StringBuffer rather than a StringBuilder because
+        // the overload of Matcher.appendReplacement that takes a StringBuilder is
+        // new in Java 9.
+        StringBuffer result = new StringBuffer();
+        int i = 0;
+        int offset = 0;
+        // We find all matches of the `pattern_` and replace them according to
+        // the `replacement_`, producing the new remapped string `result`.
+        // For every position i in the original string,
+        // `resolved[i].indexInRemapped` is null if i lies within a replaced
+        // match, and is set to the new index in `result` otherwise, by adding
+        // the accumulated difference `offset` between match lengths and
+        // replacement lengths.
+        // Consider a 4-codepoint, 6 code unit string s = ⟨ 𒀀, ◌́, ␠, ◌𝅲 ⟩, where
+        // ␠ stands for U+0020 and U+12000 𒀀 and U+1D172 ◌𝅲 each require two code
+        // units, and apply the following two rules:
+        // 1. (?<X>\P{lb=SP}) \p{lb=CM}* → ${X}
+        // 2. \p{lb=CM} → A
+        // The string remapped and the indexInRemapped values change as follows:
+        //   indexInRemapped                   remapped string     rule     final
+        //   (aligned on the initial string)                       applied  offset
+        //     𒀀     ◌́  ␠  ◌𝅲
+        //     0  1  2  3  4  5  6             ⟨ 𒀀, ◌́, ␠, ◌𝅲 ⟩      (none)
+        //     0  -  -  2  3  4  5             ⟨ 𒀀, ␠, ◌𝅲 ⟩         1        -1
+        //     0  -  -  2  3  -  4             ⟨ 𒀀, ␠, A ⟩          2        -1
+        //
+        // Note that the last indexInRemapped is always equal to the length of
+        // the remapped string.
+        final Matcher matcher = pattern_.matcher(remapped);
+        while (matcher.find()) {
+            for (;; ++i) {  // Shift the positions that precede or coincide with the start of the match.
+                if (resolved[i].indexInRemapped == null) {
+                    continue;
+                }
+                if (resolved[i].indexInRemapped != null &&  // (null was already skipped above)
+                        resolved[i].indexInRemapped > matcher.start()) {
+                    break;
+                }
+                resolved[i].indexInRemapped += offset;
+            }
+            for (;; ++i) {  // Resolve the positions strictly inside the match to this rule.
+                if (resolved[i].indexInRemapped == null) {
+                    continue;
+                }
+                // Note that
+                // `resolved[i].indexInRemapped > matcher.end()` should
+                // never happen with ordinary rules, but could in principle
+                // happen with rules that remap to code point sequences, e.g.,
+                // 1. BC → TYZ
+                // 2. AT → X
+                // applied to ⟨ A, B, C ⟩:
+                //   indexInRemapped     remapped           rule
+                //     A  B  C
+                //     0  1  2  3        ⟨ A, B, C ⟩        (none)
+                //     0  1  -  4        ⟨ A, T, Y, Z ⟩     1
+                //     0  -  -  3        ⟨ X, Y, Z ⟩        2
+                // Where for the application of rule 2, the match ends at
+                // position 2 in remapped, which does not correspond to a
+                // position in the original string.
+                if (resolved[i].indexInRemapped != null &&  // (null was already skipped above)
+                        resolved[i].indexInRemapped >= matcher.end()) {
+                    break;
+                }
+                if (resolved[i].appliedRule != null &&
+                        resolved[i].appliedRule.resolution() == Resolution.BREAK) {
+                    throw new IllegalArgumentException(
+                            "Replacement rule at remapped indices " +
+                                    matcher.start() +
+                                    " sqq. spans a break");
+                }
+                resolved[i].appliedRule = this;
+                resolved[i].indexInRemapped = null;
+            }
+            // While replacing, we need to check that we are not creating
+            // surrogate pairs.  Since appendReplacement performs two
+            // concatenations (the unreplaced segment and the replacement), we
+            // need to check in two places: whether the text appended after the
+            // previous contents of result starts with a trailing surrogate that
+            // ends up after a leading surrogate, and whether the character at
+            // matcher.start() + offset is a trailing surrogate preceded by a leading one.
+            // We break the pair by replacing one of the surrogates with U+FFFF,
+            // which has the same properties for all but line breaking, and the
+            // same behaviour in line breaking (lb=SG and lb=XX are both treated
+            // as lb=AL).
+            Integer trailingLead = null;
+            if (result.length() > 0 && Character.isHighSurrogate(result.charAt(result.length() - 1))) {
+                trailingLead = result.length() - 1;
+            }
+
+            matcher.appendReplacement(result, replacement_);
+
+            if (trailingLead != null && trailingLead + 1 < result.length() &&
+                    Character.isLowSurrogate(result.charAt(trailingLead + 1))) {
+                result.setCharAt(trailingLead, '\uFFFF');
+            }
+
+            if (matcher.start() + offset > 0 &&
+                    Character.isHighSurrogate(result.charAt(matcher.start() + offset - 1)) &&
+                    Character.isLowSurrogate(result.charAt(matcher.start() + offset))) {
+                result.setCharAt(matcher.start() + offset, '\uFFFF');
+            }
+            offset = result.length() - resolved[i].indexInRemapped;  // resolved[i] is the first position at or after the match end.
+        }
+        for (; i < resolved.length; ++i) {  // Shift the positions that follow the last match.
+            if (resolved[i].indexInRemapped == null) {
+                continue;
+            }
+            resolved[i].indexInRemapped += offset;
+        }
+
+        Integer trailingLead = null;
+        if (result.length() > 0 && Character.isHighSurrogate(result.charAt(result.length() - 1))) {
+            trailingLead = result.length() - 1;
+        }
+        matcher.appendTail(result);
+        if (trailingLead != null && trailingLead + 1 < result.length() &&
+                Character.isLowSurrogate(result.charAt(trailingLead + 1))) {
+            result.setCharAt(trailingLead, '\uFFFF');
+        }
+
+        if (resolved[resolved.length - 1].indexInRemapped != result.length()) {  // Sanity check of the invariant noted above.
+            StringBuilder indices = new StringBuilder();
+            for (final BreakContext r : resolved) {
+                indices.append(r.indexInRemapped == null ? "null" : r.indexInRemapped.toString());
+                indices.append(",");
+            }
+            throw new IllegalArgumentException("Inconsistent indexInRemapped " + indices + " for new remapped string " +
+                    result);
+        }
+        remapped.setLength(0);  // Replace the contents of the caller's buffer in place.
+        remapped.append(result);
+    }
+
+    @Override
+    Resolution resolution() {
+        return Resolution.NO_BREAK;
+    }
+
+    private final Pattern pattern_;
+    private final String replacement_;
+}
\ No newline at end of file
diff --git a/icu4j/main/core/src/test/java/com/ibm/icu/dev/test/rbbi/SegmentationRule.java b/icu4j/main/core/src/test/java/com/ibm/icu/dev/test/rbbi/SegmentationRule.java
new file mode 100644
index 000000000000..e7abdbe7e301
--- /dev/null
+++ b/icu4j/main/core/src/test/java/com/ibm/icu/dev/test/rbbi/SegmentationRule.java
@@ -0,0 +1,102 @@
+// © 2024 and later: Unicode, Inc. and others.
+// License & terms of use: http://www.unicode.org/copyright.html
+
+package com.ibm.icu.dev.test.rbbi;
+
+import java.text.ParsePosition;
+
+import javax.swing.RowFilter.Entry;
+
+import com.ibm.icu.impl.Utility;
+import com.ibm.icu.text.UnicodeSet;
+import com.ibm.icu.text.UnicodeSet.EntryRange;
+import com.ibm.icu.text.UTF16;
+
+/**
+ * A segmentation rule in the style of the rules of UAX #14 and UAX #29.
+ *
+ * <p>Rules are applied in sequence. Each rule works on a mutable remapped string, which
+ * the caller should initially set to the string to be segmented, and may resolve
+ * positions in the original string to either BREAK or NO_BREAK.
+ */
+public abstract class SegmentationRule {
+    /** The two possible outcomes for a resolved position. */
+    enum Resolution {
+        BREAK,
+        NO_BREAK
+    }
+
+    /** Mutable record passed to {@code apply} in the {@code resolved} array. */
+    static class BreakContext {
+        // May be null.
+        Integer indexInRemapped;
+        // Starts out null.
+        SegmentationRule appliedRule = null;
+
+        BreakContext(int index) {
+            indexInRemapped = index;
+        }
+    }
+
+    private final String name_;
+
+    SegmentationRule(String name) {
+        name_ = name;
+    }
+
+    /** Returns the name this rule was constructed with. */
+    String name() {
+        return name_;
+    }
+
+    /** Applies this rule; subclasses define how {@code remapped} and {@code resolved} change. */
+    abstract void apply(StringBuilder remapped, BreakContext[] resolved);
+
+    /** Returns the resolution of this rule; defined by subclasses. */
+    abstract Resolution resolution();
+
+    /**
+     * Returns {@code regex} with every UnicodeSet expression replaced by a character class
+     * that lists the ranges of the set with escaped end points.  A set is parsed wherever
+     * a '[' or a backslash occurs; all other characters are copied through unchanged.
+     */
+    protected String expandUnicodeSets(String regex) {
+        final StringBuilder expanded = new StringBuilder();
+        int pos = 0;
+        while (pos < regex.length()) {
+            final char c = regex.charAt(pos);
+            if (c != '[' && c != '\\') {
+                // Not the start of a set: copy the character as it is.
+                expanded.append(c);
+                ++pos;
+                continue;
+            }
+            final ParsePosition cursor = new ParsePosition(pos);
+            final UnicodeSet set = new UnicodeSet(regex, cursor, null);
+            // Every code point is escaped.  _generatePattern could be used instead, but its
+            // \U escapes would need converting to UTF-16 escapes, and # would need escaping.
+            expanded.append('[');
+            for (EntryRange range : set.ranges()) {
+                expanded.append(javaUEscape(range.codepoint));
+                if (range.codepointEnd != range.codepoint) {
+                    expanded.append('-').append(javaUEscape(range.codepointEnd));
+                }
+            }
+            expanded.append(']');
+            // Resume after the text consumed by the UnicodeSet parser.
+            pos = cursor.getIndex();
+        }
+        return expanded.toString();
+    }
+
+    // Returns a Java-style UTF-16 escape for a BMP code point, and the two escapes of the
+    // surrogate pair for any other code point.
+    private String javaUEscape(int codePoint) {
+        if (codePoint > 0xFFFF) {
+            final String lead = Utility.hex(UTF16.getLeadSurrogate(codePoint));
+            final String trail = Utility.hex(UTF16.getTrailSurrogate(codePoint));
+            return "\\u" + lead + "\\u" + trail;
+        }
+        return "\\u" + Utility.hex(codePoint);
+    }
+}
diff --git a/icu4j/main/core/src/test/resources/com/ibm/icu/dev/test/rbbi/break_rules/line.txt b/icu4j/main/core/src/test/resources/com/ibm/icu/dev/test/rbbi/break_rules/line.txt
index 9f85b7917139..e2154abf6309 100644
--- a/icu4j/main/core/src/test/resources/com/ibm/icu/dev/test/rbbi/break_rules/line.txt
+++ b/icu4j/main/core/src/test/resources/com/ibm/icu/dev/test/rbbi/break_rules/line.txt
@@ -176,7 +176,7 @@ LB11.2:      SP WJ;
 LB11.3:      WJ CM* [^CM];
 
 # Needs to apply before LB12, because the new monkeys are not greedy.
-LB20a.2:   GL (HY | HH) CM* AL;
+LB20a.2:   GL CM* (HY | HH) CM* AL;
 LB12:      GL CM* [^CM];
 
 LB12a:       [^SP BA HY] CM* GL;
diff --git a/icu4j/main/core/src/test/resources/com/ibm/icu/dev/test/rbbi/break_rules/line_cj.txt b/icu4j/main/core/src/test/resources/com/ibm/icu/dev/test/rbbi/break_rules/line_cj.txt
index 7aad76ecf107..bb0a6880ea29 100644
--- a/icu4j/main/core/src/test/resources/com/ibm/icu/dev/test/rbbi/break_rules/line_cj.txt
+++ b/icu4j/main/core/src/test/resources/com/ibm/icu/dev/test/rbbi/break_rules/line_cj.txt
@@ -180,7 +180,7 @@ LB11.2:      SP WJ;
 LB11.3:      WJ CM* [^CM];
 
 # Needs to apply before LB12, because the new monkeys are not greedy.
-LB20a.2:   GL (HY | HH) CM* AL;
+LB20a.2:   GL CM* (HY | HH) CM* AL;
 LB12:      GL CM* [^CM];
 
 LB12a:       [^SP BA HY] CM* GL;
diff --git a/icu4j/main/core/src/test/resources/com/ibm/icu/dev/test/rbbi/break_rules/line_loose.txt b/icu4j/main/core/src/test/resources/com/ibm/icu/dev/test/rbbi/break_rules/line_loose.txt
index 72e7563c9274..f9152060bf2d 100644
--- a/icu4j/main/core/src/test/resources/com/ibm/icu/dev/test/rbbi/break_rules/line_loose.txt
+++ b/icu4j/main/core/src/test/resources/com/ibm/icu/dev/test/rbbi/break_rules/line_loose.txt
@@ -181,7 +181,7 @@ LB11.2:      SP WJ;
 LB11.3:      WJ CM* [^CM];
 
 # Needs to apply before LB12, because the new monkeys are not greedy.
-LB20a.2:   GL (HY | HH) CM* AL;
+LB20a.2:   GL CM* (HY | HH) CM* AL;
 LB12:      GL CM* [^CM];
 
 LB12a:       [^SP BA HY] CM* GL;
diff --git a/icu4j/main/core/src/test/resources/com/ibm/icu/dev/test/rbbi/break_rules/line_loose_cj.txt b/icu4j/main/core/src/test/resources/com/ibm/icu/dev/test/rbbi/break_rules/line_loose_cj.txt
index 99d01874d1fb..b04236532bbd 100644
--- a/icu4j/main/core/src/test/resources/com/ibm/icu/dev/test/rbbi/break_rules/line_loose_cj.txt
+++ b/icu4j/main/core/src/test/resources/com/ibm/icu/dev/test/rbbi/break_rules/line_loose_cj.txt
@@ -200,7 +200,7 @@ LB11.2:      SP WJ;
 LB11.3:      WJ CM* [^CM];
 
 # Needs to apply before LB12, because the new monkeys are not greedy.
-LB20a.2:   GL (HY | HH) CM* AL;
+LB20a.2:   GL CM* (HY | HH) CM* AL;
 LB12:      GL CM* [^CM];
 
 LB12a:       [^SP BA BAX HY] CM* GL;
diff --git a/icu4j/main/core/src/test/resources/com/ibm/icu/dev/test/rbbi/break_rules/line_normal.txt b/icu4j/main/core/src/test/resources/com/ibm/icu/dev/test/rbbi/break_rules/line_normal.txt
index 211298539797..c7c518d5b68b 100644
--- a/icu4j/main/core/src/test/resources/com/ibm/icu/dev/test/rbbi/break_rules/line_normal.txt
+++ b/icu4j/main/core/src/test/resources/com/ibm/icu/dev/test/rbbi/break_rules/line_normal.txt
@@ -182,7 +182,7 @@ LB11.2:      SP WJ;
 LB11.3:      WJ CM* [^CM];
 
 # Needs to apply before LB12, because the new monkeys are not greedy.
-LB20a.2:   GL (HY | HH) CM* AL;
+LB20a.2:   GL CM* (HY | HH) CM* AL;
 LB12:      GL CM* [^CM];
 
 LB12a:       [^SP BA HY] CM* GL;
diff --git a/icu4j/main/core/src/test/resources/com/ibm/icu/dev/test/rbbi/break_rules/line_normal_cj.txt b/icu4j/main/core/src/test/resources/com/ibm/icu/dev/test/rbbi/break_rules/line_normal_cj.txt
index 2061f9170848..cfa9c7968e1b 100644
--- a/icu4j/main/core/src/test/resources/com/ibm/icu/dev/test/rbbi/break_rules/line_normal_cj.txt
+++ b/icu4j/main/core/src/test/resources/com/ibm/icu/dev/test/rbbi/break_rules/line_normal_cj.txt
@@ -186,7 +186,7 @@ LB11.2:      SP WJ;
 LB11.3:      WJ CM* [^CM];
 
 # Needs to apply before LB12, because the new monkeys are not greedy.
-LB20a.2:   GL (HY | HH) CM* AL;
+LB20a.2:   GL CM* (HY | HH) CM* AL;
 LB12:      GL CM* [^CM];
 
 LB12a:       [^SP BA HY] CM* GL;
diff --git a/icu4j/main/core/src/test/resources/com/ibm/icu/dev/test/rbbi/rbbitst.txt b/icu4j/main/core/src/test/resources/com/ibm/icu/dev/test/rbbi/rbbitst.txt
index 1c7fe9975699..781ce068be7b 100644
--- a/icu4j/main/core/src/test/resources/com/ibm/icu/dev/test/rbbi/rbbitst.txt
+++ b/icu4j/main/core/src/test/resources/com/ibm/icu/dev/test/rbbi/rbbitst.txt
@@ -2214,3 +2214,7 @@ Bangkok)•</data>
 <data>•« Complex »« chaining » •</data>
 <data>•« .618 »•</data>  # Interaction with the ICU tailoring to break before such numbers.
 
+# A hyphen that follows a non-breaking space, even one carrying a combining
+# mark, is treated as word-initial: by LB20a there is no break opportunity
+# after it.  ICU 76 handled that case incorrectly (ICU-22986).
+<data>• ̄-k•</data>
\ No newline at end of file