From e48281de9b0c2140a854d006c6aa1b8539da3bd1 Mon Sep 17 00:00:00 2001 From: "M. J. Fromberger" Date: Sun, 18 Feb 2024 17:17:08 -0800 Subject: [PATCH] slice: rework the format of Edit instructions Instead of making the caller keep track of index arithmetic inside the original inputs, record the edits with slices into the inputs. This makes the edits a bit larger, but does not allocate any additional storage as the slices share with the inputs. --- slice/edit.go | 75 ++++++++++++++----------- slice/edit_test.go | 125 +++++++++++++++++++++++++----------------- slice/example_test.go | 16 ++---- 3 files changed, 122 insertions(+), 94 deletions(-) diff --git a/slice/edit.go b/slice/edit.go index 172fd1c..aba7fb2 100644 --- a/slice/edit.go +++ b/slice/edit.go @@ -54,34 +54,44 @@ const ( OpDrop EditOp = '-' // Drop items from lhs OpEmit EditOp = '=' // Emit elements from lhs OpCopy EditOp = '+' // Copy items from rhs - OpReplace EditOp = 'x' // Replace with items from rhs (== Drop+Copy) + OpReplace EditOp = '!' // Replace with items from rhs (== Drop+Copy) ) // Edit is an edit operation transforming specified as part of a diff. // Each edit refers to a specific span of one of the inputs. -type Edit struct { +type Edit[T comparable] struct { Op EditOp // the diff operation to apply at the current offset - // N specifies the number of inputs affected by the operation. - N int + // X specifies the elements of lhs affected by the edit. + // For OpDrop and OpReplace it is the elements to be dropped. + // For OpEmit it is the elements to be emitted. + // For OpCopy it is empty. + X []T - // X specifies an additionl argument affected by the operation: - // - // For OpDrop and OpEmit, X is not used and will be 0. - // For OpCopy and OpReplace, X specifies a starting offset in rhs from which - // values are to be copied. - X int + // Y specifies the elements of rhs affected by the edit. + // For OpDrop and OpEmit it is empty. 
+ // For OpCopy and OpReplace it is the elements to be copied. + Y []T } -func (e Edit) String() string { - if e.Op == OpCopy || e.Op == OpReplace { - return fmt.Sprintf("%c%d:%d", e.Op, e.N, e.X) +func (e Edit[T]) String() string { + switch e.Op { + case OpCopy: + return fmt.Sprintf("%c%v", e.Op, e.Y) + case OpReplace: + x, y := fmt.Sprint(e.X), fmt.Sprint(e.Y) + return fmt.Sprintf("%c[%s:%s]", e.Op, x[1:len(x)-1], y[1:len(y)-1]) + case OpDrop: + return fmt.Sprintf("%c%d", e.Op, len(e.X)) + case OpEmit: + return fmt.Sprintf("%c%v", e.Op, e.X) } - return fmt.Sprintf("%c%d", e.Op, e.N) + return fmt.Sprintf("!%c[INVALID]", e.Op) } // EditScript computes a minimal-length sequence of Edit operations that will -// transform lhs into rhs. The result is empty if lhs == rhs. +// transform lhs into rhs. The result is empty if lhs == rhs. The slices stored +// in returned edit operations share storage with the inputs lhs and rhs. // // This implementation takes O(mn) time and O(P·min(m, n)) space to compute a // longest common subsequence, plus overhead of O(m+n) time and space to @@ -104,7 +114,7 @@ func (e Edit) String() string { // // After all edits are processed, output any remaining elements of lhs. This // completes the processing of the script. -func EditScript[T comparable, Slice ~[]T](lhs, rhs Slice) []Edit { +func EditScript[T comparable, Slice ~[]T](lhs, rhs Slice) []Edit[T] { lcs := LCS(lhs, rhs) // To construct the edit sequence, i scans forward through lcs. @@ -120,7 +130,7 @@ func EditScript[T comparable, Slice ~[]T](lhs, rhs Slice) []Edit { // inserted ones. We represent this case explicitly with a replace edit. lpos, rpos, i := 0, 0, 0 - var out []Edit + var out []Edit[T] for i < len(lcs) { // Count the numbers of elements of lhs and rhs prior to the first match. 
lend := lpos @@ -132,19 +142,18 @@ func EditScript[T comparable, Slice ~[]T](lhs, rhs Slice) []Edit { rend++ } - // If we have equal numbers of discards and insertions, combine them into - // a single replace instruction. If they're not equal, there is no point - // in this substitution, since it doesn't shorten the edit sequence. + // If we have at least as many insertions as discards, combine them into + // a single replace instruction. if n := lend - lpos; n > 0 && n <= rend-rpos { - out = append(out, Edit{Op: OpReplace, N: n, X: rpos}) + out = append(out, Edit[T]{Op: OpReplace, X: lhs[lpos:lend], Y: rhs[rpos : rpos+n]}) rpos += n } else if lend > lpos { // Record drops (there may be none). - out = append(out, Edit{Op: OpDrop, N: lend - lpos}) + out = append(out, Edit[T]{Op: OpDrop, X: lhs[lpos:lend]}) } // Record copies (there may be none). if rend > rpos { - out = append(out, Edit{Op: OpCopy, N: rend - rpos, X: rpos}) + out = append(out, Edit[T]{Op: OpCopy, Y: rhs[rpos:rend]}) } lpos, rpos = lend, rend @@ -155,28 +164,28 @@ func EditScript[T comparable, Slice ~[]T](lhs, rhs Slice) []Edit { for i+m < len(lcs) && lhs[lpos+m] == rhs[rpos+m] { m++ } + out = append(out, Edit[T]{Op: OpEmit, X: lhs[lpos : lpos+m]}) i += m lpos += m rpos += m - out = append(out, Edit{Op: OpEmit, N: m}) } if n := len(lhs) - lpos; n > 0 && n <= len(rhs)-rpos { - out = append(out, Edit{Op: OpReplace, N: n, X: rpos}) + out = append(out, Edit[T]{Op: OpReplace, X: lhs[lpos:], Y: rhs[rpos : rpos+n]}) rpos += n - } else if n := len(lhs) - lpos; n > 0 { + } else if n > 0 { // Drop any leftover elements of lhs. - out = append(out, Edit{Op: OpDrop, N: n}) + out = append(out, Edit[T]{Op: OpDrop, X: lhs[lpos:]}) } // Copy any leftover elements of rhs. 
- if n := len(rhs) - rpos; n > 0 { - out = append(out, Edit{Op: OpCopy, N: n, X: rpos}) + if len(rhs)-rpos > 0 { + out = append(out, Edit[T]{Op: OpCopy, Y: rhs[rpos:]}) } - // As a special case, drop a trailing emit so that the edit for completely - // equal sequences can be empty. - if n := len(out); n > 0 && out[n-1].Op == OpEmit { - return out[:n-1] + // As a special case, if the whole edit is a single emit, drop it so that + // equal elements have an empty script. + if len(out) == 1 && out[0].Op == OpEmit { + return nil } return out } diff --git a/slice/edit_test.go b/slice/edit_test.go index b7f2e6c..52d566b 100644 --- a/slice/edit_test.go +++ b/slice/edit_test.go @@ -2,8 +2,8 @@ package slice_test import ( "math/rand" + "regexp" "slices" - "strconv" "strings" "testing" @@ -99,68 +99,81 @@ func TestLCSRandom(t *testing.T) { func TestEditScript(t *testing.T) { tests := []struct { a, b string - want []slice.Edit + want []slice.Edit[string] }{ {"", "", nil}, - {"a", "", pedit(t, "-1")}, - {"", "b", pedit(t, "+1:0")}, + {"a", "", pedit(t, "-[a]")}, + {"", "b", pedit(t, "+[b]")}, - {"a b c", "", pedit(t, "-3")}, - {"", "d e f", pedit(t, "+3:0")}, + {"a b c", "", pedit(t, "-[a b c]")}, + {"", "d e f", pedit(t, "+[d e f]")}, - {"a", "a b c", pedit(t, "=1 +2:1")}, - {"b", "a b c", pedit(t, "+1:0 =1 +1:2")}, - {"c", "a b c", pedit(t, "+2:0")}, - {"d", "a b c", pedit(t, "x1:0 +2:1")}, + {"a", "a b c", pedit(t, "=[a] +[b c]")}, + {"b", "a b c", pedit(t, "+[a] =[b] +[c]")}, + {"c", "a b c", pedit(t, "+[a b] =[c]")}, + {"d", "a b c", pedit(t, "![d:a] +[b c]")}, - {"c d", "a b c d", pedit(t, "+2:0")}, - {"a b c", "a b c", pedit(t, "")}, - {"a b c", "a x c", pedit(t, "=1 x1:1")}, - {"a b c", "a b", pedit(t, "=2 -1")}, - {"b c", "a b c", pedit(t, "+1:0")}, - {"a b c d e", "e b c d a", pedit(t, "x1:0 =3 x1:4")}, - {"1 2 3 4", "4 3 2 1", pedit(t, "+3:0 =1 -3")}, + {"c d", "a b c d", pedit(t, "+[a b] =[c d]")}, + {"a b c", "a b c", nil}, + {"a b c", "a x c", pedit(t, "=[a] 
![b:x] =[c]")}, + {"a b c", "a b", pedit(t, "=[a b] -[c]")}, + {"b c", "a b c", pedit(t, "+[a] =[b c]")}, + {"a b c d e", "e b c d a", pedit(t, "![a:e] =[b c d] ![e:a]")}, + {"1 2 3 4", "4 3 2 1", pedit(t, "+[4 3 2] =[1] -[2 3 4]")}, - {"a x b x c", "1 x b x 2", pedit(t, "x1:0 =3 x1:4")}, - {"fly you fools", "to fly you must not be fools", pedit(t, "+1:0 =2 +3:3")}, + {"a x b x c", "1 x b x 2", pedit(t, "![a:1] =[x b x] ![c:2]")}, + {"fly you fools", "to fly you must not be fools", + pedit(t, "+[to] =[fly you] +[must not be] =[fools]")}, {"have the best time it is possible to have under the circumstances", "I hope you have the time of your life in the forest", - pedit(t, "+3:0 =2 -1 =1 -6 +4:6 =1 x1:11")}, + pedit(t, "+[I hope you] =[have the] -[best] =[time] -[it is possible to have under] "+ + "+[of your life in] =[the] ![circumstances:forest]")}, } for _, tc := range tests { as, bs := strings.Fields(tc.a), strings.Fields(tc.b) got := slice.EditScript(as, bs) - if !slices.Equal(got, tc.want) { + if !equalEdits(got, tc.want) { t.Errorf("EditScript(%q, %q):\ngot: %v\nwant: %v", tc.a, tc.b, got, tc.want) } checkApply(t, as, bs, got) } } +func equalEdits[T comparable](a, b []slice.Edit[T]) bool { + if len(a) != len(b) { + return false + } + for i := 0; i < len(a); i++ { + if a[i].Op != b[i].Op || + !slices.Equal(a[i].X, b[i].X) || + !slices.Equal(a[i].Y, b[i].Y) { + return false + } + } + return true +} + // checkApply verifies that applying the specified edit script to lhs produces rhs. -func checkApply[T comparable, Slice ~[]T](t *testing.T, lhs, rhs Slice, edit []slice.Edit) { +func checkApply[T comparable, Slice ~[]T](t *testing.T, lhs, rhs Slice, edit []slice.Edit[T]) { t.Helper() var out Slice - i := 0 for _, e := range edit { switch e.Op { case slice.OpDrop: - i += e.N - case slice.OpCopy: - out = append(out, rhs[e.X:e.X+e.N]...) + // nothing to do + case slice.OpCopy, slice.OpReplace: + out = append(out, e.Y...) 
case slice.OpEmit: - out = append(out, lhs[i:i+e.N]...) - i += e.N - case slice.OpReplace: - out = append(out, rhs[e.X:e.X+e.N]...) - i += e.N + out = append(out, e.X...) default: t.Fatalf("Unexpected edit operation: %v", e) } } - out = append(out, lhs[i:]...) + if len(edit) == 0 { + out = rhs + } if !slices.Equal(out, rhs) { t.Errorf("Apply %v:\ngot: %v\nwant: %v", edit, out, rhs) } else { @@ -168,29 +181,39 @@ func checkApply[T comparable, Slice ~[]T](t *testing.T, lhs, rhs Slice, edit []s } } +var argsRE = regexp.MustCompile(`([-+=!])\[([^\]]*)\](?:\s|$)`) + // pedit parses a string of space-separated edit strings matching the string // format rendered by the String method of a slice.Edit. -func pedit(t *testing.T, ss string) (out []slice.Edit) { +func pedit(t *testing.T, ss string) (out []slice.Edit[string]) { t.Helper() - for _, s := range strings.Fields(ss) { - var next slice.Edit - switch s[0] { - case '-', '=', '+', 'x': - next.Op = slice.EditOp(s[0]) - default: - t.Fatalf("Invalid edit op: %c", s[0]) - } - var err error - fst, snd, ok := strings.Cut(s[1:], ":") - next.N, err = strconv.Atoi(fst) - if err != nil { - t.Fatalf("Invalid N: %v", err) - } - if ok { - next.X, err = strconv.Atoi(snd) - if err != nil { - t.Fatalf("Invalid X: %v", err) + ms := argsRE.FindAllStringSubmatch(ss, -1) + if ms == nil { + t.Fatalf("Invalid argument %q", ss) + } + for _, m := range ms { + fs := strings.Fields(m[2]) + var next slice.Edit[string] + switch m[1] { + case "+": + next.Op = slice.OpCopy + next.Y = fs + case "-": + next.Op = slice.OpDrop + next.X = fs + case "=": + next.Op = slice.OpEmit + next.X = fs + case "!": + next.Op = slice.OpReplace + pre, post, ok := strings.Cut(m[2], ":") + if !ok { + t.Fatalf("Missing separator in argument %q", m[2]) } + next.X = strings.Fields(pre) + next.Y = strings.Fields(post) + default: + t.Fatalf("Invalid edit op %q", m[1]) } out = append(out, next) } diff --git a/slice/example_test.go b/slice/example_test.go index a0c086e..351678d 
100644 --- a/slice/example_test.go +++ b/slice/example_test.go @@ -110,24 +110,20 @@ func ExampleEditScript() { lhs := strings.Fields("a stitch in time saves nine") rhs := strings.Fields("we live in a time of nine lives") - i := 0 fmt.Println("start", lhs) var out []string for _, e := range slice.EditScript(lhs, rhs) { switch e.Op { case slice.OpDrop: - i += e.N case slice.OpEmit: - fmt.Println("emit", lhs[i:i+e.N]) - out = append(out, lhs[i:i+e.N]...) - i += e.N + fmt.Println("emit", e.X) + out = append(out, e.X...) case slice.OpCopy: - fmt.Println("copy", rhs[e.X:e.X+e.N]) - out = append(out, rhs[e.X:e.X+e.N]...) + fmt.Println("copy", e.Y) + out = append(out, e.Y...) case slice.OpReplace: - fmt.Println("replace", lhs[i:i+e.N], "with", rhs[e.X:e.X+e.N]) - out = append(out, rhs[e.X:e.X+e.N]...) - i += e.N + fmt.Println("replace", e.X, "with", e.Y) + out = append(out, e.Y...) default: panic("invalid") }