Skip to content

Commit

Permalink
slice: rework the format of Edit instructions
Browse files Browse the repository at this point in the history
Instead of making the caller keep track of index arithmetic inside the original
inputs, record the edits with slices into the inputs. This makes the edits a
bit larger, but does not allocate any additional storage, as the slices share
storage with the inputs.
  • Loading branch information
creachadair committed Feb 19, 2024
1 parent 9296a6b commit e48281d
Show file tree
Hide file tree
Showing 3 changed files with 122 additions and 94 deletions.
75 changes: 42 additions & 33 deletions slice/edit.go
Original file line number Diff line number Diff line change
Expand Up @@ -54,34 +54,44 @@ const (
OpDrop EditOp = '-' // Drop items from lhs
OpEmit EditOp = '=' // Emit elements from lhs
OpCopy EditOp = '+' // Copy items from rhs
OpReplace EditOp = 'x' // Replace with items from rhs (== Drop+Copy)
OpReplace EditOp = '!' // Replace with items from rhs (== Drop+Copy)
)

// Edit is an edit operation specified as part of a diff.
// Each edit refers to a specific span of one of the inputs.
type Edit struct {
type Edit[T comparable] struct {
Op EditOp // the diff operation to apply at the current offset

// N specifies the number of inputs affected by the operation.
N int
// X specifies the elements of lhs affected by the edit.
// For OpDrop and OpReplace it is the elements to be dropped.
// For OpEmit it is the elements to be emitted.
// For OpCopy it is empty.
X []T

// X specifies an additional argument affected by the operation:
//
// For OpDrop and OpEmit, X is not used and will be 0.
// For OpCopy and OpReplace, X specifies a starting offset in rhs from which
// values are to be copied.
X int
// Y specifies the elements of rhs affected by the edit.
// For OpDrop and OpEmit it is empty.
// For OpCopy and OpReplace it is the elements to be copied.
Y []T
}

func (e Edit) String() string {
if e.Op == OpCopy || e.Op == OpReplace {
return fmt.Sprintf("%c%d:%d", e.Op, e.N, e.X)
func (e Edit[T]) String() string {
switch e.Op {
case OpCopy:
return fmt.Sprintf("%c%v", e.Op, e.Y)
case OpReplace:
x, y := fmt.Sprint(e.X), fmt.Sprint(e.Y)
return fmt.Sprintf("%c[%s:%s]", e.Op, x[1:len(x)-1], y[1:len(y)-1])
case OpDrop:
return fmt.Sprintf("%c%d", e.Op, len(e.X))
case OpEmit:
return fmt.Sprintf("%c%v", e.Op, e.X)
}
return fmt.Sprintf("%c%d", e.Op, e.N)
return fmt.Sprintf("!%c[INVALID]", e.Op)
}

// EditScript computes a minimal-length sequence of Edit operations that will
// transform lhs into rhs. The result is empty if lhs == rhs.
// transform lhs into rhs. The result is empty if lhs == rhs. The slices stored
// in returned edit operations share storage with the inputs lhs and rhs.
//
// This implementation takes O(mn) time and O(P·min(m, n)) space to compute a
// longest common subsequence, plus overhead of O(m+n) time and space to
Expand All @@ -104,7 +114,7 @@ func (e Edit) String() string {
//
// After all edits are processed, output any remaining elements of lhs. This
// completes the processing of the script.
func EditScript[T comparable, Slice ~[]T](lhs, rhs Slice) []Edit {
func EditScript[T comparable, Slice ~[]T](lhs, rhs Slice) []Edit[T] {
lcs := LCS(lhs, rhs)

// To construct the edit sequence, i scans forward through lcs.
Expand All @@ -120,7 +130,7 @@ func EditScript[T comparable, Slice ~[]T](lhs, rhs Slice) []Edit {
// inserted ones. We represent this case explicitly with a replace edit.
lpos, rpos, i := 0, 0, 0

var out []Edit
var out []Edit[T]
for i < len(lcs) {
// Count the numbers of elements of lhs and rhs prior to the first match.
lend := lpos
Expand All @@ -132,19 +142,18 @@ func EditScript[T comparable, Slice ~[]T](lhs, rhs Slice) []Edit {
rend++
}

// If we have equal numbers of discards and insertions, combine them into
// a single replace instruction. If they're not equal, there is no point
// in this substitution, since it doesn't shorten the edit sequence.
// If we have at least as many insertions as discards, combine them into
// a single replace instruction.
if n := lend - lpos; n > 0 && n <= rend-rpos {
out = append(out, Edit{Op: OpReplace, N: n, X: rpos})
out = append(out, Edit[T]{Op: OpReplace, X: lhs[lpos:lend], Y: rhs[rpos : rpos+n]})
rpos += n
} else if lend > lpos {
// Record drops (there may be none).
out = append(out, Edit{Op: OpDrop, N: lend - lpos})
out = append(out, Edit[T]{Op: OpDrop, X: lhs[lpos:lend]})
}
// Record copies (there may be none).
if rend > rpos {
out = append(out, Edit{Op: OpCopy, N: rend - rpos, X: rpos})
out = append(out, Edit[T]{Op: OpCopy, Y: rhs[rpos:rend]})
}

lpos, rpos = lend, rend
Expand All @@ -155,28 +164,28 @@ func EditScript[T comparable, Slice ~[]T](lhs, rhs Slice) []Edit {
for i+m < len(lcs) && lhs[lpos+m] == rhs[rpos+m] {
m++
}
out = append(out, Edit[T]{Op: OpEmit, X: lhs[lpos : lpos+m]})
i += m
lpos += m
rpos += m
out = append(out, Edit{Op: OpEmit, N: m})
}

if n := len(lhs) - lpos; n > 0 && n <= len(rhs)-rpos {
out = append(out, Edit{Op: OpReplace, N: n, X: rpos})
out = append(out, Edit[T]{Op: OpReplace, X: lhs[lpos:], Y: rhs[rpos : rpos+n]})
rpos += n
} else if n := len(lhs) - lpos; n > 0 {
} else if n > 0 {
// Drop any leftover elements of lhs.
out = append(out, Edit{Op: OpDrop, N: n})
out = append(out, Edit[T]{Op: OpDrop, X: lhs[lpos:]})
}
// Copy any leftover elements of rhs.
if n := len(rhs) - rpos; n > 0 {
out = append(out, Edit{Op: OpCopy, N: n, X: rpos})
if len(rhs)-rpos > 0 {
out = append(out, Edit[T]{Op: OpCopy, Y: rhs[rpos:]})
}

// As a special case, drop a trailing emit so that the edit for completely
// equal sequences can be empty.
if n := len(out); n > 0 && out[n-1].Op == OpEmit {
return out[:n-1]
// As a special case, if the whole edit is a single emit, drop it so that
// equal inputs produce an empty script.
if len(out) == 1 && out[0].Op == OpEmit {
return nil
}
return out
}
125 changes: 74 additions & 51 deletions slice/edit_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ package slice_test

import (
"math/rand"
"regexp"
"slices"
"strconv"
"strings"
"testing"

Expand Down Expand Up @@ -99,98 +99,121 @@ func TestLCSRandom(t *testing.T) {
func TestEditScript(t *testing.T) {
tests := []struct {
a, b string
want []slice.Edit
want []slice.Edit[string]
}{
{"", "", nil},

{"a", "", pedit(t, "-1")},
{"", "b", pedit(t, "+1:0")},
{"a", "", pedit(t, "-[a]")},
{"", "b", pedit(t, "+[b]")},

{"a b c", "", pedit(t, "-3")},
{"", "d e f", pedit(t, "+3:0")},
{"a b c", "", pedit(t, "-[a b c]")},
{"", "d e f", pedit(t, "+[d e f]")},

{"a", "a b c", pedit(t, "=1 +2:1")},
{"b", "a b c", pedit(t, "+1:0 =1 +1:2")},
{"c", "a b c", pedit(t, "+2:0")},
{"d", "a b c", pedit(t, "x1:0 +2:1")},
{"a", "a b c", pedit(t, "=[a] +[b c]")},
{"b", "a b c", pedit(t, "+[a] =[b] +[c]")},
{"c", "a b c", pedit(t, "+[a b] =[c]")},
{"d", "a b c", pedit(t, "![d:a] +[b c]")},

{"c d", "a b c d", pedit(t, "+2:0")},
{"a b c", "a b c", pedit(t, "")},
{"a b c", "a x c", pedit(t, "=1 x1:1")},
{"a b c", "a b", pedit(t, "=2 -1")},
{"b c", "a b c", pedit(t, "+1:0")},
{"a b c d e", "e b c d a", pedit(t, "x1:0 =3 x1:4")},
{"1 2 3 4", "4 3 2 1", pedit(t, "+3:0 =1 -3")},
{"c d", "a b c d", pedit(t, "+[a b] =[c d]")},
{"a b c", "a b c", nil},
{"a b c", "a x c", pedit(t, "=[a] ![b:x] =[c]")},
{"a b c", "a b", pedit(t, "=[a b] -[c]")},
{"b c", "a b c", pedit(t, "+[a] =[b c]")},
{"a b c d e", "e b c d a", pedit(t, "![a:e] =[b c d] ![e:a]")},
{"1 2 3 4", "4 3 2 1", pedit(t, "+[4 3 2] =[1] -[2 3 4]")},

{"a x b x c", "1 x b x 2", pedit(t, "x1:0 =3 x1:4")},
{"fly you fools", "to fly you must not be fools", pedit(t, "+1:0 =2 +3:3")},
{"a x b x c", "1 x b x 2", pedit(t, "![a:1] =[x b x] ![c:2]")},
{"fly you fools", "to fly you must not be fools",
pedit(t, "+[to] =[fly you] +[must not be] =[fools]")},
{"have the best time it is possible to have under the circumstances",
"I hope you have the time of your life in the forest",
pedit(t, "+3:0 =2 -1 =1 -6 +4:6 =1 x1:11")},
pedit(t, "+[I hope you] =[have the] -[best] =[time] -[it is possible to have under] "+
"+[of your life in] =[the] ![circumstances:forest]")},
}
for _, tc := range tests {
as, bs := strings.Fields(tc.a), strings.Fields(tc.b)
got := slice.EditScript(as, bs)
if !slices.Equal(got, tc.want) {
if !equalEdits(got, tc.want) {
t.Errorf("EditScript(%q, %q):\ngot: %v\nwant: %v", tc.a, tc.b, got, tc.want)
}
checkApply(t, as, bs, got)
}
}

// equalEdits reports whether a and b contain the same edit operations in the
// same order. Two edits match when their Op fields are equal and their X and
// Y slices are element-wise equal. Slices of differing length never match.
func equalEdits[T comparable](a, b []slice.Edit[T]) bool {
	return slices.EqualFunc(a, b, func(ea, eb slice.Edit[T]) bool {
		return ea.Op == eb.Op &&
			slices.Equal(ea.X, eb.X) &&
			slices.Equal(ea.Y, eb.Y)
	})
}

// checkApply verifies that applying the specified edit script to lhs produces rhs.
func checkApply[T comparable, Slice ~[]T](t *testing.T, lhs, rhs Slice, edit []slice.Edit) {
func checkApply[T comparable, Slice ~[]T](t *testing.T, lhs, rhs Slice, edit []slice.Edit[T]) {
t.Helper()

var out Slice
i := 0
for _, e := range edit {
switch e.Op {
case slice.OpDrop:
i += e.N
case slice.OpCopy:
out = append(out, rhs[e.X:e.X+e.N]...)
// nothing to do
case slice.OpCopy, slice.OpReplace:
out = append(out, e.Y...)
case slice.OpEmit:
out = append(out, lhs[i:i+e.N]...)
i += e.N
case slice.OpReplace:
out = append(out, rhs[e.X:e.X+e.N]...)
i += e.N
out = append(out, e.X...)
default:
t.Fatalf("Unexpected edit operation: %v", e)
}
}
out = append(out, lhs[i:]...)
if len(edit) == 0 {
out = rhs
}
if !slices.Equal(out, rhs) {
t.Errorf("Apply %v:\ngot: %v\nwant: %v", edit, out, rhs)
} else {
t.Logf("Apply L %v E %v OK: %v", lhs, edit, out)
}
}

var argsRE = regexp.MustCompile(`([-+=!])\[([^\]]*)\](?:\s|$)`)

// pedit parses a string of space-separated edit strings matching the string
// format rendered by the String method of a slice.Edit.
func pedit(t *testing.T, ss string) (out []slice.Edit) {
func pedit(t *testing.T, ss string) (out []slice.Edit[string]) {
t.Helper()
for _, s := range strings.Fields(ss) {
var next slice.Edit
switch s[0] {
case '-', '=', '+', 'x':
next.Op = slice.EditOp(s[0])
default:
t.Fatalf("Invalid edit op: %c", s[0])
}
var err error
fst, snd, ok := strings.Cut(s[1:], ":")
next.N, err = strconv.Atoi(fst)
if err != nil {
t.Fatalf("Invalid N: %v", err)
}
if ok {
next.X, err = strconv.Atoi(snd)
if err != nil {
t.Fatalf("Invalid X: %v", err)
ms := argsRE.FindAllStringSubmatch(ss, -1)
if ms == nil {
t.Fatalf("Invalid argument %q", ss)
}
for _, m := range ms {
fs := strings.Fields(m[2])
var next slice.Edit[string]
switch m[1] {
case "+":
next.Op = slice.OpCopy
next.Y = fs
case "-":
next.Op = slice.OpDrop
next.X = fs
case "=":
next.Op = slice.OpEmit
next.X = fs
case "!":
next.Op = slice.OpReplace
pre, post, ok := strings.Cut(m[2], ":")
if !ok {
t.Fatalf("Missing separator in argument %q", m[2])
}
next.X = strings.Fields(pre)
next.Y = strings.Fields(post)
default:
t.Fatalf("Invalid edit op %q", m[1])
}
out = append(out, next)
}
Expand Down
16 changes: 6 additions & 10 deletions slice/example_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -110,24 +110,20 @@ func ExampleEditScript() {
lhs := strings.Fields("a stitch in time saves nine")
rhs := strings.Fields("we live in a time of nine lives")

i := 0
fmt.Println("start", lhs)
var out []string
for _, e := range slice.EditScript(lhs, rhs) {
switch e.Op {
case slice.OpDrop:
i += e.N
case slice.OpEmit:
fmt.Println("emit", lhs[i:i+e.N])
out = append(out, lhs[i:i+e.N]...)
i += e.N
fmt.Println("emit", e.X)
out = append(out, e.X...)
case slice.OpCopy:
fmt.Println("copy", rhs[e.X:e.X+e.N])
out = append(out, rhs[e.X:e.X+e.N]...)
fmt.Println("copy", e.Y)
out = append(out, e.Y...)
case slice.OpReplace:
fmt.Println("replace", lhs[i:i+e.N], "with", rhs[e.X:e.X+e.N])
out = append(out, rhs[e.X:e.X+e.N]...)
i += e.N
fmt.Println("replace", e.X, "with", e.Y)
out = append(out, e.Y...)
default:
panic("invalid")
}
Expand Down

0 comments on commit e48281d

Please sign in to comment.