Skip to content

Commit

Permalink
crossversion: collect fewer artifacts on failure
Browse files Browse the repository at this point in the history
Adjust the metamorphic test to collect fewer artifacts on failure. Previously,
we collected the entire root directory containing all in-progress intermediary
runs. We only need the one that failed or the two that diverged.

This hopes to address out-of-memory errors in CI when uploading artifacts.

Informs cockroachdb#4202.
  • Loading branch information
jbowens committed Jan 23, 2025
1 parent 2fa3b96 commit 407c284
Showing 1 changed file with 88 additions and 38 deletions.
126 changes: 88 additions & 38 deletions internal/metamorphic/crossversion/crossversion_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@ import (
"os"
"os/exec"
"path/filepath"
"slices"
"strconv"
"strings"
"sync"
Expand Down Expand Up @@ -130,18 +131,6 @@ type pebbleVersion struct {
TestBinaryPath string
}

type initialState struct {
desc string
path string
}

func (s initialState) String() string {
if s.desc == "" {
return "<empty>"
}
return s.desc
}

func runCrossVersion(
ctx context.Context,
t *testing.T,
Expand Down Expand Up @@ -179,22 +168,22 @@ func runCrossVersion(
initialStates := []initialState{{}}
for i := range versions {
t.Logf("Running tests with version %s with %d initial state(s).", versions[i].SHA, len(initialStates))
histories, nextInitialStates, err := runVersion(ctx, t, &fatalOnce, rootDir, versions[i], versionSeeds[i], initialStates)
if err != nil {
return err
}
subrunResults := runVersion(ctx, t, &fatalOnce, rootDir, versions[i], versionSeeds[i], initialStates)

// All the initial states described the same state and all of this
// version's metamorphic runs used the same seed, so all of the
// resulting histories should be identical.
histories := subrunResults.historyPaths()
if h, diff := metamorphic.CompareHistories(t, histories); h > 0 {
fatalf(t, &fatalOnce, rootDir, "Metamorphic test divergence between %q and %q:\nDiff:\n%s",
nextInitialStates[0].desc, nextInitialStates[h].desc, diff)
fatalf(t, &fatalOnce, []string{subrunResults[0].runDir, subrunResults[h].runDir},
"Metamorphic test divergence between %q and %q:\nDiff:\n%s",
subrunResults[0].initialState.desc, subrunResults[h].initialState.desc, diff)
}

// Prune the set of initial states we collected for this version, using
// the deterministic randomness of prng to pick which states we keep.
if len(nextInitialStates) > factor {
nextInitialStates := subrunResults.initialStates()
if len(subrunResults) > factor {
prng.Shuffle(len(nextInitialStates), func(i, j int) {
nextInitialStates[i], nextInitialStates[j] = nextInitialStates[j], nextInitialStates[i]
})
Expand All @@ -209,6 +198,56 @@ func runCrossVersion(
return nil
}

// subrunResult describes the output of a single run of the metamorphic test
// against a particular version.
type subrunResult struct {
// runDir is the path to the directory containing the output of this
// TestMeta run.
runDir string
// subrunDir is the path to the individual execution with a particular
// OPTIONS file. It's the most granular level of running the metamorphic
// test, producing a history file but nothing to compare it against.
subrunDir string
// historyPath is the path to the history file recording the output of this
// run. All the histories produced by a single call to runVersion should be
// semantically equivalent.
historyPath string
// initialState describes the location of the database on disk after the run
// completed. It can be used to seed a new run of the metamorphic test in a
// later Pebble version.
initialState initialState
}

type initialState struct {
desc string
path string
}

func (s initialState) String() string {
if s.desc == "" {
return "<empty>"
}
return s.desc
}

type subrunResults []subrunResult

func (r subrunResults) historyPaths() []string {
paths := make([]string, len(r))
for i := range r {
paths[i] = r[i].historyPath
}
return paths
}

func (r subrunResults) initialStates() []initialState {
states := make([]initialState, len(r))
for i := range r {
states[i] = r[i].initialState
}
return states
}

func runVersion(
ctx context.Context,
t *testing.T,
Expand All @@ -217,7 +256,7 @@ func runVersion(
vers pebbleVersion,
seed uint64,
initialStates []initialState,
) (histories []string, nextInitialStates []initialState, err error) {
) (results subrunResults) {
// mu guards histories and nextInitialStates. The subtests may be run in
// parallel (via t.Parallel()).
var mu sync.Mutex
Expand Down Expand Up @@ -248,7 +287,8 @@ func runVersion(
t.Logf(" Running test with version %s with initial state %s.",
vers.SHA, s)
if err := r.run(ctx, out); err != nil {
fatalf(t, fatalOnce, rootDir, "Metamorphic test failed: %s\nOutput:%s\n", err, buf.String())
fatalf(t, fatalOnce, []string{r.dir},
"Metamorphic test failed: %s\nOutput:%s\n", err, buf.String())
}

// dir is a directory containing the ops file and subdirectories for
Expand All @@ -269,37 +309,47 @@ func runVersion(
mu.Lock()
defer mu.Unlock()
for _, subrunDir := range subrunDirs {
// Record the subrun as an initial state for the next version.
nextInitialStates = append(nextInitialStates, initialState{
path: filepath.Join(dir, subrunDir),
desc: fmt.Sprintf("sha=%s-seed=%d-opts=%s(%s)", vers.SHA, seed, subrunDir, s.String()),
results = append(results, subrunResult{
runDir: dir,
subrunDir: filepath.Join(dir, subrunDir),
historyPath: filepath.Join(dir, subrunDir, "history"),
initialState: initialState{
path: filepath.Join(dir, subrunDir),
desc: fmt.Sprintf("sha=%s-seed=%d-opts=%s(%s)", vers.SHA, seed, subrunDir, s.String()),
},
})
histories = append(histories, filepath.Join(dir, subrunDir, "history"))
}
})
}
})
return histories, nextInitialStates, err
return results
}

func fatalf(t testing.TB, fatalOnce *sync.Once, dir string, msg string, args ...interface{}) {
func fatalf(t testing.TB, fatalOnce *sync.Once, dirs []string, msg string, args ...interface{}) {
fatalOnce.Do(func() {
if artifactsDir == "" {
var err error
artifactsDir, err = os.Getwd()
require.NoError(t, err)
}
// When run with test parallelism, other subtests may still be running
// within subdirectories of `dir`. We copy instead of rename so that those
// substests don't also fail when we remove their files out from under them.
// Those additional failures would confuse the test output.
dst := filepath.Join(artifactsDir, filepath.Base(dir))
t.Logf("Copying test dir %q to %q.", dir, dst)
_, err := vfs.Clone(vfs.Default, vfs.Default, dir, dst, vfs.CloneTryLink)
if err != nil {
t.Error(err)
// De-duplicate the directories.
slices.Sort(dirs)
dirs = slices.Compact(dirs)

for _, dir := range dirs {
// When run with test parallelism, other subtests may still be
// running within subdirectories of `dir`. We copy instead of rename
// so that those substests don't also fail when we remove their
// files out from under them. Those additional failures would
// confuse the test output.
dst := filepath.Join(artifactsDir, filepath.Base(dir))
t.Logf("Copying test dir %q to %q.", dir, dst)
_, err := vfs.Clone(vfs.Default, vfs.Default, dir, dst, vfs.CloneTryLink)
if err != nil {
t.Error(err)
}
t.Fatalf(msg, args...)
}
t.Fatalf(msg, args...)
})
}

Expand Down

0 comments on commit 407c284

Please sign in to comment.