From 0a8b9194069d896aef129ee401e8a5ce431c6517 Mon Sep 17 00:00:00 2001 From: Maximilian Bosch Date: Thu, 31 Oct 2024 12:29:48 +0100 Subject: [PATCH 1/3] hydra-eval-jobs: allow globs with `fnmatch(3)` for constituents Using named constituents, i.e. constituents = [ "nixos.channel" "nixos.tests.foo" ] is way faster than using derivations since the process evaluating the aggregate doesn't need to evaluate the constituents again, only the aggregate job. In the Flying Circus platform, we had code before that added most of the other jobs as constituents to the aggregate[1]. When we decided to switch to named constituents, there was a question on what to do about this: recursively evaluating attribute names may be costly since it's not clear at which level to stop, and if you recurse one level too deep to check whether you can stop, you may have a lot of evaluation work again. The other idea was to just use globs, i.e. constituents = [ "nixos.*" ] to select all jobs starting with `nixos.`. The behavior is basically: if a constituent name X doesn't exist in the jobset, try to match it against all job names. Since this is costly, derivations must opt in explicitly with `_hydraGlobConstituents = true;`. This uses libc's `fnmatch(3)` internally, so all semantics from it apply here. We learned that the code in here already has issues with transitivity, i.e. if you have the jobs A, B, C where B & C are aggregates with { name = "B"; constituents = [ "A" ]; } and { name = "C"; constituents = [ "B" ]; } you may end up with C depending on the version of B without the derivation rewrites (i.e. with `B` only having strings instead of derivations as constituents). With globs it may be way easier to run into this, so one of the next patches will fix this. 
[1] https://github.com/flyingcircusio/fc-nixos/blob/b7475201f067404d972ee640ffb3fe50a1989862/release/default.nix#L311 --- src/hydra-eval-jobs/hydra-eval-jobs.cc | 78 ++++++++++++++------ t/evaluator/evaluate-constituents-globbing.t | 58 +++++++++++++++ t/jobs/constituents-glob.nix | 41 ++++++++++ 3 files changed, 155 insertions(+), 22 deletions(-) create mode 100644 t/evaluator/evaluate-constituents-globbing.t create mode 100644 t/jobs/constituents-glob.nix diff --git a/src/hydra-eval-jobs/hydra-eval-jobs.cc b/src/hydra-eval-jobs/hydra-eval-jobs.cc index b83cae916..142f320c5 100644 --- a/src/hydra-eval-jobs/hydra-eval-jobs.cc +++ b/src/hydra-eval-jobs/hydra-eval-jobs.cc @@ -27,6 +27,8 @@ #include #include +#include + #include void check_pid_status_nonblocking(pid_t check_pid) @@ -234,6 +236,10 @@ static void worker( if (v->type() == nString) job["namedConstituents"].push_back(v->string_view()); } + + auto glob = v->attrs()->get(state.symbols.create("_hydraGlobConstituents")); + bool globConstituents = glob && state.forceBool(*glob->value, glob->pos, "while evaluating the `_hydraGlobConstituents` attribute"); + job["globConstituents"] = globConstituents; } /* Register the derivation as a GC root. !!! 
This @@ -497,46 +503,74 @@ int main(int argc, char * * argv) auto named = job.find("namedConstituents"); if (named == job.end()) continue; + bool globConstituents = job.value("globConstituents", false); + std::unordered_map brokenJobs; - auto getNonBrokenJobOrRecordError = [&brokenJobs, &jobName, &state]( - const std::string & childJobName) -> std::optional { - auto childJob = state->jobs.find(childJobName); - if (childJob == state->jobs.end()) { - printError("aggregate job '%s' references non-existent job '%s'", jobName, childJobName); - brokenJobs[childJobName] = "does not exist"; - return std::nullopt; - } - if (childJob->find("error") != childJob->end()) { - std::string error = (*childJob)["error"]; + auto isBroken = [&brokenJobs, &jobName]( + const std::string & childJobName, nlohmann::json & job) -> bool { + if (job.find("error") != job.end()) { + std::string error = job["error"]; printError("aggregate job '%s' references broken job '%s': %s", jobName, childJobName, error); brokenJobs[childJobName] = error; - return std::nullopt; + return true; + } else { + return false; } - return *childJob; + }; + auto getNonBrokenJobsOrRecordError = [&state, &isBroken, &jobName, &brokenJobs, &globConstituents]( + const std::string & childJobName) -> std::vector { + auto childJob = state->jobs.find(childJobName); + std::vector results; + if (childJob == state->jobs.end()) { + if (!globConstituents) { + printError("aggregate job '%s' references non-existent job '%s'", jobName, childJobName); + brokenJobs[childJobName] = "does not exist"; + } else { + for (auto job = state->jobs.begin(); job != state->jobs.end(); job++) { + auto jobName = job.key(); + if (fnmatch(childJobName.c_str(), jobName.c_str(), 0) == 0 + && !isBroken(jobName, *job) + ) { + results.push_back(*job); + } + } + if (results.empty()) { + warn("aggregate job '%s' references constituent glob pattern '%s' with no matches", jobName, childJobName); + brokenJobs[childJobName] = "constituent glob pattern had no 
matches"; + } + } + } else if (!isBroken(childJobName, *childJob)) { + results.push_back(*childJob); + } + return results; }; if (myArgs.dryRun) { for (std::string jobName2 : *named) { - auto job2 = getNonBrokenJobOrRecordError(jobName2); - if (!job2) { + auto foundJobs = getNonBrokenJobsOrRecordError(jobName2); + if (foundJobs.empty()) { continue; } - std::string drvPath2 = (*job2)["drvPath"]; - job["constituents"].push_back(drvPath2); + for (auto & childJob : foundJobs) { + std::string constituentDrvPath = childJob["drvPath"]; + job["constituents"].push_back(constituentDrvPath); + } } } else { auto drvPath = store->parseStorePath((std::string) job["drvPath"]); auto drv = store->readDerivation(drvPath); for (std::string jobName2 : *named) { - auto job2 = getNonBrokenJobOrRecordError(jobName2); - if (!job2) { + auto foundJobs = getNonBrokenJobsOrRecordError(jobName2); + if (foundJobs.empty()) { continue; } - auto drvPath2 = store->parseStorePath((std::string) (*job2)["drvPath"]); - auto drv2 = store->readDerivation(drvPath2); - job["constituents"].push_back(store->printStorePath(drvPath2)); - drv.inputDrvs.map[drvPath2].value = {drv2.outputs.begin()->first}; + for (auto & childJob : foundJobs) { + auto childDrvPath = store->parseStorePath((std::string) childJob["drvPath"]); + auto childDrv = store->readDerivation(childDrvPath); + job["constituents"].push_back(store->printStorePath(childDrvPath)); + drv.inputDrvs.map[childDrvPath].value = {childDrv.outputs.begin()->first}; + } } if (brokenJobs.empty()) { diff --git a/t/evaluator/evaluate-constituents-globbing.t b/t/evaluator/evaluate-constituents-globbing.t new file mode 100644 index 000000000..3792c0a21 --- /dev/null +++ b/t/evaluator/evaluate-constituents-globbing.t @@ -0,0 +1,58 @@ +use strict; +use warnings; +use Setup; +use Test2::V0; +use Hydra::Helper::Exec; +use Data::Dumper; + +my $ctx = test_context(); + +my $jobsetCtx = $ctx->makeJobset( + expression => 'constituents-glob.nix', +); +my $jobset = 
$jobsetCtx->{"jobset"}; + +my ($res, $stdout, $stderr) = captureStdoutStderr(60, + ("hydra-eval-jobset", $jobsetCtx->{"project"}->name, $jobset->name) +); + +subtest "non_match_aggregate failed" => sub { + ok(utf8::decode($stderr), "Stderr output is UTF8-clean"); + like( + $stderr, + qr/warning: aggregate job 'non_match_aggregate' references constituent glob pattern 'tests\.\*' with no matches/, + "The stderr record includes a relevant error message" + ); + + $jobset->discard_changes; # refresh from DB + like( + $jobset->errormsg, + qr/tests\.\*: constituent glob pattern had no matches/, + "The jobset records a relevant error message" + ); +}; + +my $builds = {}; +for my $build ($jobset->builds) { + $builds->{$build->job} = $build; +} + +subtest "basic globbing works" => sub { + ok(defined $builds->{"ok_aggregate"}, "'ok_aggregate' is part of the jobset evaluation"); + my @constituents = $builds->{"ok_aggregate"}->constituents->all; + is(2, scalar @constituents, "'ok_aggregate' has two constituents"); + + my @sortedConstituentNames = sort (map { $_->nixname } @constituents); + + is($sortedConstituentNames[0], "empty-dir-A", "first constituent of 'ok_aggregate' is 'empty-dir-A'"); + is($sortedConstituentNames[1], "empty-dir-B", "second constituent of 'ok_aggregate' is 'empty-dir-B'"); +}; + +#subtest "transitivity is OK" => sub { + #ok(defined $builds->{"indirect_aggregate"}, "'indirect_aggregate' is part of the jobset evaluation"); + #my @constituents = $builds->{"indirect_aggregate"}->constituents->all; + #is(1, scalar @constituents, "'indirect_aggregate' has one constituent"); + #is($constituents[0]->nixname, "direct_aggregate", "'indirect_aggregate' has 'direct_aggregate' as single constituent"); +#}; + +done_testing; diff --git a/t/jobs/constituents-glob.nix b/t/jobs/constituents-glob.nix new file mode 100644 index 000000000..27cabd668 --- /dev/null +++ b/t/jobs/constituents-glob.nix @@ -0,0 +1,41 @@ +with import ./config.nix; +{ + packages.constituentA = 
mkDerivation { + name = "empty-dir-A"; + builder = ./empty-dir-builder.sh; + }; + + packages.constituentB = mkDerivation { + name = "empty-dir-B"; + builder = ./empty-dir-builder.sh; + }; + + ok_aggregate = mkDerivation { + name = "direct_aggregate"; + _hydraAggregate = true; + _hydraGlobConstituents = true; + constituents = [ + "packages.*" + ]; + builder = ./empty-dir-builder.sh; + }; + + indirect_aggregate = mkDerivation { + name = "indirect_aggregate"; + _hydraAggregate = true; + constituents = [ + "ok_aggregate" + ]; + builder = ./empty-dir-builder.sh; + }; + + non_match_aggregate = mkDerivation { + name = "mixed_aggregate"; + _hydraAggregate = true; + _hydraGlobConstituents = true; + constituents = [ + "tests.*" + ]; + builder = ./empty-dir-builder.sh; + }; +} From 57a92052f5ab16742dc47da61269421912e61de2 Mon Sep 17 00:00:00 2001 From: Christian Theune Date: Fri, 25 Oct 2024 13:22:03 +0200 Subject: [PATCH 2/3] hydra-eval-jobs: basic logging of memory increase per job --- src/hydra-eval-jobs/hydra-eval-jobs.cc | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/src/hydra-eval-jobs/hydra-eval-jobs.cc b/src/hydra-eval-jobs/hydra-eval-jobs.cc index 142f320c5..b1759bb4a 100644 --- a/src/hydra-eval-jobs/hydra-eval-jobs.cc +++ b/src/hydra-eval-jobs/hydra-eval-jobs.cc @@ -162,6 +162,9 @@ static void worker( auto vRoot = state.allocValue(); state.autoCallFunction(autoArgs, vTop, *vRoot); + size_t prev = 0; + auto start = std::chrono::steady_clock::now(); + while (true) { /* Wait for the master to send us a job name. */ writeLine(to.get(), "next"); @@ -300,8 +303,17 @@ static void worker( /* If our RSS exceeds the maximum, exit. The master will start a new process. 
*/ + + auto end = std::chrono::steady_clock::now(); + auto duration = std::chrono::duration_cast(start - end).count(); struct rusage r; getrusage(RUSAGE_SELF, &r); + size_t delta = (size_t)r.ru_maxrss - prev; // KiB + if (delta > maxMemorySize * 1024 * 0.5 || (duration > 1)) + printError("evaluating job '%s' increased memory usage by %d MiB", attrPath, + (r.ru_maxrss - prev)/1024); + + prev = r.ru_maxrss; if ((size_t) r.ru_maxrss > maxMemorySize * 1024) break; } From a5a146263e64310615a3a1f9e856afa925ed2f5e Mon Sep 17 00:00:00 2001 From: Maximilian Bosch Date: Wed, 6 Nov 2024 15:35:37 +0100 Subject: [PATCH 3/3] Topological sort before rewriting aggregate jobs Given the leaf jobs `packages.foo` & `packages.bar`, an aggregate job `aggregate0` with _hydraGlobConstituents = true; constituents = [ "packages.*" ]; and an aggregate job `aggregate1` with constituents = [ "aggregate0" ]; then it may happen depending on the order of evaluation that `aggregate1` depends on the old derivation of `aggregate0` (i.e. the one without rewritten constituents) and doesn't depend on `packages.foo` and `packages.bar` because it was rewritten before `aggregate0` was rewritten. Using a toposort (derived from the one in CppNix's libutil) to solve that. Cycles are a hard error, then _all_ aggregates are failed to make sure we don't finish one of those too early. This also extracts the code into a few helper functions and a slightly better data structure, otherwise this would've gotten too chaotic. 
--- src/hydra-eval-jobs/hydra-eval-jobs.cc | 335 ++++++++++++------- t/evaluator/evaluate-constituents-globbing.t | 175 ++++++++-- t/jobs/constituents-cycle-glob.nix | 34 ++ t/jobs/constituents-cycle.nix | 21 ++ t/jobs/constituents-glob-all.nix | 22 ++ 5 files changed, 436 insertions(+), 151 deletions(-) create mode 100644 t/jobs/constituents-cycle-glob.nix create mode 100644 t/jobs/constituents-cycle.nix create mode 100644 t/jobs/constituents-glob-all.nix diff --git a/src/hydra-eval-jobs/hydra-eval-jobs.cc b/src/hydra-eval-jobs/hydra-eval-jobs.cc index b1759bb4a..793f1c242 100644 --- a/src/hydra-eval-jobs/hydra-eval-jobs.cc +++ b/src/hydra-eval-jobs/hydra-eval-jobs.cc @@ -320,6 +320,212 @@ static void worker( writeLine(to.get(), "restart"); } +struct DependencyCycle : public std::exception { + std::string a; + std::string b; + std::set remainingAggregates; + + DependencyCycle(const std::string & a, const std::string & b, const std::set & remainingAggregates) : a(a), b(b), remainingAggregates(remainingAggregates) {} + + std::string what() { + return fmt("Dependency cycle: %s <-> %s", a, b); + } +}; + +struct AggregateJob +{ + std::string name; + std::set dependencies; + std::unordered_map brokenJobs; + + bool operator<(const AggregateJob & b) const { return name < b.name; } +}; + +// This is copied from `libutil/topo-sort.hh` in CppNix and slightly modified. +// However, I needed a way to use strings as identifiers to sort, but still be able +// to put AggregateJob objects into this function since I'd rather not +// have to transform back and forth between a list of strings and AggregateJobs +// in resolveNamedConstituents. 
+std::vector topoSort(std::set items) +{ + std::vector sorted; + std::set visited, parents; + + std::map dictIdentToObject; + for (auto & it : items) { + dictIdentToObject.insert({it.name, it}); + } + + std::function dfsVisit; + + dfsVisit = [&](const std::string & path, const std::string * parent) { + if (parents.count(path)) { + dictIdentToObject.erase(path); + dictIdentToObject.erase(*parent); + std::set remaining; + for (auto & [k, _] : dictIdentToObject) { + remaining.insert(k); + } + throw DependencyCycle(path, *parent, remaining); + } + + if (!visited.insert(path).second) return; + parents.insert(path); + + std::set references = dictIdentToObject[path].dependencies; + + for (auto & i : references) + /* Don't traverse into items that don't exist in our starting set. */ + if (i != path && dictIdentToObject.find(i) != dictIdentToObject.end()) + dfsVisit(i, &path); + + sorted.push_back(dictIdentToObject[path]); + parents.erase(path); + }; + + for (auto & [i, _] : dictIdentToObject) + dfsVisit(i, nullptr); + + return sorted; +} + +static bool insertMatchingConstituents(const std::string & childJobName, + const std::string & jobName, + std::function isBroken, + nlohmann::json & jobs, + std::set & results) +{ + bool expansionFound = false; + for (auto job = jobs.begin(); job != jobs.end(); job++) { + // Never select the job itself as constituent. Trivial way + // to avoid obvious cycles. 
+ if (job.key() == jobName) { + continue; + } + auto jobName = job.key(); + if (fnmatch(childJobName.c_str(), jobName.c_str(), 0) == 0 && !isBroken(jobName, *job)) { + results.insert(jobName); + expansionFound = true; + } + } + + return expansionFound; +} + +static std::vector resolveNamedConstituents(nlohmann::json & jobs) +{ + std::set aggregateJobs; + for (auto i = jobs.begin(); i != jobs.end(); ++i) { + auto jobName = i.key(); + auto & job = i.value(); + + auto named = job.find("namedConstituents"); + if (named != job.end()) { + bool globConstituents = job.value("globConstituents", false); + std::unordered_map brokenJobs; + std::set results; + + auto isBroken = [&brokenJobs, &jobName]( + const std::string & childJobName, nlohmann::json & job) -> bool { + if (job.find("error") != job.end()) { + std::string error = job["error"]; + printError("aggregate job '%s' references broken job '%s': %s", jobName, childJobName, error); + brokenJobs[childJobName] = error; + return true; + } else { + return false; + } + }; + + for (const std::string & childJobName : *named) { + auto childJob = jobs.find(childJobName); + if (childJob == jobs.end()) { + if (!globConstituents) { + printError("aggregate job '%s' references non-existent job '%s'", jobName, childJobName); + brokenJobs[childJobName] = "does not exist"; + } else if (!insertMatchingConstituents(childJobName, jobName, isBroken, jobs, results)) { + warn("aggregate job '%s' references constituent glob pattern '%s' with no matches", jobName, childJobName); + brokenJobs[childJobName] = "constituent glob pattern had no matches"; + } + } else if (!isBroken(childJobName, *childJob)) { + results.insert(childJobName); + } + } + + aggregateJobs.insert(AggregateJob(jobName, results, brokenJobs)); + } + } + + return topoSort(aggregateJobs); +} + +static void rewriteAggregates(nlohmann::json & jobs, + std::vector aggregateJobs, + bool dryRun, + ref store) +{ + for (auto & aggregateJob : aggregateJobs) { + auto & job = 
jobs.find(aggregateJob.name).value(); + if (dryRun) { + for (auto & childJobName : aggregateJob.dependencies) { + std::string constituentDrvPath = jobs[childJobName]["drvPath"]; + job["constituents"].push_back(constituentDrvPath); + } + } else { + auto drvPath = store->parseStorePath((std::string) job["drvPath"]); + auto drv = store->readDerivation(drvPath); + + for (auto & childJobName : aggregateJob.dependencies) { + auto childDrvPath = store->parseStorePath((std::string) jobs[childJobName]["drvPath"]); + auto childDrv = store->readDerivation(childDrvPath); + job["constituents"].push_back(store->printStorePath(childDrvPath)); + drv.inputDrvs.map[childDrvPath].value = {childDrv.outputs.begin()->first}; + } + + if (aggregateJob.brokenJobs.empty()) { + std::string drvName(drvPath.name()); + assert(hasSuffix(drvName, drvExtension)); + drvName.resize(drvName.size() - drvExtension.size()); + + auto hashModulo = hashDerivationModulo(*store, drv, true); + if (hashModulo.kind != DrvHash::Kind::Regular) continue; + auto h = hashModulo.hashes.find("out"); + if (h == hashModulo.hashes.end()) continue; + auto outPath = store->makeOutputPath("out", h->second, drvName); + drv.env["out"] = store->printStorePath(outPath); + drv.outputs.insert_or_assign("out", DerivationOutput::InputAddressed { .path = outPath }); + auto newDrvPath = store->printStorePath(writeDerivation(*store, drv)); + + debug("rewrote aggregate derivation %s -> %s", store->printStorePath(drvPath), newDrvPath); + + job["drvPath"] = newDrvPath; + job["outputs"]["out"] = store->printStorePath(outPath); + } + } + + job.erase("namedConstituents"); + + /* Register the derivation as a GC root. !!! This + registers roots for jobs that we may have already + done. 
*/ + auto localStore = store.dynamic_pointer_cast(); + if (gcRootsDir != "" && localStore) { + auto drvPath = job["drvPath"].get(); + Path root = gcRootsDir + "/" + std::string(baseNameOf(drvPath)); + if (!pathExists(root)) + localStore->addPermRoot(localStore->parseStorePath(drvPath), root); + } + + if (!aggregateJob.brokenJobs.empty()) { + std::stringstream ss; + for (const auto& [jobName, error] : aggregateJob.brokenJobs) { + ss << jobName << ": " << error << "\n"; + } + job["error"] = ss.str(); + } + } +} + int main(int argc, char * * argv) { /* Prevent undeclared dependencies in the evaluation via @@ -502,129 +708,22 @@ int main(int argc, char * * argv) if (state->exc) std::rethrow_exception(state->exc); - /* For aggregate jobs that have named consistuents + /* For aggregate jobs that have named constituents (i.e. constituents that are a job name rather than a derivation), look up the referenced job and add it to the dependencies of the aggregate derivation. */ auto store = openStore(); - for (auto i = state->jobs.begin(); i != state->jobs.end(); ++i) { - auto jobName = i.key(); - auto & job = i.value(); - - auto named = job.find("namedConstituents"); - if (named == job.end()) continue; - - bool globConstituents = job.value("globConstituents", false); - - std::unordered_map brokenJobs; - auto isBroken = [&brokenJobs, &jobName]( - const std::string & childJobName, nlohmann::json & job) -> bool { - if (job.find("error") != job.end()) { - std::string error = job["error"]; - printError("aggregate job '%s' references broken job '%s': %s", jobName, childJobName, error); - brokenJobs[childJobName] = error; - return true; - } else { - return false; - } - }; - auto getNonBrokenJobsOrRecordError = [&state, &isBroken, &jobName, &brokenJobs, &globConstituents]( - const std::string & childJobName) -> std::vector { - auto childJob = state->jobs.find(childJobName); - std::vector results; - if (childJob == state->jobs.end()) { - if (!globConstituents) { - 
printError("aggregate job '%s' references non-existent job '%s'", jobName, childJobName); - brokenJobs[childJobName] = "does not exist"; - } else { - for (auto job = state->jobs.begin(); job != state->jobs.end(); job++) { - auto jobName = job.key(); - if (fnmatch(childJobName.c_str(), jobName.c_str(), 0) == 0 - && !isBroken(jobName, *job) - ) { - results.push_back(*job); - } - } - if (results.empty()) { - warn("aggregate job '%s' references constituent glob pattern '%s' with no matches", jobName, childJobName); - brokenJobs[childJobName] = "constituent glob pattern had no matches"; - } - } - } else if (!isBroken(childJobName, *childJob)) { - results.push_back(*childJob); - } - return results; - }; - - if (myArgs.dryRun) { - for (std::string jobName2 : *named) { - auto foundJobs = getNonBrokenJobsOrRecordError(jobName2); - if (foundJobs.empty()) { - continue; - } - for (auto & childJob : foundJobs) { - std::string constituentDrvPath = childJob["drvPath"]; - job["constituents"].push_back(constituentDrvPath); - } - } - } else { - auto drvPath = store->parseStorePath((std::string) job["drvPath"]); - auto drv = store->readDerivation(drvPath); - - for (std::string jobName2 : *named) { - auto foundJobs = getNonBrokenJobsOrRecordError(jobName2); - if (foundJobs.empty()) { - continue; - } - for (auto & childJob : foundJobs) { - auto childDrvPath = store->parseStorePath((std::string) childJob["drvPath"]); - auto childDrv = store->readDerivation(childDrvPath); - job["constituents"].push_back(store->printStorePath(childDrvPath)); - drv.inputDrvs.map[childDrvPath].value = {childDrv.outputs.begin()->first}; - } - } - - if (brokenJobs.empty()) { - std::string drvName(drvPath.name()); - assert(hasSuffix(drvName, drvExtension)); - drvName.resize(drvName.size() - drvExtension.size()); - - auto hashModulo = hashDerivationModulo(*store, drv, true); - if (hashModulo.kind != DrvHash::Kind::Regular) continue; - auto h = hashModulo.hashes.find("out"); - if (h == hashModulo.hashes.end()) 
continue; - auto outPath = store->makeOutputPath("out", h->second, drvName); - drv.env["out"] = store->printStorePath(outPath); - drv.outputs.insert_or_assign("out", DerivationOutput::InputAddressed { .path = outPath }); - auto newDrvPath = store->printStorePath(writeDerivation(*store, drv)); - - debug("rewrote aggregate derivation %s -> %s", store->printStorePath(drvPath), newDrvPath); - - job["drvPath"] = newDrvPath; - job["outputs"]["out"] = store->printStorePath(outPath); - } - } - - job.erase("namedConstituents"); - - /* Register the derivation as a GC root. !!! This - registers roots for jobs that we may have already - done. */ - auto localStore = store.dynamic_pointer_cast(); - if (gcRootsDir != "" && localStore) { - auto drvPath = job["drvPath"].get(); - Path root = gcRootsDir + "/" + std::string(baseNameOf(drvPath)); - if (!pathExists(root)) - localStore->addPermRoot(localStore->parseStorePath(drvPath), root); - } - - if (!brokenJobs.empty()) { - std::stringstream ss; - for (const auto& [jobName, error] : brokenJobs) { - ss << jobName << ": " << error << "\n"; - } - job["error"] = ss.str(); + try { + auto namedConstituents = resolveNamedConstituents(state->jobs); + rewriteAggregates(state->jobs, namedConstituents, myArgs.dryRun, store); + } catch (DependencyCycle & e) { + printError("Found dependency cycle between jobs '%s' and '%s'", e.a, e.b); + state->jobs[e.a]["error"] = e.what(); + state->jobs[e.b]["error"] = e.what(); + + for (auto & jobName : e.remainingAggregates) { + state->jobs[jobName]["error"] = "Skipping aggregate because of a dependency cycle"; } } diff --git a/t/evaluator/evaluate-constituents-globbing.t b/t/evaluator/evaluate-constituents-globbing.t index 3792c0a21..c4a67f131 100644 --- a/t/evaluator/evaluate-constituents-globbing.t +++ b/t/evaluator/evaluate-constituents-globbing.t @@ -7,52 +7,161 @@ use Data::Dumper; my $ctx = test_context(); -my $jobsetCtx = $ctx->makeJobset( - expression => 'constituents-glob.nix', -); -my $jobset = 
$jobsetCtx->{"jobset"}; +subtest "general glob testing" => sub { + my $jobsetCtx = $ctx->makeJobset( + expression => 'constituents-glob.nix', + ); + my $jobset = $jobsetCtx->{"jobset"}; -my ($res, $stdout, $stderr) = captureStdoutStderr(60, - ("hydra-eval-jobset", $jobsetCtx->{"project"}->name, $jobset->name) -); + my ($res, $stdout, $stderr) = captureStdoutStderr(60, + ("hydra-eval-jobset", $jobsetCtx->{"project"}->name, $jobset->name) + ); -subtest "non_match_aggregate failed" => sub { - ok(utf8::decode($stderr), "Stderr output is UTF8-clean"); - like( - $stderr, - qr/warning: aggregate job 'non_match_aggregate' references constituent glob pattern 'tests\.\*' with no matches/, - "The stderr record includes a relevant error message" + subtest "non_match_aggregate failed" => sub { + ok(utf8::decode($stderr), "Stderr output is UTF8-clean"); + like( + $stderr, + qr/warning: aggregate job 'non_match_aggregate' references constituent glob pattern 'tests\.\*' with no matches/, + "The stderr record includes a relevant error message" + ); + + $jobset->discard_changes; # refresh from DB + like( + $jobset->errormsg, + qr/tests\.\*: constituent glob pattern had no matches/, + "The jobset records a relevant error message" + ); + }; + + my $builds = {}; + for my $build ($jobset->builds) { + $builds->{$build->job} = $build; + } + + subtest "basic globbing works" => sub { + ok(defined $builds->{"ok_aggregate"}, "'ok_aggregate' is part of the jobset evaluation"); + my @constituents = $builds->{"ok_aggregate"}->constituents->all; + is(2, scalar @constituents, "'ok_aggregate' has two constituents"); + + my @sortedConstituentNames = sort (map { $_->nixname } @constituents); + + is($sortedConstituentNames[0], "empty-dir-A", "first constituent of 'ok_aggregate' is 'empty-dir-A'"); + is($sortedConstituentNames[1], "empty-dir-B", "second constituent of 'ok_aggregate' is 'empty-dir-B'"); + }; + + subtest "transitivity is OK" => sub { + ok(defined $builds->{"indirect_aggregate"}, 
"'indirect_aggregate' is part of the jobset evaluation"); + my @constituents = $builds->{"indirect_aggregate"}->constituents->all; + is(1, scalar @constituents, "'indirect_aggregate' has one constituent"); + is($constituents[0]->nixname, "direct_aggregate", "'indirect_aggregate' has 'direct_aggregate' as single constituent"); + }; +}; + +subtest "* selects all except current aggregate" => sub { + my $jobsetCtx = $ctx->makeJobset( + expression => 'constituents-glob-all.nix', + ); + my $jobset = $jobsetCtx->{"jobset"}; + + my ($res, $stdout, $stderr) = captureStdoutStderr(60, + ("hydra-eval-jobset", $jobsetCtx->{"project"}->name, $jobset->name) + ); + + subtest "no eval errors" => sub { + ok(utf8::decode($stderr), "Stderr output is UTF8-clean"); + ok( + $stderr !~ "aggregate job ‘ok_aggregate’ has a constituent .* that doesn't correspond to a Hydra build", + "Catchall wildcard must not select itself as constituent" + ); + + $jobset->discard_changes; # refresh from DB + is( + $jobset->errormsg, + "", + "eval-errors non-empty" + ); + }; + + my $builds = {}; + for my $build ($jobset->builds) { + $builds->{$build->job} = $build; + } + + subtest "two constituents" => sub { + ok(defined $builds->{"ok_aggregate"}, "'ok_aggregate' is part of the jobset evaluation"); + my @constituents = $builds->{"ok_aggregate"}->constituents->all; + is(2, scalar @constituents, "'ok_aggregate' has two constituents"); + + my @sortedConstituentNames = sort (map { $_->nixname } @constituents); + + is($sortedConstituentNames[0], "empty-dir-A", "first constituent of 'ok_aggregate' is 'empty-dir-A'"); + is($sortedConstituentNames[1], "empty-dir-B", "second constituent of 'ok_aggregate' is 'empty-dir-B'"); + }; +}; + +subtest "trivial cycle check" => sub { + my $jobsetCtx = $ctx->makeJobset( + expression => 'constituents-cycle.nix', ); + my $jobset = $jobsetCtx->{"jobset"}; + + my ($res, $stdout, $stderr) = captureStdoutStderr(60, + ("hydra-eval-jobset", $jobsetCtx->{"project"}->name, 
$jobset->name) + ); + + ok( + $stderr =~ "Found dependency cycle between jobs 'indirect_aggregate' and 'ok_aggregate'", + "Dependency cycle error is on stderr" + ); + + ok(utf8::decode($stderr), "Stderr output is UTF8-clean"); $jobset->discard_changes; # refresh from DB like( $jobset->errormsg, - qr/tests\.\*: constituent glob pattern had no matches/, - "The jobset records a relevant error message" + qr/Dependency cycle: indirect_aggregate <-> ok_aggregate/, + "eval-errors non-empty" ); + + is(0, $jobset->builds->count, "No builds should be scheduled"); }; -my $builds = {}; -for my $build ($jobset->builds) { - $builds->{$build->job} = $build; -} +subtest "cycle check with globbing" => sub { + my $jobsetCtx = $ctx->makeJobset( + expression => 'constituents-cycle-glob.nix', + ); + my $jobset = $jobsetCtx->{"jobset"}; -subtest "basic globbing works" => sub { - ok(defined $builds->{"ok_aggregate"}, "'ok_aggregate' is part of the jobset evaluation"); - my @constituents = $builds->{"ok_aggregate"}->constituents->all; - is(2, scalar @constituents, "'ok_aggregate' has two constituents"); + my ($res, $stdout, $stderr) = captureStdoutStderr(60, + ("hydra-eval-jobset", $jobsetCtx->{"project"}->name, $jobset->name) + ); - my @sortedConstituentNames = sort (map { $_->nixname } @constituents); + ok(utf8::decode($stderr), "Stderr output is UTF8-clean"); - is($sortedConstituentNames[0], "empty-dir-A", "first constituent of 'ok_aggregate' is 'empty-dir-A'"); - is($sortedConstituentNames[1], "empty-dir-B", "second constituent of 'ok_aggregate' is 'empty-dir-B'"); -}; + $jobset->discard_changes; # refresh from DB + like( + $jobset->errormsg, + qr/in job ‘packages\.constituentA’:\nDependency cycle: indirect_aggregate <-> packages.constituentA/, + "packages.constituentA error missing" + ); + like( + $jobset->errormsg, + qr/in job ‘indirect_aggregate’:\nDependency cycle: indirect_aggregate <-> packages.constituentA/, + "indirect_aggregate error missing" + ); + like( + $jobset->errormsg, 
+ qr/in job ‘ok_aggregate’:\nSkipping aggregate because of a dependency cycle/, + "skipped aggregate error missing" + ); -#subtest "transitivity is OK" => sub { - #ok(defined $builds->{"indirect_aggregate"}, "'indirect_aggregate' is part of the jobset evaluation"); - #my @constituents = $builds->{"indirect_aggregate"}->constituents->all; - #is(1, scalar @constituents, "'indirect_aggregate' has one constituent"); - #is($constituents[0]->nixname, "direct_aggregate", "'indirect_aggregate' has 'direct_aggregate' as single constituent"); -#}; + is(1, $jobset->builds->count, "One job is scheduled"); + my $builds = {}; + for my $build ($jobset->builds) { + $builds->{$build->job} = $build; + } + + ok(defined $builds->{"packages.constituentB"}, "'packages.constituentB' is part of the jobset evaluation"); +}; done_testing; diff --git a/t/jobs/constituents-cycle-glob.nix b/t/jobs/constituents-cycle-glob.nix new file mode 100644 index 000000000..efc152ced --- /dev/null +++ b/t/jobs/constituents-cycle-glob.nix @@ -0,0 +1,34 @@ +with import ./config.nix; +{ + packages.constituentA = mkDerivation { + name = "empty-dir-A"; + builder = ./empty-dir-builder.sh; + _hydraAggregate = true; + _hydraGlobConstituents = true; + constituents = [ "*_aggregate" ]; + }; + + packages.constituentB = mkDerivation { + name = "empty-dir-B"; + builder = ./empty-dir-builder.sh; + }; + + ok_aggregate = mkDerivation { + name = "direct_aggregate"; + _hydraAggregate = true; + _hydraGlobConstituents = true; + constituents = [ + "packages.*" + ]; + builder = ./empty-dir-builder.sh; + }; + + indirect_aggregate = mkDerivation { + name = "indirect_aggregate"; + _hydraAggregate = true; + constituents = [ + "ok_aggregate" + ]; + builder = ./empty-dir-builder.sh; + }; +} diff --git a/t/jobs/constituents-cycle.nix b/t/jobs/constituents-cycle.nix new file mode 100644 index 000000000..7e086aa19 --- /dev/null +++ b/t/jobs/constituents-cycle.nix @@ -0,0 +1,21 @@ +with import ./config.nix; +{ + ok_aggregate = 
mkDerivation { + name = "direct_aggregate"; + _hydraAggregate = true; + _hydraGlobConstituents = true; + constituents = [ + "indirect_aggregate" + ]; + builder = ./empty-dir-builder.sh; + }; + + indirect_aggregate = mkDerivation { + name = "indirect_aggregate"; + _hydraAggregate = true; + constituents = [ + "ok_aggregate" + ]; + builder = ./empty-dir-builder.sh; + }; +} diff --git a/t/jobs/constituents-glob-all.nix b/t/jobs/constituents-glob-all.nix new file mode 100644 index 000000000..d671fd70f --- /dev/null +++ b/t/jobs/constituents-glob-all.nix @@ -0,0 +1,22 @@ +with import ./config.nix; +{ + packages.constituentA = mkDerivation { + name = "empty-dir-A"; + builder = ./empty-dir-builder.sh; + }; + + packages.constituentB = mkDerivation { + name = "empty-dir-B"; + builder = ./empty-dir-builder.sh; + }; + + ok_aggregate = mkDerivation { + name = "direct_aggregate"; + _hydraAggregate = true; + _hydraGlobConstituents = true; + constituents = [ + "*" + ]; + builder = ./empty-dir-builder.sh; + }; +}