From c44c522688e3329b5881ee2baea0d5be777d5ebc Mon Sep 17 00:00:00 2001 From: Jake Smith Date: Fri, 25 Oct 2024 18:40:48 +0100 Subject: [PATCH] HPCC-32890 Add cleanjobqueues to daliadmin cleanjobqueues scans all /JobQueues and the Clients within them, and deletes any which are not in use (connected==0). NB: this should only be necessary on old (pre 9.4 systems). (see HPCC-31807 in 9.4+) Signed-off-by: Jake Smith --- dali/daliadmin/daadmin.cpp | 39 ++++++++++++++++++++++++++++++++++++ dali/daliadmin/daadmin.hpp | 1 + dali/daliadmin/daliadmin.cpp | 6 ++++++ 3 files changed, 46 insertions(+) diff --git a/dali/daliadmin/daadmin.cpp b/dali/daliadmin/daadmin.cpp index d1ea8bd9b02..e143850e3eb 100644 --- a/dali/daliadmin/daadmin.cpp +++ b/dali/daliadmin/daadmin.cpp @@ -3385,4 +3385,43 @@ void removeOrphanedGlobalVariables(bool dryrun, bool reconstruct) } } +void cleanJobQueues(bool dryRun) +{ + Owned conn = querySDS().connect("/JobQueues", myProcessSession(), 0, SDS_LOCK_TIMEOUT); + if (!conn) + { + WARNLOG("Failed to connect to /JobQueues"); + return; + } + Owned queueIter = conn->queryRoot()->getElements("Queue"); + ForEach(*queueIter) + { + IPropertyTree &queue = queueIter->query(); + const char *name = queue.queryProp("@name"); + if (isEmptyString(name)) // should not be blank, but guard + continue; + PROGLOG("Processing queue: %s", name); + VStringBuffer queuePath("/JobQueues/Queue[@name=\"%s\"]", name); + Owned queueConn = querySDS().connect(queuePath, myProcessSession(), RTM_LOCK_WRITE, SDS_LOCK_TIMEOUT); + IPropertyTree *queueRoot = queueConn->queryRoot(); + + Owned clientIter = queueRoot->getElements("Client"); + std::vector toRemove; + ForEach (*clientIter) + { + IPropertyTree &client = clientIter->query(); + if (client.getPropInt("@connected") == 0) + toRemove.push_back(&client); + } + if (!dryRun) + { + for (auto &client: toRemove) + queue.removeTree(client); + } + PROGLOG("Job queue '%s': %s %u stale client entries", name, dryRun ? "dryrun, there are" : "removed", (unsigned)toRemove.size()); + queueConn->commit(); + queueConn.clear(); + } +} + } // namespace daadmin diff --git a/dali/daliadmin/daadmin.hpp b/dali/daliadmin/daadmin.hpp index 687d0d882b2..f0141760e03 100644 --- a/dali/daliadmin/daadmin.hpp +++ b/dali/daliadmin/daadmin.hpp @@ -95,5 +95,6 @@ extern DALIADMIN_API void daliping(const char *dalis, unsigned connecttime, unsi extern DALIADMIN_API void validateStore(bool fix, bool deleteFiles, bool verbose); extern DALIADMIN_API void removeOrphanedGlobalVariables(bool dryrun, bool reconstruct); +extern DALIADMIN_API void cleanJobQueues(bool dryRun); } // namespace daadmin \ No newline at end of file diff --git a/dali/daliadmin/daliadmin.cpp b/dali/daliadmin/daliadmin.cpp index 0491b7d42f1..4c56836aac6 100644 --- a/dali/daliadmin/daliadmin.cpp +++ b/dali/daliadmin/daliadmin.cpp @@ -93,6 +93,7 @@ void usage(const char *exe) printf("Other dali server and misc commands:\n"); printf(" auditlog \n"); printf(" cleanglobalwuid [dryrun] [noreconstruct]\n"); + printf(" cleanjobqueues [dryrun]\n"); printf(" clusterlist -- list clusters (mask optional)\n"); printf(" coalesce -- force transaction coalesce\n"); printf(" dalilocks [ ] [ files ] -- get all locked files/xpaths\n"); @@ -576,6 +577,11 @@ int main(int argc, const char* argv[]) } removeOrphanedGlobalVariables(dryrun, reconstruct); } + else if (strieq(cmd, "cleanjobqueues")) + { + bool dryRun = np>0 && strieq("dryrun", params.item(1)); + cleanJobQueues(dryRun); + } else if (strieq(cmd, "remotetest")) remoteTest(params.item(1), true); else