Skip to content

Commit

Permalink
Merge pull request #47013 from makortel/reduceInputFileCatalogMemory
Browse files Browse the repository at this point in the history
Remove redundant data members from InputFileCatalog to reduce memory use
  • Loading branch information
cmsbuild authored Jan 9, 2025
2 parents 7915102 + 5744eaa commit c504609
Show file tree
Hide file tree
Showing 9 changed files with 259 additions and 108 deletions.
12 changes: 6 additions & 6 deletions FWCore/Catalog/interface/InputFileCatalog.h
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
namespace edm {
class FileCatalogItem {
public:
FileCatalogItem(std::vector<std::string> const& pfns, std::string const& lfn) : pfns_(pfns), lfn_(lfn) {}
FileCatalogItem(std::vector<std::string> pfns, std::string lfn) : pfns_(std::move(pfns)), lfn_(std::move(lfn)) {}

std::string const& fileName(unsigned iCatalog) const { return pfns_[iCatalog]; }
std::string const& logicalFileName() const { return lfn_; }
Expand All @@ -33,7 +33,7 @@ namespace edm {

class InputFileCatalog {
public:
InputFileCatalog(std::vector<std::string> const& fileNames,
InputFileCatalog(std::vector<std::string> fileNames,
std::string const& override,
bool useLFNasPFNifLFNnotFound = false,
//switching between two catalog types
Expand All @@ -42,19 +42,19 @@ namespace edm {

~InputFileCatalog();
std::vector<FileCatalogItem> const& fileCatalogItems() const { return fileCatalogItems_; }
std::vector<std::string> const& logicalFileNames() const { return logicalFileNames_; }
std::vector<std::string> fileNames(unsigned iCatalog) const;
bool empty() const { return fileCatalogItems_.empty(); }
static bool isPhysical(std::string const& name) { return (name.empty() || name.find(':') != std::string::npos); }

private:
void init(std::string const& override, bool useLFNasPFNifLFNnotFound, edm::CatalogType catType);
void init(std::vector<std::string> logicalFileNames,
std::string const& override,
bool useLFNasPFNifLFNnotFound,
edm::CatalogType catType);
void findFile(std::string const& lfn,
std::vector<std::string>& pfns,
bool useLFNasPFNifLFNnotFound,
edm::CatalogType catType);
std::vector<std::string> logicalFileNames_;
std::vector<std::string> fileNames_;
std::vector<FileCatalogItem> fileCatalogItems_;
edm::propagate_const<std::unique_ptr<FileLocator>> overrideFileLocator_;

Expand Down
30 changes: 15 additions & 15 deletions FWCore/Catalog/src/InputFileCatalog.cc
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,12 @@

namespace edm {

InputFileCatalog::InputFileCatalog(std::vector<std::string> const& fileNames,
InputFileCatalog::InputFileCatalog(std::vector<std::string> fileNames,
std::string const& override,
bool useLFNasPFNifLFNnotFound,
edm::CatalogType catType)
: logicalFileNames_(fileNames), fileNames_(fileNames), fileCatalogItems_(), overrideFileLocator_() {
init(override, useLFNasPFNifLFNnotFound, catType);
: fileCatalogItems_(), overrideFileLocator_() {
init(std::move(fileNames), override, useLFNasPFNifLFNnotFound, catType);
}

InputFileCatalog::~InputFileCatalog() {}
Expand All @@ -36,7 +36,8 @@ namespace edm {
return tmp;
}

void InputFileCatalog::init(std::string const& inputOverride,
void InputFileCatalog::init(std::vector<std::string> logicalFileNames,
std::string const& inputOverride,
bool useLFNasPFNifLFNnotFound,
edm::CatalogType catType) {
typedef std::vector<std::string>::iterator iter;
Expand Down Expand Up @@ -123,32 +124,31 @@ namespace edm {
throw ex;
}

for (iter it = fileNames_.begin(), lt = logicalFileNames_.begin(), itEnd = fileNames_.end(); it != itEnd;
++it, ++lt) {
boost::trim(*it);
for (auto& lfn : logicalFileNames) {
boost::trim(lfn);
std::vector<std::string> pfns;
if (it->empty()) {
if (lfn.empty()) {
cms::Exception ex("FileCatalog");
ex << "An empty string specified in the fileNames parameter for input source";
ex.addContext("Calling edm::InputFileCatalog::init()");
throw ex;
}
if (isPhysical(*it)) {
if (it->back() == ':') {
if (isPhysical(lfn)) {
if (lfn.back() == ':') {
cms::Exception ex("FileCatalog");
ex << "An empty physical file name specified in the fileNames parameter for input source";
ex.addContext("Calling edm::InputFileCatalog::init()");
throw ex;
}
pfns.push_back(*it);
pfns.push_back(lfn);
// Clear the LFN.
lt->clear();
lfn.clear();
} else {
boost::trim(*lt);
findFile(*lt, pfns, useLFNasPFNifLFNnotFound, catType);
findFile(lfn, pfns, useLFNasPFNifLFNnotFound, catType);
}
lfn.shrink_to_fit(); // try to release memory

fileCatalogItems_.push_back(FileCatalogItem(pfns, *lt));
fileCatalogItems_.emplace_back(std::move(pfns), std::move(lfn));
}
}

Expand Down
2 changes: 1 addition & 1 deletion FWCore/Catalog/test/BuildFile.xml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
<use name="FWCore/Catalog"/>
<use name="FWCore/ServiceRegistry"/>
<use name="catch2"/>
<bin name="edmCatalogFileLocator_t" file="FileLocator_t.cpp">
<bin name="TestFWCoreCatalog" file="FileLocator_t.cpp,InputFileCatalog_t.cpp">
</bin>
94 changes: 11 additions & 83 deletions FWCore/Catalog/test/FileLocator_t.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2,96 +2,24 @@
#include "FWCore/Catalog/interface/SiteLocalConfig.h"
#include "FWCore/Utilities/interface/Exception.h"
#include "FWCore/ServiceRegistry/interface/ServiceRegistry.h"
#include <filesystem>

#include "TestSiteLocalConfig.h"

#include <filesystem>
#include <string>

#define CATCH_CONFIG_MAIN
#include "catch.hpp"

namespace {
class TestSiteLocalConfig : public edm::SiteLocalConfig {
public:
//constructor using trivial data catalogs
TestSiteLocalConfig(std::vector<std::string> catalogs) : m_trivialCatalogs(std::move(catalogs)) {}
//constructor using Rucio data catalogs
TestSiteLocalConfig(std::vector<edm::CatalogAttributes> catalogs) : m_catalogs(std::move(catalogs)) {}
std::vector<std::string> const& trivialDataCatalogs() const final { return m_trivialCatalogs; }
std::vector<edm::CatalogAttributes> const& dataCatalogs() const final { return m_catalogs; }
std::filesystem::path const storageDescriptionPath(const edm::CatalogAttributes& aDataCatalog) const final {
return std::filesystem::path();
}

std::string const lookupCalibConnect(std::string const& input) const final { return std::string(); }
std::string const rfioType(void) const final { return std::string(); }

std::string const* sourceCacheTempDir() const final { return nullptr; }
double const* sourceCacheMinFree() const final { return nullptr; }
std::string const* sourceCacheHint() const final { return nullptr; }
std::string const* sourceCloneCacheHint() const final { return nullptr; }
std::string const* sourceReadHint() const final { return nullptr; }
unsigned int const* sourceTTreeCacheSize() const final { return nullptr; }
unsigned int const* sourceTimeout() const final { return nullptr; }
bool enablePrefetching() const final { return false; }
unsigned int debugLevel() const final { return 0; }
std::vector<std::string> const* sourceNativeProtocols() const final { return nullptr; }
struct addrinfo const* statisticsDestination() const final { return nullptr; }
std::set<std::string> const* statisticsInfo() const final { return nullptr; }
std::string const& siteName(void) const final { return m_emptyString; }
std::string const& subSiteName(void) const final { return m_emptyString; }
bool useLocalConnectString() const final { return false; }
std::string const& localConnectPrefix() const final { return m_emptyString; }
std::string const& localConnectSuffix() const final { return m_emptyString; }

private:
std::vector<std::string> m_trivialCatalogs;
std::vector<edm::CatalogAttributes> m_catalogs;
std::filesystem::path m_storageDescription_path;
std::string m_emptyString;
};
} // namespace

TEST_CASE("FileLocator with Rucio data catalog", "[filelocatorRucioDataCatalog]") {
//catalog for testing "prefix"
edm::CatalogAttributes aCatalog;
aCatalog.site = "T1_US_FNAL";
aCatalog.subSite = "T1_US_FNAL";
aCatalog.storageSite = "T1_US_FNAL";
aCatalog.volume = "American_Federation";
aCatalog.protocol = "XRootD";
std::vector<edm::CatalogAttributes> tmp{aCatalog};
//catalog for testing "rules"
aCatalog.site = "T1_US_FNAL";
aCatalog.subSite = "T1_US_FNAL";
aCatalog.storageSite = "T1_US_FNAL";
aCatalog.volume = "FNAL_dCache_EOS";
aCatalog.protocol = "XRootD";
tmp.push_back(aCatalog);
//catalog for testing chained "rules"
aCatalog.site = "T1_US_FNAL";
aCatalog.subSite = "T1_US_FNAL";
aCatalog.storageSite = "T1_US_FNAL";
aCatalog.volume = "FNAL_dCache_EOS";
aCatalog.protocol = "root";
tmp.push_back(aCatalog);

//create the services
edm::ServiceToken tempToken(
edm::ServiceRegistry::createContaining(std::unique_ptr<edm::SiteLocalConfig>(new TestSiteLocalConfig(tmp))));

std::string CMSSW_BASE(std::getenv("CMSSW_BASE"));
std::string CMSSW_RELEASE_BASE(std::getenv("CMSSW_RELEASE_BASE"));
std::string file_name("/src/FWCore/Catalog/test/storage.json");
std::string full_file_name = std::filesystem::exists((CMSSW_BASE + file_name).c_str())
? CMSSW_BASE + file_name
: CMSSW_RELEASE_BASE + file_name;
TEST_CASE("FileLocator with Rucio data catalog", "[FWCore/Catalog]") {
edm::ServiceToken tempToken = edmtest::catalog::makeTestSiteLocalConfigToken();

SECTION("prefix") {
edm::ServiceRegistry::Operate operate(tempToken);
//empty catalog
edm::CatalogAttributes tmp_cat;
//use the first catalog provided by site local config
edm::FileLocator fl(tmp_cat, 0, full_file_name);
edm::FileLocator fl(tmp_cat, 0);
CHECK("root://cmsxrootd.fnal.gov/store/group/bha/bho" ==
fl.pfn("/store/group/bha/bho", edm::CatalogType::RucioCatalog));
}
Expand All @@ -100,7 +28,7 @@ TEST_CASE("FileLocator with Rucio data catalog", "[filelocatorRucioDataCatalog]"
//empty catalog
edm::CatalogAttributes tmp_cat;
//use the second catalog provided by site local config
edm::FileLocator fl(tmp_cat, 1, full_file_name);
edm::FileLocator fl(tmp_cat, 1);
const std::array<const char*, 7> lfn = {{"/bha/bho",
"bha",
"file:bha",
Expand All @@ -119,7 +47,7 @@ TEST_CASE("FileLocator with Rucio data catalog", "[filelocatorRucioDataCatalog]"
//empty catalog
edm::CatalogAttributes tmp_cat;
//use the third catalog provided by site local config above
edm::FileLocator fl(tmp_cat, 2, full_file_name);
edm::FileLocator fl(tmp_cat, 2);
const std::array<const char*, 7> lfn = {{"/bha/bho",
"bha",
"file:bha",
Expand All @@ -139,7 +67,7 @@ TEST_CASE("FileLocator with Rucio data catalog", "[filelocatorRucioDataCatalog]"
}
}

TEST_CASE("FileLocator", "[filelocator]") {
TEST_CASE("FileLocator with TrivialFileCatalog", "[FWCore/Catalog]") {
std::string CMSSW_BASE(std::getenv("CMSSW_BASE"));
std::string CMSSW_RELEASE_BASE(std::getenv("CMSSW_RELEASE_BASE"));
std::string file_name("/src/FWCore/Catalog/test/simple_catalog.xml");
Expand All @@ -149,8 +77,8 @@ TEST_CASE("FileLocator", "[filelocator]") {

//create the services
std::vector<std::string> tmp{std::string("trivialcatalog_file:") + full_file_name + "?protocol=xrd"};
edm::ServiceToken tempToken(
edm::ServiceRegistry::createContaining(std::unique_ptr<edm::SiteLocalConfig>(new TestSiteLocalConfig(tmp))));
edm::ServiceToken tempToken(edm::ServiceRegistry::createContaining(
std::unique_ptr<edm::SiteLocalConfig>(std::make_unique<edmtest::catalog::TestSiteLocalConfig>(tmp))));

//make the services available
SECTION("standard") {
Expand Down
139 changes: 139 additions & 0 deletions FWCore/Catalog/test/InputFileCatalog_t.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
#include "FWCore/Catalog/interface/InputFileCatalog.h"
#include "FWCore/ServiceRegistry/interface/ServiceRegistry.h"

#include "TestSiteLocalConfig.h"

#include "catch.hpp"

TEST_CASE("InputFileCatalog with Rucio data catalog", "[FWCore/Catalog]") {
edm::ServiceToken tempToken = edmtest::catalog::makeTestSiteLocalConfigToken();

SECTION("Empty") {
edm::ServiceRegistry::Operate operate(tempToken);
edm::InputFileCatalog catalog({}, "");
REQUIRE(catalog.empty());
}

SECTION("isPhysical") {
REQUIRE(edm::InputFileCatalog::isPhysical(""));
REQUIRE(not edm::InputFileCatalog::isPhysical("/foo/bar"));
REQUIRE(edm::InputFileCatalog::isPhysical("file:/foo/bar"));
}

SECTION("Default behavior") {
edm::ServiceRegistry::Operate operate(tempToken);

edm::InputFileCatalog catalog(std::vector<std::string>{"/store/foo/bar", " file:/foo/bar", "root://foobar "}, "");

SECTION("fileNames") {
SECTION("Catalog 0") {
auto const names = catalog.fileNames(0);
REQUIRE(names.size() == 3);
CHECK(names[0] == "root://cmsxrootd.fnal.gov/store/foo/bar");
CHECK(names[1] == "file:/foo/bar");
CHECK(names[2] == "root://foobar");
}
// The fileNames() works only for catalog 0
// Catalog 1 or 2 leads to a crash because the input file list
// is a mixture of LFNs and PFNs
// This isn't really good behavior...
}

SECTION("fileCatalogItems") {
auto const& items = catalog.fileCatalogItems();
REQUIRE(items.size() == 3);
CHECK(items[0].logicalFileName() == "/store/foo/bar");
CHECK(items[1].logicalFileName() == "");
CHECK(items[2].logicalFileName() == "");

REQUIRE(items[0].fileNames().size() == 3);
REQUIRE(items[1].fileNames().size() == 1);
REQUIRE(items[2].fileNames().size() == 1);

SECTION("Catalog 0") {
CHECK(items[0].fileName(0) == "root://cmsxrootd.fnal.gov/store/foo/bar");
CHECK(items[1].fileName(0) == "file:/foo/bar");
CHECK(items[2].fileName(0) == "root://foobar");
}
SECTION("Catalog 1") {
CHECK(items[0].fileName(1) == "root://cmsdcadisk.fnal.gov//dcache/uscmsdisk/store/foo/bar");
}
SECTION("Catalog 2") { CHECK(items[0].fileName(2) == "root://host.domain//pnfs/cms/store/foo/bar"); }
}
}

SECTION("Override catalog") {
edm::ServiceRegistry::Operate operate(tempToken);

edm::InputFileCatalog catalog(std::vector<std::string>{"/store/foo/bar", " file:/foo/bar", "root://foobar "},
"T1_US_FNAL,,T1_US_FNAL,FNAL_dCache_EOS,XRootD");

SECTION("fileNames") {
auto const names = catalog.fileNames(0);
REQUIRE(names.size() == 3);
CHECK(names[0] == "root://cmsdcadisk.fnal.gov//dcache/uscmsdisk/store/foo/bar");
CHECK(names[1] == "file:/foo/bar");
CHECK(names[2] == "root://foobar");
}

SECTION("fileCatalogItems") {
auto const& items = catalog.fileCatalogItems();
REQUIRE(items.size() == 3);

CHECK(items[0].logicalFileName() == "/store/foo/bar");
CHECK(items[1].logicalFileName() == "");
CHECK(items[2].logicalFileName() == "");

REQUIRE(items[0].fileNames().size() == 1);
REQUIRE(items[1].fileNames().size() == 1);
REQUIRE(items[2].fileNames().size() == 1);

CHECK(items[0].fileName(0) == "root://cmsdcadisk.fnal.gov//dcache/uscmsdisk/store/foo/bar");
CHECK(items[1].fileName(0) == "file:/foo/bar");
CHECK(items[2].fileName(0) == "root://foobar");
}
}

SECTION("useLFNasPFNifLFNnotFound") {
edm::ServiceRegistry::Operate operate(tempToken);

edm::InputFileCatalog catalog(
std::vector<std::string>{"/store/foo/bar", "/tmp/foo/bar", "root://foobar "}, "", true);

SECTION("fileNames") {
SECTION("Catalog 0") {
auto const names = catalog.fileNames(0);
REQUIRE(names.size() == 3);
CHECK(names[0] == "root://cmsxrootd.fnal.gov/store/foo/bar");
CHECK(names[1] == "/tmp/foo/bar");
CHECK(names[2] == "root://foobar");
}
}

SECTION("fileCatalogItems") {
auto const& items = catalog.fileCatalogItems();
REQUIRE(items.size() == 3);
CHECK(items[0].logicalFileName() == "/store/foo/bar");
CHECK(items[1].logicalFileName() == "/tmp/foo/bar");
CHECK(items[2].logicalFileName() == "");

REQUIRE(items[0].fileNames().size() == 3);
REQUIRE(items[1].fileNames().size() == 3);
REQUIRE(items[2].fileNames().size() == 1);

SECTION("Catalog 0") {
CHECK(items[0].fileName(0) == "root://cmsxrootd.fnal.gov/store/foo/bar");
CHECK(items[1].fileName(0) == "/tmp/foo/bar");
CHECK(items[2].fileName(0) == "root://foobar");
}
SECTION("Catalog 1") {
CHECK(items[0].fileName(1) == "root://cmsdcadisk.fnal.gov//dcache/uscmsdisk/store/foo/bar");
CHECK(items[1].fileName(1) == "/tmp/foo/bar");
}
SECTION("Catalog 2") {
CHECK(items[0].fileName(2) == "root://host.domain//pnfs/cms/store/foo/bar");
CHECK(items[1].fileName(2) == "/tmp/foo/bar");
}
}
}
}
Loading

0 comments on commit c504609

Please sign in to comment.