From 4868c087b7d0694fcf7a46d0fc8d193d24cf9343 Mon Sep 17 00:00:00 2001
From: lkotipal
Date: Mon, 4 Dec 2023 10:44:16 +0200
Subject: [PATCH 1/2] Actually tell why file read fails

---
 vlsv_reader_parallel.cpp | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/vlsv_reader_parallel.cpp b/vlsv_reader_parallel.cpp
index fa5be8a..68cfd9c 100644
--- a/vlsv_reader_parallel.cpp
+++ b/vlsv_reader_parallel.cpp
@@ -417,7 +417,8 @@ namespace vlsv {
     * @param mpiInfo Additional MPI info for optimizing file I/O. Must be the same value on all processes.
     * @return If true, VLSV file was opened successfully. All processes return the same value.*/
    bool ParallelReader::open(const std::string& fname,MPI_Comm comm,const int& masterRank,MPI_Info mpiInfo) {
-      bool success = true;
+      bool success {true};
+      int error {MPI_SUCCESS};
       this->comm = comm;
       this->masterRank = masterRank;
       MPI_Comm_rank(comm,&myRank);
@@ -430,10 +431,13 @@ namespace vlsv {
       // Attempt to open the given input file using MPI:
       fileName = fname;
       int accessMode = MPI_MODE_RDONLY;
-      if (MPI_File_open(comm,const_cast<char*>(fileName.c_str()),accessMode,mpiInfo,&filePtr) != MPI_SUCCESS) success = false;
-      else parallelFileOpen = true;
-
-      if (success == false) cerr << "Failed to open parallel file" << endl;
+      error = MPI_File_open(comm,const_cast<char*>(fileName.c_str()),accessMode,mpiInfo,&filePtr);
+      if (error == MPI_SUCCESS) {
+         parallelFileOpen = true;
+      } else {
+         success = false;
+         cerr << "Failed to open parallel file with MPI error " << error << endl;
+      }
 
       // Only master process reads file footer and endianness. This is done using
       // VLSVReader open() member function:

From d223d2121f4b37dbb49573b0863e9068386363f3 Mon Sep 17 00:00:00 2001
From: lkotipal
Date: Mon, 4 Dec 2023 13:31:23 +0200
Subject: [PATCH 2/2] Output MPI errors as strings

---
 vlsv_common.cpp          |  8 ++++++++
 vlsv_common.h            |  2 ++
 vlsv_reader_parallel.cpp | 11 ++++++-----
 vlsv_writer.cpp          | 30 +++++++++++++++++++++++-------
 4 files changed, 39 insertions(+), 12 deletions(-)

diff --git a/vlsv_common.cpp b/vlsv_common.cpp
index eda3bf7..4a1a2c7 100644
--- a/vlsv_common.cpp
+++ b/vlsv_common.cpp
@@ -19,6 +19,7 @@
 #include <...>
 #include <...>
 #include <...>
+#include <mpi.h>
 
 #include "vlsv_common.h"
 
@@ -152,5 +153,12 @@ namespace vlsv {
          break;
       }
    }
+
+   const std::string getMPIErrorString(const int err) {
+      char s[MPI_MAX_ERROR_STRING];
+      int len;
+      MPI_Error_string(err, s, &len);
+      return std::string(s, len);
+   }
 
 } // namespace vlsv
 
diff --git a/vlsv_common.h b/vlsv_common.h
index 5ba5e5e..588e5c4 100644
--- a/vlsv_common.h
+++ b/vlsv_common.h
@@ -329,6 +329,8 @@ namespace vlsv {
    double convReal8(const char* const ptr,const bool& swapEndian=false);
 
    std::string printDataRate(const uint64_t& bytes,const double& t);
+
+   const std::string getMPIErrorString(const int err);
 
 } // namespace vlsv
 #endif
diff --git a/vlsv_reader_parallel.cpp b/vlsv_reader_parallel.cpp
index 68cfd9c..bc84b9e 100644
--- a/vlsv_reader_parallel.cpp
+++ b/vlsv_reader_parallel.cpp
@@ -418,7 +418,6 @@ namespace vlsv {
     * @return If true, VLSV file was opened successfully. All processes return the same value.*/
    bool ParallelReader::open(const std::string& fname,MPI_Comm comm,const int& masterRank,MPI_Info mpiInfo) {
       bool success {true};
-      int error {MPI_SUCCESS};
       this->comm = comm;
       this->masterRank = masterRank;
       MPI_Comm_rank(comm,&myRank);
@@ -431,12 +430,12 @@ namespace vlsv {
       // Attempt to open the given input file using MPI:
       fileName = fname;
       int accessMode = MPI_MODE_RDONLY;
-      error = MPI_File_open(comm,const_cast<char*>(fileName.c_str()),accessMode,mpiInfo,&filePtr);
-      if (error == MPI_SUCCESS) {
+      int err {MPI_File_open(comm,const_cast<char*>(fileName.c_str()),accessMode,mpiInfo,&filePtr)};
+      if (err == MPI_SUCCESS) {
          parallelFileOpen = true;
       } else {
          success = false;
-         cerr << "Failed to open parallel file with MPI error " << error << endl;
+         cerr << "Failed to open parallel file with MPI error " << getMPIErrorString(err) << endl;
       }
 
       // Only master process reads file footer and endianness. This is done using
@@ -516,7 +515,9 @@ namespace vlsv {
       }
 
       MPI_Status status;
-      if (MPI_File_read_at_all(filePtr,start+counter*maxBytes,pos,readSize,MPI_BYTE,&status) != MPI_SUCCESS) {
+      int err {MPI_File_read_at_all(filePtr,start+counter*maxBytes,pos,readSize,MPI_BYTE,&status)};
+      if (err != MPI_SUCCESS) {
+         cerr << "Failed to read data with MPI error " << getMPIErrorString(err) << endl;
          success = false;
       }
 
diff --git a/vlsv_writer.cpp b/vlsv_writer.cpp
index c0d866d..69895fb 100644
--- a/vlsv_writer.cpp
+++ b/vlsv_writer.cpp
@@ -253,10 +253,14 @@ namespace vlsv {
       // possibly caused by MPI_File_delete call, that's the reason for the barrier.
       int accessMode = (MPI_MODE_WRONLY | MPI_MODE_CREATE);
       if (dryRunning == false) {
-         if (myrank == masterRank && append == false) MPI_File_delete(const_cast<char*>(fname.c_str()),mpiInfo);
+         if (myrank == masterRank && append == false) {
+            MPI_File_delete(const_cast<char*>(fname.c_str()),mpiInfo);
+         }
          MPI_Barrier(comm);
-         if (MPI_File_open(comm,const_cast<char*>(fileName.c_str()),accessMode,mpiInfo,&fileptr) != MPI_SUCCESS) {
+         int err {MPI_File_open(comm,const_cast<char*>(fileName.c_str()),accessMode,mpiInfo,&fileptr)};
+         if (err != MPI_SUCCESS) {
             fileOpen = false;
+            cerr << "Failed to open parallel file with MPI error " << getMPIErrorString(err) << endl;
             return fileOpen;
          }
       }
@@ -311,8 +315,16 @@ namespace vlsv {
       ptr[0] = detectEndianness();
       const double t_start = MPI_Wtime();
       if (dryRunning == false) {
-         if (MPI_File_write_at(fileptr,0,&endianness,1,MPI_Type<uint64_t>(),MPI_STATUS_IGNORE) != MPI_SUCCESS) success = false;
-         if (MPI_File_write_at(fileptr,8,&endianness,1,MPI_Type<uint64_t>(),MPI_STATUS_IGNORE) != MPI_SUCCESS) success = false;
+         int err {MPI_File_write_at(fileptr,0,&endianness,1,MPI_Type<uint64_t>(),MPI_STATUS_IGNORE)};
+         if (err != MPI_SUCCESS) {
+            cerr << "Failed to write endianness with MPI error " << getMPIErrorString(err) << endl;
+            success = false;
+         }
+         err = MPI_File_write_at(fileptr,8,&endianness,1,MPI_Type<uint64_t>(),MPI_STATUS_IGNORE);
+         if (err != MPI_SUCCESS) {
+            cerr << "Failed to write endianness with MPI error " << getMPIErrorString(err) << endl;
+            success = false;
+         }
       }
       writeTime += (MPI_Wtime() - t_start);
       offset += 2*sizeof(uint64_t); //only master rank keeps a running count
@@ -344,9 +356,13 @@ namespace vlsv {
     * @param newSize New size.
     * @return If true, output file was successfully resized.*/
    bool Writer::setSize(MPI_Offset newSize) {
-      int rvalue = MPI_File_set_size(fileptr,newSize);
-      if (rvalue == MPI_SUCCESS) return true;
-      return false;
+      int err = MPI_File_set_size(fileptr,newSize);
+      if (err != MPI_SUCCESS) {
+         cerr << "Failed to resize output with MPI error " << getMPIErrorString(err) << endl;
+         return false;
+      } else {
+         return true;
+      }
    }
 
    /** Start dry run mode. In this mode no file I/O is performed, but getBytesWritten()
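
For reference, a minimal standalone sketch of the error-reporting pattern these patches introduce, assuming only standard MPI with the MPI-3 const-correct MPI_File_open signature. The helper name errorString and the file name "no_such_file.vlsv" are illustrative, not part of the patches.

   #include <iostream>
   #include <string>
   #include <mpi.h>

   // Same pattern as the getMPIErrorString() helper added in PATCH 2/2:
   // MPI_Error_string() fills a caller-provided buffer of at most
   // MPI_MAX_ERROR_STRING characters and reports the actual length, which
   // is then used to build the std::string.
   std::string errorString(const int err) {
      char s[MPI_MAX_ERROR_STRING];
      int len = 0;
      MPI_Error_string(err, s, &len);
      return std::string(s, len);
   }

   int main(int argc, char** argv) {
      MPI_Init(&argc, &argv);

      // MPI file operations default to MPI_ERRORS_RETURN (unlike
      // communicator operations, which default to MPI_ERRORS_ARE_FATAL),
      // so a failed open returns an error code instead of aborting;
      // this is what makes the checks in the patches effective.
      MPI_File f;
      const int err = MPI_File_open(MPI_COMM_WORLD, "no_such_file.vlsv",
                                    MPI_MODE_RDONLY, MPI_INFO_NULL, &f);
      if (err != MPI_SUCCESS) {
         std::cerr << "Failed to open parallel file with MPI error "
                   << errorString(err) << std::endl;
      } else {
         MPI_File_close(&f);
      }

      MPI_Finalize();
      return 0;
   }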