From 402e32d8053b8c1d200a0bfd955967bbfcb794b7 Mon Sep 17 00:00:00 2001 From: Mathieu Pellerin Date: Wed, 23 Oct 2024 12:35:46 +0700 Subject: [PATCH 1/2] Avoid constantly re-downloading same large files from the cloud by fixing checksum --- src/core/qfieldcloudprojectsmodel.cpp | 16 +++++++------- src/core/utils/fileutils.cpp | 32 +++++++++++++++++++++++++++ src/core/utils/fileutils.h | 18 ++++++++++----- 3 files changed, 53 insertions(+), 13 deletions(-) diff --git a/src/core/qfieldcloudprojectsmodel.cpp b/src/core/qfieldcloudprojectsmodel.cpp index 4221e75948..cb45d12750 100644 --- a/src/core/qfieldcloudprojectsmodel.cpp +++ b/src/core/qfieldcloudprojectsmodel.cpp @@ -1033,24 +1033,24 @@ void QFieldCloudProjectsModel::projectDownload( const QString &projectId ) const QJsonArray files = payload.value( QStringLiteral( "files" ) ).toArray(); for ( const QJsonValue &fileValue : files ) { - QJsonObject fileObject = fileValue.toObject(); - int fileSize = fileObject.value( QStringLiteral( "size" ) ).toInt(); - QString fileName = fileObject.value( QStringLiteral( "name" ) ).toString(); - QString projectFileName = QStringLiteral( "%1/%2/%3/%4" ).arg( QFieldCloudUtils::localCloudDirectory(), mUsername, projectId, fileName ); - QString cloudChecksumMd5 = fileObject.value( QStringLiteral( "md5sum" ) ).toString(); - QString localChecksumMd5 = FileUtils::fileChecksum( projectFileName, QCryptographicHash::Md5 ).toHex(); + const QJsonObject fileObject = fileValue.toObject(); + const int fileSize = fileObject.value( QStringLiteral( "size" ) ).toInt(); + const QString fileName = fileObject.value( QStringLiteral( "name" ) ).toString(); + const QString projectFileName = QStringLiteral( "%1/%2/%3/%4" ).arg( QFieldCloudUtils::localCloudDirectory(), mUsername, projectId, fileName ); + const QString cloudChecksum = fileObject.value( QStringLiteral( "md5sum" ) ).toString(); + const QString localChecksum = FileUtils::fileEtag( projectFileName ); if ( !fileObject.value( QStringLiteral( 
"size" ) ).isDouble() || fileName.isEmpty() - || cloudChecksumMd5.isEmpty() ) + || cloudChecksum.isEmpty() ) { QgsLogger::debug( QStringLiteral( "Project %1: package in \"files\" list does not contain the expected fields: size(int), name(string), md5sum(string)" ).arg( projectId ) ); emit projectDownloadFinished( projectId, tr( "Latest package data structure error." ) ); return; } - if ( cloudChecksumMd5 == localChecksumMd5 ) + if ( cloudChecksum == localChecksum ) continue; project->downloadFileTransfers.insert( fileName, FileTransfer( fileName, fileSize ) ); diff --git a/src/core/utils/fileutils.cpp b/src/core/utils/fileutils.cpp index 54a3799d7c..e2908e2f19 100644 --- a/src/core/utils/fileutils.cpp +++ b/src/core/utils/fileutils.cpp @@ -174,6 +174,38 @@ QByteArray FileUtils::fileChecksum( const QString &fileName, const QCryptographi return QByteArray(); } +QString FileUtils::fileEtag( const QString &fileName, int partSize ) +{ + QFile f( fileName ); + if ( !f.open( QFile::ReadOnly ) ) + return QString(); + + const qint64 fileSize = f.size(); + QCryptographicHash hash( QCryptographicHash::Md5 ); + if ( fileSize <= partSize ) + { + if ( hash.addData( &f ) ) + { + return hash.result().toHex(); + } + } + else + { + QByteArray md5SumsData; + qint64 readSize = 0; + while ( readSize < fileSize ) + { + hash.addData( f.read( partSize ) ); + md5SumsData += hash.result(); + hash.reset(); + readSize += partSize; + } + hash.addData( md5SumsData ); + return QStringLiteral( "%1-%2" ).arg( hash.result().toHex() ).arg( readSize / partSize ); + } + return QString(); +} + void FileUtils::restrictImageSize( const QString &imagePath, int maximumWidthHeight ) { if ( !QFileInfo::exists( imagePath ) ) diff --git a/src/core/utils/fileutils.h b/src/core/utils/fileutils.h index a89f172ef7..34d2dd1c29 100644 --- a/src/core/utils/fileutils.h +++ b/src/core/utils/fileutils.h @@ -61,15 +61,23 @@ class QFIELD_CORE_EXPORT FileUtils : public QObject Q_INVOKABLE void addImageStamp( const QString 
&imagePath, const QString &text ); static bool copyRecursively( const QString &sourceFolder, const QString &destFolder, QgsFeedback *feedback = nullptr, bool wipeDestFolder = true ); + /** - * Creates checksum of a file. Returns null QByteArray if cannot be calculated. - * - * @param fileName file name to get checksum of - * @param hashAlgorithm hash algorithm (md5, sha1, sha256 etc) - * @return QByteArray checksum + * Returns the checksum of a file. An empty QByteArray will be returned if it cannot be calculated. + * \param fileName file name to get checksum of + * \param hashAlgorithm hash algorithm (md5, sha1, sha256 etc) + * \return QByteArray checksum value */ Q_INVOKABLE static QByteArray fileChecksum( const QString &fileName, const QCryptographicHash::Algorithm hashAlgorithm ); + /** + * Returns an S3 ETag of a file. An empty string will be returned if it cannot be calculated. + * \param fileName file name to get checksum of + * \param partSize maximum size used to divide the file content into parts + * \return QString Etag value + */ + Q_INVOKABLE static QString fileEtag( const QString &fileName, int partSize = 8 * 1024 * 1024 ); + private: static int copyRecursivelyPrepare( const QString &sourceFolder, const QString &destFolder, QList> &mapping ); }; From c117b13c4d258be8dead4e9ff211b78ef1761ac5 Mon Sep 17 00:00:00 2001 From: Mathieu Pellerin Date: Wed, 23 Oct 2024 17:01:21 +0700 Subject: [PATCH 2/2] Apply suggestions --- src/core/qfieldcloudprojectsmodel.cpp | 10 ++++++---- src/core/utils/fileutils.h | 2 +- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/src/core/qfieldcloudprojectsmodel.cpp b/src/core/qfieldcloudprojectsmodel.cpp index cb45d12750..693f24015a 100644 --- a/src/core/qfieldcloudprojectsmodel.cpp +++ b/src/core/qfieldcloudprojectsmodel.cpp @@ -1037,20 +1037,22 @@ void QFieldCloudProjectsModel::projectDownload( const QString &projectId ) const int fileSize = fileObject.value( QStringLiteral( "size" ) ).toInt(); const QString 
fileName = fileObject.value( QStringLiteral( "name" ) ).toString(); const QString projectFileName = QStringLiteral( "%1/%2/%3/%4" ).arg( QFieldCloudUtils::localCloudDirectory(), mUsername, projectId, fileName ); - const QString cloudChecksum = fileObject.value( QStringLiteral( "md5sum" ) ).toString(); - const QString localChecksum = FileUtils::fileEtag( projectFileName ); + // NOTE: the cloud API gives the false impression that the file key `md5sum` holds an MD5 or another checksum. + // It is actually an Object Storage (S3) implementation-specific ETag. + const QString cloudEtag = fileObject.value( QStringLiteral( "md5sum" ) ).toString(); + const QString localEtag = FileUtils::fileEtag( projectFileName ); if ( !fileObject.value( QStringLiteral( "size" ) ).isDouble() || fileName.isEmpty() - || cloudChecksum.isEmpty() ) + || cloudEtag.isEmpty() ) { QgsLogger::debug( QStringLiteral( "Project %1: package in \"files\" list does not contain the expected fields: size(int), name(string), md5sum(string)" ).arg( projectId ) ); emit projectDownloadFinished( projectId, tr( "Latest package data structure error." ) ); return; } - if ( cloudChecksum == localChecksum ) + if ( cloudEtag == localEtag ) continue; project->downloadFileTransfers.insert( fileName, FileTransfer( fileName, fileSize ) ); diff --git a/src/core/utils/fileutils.h b/src/core/utils/fileutils.h index 34d2dd1c29..bb979dfb98 100644 --- a/src/core/utils/fileutils.h +++ b/src/core/utils/fileutils.h @@ -71,7 +71,7 @@ class QFIELD_CORE_EXPORT FileUtils : public QObject Q_INVOKABLE static QByteArray fileChecksum( const QString &fileName, const QCryptographicHash::Algorithm hashAlgorithm ); /** - * Returns an S3 ETag of a file. An empty string will be returned if it cannot be calculated. + * Returns an Object Storage (S3) ETag of a file. An empty string will be returned if it cannot be calculated. 
* \param fileName file name to get checksum of * \param partSize maximum size used to divide the file content into parts * \return QString Etag value