-
Notifications
You must be signed in to change notification settings - Fork 16
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
sarif: initial implementation of csdiff/v0 fingerprints
It hashes the data that csdiff uses in its matching algorithm. The interfaces are already prepared for csdiff/v1, which will also take the line content into account when available. From the updated tests it is obvious that these hashes already have numerous collisions on the existing test data. Related: https://issues.redhat.com/browse/OSH-9 Related: #98
- Loading branch information
Showing
14 changed files
with
3,679 additions
and
867 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,84 @@ | ||
/* | ||
* Copyright (C) 2024 Red Hat, Inc. | ||
* | ||
* This file is part of csdiff. | ||
* | ||
* csdiff is free software: you can redistribute it and/or modify | ||
* it under the terms of the GNU General Public License as published by | ||
* the Free Software Foundation, either version 3 of the License, or | ||
* any later version. | ||
* | ||
* csdiff is distributed in the hope that it will be useful, | ||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
* GNU General Public License for more details. | ||
* | ||
* You should have received a copy of the GNU General Public License | ||
* along with csdiff. If not, see <http://www.gnu.org/licenses/>. | ||
*/ | ||
|
||
#include "finger-print.hh" | ||
|
||
#include "hash-util.hh" | ||
#include "msg-filter.hh" | ||
|
||
#include <cassert> | ||
|
||
#include <boost/uuid/name_generator.hpp> // for boost::uuids::detail::sha1 | ||
|
||
/// return SHA1 hash of `str` as hex-encoded string | ||
static std::string computeHexSHA1(const std::string &str) | ||
{ | ||
using boost::uuids::detail::sha1; | ||
return hexHashStr<sha1>(str); | ||
} | ||
|
||
/// private data of FingerPrinter, reachable through its `d` pointer
struct FingerPrinter::Private { | ||
// checker, file path, key event and filtered message joined by `sep`
// (assembled once by the FingerPrinter constructor)
std::string basicData; | ||
// hashed together with basicData for FPV_CSDIFF_WITH_LINE_CONTENT; nothing
// in the code shown here assigns it, so it stays empty and getHash()
// returns an empty string for that fingerprint kind
std::string lineContent; | ||
}; | ||
|
||
// separator used for hashing of data composed from multiple strings | ||
// (placed between the fields of basicData and between basicData and
// lineContent before the result is passed to SHA1)
const std::string sep = "\n"; | ||
|
||
// TODO: consider lazy evaluation of basicData/lineContent
/// Gather from `def` the data that the fingerprints are computed from.
///
/// Only basicData is filled in; lineContent is left empty.
FingerPrinter::FingerPrinter(const Defect &def):
    d(new Private)
{
    // csdiff's filter drops details that are insignificant for matching
    const MsgFilter &msgFilter = MsgFilter::inst();

    // the key event provides the file name, event name and message
    // NOTE(review): the index is not range-checked here -- presumably Defect
    // guarantees that keyEventIdx refers to an existing event; confirm
    const DefEvent &keyEvent = def.events[def.keyEventIdx];

    // transformed file path (full path is forced)
    const std::string filteredPath =
        msgFilter.filterPath(keyEvent.fileName, /* forceFullPath */ true);

    // message with checker-specific noise filtered out
    const std::string filteredMsg =
        msgFilter.filterMsg(keyEvent.msg, def.checker);

    // join all that DefLookup::lookup() looks at, using `sep` between fields
    std::string &data = d->basicData;
    data = def.checker;     // checker
    data += sep;
    data += filteredPath;   // file path
    data += sep;
    data += keyEvent.event; // key event
    data += sep;
    data += filteredMsg;    // message
}
|
||
// defined in this file (not in the header), where FingerPrinter::Private is
// a complete type, so that std::unique_ptr<Private> can destroy it
FingerPrinter::~FingerPrinter() = default; | ||
|
||
// TODO: consider caching of SHA1 hashes for subsequent calls | ||
std::string FingerPrinter::getHash(const EFingerPrintVer fpv) const | ||
{ | ||
if (d->basicData.empty()) | ||
// not enough data to compute the hash from | ||
return ""; | ||
|
||
if (fpv == FPV_CSDIFF) | ||
// return SHA1 hash from basicData | ||
return computeHexSHA1(d->basicData); | ||
|
||
assert(fpv == FPV_CSDIFF_WITH_LINE_CONTENT); | ||
if (d->lineContent.empty()) | ||
// no line content available | ||
return ""; | ||
|
||
// return SHA1 hash from basicData AND lineContent | ||
return computeHexSHA1(d->basicData + sep + d->lineContent); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
/* | ||
* Copyright (C) 2024 Red Hat, Inc. | ||
* | ||
* This file is part of csdiff. | ||
* | ||
* csdiff is free software: you can redistribute it and/or modify | ||
* it under the terms of the GNU General Public License as published by | ||
* the Free Software Foundation, either version 3 of the License, or | ||
* any later version. | ||
* | ||
* csdiff is distributed in the hope that it will be useful, | ||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
* GNU General Public License for more details. | ||
* | ||
* You should have received a copy of the GNU General Public License | ||
* along with csdiff. If not, see <http://www.gnu.org/licenses/>. | ||
*/ | ||
|
||
#ifndef H_GUARD_FINGER_PRINT_H | ||
#define H_GUARD_FINGER_PRINT_H | ||
|
||
#include "defect.hh" | ||
|
||
#include <memory> | ||
|
||
/// kind of fingerprint that FingerPrinter::getHash() is asked to compute
enum EFingerPrintVer { | ||
// hash of the data taken from the defect itself (checker, file path,
// key event and message)
FPV_CSDIFF = 0, | ||
// hash of the same data combined with the line content
FPV_CSDIFF_WITH_LINE_CONTENT, | ||
// presumably a sentinel giving the number of kinds -- it is not accepted
// by getHash(); TODO confirm intended use
FPV_MAX | ||
}; | ||
|
||
/// Computes fingerprints (hex-encoded hashes) of a single defect.
///
/// The data to be hashed is collected from the defect at construction time;
/// the implementation details are hidden behind the `d` pointer.
class FingerPrinter { | ||
public: | ||
/// collect the data to compute fingerprints from
FingerPrinter(const Defect &); | ||
~FingerPrinter(); | ||
|
||
/// return fingerprint of the selected kind | ||
/// (an empty string is returned if the required data is not available)
std::string getHash(EFingerPrintVer) const; | ||
|
||
private: | ||
// defined in the implementation file
struct Private; | ||
std::unique_ptr<Private> d; | ||
}; | ||
|
||
#endif /* H_GUARD_FINGER_PRINT_H */ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,51 @@ | ||
/* | ||
* Copyright (C) 2024 Red Hat, Inc. | ||
* | ||
* This file is part of csdiff. | ||
* | ||
* csdiff is free software: you can redistribute it and/or modify | ||
* it under the terms of the GNU General Public License as published by | ||
* the Free Software Foundation, either version 3 of the License, or | ||
* any later version. | ||
* | ||
* csdiff is distributed in the hope that it will be useful, | ||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
* GNU General Public License for more details. | ||
* | ||
* You should have received a copy of the GNU General Public License | ||
* along with csdiff. If not, see <http://www.gnu.org/licenses/>. | ||
*/ | ||
|
||
#include <string> | ||
#include <vector> | ||
|
||
#include <boost/algorithm/hex.hpp> | ||
#include <boost/algorithm/string.hpp> | ||
|
||
/// compute TEng hash of `src` and return it as hex-encoded string | ||
template <typename TEng, typename TSrc> | ||
std::string hexHashStr(const TSrc &src) | ||
{ | ||
// create hash engine and process the input | ||
TEng eng; | ||
eng.process_bytes(src.data(), src.size()); | ||
|
||
// export the hash as an array of unsigned int | ||
// FIXME: std::remove_reference is needed on el7 (boost-1.53) | ||
using TDst = typename TEng::digest_type; | ||
typename std::remove_reference<TDst>::type dst; | ||
eng.get_digest(dst); | ||
|
||
// convert the hash to a vector of unsigned int | ||
static const size_t len = sizeof(dst) / sizeof(dst[0]); | ||
const std::vector<unsigned> hash(dst, dst + len); | ||
|
||
// convert the hash to a hex string | ||
std::string result; | ||
boost::algorithm::hex(hash.begin(), hash.end(), back_inserter(result)); | ||
|
||
// convert uppercase letters to lowercase | ||
boost::algorithm::to_lower(result); | ||
return result; | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.