Skip to content

Commit

Permalink
Merge pull request #1 from leejoey0921/linclust_integration
Browse the repository at this point in the history
Make adjacent sequence matching configurable
  • Loading branch information
leejoey0921 authored Aug 30, 2024
2 parents e12665b + ecea89f commit 5b68a8e
Show file tree
Hide file tree
Showing 4 changed files with 332 additions and 139 deletions.
3 changes: 3 additions & 0 deletions src/commons/Parameters.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -152,6 +152,7 @@ Parameters::Parameters():
PARAM_IGNORE_MULTI_KMER(PARAM_IGNORE_MULTI_KMER_ID, "--ignore-multi-kmer", "Skip repeating k-mers", "Skip k-mers occurring multiple times (>=2)", typeid(bool), (void *) &ignoreMultiKmer, "", MMseqsParameter::COMMAND_CLUSTLINEAR | MMseqsParameter::COMMAND_EXPERT),
PARAM_HASH_SHIFT(PARAM_HASH_SHIFT_ID, "--hash-shift", "Shift hash", "Shift k-mer hash initialization", typeid(int), (void *) &hashShift, "^[1-9]{1}[0-9]*$", MMseqsParameter::COMMAND_CLUSTLINEAR | MMseqsParameter::COMMAND_EXPERT),
PARAM_PICK_N_SIMILAR(PARAM_PICK_N_SIMILAR_ID, "--pick-n-sim-kmer", "Add N similar to search", "Add N similar k-mers to search", typeid(int), (void *) &pickNbest, "^[1-9]{1}[0-9]*$", MMseqsParameter::COMMAND_CLUSTLINEAR | MMseqsParameter::COMMAND_EXPERT),
PARAM_MATCH_ADJACENT_SEQ(PARAM_MATCH_ADJACENT_SEQ_ID, "--match-adjacent-seq", "Compare adjacent sequences to k-mers", "Compare sequence information adjacent to k-mers and elect multiple representative sequences per cluster", typeid(bool), (void *) &matchAdjacentSeq, "", MMseqsParameter::COMMAND_CLUSTLINEAR),
PARAM_ADJUST_KMER_LEN(PARAM_ADJUST_KMER_LEN_ID, "--adjust-kmer-len", "Adjust k-mer length", "Adjust k-mer length based on specificity (only for nucleotides)", typeid(bool), (void *) &adjustKmerLength, "", MMseqsParameter::COMMAND_CLUSTLINEAR | MMseqsParameter::COMMAND_EXPERT),
PARAM_RESULT_DIRECTION(PARAM_RESULT_DIRECTION_ID, "--result-direction", "Result direction", "result is 0: query, 1: target centric", typeid(int), (void *) &resultDirection, "^[0-1]{1}$", MMseqsParameter::COMMAND_CLUSTLINEAR | MMseqsParameter::COMMAND_EXPERT),
PARAM_WEIGHT_FILE(PARAM_WEIGHT_FILE_ID, "--weights", "Weight file name", "Weights used for cluster priorization", typeid(std::string), (void*) &weightFile, "", MMseqsParameter::COMMAND_CLUSTLINEAR | MMseqsParameter::COMMAND_EXPERT ),
Expand Down Expand Up @@ -2513,6 +2514,8 @@ void Parameters::setDefaults() {
resultDirection = Parameters::PARAM_RESULT_DIRECTION_TARGET;
weightThr = 0.9;
weightFile = "";
// TODO: change the default of matchAdjacentSeq to true once the regression tests are fixed
matchAdjacentSeq = false;
hashSeqBuffer = 1.05;

// result2stats
Expand Down
2 changes: 2 additions & 0 deletions src/commons/Parameters.h
Original file line number Diff line number Diff line change
Expand Up @@ -552,6 +552,7 @@ class Parameters {
int resultDirection;
float weightThr;
std::string weightFile;
bool matchAdjacentSeq;
float hashSeqBuffer;

// indexdb
Expand Down Expand Up @@ -866,6 +867,7 @@ class Parameters {
PARAMETER(PARAM_IGNORE_MULTI_KMER)
PARAMETER(PARAM_HASH_SHIFT)
PARAMETER(PARAM_PICK_N_SIMILAR)
PARAMETER(PARAM_MATCH_ADJACENT_SEQ)
PARAMETER(PARAM_ADJUST_KMER_LEN)
PARAMETER(PARAM_RESULT_DIRECTION)
PARAMETER(PARAM_WEIGHT_FILE)
Expand Down
Loading

0 comments on commit 5b68a8e

Please sign in to comment.