-
Notifications
You must be signed in to change notification settings - Fork 1
/
FastqSplitter.hpp
116 lines (104 loc) · 3.71 KB
/
FastqSplitter.hpp
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
/**
* shark - Mapping-free filtering of useless RNA-Seq reads
* Copyright (C) 2019 Tamara Ceccato, Luca Denti, Yuri Pirola, Marco Previtali
*
* This file is part of shark.
*
* shark is free software: you can redistribute it and/or modify it
* under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* shark is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with shark; see the file LICENSE. If not, see
* <https://www.gnu.org/licenses/>.
**/
#ifndef FASTQ_SPLITTER_HPP
#define FASTQ_SPLITTER_HPP
#include "common.hpp"
#include "kseq.h"
#include <string>
#include <vector>
#include <memory>
#include <mutex>
using namespace std;
class FastqSplitter {
public:
typedef vector<elem_t> output_t;
FastqSplitter(kseq_t * const _seq1, kseq_t * const _seq2, const int _maxnum, const char _min_quality, const bool _full_mode)
: seq1(_seq1), seq2(_seq2), maxnum(_maxnum), min_quality(_min_quality), full_mode(_full_mode), empty_el({})
{
}
~FastqSplitter() {
}
void operator()(output_t& fastq) {
std::lock_guard<std::mutex> lock(mtx);
fastq.reserve(maxnum);
int seq_len1, seq_len2;
if (min_quality == 0) {
if (seq2 == nullptr) {
while (fastq.size() < maxnum && (seq_len1 = kseq_read(seq1)) >= 0) {
fastq.push_back({
seq1->seq.s,
{ { seq1->name.s, full_mode ? seq1->seq.s : "", full_mode ? seq1->qual.s : "" },
empty_el }
});
}
} else {
while (fastq.size() < maxnum && (seq_len1 = kseq_read(seq1)) >= 0 && (seq_len2 = kseq_read(seq2)) >= 0) {
fastq.push_back({
string(seq1->seq.s) + "N" + string(seq2->seq.s),
{ { seq1->name.s, full_mode ? seq1->seq.s : "", full_mode ? seq1->qual.s : "" },
{ seq2->name.s, full_mode ? seq2->seq.s : "", full_mode ? seq2->qual.s : "" } }
});
}
}
} else {
const char mq = min_quality + 33;
if (seq2 == nullptr) {
while (fastq.size() < maxnum && (seq_len1 = kseq_read(seq1)) >= 0) {
fastq.push_back({
mask_seq(seq1->seq.s, seq1->qual.s, seq1->qual.l, mq),
{ { seq1->name.s, full_mode ? seq1->seq.s : "", full_mode ? seq1->qual.s : "" },
empty_el }
});
}
} else {
while (fastq.size() < maxnum && (seq_len1 = kseq_read(seq1)) >= 0 && (seq_len2 = kseq_read(seq2)) >= 0) {
fastq.push_back({
mask_seq(
string(seq1->seq.s) + "N" + string(seq2->seq.s),
string(seq1->qual.s) + "\33" + string(seq2->qual.s),
mq
),
{ { seq1->name.s, full_mode ? seq1->seq.s : "", full_mode ? seq1->qual.s : "" },
{ seq2->name.s, full_mode ? seq2->seq.s : "", full_mode ? seq2->qual.s : "" } }
});
}
}
}
}
private:
kseq_t * const seq1;
kseq_t * const seq2;
const size_t maxnum;
const char min_quality;
const bool full_mode;
const sharseq_t empty_el;
std::mutex mtx;
static string mask_seq(string seq, const char* const qual, const size_t l, const char min_quality) {
for (size_t i = 0; i < l; ++i) {
if (qual[i] < min_quality) seq[i] = seq[i] - 64;
}
return seq;
}
static string mask_seq(string seq, const string& qual, const char min_quality) {
return mask_seq(seq, qual.c_str(), qual.length(), min_quality);
}
};
#endif