From 037f78ab42106f19624378d1e012a7771005ae55 Mon Sep 17 00:00:00 2001 From: Evan Bolyen Date: Tue, 23 Jul 2024 12:57:11 -0700 Subject: [PATCH] REF: q2-types changes --- q2_demux/_demux.py | 6 ++--- q2_demux/_subsample.py | 8 +++--- q2_demux/_summarize/_visualizer.py | 7 ++--- q2_demux/_tabulate.py | 4 +-- q2_demux/_util.py | 20 ++++++++++++++ q2_demux/plugin_setup.py | 3 +++ q2_demux/tests/test_demux.py | 2 +- q2_demux/types/__init__.py | 11 ++++++++ q2_demux/types/_deferred_setup/__init__.py | 11 ++++++++ .../types/_deferred_setup/_transformers.py | 26 +++++++++++++++++++ q2_demux/types/_objects.py | 20 ++++++++++++++ 11 files changed, 105 insertions(+), 13 deletions(-) create mode 100644 q2_demux/_util.py create mode 100644 q2_demux/types/__init__.py create mode 100644 q2_demux/types/_deferred_setup/__init__.py create mode 100644 q2_demux/types/_deferred_setup/_transformers.py create mode 100644 q2_demux/types/_objects.py diff --git a/q2_demux/_demux.py b/q2_demux/_demux.py index 44b3980..7bfa66e 100644 --- a/q2_demux/_demux.py +++ b/q2_demux/_demux.py @@ -25,9 +25,9 @@ SingleLanePerSampleSingleEndFastqDirFmt, SingleLanePerSamplePairedEndFastqDirFmt, FastqManifestFormat, YamlFormat) -from q2_types.multiplexed_sequences import ErrorCorrectionDetailsFmt -from q2_types.feature_data import ( - BarcodeSequenceFastqIterator, BarcodePairedSequenceFastqIterator +from q2_types.multiplexed_sequences import ( + ErrorCorrectionDetailsFmt, BarcodeSequenceFastqIterator, + BarcodePairedSequenceFastqIterator ) from ._ecc import GolayDecoder diff --git a/q2_demux/_subsample.py b/q2_demux/_subsample.py index 5251bdc..022d578 100644 --- a/q2_demux/_subsample.py +++ b/q2_demux/_subsample.py @@ -17,7 +17,7 @@ SingleLanePerSamplePairedEndFastqDirFmt, CasavaOneEightSingleLanePerSampleDirFmt) -from q2_types.feature_data._util import _read_fastq_seqs +from ._util import read_fastq_seqs def subsample_single(sequences: SingleLanePerSampleSingleEndFastqDirFmt, @@ -31,7 +31,7 @@ def subsample_single(sequences: SingleLanePerSampleSingleEndFastqDirFmt, fwd_path_in = str(sequences.path / fwd_name) fwd_path_out = str(result.path / fwd_name) with gzip.open(str(fwd_path_out), mode='w') as fwd: - for fwd_rec in _read_fastq_seqs(fwd_path_in): + for fwd_rec in read_fastq_seqs(fwd_path_in): if random.random() <= fraction: fwd.write(('\n'.join(fwd_rec) + '\n').encode('utf-8')) @@ -53,8 +53,8 @@ def subsample_paired(sequences: SingleLanePerSamplePairedEndFastqDirFmt, rev_path_out = str(result.path / rev_name) with gzip.open(str(fwd_path_out), mode='w') as fwd: with gzip.open(str(rev_path_out), mode='w') as rev: - file_pair = zip(_read_fastq_seqs(fwd_path_in), - _read_fastq_seqs(rev_path_in)) + file_pair = zip(read_fastq_seqs(fwd_path_in), + read_fastq_seqs(rev_path_in)) for fwd_rec, rev_rec in file_pair: if random.random() <= fraction: fwd.write(('\n'.join(fwd_rec) + '\n').encode('utf-8')) diff --git a/q2_demux/_summarize/_visualizer.py b/q2_demux/_summarize/_visualizer.py index 20edb1d..ef89f7f 100644 --- a/q2_demux/_summarize/_visualizer.py +++ b/q2_demux/_summarize/_visualizer.py @@ -17,7 +17,8 @@ import seaborn as sns import numpy as np -from q2_types.feature_data._util import (_read_fastq_seqs, _PlotQualView) +from .._util import read_fastq_seqs +from ..types import _PlotQualView import q2templates TEMPLATES = pkg_resources.resource_filename('q2_demux', '_summarize') @@ -49,7 +50,7 @@ def _subsample(fastq_map): qual_sample = [] min_seq_len = float('inf') for file, index in fastq_map: - for i, seq in enumerate(_read_fastq_seqs(file)): + for i, seq in enumerate(read_fastq_seqs(file)): if i == index[0]: min_seq_len = min(min_seq_len, len(seq[1])) qual_sample.append(_decode_qual_to_phred33(seq[3])) @@ -126,7 +127,7 @@ def summarize(output_dir: str, data: _PlotQualView, n: int = 10000) -> None: if filename is None or np.isnan(filename): continue - for seq in _read_fastq_seqs(filename): + for seq in read_fastq_seqs(filename): count += 1 per_sample_fastq_counts[direction][sample_id] = count file_records[direction].append({ diff --git a/q2_demux/_tabulate.py b/q2_demux/_tabulate.py index 46d4e8b..65642de 100644 --- a/q2_demux/_tabulate.py +++ b/q2_demux/_tabulate.py @@ -14,7 +14,7 @@ from q2_types.per_sample_sequences import ( SingleLanePerSampleSingleEndFastqDirFmt) -from q2_types.feature_data._util import _read_fastq_seqs +from ._util import read_fastq_seqs def tabulate_read_counts(sequences: SingleLanePerSampleSingleEndFastqDirFmt @@ -33,7 +33,7 @@ def tabulate_read_counts(sequences: SingleLanePerSampleSingleEndFastqDirFmt "Sample ids must be unique across inputs.") fwd_name = os.path.basename(fwd_path) fwd_path = str(e.path / fwd_name) - for fwd_rec in _read_fastq_seqs(fwd_path): + for fwd_rec in read_fastq_seqs(fwd_path): read_count += 1 result[sample_id] = read_count diff --git a/q2_demux/_util.py b/q2_demux/_util.py new file mode 100644 index 0000000..e5b568d --- /dev/null +++ b/q2_demux/_util.py @@ -0,0 +1,20 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2016-2023, QIIME 2 development team. +# +# Distributed under the terms of the Modified BSD License. +# +# The full license is in the file LICENSE, distributed with this software. +# ---------------------------------------------------------------------------- + + +import itertools +import gzip + + +def read_fastq_seqs(filepath): + # This function is adapted from @jairideout's SO post: + # http://stackoverflow.com/a/39302117/3424666 + fh = gzip.open(filepath, 'rt') + for seq_header, seq, qual_header, qual in itertools.zip_longest(*[fh] * 4): + yield (seq_header.strip(), seq.strip(), qual_header.strip(), + qual.strip()) diff --git a/q2_demux/plugin_setup.py b/q2_demux/plugin_setup.py index ac3e53e..edb2c59 100644 --- a/q2_demux/plugin_setup.py +++ b/q2_demux/plugin_setup.py @@ -6,6 +6,7 @@ # The full license is in the file LICENSE, distributed with this software. # ---------------------------------------------------------------------------- +import importlib from qiime2.plugin import ( Plugin, Metadata, MetadataColumn, Categorical, Bool, Str, Int, Float, @@ -317,3 +318,5 @@ 'with the WHERE clause, and the `exclude_ids` parameter ' 'allows for filtering of all samples not specified.', ) + +importlib.import_module('q2_demux.types._deferred_setup') diff --git a/q2_demux/tests/test_demux.py b/q2_demux/tests/test_demux.py index 2fa6c5d..d20a7be 100644 --- a/q2_demux/tests/test_demux.py +++ b/q2_demux/tests/test_demux.py @@ -19,7 +19,7 @@ import qiime2 import numpy.testing as npt -from q2_types.feature_data._transformer import ( +from q2_types.multiplexed_sequences import ( BarcodeSequenceFastqIterator, BarcodePairedSequenceFastqIterator) from qiime2.plugin.testing import TestPluginBase, assert_no_nans_in_tables diff --git a/q2_demux/types/__init__.py b/q2_demux/types/__init__.py new file mode 100644 index 0000000..739a4cb --- /dev/null +++ b/q2_demux/types/__init__.py @@ -0,0 +1,11 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2016-2023, QIIME 2 development team. +# +# Distributed under the terms of the Modified BSD License. +# +# The full license is in the file LICENSE, distributed with this software. +# ---------------------------------------------------------------------------- + +from ._objects import _PlotQualView + +__all__ = ['_PlotQualView'] diff --git a/q2_demux/types/_deferred_setup/__init__.py b/q2_demux/types/_deferred_setup/__init__.py new file mode 100644 index 0000000..db3c9d7 --- /dev/null +++ b/q2_demux/types/_deferred_setup/__init__.py @@ -0,0 +1,11 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2016-2023, QIIME 2 development team. +# +# Distributed under the terms of the Modified BSD License. +# +# The full license is in the file LICENSE, distributed with this software. +# ---------------------------------------------------------------------------- + +import importlib + +importlib.import_module('._transformers', __name__) diff --git a/q2_demux/types/_deferred_setup/_transformers.py b/q2_demux/types/_deferred_setup/_transformers.py new file mode 100644 index 0000000..e67d408 --- /dev/null +++ b/q2_demux/types/_deferred_setup/_transformers.py @@ -0,0 +1,26 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2016-2023, QIIME 2 development team. +# +# Distributed under the terms of the Modified BSD License. +# +# The full license is in the file LICENSE, distributed with this software. +# ---------------------------------------------------------------------------- + +from q2_types.per_sample_sequences import ( + SingleLanePerSampleSingleEndFastqDirFmt, + SingleLanePerSamplePairedEndFastqDirFmt) + +from .. import _PlotQualView + +from ...plugin_setup import plugin + + +# TODO: Remove _PlotQualView once Union works +@plugin.register_transformer +def _30(dirfmt: SingleLanePerSampleSingleEndFastqDirFmt) -> _PlotQualView: + return _PlotQualView(dirfmt, paired=False) + + +@plugin.register_transformer +def _31(dirfmt: SingleLanePerSamplePairedEndFastqDirFmt) -> _PlotQualView: + return _PlotQualView(dirfmt, paired=True) diff --git a/q2_demux/types/_objects.py b/q2_demux/types/_objects.py new file mode 100644 index 0000000..f9183f0 --- /dev/null +++ b/q2_demux/types/_objects.py @@ -0,0 +1,20 @@ +# ---------------------------------------------------------------------------- +# Copyright (c) 2016-2023, QIIME 2 development team. +# +# Distributed under the terms of the Modified BSD License. +# +# The full license is in the file LICENSE, distributed with this software. +# ---------------------------------------------------------------------------- + +__all__ = ['_PlotQualView'] + + +# TODO: convert to a Union[...] +class _PlotQualView: + """ + A very simple pass-through view which is made up of a single-end or + paired-end directory format with a bool indicating if single or paired. + """ + def __init__(self, directory_format, paired): + self.directory_format = directory_format + self.paired = paired