diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 2df8719bbf..41a948a538 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -10,7 +10,7 @@ repos: - id: check-toml - id: debug-statements - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.8.6 + rev: v0.9.1 hooks: - id: ruff-format - id: ruff diff --git a/src/sourmash/cli/compare.py b/src/sourmash/cli/compare.py index 45844aaa1d..aca5863c78 100644 --- a/src/sourmash/cli/compare.py +++ b/src/sourmash/cli/compare.py @@ -92,7 +92,7 @@ def subparser(subparsers): subparser.add_argument( "--csv", metavar="F", - help="write matrix to specified file in CSV format (with column " "headers)", + help="write matrix to specified file in CSV format (with column headers)", ) subparser.add_argument( "--labels-to", diff --git a/src/sourmash/cli/compute.py b/src/sourmash/cli/compute.py index cfdb48f42a..f5f20647c1 100644 --- a/src/sourmash/cli/compute.py +++ b/src/sourmash/cli/compute.py @@ -105,7 +105,7 @@ def subparser(subparsers): type=str, default="", metavar="FILE", - help="merge all input files into one signature file with the " "specified name", + help="merge all input files into one signature file with the specified name", ) file_args.add_argument( "--name-from-first", diff --git a/src/sourmash/cli/gather.py b/src/sourmash/cli/gather.py index 159d8c2c0e..2f449aca7f 100644 --- a/src/sourmash/cli/gather.py +++ b/src/sourmash/cli/gather.py @@ -100,8 +100,7 @@ def subparser(subparsers): subparser.add_argument( "--save-matches", metavar="FILE", - help="save gather matched signatures from the database to the " - "specified file", + help="save gather matched signatures from the database to the specified file", ) subparser.add_argument( "--save-prefetch", diff --git a/src/sourmash/cli/prefetch.py b/src/sourmash/cli/prefetch.py index 55ee063d0b..30d7fcc96d 100644 --- a/src/sourmash/cli/prefetch.py +++ b/src/sourmash/cli/prefetch.py @@ -58,12 +58,12 @@ def subparser(subparsers): 
subparser.add_argument( "--save-unmatched-hashes", metavar="FILE", - help="output unmatched query hashes as a signature to the " "specified file", + help="output unmatched query hashes as a signature to the specified file", ) subparser.add_argument( "--save-matching-hashes", metavar="FILE", - help="output matching query hashes as a signature to the " "specified file", + help="output matching query hashes as a signature to the specified file", ) subparser.add_argument( "--md5", default=None, help="select the signature with this md5 as query" diff --git a/src/sourmash/command_compute.py b/src/sourmash/command_compute.py index 78edc8dcaa..e3eede0715 100644 --- a/src/sourmash/command_compute.py +++ b/src/sourmash/command_compute.py @@ -78,10 +78,10 @@ def compute(args): notify("Computing both nucleotide and protein signatures.") num_sigs = 2 * len(ksizes) elif args.dna and args.dayhoff: - notify("Computing both nucleotide and Dayhoff-encoded protein " "signatures.") + notify("Computing both nucleotide and Dayhoff-encoded protein signatures.") num_sigs = 2 * len(ksizes) elif args.dna and args.hp: - notify("Computing both nucleotide and hp-encoded protein " "signatures.") + notify("Computing both nucleotide and hp-encoded protein signatures.") num_sigs = 2 * len(ksizes) elif args.dna: notify("Computing only nucleotide (and not protein) signatures.") @@ -91,11 +91,11 @@ def compute(args): num_sigs = len(ksizes) elif args.dayhoff: notify( - "Computing only Dayhoff-encoded protein (and not nucleotide) " "signatures." + "Computing only Dayhoff-encoded protein (and not nucleotide) signatures." 
) num_sigs = len(ksizes) elif args.hp: - notify("Computing only hp-encoded protein (and not nucleotide) " "signatures.") + notify("Computing only hp-encoded protein (and not nucleotide) signatures.") num_sigs = len(ksizes) if args.protein or args.dayhoff or args.hp: @@ -269,7 +269,7 @@ def _compute_individual(args, signatures_factory): save_sigs_to_location(sigs, save_sigs) notify( - f"calculated {len(sigs)} signatures for {n+1} sequences in {filename}" + f"calculated {len(sigs)} signatures for {n + 1} sequences in {filename}" ) # if not args.output, close output for every input filename. @@ -443,7 +443,7 @@ def to_param_str(self): kstr = [f"k={k}" for k in self.ksizes] else: # for protein, divide ksize by three. - kstr = [f"k={k//3}" for k in self.ksizes] + kstr = [f"k={k // 3}" for k in self.ksizes] assert kstr pi.extend(kstr) diff --git a/src/sourmash/command_sketch.py b/src/sourmash/command_sketch.py index d962d2ec77..d7aeba3588 100644 --- a/src/sourmash/command_sketch.py +++ b/src/sourmash/command_sketch.py @@ -207,7 +207,7 @@ def _execute_sketch(args, signatures_factory): error("error: sourmash only supports CC0-licensed signatures. 
sorry!") sys.exit(-1) - notify(f'computing signatures for files: {", ".join(args.filenames)}') + notify(f"computing signatures for files: {', '.join(args.filenames)}") if args.merge and not args.output: error("ERROR: must specify -o with --merge") @@ -353,7 +353,7 @@ def _compute_sigs(to_build, output, *, check_sequence=False): save_sigs.add(sig) notify( - f"calculated {len(sigs)} signatures for {n+1} sequences in {filename}" + f"calculated {len(sigs)} signatures for {n + 1} sequences in {filename}" ) save_sigs.close() @@ -768,7 +768,7 @@ def _compute_individual(args, signatures_factory): save_sigs_to_location(sigs, save_sigs) notify( - f"calculated {len(sigs)} signatures for {n+1} sequences in {filename}" + f"calculated {len(sigs)} signatures for {n + 1} sequences in {filename}" ) # if not args.output, close output for every input filename. @@ -942,7 +942,7 @@ def to_param_str(self): kstr = [f"k={k}" for k in self.ksizes] else: # for protein, divide ksize by three. - kstr = [f"k={k//3}" for k in self.ksizes] + kstr = [f"k={k // 3}" for k in self.ksizes] assert kstr pi.extend(kstr) diff --git a/src/sourmash/commands.py b/src/sourmash/commands.py index 26661f4317..147de9fb3f 100644 --- a/src/sourmash/commands.py +++ b/src/sourmash/commands.py @@ -1077,7 +1077,7 @@ def gather(args): ) print_results( - f"the recovered matches hit {sum_f_uniq_found*100:.1f}% of the query k-mers (unweighted)." + f"the recovered matches hit {sum_f_uniq_found * 100:.1f}% of the query k-mers (unweighted)." ) print_results("") @@ -1345,7 +1345,7 @@ def multigather(args): ) print_results( - f"the recovered matches hit {sum_f_uniq_found*100:.1f}% of the query k-mers (unweighted)." + f"the recovered matches hit {sum_f_uniq_found * 100:.1f}% of the query k-mers (unweighted)." 
) print_results("") diff --git a/src/sourmash/distance_utils.py b/src/sourmash/distance_utils.py index ad267bd0fe..41c325ccfa 100644 --- a/src/sourmash/distance_utils.py +++ b/src/sourmash/distance_utils.py @@ -16,7 +16,7 @@ def check_distance(dist): if not 0 <= dist <= 1: - raise ValueError(f"Error: distance value {dist :.4f} is not between 0 and 1!") + raise ValueError(f"Error: distance value {dist:.4f} is not between 0 and 1!") else: return dist diff --git a/src/sourmash/plugins.py b/src/sourmash/plugins.py index 0871154f2d..fe9b073563 100644 --- a/src/sourmash/plugins.py +++ b/src/sourmash/plugins.py @@ -198,7 +198,7 @@ def list_all_plugins(): notify( f"{'plugin type':<20s} {'from python module':<30s} {'v':<5s} {'entry point name':<20s}" ) - notify(f"{'-'*20} {'-'*30} {'-'*5} {'-'*20}") + notify(f"{'-' * 20} {'-' * 30} {'-' * 5} {'-' * 20}") for plugin in plugins: name = plugin.name diff --git a/src/sourmash/sbt.py b/src/sourmash/sbt.py index 21e6b25c16..6ca4a746d7 100644 --- a/src/sourmash/sbt.py +++ b/src/sourmash/sbt.py @@ -763,7 +763,9 @@ def save(self, path, storage=None, sparseness=0.0, structure_only=False): manifest_rows.append(row) if n % 100 == 0: - notify(f"{format(n+1)} of {format(total_nodes)} nodes saved", end="\r") + notify( + f"{format(n + 1)} of {format(total_nodes)} nodes saved", end="\r" + ) # now, save the index file and manifests. 
# diff --git a/src/sourmash/sig/__main__.py b/src/sourmash/sig/__main__.py index e63fc433a4..dcc166f701 100644 --- a/src/sourmash/sig/__main__.py +++ b/src/sourmash/sig/__main__.py @@ -1298,7 +1298,7 @@ def kmers(args): found_hashes = set(found_mh.hashes) cont = len(query_hashes.intersection(found_hashes)) / len(query_hashes) - notify(f"found {len(found_mh)} distinct matching hashes ({cont*100:.1f}%)") + notify(f"found {len(found_mh)} distinct matching hashes ({cont * 100:.1f}%)") if not kmer_w and not save_seqs: notify("NOTE: see --save-kmers or --save-sequences for output options.") diff --git a/src/sourmash/tax/__main__.py b/src/sourmash/tax/__main__.py index c4e22d07fa..8d3692011e 100644 --- a/src/sourmash/tax/__main__.py +++ b/src/sourmash/tax/__main__.py @@ -93,7 +93,7 @@ def metagenome(args): if not tax_assign: error( - f'ERROR: No taxonomic assignments loaded from {",".join(args.taxonomy_csv)}. Exiting.' + f"ERROR: No taxonomic assignments loaded from {','.join(args.taxonomy_csv)}. Exiting." ) sys.exit(-1) @@ -284,7 +284,7 @@ def genome(args): if not tax_assign: error( - f'ERROR: No taxonomic assignments loaded from {",".join(args.taxonomy_csv)}. Exiting.' + f"ERROR: No taxonomic assignments loaded from {','.join(args.taxonomy_csv)}. Exiting." ) sys.exit(-1) @@ -345,7 +345,7 @@ def genome(args): else: classif_perc = (float(n_classified) / float(n_total)) * 100 notify( - f"classified {n_classified}/{n_total} queries ({classif_perc :.2f}%). Writing results" + f"classified {n_classified}/{n_total} queries ({classif_perc:.2f}%). Writing results" ) # write outputs @@ -442,7 +442,7 @@ def annotate(args): if not tax_assign: error( - f'ERROR: No taxonomic assignments loaded from {",".join(args.taxonomy_csv)}. Exiting.' + f"ERROR: No taxonomic assignments loaded from {','.join(args.taxonomy_csv)}. Exiting." ) sys.exit(-1) @@ -518,7 +518,7 @@ def annotate(args): ) else: notify( - f"Annotated {rows_annotated} of {n+1} total rows from '{in_csv}'." 
+ f"Annotated {rows_annotated} of {n + 1} total rows from '{in_csv}'." ) except ValueError as exc: diff --git a/src/sourmash/tax/tax_utils.py b/src/sourmash/tax/tax_utils.py index 60a967c6b4..8ad43c3b36 100644 --- a/src/sourmash/tax/tax_utils.py +++ b/src/sourmash/tax/tax_utils.py @@ -22,8 +22,10 @@ "ascending_taxlist", "collect_gather_csvs", "load_gather_results", - "check_and_load_gather_csvs" "report_missing_and_skipped_identities", - "aggregate_by_lineage_at_rank" "format_for_krona", + "check_and_load_gather_csvs", + "report_missing_and_skipped_identities", + "aggregate_by_lineage_at_rank", + "format_for_krona", "write_output", "write_bioboxes", "parse_lingroups", @@ -793,7 +793,7 @@ def read_lingroups(lingroup_csv): if n is None: raise ValueError(f"No lingroups loaded from {lingroup_csv}.") n_lg = len(lingroupD.keys()) - notify(f"Read {n+1} lingroup rows and found {n_lg} distinct lingroup prefixes.") + notify(f"Read {n + 1} lingroup rows and found {n_lg} distinct lingroup prefixes.") return lingroupD @@ -978,7 +978,7 @@ def report_missing_and_skipped_identities(gather_results): f"of {total_taxresults} gather results, lineage assignments for {total_n_missed} results were missed." 
) notify( - f'The following are missing from the taxonomy information: {", ".join(ident_missed)}' + f"The following are missing from the taxonomy information: {', '.join(ident_missed)}" ) @@ -2106,9 +2106,9 @@ def as_summary_dict(self, query_info, limit_float=False, lingroups=None): def as_human_friendly_dict(self, query_info): sD = self.as_summary_dict(query_info=query_info, limit_float=True) - sD["f_weighted_at_rank"] = f"{self.f_weighted_at_rank*100:>4.1f}%" + sD["f_weighted_at_rank"] = f"{self.f_weighted_at_rank * 100:>4.1f}%" if self.query_ani_at_rank is not None: - sD["query_ani_at_rank"] = f"{self.query_ani_at_rank*100:>3.1f}%" + sD["query_ani_at_rank"] = f"{self.query_ani_at_rank * 100:>3.1f}%" else: sD["query_ani_at_rank"] = "- " return sD diff --git a/tests/test_sourmash_compute.py b/tests/test_sourmash_compute.py index 35f5635237..172b520d07 100644 --- a/tests/test_sourmash_compute.py +++ b/tests/test_sourmash_compute.py @@ -453,8 +453,7 @@ def test_do_sourmash_compute_multik_with_hp(): in_directory=location, ) assert ( - "Computing only hp-encoded protein (and not nucleotide) " - "signatures." in err + "Computing only hp-encoded protein (and not nucleotide) signatures." in err ) outfile = os.path.join(location, "short.fa.sig") assert os.path.exists(outfile) diff --git a/tests/test_tax_utils.py b/tests/test_tax_utils.py index dfca20628a..34c4743b7c 100644 --- a/tests/test_tax_utils.py +++ b/tests/test_tax_utils.py @@ -2221,7 +2221,7 @@ def test_RankLineageInfo_init_lineage_str_2(): taxinf = RankLineageInfo(lineage_str=x) print(taxinf.lineage) print(taxinf.lineage_str) - assert taxinf.zip_lineage() == ["a", "b", "", "c" "", "", "", "", ""] + assert taxinf.zip_lineage() == ["a", "b", "", "c", "", "", "", ""] def test_RankLineageInfo_init_lineage_str_2_truncate():