Add CleanCoNLL object #2422
Annotations
3 errors and 1 warning
test:
flair/datasets/__init__.py#L341
ruff
pytest_ruff.RuffError: flair/datasets/__init__.py:2:1: I001 [*] Import block is un-sorted or un-formatted
|
1 | # Expose base classes
2 | / from .base import (
3 | | DataLoader,
4 | | FlairDatapointDataset,
5 | | MongoDataset,
6 | | SentenceDataset,
7 | | StringDataset,
8 | | )
9 | |
10 | | # Expose all biomedical data sets used for the evaluation of BioBERT
11 | | # -
12 | | # -
13 | | # -
14 | | # -
15 | | # Expose all biomedical data sets using the HUNER splits
16 | | # Expose all biomedical data sets
17 | | from .biomedical import (
18 | | ANAT_EM,
19 | | AZDZ,
20 | | BC2GM,
21 | | BIO_INFER,
22 | | BIOBERT_CHEMICAL_BC4CHEMD,
23 | | BIOBERT_CHEMICAL_BC5CDR,
24 | | BIOBERT_DISEASE_BC5CDR,
25 | | BIOBERT_DISEASE_NCBI,
26 | | BIOBERT_GENE_BC2GM,
27 | | BIOBERT_GENE_JNLPBA,
28 | | BIOBERT_SPECIES_LINNAEUS,
29 | | BIOBERT_SPECIES_S800,
30 | | BIONLP2013_CG,
31 | | BIONLP2013_PC,
32 | | BIOSEMANTICS,
33 | | CDR,
34 | | CELL_FINDER,
35 | | CEMP,
36 | | CHEMDNER,
37 | | CLL,
38 | | CRAFT,
39 | | CRAFT_V4,
40 | | DECA,
41 | | FSU,
42 | | GELLUS,
43 | | GPRO,
44 | | HUNER_CELL_LINE,
45 | | HUNER_CELL_LINE_CELL_FINDER,
46 | | HUNER_CELL_LINE_CLL,
47 | | HUNER_CELL_LINE_GELLUS,
48 | | HUNER_CELL_LINE_JNLPBA,
49 | | HUNER_CHEMICAL,
50 | | HUNER_CHEMICAL_CDR,
51 | | HUNER_CHEMICAL_CEMP,
52 | | HUNER_CHEMICAL_CHEBI,
53 | | HUNER_CHEMICAL_CHEMDNER,
54 | | HUNER_CHEMICAL_CRAFT_V4,
55 | | HUNER_CHEMICAL_SCAI,
56 | | HUNER_DISEASE,
57 | | HUNER_DISEASE_CDR,
58 | | HUNER_DISEASE_MIRNA,
59 | | HUNER_DISEASE_NCBI,
60 | | HUNER_DISEASE_PDR,
61 | | HUNER_DISEASE_SCAI,
62 | | HUNER_DISEASE_VARIOME,
63 | | HUNER_GENE,
64 | | HUNER_GENE_BC2GM,
65 | | HUNER_GENE_BIO_INFER,
66 | | HUNER_GENE_CELL_FINDER,
67 | | HUNER_GENE_CHEBI,
68 | | HUNER_GENE_CRAFT_V4,
69 | | HUNER_GENE_DECA,
70 | | HUNER_GENE_FSU,
71 | | HUNER_GENE_GPRO,
72 | | HUNER_GENE_IEPA,
73 | | HUNER_GENE_JNLPBA,
74 | | HUNER_GENE_LOCTEXT,
75 | | HUNER_GENE_MIRNA,
76 | | HUNER_GENE_OSIRIS,
77 | | HUNER_GENE_VARIOME,
78 | | HUNER_SPECIES,
79 | | HUNER_SPECIES_CELL_FINDER,
80 | | HUNER_SPECIES_CHEBI,
81 | | HUNER_SPECIES_CRAFT_V4,
82 | | HUNER_SPECIES_LINNEAUS,
83 | | HUNER_SPECIES_LOCTEXT,
84 | | HUNER_SPECIES_MIRNA,
85 | | HUNER_SPECIES_S800,
86 | | HUNER_SPECIES_VARIOME,
87 | | IEPA,
88 | | JNLPBA,
89 | | LINNEAUS,
90 | | LOCTEXT,
91 | | MIRNA,
92 | | NCBI_DISEASE,
93 | | OSIRIS,
94 | | PDR,
95 | | S800,
96 | | SCAI_CHEMICALS,
97 | | SCAI_DISEASE,
98 | | VARIOME,
99 | | )
100 | |
101 | | # Expose all document classification datasets
102 | | from .document_classification import (
103 | | AGNEWS,
104 | | AMAZON_REVIEWS,
105 | | COMMUNICATIVE_FUNCTIONS,
106 | | GERMEVAL_2018_OFFENSIVE_LANGUAGE,
107 | | GLUE_COLA,
108 | | GLUE_SST2,
109 | | GO_EMOTIONS,
110 | | IMDB,
111 | | NEWSGROUPS,
112 | | SENTEVAL_CR,
113 | | SENTEVAL_MPQA,
114 | | SENTEVAL_MR,
115 | | SENTEVAL_SST_BINARY,
116 | | SENTEVAL_SST_GRANULAR,
117 | | SENTEVAL_SUBJ,
118 | | SENTIMENT_140,
119 | | STACKOVERFLOW,
120 | | TREC_6,
121 | | TREC_50,
122 | | WASSA_ANGER,
123 | | WASSA_FEAR,
124 | | WASSA_JOY,
125 | | WASSA_SADNESS,
126 | | YAHOO_ANSWERS,
127 | | ClassificationCorpus,
128 | | ClassificationDataset,
129 | | CSVClassificationCorpus,
130 | | CSVClassificationDataset,
131 | | )
132 | |
133 | | # word sense disambiguation
134 | | # Expose all entity linking datasets
135 | | from .entity_linking import (
136 | | CTD_CHEMICALS_DICTIONARY,
137 | | CTD_DISEASES_DICTIONARY,
138 | | NCBI_GENE_HUMAN_DICTIONARY,
139 | | NCBI_TAXONOMY_DICTIO… [annotation output truncated here]
|
test:
flair/datasets/sequence_labeling.py#L341
ruff
pytest_ruff.RuffError: flair/datasets/sequence_labeling.py:1:1: I001 [*] Import block is un-sorted or un-formatted
|
1 | / import copy
2 | | import json
3 | | import logging
4 | | import os
5 | | import re
6 | |
7 | | # import shutil
8 | | from collections import defaultdict
9 | | from pathlib import Path
10 | | import tempfile
11 | | import shutil
12 | | import requests
13 | | import zipfile
14 | | import subprocess
15 | | from typing import (
16 | | Any,
17 | | DefaultDict,
18 | | Dict,
19 | | Iterable,
20 | | Iterator,
21 | | List,
22 | | Optional,
23 | | Tuple,
24 | | Union,
25 | | cast,
26 | | )
27 | |
28 | | from torch.utils.data import ConcatDataset, Dataset
29 | |
30 | | import flair
31 | | from flair.data import (
32 | | Corpus,
33 | | FlairDataset,
34 | | MultiCorpus,
35 | | Relation,
36 | | Sentence,
37 | | Token,
38 | | get_spans_from_bio,
39 | | )
40 | | from flair.datasets.base import find_train_dev_test_files
41 | | from flair.file_utils import cached_path, unpack_file
42 | | from flair.tokenization import Tokenizer
43 | |
44 | | log = logging.getLogger("flair")
| |_^ I001
|
= help: Organize imports
flair/datasets/sequence_labeling.py:14:8: F401 [*] `subprocess` imported but unused
|
12 | import requests
13 | import zipfile
14 | import subprocess
| ^^^^^^^^^^ F401
15 | from typing import (
16 | Any,
|
= help: Remove unused import: `subprocess`
flair/datasets/sequence_labeling.py:1441:9: D212 [*] Multi-line docstring summary should start at the first line
|
1439 | **corpusargs,
1440 | ) -> None:
1441 | """
| _________^
1442 | | Initialize the CleanCoNLL corpus.
1443 | |
1444 | | Args:
1445 | | base_path: Base directory for the dataset. If None, defaults to flair.cache_root / "datasets".
1446 | | in_memory: If True, keeps dataset in memory for faster training.
1447 | | """
| |___________^ D212
1448 | # Set the base path for the dataset
1449 | base_path = flair.cache_root / "datasets" if not base_path else Path(base_path)
|
= help: Remove whitespace after opening quotes
flair/datasets/sequence_labeling.py:1485:13: D200 One-line docstring should fit on one line
|
1483 | def download_and_prepare_data(data_folder: Path):
1484 | def parse_patch(patch_file_path):
1485 | """
| _____________^
1486 | | Parses a patch file and returns a structured representation of the changes.
1487 | | """
| |_______________^ D200
1488 |
1489 | changes = []
|
= help: Reformat to one line
flair/datasets/sequence_labeling.py:1485:13: D202 [*] No blank lines allowed after function docstring (found 1)
|
1483 | def download_and_prepare_data(data_folder: Path):
1484 | def parse_patch(patch_file_path):
1485 | """
| _____________^
1486 | | Parses a patch file and returns a structured representation of the changes.
1487 | | """
| |_______________^ D202
1488 |
1489 | changes = []
|
= help: Remove blank line(s) after function docstring
flair/datasets/sequence_labeling.py:1485:13: D212 [*] Multi-line docstring summary should start at the first line
|
1483 | def download_and_prepare_data(data_folder: Path):
1484 | def parse_patch(patch_file_path):
1485 | """
| _____________^
1486 | | Parses a patch file and returns a structured representation of the changes.
1487 | | """
| |_______________^ D212
1488 |
1489 | changes = []
|
= help: Remove whitespace after opening quotes
flair/datasets/sequence_labeling.py:1492:18: UP015 [*] Unnecessary open mode parameters
|
1490 | current_change = None
1491 |
1492 | with open(patch_file_path, "r", encoding="utf-8") as patch_file:
|
|
test
Process completed with exit code 1.
|
test
The following actions use a deprecated Node.js version and will be forced to run on node20: actions/checkout@v3, actions/setup-python@v4, actions/cache@v3. For more info: https://github.blog/changelog/2024-03-07-github-actions-all-actions-will-run-on-node20-instead-of-node16-by-default/
|