diff --git a/README.md b/README.md index 41c679a..ff2b9e4 100644 --- a/README.md +++ b/README.md @@ -47,7 +47,7 @@ By default `fake-vcf` writes to stdout ```shell poetry run fake-vcf generate -s 2 -r 2 ##fileformat=VCFv4.2 -##source=VCFake 0.2.0 +##source=VCFake 0.2.1 ##FILTER= ##INFO= ##contig= diff --git a/docs/source/conf.py b/docs/source/conf.py index b18f2d8..9c5da76 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -9,7 +9,7 @@ project = "fake-vcf" copyright = "2023, Magnus Wahlberg" author = "Magnus Wahlberg" -version = "0.2.0" +version = "0.2.1" # -- General configuration --------------------------------------------------- # https://www.sphinx-doc.org/en/master/usage/configuration.html#general-configuration diff --git a/docs/source/overview.rst b/docs/source/overview.rst index 4d38f47..67143cd 100644 --- a/docs/source/overview.rst +++ b/docs/source/overview.rst @@ -25,7 +25,7 @@ By default `fake-vcf` writes to stdout poetry run fake-vcf generate -s 2 -r 2 ##fileformat=VCFv4.2 - ##source=VCFake 0.2.0 + ##source=VCFake 0.2.1 ##FILTER= ##INFO= ##contig= diff --git a/fake_vcf/vcf_faker.py b/fake_vcf/vcf_faker.py index 9ccce0d..e75036c 100644 --- a/fake_vcf/vcf_faker.py +++ b/fake_vcf/vcf_faker.py @@ -136,7 +136,9 @@ def __init__( self.reference_data = None if self.reference_dir: - self.reference_data = vcf_reference.load_reference_data(self.reference_file) + self.reference_data = vcf_reference.load_reference_data( + self.reference_file, memory_map=False + ) if self.reference_data.shape[0] < max(self.positions): raise ValueError( f"""Max position size {max(self.positions)} is outside the reference which has a max of {len(self.reference_data)}""" diff --git a/fake_vcf/vcf_reference.py b/fake_vcf/vcf_reference.py index 3e8c924..07ba043 100644 --- a/fake_vcf/vcf_reference.py +++ b/fake_vcf/vcf_reference.py @@ -10,12 +10,12 @@ METADATA_FILE_NAME = "sequence_metadata.json" -def get_ref_at_pos(ref_data, position): - reference_value = ref_data.take([position])[0][0].as_py() +def get_ref_at_pos(ref_data: pa.array, position): + reference_value = ref_data.column(0)[position].as_py() return reference_value -def load_reference_data(reference_file, memory_map=True): +def load_reference_data(reference_file, memory_map): reference_data = pq.read_table(reference_file, memory_map=memory_map) return reference_data diff --git a/pyproject.toml b/pyproject.toml index b0feb8b..8aa67e0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "fake-vcf" -version = "0.2.0" +version = "0.2.1" description = "A fake vcf file generator " readme = "README.md" authors = ["fake-vcf "] diff --git a/tests/test_data/chr1_small.vcf b/tests/test_data/chr1_small.vcf index 88f2780..f2202e5 100644 --- a/tests/test_data/chr1_small.vcf +++ b/tests/test_data/chr1_small.vcf @@ -1,5 +1,5 @@ ##fileformat=VCFv4.2 -##source=VCFake 0.2.0 +##source=VCFake 0.1.0 ##FILTER= ##INFO= ##contig= diff --git a/tests/test_data/chr2_small.vcf b/tests/test_data/chr2_small.vcf index a6e7288..c75e140 100644 --- a/tests/test_data/chr2_small.vcf +++ b/tests/test_data/chr2_small.vcf @@ -1,5 +1,5 @@ ##fileformat=VCFv4.2 -##source=VCFake 0.2.0 +##source=VCFake 0.2.1 ##FILTER= ##INFO= ##contig= diff --git a/tests/test_data/reference/parquet/sequence_metadata.json b/tests/test_data/reference/parquet/sequence_metadata.json index e0bf4bb..ba020f3 100644 --- a/tests/test_data/reference/parquet/sequence_metadata.json +++ b/tests/test_data/reference/parquet/sequence_metadata.json @@ -1,6 +1,6 @@ { "source_reference_file": "test-reference.fa", - "fake-vcf-version": "0.2.0", + "fake-vcf-version": "0.2.1", "reference_files": { "chr1": "fasta_chr1.parquet", "chr2": "fasta_chr2.parquet",