Skip to content

Commit

Permalink
Merge pull request #261 from icbi-lab/pin-anndata
Browse files: browse the repository at this point in the history
Pin anndata v0.7.6 and fix issue with saving adata object
  • Loading branch information
grst authored Apr 13, 2021
2 parents 6099f0c + e836cdc commit ff6b048
Show file tree
Hide file tree
Showing 4 changed files with 17 additions and 9 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ classifiers = [
]
requires-python = '>= 3.7'
requires = [
'anndata>=0.7.3',
'anndata>=0.7.6',
'scanpy>=1.6.0',
'pandas>=1.0',
'numpy>=1.17.0',
Expand Down
18 changes: 11 additions & 7 deletions scirpy/io/_convert_anndata.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
from typing import Collection, Iterable, List, Optional
from .. import __version__
import numpy as np
from pandas.api.types import is_object_dtype


def _sanitize_anndata(adata: AnnData) -> None:
Expand All @@ -17,12 +18,14 @@ def _sanitize_anndata(adata: AnnData) -> None:
len(adata.X.shape) == 2
), "X needs to have dimensions, otherwise concat doesn't work. "

CATEGORICAL_COLS = ("locus", "v_call", "d_call", "j_call", "c_call")

# Pending updates to anndata to properly handle boolean columns.
# For now, let's turn them into a categorical with "True/False"
BOOLEAN_COLS = ("has_ir", "is_cell", "multi_chain", "high_confidence", "productive")

# explicitly convert those to categoricals. All IR_ columns that are strings
# will be converted to categoricals, too
CATEGORICAL_COLS = ("extra_chains",)

# Sanitize has_ir column into categorical
# This should always be a categorical with True / False
for col in adata.obs.columns:
Expand All @@ -34,13 +37,14 @@ def _sanitize_anndata(adata: AnnData) -> None:
],
categories=["True", "False", "None"],
)

# Turn other columns into categorical
for col in adata.obs.columns:
if col.endswith(CATEGORICAL_COLS):
elif col.endswith(CATEGORICAL_COLS) or (
col.startswith("IR_") and is_object_dtype(adata.obs[col])
):
# Turn all IR_ columns that are of type string or object into categoricals;
# otherwise saving anndata doesn't work.
adata.obs[col] = pd.Categorical(adata.obs[col])

adata._sanitize()
adata.strings_to_categoricals()


@_doc_params(doc_working_model=doc_working_model)
Expand Down
3 changes: 2 additions & 1 deletion scirpy/io/_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@
from pathlib import Path
import airr
from ..util import _doc_params, _is_true, _is_true2, _translate_dna_to_protein
from ._convert_anndata import from_airr_cells, to_airr_cells
from ._convert_anndata import from_airr_cells, to_airr_cells, _sanitize_anndata
from ._util import doc_working_model, _IOLogger, _check_upgrade_schema
from .._compat import Literal
from airr import RearrangementSchema
Expand Down Expand Up @@ -623,6 +623,7 @@ def upgrade_schema(adata) -> None:
adata.obs.rename(columns=rename_dict, inplace=True)
adata.obs["extra_chains"] = None
adata.uns["scirpy_version"] = __version__
_sanitize_anndata(adata)


@_check_upgrade_schema()
Expand Down
3 changes: 3 additions & 0 deletions scirpy/tests/test_io.py
Original file line number Diff line number Diff line change
Expand Up @@ -375,16 +375,19 @@ def test_read_airr():
anndata.obs.loc[anndata.obs["IR_VJ_1_locus"] == "TRA", tra_cols],
anndata_tra.obs.loc[:, tra_cols],
check_categorical=False, # categories differ, obviously
check_dtype=False,
)
pdt.assert_frame_equal(
anndata.obs.loc[anndata.obs["IR_VDJ_1_locus"] == "TRB", trb_cols],
anndata_trb.obs.loc[:, trb_cols],
check_categorical=False, # categories differ, obviously
check_dtype=False,
)
pdt.assert_frame_equal(
anndata.obs.loc[anndata.obs["IR_VDJ_1_locus"] == "IGH", ig_cols],
anndata_ig.obs.loc[:, ig_cols],
check_categorical=False, # categories differ, obviously
check_dtype=False,
)

# test some fundamental values
Expand Down

0 comments on commit ff6b048

Please sign in to comment.