From f7f556cdcaeef3824e81d32d43f9fee261a403e9 Mon Sep 17 00:00:00 2001
From: Gregor Sturm <mail@gregor-sturm.de>
Date: Wed, 7 Apr 2021 19:14:16 +0200
Subject: [PATCH 1/6] WIP update docs

---
 docs/tutorials/tutorial_io.md | 1 +
 1 file changed, 1 insertion(+)
diff --git a/docs/tutorials/tutorial_io.md b/docs/tutorials/tutorial_io.md
index 381001065..6d8952402 100644
--- a/docs/tutorials/tutorial_io.md
+++ b/docs/tutorials/tutorial_io.md
@@ -46,6 +46,7 @@ AnnData and how Scirpy makes use of it, check out the :ref:`data structure <data
 The example data used in this notebook are available from the
 `Scirpy repository <https://github.com/icbi-lab/scirpy/tree/master/docs/tutorials/example_data>`__.
 
+.. TODO update!!
 
 .. important:: **The Scirpy data model**
 

From 1943e53e60c2f9790e25eff827ada15a69706644 Mon Sep 17 00:00:00 2001
From: Gregor Sturm <mail@gregor-sturm.de>
Date: Thu, 8 Apr 2021 09:48:56 +0200
Subject: [PATCH 2/6] Properly rename clonotype in upgrade_schema

---
 scirpy/io/_io.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/scirpy/io/_io.py b/scirpy/io/_io.py
index 4066661f2..00e1a1d24 100644
--- a/scirpy/io/_io.py
+++ b/scirpy/io/_io.py
@@ -618,10 +618,10 @@ def upgrade_schema(adata) -> None:
                 "j_gene": "j_call",
                 "c_gene": "c_call",
                 "cdr3_nt": "junction",
-                "clonotype": "clone_id",
             }.items(),
         )
     }
+    rename_dict["clonotype"] = "clone_id"
     adata.obs.rename(columns=rename_dict, inplace=True)
     adata.obs["extra_chains"] = None
     adata.uns["scirpy_version"] = __version__

From 66c845f1f9d75465088b1674d03da2b820ea4066 Mon Sep 17 00:00:00 2001
From: Gregor Sturm <mail@gregor-sturm.de>
Date: Mon, 12 Apr 2021 10:09:27 +0200
Subject: [PATCH 3/6] Constrain CI to py 3.8 for win and macos

---
 .github/workflows/test.yml | 21 ++++++++++++++-------
 1 file changed, 14 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 9faada178..fee4afba8 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -9,12 +9,18 @@ on:
 jobs:
   test:
     if: "!contains(github.event.head_commit.message, 'skip ci')"
-    runs-on: ${{ matrix.os }}
+    runs-on: ${{ matrix.config.os }}
     strategy:
       fail-fast: false
       matrix:
-        python-version: [3.7, 3.8, 3.9]
-        os: [ubuntu-latest, macos-latest, windows-latest]
+        config:
+          - { python-version: 3.7, os: ubuntu-latest }
+          - { python-version: 3.8, os: ubuntu-latest }
+          - { python-version: 3.9, os: ubuntu-latest }
+          # 3.8 is enough for macos and windows. For 3.9 essential wheels are still missing and building
+          # from source is very painful, especially on windows.
+          - { python-version: 3.8, os: macos-latest }
+          - { python-version: 3.8, os: windows-latest }
 
     steps:
       - uses: actions/checkout@v2
@@ -26,18 +32,19 @@ jobs:
           key: ${{ runner.os }}-pip-${{ hashFiles('pyproject.toml') }}
           restore-keys: |
             ${{ runner.os }}-pip-
+      # The HDF5 system requirements are necessary until pytables provides a wheel for python 3.9
       - name: Install Ubuntu system dependencies
-        if: matrix.os == 'ubuntu-latest'
+        if: matrix.config.os == 'ubuntu-latest'
         run: |
           sudo apt-get install libhdf5-serial-dev
       - name: Install macOS system dependencies
-        if: matrix.os == 'macos-latest'
+        if: matrix.config.os == 'macos-latest'
         run: |
           brew install cairo pkg-config autoconf automake libtool
-      - name: Set up Python ${{ matrix.python-version }}
+      - name: Set up Python ${{ matrix.config.python-version }}
         uses: actions/setup-python@v1
         with:
-          python-version: ${{ matrix.python-version }}
+          python-version: ${{ matrix.config.python-version }}
       - name: Install dependencies
         # TODO the separate numpy installation is to workaround an issue with dandelion's skbio dependency
         run: |

From 45184103520110939500c21c2fce34243fa26a48 Mon Sep 17 00:00:00 2001
From: Gregor Sturm <mail@gregor-sturm.de>
Date: Mon, 12 Apr 2021 13:03:34 +0200
Subject: [PATCH 4/6] Update IO tutorial

---
 docs/glossary.rst                 |  8 ++++---
 docs/tutorials/tutorial_3k_tcr.md |  5 ++--
 docs/tutorials/tutorial_io.md     | 39 +++++++++++++++++++------------
 scirpy/io/_io.py                  |  2 +-
 scirpy/io/_util.py                | 13 +++++++----
 5 files changed, 40 insertions(+), 27 deletions(-)

diff --git a/docs/glossary.rst b/docs/glossary.rst
index 27144d926..8afb0e401 100644
--- a/docs/glossary.rst
+++ b/docs/glossary.rst
@@ -206,10 +206,12 @@ Glossary
         page about our :ref:`IR model<receptor-model>`.
 
     AIRR
-        Adaptive Immune Receptor Repertoire.
-        See also the `AIRR community <https://www.antibodysociety.org/the-airr-community/>`_.
+        Adaptive Immune Receptor Repertoire. Within the Scirpy documentation, we simply 
+        speak of :term:`immune receptors (IR)<IR>`.
 
-        Within the Scirpy documentation, we simply speak of :term:`immune receptors (IR)<IR>`.
+        The `AIRR community <https://www.antibodysociety.org/the-airr-community/>`_ 
+        defines standards around AIRR data. Scirpy supports the `AIRR Rearrangement <https://docs.airr-community.org/en/latest/datarep/rearrangements.html>`_
+        schema and complies with the `AIRR Software Guidelines <https://docs.airr-community.org/en/latest/swtools/airr_swtools_standard.html>`_.
 
     Chain locus
         Scirpy supports all valid `IGMT locus names <http://www.imgt.org/IMGTScientificChart/Nomenclature/IMGTnomenclature.html>`_:
diff --git a/docs/tutorials/tutorial_3k_tcr.md b/docs/tutorials/tutorial_3k_tcr.md
index 3e0051317..fde72de2d 100644
--- a/docs/tutorials/tutorial_3k_tcr.md
+++ b/docs/tutorials/tutorial_3k_tcr.md
@@ -231,9 +231,6 @@ ax = ir.pl.group_abundance(adata, groupby="chain_pairing", target_col="source")
 
 ## Define clonotypes and clonotype clusters
 
-<!-- TODO explain that there are different values for dual_ir -->
-
-
 <!-- #raw raw_mimetype="text/restructuredtext" -->
 .. warning::
 
@@ -277,6 +274,8 @@ The function :func:`scirpy.tl.define_clonotypes` matches cells based on the dist
 detects connected modules in the graph and annotates them as clonotypes. This will add a `clone_id` and
 `clone_id_size` column to `adata.obs`.
 
+The `dual_ir` parameter defines how scirpy handles cells with :term:`more than one pair of receptors <Dual IR>`. The default value is `any`, which implies that cells with any of their primary or secondary receptor chains matching will be considered to be of the same clonotype.
+
 Here, we define :term:`clonotypes <Clonotype>` based on nt-sequence identity.
 In a later step, we will define :term:`clonotype clusters <Clonotype cluster>` based on
 amino-acid similarity.
diff --git a/docs/tutorials/tutorial_io.md b/docs/tutorials/tutorial_io.md
index 6d8952402..24bcc8138 100644
--- a/docs/tutorials/tutorial_io.md
+++ b/docs/tutorials/tutorial_io.md
@@ -12,7 +12,22 @@ jupyter:
 ```python
 %load_ext autoreload
 %autoreload 2
+import anndata
+import logging
+
+
+class NoCategoricalWarningFilter(logging.Filter):
+    """suppress "storing XXX as categorical" warnings."""
+
+    def filter(self, record):
+        m = record.getMessage()
+        return not m.startswith("storing") and m.endswith("as categorical.")
+
+
+anndata.logging.anndata_logger.addFilter(NoCategoricalWarningFilter)
+```
 
+```python
 import scirpy as ir
 import scanpy as sc
 from glob import glob
@@ -21,14 +36,6 @@ import tarfile
 import anndata
 import warnings
 
-# from numba import NumbaPerformanceWarning
-
-# # ignore numba performance warnings
-# warnings.filterwarnings("ignore", category=NumbaPerformanceWarning)
-
-# suppress "storing XXX as categorical" warnings.
-anndata.logging.anndata_logger.setLevel("ERROR")
-
 sc.set_figure_params(figsize=(4, 4))
 sc.settings.verbosity = 2  # verbosity: errors (0), warnings (1), info (2), hints (3)
 ```
@@ -46,21 +53,23 @@ AnnData and how Scirpy makes use of it, check out the :ref:`data structure <data
 The example data used in this notebook are available from the
 `Scirpy repository <https://github.com/icbi-lab/scirpy/tree/master/docs/tutorials/example_data>`__.
 
-.. TODO update!!
-
 .. important:: **The Scirpy data model**
 
     Currently, the Scirpy data model has the following constraints:
 
      * BCR and TCR chains are supported. Chain loci must be valid :term:`Chain locus`,
        i.e. one of `TRA`, `TRG`, `IGK`, or `IGL` (chains with a :term:`VJ<V(D)J>` junction) or
-       `TRB`, `TRD`, or `IGH` (chains with a :term:`VDJ<V(D)J>` junction). Other chains are discarded.
-     * Non-productive chains are removed. *CellRanger*, *TraCeR*, and the *AIRR rearrangment format*
+       `TRB`, `TRD`, or `IGH` (chains with a :term:`VDJ<V(D)J>` junction). 
+     * Each cell can contain up to two `VJ` and two `VDJ` chains (:term:`Dual IR`).
+       Excess chains are ignored (those with lowest read count/:term:`UMI` count)
+       and cells flagged as :term:`Multichain-cell`.
+     * Non-productive chains are ignored. *CellRanger*, *TraCeR*, and the *AIRR rearrangement format*
        flag these cells appropriately. When reading :ref:`custom formats <importing-custom-formats>`,
        you need to pass the flag explicitly or filter the chains beforehand.
-     * Each chain can contain up to two `VJ` and two `VDJ` chains (:term:`Dual IR`).
-       Excess chains are removed (those with lowest read count/:term:`UMI` count)
-       and cells flagged as :term:`Multichain-cell`.
+     * Excess chains, non-productive chains, or chains with invalid loci
+       are serialized to JSON and stored in the `extra_chains` column. They are not 
+       used by scirpy except when exporting the `AnnData` object to :term:`AIRR` format. 
+     
 
     For more information, see :ref:`receptor-model`.
 
diff --git a/scirpy/io/_io.py b/scirpy/io/_io.py
index 00e1a1d24..db271a8bb 100644
--- a/scirpy/io/_io.py
+++ b/scirpy/io/_io.py
@@ -665,7 +665,7 @@ def to_dandelion(adata: AnnData):
 
 
 def from_dandelion(dandelion, transfer=False) -> AnnData:
-    """Import data from dandelion (:cite:`Stephenson2021`).
+    """Import data from `Dandelion <https://github.com/zktuong/dandelion>`_ (:cite:`Stephenson2021`).
 
     Parameters
     ----------
diff --git a/scirpy/io/_util.py b/scirpy/io/_util.py
index 407f6d4dd..249074555 100644
--- a/scirpy/io/_util.py
+++ b/scirpy/io/_util.py
@@ -7,12 +7,15 @@
 
 .. note::
     Reading data into *Scirpy* has the following constraints:
-     * each cell can have up to four chains (:term:`Dual IR`):
-       two :term:`VJ<V(D)J>` and two :term:`VDJ<V(D)J>` chains.
-     * Excess chains are removed (those with lowest read count/:term:`UMI` count)
+     * each cell can have up to four productive chains chains (:term:`Dual IR`):
+       two :term:`VJ<V(D)J>` and two :term:`VDJ<V(D)J>` chains. 
+     * Excess chains are ignored (those with lowest read count/:term:`UMI` count)
        and cells flagged as :term:`Multichain-cell`.
-     * non-productive chains are removed
-     * chain loci must be :term:`IGMT locus names<Chain locus>`.
+     * non-productive chains are ignored. 
+     * chain loci must be valid :term:`IGMT locus names<Chain locus>`.
+     * excess chains, non-productive chains, or chains with invalid loci
+       are serialized to JSON and stored in the `extra_chains` column. They are not 
+       used by scirpy except when exporting the `AnnData` object to AIRR format. 
 
     For more information, see :ref:`receptor-model`.
 """

From 4e922df4b4bd5497b54599649b4bcc21754563ab Mon Sep 17 00:00:00 2001
From: Gregor Sturm <mail@gregor-sturm.de>
Date: Mon, 12 Apr 2021 13:27:14 +0200
Subject: [PATCH 5/6] Update API page

---
 docs/api.rst              | 18 ++++++++++++++----
 docs/usage-principles.rst |  2 +-
 scirpy/io/_io.py          | 16 ++++++++--------
 scirpy/io/_util.py        |  8 ++++----
 4 files changed, 27 insertions(+), 17 deletions(-)

diff --git a/docs/api.rst b/docs/api.rst
index fa2d42c6f..a84e102cd 100644
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -16,18 +16,23 @@ as closely as possible.
 Input/Output: `io`
 ------------------
 
+.. module:: scirpy.io
+
 .. note::
    In scirpy v0.7.0 the way VDJ data is stored in `adata.obs` has changed to 
    be fully compliant with the `AIRR Rearrangement <https://docs.airr-community.org/en/latest/datarep/rearrangements.html#productive>`__ 
    schema. Please use :func:`~scirpy.io.upgrade_schema` to make `AnnData` objects
    from previous scirpy versions compatible with the most recent scirpy workflow. 
 
+   .. autosummary::
+      :toctree: ./generated
+      
+      upgrade_schema
+
 
 The following functions allow to import :term:`V(D)J` information from various
 formats.
 
-.. module:: scirpy.io
-
 .. autosummary::
    :toctree: ./generated
 
@@ -36,8 +41,14 @@ formats.
    read_tracer
    read_bracer
    read_airr
-   write_airr
    from_dandelion
+
+Scirpy can export data to the following formats:
+
+.. autosummary::
+   :toctree: ./generated
+
+   write_airr
    to_dandelion
 
 To convert own formats into the scirpy :ref:`data-structure`, we recommend building
@@ -51,7 +62,6 @@ For more details, check the :ref:`Data loading tutorial <importing-data>`.
    AirrCell
    from_airr_cells
    to_airr_cells
-   upgrade_schema
 
 
 Preprocessing: `pp`
diff --git a/docs/usage-principles.rst b/docs/usage-principles.rst
index c17ffd234..366aef9c0 100644
--- a/docs/usage-principles.rst
+++ b/docs/usage-principles.rst
@@ -22,7 +22,7 @@ Scirpy is an extension to `Scanpy <https://scanpy.readthedocs.io>`_ and adheres
  * The :class:`~anndata.AnnData` instance is modified inplace, unless the functions
    is called with the keyword argument `inplace=False`.
 
-We decided to handle a few minor points differenlty to Scanpy:
+We decided to handle a few minor points differently to Scanpy:
 
  * Plotting functions with inexpensive computations (e.g. :func:`scirpy.pl.clonal_expansion`)
    call the corresponding tool (:func:`scirpy.tl.clonal_expansion`) on-the-fly and
diff --git a/scirpy/io/_io.py b/scirpy/io/_io.py
index db271a8bb..1f3060568 100644
--- a/scirpy/io/_io.py
+++ b/scirpy/io/_io.py
@@ -339,9 +339,7 @@ def read_airr(
     include_fields: Optional[Collection[str]] = DEFAULT_AIRR_FIELDS,
 ) -> AnnData:
     """\
-    Read AIRR-compliant data.
-
-    Reads data organized in the `AIRR rearrangement schema <https://docs.airr-community.org/en/latest/datarep/rearrangements.html>`_.
+    Read data from `AIRR rearrangement <https://docs.airr-community.org/en/latest/datarep/rearrangements.html>`_ format.
 
     The following columns are required by scirpy: 
      * `cell_id`
@@ -558,7 +556,7 @@ def read_bracer(path: Union[str, Path]) -> AnnData:
 
 @_check_upgrade_schema()
 def write_airr(adata: AnnData, filename: Union[str, Path]) -> None:
-    """Write immune receptor fields from `adata.obs` in AIRR Rearrangement TSV format.
+    """Export :term:`IR` data to :term:`AIRR` Rearrangement `tsv` format.
 
     Parameters
     ----------
@@ -629,8 +627,7 @@ def upgrade_schema(adata) -> None:
 
 @_check_upgrade_schema()
 def to_dandelion(adata: AnnData):
-    """
-    Convert a scirpy-initialized AnnData object to Dandelion format using `to_ir_objs`.
+    """Export data to `Dandelion <https://github.com/zktuong/dandelion>`_ (:cite:`Stephenson2021`).
 
     Parameters
     ----------
@@ -640,7 +637,6 @@ def to_dandelion(adata: AnnData):
     Returns
     -------
     `Dandelion` object.
-
     """
     try:
         import dandelion as ddl
@@ -664,8 +660,12 @@ def to_dandelion(adata: AnnData):
     return ddl.Dandelion(ddl.load_data(data))
 
 
+@_doc_params(doc_working_model=doc_working_model)
 def from_dandelion(dandelion, transfer=False) -> AnnData:
-    """Import data from `Dandelion <https://github.com/zktuong/dandelion>`_ (:cite:`Stephenson2021`).
+    """\
+    Import data from `Dandelion <https://github.com/zktuong/dandelion>`_ (:cite:`Stephenson2021`).
+
+    {doc_working_model}
 
     Parameters
     ----------
diff --git a/scirpy/io/_util.py b/scirpy/io/_util.py
index 249074555..31c9af294 100644
--- a/scirpy/io/_util.py
+++ b/scirpy/io/_util.py
@@ -7,13 +7,13 @@
 
 .. note::
     Reading data into *Scirpy* has the following constraints:
-     * each cell can have up to four productive chains chains (:term:`Dual IR`):
+     * Each cell can have up to four productive chains (:term:`Dual IR`):
        two :term:`VJ<V(D)J>` and two :term:`VDJ<V(D)J>` chains. 
      * Excess chains are ignored (those with lowest read count/:term:`UMI` count)
        and cells flagged as :term:`Multichain-cell`.
-     * non-productive chains are ignored. 
-     * chain loci must be valid :term:`IGMT locus names<Chain locus>`.
-     * excess chains, non-productive chains, or chains with invalid loci
+     * Non-productive chains are ignored. 
+     * Chain loci must be valid :term:`IMGT locus names<Chain locus>`.
+     * Excess chains, non-productive chains, or chains with invalid loci
        are serialized to JSON and stored in the `extra_chains` column. They are not 
        used by scirpy except when exporting the `AnnData` object to AIRR format. 
 

From 8187b26a40674964deb9a42c42fea57e968f5a8f Mon Sep 17 00:00:00 2001
From: Gregor Sturm <mail@gregor-sturm.de>
Date: Mon, 12 Apr 2021 14:06:05 +0200
Subject: [PATCH 6/6] Fix log filtering

---
 docs/tutorials/tutorial_io.md | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/docs/tutorials/tutorial_io.md b/docs/tutorials/tutorial_io.md
index 24bcc8138..8994896b3 100644
--- a/docs/tutorials/tutorial_io.md
+++ b/docs/tutorials/tutorial_io.md
@@ -13,18 +13,11 @@ jupyter:
 %load_ext autoreload
 %autoreload 2
 import anndata
-import logging
 
-
-class NoCategoricalWarningFilter(logging.Filter):
-    """suppress "storing XXX as categorical" warnings."""
-
-    def filter(self, record):
-        m = record.getMessage()
-        return not m.startswith("storing") and m.endswith("as categorical.")
-
-
-anndata.logging.anndata_logger.addFilter(NoCategoricalWarningFilter)
+anndata.logging.anndata_logger.addFilter(
+    lambda r: not r.getMessage().startswith("storing")
+    and r.getMessage().endswith("as categorical.")
+)
 ```
 
 ```python