Set provider parameter when instantiating onnxruntime.InferenceSession (#1976)

* Set provider parameter when instantiating onnxruntime.InferenceSession
fixes #1973

* Change device type to torch.device

* set type annotation of device to torch.device everywhere

* Apply Black

* Change types of device and devices params across the codebase

* Update Documentation & Code Style

* Add type: ignore in the right location

* Update Documentation & Code Style

* Add type: ignore

* feedback

* Update Documentation & Code Style

* feedback 2

* Fix convert_to_transformers

* Fix syntax error

* Update Documentation & Code Style

* Consider augment and load_glove user-facing as well

* Update Documentation & Code Style

* Fix mypy

* Update Documentation & Code Style

Co-authored-by: Julian Risch <[email protected]>
Co-authored-by: Sara Zan <[email protected]>
3 people authored Mar 23, 2022
1 parent 851fe1c commit 3b2001e
Showing 16 changed files with 165 additions and 112 deletions.
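For context, a minimal sketch of the call the commit title refers to. Since onnxruntime 1.9, the `providers` list must be set explicitly when GPU providers are available; the model path here is hypothetical:

```python
import onnxruntime

# Since onnxruntime 1.9, the providers list must be passed explicitly;
# listing CPUExecutionProvider last keeps a CPU fallback.
session = onnxruntime.InferenceSession(
    "model.onnx",  # hypothetical model path
    providers=["CUDAExecutionProvider", "CPUExecutionProvider"],
)
```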
7 changes: 5 additions & 2 deletions docs/_src/api/api/ranker.md
@@ -92,7 +92,7 @@ p.add_node(component=ranker, name="Ranker", inputs=["ESRetriever"])
#### \_\_init\_\_

```python
-def __init__(model_name_or_path: Union[str, Path], model_version: Optional[str] = None, top_k: int = 10, use_gpu: bool = True, devices: Optional[List[Union[int, str, torch.device]]] = None)
+def __init__(model_name_or_path: Union[str, Path], model_version: Optional[str] = None, top_k: int = 10, use_gpu: bool = True, devices: Optional[List[Union[str, torch.device]]] = None)
```

**Arguments**:
@@ -103,7 +103,10 @@ See https://huggingface.co/cross-encoder for full list of available models
- `model_version`: The version of model to use from the HuggingFace model hub. Can be tag name, branch name, or commit hash.
- `top_k`: The maximum number of documents to return
- `use_gpu`: Whether to use all available GPUs or the CPU. Falls back on CPU if no GPU is available.
-- `devices`: List of GPU devices to limit inference to certain GPUs and not use all available ones (e.g. ["cuda:0"]).
+- `devices`: List of GPU (or CPU) devices, to limit inference to certain GPUs and not use all available ones
+The strings will be converted into pytorch devices, so use the string notation described here:
+https://pytorch.org/docs/stable/tensor_attributes.html?highlight=torch%20device#torch.torch.device
+(e.g. ["cuda:0"]).

<a id="sentence_transformers.SentenceTransformersRanker.predict_batch"></a>

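For illustration, a minimal sketch of the new `devices` parameter on the ranker (the `haystack.nodes` import path is an assumption, not part of this diff):

```python
import torch
from haystack.nodes import SentenceTransformersRanker  # import path is an assumption

# Plain strings and torch.device objects are interchangeable here;
# strings are converted to torch.device internally.
ranker = SentenceTransformersRanker(
    model_name_or_path="cross-encoder/ms-marco-MiniLM-L-12-v2",
    devices=[torch.device("cuda:0")],  # equivalent to devices=["cuda:0"]
)
```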
22 changes: 14 additions & 8 deletions docs/_src/api/api/reader.md
@@ -398,7 +398,7 @@ Dict containing query and answers
#### eval\_on\_file

```python
-def eval_on_file(data_dir: str, test_filename: str, device: Optional[str] = None)
+def eval_on_file(data_dir: Union[Path, str], test_filename: str, device: Optional[Union[str, torch.device]] = None)
```

Performs evaluation on a SQuAD-formatted file.
@@ -410,16 +410,18 @@ Returns a dict containing the following metrics:

**Arguments**:

-- `data_dir` (`Path or str`): The directory in which the test set can be found
-- `test_filename` (`str`): The name of the file containing the test data in SQuAD format.
-- `device` (`str`): The device on which the tensors should be processed. Choose from "cpu" and "cuda" or use the Reader's device by default.
+- `data_dir`: The directory in which the test set can be found
+- `test_filename`: The name of the file containing the test data in SQuAD format.
+- `device`: The device on which the tensors should be processed.
+Choose from torch.device("cpu") and torch.device("cuda") (or simply "cpu" or "cuda")
+or use the Reader's device by default.

<a id="farm.FARMReader.eval"></a>

#### eval

```python
-def eval(document_store: BaseDocumentStore, device: Optional[str] = None, label_index: str = "label", doc_index: str = "eval_document", label_origin: str = "gold-label", calibrate_conf_scores: bool = False)
+def eval(document_store: BaseDocumentStore, device: Optional[Union[str, torch.device]] = None, label_index: str = "label", doc_index: str = "eval_document", label_origin: str = "gold-label", calibrate_conf_scores: bool = False)
```

Performs evaluation on evaluation documents in the DocumentStore.
@@ -432,7 +434,9 @@ Returns a dict containing the following metrics:
**Arguments**:

- `document_store`: DocumentStore containing the evaluation documents
-- `device`: The device on which the tensors should be processed. Choose from "cpu" and "cuda" or use the Reader's device by default.
+- `device`: The device on which the tensors should be processed.
+Choose from torch.device("cpu") and torch.device("cuda") (or simply "cpu" or "cuda")
+or use the Reader's device by default.
- `label_index`: Index/Table name where labeled questions are stored
- `doc_index`: Index/Table name where documents that are used for evaluation are stored
- `label_origin`: Field name where the gold labels are stored
@@ -443,15 +447,17 @@ Returns a dict containing the following metrics:
#### calibrate\_confidence\_scores

```python
-def calibrate_confidence_scores(document_store: BaseDocumentStore, device: Optional[str] = None, label_index: str = "label", doc_index: str = "eval_document", label_origin: str = "gold_label")
+def calibrate_confidence_scores(document_store: BaseDocumentStore, device: Optional[Union[str, torch.device]] = None, label_index: str = "label", doc_index: str = "eval_document", label_origin: str = "gold_label")
```

Calibrates confidence scores on evaluation documents in the DocumentStore.

**Arguments**:

- `document_store`: DocumentStore containing the evaluation documents
-- `device`: The device on which the tensors should be processed. Choose from "cpu" and "cuda" or use the Reader's device by default.
+- `device`: The device on which the tensors should be processed.
+Choose from torch.device("cpu") and torch.device("cuda") (or simply "cpu" or "cuda")
+or use the Reader's device by default.
- `label_index`: Index/Table name where labeled questions are stored
- `doc_index`: Index/Table name where documents that are used for evaluation are stored
- `label_origin`: Field name where the gold labels are stored
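A sketch of calling `eval_on_file` with the retyped `device` parameter (import path, directory, and file name are assumptions):

```python
import torch
from haystack.nodes import FARMReader  # import path is an assumption

reader = FARMReader(model_name_or_path="deepset/roberta-base-squad2")
metrics = reader.eval_on_file(
    data_dir="data/squad20",        # hypothetical directory
    test_filename="dev-v2.0.json",  # hypothetical SQuAD-format file
    device=torch.device("cuda"),    # a plain "cuda" string also works now
)
```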
27 changes: 18 additions & 9 deletions docs/_src/api/api/retriever.md
@@ -312,7 +312,7 @@ Karpukhin, Vladimir, et al. (2020): "Dense Passage Retrieval for Open-Domain Que
#### \_\_init\_\_

```python
-def __init__(document_store: BaseDocumentStore, query_embedding_model: Union[Path, str] = "facebook/dpr-question_encoder-single-nq-base", passage_embedding_model: Union[Path, str] = "facebook/dpr-ctx_encoder-single-nq-base", model_version: Optional[str] = None, max_seq_len_query: int = 64, max_seq_len_passage: int = 256, top_k: int = 10, use_gpu: bool = True, batch_size: int = 16, embed_title: bool = True, use_fast_tokenizers: bool = True, infer_tokenizer_classes: bool = False, similarity_function: str = "dot_product", global_loss_buffer_size: int = 150000, progress_bar: bool = True, devices: Optional[List[Union[int, str, torch.device]]] = None, use_auth_token: Optional[Union[str, bool]] = None)
+def __init__(document_store: BaseDocumentStore, query_embedding_model: Union[Path, str] = "facebook/dpr-question_encoder-single-nq-base", passage_embedding_model: Union[Path, str] = "facebook/dpr-ctx_encoder-single-nq-base", model_version: Optional[str] = None, max_seq_len_query: int = 64, max_seq_len_passage: int = 256, top_k: int = 10, use_gpu: bool = True, batch_size: int = 16, embed_title: bool = True, use_fast_tokenizers: bool = True, infer_tokenizer_classes: bool = False, similarity_function: str = "dot_product", global_loss_buffer_size: int = 150000, progress_bar: bool = True, devices: Optional[List[Union[str, torch.device]]] = None, use_auth_token: Optional[Union[str, bool]] = None)
```

Init the Retriever incl. the two encoder models from a local or remote model checkpoint.
@@ -362,8 +362,11 @@ Options: `dot_product` (Default) or `cosine`
Increase if errors like "encoded data exceeds max_size ..." come up
- `progress_bar`: Whether to show a tqdm progress bar or not.
Can be helpful to disable in production deployments to keep the logs clean.
-- `devices`: List of GPU devices to limit inference to certain GPUs and not use all available ones (e.g. ["cuda:0"]).
-As multi-GPU training is currently not implemented for DPR, training will only use the first device provided in this list.
+- `devices`: List of GPU (or CPU) devices, to limit inference to certain GPUs and not use all available ones
+These strings will be converted into pytorch devices, so use the string notation described here:
+https://pytorch.org/docs/stable/tensor_attributes.html?highlight=torch%20device#torch.torch.device
+(e.g. ["cuda:0"]). Note: as multi-GPU training is currently not implemented for DPR, training
+will only use the first device provided in this list.
- `use_auth_token`: API token used to download private models from Huggingface. If this parameter is set to `True`,
the local token will be used, which must be previously created via `transformer-cli login`.
Additional information can be found here https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained
@@ -520,7 +523,7 @@ Kostić, Bogdan, et al. (2021): "Multi-modal Retrieval of Tables and Texts Using
#### \_\_init\_\_

```python
-def __init__(document_store: BaseDocumentStore, query_embedding_model: Union[Path, str] = "deepset/bert-small-mm_retrieval-question_encoder", passage_embedding_model: Union[Path, str] = "deepset/bert-small-mm_retrieval-passage_encoder", table_embedding_model: Union[Path, str] = "deepset/bert-small-mm_retrieval-table_encoder", model_version: Optional[str] = None, max_seq_len_query: int = 64, max_seq_len_passage: int = 256, max_seq_len_table: int = 256, top_k: int = 10, use_gpu: bool = True, batch_size: int = 16, embed_meta_fields: List[str] = ["name", "section_title", "caption"], use_fast_tokenizers: bool = True, infer_tokenizer_classes: bool = False, similarity_function: str = "dot_product", global_loss_buffer_size: int = 150000, progress_bar: bool = True, devices: Optional[List[Union[int, str, torch.device]]] = None, use_auth_token: Optional[Union[str, bool]] = None)
+def __init__(document_store: BaseDocumentStore, query_embedding_model: Union[Path, str] = "deepset/bert-small-mm_retrieval-question_encoder", passage_embedding_model: Union[Path, str] = "deepset/bert-small-mm_retrieval-passage_encoder", table_embedding_model: Union[Path, str] = "deepset/bert-small-mm_retrieval-table_encoder", model_version: Optional[str] = None, max_seq_len_query: int = 64, max_seq_len_passage: int = 256, max_seq_len_table: int = 256, top_k: int = 10, use_gpu: bool = True, batch_size: int = 16, embed_meta_fields: List[str] = ["name", "section_title", "caption"], use_fast_tokenizers: bool = True, infer_tokenizer_classes: bool = False, similarity_function: str = "dot_product", global_loss_buffer_size: int = 150000, progress_bar: bool = True, devices: Optional[List[Union[str, torch.device]]] = None, use_auth_token: Optional[Union[str, bool]] = None)
```

Init the Retriever incl. the two encoder models from a local or remote model checkpoint.
@@ -556,8 +559,11 @@ Options: `dot_product` (Default) or `cosine`
Increase if errors like "encoded data exceeds max_size ..." come up
- `progress_bar`: Whether to show a tqdm progress bar or not.
Can be helpful to disable in production deployments to keep the logs clean.
-- `devices`: List of GPU devices to limit inference to certain GPUs and not use all available ones (e.g. ["cuda:0"]).
-As multi-GPU training is currently not implemented for DPR, training will only use the first device provided in this list.
+- `devices`: List of GPU (or CPU) devices, to limit inference to certain GPUs and not use all available ones
+These strings will be converted into pytorch devices, so use the string notation described here:
+https://pytorch.org/docs/stable/tensor_attributes.html?highlight=torch%20device#torch.torch.device
+(e.g. ["cuda:0"]). Note: as multi-GPU training is currently not implemented for TableTextRetriever,
+training will only use the first device provided in this list.
- `use_auth_token`: API token used to download private models from Huggingface. If this parameter is set to `True`,
the local token will be used, which must be previously created via `transformer-cli login`.
Additional information can be found here https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained
@@ -695,7 +701,7 @@ class EmbeddingRetriever(BaseRetriever)
#### \_\_init\_\_

```python
-def __init__(document_store: BaseDocumentStore, embedding_model: str, model_version: Optional[str] = None, use_gpu: bool = True, batch_size: int = 32, max_seq_len: int = 512, model_format: str = "farm", pooling_strategy: str = "reduce_mean", emb_extraction_layer: int = -1, top_k: int = 10, progress_bar: bool = True, devices: Optional[List[Union[int, str, torch.device]]] = None, use_auth_token: Optional[Union[str, bool]] = None)
+def __init__(document_store: BaseDocumentStore, embedding_model: str, model_version: Optional[str] = None, use_gpu: bool = True, batch_size: int = 32, max_seq_len: int = 512, model_format: str = "farm", pooling_strategy: str = "reduce_mean", emb_extraction_layer: int = -1, top_k: int = 10, progress_bar: bool = True, devices: Optional[List[Union[str, torch.device]]] = None, use_auth_token: Optional[Union[str, bool]] = None)
```

**Arguments**:
@@ -721,8 +727,11 @@ Options:
Default: -1 (very last layer).
- `top_k`: How many documents to return per query.
- `progress_bar`: If true displays progress bar during embedding.
-- `devices`: List of GPU devices to limit inference to certain GPUs and not use all available ones (e.g. ["cuda:0"]).
-As multi-GPU training is currently not implemented for DPR, training will only use the first device provided in this list.
+- `devices`: List of GPU (or CPU) devices, to limit inference to certain GPUs and not use all available ones
+These strings will be converted into pytorch devices, so use the string notation described here:
+https://pytorch.org/docs/stable/tensor_attributes.html?highlight=torch%20device#torch.torch.device
+(e.g. ["cuda:0"]). Note: As multi-GPU training is currently not implemented for EmbeddingRetriever,
+training will only use the first device provided in this list.
- `use_auth_token`: API token used to download private models from Huggingface. If this parameter is set to `True`,
the local token will be used, which must be previously created via `transformer-cli login`.
Additional information can be found here https://huggingface.co/transformers/main_classes/model.html#transformers.PreTrainedModel.from_pretrained
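A sketch of the same `devices` parameter on the retriever side (import paths are assumptions; model names are the defaults from the signature above):

```python
from haystack.document_stores import InMemoryDocumentStore  # import path is an assumption
from haystack.nodes import DensePassageRetriever

retriever = DensePassageRetriever(
    document_store=InMemoryDocumentStore(),
    query_embedding_model="facebook/dpr-question_encoder-single-nq-base",
    passage_embedding_model="facebook/dpr-ctx_encoder-single-nq-base",
    devices=["cuda:0", "cuda:1"],  # inference can use both; training uses only the first
)
```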
20 changes: 9 additions & 11 deletions haystack/modeling/conversion/transformers.py
@@ -1,11 +1,13 @@
import logging
+from typing import Union

+import torch
from transformers import AutoModelForQuestionAnswering

from haystack.modeling.model import adaptive_model as am
from haystack.modeling.model.language_model import LanguageModel
from haystack.modeling.model.prediction_head import QuestionAnsweringHead
from haystack.modeling.data_handler.processor import Processor


logger = logging.getLogger(__name__)
@@ -46,10 +48,10 @@ def convert_to_transformers(adaptive_model):
@staticmethod
def convert_from_transformers(
model_name_or_path,
-device,
-revision=None,
-task_type=None,
-processor=None,
+device: Union[str, torch.device],
+revision: str = None,
+task_type: str = "question_answering",
+processor: Processor = None,
use_auth_token: Union[bool, str] = None,
**kwargs,
):
@@ -65,14 +67,10 @@ def convert_from_transformers(
- deepset/bert-large-uncased-whole-word-masking-squad2
See https://huggingface.co/models for full list
:param device: "cpu" or "cuda"
:param device: torch.device("cpu") or torch.device("cuda")
:param revision: The version of model to use from the HuggingFace model hub. Can be tag name, branch name, or commit hash.
-:type revision: str
-:param task_type: One of :
-- 'question_answering'
-More tasks coming soon ...
-:param processor: populates prediction head with information coming from tasks
-:type processor: Processor
+Right now accepts only 'question_answering'.
+:param processor: populates prediction head with information coming from tasks.
:return: AdaptiveModel
"""

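A usage sketch for the retyped conversion method (the enclosing `Converter` class name is an assumption based on the module path):

```python
import torch
from haystack.modeling.conversion.transformers import Converter  # class name is an assumption

model = Converter.convert_from_transformers(
    "deepset/bert-large-uncased-whole-word-masking-squad2",
    device=torch.device("cuda:0"),  # or torch.device("cpu")
    task_type="question_answering",
)
```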
2 changes: 1 addition & 1 deletion haystack/modeling/data_handler/data_silo.py
@@ -785,7 +785,7 @@ def __init__(
self,
teacher_model: "FARMReader",
teacher_batch_size: int,
-device: str,
+device: torch.device,
processor: Processor,
batch_size: int,
eval_batch_size: Optional[int] = None,
4 changes: 2 additions & 2 deletions haystack/modeling/evaluation/eval.py
@@ -20,11 +20,11 @@ class Evaluator:
Handles evaluation of a given model over a specified dataset.
"""

-def __init__(self, data_loader: torch.utils.data.DataLoader, tasks, device: str, report: bool = True):
+def __init__(self, data_loader: torch.utils.data.DataLoader, tasks, device: torch.device, report: bool = True):
"""
:param data_loader: The PyTorch DataLoader that will return batches of data from the evaluation dataset
:param tasks:
-:param device: The device on which the tensors should be processed. Choose from "cpu" and "cuda".
+:param device: The device on which the tensors should be processed. Choose from torch.device("cpu") and torch.device("cuda").
:param report: Whether an eval report should be generated (e.g. classification report per class).
"""
self.data_loader = data_loader
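Since `Evaluator` now takes a `torch.device` rather than a string, callers convert up front; a minimal sketch of that conversion (it mirrors what the user-facing classes above do with their string inputs):

```python
import torch

# Pick a GPU if available, otherwise fall back to CPU,
# and pass the resulting torch.device object on.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
```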
2 changes: 1 addition & 1 deletion haystack/modeling/infer.py
@@ -128,7 +128,7 @@ def load(
use_fast: bool = True,
tokenizer_args: Dict = None,
multithreading_rust: bool = True,
-devices: Optional[List[Union[int, str, torch.device]]] = None,
+devices: Optional[List[torch.device]] = None,
use_auth_token: Union[bool, str] = None,
**kwargs,
):
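With `devices` on this internal loader now typed as `List[torch.device]` only, callers pass device objects rather than strings; a sketch (the positional model argument and the `task_type` keyword are assumptions consistent with the rest of the signature):

```python
import torch
from haystack.modeling.infer import Inferencer

inferencer = Inferencer.load(
    "deepset/roberta-base-squad2",     # illustrative model name
    task_type="question_answering",    # assumed keyword argument
    devices=[torch.device("cuda:0")],  # no string shorthand at this layer
)
```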
[Diffs for the remaining 9 changed files did not load.]
