Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

v4.1.4 Release. #3015

Merged
merged 1 commit into from
Jan 15, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion python/deeplake/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,9 @@ def progress_bar(iterable, *args, **kwargs):

import deeplake
from ._deeplake import *
from deeplake.ingestion import from_coco

__version__ = "4.1.3"
__version__ = "4.1.4"

__all__ = [
"__version__",
Expand All @@ -33,6 +34,7 @@ def progress_bar(iterable, *args, **kwargs):
"DatasetView",
"Dataset",
"ReadOnlyDataset",
"IndexingMode",
"ExpiredTokenError",
"FormatNotSupportedError",
"UnevenColumnsError",
Expand All @@ -43,6 +45,7 @@ def progress_bar(iterable, *args, **kwargs):
"InvalidColumnValueError",
"InvalidPolygonShapeError",
"InvalidLinkDataError",
"EmbeddingSizeMismatch",
"PushError",
"GcsStorageProviderFailed",
"History",
Expand Down Expand Up @@ -129,6 +132,7 @@ def progress_bar(iterable, *args, **kwargs):
"Client",
"client",
"__prepare_atfork",
"from_coco",
]


Expand Down
101 changes: 101 additions & 0 deletions python/deeplake/__init__.pyi
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
import datetime
import typing
import pathlib

import formats
import storage
Expand All @@ -23,6 +24,7 @@ __all__ = [
"DatasetView",
"Dataset",
"ReadOnlyDataset",
"IndexingMode",
"ExpiredTokenError",
"FormatNotSupportedError",
"UnevenColumnsError",
Expand All @@ -33,6 +35,7 @@ __all__ = [
"InvalidColumnValueError",
"InvalidPolygonShapeError",
"InvalidLinkDataError",
"EmbeddingSizeMismatch",
"PushError",
"GcsStorageProviderFailed",
"History",
Expand Down Expand Up @@ -119,6 +122,7 @@ __all__ = [
"Client",
"client",
"__prepare_atfork",
"from_coco",
]

class Future:
Expand Down Expand Up @@ -1825,6 +1829,48 @@ class DatasetView:
...


class IndexingMode:
    """
    Enumeration of available indexing modes in deeplake.

    Members:
        Always: Indices are always updated at commit.
        Automatic: Deeplake automatically detects when to update the indices.
        Off: Index updates are disabled during the session.
    """

    Always: typing.ClassVar[IndexingMode]
    Automatic: typing.ClassVar[IndexingMode]
    Off: typing.ClassVar[IndexingMode]
    # Spelled ``typing.Dict`` because this stub imports ``typing`` as a module
    # only; a bare ``Dict`` is an unresolved name for type checkers.
    # NOTE(review): presumably maps each member name to its member -- confirm.
    __members__: typing.ClassVar[typing.Dict[str, IndexingMode]]

    def __eq__(self, other: typing.Any) -> bool:
        ...

    def __getstate__(self) -> int:
        ...

    def __hash__(self) -> int:
        ...

    def __index__(self) -> int:
        ...

    def __init__(self, value: int) -> None:
        ...

    def __int__(self) -> int:
        ...

    def __ne__(self, other: typing.Any) -> bool:
        ...

    def __repr__(self) -> str:
        ...

    def __setstate__(self, state: int) -> None:
        ...

    def __str__(self) -> str:
        ...

    @property
    def name(self) -> str:
        ...

    @property
    def value(self) -> int:
        ...


class Dataset(DatasetView):
"""
Datasets are the primary data structure used in DeepLake. They are used to store and manage data for searching, training, evaluation.
Expand Down Expand Up @@ -1870,6 +1916,31 @@ class Dataset(DatasetView):
When the dataset was created. The value is auto-generated at creation time.
"""

indexing_mode: IndexingMode
"""
The indexing mode of the dataset. Setting this property changes the indexing mode for the current session only;
other sessions are not affected.

<!-- test-context
```python
import deeplake
ds = deeplake.create("tmp://")
ds.indexing_mode = deeplake.IndexingMode.Off
ds.add_column("column_name", deeplake.types.Text(deeplake.types.BM25))
a = ['a']*10_000
ds.append({"column_name":a})
ds.commit()
```
-->

Examples:
```python
ds = deeplake.open("tmp://")
ds.indexing_mode = deeplake.IndexingMode.Automatic
ds.commit()
```
"""

@property
def version(self) -> str:
"""
Expand Down Expand Up @@ -2444,6 +2515,9 @@ class InvalidPolygonShapeError(Exception):
class InvalidLinkDataError(Exception):
    # Stub declaration only: nothing is added on top of ``Exception``.
    ...

class EmbeddingSizeMismatch(Exception):
    # Stub declaration only: nothing is added on top of ``Exception``.
    ...

class InvalidCredsKeyAssignmentError(Exception):
    # Stub declaration only: nothing is added on top of ``Exception``.
    ...

Expand Down Expand Up @@ -3197,4 +3271,31 @@ def from_parquet(url: str) -> ReadOnlyDataset:
url: The URL of the Parquet dataset. If no protocol is specified, it assumes `file://`
"""

def from_coco(
    images_directory: typing.Union[str, pathlib.Path],
    annotation_files: typing.Dict[str, typing.Union[str, pathlib.Path]],
    dest: typing.Union[str, pathlib.Path],
    dest_creds: typing.Optional[typing.Dict[str, str]] = None,
) -> Dataset:
    """Ingest images and annotations in COCO format to a Deep Lake Dataset. The source data can be stored locally or in the cloud.

    Args:
        images_directory (str, pathlib.Path): The path to the directory containing images.
        annotation_files (Dict[str, Union[str, pathlib.Path]]): Dictionary mapping each key to the path of a JSON annotation file in COCO format.
            - The required keys are `instances`, `keypoints` and `stuff`.
        dest (str, pathlib.Path):
            - The full path to the dataset. Can be:
            - a Deep Lake cloud path of the form ``al://org_id/datasetname``. To write to Deep Lake cloud datasets, ensure that you are authenticated to Deep Lake.
            - an s3 path of the form ``s3://bucketname/path/to/dataset``. Credentials are required in either the environment or passed to the ``dest_creds`` argument.
            - a local file system path of the form ``./path/to/dataset`` or ``~/path/to/dataset`` or ``path/to/dataset``.
            - a memory path of the form ``mem://path/to/dataset`` which doesn't save the dataset but keeps it in memory instead. Should be used only for testing as it does not persist.
        dest_creds (Optional[Dict[str, str]]): The dictionary containing credentials used to access the destination path of the dataset.

    Returns:
        Dataset: The Dataset created from images and COCO annotations.

    Raises:
        CocoAnnotationMissingError: If one or more annotation keys are missing from the file.
    """

def __prepare_atfork() -> None: ...
2 changes: 1 addition & 1 deletion python/deeplake/ingestion/__init__.py
Original file line number Diff line number Diff line change
@@ -1 +1 @@
from deeplake.ingestion.coco.ingest_coco import ingest_coco
from deeplake.ingestion.coco.from_coco import from_coco
Loading