Skip to content

Commit

Permalink
Wildcard deletion ability (GSI-917) (#2)
Browse files Browse the repository at this point in the history
* Allow wildcard deletion of all documents

* Add unit tests

* Don't retrieve all collection names unless needed

* Add tests to make sure no error is raised for not-there db
  • Loading branch information
TheByronHimes authored Jul 23, 2024
1 parent 4cc060d commit 4ed2689
Show file tree
Hide file tree
Showing 11 changed files with 297 additions and 23 deletions.
3 changes: 2 additions & 1 deletion openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,8 @@ paths:
description: Validation Error
security:
- HTTPBearer: []
summary: Deletes all or some documents in the collection.
summary: Deletes all or some documents in the collection. No error is raised
if db or collection do not exist.
tags:
- StateManagementService
- sms-mongodb
Expand Down
9 changes: 5 additions & 4 deletions src/sms/adapters/inbound/fastapi_/routes.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ async def get_docs(
except DocsHandlerPort.CriteriaFormatError as err:
raise HTTPException(
status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
detail=f"Check query parameters: {err}",
detail=str(err),
) from err


Expand Down Expand Up @@ -171,7 +171,8 @@ async def upsert_docs(
"/documents/{namespace}",
operation_id="delete_documents",
tags=["StateManagementService", "sms-mongodb"],
summary="Deletes all or some documents in the collection.",
summary="Deletes all or some documents in the collection. No error is raised if db"
+ " or collection do not exist.",
status_code=status.HTTP_204_NO_CONTENT,
)
async def delete_docs(
Expand Down Expand Up @@ -202,8 +203,8 @@ async def delete_docs(
status_code=status.HTTP_500_INTERNAL_SERVER_ERROR,
detail=str(err),
) from err
except DocsHandlerPort.CriteriaFormatError as err:
except (DocsHandlerPort.CriteriaFormatError, ValueError) as err:
raise HTTPException(
status_code=status.HTTP_422_UNPROCESSABLE_ENTITY,
detail=f"Check query parameters: {err}",
detail=str(err),
) from err
32 changes: 32 additions & 0 deletions src/sms/adapters/outbound/docs_dao.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,9 +82,41 @@ async def delete(
) -> None:
"""Delete documents satisfying the criteria.
No error is raised if the db or collection does not exist.
Args:
- `db_name`: The database name.
- `collection`: The collection name.
- `criteria`: The criteria to use for filtering the documents (mapping)
"""
await self._client[db_name][collection].delete_many(criteria)

async def get_db_map_for_prefix(
self, *, prefix: str, db_name: str | None = None
) -> dict[str, list[str]]:
"""Get a dict containing a list of collections for each database, or a specific
database (if `db_name` is provided).
Only returns databases that start with the given prefix, and it returns the
database names with `prefix` stripped. An empty dict is returned if `prefix` is
empty. If `db_name` is provided but no collections exist, an empty list is
returned with the database name as the key.
"""
if not prefix:
return {}

if db_name:
full_db_name = f"{prefix}{db_name}"
return {
db_name: sorted(
await self._client[full_db_name].list_collection_names()
)
}

return {
db.removeprefix(prefix): sorted(
await self._client[db].list_collection_names()
)
for db in await self._client.list_database_names()
if db.startswith(prefix)
}
76 changes: 61 additions & 15 deletions src/sms/core/docs_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,13 +16,16 @@

import json
import logging
from contextlib import suppress
from typing import Literal, NamedTuple

from sms.config import Config
from sms.models import Criteria, DocumentType, UpsertionDetails
from sms.ports.inbound.docs_handler import DocsHandlerPort
from sms.ports.outbound.docs_dao import DocsDaoPort

log = logging.getLogger(__name__)


def log_and_raise_permissions_error(
db_name: str, collection: str, operation: Literal["read", "write"]
Expand All @@ -33,7 +36,7 @@ def log_and_raise_permissions_error(
f"'{operation.title()}' operations not allowed on db '{db_name}',"
+ f" collection '{collection}'. No rule found that matches '{rule}'",
)
logging.error(
log.error(
error,
extra={"db_name": db_name, "collection": collection, "operation": operation},
)
Expand Down Expand Up @@ -104,7 +107,7 @@ def _parse_criteria(self, criteria: Criteria) -> Criteria:
parsed_criteria[key] = json.loads(value)
except json.JSONDecodeError as err:
error = self.CriteriaFormatError(key=key)
logging.error(
log.error(
error,
extra={"key": key, "value": value},
)
Expand Down Expand Up @@ -138,7 +141,7 @@ async def get(
)
except Exception as err:
error = self.ReadOperationError(criteria=criteria)
logging.error(error)
log.error(error)
raise error from err

return results
Expand Down Expand Up @@ -185,15 +188,39 @@ async def upsert(
)
except Exception as err:
error = self.UpsertionError(id_field=id_field)
logging.error(
log.error(
error,
extra={"documents": documents},
)
raise error from err

async def _delete(self, db_name: str, collection: str, criteria: Criteria) -> None:
"""Delete documents satisfying the criteria. Called by the public delete method."""
if not self._permissions.can_write(db_name, collection):
log_and_raise_permissions_error(db_name, collection, "write")

full_db_name = f"{self._prefix}{db_name}"

try:
await self._docs_dao.delete(
db_name=full_db_name, collection=collection, criteria=criteria
)
except Exception as err:
error = self.DeletionError(criteria=criteria)
log.error(error)
raise error from err

async def delete(self, db_name: str, collection: str, criteria: Criteria) -> None:
"""Delete documents satisfying the criteria.
If the wildcard for both db_name and collection is used, all data from all
collections is deleted. If a db is specified but the collection is a wildcard,
all collections in that db are deleted. However, deleting data from a specific
collection in all databases is not allowed in order to prevent accidental data
loss.
No error is raised if the db or collection does not exist.
Args:
- `db_name`: The name of the database.
- `collection`: The name of the collection.
Expand All @@ -204,17 +231,36 @@ async def delete(self, db_name: str, collection: str, criteria: Criteria) -> Non
- `OperationError`: If the operation fails in the database for any reason.
- `CriteriaFormatError`: If the filter criteria format is invalid.
"""
if not self._permissions.can_write(db_name, collection):
log_and_raise_permissions_error(db_name, collection, "write")
to_delete: list[tuple[str, str]] = []

if collection == "*":
# Get a list of database and collection names for all dbs with the prefix
# if db is wildcard, otherwise just the collections under the specified db
db_map: dict[str, list[str]] = (
await self._docs_dao.get_db_map_for_prefix(prefix=self._prefix)
if db_name == "*"
else await self._docs_dao.get_db_map_for_prefix(
prefix=self._prefix, db_name=db_name
)
)

parsed_criteria = self._parse_criteria(criteria)
full_db_name = f"{self._prefix}{db_name}"
# Make a list of tuples representing the (db, collection)s to delete
to_delete = [(db, collection) for db in db_map for collection in db_map[db]]
elif db_name == "*":
error = ValueError(
"Cannot use wildcard for db_name with specific collection"
)
log.error(error)
raise error

try:
await self._docs_dao.delete(
db_name=full_db_name, collection=collection, criteria=parsed_criteria
parsed_criteria = self._parse_criteria(criteria)
if to_delete:
log.debug("Iteratively deleting data from these collections: %s", to_delete)
for db, coll in to_delete:
with suppress(PermissionError):
await self._delete(db, coll, parsed_criteria)
else:
log.debug(
"Deleting data from a specific collection: %s", (db_name, collection)
)
except Exception as err:
error = self.DeletionError(criteria=criteria)
logging.error(error)
raise error from err
await self._delete(db_name, collection, parsed_criteria)
10 changes: 9 additions & 1 deletion src/sms/ports/inbound/docs_handler.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,7 +53,7 @@ class CriteriaFormatError(RuntimeError):
"""Raised when the criteria format is invalid."""

def __init__(self, *, key: str):
super().__init__(f"The value for key '{key}' is invalid.")
super().__init__(f"The value for query parameter '{key}' is invalid.")

@abstractmethod
async def get(
Expand Down Expand Up @@ -96,6 +96,14 @@ async def upsert(
async def delete(self, db_name: str, collection: str, criteria: Criteria) -> None:
"""Delete documents satisfying the criteria.
If the wildcard for both db_name and collection is used, all data from all
collections is deleted. If a db is specified but the collection is a wildcard,
all collections in that db are deleted. However, deleting data from a specific
collection in all databases is not allowed in order to prevent accidental data
loss.
No error is raised if the db or collection does not exist.
Args:
- `db_name`: The name of the database.
- `collection`: The name of the collection.
Expand Down
16 changes: 16 additions & 0 deletions src/sms/ports/outbound/docs_dao.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,9 +60,25 @@ async def delete(
) -> None:
"""Delete documents satisfying the criteria.
No error is raised if the db or collection does not exist.
Args:
- `db_name`: The database name.
- `collection`: The collection name.
- `criteria`: The criteria to use for filtering the documents (mapping)
"""
...

@abstractmethod
async def get_db_map_for_prefix(
self, *, prefix: str, db_name: str | None = None
) -> dict[str, list[str]]:
"""Get a dict containing a list of collections for each database, or a specific
database (if `db_name` is provided).
Only returns databases that start with the given prefix, and it returns the
database names with `prefix` stripped. An empty dict is returned if `prefix` is
empty. If `db_name` is provided but no collections exist, an empty list is
returned with the database name as the key.
"""
...
2 changes: 2 additions & 0 deletions tests/fixtures/test_config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,5 @@ db_permissions:
- "testdb.writeonly:w"
- "testdb.readwrite:rw"
- "testdb.allops:*"
- "testdb2.writeonly:w"
- "testdb3.readonly:r"
45 changes: 44 additions & 1 deletion tests/integration/test_docs.py
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ async def test_get_docs(
[
(
"?age={unquoted_key:53}",
"Check query parameters: The value for key 'age' is invalid.",
"The value for query parameter 'age' is invalid.",
),
(
"?age=34&age=33",
Expand Down Expand Up @@ -139,3 +139,46 @@ async def test_get_docs_permission_error(http_method: str):
)
assert response.status_code == 403
assert response.json() == {"detail": message}


async def test_deletion_on_nonexistent_resources(mongodb: MongoDbFixture):
"""Test delete method on nonexistent dbs, collections.
There should not be any error raised.
"""
base_config = get_config(sources=[mongodb.config])
db_permissions = ["*.*:*"]
new_config = base_config.model_copy(update={"db_permissions": db_permissions})
config = get_config(sources=[new_config])

async with (
prepare_rest_app(config=config) as app,
AsyncTestClient(app=app) as client,
):
# Insert documents into testdb.allops
await client.put(
f"/documents/{ALLOPS}",
headers={"Authorization": VALID_BEARER_TOKEN},
json={"documents": ALL_TEST_DOCS},
)

# Delete nonexistent db contents with fully specified namespace
response = await client.delete(
"/documents/nonexistent.nonexistent",
headers={"Authorization": VALID_BEARER_TOKEN},
)
assert response.status_code == 204

# Delete nonexistent db contents with wildcard collection
response = await client.delete(
"/documents/nonexistent.*",
headers={"Authorization": VALID_BEARER_TOKEN},
)
assert response.status_code == 204

# Delete nonexistent collection contents
response = await client.delete(
"/documents/testdb.doesnotexist",
headers={"Authorization": VALID_BEARER_TOKEN},
)
assert response.status_code == 204
56 changes: 56 additions & 0 deletions tests/unit/test_docs_dao.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,3 +80,59 @@ async def test_all_ops(mongodb: MongoDbFixture):
# Delete all docs
await docs_dao.delete(db_name=TESTDB, collection=ALLOPS, criteria={})
assert not await docs_dao.get(db_name=TESTDB, collection=ALLOPS, criteria={})


async def test_get_db_map_for_prefix(mongodb: MongoDbFixture):
"""Test get_db_map_for_prefix method on the docs dao."""
config = get_config(sources=[mongodb.config])

db_name1 = "db1"
db_name2 = "db2"
expected_db_map = {db_name1: ["test", "test2"], db_name2: ["test1"]}

async with DocsDao(config=config) as docs_dao:
# MongoDbFixture reset only empties collections, it doesn't delete them
# so we need to drop the databases manually to verify the functionality
for db in await docs_dao._client.list_database_names():
if db.startswith(config.db_prefix):
await docs_dao._client.drop_database(db)

# Insert documents to create the expected db_map
for db_name, colls in expected_db_map.items():
for coll in colls:
await docs_dao._client[f"{config.db_prefix}{db_name}"][coll].insert_one(
{"key": "value"}
)

assert not await docs_dao.get_db_map_for_prefix(prefix="db")
assert not await docs_dao.get_db_map_for_prefix(prefix="")
db_map = await docs_dao.get_db_map_for_prefix(prefix=config.db_prefix)
assert db_map == expected_db_map

db1_map = await docs_dao.get_db_map_for_prefix(
prefix=config.db_prefix, db_name=db_name1
)

assert db1_map == {db_name1: ["test", "test2"]}

assert await docs_dao.get_db_map_for_prefix(
prefix=config.db_prefix, db_name="nonexistent"
) == {"nonexistent": []}


async def test_deletion_on_nonexistent_resources(mongodb: MongoDbFixture):
"""Test delete method on nonexistent dbs, collections.
There should not be any error raised.
"""
config = get_config(sources=[mongodb.config])

async with DocsDao(config=config) as docs_dao:
await docs_dao._client["exists"]["exists"].insert_one({"key": "value"})
# Delete nonexistent db contents
await docs_dao.delete(
db_name="nonexistent", collection="nonexistent", criteria={}
)

# Delete nonexistent collection contents
await docs_dao.delete(db_name="exists", collection="nonexistent", criteria={})
Loading

0 comments on commit 4ed2689

Please sign in to comment.