From e9794ca21841f227823d4a828c73a1a4b467aaf0 Mon Sep 17 00:00:00 2001
From: signebedi <signe@siftingwinnowing.com>
Date: Fri, 22 Mar 2024 14:34:08 -0500
Subject: [PATCH] Added: working document_id overrides for TinyDB (#15)

---
 libreforms_fastapi/utils/document_database.py | 139 +++++++++++++++++-
 1 file changed, 135 insertions(+), 4 deletions(-)

diff --git a/libreforms_fastapi/utils/document_database.py b/libreforms_fastapi/utils/document_database.py
index c06ddd1..87f0b5a 100644
--- a/libreforms_fastapi/utils/document_database.py
+++ b/libreforms_fastapi/utils/document_database.py
@@ -2,11 +2,141 @@
 from bson import ObjectId
 from datetime import datetime
 from zoneinfo import ZoneInfo
-from tinydb import TinyDB, Query
+from tinydb import (
+    TinyDB, 
+    Query, 
+    Storage
+)
+from tinydb.table import (
+    Table as TinyTable, 
+    Document
+)
+
+from typing import (
+    Mapping,
+    Union,
+    Iterable,
+    List,
+)
 from abc import ABC, abstractmethod
 
 from libreforms_fastapi.utils.logging import set_logger
 
+# We want to modify TinyDB to use string representations of bson
+# ObjectIDs. As such, we will need to modify some underlying behavior,
+# see https://github.com/signebedi/libreforms-fastapi/issues/15.
+class CustomTable(TinyTable):
+    """TinyDB table keyed by BSON ObjectId strings instead of integers."""
+
+    document_id_class = str  # Use string IDs instead of integers
+
+    def _get_next_id(self, document_id=None):
+        """
+        Return ``document_id`` when given, else a new BSON ObjectId string.
+
+        The ID is generated on every call; a default evaluated in the
+        signature would be created once and shared by all inserts.
+        """
+        return document_id if document_id else str(ObjectId())
+
+    def insert(self, document: Mapping, document_id: Union[str, bool] = False) -> str:
+        """
+        Insert a new document into the table.
+
+        :param document: the document to insert
+        :param document_id: ID to store the document under; when falsy a
+            new BSON ObjectId string is generated. Ignored if ``document``
+            is a ``Document``, whose own ``doc_id`` is used instead.
+        :returns: the inserted document's ID
+        :raises ValueError: if ``document`` is not a ``Mapping`` or the ID
+            already exists in the table
+        """
+        # Make sure the document implements the ``Mapping`` interface
+        if not isinstance(document, Mapping):
+            raise ValueError('Document is not a Mapping')
+
+        # First, we get the document ID for the new document
+        if isinstance(document, Document):
+            # For a `Document` object we use the specified ID
+            doc_id = document.doc_id
+
+            # We also reset the stored next ID so the next insert won't
+            # re-use document IDs by accident when storing an old value
+            self._next_id = None
+        else:
+            # Otherwise use the caller's ID, or generate a fresh one
+            doc_id = self._get_next_id(document_id=document_id)
+
+        # Now, we update the table and add the document
+        def updater(table: dict):
+            if doc_id in table:
+                raise ValueError(f'Document with ID {str(doc_id)} '
+                                 f'already exists')
+
+            # By calling ``dict(document)`` we convert the data we got to a
+            # ``dict`` instance even if it was a different class that
+            # implemented the ``Mapping`` interface
+            table[doc_id] = dict(document)
+
+        # Apply the change through the inherited ``_update_table`` hook
+        self._update_table(updater)
+
+        return doc_id
+
+    def insert_multiple(self, documents: Iterable[Mapping], document_ids: Union[List, bool] = False) -> List[str]:
+        """
+        Insert multiple documents into the table.
+
+        :param documents: an Iterable of documents to insert
+        :param document_ids: optional list of IDs, one per document; when
+            falsy a new BSON ObjectId string is generated for each document
+        :returns: a list containing the inserted documents' IDs
+        :raises ValueError: on a length mismatch, a non-``Mapping``
+            document, or an ID that already exists in the table
+        """
+        # Materialise so generators can be measured and indexed safely
+        documents = list(documents)
+        doc_ids = []
+
+        if document_ids and len(document_ids) != len(documents):
+            raise ValueError("When inserting multiple and passing your own document_ids, "
+                "the list must be the same length as the document list")
+
+        def updater(table: dict):
+            for i, document in enumerate(documents):
+
+                # Make sure the document implements the ``Mapping`` interface
+                if not isinstance(document, Mapping):
+                    raise ValueError('Document is not a Mapping')
+
+                if isinstance(document, Document):
+                    # A ``Document`` carries its own ID
+                    doc_id = document.doc_id
+                else:
+                    # Use the caller's ID for this position, else a new one
+                    doc_id = self._get_next_id(
+                        document_id=document_ids[i] if document_ids else None
+                    )
+
+                # Never overwrite a stored document or an earlier batch entry
+                if doc_id in table:
+                    raise ValueError(f'Document with ID {str(doc_id)} already exists')
+
+                # Record the ID for the return value, then store a plain dict
+                doc_ids.append(doc_id)
+                table[doc_id] = dict(document)
+
+        # Apply the whole batch through the inherited ``_update_table`` hook
+        self._update_table(updater)
+
+        return doc_ids
+
+# Subclass TinyDB and override the table_class attribute with our new logic
+class CustomTinyDB(TinyDB):
+    """TinyDB subclass that sets ``table_class`` to ``CustomTable``."""
+    table_class = CustomTable
+
+
 class CollectionDoesNotExist(Exception):
     """Exception raised when attempting to access a collection that does not exist."""
     def __init__(self, form_name):
@@ -133,7 +263,8 @@ def _initialize_database_collections(self):
         # Initialize databases
         self.databases = {}
         for form_name in self.config.keys():
-            self.databases[form_name] = TinyDB(self._get_db_path(form_name))
+            # self.databases[form_name] = TinyDB(self._get_db_path(form_name))
+            self.databases[form_name] = CustomTinyDB(self._get_db_path(form_name))
 
     def _get_db_path(self, form_name:str):
         """Constructs a file path for the given form's database."""
@@ -160,7 +291,7 @@ def create_document(self, form_name:str, json_data, metadata={}):
         data_dict = {
             "data": convert_data_to_dict,
             "metadata": {
-                self.document_id_field: document_id,
+                # self.document_id_field: document_id,
                 self.is_deleted_field: metadata.get(self.is_deleted_field, False),
                 self.timezone_field: metadata.get(self.timezone_field, self.timezone.key),
                 self.created_at_field: metadata.get(self.created_at_field, current_timestamp.isoformat()),
@@ -176,7 +307,7 @@ def create_document(self, form_name:str, json_data, metadata={}):
         }
 
         # document_id = self.databases[form_name].insert(data_dict)
-        _ = self.databases[form_name].insert(data_dict)
+        _ = self.databases[form_name].insert(data_dict, document_id=document_id)
 
         if self.use_logger:
             self.logger.info(f"Inserted document for {form_name} with document_id {document_id}")