From 40f16cf7b1319c212f95c4638c8be8efb8f15588 Mon Sep 17 00:00:00 2001 From: Christoph Reiter Date: Wed, 21 Aug 2024 16:10:55 +0200 Subject: [PATCH] typesense: process document conversion in chunks to reduce memory Also delete all no longer active collections, to reduce memory usage of typesense too, before and after a full sync. --- src/TypesenseClient/SearchIndex.php | 38 ++++++++--------------------- src/TypesenseSync/TypesenseSync.php | 36 +++++++++++++++------------ 2 files changed, 31 insertions(+), 43 deletions(-) diff --git a/src/TypesenseClient/SearchIndex.php b/src/TypesenseClient/SearchIndex.php index a8c1a7d..b379879 100644 --- a/src/TypesenseClient/SearchIndex.php +++ b/src/TypesenseClient/SearchIndex.php @@ -106,7 +106,7 @@ public function purgeAll(): void { $newName = $this->createNewCollection(); $this->updateAlias($newName); - $this->expireOldCollections(0); + $this->deleteOldCollections(); } protected function isAliasExists(string $aliasName): bool @@ -130,28 +130,20 @@ public function ensureSetup(): void } } - public function expireOldCollections(int $keepLast = 3): bool + /** + * Delete all collections that are no longer actively used. 
+ */ + public function deleteOldCollections(): void { $client = $this->getClient(); + $collectionNameSkipList = []; + // Don't delete the currently linked collection in all cases $alias = $client->aliases[$this->getAliasName()]->retrieve(); - $collectionNameSkipList = [$alias['collection_name']]; - - // TODO: remove this once we are done testing - // We use these for testing as well, so skip for now - $collectionNameSkipList = array_merge( - $collectionNameSkipList, - ['cabinet-students', 'cabinet-files'] - ); - - // Fetch all collections - try { - $collections = $client->collections->retrieve(); - } catch (Exception|TypesenseClientErrorAlias) { - return false; - } + $collectionNameSkipList[] = $alias['collection_name']; // Collect all collections with the given prefix that are not in the skip list + $collections = $client->collections->retrieve(); $collectionNameList = []; foreach ($collections as $collection) { if (str_starts_with($collection['name'], $this->getCollectionPrefix()) @@ -160,21 +152,11 @@ public function expireOldCollections(int $keepLast = 3): bool } } - rsort($collectionNameList); - // Slice off $keepLast collections - $collectionNameList = array_slice($collectionNameList, $keepLast); - // Delete the remaining collections foreach ($collectionNameList as $collectionName) { $this->logger->info("Deleting old collection '$collectionName'"); - try { - $client->collections[$collectionName]->delete(); - } catch (Exception|TypesenseClientErrorAlias $e) { - $this->logger->error('Deleting collection failed', ['exception' => $e]); - } + $client->collections[$collectionName]->delete(); } - - return true; } public function setLogger(LoggerInterface $logger): void diff --git a/src/TypesenseSync/TypesenseSync.php b/src/TypesenseSync/TypesenseSync.php index d46fb5c..66c3e60 100644 --- a/src/TypesenseSync/TypesenseSync.php +++ b/src/TypesenseSync/TypesenseSync.php @@ -62,40 +62,46 @@ public function sync(bool $full = false) } $cursor = $this->getCursor(); + // 
Process in chunks to reduce memory consumption + $chunkSize = 10000; + if ($cursor === null) { $this->logger->info('Starting a full sync'); $schema = $this->translator->getSchema(); $this->searchIndex->setSchema($schema); $this->searchIndex->ensureSetup(); + $this->searchIndex->deleteOldCollections(); $collectionName = $this->searchIndex->createNewCollection(); $res = $this->personSync->getAllPersons(); - $documents = []; - foreach ($res->getPersons() as $person) { - $documents[] = $this->personToDocument($person); - } - - $this->searchIndex->addDocumentsToCollection($collectionName, $documents); - if ($documents !== []) { - $this->addDummyDocuments($collectionName, $documents); + foreach (array_chunk($res->getPersons(), $chunkSize) as $persons) { + $documents = []; + foreach ($persons as $person) { + $documents[] = $this->personToDocument($person); + } + $this->searchIndex->addDocumentsToCollection($collectionName, $documents); + if ($documents !== []) { + $this->addDummyDocuments($collectionName, $documents); + } } - $this->searchIndex->ensureSetup(); $this->searchIndex->updateAlias($collectionName); - $this->searchIndex->expireOldCollections(); + $this->searchIndex->deleteOldCollections(); $this->saveCursor($res->getCursor()); } else { $this->logger->info('Starting a partial sync'); $res = $this->personSync->getAllPersons($cursor); + $collectionName = $this->searchIndex->getCollectionName(); - $documents = []; - foreach ($res->getPersons() as $person) { - $documents[] = $this->personToDocument($person); + foreach (array_chunk($res->getPersons(), $chunkSize) as $persons) { + $documents = []; + foreach ($persons as $person) { + $documents[] = $this->personToDocument($person); + } + $this->searchIndex->addDocumentsToCollection($collectionName, $documents); } - $collectionName = $this->searchIndex->getCollectionName(); - $this->searchIndex->addDocumentsToCollection($collectionName, $documents); $this->saveCursor($res->getCursor()); }