Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(documents): delete orphan harvested documents #3777

Open
wants to merge 1 commit into
base: staging
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions rero_ils/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -518,6 +518,14 @@ def _(x):
"kwargs": {"delete": True},
"enabled": False,
},
"delete-orphan-harvested": {
"task": "rero_ils.modules.documents.tasks.delete_orphan_harvested",
"schedule": crontab(
minute=0, hour=5, day_of_week=6
), # Every Saturday at 05:00 UTC (Celery crontab: 0 = Sunday, 6 = Saturday; use day_of_week=0 if Sunday was intended),
"kwargs": {"delete": True},
"enabled": False,
},
# "mef-harvester": {
# "task": "rero_ils.modules.apiharvester.tasks.harvest_records",
# "schedule": timedelta(minutes=60),
Expand Down
40 changes: 40 additions & 0 deletions rero_ils/modules/documents/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,46 @@ def reindex_document(pid):
Document.get_record_by_pid(pid).reindex()


@shared_task(ignore_result=True)
def delete_orphan_harvested(delete=False, verbose=False):
    """Delete orphan harvested documents.

    Candidates are the documents found in the search index with
    ``harvested`` set to true and without a ``holdings`` field.

    :param delete: if True delete from DB and ES. If False nothing is
        modified; candidates are only listed when ``verbose`` is set.
    :param verbose: Verbose print.
    :returns: count of deleted documents.
    """
    query = (
        DocumentsSearch()
        .filter("term", harvested=True)
        .exclude("exists", field="holdings")
    )
    # All pids are collected up front, so the scan is finished before any
    # record is modified and the total can be reported.
    pids = [hit.pid for hit in query.source("pid").scan()]
    count = 0

    if verbose:
        click.secho(f"Orphan harvested documents count: {len(pids)}", fg="yellow")
    for pid in pids:
        if not (doc := Document.get_record_by_pid(pid)):
            # Indexed pid without a matching record: nothing to delete.
            continue
        if verbose:
            click.secho(f"Deleting orphan harvested: {pid}", fg="yellow")
        if not delete:
            continue

        # Evaluated once inside the ``try`` and reused in the error message:
        # calling it again from the ``except`` handler could raise on a
        # partially modified record and abort the whole run.
        reasons = None
        try:
            reasons = doc.reasons_not_to_delete()
            # only delete documents that have no links to me, only reason
            # not to delete should be 'harvested'
            if reasons == {"others": {"harvested": True}}:
                # The flag is removed before deleting, presumably because
                # delete() refuses records that still report a reason not
                # to delete -- TODO confirm.
                # NOTE(review): if delete() fails after replace(), the record
                # stays without its 'harvested' flag and will no longer be
                # matched by this task.
                doc.pop("harvested")
                doc.replace(doc, dbcommit=True, reindex=True)
                doc.delete(dbcommit=True, delindex=True)
                count += 1
        except Exception as err:
            # Best effort: report the failure (including its cause) and
            # continue with the next document.
            msg = f"COULD NOT DELETE ORPHAN HARVESTED: {pid} {reasons} {err!r}"
            if verbose:
                click.secho(f"ERROR: {msg}", fg="red")
            current_app.logger.warning(msg)

    set_timestamp("delete_orphan_harvested", msg={"deleted": count})
    return count


@shared_task(ignore_result=True)
def delete_drafts(days=1, delete=False, verbose=False):
"""Delete drafts.
Expand Down
18 changes: 17 additions & 1 deletion tests/ui/documents/test_documents_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,7 @@
document_id_fetcher,
)
from rero_ils.modules.documents.models import DocumentIdentifier
from rero_ils.modules.documents.tasks import delete_drafts
from rero_ils.modules.documents.tasks import delete_drafts, delete_orphan_harvested
from rero_ils.modules.ebooks.tasks import create_records
from rero_ils.modules.entities.models import EntityType
from rero_ils.modules.entities.remote_entities.api import (
Expand Down Expand Up @@ -421,3 +421,19 @@ def test_document_delete_draft(app, document_chinese_data):
doc["_draft"] = True
doc.update(data=doc, dbcommit=True, reindex=True)
assert delete_drafts(days=0, delete=True) == 1


def test_document_delete_orphan_harvested(
    app, document_data, holding_lib_sion_electronic
):
    """Test the deletion of orphan harvested documents.

    Make sure that ebooks with electronic holdings (fixture
    ``holding_lib_sion_electronic``) are not deleted.
    """
    record = Document.create(
        data=document_data,
        delete_pid=True,
        dbcommit=True,
        reindex=True,
    )
    # The new document is not flagged as harvested yet: nothing is deleted.
    deleted_before_flag = delete_orphan_harvested(delete=True)
    assert deleted_before_flag == 0

    # Once flagged as harvested, exactly one document is deleted.
    record["harvested"] = True
    record.update(data=record, dbcommit=True, reindex=True)
    deleted_after_flag = delete_orphan_harvested(delete=True)
    assert deleted_after_flag == 1
Loading