Skip to content

Commit

Permalink
Remove unused test.
Browse files Browse the repository at this point in the history
  • Loading branch information
JSv4 committed Oct 14, 2024
1 parent 7adf4eb commit 7bb640e
Show file tree
Hide file tree
Showing 3 changed files with 5 additions and 46 deletions.
4 changes: 3 additions & 1 deletion opencontractserver/documents/signals.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@
from opencontractserver.tasks.doc_tasks import (
extract_pdf_thumbnail,
nlm_ingest_pdf,
set_doc_lock_state, extract_txt_thumbnail, ingest_txt,
set_doc_lock_state,
extract_txt_thumbnail,
ingest_txt,
)
from opencontractserver.tasks.embeddings_task import calculate_embedding_for_doc_text

Expand Down
5 changes: 2 additions & 3 deletions opencontractserver/tasks/doc_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,6 @@
import io
import json
import logging
import pathlib
import uuid
from typing import Any

import requests
Expand Down Expand Up @@ -39,7 +37,8 @@
from opencontractserver.utils.etl import build_document_export, pawls_bbox_to_funsd_box
from opencontractserver.utils.files import (
check_if_pdf_needs_ocr,
split_pdf_into_images, create_text_thumbnail,
split_pdf_into_images,
create_text_thumbnail,
)
from opencontractserver.utils.permissioning import set_permissions_for_obj_to_user
from opencontractserver.utils.text import __consolidate_common_equivalent_chars
Expand Down
42 changes: 0 additions & 42 deletions opencontractserver/tests/test_doc_parser.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,10 @@
# Copyright (C) 2022 John Scrudato
import io
import logging
import pathlib
import uuid

from django.contrib.auth import get_user_model
from django.core.files.base import ContentFile
from django.core.files.storage import default_storage
from django.db import transaction
from django.test import TestCase
from PyPDF2 import PdfReader, PdfWriter

from opencontractserver.annotations.models import Annotation, AnnotationLabel
from opencontractserver.corpuses.models import Corpus
Expand All @@ -18,7 +13,6 @@
burn_doc_annotations,
convert_doc_to_funsd,
extract_pdf_thumbnail,
process_pdf_page,
set_doc_lock_state,
)
from opencontractserver.tests.fixtures import (
Expand Down Expand Up @@ -66,42 +60,6 @@ def test_pdf_thumbnail_extraction(self):
# TODO - expand test to actually check results
extract_pdf_thumbnail.s(doc_id=self.doc.id).apply().get()

def test_process_pdf_page(self):
    """Run ``process_pdf_page`` on a single-page PDF cut from the fixture doc.

    The first page of ``self.doc``'s PDF is written to a uniquely named
    file under ``/tmp/user_<user id>/pdf_fragments`` and the task is applied
    eagerly to that file. The result is expected to be a 3-item sequence:
    the page number, a string (the pawls fragment path) and the page path.
    """
    page_bytes_stream = io.BytesIO()

    # Use the storage file as a context manager so the handle is closed even
    # if reading or writing the page raises. The page is copied out while
    # the stream is still open because the reader pulls from it on demand.
    with default_storage.open(self.doc.pdf_file.name, mode="rb") as doc_file:
        pdf_writer = PdfWriter()
        pdf_writer.add_page(PdfReader(doc_file).pages[0])
        pdf_writer.write(page_bytes_stream)

    pdf_fragment_folder_path = pathlib.Path(
        f"/tmp/user_{self.user.id}/pdf_fragments"
    )
    pdf_fragment_folder_path.mkdir(parents=True, exist_ok=True)
    # A random file name keeps repeated or parallel runs from colliding.
    pdf_fragment_path = pdf_fragment_folder_path / f"{uuid.uuid4()}.pdf"
    pdf_fragment_path.write_bytes(page_bytes_stream.getvalue())

    page_path = str(pdf_fragment_path.resolve())

    result = (
        process_pdf_page.si(
            total_page_count=23,
            page_num=0,
            page_path=page_path,
            user_id=self.user.id,
        )
        .apply()
        .get()
    )

    self.assertEqual(len(result), 3)
    self.assertEqual(result[0], 0)  # page number
    self.assertIsInstance(result[1], str)  # pawls fragment path
    # NOTE(review): compared against the unresolved path, as before; this
    # assumes /tmp is not a symlink on the test host - confirm.
    self.assertEqual(result[2], str(pdf_fragment_path))  # page path

def test_set_doc_lock_state(self):
set_doc_lock_state.apply(kwargs={"locked": True, "doc_id": self.doc.id}).get()

Expand Down

0 comments on commit 7bb640e

Please sign in to comment.