-
-
Notifications
You must be signed in to change notification settings - Fork 67
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Updated Dockerfile to preload the embeddings model into the container. Removed the
cassette file from the repo because it contained the model binary and was too large. The cassette is no longer necessary thanks to the cached model in the image.
- Loading branch information
Showing
15 changed files
with
144 additions
and
47 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,2 +1,3 @@ | ||
frontend/public/** linguist-vendored | ||
frontend/src/assets/** linguist-vendored | ||
*.yaml filter=lfs diff=lfs merge=lfs -text |
Git LFS file not shown
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,16 @@ | ||
import os | ||
from sentence_transformers import SentenceTransformer | ||
|
||
# Absolute path of the directory the model files are cached in.
cache_dir = "/models"

# Name of the sentence-transformers model to fetch.
model_name = "multi-qa-MiniLM-L6-cos-v1"

# Make sure the cache directory is present; an existing directory is fine.
os.makedirs(cache_dir, exist_ok=True)

# Instantiate the model with ``cache_folder`` pointing at ``cache_dir`` so the
# downloaded files are stored there.
model = SentenceTransformer(
    model_name,
    cache_folder=cache_dir,
)

# Report which model was fetched and where it was stored.
print(
    f"Sentence transformer model '{model_name}' has been downloaded and saved to '{cache_dir}'."
)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,2 @@ | ||
*.yaml | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
#!/bin/sh
# Git LFS post-checkout hook.
# Abort with exit status 2 and an explanatory message on stderr when the
# git-lfs binary is not on PATH; otherwise forward all hook arguments to it.
# printf is used instead of echo because echo's handling of "\n" escapes is
# implementation-defined across /bin/sh implementations.
command -v git-lfs >/dev/null 2>&1 || { printf >&2 "\n%s\n\n" "This repository is configured for Git LFS but 'git-lfs' was not found on your path. If you no longer wish to use Git LFS, remove this hook by deleting the 'post-checkout' file in the hooks directory (set by 'core.hookspath'; usually '.git/hooks')."; exit 2; }
git lfs post-checkout "$@"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
#!/bin/sh
# Git LFS post-commit hook.
# Abort with exit status 2 and an explanatory message on stderr when the
# git-lfs binary is not on PATH; otherwise forward all hook arguments to it.
# printf is used instead of echo because echo's handling of "\n" escapes is
# implementation-defined across /bin/sh implementations.
command -v git-lfs >/dev/null 2>&1 || { printf >&2 "\n%s\n\n" "This repository is configured for Git LFS but 'git-lfs' was not found on your path. If you no longer wish to use Git LFS, remove this hook by deleting the 'post-commit' file in the hooks directory (set by 'core.hookspath'; usually '.git/hooks')."; exit 2; }
git lfs post-commit "$@"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
#!/bin/sh
# Git LFS post-merge hook.
# Abort with exit status 2 and an explanatory message on stderr when the
# git-lfs binary is not on PATH; otherwise forward all hook arguments to it.
# printf is used instead of echo because echo's handling of "\n" escapes is
# implementation-defined across /bin/sh implementations.
command -v git-lfs >/dev/null 2>&1 || { printf >&2 "\n%s\n\n" "This repository is configured for Git LFS but 'git-lfs' was not found on your path. If you no longer wish to use Git LFS, remove this hook by deleting the 'post-merge' file in the hooks directory (set by 'core.hookspath'; usually '.git/hooks')."; exit 2; }
git lfs post-merge "$@"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
#!/bin/sh
# Git LFS pre-push hook.
# Abort with exit status 2 and an explanatory message on stderr when the
# git-lfs binary is not on PATH; otherwise forward all hook arguments to it.
# printf is used instead of echo because echo's handling of "\n" escapes is
# implementation-defined across /bin/sh implementations.
command -v git-lfs >/dev/null 2>&1 || { printf >&2 "\n%s\n\n" "This repository is configured for Git LFS but 'git-lfs' was not found on your path. If you no longer wish to use Git LFS, remove this hook by deleting the 'pre-push' file in the hooks directory (set by 'core.hookspath'; usually '.git/hooks')."; exit 2; }
git lfs pre-push "$@"
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,83 @@ | ||
import vcr | ||
from django.contrib.auth import get_user_model | ||
from django.core.files.base import ContentFile | ||
from django.db.models.signals import post_save | ||
from django.test import TestCase | ||
from django.test.utils import override_settings | ||
|
||
from opencontractserver.annotations.models import Annotation | ||
from opencontractserver.annotations.signals import process_annot_on_create_atomic | ||
from opencontractserver.corpuses.models import Corpus, CorpusQuery | ||
from opencontractserver.corpuses.signals import run_query_on_create | ||
from opencontractserver.documents.models import Document | ||
from opencontractserver.tasks.doc_tasks import nlm_ingest_pdf | ||
from opencontractserver.tasks.embeddings_task import ( | ||
calculate_embedding_for_annotation_text, | ||
) | ||
from opencontractserver.tasks.query_tasks import run_query | ||
from opencontractserver.tests.fixtures import SAMPLE_PDF_FILE_TWO_PATH | ||
|
||
User = get_user_model() | ||
|
||
|
||
class QueryTasksTestCase(TestCase):
    """Exercise the ``run_query`` task against a corpus holding one sample PDF.

    ``setUp`` builds a user, a corpus, a document created from
    ``SAMPLE_PDF_FILE_TWO_PATH`` and a ``CorpusQuery``; ``test_run_query``
    then runs the task and checks the fields it is expected to populate.
    """

    @override_settings(CELERY_TASK_ALWAYS_EAGER=True)
    def setUp(self):
        """Create the user, corpus, ingested document and query under test."""
        # Detach the post_save handlers so that creating a CorpusQuery or an
        # Annotation does not fire them; the corresponding tasks are invoked
        # explicitly below and in the test method instead.
        # NOTE(review): the handlers are never reconnected, so they remain
        # disconnected for whatever runs later in the same process - confirm
        # this is intended.
        post_save.disconnect(run_query_on_create, sender=CorpusQuery)
        post_save.disconnect(process_annot_on_create_atomic, sender=Annotation)

        self.user = User.objects.create_user(
            username="testuser", password="testpassword"
        )

        # Create any necessary test data
        self.corpus = Corpus.objects.create(title="Test Corpus")

        # Read the fixture inside a context manager so the file handle is
        # closed as soon as its bytes have been copied into the ContentFile.
        with SAMPLE_PDF_FILE_TWO_PATH.open("rb") as pdf_handle:
            pdf_file = ContentFile(pdf_handle.read(), name="test.pdf")

        self.doc = Document.objects.create(
            creator=self.user,
            title="Test Doc",
            description="USC Title 1 - Chapter 1",
            custom_meta={},
            pdf_file=pdf_file,
            backend_lock=True,
        )

        # Kick off the ingest task. CELERY_TASK_ALWAYS_EAGER is overridden to
        # True for this method, presumably so that .delay() runs inline.
        # NOTE(review): no HTTP-mocking decorator wraps setUp (the vcr
        # cassette only decorates test_run_query), so any request issued
        # during ingestion is not intercepted here - confirm.
        nlm_ingest_pdf.delay(user_id=self.user.id, doc_id=self.doc.id)

        # Manually run the calcs for the embeddings as post_save hook is hard
        # to await for in test
        for annot in Annotation.objects.all():
            calculate_embedding_for_annotation_text.delay(annotation_id=annot.id)

        self.corpus.documents.add(self.doc)
        self.corpus.save()

        self.query = CorpusQuery.objects.create(
            query="Test query", corpus=self.corpus, creator=self.user
        )

    @override_settings(CELERY_TASK_ALWAYS_EAGER=True)
    @vcr.use_cassette("fixtures/vcr_cassettes/run_query.yaml")
    def test_run_query(self):
        """Run ``run_query`` for the prepared query and check its outcome."""
        # Call the run_query task
        run_query.delay(query_id=self.query.id)

        # Refresh the query object from the database so the assertions see
        # whatever the task persisted rather than the stale in-memory copy.
        self.query.refresh_from_db()

        # A successful run sets started/completed/response, leaves
        # failed/stacktrace empty, and attaches at least one source.
        self.assertIsNotNone(self.query.started)
        self.assertIsNotNone(self.query.completed)
        self.assertIsNone(self.query.failed)
        self.assertIsNone(self.query.stacktrace)
        self.assertIsNotNone(self.query.response)
        self.assertTrue(self.query.sources.exists())
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters