Skip to content

Commit

Permalink
Merge pull request #156 from JSv4/JSv4/fix-production-embeddings-model
Browse files Browse the repository at this point in the history
Install Embeddings Model @ /models in Production Container + Fix Extract Where Search Text is None
  • Loading branch information
JSv4 authored Jul 15, 2024
2 parents af0995b + 1955659 commit 1491be4
Show file tree
Hide file tree
Showing 3 changed files with 10 additions and 25 deletions.
24 changes: 0 additions & 24 deletions compose/local/django/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -33,30 +33,6 @@ RUN apt-get update && apt-get install --no-install-recommends -y \
# psycopg2 dependencies
libpq-dev

# RUN wget https://poppler.freedesktop.org/poppler-data-0.4.10.tar.gz \
# && tar -xf poppler-data-0.4.10.tar.gz \
# && cd poppler-data-0.4.10 \
# && make install \
# && cd .. \
# && wget https://poppler.freedesktop.org/poppler-21.03.0.tar.xz \
# && tar -xf poppler-21.03.0.tar.xz \
# && cd poppler-21.03.0 \
# && mkdir build \
# && cd build \
# && cmake .. \
# && make \
# && make install \
# && cd ../.. \
# && ldconfig \
# && rm poppler-data-0.4.10.tar.gz \
# && rm -rf poppler-data-0.4.10 \
# && rm poppler-21.03.0.tar.xz \
# && rm -rf poppler-21.03.0
# CMD tail -f /dev/null

# Get tesseract tessdata file
# RUN wget https://github.com/tesseract-ocr/tessdata_best/raw/main/eng.traineddata -P /usr/share/tessdata

# Requirements are installed here to ensure they will be cached.
# Copies ./requirements from the build context into the current working
# directory. NOTE(review): presumably this sits ahead of the application
# source copy so dependency layers are reused when only code changes — confirm
# against the rest of this Dockerfile.
COPY ./requirements .

Expand Down
9 changes: 9 additions & 0 deletions compose/production/django/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -71,10 +71,19 @@ RUN apt-get update && apt-get install --no-install-recommends -y \
# Copy the Python dependency wheels from /usr/src/app/wheels in the
# python-build-stage stage into /wheels/ in this stage.
COPY --from=python-build-stage /usr/src/app/wheels /wheels/

# Install the CPU-only PyTorch builds from the dedicated PyTorch CPU wheel
# index, then sentence-transformers from the default index.
# NOTE(review): the order looks deliberate — presumably torch is installed
# first so the sentence-transformers install finds it already satisfied rather
# than fetching a different torch build; confirm before reordering.
# --no-cache-dir keeps pip's download cache out of the image layers, matching
# the wheel install step that follows.
# NOTE(review): these packages are unpinned; consider pinning versions for
# reproducible builds.
RUN pip install --no-cache-dir torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu
RUN pip install --no-cache-dir sentence-transformers

# Install the Python dependencies from the local wheels only: --no-index keeps
# pip off the package index and --find-links points it at /wheels/ instead.
# --no-cache-dir disables pip's cache, and /wheels/ is deleted afterwards in
# the same RUN.
RUN pip install --no-cache-dir --no-index --find-links=/wheels/ /wheels/* \
&& rm -rf /wheels/

# Bring the embeddings download script into the current working directory.
COPY download_embeddings_model.py .

# Make sure /models exists (no error if it already does), then run the script
# that downloads the sentence transformer binaries.
# NOTE(review): presumably the script writes the model into /models — confirm
# against download_embeddings_model.py.
RUN mkdir -p /models \
    && python download_embeddings_model.py

# Copy the production entrypoint script to /entrypoint, owned by django:django.
COPY --chown=django:django ./compose/production/django/entrypoint /entrypoint
# Strip a trailing carriage return from every line of /entrypoint in place, so
# a script saved with CRLF line endings ends up with plain LF endings.
RUN sed -i 's/\r$//g' /entrypoint
Expand Down
2 changes: 1 addition & 1 deletion opencontractserver/tasks/data_extract_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def oc_llama_index_doc_query(cell_id, similarity_top_k=15, max_token_length: int
query = datacell.column.query

# Special character
if "|||" in search_text:
if isinstance(search_text, str) and "|||" in search_text:

logger.info(
"Detected special break character in examples `|||` - splitting and averaging embeddings."
Expand Down

0 comments on commit 1491be4

Please sign in to comment.