From cf133b6f0fc53a7c0a71530cc4053a1ae330f964 Mon Sep 17 00:00:00 2001 From: Miguel Grinberg Date: Fri, 2 Feb 2024 10:09:34 +0000 Subject: [PATCH] Add nbtest CI job (#179) --- .github/workflows/tests.yml | 44 +++++++++++++++++++ Makefile | 22 +++------- bin/find-notebooks-to-test.sh | 26 +++++++++++ bin/mocks/elasticsearch.py | 41 +++++++++++++++++ bin/nbtest | 13 ++++++ notebooks/document-chunking/Makefile | 12 ----- .../integrations/hugging-face/.nbtest.yml | 2 + .../loading-model-from-hugging-face.ipynb | 12 ++--- notebooks/langchain/Makefile | 11 ----- notebooks/model-upgrades/Makefile | 10 ----- notebooks/search/Makefile | 16 ------- 11 files changed, 140 insertions(+), 69 deletions(-) create mode 100644 .github/workflows/tests.yml create mode 100755 bin/find-notebooks-to-test.sh create mode 100644 bin/mocks/elasticsearch.py delete mode 100644 notebooks/document-chunking/Makefile create mode 100644 notebooks/integrations/hugging-face/.nbtest.yml delete mode 100644 notebooks/langchain/Makefile delete mode 100644 notebooks/model-upgrades/Makefile delete mode 100644 notebooks/search/Makefile diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml new file mode 100644 index 00000000..19cc3423 --- /dev/null +++ b/.github/workflows/tests.yml @@ -0,0 +1,44 @@ +name: tests +on: + push: + branches: + - main + paths: + - notebooks/** + pull_request: + branches: + - main + paths: + - notebooks/** +jobs: + notebook-tests: + strategy: + matrix: + es_stack: + - 8.11.4 + - 8.12.0 + runs-on: ubuntu-latest + services: + elasticsearch: + image: docker.elastic.co/elasticsearch/elasticsearch:${{ matrix.es_stack }} + env: + discovery.type: single-node + xpack.security.enabled: false + xpack.security.http.ssl.enabled: false + xpack.license.self_generated.type: trial + ports: + - 9200:9200 + steps: + - name: Checkout + uses: actions/checkout@v4 + - name: Setup python + uses: actions/setup-python@v5 + with: + python-version: '3.10' + - name: Setup nbtest + run: make nbtest + - name: Warm up + continue-on-error: true + run: sleep 30 && PATCH_ES=1 ELASTIC_CLOUD_ID=foo ELASTIC_API_KEY=bar bin/nbtest notebooks/search/00-quick-start.ipynb + - name: Run tests + run: PATCH_ES=1 FORCE_COLOR=1 make -s test diff --git a/Makefile b/Makefile index 103c3ebc..55ffe7c2 100644 --- a/Makefile +++ b/Makefile @@ -1,28 +1,20 @@ +# this is the list of notebooks that are integrated with the testing framework +NOTEBOOKS = $(shell bin/find-notebooks-to-test.sh) + .PHONY: install pre-commit nbtest test notebooks test: nbtest notebooks -notebooks: search document-chunking model-upgrades langchain - -search: - $(MAKE) -C notebooks/search - -document-chunking: - $(MAKE) -C notebooks/document-chunking - -model-upgrades: - $(MAKE) -C notebooks/model-upgrades - -langchain: - $(MAKE) -C notebooks/langchain +notebooks: + bin/nbtest $(NOTEBOOKS) install: pre-commit nbtest pre-commit: python -m venv .venv - .venv/bin/pip install -r requirements-dev.txt + .venv/bin/pip install -qqq -r requirements-dev.txt .venv/bin/pre-commit install nbtest: python3 -m venv .venv - .venv/bin/pip install elastic-nbtest + .venv/bin/pip install -qqq elastic-nbtest diff --git a/bin/find-notebooks-to-test.sh b/bin/find-notebooks-to-test.sh new file mode 100755 index 00000000..443de5b3 --- /dev/null +++ b/bin/find-notebooks-to-test.sh @@ -0,0 +1,26 @@ +#!/bin/bash +# add any notebooks that are currently not testable to the exempt list +EXEMPT_NOTEBOOKS=( + "notebooks/search/07-inference.ipynb" + "notebooks/search/08-learning-to-rank.ipynb" + 
"notebooks/langchain/langchain-vector-store.ipynb" + "notebooks/langchain/self-query-retriever-examples/chatbot-example.ipynb" + "notebooks/langchain/self-query-retriever-examples/chatbot-with-bm25-only-example.ipynb" + "notebooks/langchain/self-query-retriever-examples/langchain-self-query-retriever.ipynb" + "notebooks/langchain/multi-query-retriever-examples/chatbot-with-multi-query-retriever.ipynb" + "notebooks/langchain/multi-query-retriever-examples/langchain-multi-query-retriever.ipynb" + "notebooks/generative-ai/question-answering.ipynb" + "notebooks/generative-ai/chatbot.ipynb" + "notebooks/integrations/amazon-bedrock/langchain-qa-example.ipynb" + "notebooks/integrations/llama-index/intro.ipynb" + "notebooks/integrations/gemini/vector-search-gemini-elastic.ipynb" + "notebooks/integrations/gemini/qa-langchain-gemini-elasticsearch.ipynb" + "notebooks/integrations/openai/openai-KNN-RAG.ipynb" +) + +ALL_NOTEBOOKS=$(find notebooks -name "*.ipynb" | grep -v "_nbtest" | grep -v ".ipynb_checkpoints" | sort) +for notebook in $ALL_NOTEBOOKS; do + if [[ ! "${EXEMPT_NOTEBOOKS[@]}" =~ $notebook ]]; then + echo $notebook + fi +done diff --git a/bin/mocks/elasticsearch.py b/bin/mocks/elasticsearch.py new file mode 100644 index 00000000..684996d0 --- /dev/null +++ b/bin/mocks/elasticsearch.py @@ -0,0 +1,41 @@ +import os +import sys + + +def patch_elasticsearch(): + # preserve the original import path + saved_path = sys.path.copy() + + # remove the path entry that refers to this directory + for path in sys.path: + if not path.startswith('/'): + path = os.path.join(os.getcwd(), path) + if __file__ == os.path.join(path, 'elasticsearch.py'): + sys.path.remove(path) + break + + # remove this module, and import the real one instead + del sys.modules['elasticsearch'] + import elasticsearch + + # restore the import path + sys.path = saved_path + + # preserve the original Elasticsearch.__init__ method + orig_es_init = elasticsearch.Elasticsearch.__init__ + + # patched version of Elasticsearch.__init__ that connects to self-hosted + # regardless of connection arguments given + def patched_es_init(self, *args, **kwargs): + if 'cloud_id' in kwargs: + assert kwargs['cloud_id'] == 'foo' + if 'api_key' in kwargs: + assert kwargs['api_key'] == 'bar' + return orig_es_init(self, 'http://localhost:9200') + + # patch Elasticsearch.__init__ + elasticsearch.Elasticsearch.__init__ = patched_es_init + + +patch_elasticsearch() +del patch_elasticsearch diff --git a/bin/nbtest b/bin/nbtest index 3cd38763..21a622ad 100755 --- a/bin/nbtest +++ b/bin/nbtest @@ -4,5 +4,18 @@ SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) if [[ ! 
-f $SCRIPT_DIR/../.venv/bin/nbtest ]]; then make nbtest fi + +if [[ "$PATCH_ES" != "" ]]; then + # here we do some Python dark magic to patch the elasticsearch package to + # connect to a locally hosted instance in spite of connection arguments + # given + export ELASTIC_CLOUD_ID=foo + export ELASTIC_API_KEY=bar + export PYTHONPATH=$SCRIPT_DIR/mocks + + # ensure elasticsearch is installed so that it can be patched + $SCRIPT_DIR/../.venv/bin/pip install -qqq elasticsearch +fi + source $SCRIPT_DIR/../.venv/bin/activate $SCRIPT_DIR/../.venv/bin/nbtest $* diff --git a/notebooks/document-chunking/Makefile b/notebooks/document-chunking/Makefile deleted file mode 100644 index 8704a788..00000000 --- a/notebooks/document-chunking/Makefile +++ /dev/null @@ -1,12 +0,0 @@ -NBTEST = ../../bin/nbtest -NOTEBOOKS = \ - tokenization.ipynb \ - with-index-pipelines.ipynb \ - with-langchain-splitters.ipynb - -.PHONY: all $(NOTEBOOKS) - -all: $(NOTEBOOKS) - -$(NOTEBOOKS): - -$(NBTEST) $@ diff --git a/notebooks/integrations/hugging-face/.nbtest.yml b/notebooks/integrations/hugging-face/.nbtest.yml new file mode 100644 index 00000000..0b88e44d --- /dev/null +++ b/notebooks/integrations/hugging-face/.nbtest.yml @@ -0,0 +1,2 @@ +masks: +- 'Score: [0-9]+\.[0-9][0-9]*' diff --git a/notebooks/integrations/hugging-face/loading-model-from-hugging-face.ipynb b/notebooks/integrations/hugging-face/loading-model-from-hugging-face.ipynb index f9bbaf4c..f2688cd5 100644 --- a/notebooks/integrations/hugging-face/loading-model-from-hugging-face.ipynb +++ b/notebooks/integrations/hugging-face/loading-model-from-hugging-face.ipynb @@ -47,7 +47,7 @@ }, "outputs": [], "source": [ - "!python3 -m pip -qU install sentence-transformers eland elasticsearch transformers" + "!python3 -m pip install sentence-transformers eland elasticsearch transformers" ] }, { @@ -60,7 +60,8 @@ "from elasticsearch import Elasticsearch\n", "from getpass import getpass\n", "from urllib.request import urlopen\n", - "import json" + "import json\n", + "from time import sleep" ] }, { @@ -111,7 +112,7 @@ "metadata": {}, "outputs": [], "source": [ - "!eland_import_hub_model --cloud-id $ELASTIC_CLOUD_ID --hub-model-id sentence-transformers/all-MiniLM-L6-v2 --task-type text_embedding --es-api-key $ELASTIC_API_KEY --start" + "!eland_import_hub_model --cloud-id $ELASTIC_CLOUD_ID --hub-model-id sentence-transformers/all-MiniLM-L6-v2 --task-type text_embedding --es-api-key $ELASTIC_API_KEY --start --clear-previous" ] }, { @@ -301,7 +302,8 @@ "for title in titles:\n", " actions.append({\"index\": {\"_index\": \"blogs\"}})\n", " actions.append(title)\n", - "es.bulk(index=\"blogs\", operations=actions)" + "es.bulk(index=\"blogs\", operations=actions)\n", + "sleep(5)" ] }, { @@ -423,7 +425,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.6" + "version": "3.10.13" }, "vscode": { "interpreter": { diff --git a/notebooks/langchain/Makefile b/notebooks/langchain/Makefile deleted file mode 100644 index 7217c610..00000000 --- a/notebooks/langchain/Makefile +++ /dev/null @@ -1,11 +0,0 @@ -NBTEST = ../../bin/nbtest -NOTEBOOKS = \ - langchain-using-own-model.ipynb \ - langchain-vector-store-using-elser.ipynb - -.PHONY: all $(NOTEBOOKS) - -all: $(NOTEBOOKS) - -$(NOTEBOOKS): - -$(NBTEST) $@ diff --git a/notebooks/model-upgrades/Makefile b/notebooks/model-upgrades/Makefile deleted file mode 100644 index a66139b1..00000000 --- a/notebooks/model-upgrades/Makefile +++ /dev/null @@ -1,10 +0,0 @@ -NBTEST = ../../bin/nbtest -NOTEBOOKS = 
\ - upgrading-index-to-use-elser.ipynb - -.PHONY: all $(NOTEBOOKS) - -all: $(NOTEBOOKS) - -$(NOTEBOOKS): - -$(NBTEST) $@ diff --git a/notebooks/search/Makefile b/notebooks/search/Makefile deleted file mode 100644 index 7b9f9755..00000000 --- a/notebooks/search/Makefile +++ /dev/null @@ -1,16 +0,0 @@ -NBTEST = ../../bin/nbtest -NOTEBOOKS = \ - 00-quick-start.ipynb \ - 01-keyword-querying-filtering.ipynb \ - 02-hybrid-search.ipynb \ - 03-ELSER.ipynb \ - 04-multilingual.ipynb \ - 05-query-rules.ipynb \ - 06-synonyms-api.ipynb - -.PHONY: all $(NOTEBOOKS) - -all: $(NOTEBOOKS) - -$(NOTEBOOKS): - -$(NBTEST) $@
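
Note on running these tests locally (this note is not part of the diff above; the
container name "es-nbtest" is an illustrative assumption, while every other value
is taken from tests.yml, the Makefile, and bin/nbtest in this patch): the CI job
relies on PATCH_ES=1 plus bin/mocks/elasticsearch.py to redirect any cloud
connection arguments used in the notebooks to a self-hosted instance on
localhost:9200. A minimal sketch of the same flow against a local Docker
container, assuming Docker is available, looks like this:

  # start a single-node Elasticsearch matching the CI service definition
  docker run -d --name es-nbtest -p 9200:9200 \
    -e discovery.type=single-node \
    -e xpack.security.enabled=false \
    -e xpack.security.http.ssl.enabled=false \
    -e xpack.license.self_generated.type=trial \
    docker.elastic.co/elasticsearch/elasticsearch:8.12.0

  # create .venv and install elastic-nbtest (Makefile target from this patch)
  make nbtest

  # run a single notebook; with PATCH_ES set, bin/nbtest exports dummy
  # ELASTIC_CLOUD_ID/ELASTIC_API_KEY values and puts bin/mocks on PYTHONPATH,
  # so the elasticsearch client is patched to connect to http://localhost:9200
  PATCH_ES=1 bin/nbtest notebooks/search/00-quick-start.ipynb

  # or run every non-exempt notebook, as the CI job does
  PATCH_ES=1 make -s test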