Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

More notebooks added to nbtest #172

Merged
merged 3 commits into from
Jan 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,20 @@

test: nbtest notebooks

notebooks: search document-chunking
notebooks: search document-chunking model-upgrades langchain

search:
$(MAKE) -C notebooks/search

document-chunking:
$(MAKE) -C notebooks/document-chunking

model-upgrades:
$(MAKE) -C notebooks/model-upgrades

langchain:
$(MAKE) -C notebooks/langchain

install: pre-commit nbtest

pre-commit:
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "385c47c3-27e8-4b51-b8b7-26c97b9a3ad3",
"metadata": {},
"outputs": [],
"source": [
"from elasticsearch import Elasticsearch\n",
"from getpass import getpass\n",
"\n",
"ELASTIC_CLOUD_ID = getpass(\"Elastic Cloud ID: \")\n",
"ELASTIC_API_KEY = getpass(\"Elastic Api Key: \")\n",
"\n",
"client = Elasticsearch(cloud_id=ELASTIC_CLOUD_ID, api_key=ELASTIC_API_KEY,)\n",
"\n",
"# delete the notebook's index\n",
"client.indices.delete(index=\"blogs\", ignore_unavailable=True)\n",
"\n",
"# delete the pipeline\n",
"try:\n",
" client.ingest.delete_pipeline(id=\"vectorize_blogs\")\n",
"except:\n",
" pass\n",
"\n",
"# delete the model\n",
"try:\n",
" client.ml.delete_trained_model(model_id=\"sentence-transformers__all-minilm-l6-v2\", force=True)\n",
"except:\n",
" pass"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,10 @@
{
"cell_type": "markdown",
"metadata": {
"collapsed": false
"collapsed": false,
"jupyter": {
"outputs_hidden": false
}
},
"source": [
"# NLP text search using hugging face transformer model\n",
Expand Down Expand Up @@ -44,14 +47,20 @@
},
"outputs": [],
"source": [
"# install packages\n",
"!python3 -m pip install -qU sentence-transformers eland elasticsearch transformers\n",
"\n",
"!python3 -m pip -qU install sentence-transformers eland elasticsearch transformers"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# import modules\n",
"import pandas as pd, json\n",
"from elasticsearch import Elasticsearch\n",
"from getpass import getpass\n",
"from urllib.request import urlopen"
"from urllib.request import urlopen\n",
"import json"
]
},
{
Expand Down Expand Up @@ -93,8 +102,15 @@
"ELASTIC_CLOUD_ID = getpass(\"Elastic Cloud ID: \")\n",
"\n",
"# https://www.elastic.co/search-labs/tutorials/install-elasticsearch/elastic-cloud#creating-an-api-key\n",
"ELASTIC_API_KEY = getpass(\"Elastic Api Key: \")\n",
"\n",
"ELASTIC_API_KEY = getpass(\"Elastic Api Key: \")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"!eland_import_hub_model --cloud-id $ELASTIC_CLOUD_ID --hub-model-id sentence-transformers/all-MiniLM-L6-v2 --task-type text_embedding --es-api-key $ELASTIC_API_KEY --start"
]
},
Expand Down Expand Up @@ -304,7 +320,7 @@
},
{
"cell_type": "code",
"execution_count": 106,
"execution_count": 22,
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
Expand All @@ -315,125 +331,40 @@
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>_id</th>\n",
" <th>_score</th>\n",
" <th>fields.title</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>TxUU-YkBAHcz2kFqAun2</td>\n",
" <td>0.591786</td>\n",
" <td>[Brewing in Beats: Track network connections]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>SxUU-YkBAHcz2kFqAun2</td>\n",
" <td>0.401099</td>\n",
" <td>[Machine Learning for Nginx Logs - Identifying...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>UxUU-YkBAHcz2kFqAun2</td>\n",
" <td>0.390279</td>\n",
" <td>[Data Visualization For Machine Learning]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>TBUU-YkBAHcz2kFqAun2</td>\n",
" <td>0.368995</td>\n",
" <td>[Logstash Lines: Introduce integration plugins]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>UhUU-YkBAHcz2kFqAun2</td>\n",
" <td>0.368995</td>\n",
" <td>[Logstash Lines: Introduce integration plugins]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5</th>\n",
" <td>URUU-YkBAHcz2kFqAun2</td>\n",
" <td>0.356903</td>\n",
" <td>[Keeping up with Kibana: This week in Kibana f...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>6</th>\n",
" <td>UBUU-YkBAHcz2kFqAun2</td>\n",
" <td>0.341939</td>\n",
" <td>[Kibana 4 Video Tutorials, Part 3]</td>\n",
" </tr>\n",
" <tr>\n",
" <th>7</th>\n",
" <td>VBUU-YkBAHcz2kFqAun2</td>\n",
" <td>0.337294</td>\n",
" <td>[Introducing approximate nearest neighbor sear...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>8</th>\n",
" <td>ThUU-YkBAHcz2kFqAun2</td>\n",
" <td>0.336460</td>\n",
" <td>[Where in the World is Elastic? - QCon Beijing...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>9</th>\n",
" <td>TRUU-YkBAHcz2kFqAun2</td>\n",
" <td>0.320756</td>\n",
" <td>[EQL for the masses]</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" _id _score \\\n",
"0 TxUU-YkBAHcz2kFqAun2 0.591786 \n",
"1 SxUU-YkBAHcz2kFqAun2 0.401099 \n",
"2 UxUU-YkBAHcz2kFqAun2 0.390279 \n",
"3 TBUU-YkBAHcz2kFqAun2 0.368995 \n",
"4 UhUU-YkBAHcz2kFqAun2 0.368995 \n",
"5 URUU-YkBAHcz2kFqAun2 0.356903 \n",
"6 UBUU-YkBAHcz2kFqAun2 0.341939 \n",
"7 VBUU-YkBAHcz2kFqAun2 0.337294 \n",
"8 ThUU-YkBAHcz2kFqAun2 0.336460 \n",
"9 TRUU-YkBAHcz2kFqAun2 0.320756 \n",
"\n",
" fields.title \n",
"0 [Brewing in Beats: Track network connections] \n",
"1 [Machine Learning for Nginx Logs - Identifying... \n",
"2 [Data Visualization For Machine Learning] \n",
"3 [Logstash Lines: Introduce integration plugins] \n",
"4 [Logstash Lines: Introduce integration plugins] \n",
"5 [Keeping up with Kibana: This week in Kibana f... \n",
"6 [Kibana 4 Video Tutorials, Part 3] \n",
"7 [Introducing approximate nearest neighbor sear... \n",
"8 [Where in the World is Elastic? - QCon Beijing... \n",
"9 [EQL for the masses] "
]
},
"execution_count": 106,
"metadata": {},
"output_type": "execute_result"
"name": "stdout",
"output_type": "stream",
"text": [
"['Brewing in Beats: Track network connections']\n",
"Score: 0.5917864\n",
"\n",
"['Machine Learning for Nginx Logs - Identifying Operational Issues with Your Website']\n",
"Score: 0.40109876\n",
"\n",
"['Data Visualization For Machine Learning']\n",
"Score: 0.39027885\n",
"\n",
"['Logstash Lines: Introduce integration plugins']\n",
"Score: 0.36899462\n",
"\n",
"['Keeping up with Kibana: This week in Kibana for November 29th, 2019']\n",
"Score: 0.35690257\n",
"\n",
"['How to implement similarity image search | Elastic.co | Elastic Blog']\n",
"Score: 0.34473613\n",
"\n",
"['Kibana 4 Video Tutorials, Part 3']\n",
"Score: 0.34193927\n",
"\n",
"['Introducing approximate nearest neighbor search in Elasticsearch 8.0 | Elastic Blog']\n",
"Score: 0.3372936\n",
"\n",
"['Where in the World is Elastic? - QCon Beijing, Devoxx France, Percona Live & AWS Summit Chicago']\n",
"Score: 0.33645985\n",
"\n",
"['EQL for the masses']\n",
"Score: 0.3207562\n",
"\n"
]
}
],
"source": [
Expand All @@ -458,26 +389,41 @@
" knn=query,\n",
" source=False)\n",
"\n",
"\n",
"results = pd.json_normalize(json.loads(json.dumps(response.body['hits']['hits'])))\n",
"\n",
"# shows the result\n",
"results[['_id', '_score', 'fields.title']]\n"
"def show_results(results):\n",
" for result in results:\n",
" print(f'{result[\"fields\"][\"title\"]}\\nScore: {result[\"_score\"]}\\n')\n",
" \n",
"show_results(response.body['hits']['hits'])"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"colab": {
"provenance": []
},
"kernelspec": {
"display_name": "Python 3.11.3 64-bit",
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"version": "3.9.6"
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
},
"vscode": {
"interpreter": {
Expand All @@ -486,5 +432,5 @@
}
},
"nbformat": 4,
"nbformat_minor": 0
"nbformat_minor": 4
}
11 changes: 11 additions & 0 deletions notebooks/langchain/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Runs each notebook listed in NOTEBOOKS through the nbtest runner.

# Path to the nbtest executable, relative to this directory.
NBTEST = ../../bin/nbtest

# Notebooks exercised by `make` / `make all`.
NOTEBOOKS = \
	langchain-using-own-model.ipynb \
	langchain-vector-store-using-elser.ipynb

# `all` and the notebook names are declared phony, so make always considers
# them out of date and runs the recipe below on every invocation.
.PHONY: all $(NOTEBOOKS)

all: $(NOTEBOOKS)

# One nbtest invocation per notebook; $@ expands to the notebook being run.
# The leading `-` makes make ignore a non-zero exit status from nbtest, so the
# remaining notebooks are still attempted and the overall `make` run does not
# stop on a failing notebook.
$(NOTEBOOKS):
	-$(NBTEST) $@
Loading