
Commit

remove chroma caches
fmigneault committed Dec 9, 2023
1 parent a15d98e commit f97c666
Showing 16 changed files with 26 additions and 15 deletions.
6 changes: 6 additions & 0 deletions .gitignore
@@ -10,10 +10,16 @@
**/.pytest_cache
**/condaenv.*.requirements.txt

+## Chroma VDB caches
+**/*.bin
+**/*.pickle
+**/*.sqlite3

### Binaries
**/*.jar

### Notebooks
# expect examples per domain
# disallow notebooks at root
./*.ipynb
**/.ipynb_checkpoints/
18 changes: 10 additions & 8 deletions nlp/notebooks/nl2query/V2/V2_pipeline.py
@@ -2,6 +2,7 @@
import json
import os
import re
+from typing import Optional

import nltk
import osmnx as ox
@@ -16,6 +17,7 @@
TemporalAnnotation
)
from nl2query.V2.Vdb_simsearch import Vdb_simsearch
+from typedefs import JSON

try:
nltk.data.find('corpora/stopwords')
@@ -24,7 +26,7 @@
from nltk.corpus import stopwords


-def find_spans(span:str, query:str):
+def find_spans(span: str, query: str):
"""Find a span in a query.
Return the spans or a list of spans
in the case of split spans and
@@ -84,7 +86,7 @@ def remove_stopwords(text:str):
return filtered_text


-def duckling_parse(duckling_url:str, query:str):
+def duckling_parse(duckling_url: str, query: str) -> Optional[JSON]:
"""Temporal Expression Detection using Duckling.
Needs rasa/duckling Docker image running on duckling_url.
Return a response json or None."""
@@ -100,18 +102,18 @@ def duckling_parse(duckling_url:str, query:str):
#empty response
return None
else:
raise Exception("Please make sure Duckling docker service is running on localhost port 8000!")
raise Exception(f"Please make sure Duckling docker service is running on [{duckling_url}]!")
except:
raise Exception("Please make sure Duckling docker service is running on localhost port 8000!")
raise Exception(f"Please make sure Duckling docker service is running on [{duckling_url}]!")


-def osmnx_geocode(query:str, threshold:int=0.7, policy:str='length'):
+def osmnx_geocode(vdb: Vdb_simsearch, query: str, threshold: float = 0.7, policy: str = 'length'):
"""location geocoding service
that queries every 1 and 2-gram tokens
and returns a result above the threshold (default 0.7)
and highest score if policy=score
or highest length if policy=length (default)."""
-query_tokens,_ = Vdb_simsearch.generate_ngrams(query, 2)
+query_tokens,_ = vdb.query_ngram_target(query, 2)
# query by 1-gram and 2-gram tokens
importance = 0
max_gdf = None
@@ -137,7 +139,7 @@ def osmnx_geocode(query:str, threshold:int=0.7, policy:str='length'):

class V2_pipeline(NL2QueryInterface):

def __init__(self, config:str="v2_config.cfg"):
def __init__(self, config: str = "v2_config.cfg"):
super().__init__(os.path.join(os.path.dirname(os.path.realpath(__file__)),config))
# Getting vdb paths from config file
if "prop_vdb" in self.config.sections():
@@ -155,7 +157,7 @@ def __init__(self, config:str="v2_config.cfg"):
else:
print("No Duckling URL in the config file! Temporal Expression Detection will not be working!")
# need either the vdb paths or the vocab paths to setup vdbs
-self.vdbs = Vdb_simsearch.Vdb_simsearch(self.prop_vdb, self.prop_vocab, self.targ_vdb, self.targ_vocab )
+self.vdbs = Vdb_simsearch(self.prop_vdb, self.prop_vocab, self.targ_vdb, self.targ_vocab )
# check if Duckling is running correctly
duckling_parse(self.duckling_url, "test - yesterday")

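For context, the duckling_parse docstring above notes that a rasa/duckling container must be reachable at the configured duckling_url. A minimal sketch of the HTTP call such a helper typically makes, assuming the container's default POST /parse endpoint and form fields (the URL and example text are illustrative, not taken from this repository):

import requests

# Assumed service location; in this codebase it would come from the config file's Duckling URL.
duckling_url = "http://localhost:8000/parse"
payload = {"locale": "en_US", "text": "sentinel images from yesterday"}

# Duckling returns a JSON list of entities; each entry carries the matched span ("body")
# and its resolved value ("value"), or an empty list when no temporal expression is found.
response = requests.post(duckling_url, data=payload)
entities = response.json() if response.status_code == 200 else None
if entities:
    for entity in entities:
        print(entity["dim"], entity["body"], entity["value"])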
11 changes: 7 additions & 4 deletions nlp/notebooks/nl2query/V2/Vdb_simsearch.py
@@ -221,9 +221,12 @@ def query_ngram_prop(self, query, ngrams=3, threshold=0.6, verbose=False):


if __name__ == "__main__":
-
-
-my_vdbs = Vdb_simsearch()
+my_vdbs = Vdb_simsearch(
+"nl2query/V2/prop_vdb",
+"nl2query/V2/prop_vocab.csv",
+"nl2query/V2/target_vdb",
+"nl2query/V2/target_vocab3.csv",
+)

query = "sentinel daily rain amount"
-my_vdbs.query_ngram_target(query, verbose=True)
+my_vdbs.query_ngram_target(query, verbose=True)
Binary file not shown.
Binary file not shown.
Binary file not shown.
Empty file.
Binary file removed nlp/notebooks/nl2query/V2/prop_vdb/chroma.sqlite3
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
2 changes: 1 addition & 1 deletion nlp/notebooks/nl2query/V2/v2_config.cfg
@@ -6,5 +6,5 @@ prop_vdb_path = nl2query/V2/prop_vdb
prop_vocab_path = nl2query/V2/prop_vocab.csv

[targ_vdb]
-targ_vdb_path = nl2query/V2/arget_vdb
+targ_vdb_path = nl2query/V2/target_vdb
targ_vocab_path = nl2query/V2/target_vocab3.csv
4 changes: 2 additions & 2 deletions nlp/notebooks/nl2query/V3/V3_pipeline.py
@@ -86,7 +86,7 @@ def transform_nl2query(self, nlq: str, verbose:bool=False) -> QueryAnnotationsDi
print("New query:", newq)

# location annotation
-loc_span, osmnx_annotation = V2_pipeline.osmnx_geocode(newq)
+loc_span, osmnx_annotation = V2_pipeline.osmnx_geocode(self.v2_instance.vdbs, newq)
if loc_span:
_, pos = V2_pipeline.find_spans(loc_span, nlq)
loc = self.create_location_annotation([loc_span, pos, osmnx_annotation])
@@ -105,7 +105,7 @@ def transform_nl2query(self, nlq: str, verbose:bool=False) -> QueryAnnotationsDi
for loc in v1_loc:
# print("V1 Loc:", loc.text)
if loc.text in newq:
-loc_span, osmnx_annotation = V2_pipeline.osmnx_geocode(loc.text)
+loc_span, osmnx_annotation = V2_pipeline.osmnx_geocode(self.v2_instance.vdbs, loc.text)
if loc_span:
loc = self.create_location_annotation([loc_span, loc.position, osmnx_annotation])
combined_annotations.append(loc)
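Both call sites above now pass the pipeline's shared Vdb_simsearch instance into osmnx_geocode instead of calling it without one. As a rough illustration of the geocoding step that osmnx_geocode performs on each candidate n-gram per its docstring, a hedged sketch using osmnx's Nominatim lookup (the token, threshold, and column handling below are assumptions, not code from this commit):

import osmnx as ox

token = "montreal"   # hypothetical 1-gram candidate extracted from the query
threshold = 0.7      # default importance threshold described in the docstring

try:
    # geocode_to_gdf returns a GeoDataFrame built from the Nominatim response,
    # which typically includes an 'importance' score for the matched place.
    gdf = ox.geocode_to_gdf(token)
    importance = float(gdf.iloc[0].get("importance", 0))
    if importance >= threshold:
        print(token, "->", gdf.iloc[0].get("display_name"), importance)
except Exception:
    pass  # token does not resolve to a place name; skip it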
