Update requirements

datasciencecampus · Jul 25, 2023 · 2b35160
1 parent 0152548
commit 2b35160
Show file tree

Hide file tree

Showing 3 changed files with 9 additions and 4 deletions.
diff --git a/requirements.txt b/requirements.txt
@@ -9,6 +9,7 @@ PyYAML==6.0
 rapidfuzz==3.1.1
 scikit_learn==1.2.2
 scipy==1.11.1
+schema==0.7.5
 setuptools==67.6.1
 spacy==3.6.0
 textblob==0.17.1

diff --git a/src/modules/topic_modelling.py b/src/modules/topic_modelling.py
@@ -37,7 +37,7 @@ def topic_model(
     Returns
     -------
     None (prints messages to console on location of outputs)"""
-    settings = config["question_settings"][question]
+    settings = config["models"][question]
     vectorizer_class = {"lda": CountVectorizer, "nmf": TfidfVectorizer}
     model_class = {"lda": LatentDirichletAllocation, "nmf": NMF}
     vectorizer = vectorizer_class[model](

diff --git a/src/run_pipeline.py b/src/run_pipeline.py
@@ -36,14 +36,18 @@ def run_pipeline():
     lower_series = spelling_fixed.str.lower()
     no_punctuation_series = lower_series.apply(spell.remove_punctuation)
     word_tokens = no_punctuation_series.apply(word_tokenize)
-    short_tokens = prep.shorten_tokens(word_tokens, config["lemmatize"])
+    short_tokens = prep.shorten_tokens(word_tokens, config["general"]["lemmatize"])
     without_stopwords = short_tokens.apply(
-        lambda x: prep.remove_nltk_stopwords(x, config["additional_stopwords"])
+        lambda x: prep.remove_nltk_stopwords(
+            x, config["general"]["additional_stopwords"]
+        )
     )
     rejoined_words = without_stopwords.apply(prep.rejoin_tokens)
     all_text_combined = " ".join(rejoined_words)  # rejoin_tokens
     wc.create_wordcloud(all_text_combined, f"{question}_wordcloud")
-    stopwords = prep.initialise_update_stopwords(config["additional_stopwords"])
+    stopwords = prep.initialise_update_stopwords(
+        config["general"]["additional_stopwords"]
+    )
     model_data = without_blank_rows
 
     [