Skip to content

Commit

Permalink
Update requirements
Browse files Browse the repository at this point in the history
  • Loading branch information
ColinDaglish committed Jul 25, 2023
1 parent 0152548 commit 2b35160
Show file tree
Hide file tree
Showing 3 changed files with 9 additions and 4 deletions.
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ PyYAML==6.0
rapidfuzz==3.1.1
scikit_learn==1.2.2
scipy==1.11.1
schema==0.7.5
setuptools==67.6.1
spacy==3.6.0
textblob==0.17.1
Expand Down
2 changes: 1 addition & 1 deletion src/modules/topic_modelling.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ def topic_model(
Returns
-------
None (prints messages to console on location of outputs)"""
- settings = config["question_settings"][question]
+ settings = config["models"][question]

Check warning on line 40 in src/modules/topic_modelling.py

View check run for this annotation

Codecov / codecov/patch

src/modules/topic_modelling.py#L40

Added line #L40 was not covered by tests
vectorizer_class = {"lda": CountVectorizer, "nmf": TfidfVectorizer}
model_class = {"lda": LatentDirichletAllocation, "nmf": NMF}
vectorizer = vectorizer_class[model](
Expand Down
10 changes: 7 additions & 3 deletions src/run_pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,14 +36,18 @@ def run_pipeline():
lower_series = spelling_fixed.str.lower()
no_punctuation_series = lower_series.apply(spell.remove_punctuation)
word_tokens = no_punctuation_series.apply(word_tokenize)
- short_tokens = prep.shorten_tokens(word_tokens, config["lemmatize"])
+ short_tokens = prep.shorten_tokens(word_tokens, config["general"]["lemmatize"])

Check warning on line 39 in src/run_pipeline.py

View check run for this annotation

Codecov / codecov/patch

src/run_pipeline.py#L39

Added line #L39 was not covered by tests
without_stopwords = short_tokens.apply(
- lambda x: prep.remove_nltk_stopwords(x, config["additional_stopwords"])
+ lambda x: prep.remove_nltk_stopwords(
+ x, config["general"]["additional_stopwords"]
+ )
)
rejoined_words = without_stopwords.apply(prep.rejoin_tokens)
all_text_combined = " ".join(rejoined_words) # rejoin_tokens
wc.create_wordcloud(all_text_combined, f"{question}_wordcloud")
- stopwords = prep.initialise_update_stopwords(config["additional_stopwords"])
+ stopwords = prep.initialise_update_stopwords(

Check warning on line 48 in src/run_pipeline.py

View check run for this annotation

Codecov / codecov/patch

src/run_pipeline.py#L48

Added line #L48 was not covered by tests
+ config["general"]["additional_stopwords"]
+ )
model_data = without_blank_rows

[
Expand Down

0 comments on commit 2b35160

Please sign in to comment.