-
Notifications
You must be signed in to change notification settings - Fork 0
/
daily_word_cloud.py
70 lines (54 loc) · 2.22 KB
/
daily_word_cloud.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
import pandas as pd
from sqlalchemy import create_engine
from wordcloud import WordCloud
import matplotlib.pyplot as plt
from dotenv import load_dotenv
import os
import re
from datetime import datetime, timedelta
# Load environment variables
load_dotenv()
# Database connection parameters
DB_HOST = os.getenv("DB_HOST")
DB_PORT = os.getenv("DB_PORT")
DB_NAME = os.getenv("DB_NAME")
DB_USER = os.getenv("DB_USER")
DB_PASSWORD = os.getenv("DB_PASSWORD")
# Create a connection to the database
engine = create_engine(f'postgresql://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}')
# Calculate the previous day's date
yesterday = datetime.now() - timedelta(1)
yesterday_str = yesterday.strftime('%Y-%m-%d')
# Query to select all data from the crypto_news table for the previous day
query = f"SELECT * FROM crypto_news WHERE date = '{yesterday_str}';"
# Read the data into a pandas DataFrame
df_crypto_news = pd.read_sql(query, engine)
# Close the engine connection
engine.dispose()
# If there are no headlines from the previous day, exit the script
if df_crypto_news.empty:
print(f"No headlines found for {yesterday_str}. Exiting.")
exit()
# Preprocessing the Headline column to extract words
def preprocess_text(text):
# Remove special characters, numbers, and convert to lowercase
text = re.sub(r'[^a-zA-Z\s]', '', text)
text = text.lower()
return text
# Apply preprocessing to the 'Headline' column
df_crypto_news['processed_headline'] = df_crypto_news['headline'].apply(preprocess_text)
# Combine all headlines into a single string
all_text = ' '.join(df_crypto_news['processed_headline'].tolist())
# Generate a word cloud limited to 15 words
wordcloud = WordCloud(width=800, height=400, max_words=30, background_color='white').generate(all_text)
# Plotting the word cloud (no title or heading)
plt.figure(figsize=(10, 5))
plt.imshow(wordcloud, interpolation='bilinear')
plt.axis('off')
# Ensure the /charts/ directory exists
output_dir = 'charts/'
os.makedirs(output_dir, exist_ok=True)
# Save the word cloud image with a unique name based on the date
output_image = f"{output_dir}crypto_news_wordcloud_{yesterday_str}.png"
wordcloud.to_file(output_image)
print(f"Word cloud image for {yesterday_str} saved as {output_image}")