Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: made scrapers not able to run twice at the same time #33

Merged
merged 1 commit into from
Dec 26, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,4 +8,5 @@ requests==2.32.3
selenium==4.27.1
seleniumbase==4.33.11
mattermostdriver
colored
colored
apscheduler
44 changes: 36 additions & 8 deletions website/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,30 @@
DATABASE = 'scraper_data.db'


# Class to hold the status of scrapers
class ScraperStatus:
    """Track whether each scraper is currently running.

    Used to stop the same scraper from being launched twice at the same
    time. A lock guards the status dict so that concurrent request
    threads cannot race between checking and setting a scraper's state.
    """

    def __init__(self):
        # Local import keeps this class self-contained; the surrounding
        # module may not import threading at the top of the file.
        import threading
        self._lock = threading.Lock()
        # Known scrapers, all initially idle (False = not running).
        self.status = {
            'metropol': False,
            'bicyclette': False,
            'simpizza': False,
            'pizza_donna': False,
            'bocca_ovp': False,
            's5': False
        }

    def is_running(self, scraper_name):
        """Return True if *scraper_name* is currently running.

        Unknown scraper names are reported as not running.
        """
        with self._lock:
            return self.status.get(scraper_name, False)

    def set_running(self, scraper_name, running):
        """Mark a known scraper as running/idle; unknown names are ignored."""
        with self._lock:
            if scraper_name in self.status:
                self.status[scraper_name] = running

    def try_start(self, scraper_name):
        """Atomically claim *scraper_name* for a new run.

        Returns True if the scraper was idle and is now marked running;
        False if it was already running or the name is unknown. Callers
        should prefer this over separate is_running()/set_running()
        calls, which leave a check-then-act race between threads.
        """
        with self._lock:
            if scraper_name not in self.status or self.status[scraper_name]:
                return False
            self.status[scraper_name] = True
            return True


# Module-level singleton shared by every request handler / background
# thread in this module; run_scraper_in_background consults it to skip
# a scraper that is already running.
scraper_status = ScraperStatus()


# Function to update the database with scraper info
def update_scraper_info(restaurant_name, products_count):
last_scraped = datetime.now()
Expand Down Expand Up @@ -52,27 +76,31 @@ def update_scraper_status(restaurant_name, status):
def run_scraper_in_background(restaurant_name):
    """
    Run the scraper for *restaurant_name* in a background thread.

    Skips the run entirely if the same scraper is already in progress
    (tracked via the module-level ScraperStatus instance). Otherwise it
    marks the scraper as running, executes it, records the product count
    and a "Finished"/"Failed" status in the database, and always clears
    the running flag on exit.

    NOTE(review): reconstructed from a diff whose +/- markers were lost;
    duplicated docstring/comment lines from the old revision were removed.
    """
    # Guard against a second concurrent run of the same scraper.
    if scraper_status.is_running(restaurant_name):
        print(f"Scraper for {restaurant_name} is already running. Skipping.")
        return

    try:
        # Mark the scraper as running before any slow work begins.
        scraper_status.set_running(restaurant_name, True)
        update_scraper_status(restaurant_name, "Running")
        print(f"Starting scraper for {restaurant_name}...")

        # Run the scraper for the given restaurant.
        result = run_scrapers(restaurant_names=[restaurant_name])
        total_products_scraped = result["total_products_scraped"]

        # Persist product count, last-scraped timestamp, and final status.
        update_scraper_info(restaurant_name, total_products_scraped)
        update_scraper_status(restaurant_name, "Finished")

        print(f"Scraper for {restaurant_name} completed. Products scraped: {total_products_scraped}")
    except Exception as e:
        # Record the failure so the UI/database reflects it.
        print(f"Error running scraper for {restaurant_name}: {e}")
        update_scraper_status(restaurant_name, "Failed")
    finally:
        # Always release the running flag, even on error, so the
        # scraper can be started again later.
        scraper_status.set_running(restaurant_name, False)


@app.route("/scrape/<restaurant_name>", methods=['POST'])
Expand Down
Loading