From dc2fc70bc3d14c4f7b9d587a7cadef587e0e7ac3 Mon Sep 17 00:00:00 2001
From: Tybo Verslype
Date: Thu, 26 Dec 2024 19:46:02 +0100
Subject: [PATCH] chore: made scrapers not able to run twice at the same time

---
 requirements.txt |  3 ++-
 website/app.py   | 44 ++++++++++++++++++++++++++++++++++++--------
 2 files changed, 38 insertions(+), 9 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index ba9c129..79b5061 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,4 +8,5 @@ requests==2.32.3
 selenium==4.27.1
 seleniumbase==4.33.11
 mattermostdriver
-colored
\ No newline at end of file
+colored
+apscheduler
\ No newline at end of file
diff --git a/website/app.py b/website/app.py
index ea96d9f..62d60a2 100644
--- a/website/app.py
+++ b/website/app.py
@@ -18,6 +18,30 @@
 DATABASE = 'scraper_data.db'
 
 
+# Class to hold the status of scrapers
+class ScraperStatus:
+    def __init__(self):
+        self.status = {
+            'metropol': False,
+            'bicyclette': False,
+            'simpizza': False,
+            'pizza_donna': False,
+            'bocca_ovp': False,
+            's5': False
+        }
+
+    def is_running(self, scraper_name):
+        return self.status.get(scraper_name, False)
+
+    def set_running(self, scraper_name, running):
+        if scraper_name in self.status:
+            self.status[scraper_name] = running
+
+
+# Instantiate the scraper status tracker
+scraper_status = ScraperStatus()
+
+
 # Function to update the database with scraper info
 def update_scraper_info(restaurant_name, products_count):
     last_scraped = datetime.now()
@@ -52,27 +76,31 @@ def update_scraper_status(restaurant_name, status):
 def run_scraper_in_background(restaurant_name):
     """
     Function to run the scraper in a background thread.
-    Updates the database with the number of products, last scraped time, and status.
+    Updates the status of the scraper in the ScraperStatus class.
     """
+    if scraper_status.is_running(restaurant_name):
+        print(f"Scraper for {restaurant_name} is already running. Skipping.")
+        return
+
     try:
-        # Set status to "Running" when scraping starts
+        # Mark the scraper as running
+        scraper_status.set_running(restaurant_name, True)
         update_scraper_status(restaurant_name, "Running")
+        print(f"Starting scraper for {restaurant_name}...")
 
         # Run the scraper for the given restaurant
         result = run_scrapers(restaurant_names=[restaurant_name])
-
-        # Extract the values from the result dictionary
-        restaurant_names = result["restaurant_names"]
         total_products_scraped = result["total_products_scraped"]
 
-        # Update the database with the number of products, last scraped timestamp, and status as "Finished"
         update_scraper_info(restaurant_name, total_products_scraped)
         update_scraper_status(restaurant_name, "Finished")
-
+        print(f"Scraper for {restaurant_name} completed. Products scraped: {total_products_scraped}")
     except Exception as e:
         print(f"Error running scraper for {restaurant_name}: {e}")
-        # If there's an error, set the status to "Failed"
         update_scraper_status(restaurant_name, "Failed")
+    finally:
+        # Mark the scraper as not running
+        scraper_status.set_running(restaurant_name, False)
 
 
 @app.route("/scrape/<restaurant_name>", methods=['POST'])