From dc2fc70bc3d14c4f7b9d587a7cadef587e0e7ac3 Mon Sep 17 00:00:00 2001
From: Tybo Verslype
Date: Thu, 26 Dec 2024 19:46:02 +0100
Subject: [PATCH] chore: made scrapers not able to run twice at the same time

---
 requirements.txt |  3 ++-
 website/app.py   | 44 ++++++++++++++++++++++++++++++++++++--------
 2 files changed, 38 insertions(+), 9 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index ba9c129..79b5061 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -8,4 +8,5 @@ requests==2.32.3
 selenium==4.27.1
 seleniumbase==4.33.11
 mattermostdriver
-colored
\ No newline at end of file
+colored
+apscheduler
\ No newline at end of file
diff --git a/website/app.py b/website/app.py
index ea96d9f..62d60a2 100644
--- a/website/app.py
+++ b/website/app.py
@@ -18,6 +18,30 @@
 DATABASE = 'scraper_data.db'
 
 
+# Class to hold the status of scrapers
+class ScraperStatus:
+    def __init__(self):
+        self.status = {
+            'metropol': False,
+            'bicyclette': False,
+            'simpizza': False,
+            'pizza_donna': False,
+            'bocca_ovp': False,
+            's5': False
+        }
+
+    def is_running(self, scraper_name):
+        return self.status.get(scraper_name, False)
+
+    def set_running(self, scraper_name, running):
+        if scraper_name in self.status:
+            self.status[scraper_name] = running
+
+
+# Instantiate the scraper status tracker
+scraper_status = ScraperStatus()
+
+
 # Function to update the database with scraper info
 def update_scraper_info(restaurant_name, products_count):
     last_scraped = datetime.now()
@@ -52,27 +76,31 @@ def update_scraper_status(restaurant_name, status):
 def run_scraper_in_background(restaurant_name):
     """
     Function to run the scraper in a background thread.
-    Updates the database with the number of products, last scraped time, and status.
+    Updates the status of the scraper in the ScraperStatus class.
     """
+    if scraper_status.is_running(restaurant_name):
+        print(f"Scraper for {restaurant_name} is already running. Skipping.")
+        return
+
     try:
-        # Set status to "Running" when scraping starts
+        # Mark the scraper as running
+        scraper_status.set_running(restaurant_name, True)
         update_scraper_status(restaurant_name, "Running")
+        print(f"Starting scraper for {restaurant_name}...")
 
         # Run the scraper for the given restaurant
         result = run_scrapers(restaurant_names=[restaurant_name])
-
-        # Extract the values from the result dictionary
-        restaurant_names = result["restaurant_names"]
         total_products_scraped = result["total_products_scraped"]
 
-        # Update the database with the number of products, last scraped timestamp, and status as "Finished"
         update_scraper_info(restaurant_name, total_products_scraped)
         update_scraper_status(restaurant_name, "Finished")
-
+        print(f"Scraper for {restaurant_name} completed. Products scraped: {total_products_scraped}")
     except Exception as e:
         print(f"Error running scraper for {restaurant_name}: {e}")
-        # If there's an error, set the status to "Failed"
         update_scraper_status(restaurant_name, "Failed")
+    finally:
+        # Mark the scraper as not running
+        scraper_status.set_running(restaurant_name, False)
 
 
 @app.route("/scrape/<restaurant_name>", methods=['POST'])