diff --git a/.gitignore b/.gitignore
index 575c772..d6f09fa 100644
--- a/.gitignore
+++ b/.gitignore
@@ -164,4 +164,5 @@ cython_debug/
 config.toml
 bocca_takeaway.pdf
 hlds_files
-db.json
\ No newline at end of file
+db.json
+scraper_data.db
\ No newline at end of file
diff --git a/requirements.txt b/requirements.txt
index 79b5061..27f9558 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -9,4 +9,6 @@ selenium==4.27.1
 seleniumbase==4.33.11
 mattermostdriver
 colored
-apscheduler
\ No newline at end of file
+apscheduler
+requests
+authlib
\ No newline at end of file
diff --git a/run_website.sh b/run_website.sh
index 50f2b08..244ebec 100644
--- a/run_website.sh
+++ b/run_website.sh
@@ -7,7 +7,8 @@ set -e
 IMAGE_NAME="haldis_website"
 CONTAINER_NAME="haldis_website_container"
 DOCKERFILE_NAME="Dockerfile"  # Variable for Dockerfile name
-VOLUME_NAME="$(pwd)/hlds_files"
+VOLUME_NAME_HLDS="$(pwd)/hlds_files"
+VOLUME_NAME_DB="$(pwd)/scraper_data.db"
 
 NO_REBUILD=false
 
@@ -44,7 +45,10 @@ fi
 
 # Step 3: Run the Docker container
 echo "Running the Docker container..."
-docker run -d -p 5000:5000 -v "$VOLUME_NAME:/haldis_prijsje/hlds_files" --name $CONTAINER_NAME $IMAGE_NAME
+# Docker creates a missing bind-mount source as a directory, which SQLite
+# cannot open as a database file, so make sure the file exists first.
+touch "$VOLUME_NAME_DB"
+docker run -d -p 5001:5001 -v "$VOLUME_NAME_HLDS:/haldis_prijsje/hlds_files" -v "$VOLUME_NAME_DB:/haldis_prijsje/scraper_data.db" --name $CONTAINER_NAME $IMAGE_NAME
 
 # Step 4: Output the URL where the website can be accessed
-echo "Website is now running at: http://127.0.0.1:5000"
+echo "Website is now running at: http://127.0.0.1:5001"
diff --git a/sync_gitmate.py b/sync_gitmate.py
index 0fa2ef3..25f074f 100644
--- a/sync_gitmate.py
+++ b/sync_gitmate.py
@@ -9,6 +9,8 @@ import tomllib
 from pprint import pprint
 
+from git import GitCommandError
+
 from mattermost_comunication import send_message
 
 # import mattermost_communication
 
@@ -94,6 +96,19 @@ def delete_stale_local_branches(repo):
 
 
+def quit_pending_merge(repo):
+    """Clear git's unfinished-merge state when ``git status`` reports divergence.
+
+    NOTE(review): "have diverged" only says that the local and remote
+    histories differ; testing for ``.git/MERGE_HEAD`` would detect an
+    unfinished merge directly -- confirm which condition is intended.
+    """
+    if "have diverged" in repo.git.status():
+        print("Merge is in progress. Aborting merge.")
+        repo.git.merge("--quit")  # Quit any merge process
+
+
 def checkout_branch(repo, branch_name):
+    quit_pending_merge(repo)
     repo.git.switch("master")
     prune_remote(repo)
     delete_stale_local_branches(repo)
@@ -107,8 +122,15 @@ def checkout_branch(repo, branch_name):
     if remote_branch_full in remote_branches:
         # If the branch exists on the remote, check it out and pull changes
         print(f"Checking out existing branch: {branch_name}")
-        repo.git.checkout(branch_name)
-        repo.git.pull("origin", branch_name)
+        try:
+            # Ensure there are no merge conflicts or ongoing merges before switching
+            quit_pending_merge(repo)
+
+            repo.git.checkout(branch_name)
+            repo.git.pull("origin", branch_name, "--strategy=ours", "--no-rebase")
+        except GitCommandError as e:
+            print(f"Error during checkout or pull: {e}")
+            raise
     else:
         # If the branch doesn't exist, create it and push to the remote
         print(f"Branch {branch_name} does not exist on origin. Creating the branch.")
diff --git a/website/app.py b/website/app.py
index 62d60a2..b54da1d 100644
--- a/website/app.py
+++ b/website/app.py
@@ -2,21 +2,142 @@ import sqlite3
 import sys
 import threading
+import tomllib
 from datetime import datetime
+from functools import wraps
 
-from flask import Flask, render_template, jsonify
+import requests
+from flask import Flask, render_template, jsonify, request
 from apscheduler.schedulers.background import BackgroundScheduler
+from authlib.integrations.flask_client import OAuth
+from flask import session, redirect, url_for
 
 # Add the parent directory to the system path to allow imports from the higher-level directory
 sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
 
 from main import run_scrapers
 from run_sync import sync_gitmate
+from data_types.location import Location
 
 app = Flask(__name__)
+lock = threading.Lock()
+oauth = OAuth(app)
 
 # SQLite database file
 DATABASE = 'scraper_data.db'
 
+with open("config.toml", mode="rb") as config_toml:
+    config = tomllib.load(config_toml)
+
+app.config['SECRET_KEY'] = config['flask']['SECRET_KEY']
+
+# Configure Zauth
+zeus = oauth.register(
+    name='zeus',
+    client_id=config['zauth']['ZAUTH_CLIENT_ID'],
+    client_secret=config['zauth']['ZAUTH_CLIENT_SECRET'],
+    authorize_url=config['zauth']['ZAUTH_AUTHORIZE_URL'],
+    access_token_url=config['zauth']['ZAUTH_TOKEN_URL'],
+    # client_kwargs={'scope': 'profile email'},  # Add scopes based on your needs
+)
+
+
+@app.route('/test')
+def test_route():
+    print("Test route hit!", flush=True)
+    return "Test successful!"
+
+
+@app.route('/login')
+def login():
+    """
+    Redirect the user to the Zauth authorization URL.
+    """
+    # The redirect URI is read from config.toml; presumably it has to match
+    # the URI registered for this client with Zauth -- verify when deploying.
+    return zeus.authorize_redirect(redirect_uri=config['zauth']['ZAUTH_REDIRECT_URI'])
+
+
+@app.route('/auth/callback')
+def callback():
+    """
+    Handle the callback from Zauth.
+    Exchange the authorization code for an access token and fetch user info.
+    """
+    try:
+        # Fetch the token
+        token = zeus.authorize_access_token()
+
+        # Extract the access token from the response
+        access_token = token.get("access_token")
+        if not access_token:
+            return jsonify({"error": "No access token returned"}), 400
+
+        # Use the access token to fetch user info (the timeout keeps a stalled server from hanging the worker)
+        headers = {"Authorization": f"Bearer {access_token}"}
+        response = requests.get("https://zauth.zeus.gent/current_user", headers=headers, timeout=10)
+
+        # Check if the request was successful
+        if response.status_code != 200:
+            return jsonify({"error": "Failed to fetch user info", "details": response.text}), response.status_code
+
+        user_info = response.json()
+
+        # Store the user info and token in the session (if needed)
+        session['user'] = user_info
+        session['oauth_token'] = token
+        # NOTE(review): Flask's default session cookie is signed, not encrypted -- the token is client-readable unless a server-side session is configured
+        return redirect("/")
+    except Exception as e:
+        return jsonify({"error": "Failed to authenticate", "details": str(e)}), 400
+
+
+@app.route('/logout')
+def logout():
+    """
+    Logout the user by clearing the session.
+    """
+    session.pop('user', None)
+    session.pop('oauth_token', None)
+    # Send the browser back to the home page
+    return redirect('/')
+
+
+@app.route("/profile")
+def get_user_info():
+    # Check if the user is authenticated
+    if 'oauth_token' not in session:
+        return redirect("/login")
+
+    # Set the token in the zeus session
+    zeus.token = session['oauth_token']
+
+    try:
+        # Make a GET request to the resource server to fetch user info (same endpoint as the login callback)
+        response = zeus.get('https://zauth.zeus.gent/current_user')
+
+        if response.status_code == 200:
+            # Parse the JSON response
+            user_info = response.json()
+            return jsonify(user_info)
+
+        else:
+            # Handle errors (e.g., token expired or insufficient permissions)
+            return jsonify({"error": "Failed to fetch user info", "details": response.json()}), response.status_code
+
+    except Exception as e:
+        return jsonify({"error": "An exception occurred while fetching user info", "details": str(e)}), 500
+
+
+def login_required(func):
+    """Wrap a view so that requests without a 'user' in the session are redirected to the login route."""
+    @wraps(func)
+    def wrapper(*args, **kwargs):
+        if 'user' not in session:
+            return redirect(url_for('login'))
+        return func(*args, **kwargs)
+
+    return wrapper
+
 
 # Class to hold the status of scrapers
 class ScraperStatus:
@@ -31,11 +152,13 @@ def __init__(self):
         }
 
     def is_running(self, scraper_name):
-        return self.status.get(scraper_name, False)
+        with lock:
+            return self.status.get(scraper_name, False)
 
     def set_running(self, scraper_name, running):
-        if scraper_name in self.status:
-            self.status[scraper_name] = running
+        with lock:
+            if scraper_name in self.status:
+                self.status[scraper_name] = running
 
 
 # Instantiate the scraper status tracker
@@ -104,6 +227,7 @@ def run_scraper_in_background(restaurant_name):
 
 
 @app.route("/scrape/<restaurant_name>", methods=['POST'])
+@login_required
 def scrape(restaurant_name):
     """
     Start the scraper in a background thread for the given restaurant.
@@ -121,6 +245,7 @@ def scrape(restaurant_name):
 
 
 @app.route("/scrape-all", methods=['POST'])
+@login_required
 def scrape_all():
     """
     Trigger scraping for all restaurants.
@@ -144,6 +269,7 @@ def scrape_all():
 
 
 @app.route("/update-scraper-info")
+@login_required
 def update_scraper_info_page():
     """
     Fetch the scraper information from the database and update the frontend table.
@@ -190,12 +316,15 @@ def init_db():
 
 
 @app.route("/")
+@login_required
 def home():
+    user = session.get('user', None)
     scraper_info = get_scraper_info()
-    return render_template('index.html', scraper_info=scraper_info)
+    return render_template('index.html', scraper_info=scraper_info, user=user)
 
 
 @app.route("/sync-all", methods=["POST"])
+@login_required
 def sync_all_files():
     """
     Sync all files to GitMate.
@@ -215,8 +344,129 @@ def sync_all_files():
-scheduler.add_job(sync_all_files, 'interval', minutes=30)  # Sync every 30 minutes
+# Scheduled jobs run outside a request, where the session lookup done by
+# login_required raises RuntimeError, so schedule the undecorated view
+# (functools.wraps exposes it as __wrapped__).
+scheduler.add_job(sync_all_files.__wrapped__, 'interval', minutes=30)  # Sync every 30 minutes
 scheduler.start()
 
+
+@app.route("/editor_selector")
+@login_required
+def editor_selector():
+    scraper_info = get_scraper_info()
+    return render_template("editor_selector.html", scraper_info=scraper_info)
+
+
+UPLOAD_FOLDER = 'hlds_files'
+app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER
+
+
+def hlds_path(filename):
+    """Return the path of ``<filename>.hlds`` inside the upload folder.
+
+    Only the last path component of ``filename`` is kept, so request data
+    such as ``../../config`` cannot reach files outside ``UPLOAD_FOLDER``.
+    """
+    safe_name = os.path.basename(str(filename))
+    return os.path.join(app.config['UPLOAD_FOLDER'], f"{safe_name}.hlds")
+
+
+# Route to serve the editor page for a specific file
+@app.route('/edit/<filename>', methods=['GET'])
+@login_required
+def edit_file(filename):
+    filepath = hlds_path(filename)
+    if os.path.isfile(filepath):
+        with open(filepath, 'r', encoding='utf-8') as file:
+            content = file.read()
+        header = extract_header(content)
+        return render_template('editor.html', filename=filename, header=header)  # Render the frontend editor
+    else:
+        return f"File {filename}.hlds not found", 404
+
+
+@app.route('/read_file', methods=['GET'])
+@login_required
+def read_file():
+    filename = request.args.get('filename')
+    filepath = hlds_path(filename)
+
+    if os.path.isfile(filepath):
+        with open(filepath, 'r', encoding='utf-8') as file:
+            content = file.read()
+        header = extract_header(content)
+        return jsonify({'content': content, 'filename': filename, 'header': header})
+    else:
+        return jsonify({'error': f'File {filepath} not found or invalid file type'}), 404
+
+
+@app.route('/save_file', methods=['POST'])
+@login_required
+def save_file():
+    """Write edited content back to an existing .hlds file, rebuilding its header from the posted fields."""
+    data = request.get_json(silent=True)
+    if not isinstance(data, dict):
+        return jsonify({'error': 'Expected a JSON object'}), 400
+    filename = data.get('filename')
+    content = data.get('content')
+    header = data.get('header') or {}  # Get header data from the request
+    if not isinstance(content, str) or not isinstance(header, dict):
+        return jsonify({'error': 'Request must contain text content and a header object'}), 400
+    filepath = hlds_path(filename)
+
+    # Create a Location instance using the header data
+    location = Location(
+        name=": ".join([header.get('name_key', ''), header.get('name_value', '')]),
+        osm=header.get('osm', ''),
+        address=header.get('address', ''),
+        telephone=header.get('phone', ''),
+        website=header.get('website', '')
+    )
+
+    # Format the header using the Location class
+    header_str = str(location)
+    search_str = "========================="
+    header_start = content.find(search_str)
+    header_end = content.find(search_str, header_start + len(search_str))
+
+    if header_start != -1 and header_end != -1:
+        # Replace the header part between "============" with the new header
+        # TODO: on Linux this may need to be + 1; on Windows it gave 2 empty lines after the header
+        content = content[:header_start] + header_str + content[header_end + len(search_str) + 2:]
+    if os.path.isfile(filepath):
+        with open(filepath, 'w', encoding='utf-8') as file:
+            file.write(content)
+        return jsonify({'message': 'File saved successfully'})
+    else:
+        return jsonify({'error': 'File not found or invalid file type'}), 404
+
+
+def extract_header(content):
+    """Extract the header fields from the first six lines of HLDS content.
+
+    Returns a dict with the keys ``name_key``, ``name_value``, ``osm``,
+    ``phone``, ``address`` and ``website``; every value is "" when the
+    content has fewer than six lines.
+    """
+    header = dict.fromkeys(
+        ('name_key', 'name_value', 'osm', 'phone', 'address', 'website'), "")
+    header_lines = content.splitlines()[:6]
+    # Fields are read from lines 1-5 (line 0 is skipped), so six lines are required
+    if len(header_lines) < 6:
+        return header
+
+    # Split on the first colon only, so a value may itself contain colons
+    name_key, _, name_value = header_lines[1].partition(":")
+    header['name_key'] = name_key.strip()
+    header['name_value'] = name_value.strip()
+    # On the remaining lines the first space-separated word is dropped and the rest kept
+    for field, line in zip(('osm', 'phone', 'address', 'website'), header_lines[2:]):
+        header[field] = " ".join(line.split(" ")[1:]).strip()
+
+    return header
+
 
 if __name__ == "__main__":
     # Initialize the database when the app starts
     init_db()
-    app.run(host="0.0.0.0", port=5000, threaded=True, debug=True)
+    # The Werkzeug debugger allows arbitrary code execution, so it stays off
+    # unless FLASK_DEBUG=1 is set explicitly (the server listens on 0.0.0.0).
+    app.run(host="0.0.0.0", port=5001, threaded=True, debug=os.environ.get("FLASK_DEBUG") == "1")
diff --git a/website/static/styles.css b/website/static/styles.css
new file mode 100644
index 0000000..7cc199e
--- /dev/null
+++ b/website/static/styles.css
@@ -0,0 +1,86 @@
+/* Top bar styling */
+.top-bar {
+    background-color: #ff7f00;
+    color: white;
+    padding: 10px 20px;
+    display: flex;
+    justify-content: space-between;
+    align-items: center;
+}
+
+.top-bar a {
+    color: white;
+    text-decoration: none;
+    margin: 0 10px;
+    font-weight: bold;
+    font-size: 16px;
+}
+
+.top-bar a:hover {
+    text-decoration: underline;
+}
+
+/* General table styling */
+table {
+    width: 100%;
+    border-collapse: collapse;
+    margin: 20px 0;
+    font-family: Arial, sans-serif;
+    text-align: left;
+}
+
+th, td {
+    padding: 10px;
+    border: 1px solid #ddd;
+    text-align: left;
+}
+
+th {
+    background-color: #ff7f00;
+    color: white;
+    font-weight: bold;
+}
+
+/* Row styling */
+tr:nth-child(even) {
+    background-color: #f9f9f9;
+}
+
+/* Hover effect for table rows */
+tr:hover {
+    background-color: #f1f1f1;
+}
+
+/* Button styling */
+button {
+    background-color: #ff7f00;
+    color: white;
+    padding: 8px 16px;
+    border: none;
+    cursor: pointer;
+    font-size: 14px;
+    border-radius: 4px;
+    transition: background-color 0.3s ease;
+}
+
+button:hover {
+    background-color: #e67300; /* darker than the base colour so the hover transition is visible */
+}
+
+/* Status column styling */
+.status {
+    font-weight: bold;
+    color: #333;
+}
+
+.status.never-run {
+    color: #ff9800;
+}
+
+.status.running {
+    color: #2196f3;
+}
+
+.status.finished {
+    color: #ff7f00;
+}
\ No newline at end of file
diff --git a/website/templates/editor.html b/website/templates/editor.html
new file mode 100644
index 0000000..af00c21
--- /dev/null
+++ b/website/templates/editor.html
@@ -0,0 +1,118 @@
+
+
+
+
+
+    HLDS File Editor
+
+
+
+

Editing HLDS File: {{ filename }}.hlds

+ + + + + + + + + + +
+

+    
+ + diff --git a/website/templates/editor_selector.html b/website/templates/editor_selector.html new file mode 100644 index 0000000..9dac616 --- /dev/null +++ b/website/templates/editor_selector.html @@ -0,0 +1,49 @@ + + + + + + Haldis een prijsje + + + + + + +
+
+ Home + Editor +
+
+
+

Hlds file editor

+
+ +
+ + + + + + + + + + + + {% for scraper in scraper_info %} + + + + + + + + {% endfor %} + +
RestaurantProducts ScrapedLast ScrapedStatusAction
{{ scraper[0] }}{{ scraper[1] }}{{ scraper[2] }}{{ scraper[3] }}
+
+ + diff --git a/website/templates/index.html b/website/templates/index.html index 3666479..6ca99fc 100644 --- a/website/templates/index.html +++ b/website/templates/index.html @@ -5,73 +5,7 @@ Haldis een prijsje - + +
+
+ Home + Editor +
+ + +
+ {% if user %} + Welcome, {{ user['username'] }} + {% endif %} + logout +
+
+

Restaurant Scraper