fix: added a lock on the state #34

Merged · 7 commits · Dec 29, 2024
3 changes: 2 additions & 1 deletion .gitignore
@@ -164,4 +164,5 @@ cython_debug/
config.toml
bocca_takeaway.pdf
hlds_files
-db.json
+db.json
+scraper_data.db
4 changes: 3 additions & 1 deletion requirements.txt
@@ -9,4 +9,6 @@ selenium==4.27.1
seleniumbase==4.33.11
mattermostdriver
colored
-apscheduler
+apscheduler
+requests
+authlib
7 changes: 4 additions & 3 deletions run_website.sh
@@ -7,7 +7,8 @@ set -e
IMAGE_NAME="haldis_website"
CONTAINER_NAME="haldis_website_container"
DOCKERFILE_NAME="Dockerfile" # Variable for Dockerfile name
-VOLUME_NAME="$(pwd)/hlds_files"
+VOLUME_NAME1="$(pwd)/hlds_files"
+VOLUME_NAME_DB="$(pwd)/scraper_data.db"

NO_REBUILD=false

@@ -44,7 +45,7 @@ fi

# Step 3: Run the Docker container
echo "Running the Docker container..."
-docker run -d -p 5000:5000 -v "$VOLUME_NAME:/haldis_prijsje/hlds_files" --name $CONTAINER_NAME $IMAGE_NAME
+docker run -d -p 5001:5001 -v "$VOLUME_NAME1:/haldis_prijsje/hlds_files" -v "$VOLUME_NAME_DB:/haldis_prijsje/scraper_data.db" --name $CONTAINER_NAME $IMAGE_NAME

# Step 4: Output the URL where the website can be accessed
echo "Website is now running at: http://127.0.0.1:5000"
echo "Website is now running at: http://127.0.0.1:5001"
19 changes: 17 additions & 2 deletions sync_gitmate.py
@@ -9,6 +9,8 @@
import tomllib
from pprint import pprint

+from git import GitCommandError

from mattermost_comunication import send_message

# import mattermost_communication
@@ -94,6 +96,10 @@ def delete_stale_local_branches(repo):


def checkout_branch(repo, branch_name):
+    # print(repo.git.status())
+    if "have diverged" in repo.git.status():
+        print("Merge is in progress. Aborting merge.")
+        repo.git.merge("--quit")  # Quit any merge process
    repo.git.switch("master")
    prune_remote(repo)
    delete_stale_local_branches(repo)
@@ -107,8 +113,17 @@ def checkout_branch(repo, branch_name):
    if remote_branch_full in remote_branches:
        # If the branch exists on the remote, check it out and pull changes
        print(f"Checking out existing branch: {branch_name}")
-        repo.git.checkout(branch_name)
-        repo.git.pull("origin", branch_name)
+        try:
+            # Ensure there are no merge conflicts or ongoing merges before switching
+            if "have diverged" in repo.git.status():
+                print("Merge is in progress. Aborting merge.")
+                repo.git.merge("--quit")  # Quit any merge process
+
+            repo.git.checkout(branch_name)
+            repo.git.pull("origin", branch_name, "--strategy=ours", "--no-rebase")
+        except GitCommandError as e:
+            print(f"Error during checkout or pull: {e}")
+            raise e
    else:
        # If the branch doesn't exist, create it and push to the remote
        print(f"Branch {branch_name} does not exist on origin. Creating the branch.")
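A note on the divergence check above: matching the substring "have diverged" in `git status` output is brittle, since porcelain text can change across git versions and locales. A minimal sketch of a sturdier check with GitPython, assuming the remote has already been fetched (`branches_diverged` is a hypothetical helper, not part of this codebase):

from git import Repo

def branches_diverged(repo: Repo, branch: str, remote: str = "origin") -> bool:
    """True when the local branch and its remote counterpart each have unique commits."""
    # `git rev-list --left-right --count local...remote` prints "ahead<TAB>behind".
    counts = repo.git.rev_list("--left-right", "--count", f"{branch}...{remote}/{branch}")
    ahead, behind = (int(n) for n in counts.split())
    return ahead > 0 and behind > 0

# Usage sketch:
# repo = Repo(".")
# if branches_diverged(repo, "master"):
#     repo.git.merge("--quit")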
244 changes: 238 additions & 6 deletions website/app.py
@@ -2,21 +2,142 @@
import sqlite3
import sys
import threading
import tomllib
from datetime import datetime
from functools import wraps

-from flask import Flask, render_template, jsonify
+import requests
+from flask import Flask, render_template, jsonify, request
from apscheduler.schedulers.background import BackgroundScheduler
+from authlib.integrations.flask_client import OAuth
+from flask import session, redirect, url_for

# Add the parent directory to the system path to allow imports from the higher-level directory
sys.path.append(os.path.abspath(os.path.join(os.path.dirname(__file__), '..')))
from main import run_scrapers
from run_sync import sync_gitmate
from data_types.location import Location

app = Flask(__name__)
lock = threading.Lock()
oauth = OAuth(app)

# SQLite database file
DATABASE = 'scraper_data.db'

with open("config.toml", mode="rb") as config_toml:
config = tomllib.load(config_toml)

app.config['SECRET_KEY'] = config['flask']['SECRET_KEY']

# Configure Zauth
zeus = oauth.register(
    name='zeus',
    client_id=config['zauth']['ZAUTH_CLIENT_ID'],
    client_secret=config['zauth']['ZAUTH_CLIENT_SECRET'],
    authorize_url=config['zauth']['ZAUTH_AUTHORIZE_URL'],
    access_token_url=config['zauth']['ZAUTH_TOKEN_URL'],
    # client_kwargs={'scope': 'profile email'},  # Add scopes based on your needs
)
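For reference, the lookups above imply this shape for `config.toml` once parsed. The key names come straight from the code; the values and URLs below are placeholders and assumptions, not the real endpoints or credentials:

# Hypothetical equivalent of tomllib.load(config_toml) for the keys used above
config = {
    "flask": {"SECRET_KEY": "<random-secret>"},
    "zauth": {
        "ZAUTH_CLIENT_ID": "<client-id>",
        "ZAUTH_CLIENT_SECRET": "<client-secret>",
        "ZAUTH_AUTHORIZE_URL": "https://zauth.zeus.gent/oauth/authorize",  # assumed path
        "ZAUTH_TOKEN_URL": "https://zauth.zeus.gent/oauth/token",          # assumed path
        "ZAUTH_REDIRECT_URI": "http://127.0.0.1:5001/auth/callback",       # matches the /auth/callback route
    },
}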


@app.route('/test')
def test_route():
print("Test route hit!", flush=True)
return "Test successful!"


@app.route('/login')
def login():
"""
Redirect the user to the Zauth authorization URL.
"""
# state = zeus.client_kwargs.get('state')
# print(f"Generated state: {state}")
return zeus.authorize_redirect(redirect_uri=config['zauth']['ZAUTH_REDIRECT_URI'])


@app.route('/auth/callback')
def callback():
"""
Handle the callback from Zauth.
Exchange the authorization code for an access token and fetch user info.
"""
try:
# Fetch the token
token = zeus.authorize_access_token()

# Extract the access token from the response
access_token = token.get("access_token")
if not access_token:
return jsonify({"error": "No access token returned"}), 400

# Use the access token to fetch user info from the resource server
headers = {"Authorization": f"Bearer {access_token}"}
response = requests.get("https://zauth.zeus.gent/current_user", headers=headers)

# Check if the request was successful
if response.status_code != 200:
return jsonify({"error": "Failed to fetch user info", "details": response.text}), response.status_code

user_info = response.json()

# Store the user info and token in the session (if needed)
session['user'] = user_info
session['oauth_token'] = token
# return jsonify({"message": "Login successful", "user": user_info})
return redirect("/")
except Exception as e:
return jsonify({"error": "Failed to authenticate", "details": str(e)}), 400


@app.route('/logout')
def logout():
"""
Logout the user by clearing the session.
"""
session.pop('user', None)
session.pop('oauth_token', None)
# return jsonify({"message": "Logged out successfully"})
return redirect('/')


@app.route("/profile")
def get_user_info():
    # Check if the user is authenticated
    if 'oauth_token' not in session:
        return redirect("/login")

    # Set the token in the zeus session
    zeus.token = session['oauth_token']

    try:
        # Make a GET request to the resource server to fetch user info
        response = zeus.get('https://zeus.example.com/api/userinfo')

        if response.status_code == 200:
            # Parse the JSON response
            user_info = response.json()
            return jsonify(user_info)

        else:
            # Handle errors (e.g., token expired or insufficient permissions)
            return jsonify({"error": "Failed to fetch user info", "details": response.json()}), response.status_code

    except Exception as e:
        return jsonify({"error": "An exception occurred while fetching user info", "details": str(e)}), 500


def login_required(func):
    @wraps(func)
    def wrapper(*args, **kwargs):
        if 'user' not in session:
            return redirect(url_for('login'))
        return func(*args, **kwargs)

    wrapper.__name__ = func.__name__
    return wrapper


# Class to hold the status of scrapers
class ScraperStatus:
@@ -31,11 +152,13 @@ def __init__(self):
        }

    def is_running(self, scraper_name):
-        return self.status.get(scraper_name, False)
+        with lock:
+            return self.status.get(scraper_name, False)

    def set_running(self, scraper_name, running):
-        if scraper_name in self.status:
-            self.status[scraper_name] = running
+        with lock:
+            if scraper_name in self.status:
+                self.status[scraper_name] = running


# Instantiate the scraper status tracker
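This locking is the heart of the PR: the status dict is shared between Flask request threads, the background scraper threads, and the APScheduler job, so unsynchronized reads and writes can race. One gap the diff does not close is the window between a caller's `is_running()` check and its later `set_running()` call. A sketch of an atomic check-and-set variant (class and scraper names are illustrative, not from this codebase):

import threading

class ScraperStatusSketch:
    """Same idea as ScraperStatus above, plus an atomic check-and-set."""

    def __init__(self):
        self._lock = threading.Lock()
        self._status = {"example_scraper": False}  # hypothetical scraper name

    def is_running(self, name):
        with self._lock:  # reads take the lock too, so they never interleave with a write
            return self._status.get(name, False)

    def try_start(self, name):
        """Atomically claim a scraper; returns False if it was already running."""
        with self._lock:
            if self._status.get(name, False):
                return False
            self._status[name] = True
            return True

With `try_start()`, two simultaneous POSTs to the scrape route cannot both pass the "not running" check before either one sets the flag.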
@@ -104,6 +227,7 @@ def run_scraper_in_background(restaurant_name):


@app.route("/scrape/<restaurant_name>", methods=['POST'])
+@login_required
def scrape(restaurant_name):
    """
    Start the scraper in a background thread for the given restaurant.
@@ -121,6 +245,7 @@


@app.route("/scrape-all", methods=['POST'])
+@login_required
def scrape_all():
    """
    Trigger scraping for all restaurants.
@@ -144,6 +269,7 @@


@app.route("/update-scraper-info")
+@login_required
def update_scraper_info_page():
    """
    Fetch the scraper information from the database and update the frontend table.
@@ -190,12 +316,15 @@ def init_db():


@app.route("/")
+@login_required
def home():
+    user = session.get('user', None)
    scraper_info = get_scraper_info()
-    return render_template('index.html', scraper_info=scraper_info)
+    return render_template('index.html', scraper_info=scraper_info, user=user)


@app.route("/sync-all", methods=["POST"])
+@login_required
def sync_all_files():
    """
    Sync all files to GitMate.
@@ -215,8 +344,111 @@
scheduler.add_job(sync_all_files, 'interval', minutes=30) # Sync every 30 minutes
scheduler.start()
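A caveat with starting the scheduler at import time: under `debug=True` (see the `app.run` change below), Werkzeug's reloader imports the module in both the watcher process and the reloaded worker, which can schedule the sync job twice. A common guard, sketched under the assumption that the reloader is active (when it is off, the variable is unset, so a real guard would also check `app.debug`):

import os

# Werkzeug sets WERKZEUG_RUN_MAIN=true only in the reloaded worker process,
# so gating on it keeps the watcher process from starting a second scheduler.
if os.environ.get("WERKZEUG_RUN_MAIN") == "true":
    scheduler.add_job(sync_all_files, 'interval', minutes=30)
    scheduler.start()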


@app.route("/editor_selector")
@login_required
def editor_selector():
    scraper_info = get_scraper_info()
    return render_template("editor_selector.html", scraper_info=scraper_info)


UPLOAD_FOLDER = 'hlds_files'
app.config['UPLOAD_FOLDER'] = UPLOAD_FOLDER


# Route to serve the editor page for a specific file
@app.route('/edit/<filename>', methods=['GET'])
@login_required
def edit_file(filename):
    filepath = os.path.join(app.config['UPLOAD_FOLDER'], f"{filename}.hlds")
    if os.path.exists(filepath):
        with open(filepath, 'r') as file:
            content = file.read()
        header = extract_header(content)
        return render_template('editor.html', filename=filename, header=header)  # Render the frontend editor
    else:
        return f"File {filename}.hlds not found", 404


@app.route('/read_file', methods=['GET'])
@login_required
def read_file():
    filename = request.args.get('filename')
    filepath = os.path.join(app.config['UPLOAD_FOLDER'], f"{filename}.hlds")

    if os.path.exists(filepath) and filepath.endswith('.hlds'):
        with open(filepath, 'r', encoding='utf-8') as file:
            content = file.read()
        header = extract_header(content)
        return jsonify({'content': content, 'filename': filename, 'header': header})
    else:
        return jsonify({'error': f'File {filepath} not found or invalid file type'}), 404


@app.route('/save_file', methods=['POST'])
@login_required
def save_file():
    data = request.json
    filename = data.get('filename')
    content = data.get('content')
    filepath = os.path.join(app.config['UPLOAD_FOLDER'], f"{filename}.hlds")
    header = data.get('header')  # Get header data from the request

    # Create a Location instance using the header data
    location = Location(
        name=": ".join([header.get('name_key', ''), header.get('name_value', '')]),
        osm=header.get('osm', ''),
        address=header.get('address', ''),
        telephone=header.get('phone', ''),
        website=header.get('website', '')
    )

    # Format the header using the Location class
    header_str = str(location)
    search_str = "========================="
    header_start = content.find(search_str)
    header_end = content.find(search_str, header_start + len(search_str))

    if header_start != -1 and header_end != -1:
        # Replace the header part between the delimiter lines with the new header
        content = content[:header_start] + header_str + content[header_end + len(search_str) + 2:]
        # TODO: on Linux this may need to be +1; on Windows, +2 avoided two blank lines after the header
    print(content)
    if os.path.exists(filepath) and filepath.endswith('.hlds'):
        with open(filepath, 'w', encoding='utf-8') as file:
            file.write(content)
        return jsonify({'message': 'File saved successfully'})
    else:
        return jsonify({'error': 'File not found or invalid file type'}), 404
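To make the slice arithmetic above concrete: `header_end` points at the first character of the closing delimiter, so skipping `len(search_str) + 2` drops that delimiter line, its newline, and one following blank line; with CRLF endings everything shifts by one, which is what the TODO is about. A toy illustration with a hypothetical file layout, not the real `.hlds` format:

search_str = "========================="
content = (
    "=========================\n"
    "name: Old Place\n"
    "=========================\n"
    "\n"
    "dish: fries\n"
)
new_header = "=========================\nname: New Place\n========================="

start = content.find(search_str)
end = content.find(search_str, start + len(search_str))

# end + len(search_str) lands on the '\n' after the closing delimiter;
# the extra +2 skips that newline plus the blank line that follows it.
patched = content[:start] + new_header + "\n" + content[end + len(search_str) + 2:]
assert patched.endswith("dish: fries\n")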


def extract_header(content):
    # Define the header pattern (this can be customized based on your actual header structure)
    header_lines = content.splitlines()[:6]  # Assuming the header is the first 6 lines
    header = {}

    # Default to empty string if a part of the header is missing
    header['name_key'] = ""
    header['name_value'] = ""
    header['osm'] = ""
    header['phone'] = ""
    header['address'] = ""
    header['website'] = ""

    if len(header_lines) >= 6:  # need all six lines before indexing header_lines[5]
        print(len(header_lines))
        header['name_key'] = header_lines[1].split(":")[0].strip()
        header['name_value'] = header_lines[1].split(":")[1].strip()
        header['osm'] = " ".join(header_lines[2].split(" ")[1:]).strip()
        header['phone'] = " ".join(header_lines[3].split(" ")[1:]).strip()
        header['address'] = " ".join(header_lines[4].split(" ")[1:]).strip()
        header['website'] = " ".join(header_lines[5].split(" ")[1:]).strip()

    return header
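One fragile spot in `extract_header`: `header_lines[1].split(":")[1]` raises IndexError when the line has no colon, and drops anything after a second colon inside the value. A more forgiving parse, sketched with `str.partition` (the helper name and the example line are hypothetical):

def parse_header_line(line):
    """Split 'key: value' once; tolerates missing separators and colons inside the value."""
    key, _, value = line.partition(":")  # never raises, unlike split(":")[1]
    return key.strip(), value.strip()

# Usage sketch:
# parse_header_line("name: Frituur 't Hoekske: Gent")  ->  ("name", "Frituur 't Hoekske: Gent")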


if __name__ == "__main__":
    # Initialize the database when the app starts
    init_db()

-    app.run(host="0.0.0.0", port=5000, threaded=True, debug=True)
+    app.run(host="0.0.0.0", port=5001, threaded=True, debug=True)