Improving the web interface (#1975)

* update web interface with commandline options * improve web interface * update README images of web interface * fix bug in app.py * fix web interface
soxoj · Dec 17, 2024 · c2e3e96 · c2e3e96
1 parent 900ed84
commit c2e3e96
Show file tree

Hide file tree

Showing 7 changed files with 710 additions and 138 deletions.
diff --git a/maigret/web/app.py b/maigret/web/app.py
@@ -1,4 +1,3 @@
-# app.py
 from flask import (
     Flask,
     render_template,
@@ -22,7 +21,7 @@
 app = Flask(__name__)
 app.secret_key = 'your-secret-key-here'
 
-# Add background job tracking
+#add background job tracking
 background_jobs = {}
 job_results = {}
 
@@ -46,16 +45,38 @@ async def maigret_search(username, options):
     logger = setup_logger(logging.WARNING, 'maigret')
     try:
         db = MaigretDatabase().load_from_path(MAIGRET_DB_FILE)
-        sites = db.ranked_sites_dict(top=int(options.get('top_sites', 500)))
+
+        top_sites = int(options.get('top_sites') or 500) 
+        if options.get('all_sites'):
+            top_sites = 999999999  # effectively all
+
+        tags = options.get('tags', [])
+        site_list= options.get('site_list', [])
+        logger.info(f"Filtering sites by tags: {tags}")
+
+        sites = db.ranked_sites_dict(
+            top=top_sites,
+            tags=tags,
+            names=site_list,
+            disabled=False,
+            id_type='username'
+        )
+
+        logger.info(f"Found {len(sites)} sites matching the tag criteria")
 
         results = await maigret.search(
             username=username,
             site_dict=sites,
             timeout=int(options.get('timeout', 30)),
             logger=logger,
-            id_type=options.get('id_type', 'username'),
+            id_type='username',
             cookies=COOKIES_FILE if options.get('use_cookies') else None,
-            is_parsing_enabled=True,
+            is_parsing_enabled=(not options.get('disable_extracting', False)),  
+            recursive_search_enabled=(not options.get('disable_recursive_search', False)),
+            check_domains=options.get('with_domains', False),
+            proxy=options.get('proxy', None),
+            tor_proxy=options.get('tor_proxy', None),
+            i2p_proxy=options.get('i2p_proxy', None),
         )
         return results
     except Exception as e:
@@ -68,36 +89,31 @@ async def search_multiple_usernames(usernames, options):
     for username in usernames:
         try:
             search_results = await maigret_search(username.strip(), options)
-            results.append((username.strip(), options['id_type'], search_results))
+            results.append((username.strip(), 'username', search_results))
         except Exception as e:
             logging.error(f"Error searching username {username}: {str(e)}")
     return results
 
 
 def process_search_task(usernames, options, timestamp):
     try:
-        # Setup event loop for async operations
         loop = asyncio.new_event_loop()
         asyncio.set_event_loop(loop)
 
-        # Run the search
         general_results = loop.run_until_complete(
             search_multiple_usernames(usernames, options)
         )
 
-        # Create session folder
         session_folder = os.path.join(REPORTS_FOLDER, f"search_{timestamp}")
         os.makedirs(session_folder, exist_ok=True)
 
-        # Save the combined graph
         graph_path = os.path.join(session_folder, "combined_graph.html")
         maigret.report.save_graph_report(
             graph_path,
             general_results,
             MaigretDatabase().load_from_path(MAIGRET_DB_FILE),
         )
 
-        # Save individual reports
         individual_reports = []
         for username, id_type, results in general_results:
             report_base = os.path.join(session_folder, f"report_{username}")
@@ -154,25 +170,42 @@ def process_search_task(usernames, options, timestamp):
                 }
             )
 
-        # Save results and mark job as complete
+        # save results and mark job as complete using timestamp as key
         job_results[timestamp] = {
             'status': 'completed',
             'session_folder': f"search_{timestamp}",
             'graph_file': os.path.join(f"search_{timestamp}", "combined_graph.html"),
             'usernames': usernames,
             'individual_reports': individual_reports,
         }
+
     except Exception as e:
+        logging.error(f"Error in search task for timestamp {timestamp}: {str(e)}")
         job_results[timestamp] = {'status': 'failed', 'error': str(e)}
     finally:
         background_jobs[timestamp]['completed'] = True
 
 
 @app.route('/')
 def index():
-    return render_template('index.html')
-
-
+    #load site data for autocomplete
+    db = MaigretDatabase().load_from_path(MAIGRET_DB_FILE)
+    site_options = []
+
+    for site in db.sites:
+        #add main site name
+        site_options.append(site.name)
+        #add URL if different from name
+        if site.url_main and site.url_main not in site_options:
+            site_options.append(site.url_main)
+
+    #sort and deduplicate
+    site_options = sorted(set(site_options))
+
+    return render_template('index.html', site_options=site_options)
+
+
+# Modified search route
 @app.route('/search', methods=['POST'])
 def search():
     usernames_input = request.form.get('usernames', '').strip()
@@ -187,15 +220,28 @@ def search():
     # Create timestamp for this search session
     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
 
-    logging.info(f"Starting search for usernames: {usernames}")
+    # Get selected tags - ensure it's a list
+    selected_tags = request.form.getlist('tags')
+    logging.info(f"Selected tags: {selected_tags}")
 
     options = {
-        'top_sites': request.form.get('top_sites', '500'),
-        'timeout': request.form.get('timeout', '30'),
-        'id_type': 'username',  # fixed as username
+        'top_sites': request.form.get('top_sites') or '500',
+        'timeout': request.form.get('timeout') or '30',
         'use_cookies': 'use_cookies' in request.form,
+        'all_sites': 'all_sites' in request.form,
+        'disable_recursive_search': 'disable_recursive_search' in request.form,
+        'disable_extracting': 'disable_extracting' in request.form,
+        'with_domains': 'with_domains' in request.form,
+        'proxy': request.form.get('proxy', None) or None,
+        'tor_proxy': request.form.get('tor_proxy', None) or None,
+        'i2p_proxy': request.form.get('i2p_proxy', None) or None,
+        'permute': 'permute' in request.form,
+        'tags': selected_tags,  # Pass selected tags as a list
+        'site_list': [s.strip() for s in request.form.get('site', '').split(',') if s.strip()],
     }
 
+    logging.info(f"Starting search for usernames: {usernames} with tags: {selected_tags}")
+
     # Start background job
     background_jobs[timestamp] = {
         'completed': False,
@@ -205,46 +251,42 @@ def search():
     }
     background_jobs[timestamp]['thread'].start()
 
-    logging.info(f"Search job started with timestamp: {timestamp}")
-
-    # Redirect to status page
     return redirect(url_for('status', timestamp=timestamp))
 
-
 @app.route('/status/<timestamp>')
 def status(timestamp):
     logging.info(f"Status check for timestamp: {timestamp}")
 
     # Validate timestamp
     if timestamp not in background_jobs:
-        flash('Invalid search session', 'danger')
+        flash('Invalid search session.', 'danger')
+        logging.error(f"Invalid search session: {timestamp}")
         return redirect(url_for('index'))
 
     # Check if job is completed
     if background_jobs[timestamp]['completed']:
         result = job_results.get(timestamp)
         if not result:
-            flash('No results found for this search session', 'warning')
+            flash('No results found for this search session.', 'warning')
+            logging.error(f"No results found for completed session: {timestamp}")
             return redirect(url_for('index'))
 
         if result['status'] == 'completed':
-            # Redirect to results page once done
+            # Note: use the session_folder from the results to redirect
             return redirect(url_for('results', session_id=result['session_folder']))
         else:
-            error_msg = result.get('error', 'Unknown error occurred')
+            error_msg = result.get('error', 'Unknown error occurred.')
             flash(f'Search failed: {error_msg}', 'danger')
+            logging.error(f"Search failed for session {timestamp}: {error_msg}")
             return redirect(url_for('index'))
 
-    # If job is still running, show status page with a simple spinner
+    # If job is still running, show a status page
     return render_template('status.html', timestamp=timestamp)
 
 
 @app.route('/results/<session_id>')
 def results(session_id):
-    if not session_id.startswith('search_'):
-        flash('Invalid results session format', 'danger')
-        return redirect(url_for('index'))
-
+    # Find completed results that match this session_folder
     result_data = next(
         (
             r
@@ -254,6 +296,11 @@ def results(session_id):
         None,
     )
 
+    if not result_data:
+        flash('No results found for this session ID.', 'danger')
+        logging.error(f"Results for session {session_id} not found in job_results.")
+        return redirect(url_for('index'))
+
     return render_template(
         'results.html',
         usernames=result_data['usernames'],
@@ -266,7 +313,9 @@ def results(session_id):
 @app.route('/reports/<path:filename>')
 def download_report(filename):
     try:
-        file_path = os.path.join(REPORTS_FOLDER, filename)
+        file_path = os.path.normpath(os.path.join(REPORTS_FOLDER, filename))
+        if not file_path.startswith(REPORTS_FOLDER):
+            raise Exception("Invalid file path")
         return send_file(file_path)
     except Exception as e:
         logging.error(f"Error serving file {filename}: {str(e)}")
@@ -278,4 +327,5 @@ def download_report(filename):
         level=logging.INFO,
         format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
     )
-    app.run(debug=True)
+    debug_mode = os.getenv('FLASK_DEBUG', 'False').lower() in ['true', '1', 't']
+    app.run(debug=debug_mode)
diff --git a/maigret/web/static/maigret.png b/maigret/web/static/maigret.png