Skip to content

Commit

Permalink
Improving the web interface (#1975)
Browse files: browse the repository at this point in the history
* update web interface with command-line options
* improve web interface
* update README images of web interface
* fix bug in app.py
* fix web interface
  • Loading branch information
overcuriousity authored Dec 17, 2024
1 parent 900ed84 commit c2e3e96
Show file tree
Hide file tree
Showing 7 changed files with 710 additions and 138 deletions.
118 changes: 84 additions & 34 deletions maigret/web/app.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
# app.py
from flask import (
Flask,
render_template,
Expand All @@ -22,7 +21,7 @@
app = Flask(__name__)
app.secret_key = 'your-secret-key-here'

# Add background job tracking
#add background job tracking
background_jobs = {}
job_results = {}

Expand All @@ -46,16 +45,38 @@ async def maigret_search(username, options):
logger = setup_logger(logging.WARNING, 'maigret')
try:
db = MaigretDatabase().load_from_path(MAIGRET_DB_FILE)
sites = db.ranked_sites_dict(top=int(options.get('top_sites', 500)))

top_sites = int(options.get('top_sites') or 500)
if options.get('all_sites'):
top_sites = 999999999 # effectively all

tags = options.get('tags', [])
site_list= options.get('site_list', [])
logger.info(f"Filtering sites by tags: {tags}")

sites = db.ranked_sites_dict(
top=top_sites,
tags=tags,
names=site_list,
disabled=False,
id_type='username'
)

logger.info(f"Found {len(sites)} sites matching the tag criteria")

results = await maigret.search(
username=username,
site_dict=sites,
timeout=int(options.get('timeout', 30)),
logger=logger,
id_type=options.get('id_type', 'username'),
id_type='username',
cookies=COOKIES_FILE if options.get('use_cookies') else None,
is_parsing_enabled=True,
is_parsing_enabled=(not options.get('disable_extracting', False)),
recursive_search_enabled=(not options.get('disable_recursive_search', False)),
check_domains=options.get('with_domains', False),
proxy=options.get('proxy', None),
tor_proxy=options.get('tor_proxy', None),
i2p_proxy=options.get('i2p_proxy', None),
)
return results
except Exception as e:
Expand All @@ -68,36 +89,31 @@ async def search_multiple_usernames(usernames, options):
for username in usernames:
try:
search_results = await maigret_search(username.strip(), options)
results.append((username.strip(), options['id_type'], search_results))
results.append((username.strip(), 'username', search_results))
except Exception as e:
logging.error(f"Error searching username {username}: {str(e)}")
return results


def process_search_task(usernames, options, timestamp):
try:
# Setup event loop for async operations
loop = asyncio.new_event_loop()
asyncio.set_event_loop(loop)

# Run the search
general_results = loop.run_until_complete(
search_multiple_usernames(usernames, options)
)

# Create session folder
session_folder = os.path.join(REPORTS_FOLDER, f"search_{timestamp}")
os.makedirs(session_folder, exist_ok=True)

# Save the combined graph
graph_path = os.path.join(session_folder, "combined_graph.html")
maigret.report.save_graph_report(
graph_path,
general_results,
MaigretDatabase().load_from_path(MAIGRET_DB_FILE),
)

# Save individual reports
individual_reports = []
for username, id_type, results in general_results:
report_base = os.path.join(session_folder, f"report_{username}")
Expand Down Expand Up @@ -154,25 +170,42 @@ def process_search_task(usernames, options, timestamp):
}
)

# Save results and mark job as complete
# save results and mark job as complete using timestamp as key
job_results[timestamp] = {
'status': 'completed',
'session_folder': f"search_{timestamp}",
'graph_file': os.path.join(f"search_{timestamp}", "combined_graph.html"),
'usernames': usernames,
'individual_reports': individual_reports,
}

except Exception as e:
logging.error(f"Error in search task for timestamp {timestamp}: {str(e)}")
job_results[timestamp] = {'status': 'failed', 'error': str(e)}
finally:
background_jobs[timestamp]['completed'] = True


@app.route('/')
def index():
return render_template('index.html')


#load site data for autocomplete
db = MaigretDatabase().load_from_path(MAIGRET_DB_FILE)
site_options = []

for site in db.sites:
#add main site name
site_options.append(site.name)
#add URL if different from name
if site.url_main and site.url_main not in site_options:
site_options.append(site.url_main)

#sort and deduplicate
site_options = sorted(set(site_options))

return render_template('index.html', site_options=site_options)


# Modified search route
@app.route('/search', methods=['POST'])
def search():
usernames_input = request.form.get('usernames', '').strip()
Expand All @@ -187,15 +220,28 @@ def search():
# Create timestamp for this search session
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")

logging.info(f"Starting search for usernames: {usernames}")
# Get selected tags - ensure it's a list
selected_tags = request.form.getlist('tags')
logging.info(f"Selected tags: {selected_tags}")

options = {
'top_sites': request.form.get('top_sites', '500'),
'timeout': request.form.get('timeout', '30'),
'id_type': 'username', # fixed as username
'top_sites': request.form.get('top_sites') or '500',
'timeout': request.form.get('timeout') or '30',
'use_cookies': 'use_cookies' in request.form,
'all_sites': 'all_sites' in request.form,
'disable_recursive_search': 'disable_recursive_search' in request.form,
'disable_extracting': 'disable_extracting' in request.form,
'with_domains': 'with_domains' in request.form,
'proxy': request.form.get('proxy', None) or None,
'tor_proxy': request.form.get('tor_proxy', None) or None,
'i2p_proxy': request.form.get('i2p_proxy', None) or None,
'permute': 'permute' in request.form,
'tags': selected_tags, # Pass selected tags as a list
'site_list': [s.strip() for s in request.form.get('site', '').split(',') if s.strip()],
}

logging.info(f"Starting search for usernames: {usernames} with tags: {selected_tags}")

# Start background job
background_jobs[timestamp] = {
'completed': False,
Expand All @@ -205,46 +251,42 @@ def search():
}
background_jobs[timestamp]['thread'].start()

logging.info(f"Search job started with timestamp: {timestamp}")

# Redirect to status page
return redirect(url_for('status', timestamp=timestamp))


@app.route('/status/<timestamp>')
def status(timestamp):
logging.info(f"Status check for timestamp: {timestamp}")

# Validate timestamp
if timestamp not in background_jobs:
flash('Invalid search session', 'danger')
flash('Invalid search session.', 'danger')
logging.error(f"Invalid search session: {timestamp}")
return redirect(url_for('index'))

# Check if job is completed
if background_jobs[timestamp]['completed']:
result = job_results.get(timestamp)
if not result:
flash('No results found for this search session', 'warning')
flash('No results found for this search session.', 'warning')
logging.error(f"No results found for completed session: {timestamp}")
return redirect(url_for('index'))

if result['status'] == 'completed':
# Redirect to results page once done
# Note: use the session_folder from the results to redirect
return redirect(url_for('results', session_id=result['session_folder']))
else:
error_msg = result.get('error', 'Unknown error occurred')
error_msg = result.get('error', 'Unknown error occurred.')
flash(f'Search failed: {error_msg}', 'danger')
logging.error(f"Search failed for session {timestamp}: {error_msg}")
return redirect(url_for('index'))

# If job is still running, show status page with a simple spinner
# If job is still running, show a status page
return render_template('status.html', timestamp=timestamp)


@app.route('/results/<session_id>')
def results(session_id):
if not session_id.startswith('search_'):
flash('Invalid results session format', 'danger')
return redirect(url_for('index'))

# Find completed results that match this session_folder
result_data = next(
(
r
Expand All @@ -254,6 +296,11 @@ def results(session_id):
None,
)

if not result_data:
flash('No results found for this session ID.', 'danger')
logging.error(f"Results for session {session_id} not found in job_results.")
return redirect(url_for('index'))

return render_template(
'results.html',
usernames=result_data['usernames'],
Expand All @@ -266,7 +313,9 @@ def results(session_id):
@app.route('/reports/<path:filename>')
def download_report(filename):
try:
file_path = os.path.join(REPORTS_FOLDER, filename)
file_path = os.path.normpath(os.path.join(REPORTS_FOLDER, filename))
if not file_path.startswith(REPORTS_FOLDER):
raise Exception("Invalid file path")
return send_file(file_path)
except Exception as e:
logging.error(f"Error serving file {filename}: {str(e)}")
Expand All @@ -278,4 +327,5 @@ def download_report(filename):
level=logging.INFO,
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
)
app.run(debug=True)
debug_mode = os.getenv('FLASK_DEBUG', 'False').lower() in ['true', '1', 't']
app.run(debug=debug_mode)
Binary file added maigret/web/static/maigret.png
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading

0 comments on commit c2e3e96

Please sign in to comment.