Skip to content
This repository has been archived by the owner on Oct 25, 2024. It is now read-only.

Commit

Permalink
Fix race condition in driver patching for multiple browser usage
Browse files Browse the repository at this point in the history
  • Loading branch information
coskundeniz committed Aug 6, 2023
1 parent 6e63f4f commit 50f65b0
Show file tree
Hide file tree
Showing 5 changed files with 93 additions and 9 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -132,3 +132,5 @@ dmypy.json

geolocation.db
proxy_auth_plugin/

.MULTI_BROWSERS_IN_USE
5 changes: 4 additions & 1 deletion ad_clicker.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,8 @@ def main():

driver = create_webdriver(proxy, args.auth, args.headless, args.incognito)

search_controller = None

try:
search_controller = SearchController(driver, args.query, args.ad_visit_time, args.excludes)
ads = search_controller.search_for_ads()
Expand All @@ -139,7 +141,8 @@ def main():
details = traceback.format_tb(exp.__traceback__)
logger.debug(f"Exception details: \n{''.join(details)}")

search_controller.end_search()
if search_controller:
search_controller.end_search()


if __name__ == "__main__":
Expand Down
32 changes: 27 additions & 5 deletions run_ad_clicker.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,12 +17,12 @@
import traceback
import subprocess
import multiprocessing
from typing import Optional
from itertools import cycle
from concurrent.futures import ProcessPoolExecutor, wait

from argparse import ArgumentParser
from concurrent.futures import ProcessPoolExecutor, wait
from itertools import cycle
from pathlib import Path
from time import sleep
from typing import Optional

from config import logger
from proxy import get_proxies
Expand Down Expand Up @@ -85,6 +85,7 @@ def start_tool(
browser_id: int,
query: str,
proxy: str,
start_timeout: float,
auth: Optional[bool] = None,
excludes: Optional[str] = None,
incognito: Optional[bool] = False,
Expand All @@ -97,6 +98,8 @@ def start_tool(
:param query: Search query
:type proxy: str
:param proxy: Proxy to use in ip:port or user:pass@host:port format
:type start_timeout: float
:param start_timeout: Start timeout to avoid race condition in driver patching
:type auth: bool
:param auth: Whether authentication is used or not for proxy
:type excludes: str
Expand All @@ -105,6 +108,8 @@ def start_tool(
:param incognito: Whether to run in incognito mode
"""

sleep(start_timeout)

command = ["python", "ad_clicker.py"]

command.extend(["-q", query, "-p", proxy])
Expand Down Expand Up @@ -149,8 +154,15 @@ def main() -> None:
arg_parser = get_arg_parser()
args = arg_parser.parse_args()

multi_browser_flag_file = Path(".MULTI_BROWSERS_IN_USE")
multi_browser_flag_file.unlink(missing_ok=True)

MAX_WORKERS = args.browser_count

if MAX_WORKERS > 1:
logger.debug(f"Creating {multi_browser_flag_file} flag file...")
multi_browser_flag_file.touch()

if args.query_file:
queries = get_queries(args.query_file)
query = cycle(queries) if len(queries) <= MAX_WORKERS else iter(queries)
Expand All @@ -164,6 +176,8 @@ def main() -> None:
else:
raise SystemExit("Missing proxy file!")

logger.info(f"Running with {MAX_WORKERS} browser{'s' if MAX_WORKERS > 1 else ''}...")

# 1st way - different query on each browser
if args.multiprocess_style == 1:
with ProcessPoolExecutor(max_workers=MAX_WORKERS) as executor:
Expand All @@ -174,6 +188,7 @@ def main() -> None:
i,
next(query),
next(proxy),
i * 0.5,
args.auth,
args.excludes,
args.incognito,
Expand All @@ -199,7 +214,14 @@ def main() -> None:

futures = [
executor.submit(
start_tool, i, query, next(proxy), args.auth, args.excludes, args.incognito
start_tool,
i,
query,
next(proxy),
i * 0.5,
args.auth,
args.excludes,
args.incognito,
)
for i in range(1, MAX_WORKERS + 1)
]
Expand Down
1 change: 0 additions & 1 deletion run_in_loop.py
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,6 @@ def main() -> None:
command.extend(["-ms", str(args.multiprocess_style)])

while True:
logger.info(f"Running with {args.browser_count} browsers...")
subprocess.run(command)

logger.info(f"Sleeping {args.wait_time} seconds...")
Expand Down
62 changes: 60 additions & 2 deletions utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
-- Orhan Veli
"""

import os
import sys
import random
from pathlib import Path
from time import sleep
Expand Down Expand Up @@ -262,6 +264,13 @@ def create_webdriver(
chrome_options.add_argument("--no-sandbox")
chrome_options.add_argument("--window-size=1920,1080")

multi_browser_flag_file = Path(".MULTI_BROWSERS_IN_USE")
multi_procs_enabled = multi_browser_flag_file.exists()
driver_exe_path = None

if multi_procs_enabled:
driver_exe_path = _get_driver_exe_path()

if proxy:
logger.info(f"Using proxy: {proxy}")

Expand All @@ -279,7 +288,14 @@ def create_webdriver(
else:
chrome_options.add_argument(f"--proxy-server={proxy}")

driver = undetected_chromedriver.Chrome(options=chrome_options, headless=headless)
driver = undetected_chromedriver.Chrome(
driver_executable_path=(
driver_exe_path if multi_procs_enabled and Path(driver_exe_path).exists() else None
),
options=chrome_options,
headless=headless,
user_multi_procs=multi_procs_enabled,
)

# set geolocation of the browser according to IP address
accuracy = 90
Expand All @@ -301,6 +317,48 @@ def create_webdriver(
driver.execute_cdp_cmd("Emulation.setTimezoneOverride", {"timezoneId": timezone})

else:
driver = undetected_chromedriver.Chrome(options=chrome_options, headless=headless)
driver = undetected_chromedriver.Chrome(
driver_executable_path=(
driver_exe_path if multi_procs_enabled and Path(driver_exe_path).exists() else None
),
options=chrome_options,
headless=headless,
user_multi_procs=multi_procs_enabled,
)

return driver


def _get_driver_exe_path() -> str:
    """Get the path for the chromedriver executable to avoid downloading and patching each time

    The file name is ``undetected_chromedriver`` with an ``.exe`` suffix on
    Windows and no suffix on every other platform. The containing folder is
    selected from ``sys.platform`` (or the ``LAMBDA_TASK_ROOT`` environment
    variable). This function only computes the path; it does not check
    whether the file exists.

    :rtype: str
    :returns: Absolute path of the chromedriver executable
    """

    platform = sys.platform
    is_windows = platform.endswith("win32")

    # Only Windows binaries carry an extension. Choosing the suffix up front
    # guarantees the "%s" placeholder is always substituted, including on
    # platforms other than win32/linux/darwin where it was previously left
    # verbatim in the returned file name.
    exe_name = "chromedriver%s" % (".exe" if is_windows else "")

    if is_windows:
        dirpath = "~/appdata/roaming/undetected_chromedriver"
    elif "LAMBDA_TASK_ROOT" in os.environ:
        dirpath = "/tmp/undetected_chromedriver"
    elif platform.startswith(("linux", "linux2")):
        dirpath = "~/.local/share/undetected_chromedriver"
    elif platform.endswith("darwin"):
        dirpath = "~/Library/Application Support/undetected_chromedriver"
    else:
        dirpath = "~/.undetected_chromedriver"

    driver_exe_folder = os.path.abspath(os.path.expanduser(dirpath))

    return os.path.join(driver_exe_folder, "_".join(["undetected", exe_name]))

0 comments on commit 50f65b0

Please sign in to comment.