Add max_scroll_limit option from premium version and randomize wait on ad page (update for 100 stars)
coskundeniz · Feb 15, 2024 · f5b6b7c · f5b6b7c
1 parent b280ca8
commit f5b6b7c
Show file tree

Hide file tree

Showing 5 changed files with 63 additions and 24 deletions.
diff --git a/README.md b/README.md
@@ -5,7 +5,7 @@ This command-line tool clicks ads for a certain query on Google search using [un
 Old version of the tool can be found in the `old_version` branch.
 
 * Requires Python 3.9+
-* Requires Chrome 108+
+* Requires the latest version of Chrome
 
 [Related post on Medium](https://python.plainenglish.io/google-ads-clicker-with-python-selenium-and-tor-a6ff8078da2a)
 
@@ -27,14 +27,16 @@ See [here](https://github.com/coskundeniz/ad_clicker/wiki/Creating-and-running-D
 
 * You should see `(env)` at the beginning of your command prompt, which shows that the virtual environment is activated.
 
+* Before running the commands below for the first time, run `python ad_clicker.py -q test` once and stop it by pressing CTRL+C once the browser has opened.
+
 ```
-usage: python ad_clicker.py [-h] [-q QUERY] [-e EXCLUDES] [-t AD_VISIT_TIME] [--headless] [-p PROXY] [-pf PROXY_FILE] [--auth] [--incognito]
+usage: python ad_clicker.py [-h] [-q QUERY] [-e EXCLUDES] [-l MAX_SCROLL_LIMIT] [--headless] [-p PROXY] [-pf PROXY_FILE] [--auth] [--incognito]
 
 optional arguments:
   -h, --help                                                       show this help message and exit
   -q QUERY, --query QUERY                                          Search query
-  -t AD_VISIT_TIME, --visittime AD_VISIT_TIME                      Number of seconds to wait on the ad page opened
-  --headless                                                       Use headless browser
+  -l MAX_SCROLL_LIMIT, --max_scroll_limit MAX_SCROLL_LIMIT         Number of maximum scrolls on the search results page
+  --headless                                                       Use headless browser (not recommended)
   -p PROXY, --proxy PROXY                                          Use the given proxy in ip:port format
   -pf PROXY_FILE, --proxy_file PROXY_FILE                          Select a proxy from the given file
   --auth                                                           Use proxy with username and password. If this is passed,
@@ -44,12 +46,11 @@ optional arguments:
   -qf QUERY_FILE, --query_file QUERY_FILE                          Read queries to search from the given file (valid for multiprocess run)
   -bc BROWSER_COUNT, --browser_count BROWSER_COUNT                 Maximum number of browsers to run concurrently (valid for multiprocess run)
   -ms MULTIPROCESS_STYLE, --multiprocess_style MULTIPROCESS_STYLE  Style of the multiprocess run. (valid for multiprocess run)
-                                                                   1: single browser instance for each query (default)
-                                                                   2: multiple browser instances for each query
-
+                                                                   1: different query on each browser (default)
+                                                                   2: same query on each browser
 ```
 
-`python ad_clicker.py -q <search query> [-e EXCLUDES] [-t ad_visit_time_in_seconds] [--headless] [-p PROXY] [-pf PROXY_FILE] [--auth] [--incognito]`
+`python ad_clicker.py -q <search query> [-e EXCLUDES] [-l MAX_SCROLL_LIMIT] [--headless] [-p PROXY] [-pf PROXY_FILE] [--auth] [--incognito]`
 
 
 ### Examples
@@ -64,9 +65,11 @@ otherwise you should give the full path.
 
     * `python ad_clicker.py -q "wireless keyboard"`
 
-* Search for "wireless keyboard" with 5 seconds visit time on clicked ad pages.
+* Search for "wireless keyboard" with maximum scroll set to 5.
+
+    * `python ad_clicker.py -q "wireless keyboard" -l 5`
 
-    * `python ad_clicker.py -q "wireless keyboard" -t 5`
+    * By default (0), it scrolls until the end of the search results page.
 
 * Search for "wireless keyboard" using headless browser.
 
@@ -146,6 +149,10 @@ otherwise you should give the full path.
 
     * `python run_in_loop.py -qf ~/queries.txt -pf ~/proxies.txt --auth -bc 4`
 
+* Run the tool in loop with given number of browser instances and maximum scroll set to 10.
+
+    * `python run_in_loop.py -qf ~/queries.txt -pf ~/proxies.txt --auth -bc 4 -l 10`
+
 * Run the tool in loop with given number of browser instances and 2 minutes wait between runs.
 
     * `python run_in_loop.py -qf ~/queries.txt -pf ~/proxies.txt --auth -bc 2 -wt 120`

diff --git a/ad_clicker.py b/ad_clicker.py
@@ -32,12 +32,11 @@ def get_arg_parser() -> ArgumentParser:
     arg_parser = ArgumentParser()
     arg_parser.add_argument("-q", "--query", help="Search query")
     arg_parser.add_argument(
-        "-t",
-        "--visittime",
-        default=4,
+        "-l",
+        "--max_scroll_limit",
+        default=0,
         type=int,
-        dest="ad_visit_time",
-        help="Number of seconds to wait on the ad page opened",
+        help="Number of maximum scrolls on the search results page",
     )
     arg_parser.add_argument("--headless", action="store_true", help="Use headless browser")
     arg_parser.add_argument(
@@ -100,6 +99,7 @@ def main():
 
     if args.poem:
         get_poem(args.poem)
+        raise SystemExit()
 
     if args.id:
         update_log_formats(args.id)
@@ -124,7 +124,9 @@ def main():
     search_controller = None
 
     try:
-        search_controller = SearchController(driver, args.query, args.ad_visit_time, args.excludes)
+        search_controller = SearchController(
+            driver, args.query, args.max_scroll_limit, args.excludes
+        )
         ads = search_controller.search_for_ads()
 
         if not ads:

diff --git a/run_ad_clicker.py b/run_ad_clicker.py
@@ -46,6 +46,13 @@ def get_arg_parser() -> ArgumentParser:
         "--proxy_file",
         help="Select a proxy from the given file",
     )
+    arg_parser.add_argument(
+        "-l",
+        "--max_scroll_limit",
+        default=0,
+        type=int,
+        help="Number of maximum scrolls on the search results page",
+    )
     arg_parser.add_argument(
         "-e",
         "--excludes",
@@ -85,6 +92,7 @@ def start_tool(
     query: str,
     proxy: str,
     start_timeout: float,
+    max_scroll_limit: int,
     auth: Optional[bool] = None,
     excludes: Optional[str] = None,
     incognito: Optional[bool] = False,
@@ -99,6 +107,8 @@ def start_tool(
     :param proxy: Proxy to use in ip:port or user:pass@host:port format
     :type start_timeout: float
     :param start_timeout: Start timeout to avoid race condition in driver patching
+    :type max_scroll_limit: int
+    :param max_scroll_limit: Number of maximum scrolls on the search results page
     :type auth: bool
     :param auth: Whether authentication is used or not for proxy
     :type excludes: str
@@ -111,7 +121,7 @@ def start_tool(
 
     command = ["python", "ad_clicker.py"]
 
-    command.extend(["-q", query, "-p", proxy])
+    command.extend(["-q", query, "-p", proxy, "-l", str(max_scroll_limit)])
 
     if auth:
         command.append("--auth")
@@ -167,6 +177,7 @@ def main() -> None:
                     next(query),
                     next(proxy),
                     i * 0.5,
+                    args.max_scroll_limit,
                     args.auth,
                     args.excludes,
                     args.incognito,
@@ -195,6 +206,7 @@ def main() -> None:
                         query,
                         next(proxy),
                         i * 0.5,
+                        args.max_scroll_limit,
                         args.auth,
                         args.excludes,
                         args.incognito,
@@ -221,4 +233,3 @@ def main() -> None:
         logger.debug(f"Exception: {message}")
         details = traceback.format_tb(exp.__traceback__)
         logger.debug(f"Exception details: \n{''.join(details)}")
-
diff --git a/run_in_loop.py b/run_in_loop.py
@@ -53,6 +53,13 @@ def get_arg_parser() -> ArgumentParser:
         "--proxy_file",
         help="Select a proxy from the given file",
     )
+    arg_parser.add_argument(
+        "-l",
+        "--max_scroll_limit",
+        default=0,
+        type=int,
+        help="Number of maximum scrolls on the search results page",
+    )
     arg_parser.add_argument(
         "-e",
         "--excludes",
@@ -107,7 +114,9 @@ def main() -> None:
 
     command = ["python", "run_ad_clicker.py"]
 
-    command.extend(["-qf", args.query_file, "-pf", args.proxy_file])
+    command.extend(
+        ["-qf", args.query_file, "-pf", args.proxy_file, "-l", str(args.max_scroll_limit)]
+    )
 
     if args.auth:
         command.append("--auth")

diff --git a/search_controller.py b/search_controller.py
@@ -46,8 +46,8 @@ class SearchController:
     :param driver: Selenium Chrome webdriver instance
     :type query: str
     :param query: Search query
-    :type ad_visit_time: int
-    :param ad_visit_time: Number of seconds to wait on the ad page
+    :type max_scroll_limit: int
+    :param max_scroll_limit: Number of maximum scrolls on the search results page
     :type excludes: str
     :param excludes: Words to exclude ads containing them in url or title
     """
@@ -63,10 +63,10 @@ class SearchController:
     AD_TITLE = (By.CSS_SELECTOR, "div[role='heading']")
 
     def __init__(
-        self, driver: selenium.webdriver, query: str, ad_visit_time: int, excludes: str = None
+        self, driver: selenium.webdriver, query: str, max_scroll_limit: int, excludes: str = None
     ) -> None:
         self._driver = driver
-        self._ad_visit_time = ad_visit_time
+        self._max_scroll_limit = max_scroll_limit
         self._search_query, self._filter_words = self._process_query(query)
         self._exclude_list = None
 
@@ -172,7 +172,7 @@ def click_ads(self, ads: AdList) -> None:
                 for window_handle in self._driver.window_handles:
                     if window_handle != original_window_handle:
                         self._driver.switch_to.window(window_handle)
-                        sleep(self._ad_visit_time)
+                        sleep(random.choice(range(4, 9)))
 
                         logger.debug(f"Current url on new tab: {self._driver.current_url}")
 
@@ -220,6 +220,10 @@ def _get_ad_links(self) -> AdList:
 
         ads = []
 
+        scroll_count = 0
+
+        logger.debug(f"Max scroll limit: {self._max_scroll_limit}")
+
         while not self._is_scroll_at_the_end():
             try:
                 top_ads_containers = self._driver.find_elements(*self.TOP_ADS_CONTAINER)
@@ -237,9 +241,15 @@ def _get_ad_links(self) -> AdList:
             except NoSuchElementException:
                 logger.debug("Could not found bottom ads!")
 
+            if self._max_scroll_limit > 0 and scroll_count == self._max_scroll_limit:
+                logger.debug("Reached to max scroll limit! Ending scroll...")
+                break
+
             self._driver.find_element(By.TAG_NAME, "body").send_keys(Keys.PAGE_DOWN)
             sleep(2)
 
+            scroll_count += 1
+
         if not ads:
             return []