Refactored logging statements to use lazy % formatting #3335

Merged 10 commits on Mar 9, 2023
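The change replaces eager f-string interpolation in logging calls with logging's lazy %-style formatting: the format string and its arguments are handed to the logger separately, and the message is only built if a handler actually emits the record. It also keeps the unformatted template on the log record, which log aggregators can use for grouping, and it is the pattern pylint's logging-fstring-interpolation check recommends. A minimal sketch of the difference (the URL value here is illustrative, not taken from the PR):

import logging

logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger(__name__)

url = "https://example.com/artifact.zst"

# Eager: the f-string is interpolated even though INFO records are filtered out here.
logger.info(f"Downloading {url}")

# Lazy: the format string and argument are stored on the LogRecord and only
# merged into a message when a handler actually emits the record.
logger.info("Downloading %s", url)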
10 changes: 5 additions & 5 deletions bugbug/db.py
@@ -50,7 +50,7 @@ def is_different_schema(path):
r = requests.get(url)

if not r.ok:
logger.info(f"Version file is not yet available to download for {path}")
logger.info("Version file is not yet available to download for %s", path)
return True

prev_version = int(r.text)
@@ -67,7 +67,7 @@ def download_support_file(path, file_name, extract=True):
url = urljoin(DATABASES[path]["url"], file_name)
path = os.path.join(os.path.dirname(path), file_name)

logger.info(f"Downloading {url} to {path}")
logger.info("Downloading %s to %s", url, path)
updated = utils.download_check_etag(url, path)

if extract and updated and path.endswith(".zst"):
@@ -77,7 +77,7 @@ def download_support_file(path, file_name, extract=True):
return True
except requests.exceptions.HTTPError:
logger.info(
f"{file_name} is not yet available to download for {path}", exc_info=True
"%s is not yet available to download for %s", file_name, path, exc_info=True
)
return False

@@ -92,7 +92,7 @@ def download(path, support_files_too=False, extract=True):

url = DATABASES[path]["url"]
try:
logger.info(f"Downloading {url} to {zst_path}")
logger.info("Downloading %s to %s", url, zst_path)
updated = utils.download_check_etag(url, zst_path)

if extract and updated:
@@ -106,7 +106,7 @@ def download(path, support_files_too=False, extract=True):

return successful
except requests.exceptions.HTTPError:
logger.info(f"{url} is not yet available to download", exc_info=True)
logger.info("%s is not yet available to download", url, exc_info=True)
return False


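One detail in the download_support_file hunk above: with lazy formatting, the positional arguments that fill the format string come first, and exc_info=True stays a keyword argument on the same call. A standalone sketch under the same assumptions (download_if_available is a hypothetical helper, not the PR's code; URL and path are illustrative):

import logging

import requests

logger = logging.getLogger(__name__)

def download_if_available(url: str, path: str) -> bool:
    """Return True if the file was fetched, False if it is not published yet."""
    try:
        response = requests.get(url)
        response.raise_for_status()
        with open(path, "wb") as f:
            f.write(response.content)
        return True
    except requests.exceptions.HTTPError:
        # The two %s placeholders are filled from the positional args;
        # exc_info=True attaches the current exception's traceback to the record.
        logger.info("%s is not yet available to download for %s", url, path, exc_info=True)
        return False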
4 changes: 2 additions & 2 deletions bugbug/github.py
@@ -58,7 +58,7 @@ def get_token(self) -> str:

def fetch_events(self, events_url: str) -> list:
self.api_limit()
logger.info(f"Fetching {events_url}")
logger.info("Fetching %s", events_url)
headers = {"Authorization": "token {}".format(self.get_token())}
response = get_session("github").get(events_url, headers=headers)
response.raise_for_status()
@@ -78,7 +78,7 @@ def fetch_issues(
if isinstance(data, dict):
data = [data]

logger.info(f"Fetching {url}")
logger.info("Fetching %s", url)

if retrieve_events:
for item in data:
12 changes: 7 additions & 5 deletions bugbug/models/fixtime.py
@@ -92,11 +92,11 @@ def _quantiles(n):
quantiles = _quantiles(2)

logger.info(
f"Max fix time: {max(fix_time for bug_id, fix_time in bug_fix_times)}"
"Max fix time: %s", max(fix_time for bug_id, fix_time in bug_fix_times)
)
logger.info(f"Fix time quantiles: {quantiles}")
logger.info(f"Fix time quartiles: {_quantiles(4)}")
logger.info(f"Fix time deciles: {_quantiles(10)}")
logger.info("Fix time quantiles: %s", quantiles)
logger.info("Fix time quartiles: %s", _quantiles(4))
logger.info("Fix time deciles: %s", _quantiles(10))

classes = {}
for bug_id, fix_time in bug_fix_times:
@@ -110,7 +110,7 @@

for i in range(len(quantiles) + 1):
logger.info(
f"{sum(1 for label in classes.values() if label == i)} bugs are in the {i}th quantile"
"%d bugs are in the %dth quantile",
sum(1 for label in classes.values() if label == i),
i,
)

return classes, list(range(len(quantiles) + 1))
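Worth keeping in mind for the quantile logging above: only the string interpolation is deferred. The argument expressions, including the sum(...) generator, are still evaluated at call time even when INFO records are filtered out. A small illustration (the classes dict and index are made up):

import logging

logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger(__name__)

classes = {1: 0, 2: 1, 3: 0}
i = 0

# The sum() below runs regardless of the log level; only building the final
# "... bugs are in the ...th quantile" string is skipped when the record is dropped.
logger.info(
    "%d bugs are in the %dth quantile",
    sum(1 for label in classes.values() if label == i),
    i,
)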
18 changes: 9 additions & 9 deletions bugbug/repository.py
@@ -963,7 +963,7 @@ def _hg_log(revs: list[bytes], branch: str = "tip") -> tuple[Commit, ...]:


def get_revs(hg, rev_start=0, rev_end="tip"):
logger.info(f"Getting revs from {rev_start} to {rev_end}...")
logger.info("Getting revs from %s to %s...", rev_start, rev_end)

args = hglib.util.cmdbuilder(
b"log",
@@ -1021,7 +1021,7 @@ def __setitem__(self, key, value):
def calculate_experiences(
commits: Collection[Commit], first_pushdate: datetime, save: bool = True
) -> None:
logger.info(f"Analyzing seniorities from {len(commits)} commits...")
logger.info("Analyzing seniorities from %d commits...", len(commits))

experiences = Experiences(save)

@@ -1034,7 +1034,7 @@ def calculate_experiences(
time_lapse = commit.pushdate - experiences[key]
commit.seniority_author = time_lapse.total_seconds()

logger.info(f"Analyzing experiences from {len(commits)} commits...")
logger.info("Analyzing experiences from %d commits...", len(commits))

# Note: In the case of files, directories, components, we can't just use the sum of previous commits, as we could end
# up overcounting them. For example, consider a commit A which modifies "dir1" and "dir2", a commit B which modifies
@@ -1365,10 +1365,10 @@ def download_commits(

first_pushdate = get_first_pushdate(repo_dir)

logger.info(f"Mining {len(revs)} commits...")
logger.info("Mining %d commits...", len(revs))

if not use_single_process:
logger.info(f"Using {os.cpu_count()} processes...")
logger.info("Using %d processes...", os.cpu_count())
commits = hg_log_multi(repo_dir, revs, branch)
else:
commits = hg_log(hg, revs, branch)
@@ -1377,7 +1377,7 @@ def download_commits(

commits_num = len(commits)

logger.info(f"Mining {commits_num} patches...")
logger.info("Mining %d patches...", commits_num)

global code_analysis_server

@@ -1485,9 +1485,9 @@ def clone(

# Pull, to make sure the pushlog is generated.
with hglib.open(repo_dir) as hg:
logger.info(f"Pulling {repo_dir}")
logger.info("Pulling %s", repo_dir)
hg.pull(update=update)
logger.info(f"{repo_dir} pulled")
logger.info("%s pulled", repo_dir)

return
except hglib.error.ServerError as e:
@@ -1506,7 +1506,7 @@ def clone(
)
subprocess.run(cmd, check=True)

logger.info(f"{repo_dir} cloned")
logger.info("%s cloned", repo_dir)


def pull(repo_dir: str, branch: str, revision: str) -> None:
2 changes: 1 addition & 1 deletion bugbug/similarity.py
@@ -66,7 +66,7 @@ def download_similarity_model(model_name):
path = f"{model_name_to_class[model_name].__name__.lower()}.similaritymodel"
url = f"https://community-tc.services.mozilla.com/api/index/v1/task/project.bugbug.train_similarity.latest/artifacts/public/{path}.zst"

logger.info(f"Downloading similarity model from {url}...")
logger.info("Downloading similarity model from %s...", url)
updated = download_check_etag(url)
if updated:
zstd_decompress(path)
26 changes: 19 additions & 7 deletions bugbug/test_scheduling.py
@@ -207,7 +207,7 @@ def get_push_data(

push_data_queue.append(elem)

logger.info(f"push data nodes: {push_data_count}")
logger.info("Push data nodes: %d", push_data_count)

push_data = [
(
@@ -235,7 +235,7 @@ def get_push_data(
tuple(all_groups_set), cast(Set[Runnable], all_groups_set), "group"
)
all_groups_set = set(all_groups)
logger.info(f"{len(all_groups_set)} manifests run in the last 28 pushes")
logger.info("%d manifests run in the last 28 pushes", len(all_groups_set))

all_runnables_set = set(
sum((list(push_runnables) for _, _, push_runnables, _, _ in push_data), [])
@@ -245,7 +245,7 @@ def get_push_data(
tuple(all_runnables_set), all_runnables_set, granularity
)
all_runnables_set = set(all_runnables)
logger.info(f"{len(all_runnables_set)} runnables run in the last 28 pushes")
logger.info("%d runnables run in the last 28 pushes", len(all_runnables_set))

def push_data_iter() -> Iterator[PushResult]:
return (
@@ -447,7 +447,7 @@ def count_runs_and_failures(tasks):

stats[couple] = (support, confidence)

logger.info(f"{skipped} couples skipped because their support was too low")
logger.info("%d couples skipped because their support was too low", skipped)

logger.info("Redundancies with the highest support and confidence:")
for couple, (support, confidence) in sorted(
@@ -456,7 +456,7 @@ def count_runs_and_failures(tasks):
failure_count = count_both_failures[couple]
run_count = count_runs[couple]
logger.info(
f"{couple[0]} - {couple[1]} redundancy confidence {confidence}, support {support} ({failure_count} over {run_count})."
"%s - %s redundancy confidence %f, support %d (%d over %d).",
couple[0],
couple[1],
confidence,
support,
failure_count,
run_count,
)

logger.info("Redundancies with the highest confidence and lowest support:")
@@ -466,7 +472,13 @@ def count_runs_and_failures(tasks):
failure_count = count_both_failures[couple]
run_count = count_runs[couple]
logger.info(
f"{couple[0]} - {couple[1]} redundancy confidence {confidence}, support {support} ({failure_count} over {run_count})."
"%s - %s redundancy confidence %f, support %d (%d over %d).",
couple[0],
couple[1],
confidence,
support,
failure_count,
run_count,
)

failing_together: dict = {}
Expand Down Expand Up @@ -515,7 +527,7 @@ def count_runs_and_failures(tasks):
failing_together[couple[0]][couple[1]] = (support, confidence)

for percentage, count in count_redundancies.most_common():
logger.info(f"{count} with {percentage} confidence")
logger.info("%d with %f%% confidence", count, percentage)

failing_together_db = get_failing_together_db(granularity, False)

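The last hunk in this file also exercises two %-formatting details that f-strings hide: explicit conversions (%d for integers, %f for floats) and the doubled %% needed for a literal percent sign. A quick illustration with made-up values:

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

count, percentage = 42, 87.5

# %d renders the int, %f the float (six decimals by default), %% a literal '%'.
logger.info("%d with %f%% confidence", count, percentage)
# Merged message: "42 with 87.500000% confidence"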
2 changes: 1 addition & 1 deletion bugbug/utils.py
@@ -224,7 +224,7 @@ def download_model(model_name: str) -> str:

path = f"{model_name}model"
url = f"https://community-tc.services.mozilla.com/api/index/v1/task/project.bugbug.train_{model_name}.{version}/artifacts/public/{path}.zst"
logger.info(f"Downloading {url}...")
logger.info("Downloading %s...", url)
updated = download_check_etag(url)
if updated:
zstd_decompress(path)
8 changes: 4 additions & 4 deletions http_service/bugbug_http/boot.py
@@ -21,7 +21,7 @@
def boot_worker() -> None:
# Clone autoland
def clone_autoland() -> None:
logger.info(f"Cloning autoland in {REPO_DIR}...")
logger.info("Cloning autoland in %s...", REPO_DIR)
repository.clone(REPO_DIR, "https://hg.mozilla.org/integration/autoland")

def extract_past_failures_label() -> None:
@@ -118,7 +118,7 @@ def retrieve_schedulable_tasks() -> None:
for push_id, push_obj in r.json()["pushes"].items()
]

logger.info(f"Retrieving known tasks from {revs}")
logger.info("Retrieving known tasks from %s...", revs)

# Store in a file the list of tasks in the latest autoland pushes.
# We use more than one to protect ourselves from broken decision tasks.
@@ -130,7 +130,7 @@ def retrieve_schedulable_tasks() -> None:
if r.ok:
known_tasks.update(r.json())

logger.info(f"Retrieved {len(known_tasks)} tasks")
logger.info("Retrieved %d tasks", len(known_tasks))

assert len(known_tasks) > 0

@@ -198,7 +198,7 @@ def retrieve_schedulable_tasks() -> None:
logger.info("Touched together DB updated.")
except Exception as e:
# It's not ideal, but better not to crash the service!
logger.error(f"Exception while updating commits DB: {e}")
logger.error("Exception while updating commits DB: %s", e)

# Wait list of schedulable tasks to be downloaded and written to disk.
retrieve_schedulable_tasks_future.result()
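In the boot.py error path above, the caught exception is passed as a lazy %s argument, so only its message ends up in the log line; if the full traceback were wanted, exc_info=True (or logger.exception) would add it, as the db.py hunks do. A minimal sketch, with an illustrative exception:

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

try:
    raise RuntimeError("commits DB could not be updated")
except Exception as e:
    # Lazy %s logs str(e) only; swapping in exc_info=True (or logger.exception)
    # would also attach the traceback without changing the format string.
    logger.error("Exception while updating commits DB: %s", e)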
4 changes: 2 additions & 2 deletions http_service/bugbug_http/models.py
@@ -181,7 +181,7 @@ def schedule_tests(branch: str, rev: str) -> str:
from bugbug_http.app import JobInfo

job = JobInfo(schedule_tests, branch, rev)
LOGGER.info(f"Processing {job}...")
LOGGER.info("Processing %s...", job)

# Pull the revision to the local repository
LOGGER.info("Pulling commits from the remote repository...")
@@ -256,7 +256,7 @@ def get_config_specific_groups(config: str) -> str:
from bugbug_http.app import JobInfo

job = JobInfo(get_config_specific_groups, config)
LOGGER.info(f"Processing {job}...")
LOGGER.info("Processing %s...", job)

equivalence_sets = testselect._get_equivalence_sets(0.9)

2 changes: 1 addition & 1 deletion scripts/bug_classifier.py
@@ -29,7 +29,7 @@ def classify_bugs(model_name: str, classifier: str, bug_id: int) -> None:
model_file_name = f"{model_name}model"

if not os.path.exists(model_file_name):
logger.info(f"{model_file_name} does not exist. Downloading the model....")
logger.info("%s does not exist. Downloading the model....", model_file_name)
try:
download_model(model_name)
except requests.HTTPError:
12 changes: 6 additions & 6 deletions scripts/bug_retriever.py
@@ -42,7 +42,7 @@ def retrieve_bugs(self, limit: int = None) -> None:
else:
changed_ids = set()

logger.info(f"Retrieved {len(changed_ids)} IDs.")
logger.info("Retrieved %d IDs.", len(changed_ids))

all_components = bugzilla.get_product_component_count(9999)

@@ -60,17 +60,17 @@ def retrieve_bugs(self, limit: int = None) -> None:
two_years_and_six_months_ago = datetime.utcnow() - relativedelta(
years=2, months=6
)
logger.info(f"Retrieving bug IDs since {two_years_and_six_months_ago}")
logger.info("Retrieving bug IDs since %s", two_years_and_six_months_ago)
timespan_ids = bugzilla.get_ids_between(two_years_and_six_months_ago)
if limit:
timespan_ids = timespan_ids[-limit:]
logger.info(f"Retrieved {len(timespan_ids)} IDs.")
logger.info("Retrieved %d IDs.", len(timespan_ids))

# Get IDs of labelled bugs.
labelled_bug_ids = labels.get_all_bug_ids()
if limit:
labelled_bug_ids = labelled_bug_ids[-limit:]
logger.info(f"{len(labelled_bug_ids)} labelled bugs to download.")
logger.info("%d labelled bugs to download.", len(labelled_bug_ids))

# Get the commits DB, as we need it to get the bug IDs linked to recent commits.
# XXX: Temporarily avoid downloading the commits DB when a limit is set, to avoid the integration test fail when the commits DB is bumped.
@@ -89,7 +89,7 @@ def retrieve_bugs(self, limit: int = None) -> None:
)
if limit:
commit_bug_ids = commit_bug_ids[-limit:]
logger.info(f"{len(commit_bug_ids)} bugs linked to commits to download.")
logger.info("%d bugs linked to commits to download.", len(commit_bug_ids))

# Get IDs of bugs which are regressions, bugs which caused regressions (useful for the regressor model),
# and blocked bugs.
@@ -119,7 +119,7 @@ def retrieve_bugs(self, limit: int = None) -> None:
]
if limit:
test_failure_bug_ids = test_failure_bug_ids[-limit:]
logger.info(f"{len(test_failure_bug_ids)} bugs about test failures.")
logger.info("%d bugs about test failures.", len(test_failure_bug_ids))

all_ids = (
timespan_ids