Refactored logging statements to use lazy % formatting #3335

Merged 10 commits on Mar 9, 2023
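The change replaces eager f-string interpolation in logging calls with logging's lazy %-style formatting: the format string and its arguments are handed to the logger separately, and the message is only built if a handler actually emits the record. It also keeps the unformatted template on the log record, which log aggregators can use for grouping, and it is the pattern pylint's logging-fstring-interpolation check recommends. A minimal sketch of the difference (the URL value here is illustrative, not taken from the PR):

import logging

logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger(__name__)

url = "https://example.com/artifact.zst"

# Eager: the f-string is interpolated even though INFO records are filtered out here.
logger.info(f"Downloading {url}")

# Lazy: the format string and argument are stored on the LogRecord and only
# merged into a message when a handler actually emits the record.
logger.info("Downloading %s", url)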
10 changes: 5 additions & 5 deletions bugbug/db.py
@@ -50,7 +50,7 @@ def is_different_schema(path):
r = requests.get(url)

if not r.ok:
logger.info(f"Version file is not yet available to download for {path}")
logger.info("Version file is not yet available to download for %s", path)
return True

prev_version = int(r.text)
@@ -67,7 +67,7 @@ def download_support_file(path, file_name, extract=True):
url = urljoin(DATABASES[path]["url"], file_name)
path = os.path.join(os.path.dirname(path), file_name)

logger.info(f"Downloading {url} to {path}")
logger.info("Downloading %s to %s", url, path)
updated = utils.download_check_etag(url, path)

if extract and updated and path.endswith(".zst"):
@@ -77,7 +77,7 @@ def download_support_file(path, file_name, extract=True):
return True
except requests.exceptions.HTTPError:
logger.info(
f"{file_name} is not yet available to download for {path}", exc_info=True
"%s is not yet available to download for %s", file_name, path, exc_info=True
)
return False

@@ -92,7 +92,7 @@ def download(path, support_files_too=False, extract=True):

url = DATABASES[path]["url"]
try:
logger.info(f"Downloading {url} to {zst_path}")
logger.info("Downloading %s to %s", url, zst_path)
updated = utils.download_check_etag(url, zst_path)

if extract and updated:
@@ -106,7 +106,7 @@ def download(path, support_files_too=False, extract=True):

return successful
except requests.exceptions.HTTPError:
logger.info(f"{url} is not yet available to download", exc_info=True)
logger.info("%s is not yet available to download", url, exc_info=True)
return False


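One detail in the download_support_file hunk above: with lazy formatting, the positional arguments that fill the format string come first, and exc_info=True stays a keyword argument on the same call. A standalone sketch under the same assumptions (download_if_available is a hypothetical helper, not the PR's code; URL and path are illustrative):

import logging

import requests

logger = logging.getLogger(__name__)

def download_if_available(url: str, path: str) -> bool:
    """Return True if the file was fetched, False if it is not published yet."""
    try:
        response = requests.get(url)
        response.raise_for_status()
        with open(path, "wb") as f:
            f.write(response.content)
        return True
    except requests.exceptions.HTTPError:
        # The two %s placeholders are filled from the positional args;
        # exc_info=True attaches the current exception's traceback to the record.
        logger.info("%s is not yet available to download for %s", url, path, exc_info=True)
        return False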
4 changes: 2 additions & 2 deletions bugbug/github.py
@@ -58,7 +58,7 @@ def get_token(self) -> str:

def fetch_events(self, events_url: str) -> list:
self.api_limit()
logger.info(f"Fetching {events_url}")
logger.info("Fetching %s", events_url)
headers = {"Authorization": "token {}".format(self.get_token())}
response = get_session("github").get(events_url, headers=headers)
response.raise_for_status()
@@ -78,7 +78,7 @@ def fetch_issues(
if isinstance(data, dict):
data = [data]

logger.info(f"Fetching {url}")
logger.info("Fetching %s", url)

if retrieve_events:
for item in data:
12 changes: 7 additions & 5 deletions bugbug/models/fixtime.py
@@ -92,11 +92,11 @@ def _quantiles(n):
quantiles = _quantiles(2)

logger.info(
f"Max fix time: {max(fix_time for bug_id, fix_time in bug_fix_times)}"
"Max fix time: %s", max(fix_time for bug_id, fix_time in bug_fix_times)
)
logger.info(f"Fix time quantiles: {quantiles}")
logger.info(f"Fix time quartiles: {_quantiles(4)}")
logger.info(f"Fix time deciles: {_quantiles(10)}")
logger.info("Fix time quantiles: %s", quantiles)
logger.info("Fix time quartiles: %s", _quantiles(4))
logger.info("Fix time deciles: %s", _quantiles(10))

classes = {}
for bug_id, fix_time in bug_fix_times:
@@ -110,7 +110,7 @@

for i in range(len(quantiles) + 1):
logger.info(
f"{sum(1 for label in classes.values() if label == i)} bugs are in the {i}th quantile"
"%d bugs are in the %dth quantile",
sum(1 for label in classes.values() if label == i),
i,
)

return classes, list(range(len(quantiles) + 1))
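Worth keeping in mind for the quantile logging above: only the string interpolation is deferred. The argument expressions, including the sum(...) generator, are still evaluated at call time even when INFO records are filtered out. A small illustration (the classes dict and index are made up):

import logging

logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger(__name__)

classes = {1: 0, 2: 1, 3: 0}
i = 0

# The sum() below runs regardless of the log level; only building the final
# "... bugs are in the ...th quantile" string is skipped when the record is dropped.
logger.info(
    "%d bugs are in the %dth quantile",
    sum(1 for label in classes.values() if label == i),
    i,
)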
18 changes: 9 additions & 9 deletions bugbug/repository.py
@@ -963,7 +963,7 @@ def _hg_log(revs: list[bytes], branch: str = "tip") -> tuple[Commit, ...]:


def get_revs(hg, rev_start=0, rev_end="tip"):
logger.info(f"Getting revs from {rev_start} to {rev_end}...")
logger.info("Getting revs from %s to %s...", rev_start, rev_end)

args = hglib.util.cmdbuilder(
b"log",
@@ -1021,7 +1021,7 @@ def __setitem__(self, key, value):
def calculate_experiences(
commits: Collection[Commit], first_pushdate: datetime, save: bool = True
) -> None:
logger.info(f"Analyzing seniorities from {len(commits)} commits...")
logger.info("Analyzing seniorities from %d commits...", len(commits))

experiences = Experiences(save)

@@ -1034,7 +1034,7 @@ def calculate_experiences(
time_lapse = commit.pushdate - experiences[key]
commit.seniority_author = time_lapse.total_seconds()

logger.info(f"Analyzing experiences from {len(commits)} commits...")
logger.info("Analyzing experiences from %d commits...", len(commits))

# Note: In the case of files, directories, components, we can't just use the sum of previous commits, as we could end
# up overcounting them. For example, consider a commit A which modifies "dir1" and "dir2", a commit B which modifies
@@ -1365,10 +1365,10 @@ def download_commits(

first_pushdate = get_first_pushdate(repo_dir)

logger.info(f"Mining {len(revs)} commits...")
logger.info("Mining %d commits...", len(revs))

if not use_single_process:
logger.info(f"Using {os.cpu_count()} processes...")
logger.info("Using %d processes...", os.cpu_count())
commits = hg_log_multi(repo_dir, revs, branch)
else:
commits = hg_log(hg, revs, branch)
@@ -1377,7 +1377,7 @@ def download_commits(

commits_num = len(commits)

logger.info(f"Mining {commits_num} patches...")
logger.info("Mining %d patches...", commits_num)

global code_analysis_server

@@ -1485,9 +1485,9 @@ def clone(

# Pull, to make sure the pushlog is generated.
with hglib.open(repo_dir) as hg:
logger.info(f"Pulling {repo_dir}")
logger.info("Pulling %s", repo_dir)
hg.pull(update=update)
logger.info(f"{repo_dir} pulled")
logger.info("%s pulled", repo_dir)

return
except hglib.error.ServerError as e:
@@ -1506,7 +1506,7 @@ def clone(
)
subprocess.run(cmd, check=True)

logger.info(f"{repo_dir} cloned")
logger.info("%s cloned", repo_dir)


def pull(repo_dir: str, branch: str, revision: str) -> None:
2 changes: 1 addition & 1 deletion bugbug/similarity.py
@@ -66,7 +66,7 @@ def download_similarity_model(model_name):
path = f"{model_name_to_class[model_name].__name__.lower()}.similaritymodel"
url = f"https://community-tc.services.mozilla.com/api/index/v1/task/project.bugbug.train_similarity.latest/artifacts/public/{path}.zst"

logger.info(f"Downloading similarity model from {url}...")
logger.info("Downloading similarity model from %s...", url)
updated = download_check_etag(url)
if updated:
zstd_decompress(path)
26 changes: 19 additions & 7 deletions bugbug/test_scheduling.py
@@ -207,7 +207,7 @@ def get_push_data(

push_data_queue.append(elem)

logger.info(f"push data nodes: {push_data_count}")
logger.info("Push data nodes: %d", push_data_count)

push_data = [
(
@@ -235,7 +235,7 @@ def get_push_data(
tuple(all_groups_set), cast(Set[Runnable], all_groups_set), "group"
)
all_groups_set = set(all_groups)
logger.info(f"{len(all_groups_set)} manifests run in the last 28 pushes")
logger.info("%d manifests run in the last 28 pushes", len(all_groups_set))

all_runnables_set = set(
sum((list(push_runnables) for _, _, push_runnables, _, _ in push_data), [])
@@ -245,7 +245,7 @@ def get_push_data(
tuple(all_runnables_set), all_runnables_set, granularity
)
all_runnables_set = set(all_runnables)
logger.info(f"{len(all_runnables_set)} runnables run in the last 28 pushes")
logger.info("%d runnables run in the last 28 pushes", len(all_runnables_set))

def push_data_iter() -> Iterator[PushResult]:
return (
@@ -447,7 +447,7 @@ def count_runs_and_failures(tasks):

stats[couple] = (support, confidence)

logger.info(f"{skipped} couples skipped because their support was too low")
logger.info("%d couples skipped because their support was too low", skipped)

logger.info("Redundancies with the highest support and confidence:")
for couple, (support, confidence) in sorted(
@@ -456,7 +456,7 @@ def count_runs_and_failures(tasks):
failure_count = count_both_failures[couple]
run_count = count_runs[couple]
logger.info(
f"{couple[0]} - {couple[1]} redundancy confidence {confidence}, support {support} ({failure_count} over {run_count})."
"%s - %s redundancy confidence %f, support %d (%d over %d).",
couple[0],
couple[1],
confidence,
support,
failure_count,
run_count,
)

logger.info("Redundancies with the highest confidence and lowest support:")
@@ -466,7 +472,13 @@ def count_runs_and_failures(tasks):
failure_count = count_both_failures[couple]
run_count = count_runs[couple]
logger.info(
f"{couple[0]} - {couple[1]} redundancy confidence {confidence}, support {support} ({failure_count} over {run_count})."
"%s - %s redundancy confidence %f, support %d (%d over %d).",
couple[0],
couple[1],
confidence,
support,
failure_count,
run_count,
)

failing_together: dict = {}
Expand Down Expand Up @@ -515,7 +527,7 @@ def count_runs_and_failures(tasks):
failing_together[couple[0]][couple[1]] = (support, confidence)

for percentage, count in count_redundancies.most_common():
logger.info(f"{count} with {percentage} confidence")
logger.info("%d with %f%% confidence", count, percentage)

failing_together_db = get_failing_together_db(granularity, False)

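The last hunk in this file also exercises two %-formatting details that f-strings hide: explicit conversions (%d for integers, %f for floats) and the doubled %% needed for a literal percent sign. A quick illustration with made-up values:

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

count, percentage = 42, 87.5

# %d renders the int, %f the float (six decimals by default), %% a literal '%'.
logger.info("%d with %f%% confidence", count, percentage)
# Merged message: "42 with 87.500000% confidence"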
2 changes: 1 addition & 1 deletion bugbug/utils.py
@@ -224,7 +224,7 @@ def download_model(model_name: str) -> str:

path = f"{model_name}model"
url = f"https://community-tc.services.mozilla.com/api/index/v1/task/project.bugbug.train_{model_name}.{version}/artifacts/public/{path}.zst"
logger.info(f"Downloading {url}...")
logger.info("Downloading %s...", url)
updated = download_check_etag(url)
if updated:
zstd_decompress(path)
8 changes: 4 additions & 4 deletions http_service/bugbug_http/boot.py
@@ -21,7 +21,7 @@
def boot_worker() -> None:
# Clone autoland
def clone_autoland() -> None:
logger.info(f"Cloning autoland in {REPO_DIR}...")
logger.info("Cloning autoland in %s...", REPO_DIR)
repository.clone(REPO_DIR, "https://hg.mozilla.org/integration/autoland")

def extract_past_failures_label() -> None:
@@ -118,7 +118,7 @@ def retrieve_schedulable_tasks() -> None:
for push_id, push_obj in r.json()["pushes"].items()
]

logger.info(f"Retrieving known tasks from {revs}")
logger.info("Retrieving known tasks from %s...", revs)

# Store in a file the list of tasks in the latest autoland pushes.
# We use more than one to protect ourselves from broken decision tasks.
@@ -130,7 +130,7 @@ def retrieve_schedulable_tasks() -> None:
if r.ok:
known_tasks.update(r.json())

logger.info(f"Retrieved {len(known_tasks)} tasks")
logger.info("Retrieved %d tasks", len(known_tasks))

assert len(known_tasks) > 0

@@ -198,7 +198,7 @@ def retrieve_schedulable_tasks() -> None:
logger.info("Touched together DB updated.")
except Exception as e:
# It's not ideal, but better not to crash the service!
logger.error(f"Exception while updating commits DB: {e}")
logger.error("Exception while updating commits DB: %s", e)

# Wait list of schedulable tasks to be downloaded and written to disk.
retrieve_schedulable_tasks_future.result()
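In the boot.py error path above, the caught exception is passed as a lazy %s argument, so only its message ends up in the log line; if the full traceback were wanted, exc_info=True (or logger.exception) would add it, as the db.py hunks do. A minimal sketch, with an illustrative exception:

import logging

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

try:
    raise RuntimeError("commits DB could not be updated")
except Exception as e:
    # Lazy %s logs str(e) only; swapping in exc_info=True (or logger.exception)
    # would also attach the traceback without changing the format string.
    logger.error("Exception while updating commits DB: %s", e)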
4 changes: 2 additions & 2 deletions http_service/bugbug_http/models.py
@@ -181,7 +181,7 @@ def schedule_tests(branch: str, rev: str) -> str:
from bugbug_http.app import JobInfo

job = JobInfo(schedule_tests, branch, rev)
LOGGER.info(f"Processing {job}...")
LOGGER.info("Processing %s...", job)

# Pull the revision to the local repository
LOGGER.info("Pulling commits from the remote repository...")
@@ -256,7 +256,7 @@ def get_config_specific_groups(config: str) -> str:
from bugbug_http.app import JobInfo

job = JobInfo(get_config_specific_groups, config)
LOGGER.info(f"Processing {job}...")
LOGGER.info("Processing %s...", job)

equivalence_sets = testselect._get_equivalence_sets(0.9)

2 changes: 1 addition & 1 deletion scripts/bug_classifier.py
@@ -29,7 +29,7 @@ def classify_bugs(model_name: str, classifier: str, bug_id: int) -> None:
model_file_name = f"{model_name}model"

if not os.path.exists(model_file_name):
logger.info(f"{model_file_name} does not exist. Downloading the model....")
logger.info("%s does not exist. Downloading the model....", model_file_name)
try:
download_model(model_name)
except requests.HTTPError:
12 changes: 6 additions & 6 deletions scripts/bug_retriever.py
@@ -42,7 +42,7 @@ def retrieve_bugs(self, limit: int = None) -> None:
else:
changed_ids = set()

logger.info(f"Retrieved {len(changed_ids)} IDs.")
logger.info("Retrieved %d IDs.", len(changed_ids))

all_components = bugzilla.get_product_component_count(9999)

@@ -60,17 +60,17 @@ def retrieve_bugs(self, limit: int = None) -> None:
two_years_and_six_months_ago = datetime.utcnow() - relativedelta(
years=2, months=6
)
logger.info(f"Retrieving bug IDs since {two_years_and_six_months_ago}")
logger.info("Retrieving bug IDs since %s", two_years_and_six_months_ago)
timespan_ids = bugzilla.get_ids_between(two_years_and_six_months_ago)
if limit:
timespan_ids = timespan_ids[-limit:]
logger.info(f"Retrieved {len(timespan_ids)} IDs.")
logger.info("Retrieved %d IDs.", len(timespan_ids))

# Get IDs of labelled bugs.
labelled_bug_ids = labels.get_all_bug_ids()
if limit:
labelled_bug_ids = labelled_bug_ids[-limit:]
logger.info(f"{len(labelled_bug_ids)} labelled bugs to download.")
logger.info("%d labelled bugs to download.", len(labelled_bug_ids))

# Get the commits DB, as we need it to get the bug IDs linked to recent commits.
# XXX: Temporarily avoid downloading the commits DB when a limit is set, to avoid the integration test fail when the commits DB is bumped.
@@ -89,7 +89,7 @@ def retrieve_bugs(self, limit: int = None) -> None:
)
if limit:
commit_bug_ids = commit_bug_ids[-limit:]
logger.info(f"{len(commit_bug_ids)} bugs linked to commits to download.")
logger.info("%d bugs linked to commits to download.", len(commit_bug_ids))

# Get IDs of bugs which are regressions, bugs which caused regressions (useful for the regressor model),
# and blocked bugs.
@@ -119,7 +119,7 @@ def retrieve_bugs(self, limit: int = None) -> None:
]
if limit:
test_failure_bug_ids = test_failure_bug_ids[-limit:]
logger.info(f"{len(test_failure_bug_ids)} bugs about test failures.")
logger.info("%d bugs about test failures.", len(test_failure_bug_ids))

all_ids = (
timespan_ids