From 5dbf96e0ad8c8b54c930e884aef3f2c4bcabc3a7 Mon Sep 17 00:00:00 2001 From: WhiteWolf47 Date: Mon, 6 Mar 2023 20:59:30 +0000 Subject: [PATCH 1/7] Refactored some logging statements to use lazy % formatting --- .gitignore | 3 +++ scripts/regressor_finder.py | 4 ++-- 2 files changed, 5 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 116cd87bce..e9f169a112 100644 --- a/.gitignore +++ b/.gitignore @@ -50,3 +50,6 @@ node_modules/ *.log # Desktop Service Store *.DS_Store + +#Python virtual environment +bb_bug/ \ No newline at end of file diff --git a/scripts/regressor_finder.py b/scripts/regressor_finder.py index 29ee70d0b5..2f777a5d09 100644 --- a/scripts/regressor_finder.py +++ b/scripts/regressor_finder.py @@ -352,7 +352,7 @@ def mercurial_to_git(revs): ) f.writelines("{}\n".format(git_hash) for git_hash in git_hashes) - logger.info(f"{len(bug_fixing_commits)} commits to analyze") + logger.info("%d commits to analyze", len(bug_fixing_commits)) # Skip already found bug-introducing commits. 
bug_fixing_commits = [ @@ -393,7 +393,7 @@ def _init(git_repo_dir): thread_local.git.get_head() def find_bic(bug_fixing_commit): - logger.info("Analyzing {}...".format(bug_fixing_commit["rev"])) + logger.info("Analyzing %s...", bug_fixing_commit["rev"]) git_fix_revision = tuple(mercurial_to_git([bug_fixing_commit["rev"]]))[0] From 1f130fc4372eb4b0c9bcf3a6f4f30ee52b53996b Mon Sep 17 00:00:00 2001 From: WhiteWolf47 Date: Mon, 6 Mar 2023 21:59:01 +0000 Subject: [PATCH 2/7] removed unrelated changes and refactored some more lines --- .gitignore | 5 +---- bugbug/models/duplicate.py | 6 +++++- scripts/regressor_finder.py | 22 ++++++++++------------ 3 files changed, 16 insertions(+), 17 deletions(-) diff --git a/.gitignore b/.gitignore index e9f169a112..847a89433b 100644 --- a/.gitignore +++ b/.gitignore @@ -49,7 +49,4 @@ node_modules/ # Logs *.log # Desktop Service Store -*.DS_Store - -#Python virtual environment -bb_bug/ \ No newline at end of file +*.DS_Store \ No newline at end of file diff --git a/bugbug/models/duplicate.py b/bugbug/models/duplicate.py index 70801c7559..a9cf824774 100644 --- a/bugbug/models/duplicate.py +++ b/bugbug/models/duplicate.py @@ -5,6 +5,7 @@ import random from itertools import combinations +from logging import INFO, basicConfig, getLogger from sklearn.compose import ColumnTransformer from sklearn.feature_extraction import DictVectorizer @@ -14,6 +15,9 @@ from bugbug import bug_features, bugzilla, feature_cleanup, utils from bugbug.model import BugCoupleModel +basicConfig(level=INFO) +logger = getLogger(__name__) + REPORTERS_TO_IGNORE = {"intermittent-bug-filer@mozilla.bugs", "wptsync@mozilla.bugs"} @@ -120,7 +124,7 @@ def get_labels(self): # Store all remaining ids non_duplicate_ids = list(all_ids - set(duplicate_ids)) - print(f"Number of duplicate labels is: {self.num_duplicates}") + logger.info("Number of duplicate labels is: %d", self.num_duplicates) # When the bug has no duplicates, we create dup-nondup labels. 
dup_nondup_num = 0 diff --git a/scripts/regressor_finder.py b/scripts/regressor_finder.py index 2f777a5d09..1a66f0818f 100644 --- a/scripts/regressor_finder.py +++ b/scripts/regressor_finder.py @@ -64,15 +64,14 @@ def __init__( with ThreadPoolExecutorResult(max_workers=3) as executor: if self.git_repo_url is not None: - logger.info(f"Cloning {self.git_repo_url} to {self.git_repo_dir}...") + logger.info("Cloning %s to %s...", self.git_repo_url, self.git_repo_dir) executor.submit( self.clone_git_repo, self.git_repo_url, self.git_repo_dir ) if self.tokenized_git_repo_url is not None: - logger.info( - f"Cloning {self.tokenized_git_repo_url} to {self.tokenized_git_repo_dir}..." - ) + logger.info("Cloning %s to %s...", self.tokenized_git_repo_url, self.tokenized_git_repo_dir) + executor.submit( self.clone_git_repo, self.tokenized_git_repo_url, @@ -93,9 +92,9 @@ def clone_git_repo(self, repo_url, repo_dir): ) )() - logger.info(f"{repo_dir} cloned") + logger.info("%s cloned", repo_dir) - logger.info(f"Fetching {repo_dir}") + logger.info("Fetching %s", repo_dir) tenacity.retry( wait=tenacity.wait_exponential(multiplier=1, min=16, max=64), @@ -109,7 +108,7 @@ def clone_git_repo(self, repo_url, repo_dir): ) )() - logger.info(f"{repo_dir} fetched") + logger.info("%s fetched", repo_dir) def init_mapping(self): if self.tokenized_git_repo_url is not None: @@ -158,7 +157,7 @@ def get_commits_to_ignore(self) -> None: commits_to_ignore.append({"rev": backedout, "type": "backedout"}) - logger.info(f"{len(commits_to_ignore)} commits to ignore...") + logger.info("%d commits to ignore...", len(commits_to_ignore)) # Skip backed-out commits which aren't in the repository (commits which landed *before* the Mercurial history # started, and backouts which mentioned a bad hash in their message). 
@@ -166,12 +165,11 @@ def get_commits_to_ignore(self) -> None: c for c in commits_to_ignore if c["rev"][:12] in all_commits ] - logger.info(f"{len(commits_to_ignore)} commits to ignore...") + logger.info("%d commits to ignore...", len(commits_to_ignore)) logger.info( - "...of which {} are backed-out".format( + "...of which %d are backed-out", sum(1 for commit in commits_to_ignore if commit["type"] == "backedout") - ) ) db.write(IGNORED_COMMITS_DB, commits_to_ignore) @@ -194,7 +192,7 @@ def find_bug_fixing_commits(self) -> None: for bug_fixing_commit in db.read(BUG_FIXING_COMMITS_DB) ) logger.info( - f"Already classified {len(prev_bug_fixing_commits_nodes)} commits..." + "Already classified %d commits...", len(prev_bug_fixing_commits_nodes) ) # TODO: Switch to the pure Defect model, as it's better in this case. From 74718b5d69ccaee3e66b57f4fa8b33410b9c6941 Mon Sep 17 00:00:00 2001 From: WhiteWolf47 Date: Mon, 6 Mar 2023 22:28:15 +0000 Subject: [PATCH 3/7] removed unrelated changes and refactored some more lines --- bugbug/models/duplicate.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/bugbug/models/duplicate.py b/bugbug/models/duplicate.py index a9cf824774..70801c7559 100644 --- a/bugbug/models/duplicate.py +++ b/bugbug/models/duplicate.py @@ -5,7 +5,6 @@ import random from itertools import combinations -from logging import INFO, basicConfig, getLogger from sklearn.compose import ColumnTransformer from sklearn.feature_extraction import DictVectorizer @@ -15,9 +14,6 @@ from bugbug import bug_features, bugzilla, feature_cleanup, utils from bugbug.model import BugCoupleModel -basicConfig(level=INFO) -logger = getLogger(__name__) - REPORTERS_TO_IGNORE = {"intermittent-bug-filer@mozilla.bugs", "wptsync@mozilla.bugs"} @@ -124,7 +120,7 @@ def get_labels(self): # Store all remaining ids non_duplicate_ids = list(all_ids - set(duplicate_ids)) - logger.info("Number of duplicate labels is: %d", self.num_duplicates) + print(f"Number of duplicate labels 
is: {self.num_duplicates}") # When the bug has no duplicates, we create dup-nondup labels. dup_nondup_num = 0 From 7539c40389a319440536cfb70f02a3668208516f Mon Sep 17 00:00:00 2001 From: WhiteWolf47 Date: Mon, 6 Mar 2023 22:34:15 +0000 Subject: [PATCH 4/7] removed unrelated changes and refactored some more lines --- bugbug/models/duplicate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bugbug/models/duplicate.py b/bugbug/models/duplicate.py index 70801c7559..90445ee729 100644 --- a/bugbug/models/duplicate.py +++ b/bugbug/models/duplicate.py @@ -147,4 +147,4 @@ def get_labels(self): return classes, [0, 1] def get_feature_names(self): - return self.extraction_pipeline.named_steps["union"].get_feature_names_out() + return self.extraction_pipeline.named_steps["union"].get_feature_names_out() \ No newline at end of file From 569ad3c2fcb94ef27676ea2565a8f491211ed7c3 Mon Sep 17 00:00:00 2001 From: WhiteWolf47 Date: Tue, 7 Mar 2023 06:22:26 +0000 Subject: [PATCH 5/7] discarded unrelated issues, added new lines at end of file and setup pre-commit install --- .gitignore | 2 +- bugbug/models/duplicate.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.gitignore b/.gitignore index 847a89433b..116cd87bce 100644 --- a/.gitignore +++ b/.gitignore @@ -49,4 +49,4 @@ node_modules/ # Logs *.log # Desktop Service Store -*.DS_Store \ No newline at end of file +*.DS_Store diff --git a/bugbug/models/duplicate.py b/bugbug/models/duplicate.py index 90445ee729..70801c7559 100644 --- a/bugbug/models/duplicate.py +++ b/bugbug/models/duplicate.py @@ -147,4 +147,4 @@ def get_labels(self): return classes, [0, 1] def get_feature_names(self): - return self.extraction_pipeline.named_steps["union"].get_feature_names_out() \ No newline at end of file + return self.extraction_pipeline.named_steps["union"].get_feature_names_out() From 562465112107d8dd970f0157737868964a4f39ec Mon Sep 17 00:00:00 2001 From: WhiteWolf47 Date: Tue, 7 Mar 2023 16:15:27 +0000 
Subject: [PATCH 6/7] pre-commit previously committed files --- scripts/regressor_finder.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/scripts/regressor_finder.py b/scripts/regressor_finder.py index 1a66f0818f..3d6994b0f9 100644 --- a/scripts/regressor_finder.py +++ b/scripts/regressor_finder.py @@ -70,7 +70,11 @@ def __init__( ) if self.tokenized_git_repo_url is not None: - logger.info("Cloning %s to %s...", self.tokenized_git_repo_url, self.tokenized_git_repo_dir) + logger.info( + "Cloning %s to %s...", + self.tokenized_git_repo_url, + self.tokenized_git_repo_dir, + ) executor.submit( self.clone_git_repo, @@ -169,7 +173,7 @@ def get_commits_to_ignore(self) -> None: logger.info( "...of which %d are backed-out", - sum(1 for commit in commits_to_ignore if commit["type"] == "backedout") + sum(1 for commit in commits_to_ignore if commit["type"] == "backedout"), ) db.write(IGNORED_COMMITS_DB, commits_to_ignore) From 7d2c6eed0d20fa0585cff0d28b3c97f0f3aa82c0 Mon Sep 17 00:00:00 2001 From: WhiteWolf47 Date: Tue, 7 Mar 2023 20:23:08 +0000 Subject: [PATCH 7/7] removed new line --- scripts/regressor_finder.py | 1 - 1 file changed, 1 deletion(-) diff --git a/scripts/regressor_finder.py b/scripts/regressor_finder.py index 3d6994b0f9..5de5e0384e 100644 --- a/scripts/regressor_finder.py +++ b/scripts/regressor_finder.py @@ -75,7 +75,6 @@ def __init__( self.tokenized_git_repo_url, self.tokenized_git_repo_dir, ) - executor.submit( self.clone_git_repo, self.tokenized_git_repo_url,