From b4b83316bc623dfef8992441393a159ac0115b7e Mon Sep 17 00:00:00 2001 From: mpadge Date: Fri, 13 Dec 2024 16:14:24 +0100 Subject: [PATCH] finish working version of 'user_relation_matrices' --- DESCRIPTION | 2 +- R/analyse-users.R | 56 +++++++++++++++++++++++++++++++++++++++-------- codemeta.json | 2 +- 3 files changed, 49 insertions(+), 11 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 9252135..bb02888 100755 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,6 +1,6 @@ Package: repometrics Title: Metrics for Your Code Repository -Version: 0.1.3.024 +Version: 0.1.3.025 Authors@R: person("Mark", "Padgham", , "mark.padgham@email.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0003-2172-5265")) diff --git a/R/analyse-users.R b/R/analyse-users.R index 974d62d..becdd26 100644 --- a/R/analyse-users.R +++ b/R/analyse-users.R @@ -12,6 +12,8 @@ #' \item issue_cmts Comments on issues #' \item issues Issues opened by user. #' } +#' @return A `data.frame` of pairwise user logins, and proportions of overlap +#' between repositories in the six variables described above. 
#' @noRd user_relation_matrices <- function (user_data) { @@ -19,7 +21,36 @@ user_relation_matrices <- function (user_data) { user_data <- add_user_login_cols (user_data) |> combine_user_data () - cmts <- user_relate_commits (user_data, user_names) + # Pre-processing to name grouping column "repo" and count column "n": + user_data$commit_cmt$repo <- + paste0 (user_data$commit_cmt$org, user_data$commit_cmt$repo) + + user_data$followers <- + dplyr::rename (user_data$followers, repo = followers) |> + dplyr::mutate (n = 1L) + user_data$following <- + dplyr::rename (user_data$following, repo = following) |> + dplyr::mutate (n = 1L) + + user_data$issue_cmts <- + dplyr::rename (user_data$issue_cmts, repo = org_repo) |> + dplyr::group_by (repo, login) |> + dplyr::summarise (n = sum (num_comments), .groups = "keep") + user_data$issues <- dplyr::rename (user_data$issues, repo = org_repo) |> + dplyr::group_by (repo, login) |> + dplyr::summarise (n = dplyr::n (), .groups = "keep") + + overlap <- lapply (names (user_data), function (n) { + user_data [[n]] <- user_relate_fields (user_data, user_names, what = n) + }) + + res <- dplyr::left_join (overlap [[1]], overlap [[2]], by = c ("login1", "login2")) |> + dplyr::left_join (overlap [[3]], by = c ("login1", "login2")) |> + dplyr::left_join (overlap [[4]], by = c ("login1", "login2")) |> + dplyr::left_join (overlap [[5]], by = c ("login1", "login2")) |> + dplyr::left_join (overlap [[6]], by = c ("login1", "login2")) + + return (res) } #' Add 'login' columns to all user data, so each element can be combined. 
@@ -67,17 +98,22 @@ combine_user_data <- function (user_data) { return (data) } -user_relate_commits <- function (user_data, user_names) { +user_relate_fields <- function (user_data, user_names, what = "commits") { user_combs <- t (combn (user_names, m = 2L)) + if (what == "commits") { + user_data [[what]] <- dplyr::rename (user_data [[what]], n = num_commits) + } else if (what == "commit_cmt") { + user_data$commit_cmt$n <- 1L + } res <- apply (user_combs, 1, function (i) { - cmt1 <- dplyr::filter (user_data$commits, login == i [1]) |> + cmt1 <- dplyr::filter (user_data [[what]], login == i [1]) |> dplyr::group_by (repo) |> - dplyr::summarise (n1 = sum (num_commits)) - cmt2 <- dplyr::filter (user_data$commits, login == i [2]) |> + dplyr::summarise (n1 = sum (n)) + cmt2 <- dplyr::filter (user_data [[what]], login == i [2]) |> dplyr::group_by (repo) |> - dplyr::summarise (n2 = sum (num_commits)) + dplyr::summarise (n2 = sum (n)) overlap <- dplyr::inner_join (cmt1, cmt2, by = "repo") res <- 0 @@ -88,10 +124,12 @@ user_relate_commits <- function (user_data, user_names) { return (res) }) - data.frame ( + res <- data.frame ( login1 = user_combs [, 1], login2 = user_combs [, 2], - overlap = res, - what = "commits" + res ) + names (res) [3] <- what + + return (res) } diff --git a/codemeta.json b/codemeta.json index 284c806..ea3358c 100644 --- a/codemeta.json +++ b/codemeta.json @@ -8,7 +8,7 @@ "codeRepository": "https://github.com/ropensci-review-tools/repometrics", "issueTracker": "https://github.com/ropensci-review-tools/repometrics/issues", "license": "https://spdx.org/licenses/GPL-3.0", - "version": "0.1.3.024", + "version": "0.1.3.025", "programmingLanguage": { "@type": "ComputerLanguage", "name": "R",