From 3c553d57c87679d5e6b641629583e35a46f9c68b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jakub=20Ber=C3=A1nek?= Date: Wed, 1 Jan 2025 12:23:58 +0100 Subject: [PATCH] Deduplicate scores based on e-mail --- src/site.rs | 34 +++++++++++++++++++++++++++++++--- 1 file changed, 31 insertions(+), 3 deletions(-) diff --git a/src/site.rs b/src/site.rs index 8262a7bd..17588ae9 100644 --- a/src/site.rs +++ b/src/site.rs @@ -1,6 +1,6 @@ use crate::{AuthorMap, VersionTag}; use handlebars::Handlebars; -use std::collections::BTreeMap; +use std::collections::{BTreeMap, HashMap}; use std::fs; use std::path::Path; use unicase::UniCase; @@ -135,17 +135,18 @@ fn about() -> Result<(), Box> { Ok(()) } -#[derive(serde::Serialize)] +#[derive(serde::Serialize, Ord, PartialOrd, Eq, PartialEq)] struct Entry { rank: u32, author: String, + email: String, commits: usize, } fn author_map_to_scores(map: &AuthorMap) -> Vec { let debug_emails = std::env::var("DEBUG_EMAILS").is_ok_and(|value| value == "1"); - let mut scores = map + let scores = map .iter() .map(|(author, commits)| { let name = UniCase::into_inner(author.name.clone()); @@ -157,11 +158,14 @@ fn author_map_to_scores(map: &AuthorMap) -> Vec { } else { name }, + email: UniCase::into_inner(author.email.clone()), commits, } }) .collect::>(); + let mut scores = deduplicate_scores(scores); scores.sort_by_key(|e| (std::cmp::Reverse(e.commits), e.author.clone())); + let mut last_rank = 1; let mut ranked_at_current = 0; let mut last_commits = usize::max_value(); @@ -178,6 +182,30 @@ fn author_map_to_scores(map: &AuthorMap) -> Vec { scores } +/// Deduplicate scores based on the assumption that an e-mail uniquely identifies a given +/// person. If there are multiple entries with the same email, their commit counts will be +/// merged into a single entry, with the canonical name being chosen based on the entry with +/// the most commits. +fn deduplicate_scores(entries: Vec) -> Vec { + let mut entry_map: HashMap> = HashMap::with_capacity(entries.len()); + for entry in entries { + entry_map.entry(entry.email.clone()).or_default().push(entry); + } + + entry_map.into_values().map(|mut entry| { + // If there are multiple entries with the same maximum commit count, ensure that + // the ordering is stable, by sorting based on the whole entry. + entry.sort(); + let canonical_entry = entry.iter().max_by_key(|entry| entry.commits).unwrap(); + Entry { + rank: 0, + author: canonical_entry.author.clone(), + email: canonical_entry.email.clone(), + commits: entry.iter().map(|e| e.commits).sum(), + } + }).collect() +} + fn releases( by_version: &BTreeMap, all_time: &AuthorMap,