Skip to content

Commit

Permalink
Merge pull request #16764 from lichess-org/plus-normalization-emails
Browse files Browse the repository at this point in the history
Apply +-normalization for all email domains
  • Loading branch information
ornicar authored Jan 26, 2025
2 parents fa1bd27 + 3b2fdef commit 6e7ccb9
Show file tree
Hide file tree
Showing 4 changed files with 61 additions and 27 deletions.
65 changes: 44 additions & 21 deletions bin/mongodb/fix-normalized-emails.js
Original file line number Diff line number Diff line change
@@ -1,25 +1,48 @@
function gmailNormalize(email) {
// When true, the migration only prints the planned changes and writes nothing.
const dry = false;

// Domains for which dots in the local part are ignored, in addition to the
// '+suffix' stripping that applies to every domain.
const gmailOrProton = [
  'protonmail.com',
  'protonmail.ch',
  'pm.me',
  'proton.me',
  'gmail.com',
  'googlemail.com',
];

/**
 * Computes the normalized form of an email address.
 *
 * The address is lowercased, everything from the first '+' of the local part
 * is dropped, and dots are removed from the local part only for the domains
 * listed in `gmailOrProton`.
 *
 * @param {string} email - address as stored or typed by the user
 * @returns {string} the normalized address; the lowercased input is returned
 *   unchanged when it is not exactly `name@domain` or when normalization
 *   would leave an empty local part
 */
function normalize(email) {
  const lower = email.toLowerCase();
  const parts = lower.split('@');
  // Zero or several '@': do not guess, keep the lowercased address as is.
  if (parts.length !== 2) return lower;

  const [local, domain] = parts;
  const [beforePlus] = local.split('+');
  const name = gmailOrProton.includes(domain) ? beforePlus.replace(/\./g, '') : beforePlus;

  // Never produce "@domain": fall back to the lowercased input instead.
  return name === '' ? lower : `${name}@${domain}`;
}

// For every user whose email ends in one of the listed gmail/proton domains
// and has a '+' or '.' earlier in the address: store the normalized address
// in `email` and keep the previous verbatim address in `verbatimEmail`.
db.user4
  .find({ email: /[^+.]+[+.].*@(protonmail\.com|protonmail\.ch|pm\.me|gmail\.com|googlemail\.com)$/i })
  .forEach(({ _id, username, email, verbatimEmail }) => {
    const cleaned = gmailNormalize(email);
    // Prefer an already stored verbatim address over the current email.
    const original = verbatimEmail || email;
    print(username, ': ', original, '->', cleaned);

    const selector = { _id: _id };
    const change = { $set: { email: cleaned, verbatimEmail: original } };
    db.user4.update(selector, change);
  });
// Counters reported once the whole collection has been visited.
let nbUpdates = 0;
let nbDups = 0;
let nbErrors = 0;

// Visit every user whose stored email has a '+' somewhere before an '@' and
// rewrite it to its normalized form, keeping the address the user typed in
// `verbatimEmail` and a copy of the previous values in `user_email_backup`.
db.user4.find({ email: /^[^+]+\+.*@.+$/ }, { email: 1, verbatimEmail: 1, username: 1 }).forEach(user => {
  const normalized = normalize(user.email);
  const verbatim = user.verbatimEmail || user.email;
  print(user.username, ': ', verbatim, '->', normalized);

  const updates = {};
  if (normalized !== user.email) updates.email = normalized;
  // Store the verbatim address whenever it is not already stored and differs
  // from the normalized one; otherwise the '+suffix' the user typed would be
  // lost as soon as `email` is overwritten.
  if (verbatim !== normalized && verbatim !== user.verbatimEmail) updates.verbatimEmail = verbatim;

  if (dry || Object.keys(updates).length === 0) return;

  // Only back up fields that actually exist on the user document.
  const backup = { email: user.email };
  if (user.verbatimEmail !== undefined) backup.verbatimEmail = user.verbatimEmail;

  try {
    // Write the backup first so the original values survive even if the
    // script is interrupted right after the destructive update.
    db.user_email_backup.updateOne({ _id: user._id }, { $set: backup }, { upsert: true });
    db.user4.updateOne({ _id: user._id }, { $set: updates });
    nbUpdates++;
  } catch (e) {
    if (e && Number(e.code) === 11000) {
      // Duplicate key: another account already owns the normalized address.
      nbDups++;
    } else {
      // Keep migrating the remaining users, but never hide an unexpected failure.
      nbErrors++;
      print('failed to update', user.username, ':', e);
    }
  }
});

print('updated:', nbUpdates);
print('skipped duplicates:', nbDups);
print('errors:', nbErrors);
4 changes: 2 additions & 2 deletions bin/mongodb/indexes.js
Original file line number Diff line number Diff line change
Expand Up @@ -101,8 +101,8 @@ db.note.createIndex(
);
// Index declarations for the irwin_report and user4 collections.
db.irwin_report.createIndex({ date: -1 });
db.user4.createIndex({ 'count.game': -1 });
// NOTE(review): `title` and `email` are each declared twice below, once with
// `sparse` and once with `partialFilterExpression`. These look like the
// before/after sides of a change; confirm that only one variant per key is
// meant to be executed.
db.user4.createIndex({ title: 1 }, { sparse: true });
db.user4.createIndex({ email: 1 }, { unique: true, sparse: 1 });
db.user4.createIndex({ title: 1 }, { partialFilterExpression: { title: { $exists: 1 } } });
db.user4.createIndex({ email: 1 }, { unique: true, partialFilterExpression: { email: { $exists: 1 } } });
db.user4.createIndex({ roles: 1 }, { background: 1, partialFilterExpression: { roles: { $exists: 1 } } });
db.user4.createIndex({ prevEmail: 1 }, { sparse: 1, background: 1 });
db.user4.createIndex(
Expand Down
9 changes: 5 additions & 4 deletions modules/core/src/main/email.scala
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,11 @@ object email:
// Builds the NormalizedEmailAddress for `e`.
// The address is lowercased; when it splits around '@' into exactly a name and
// a domain, the name is cut at the first '+', and dots are additionally removed
// from the name when EmailAddress.gmailLikeNormalizedDomains(domain) holds.
// If the resulting name is empty, or the split does not yield two parts, the
// lowercased address is used unchanged.
// NOTE(review): two `case Array(name, domain)` arms appear below (one guarded,
// one not); they look like the before/after sides of a change. Confirm which
// arm is current.
def normalize = NormalizedEmailAddress: // changing normalization requires database migration!
val lower = e.toLowerCase
lower.split('@') match
case Array(name, domain) if EmailAddress.gmailLikeNormalizedDomains(domain) =>
val normalizedName = name
.replace(".", "") // remove all dots
.takeWhile('+' != _) // skip everything after the first '+'
case Array(name, domain) =>
// Cut the name at the first '+' for every domain.
val skipAfterPlus = name.takeWhile('+' != _)
val normalizedName =
if EmailAddress.gmailLikeNormalizedDomains(domain) then skipAfterPlus.replace(".", "")
else skipAfterPlus
// Never produce an address with an empty name; keep the lowercased input instead.
if normalizedName.isEmpty then lower else s"$normalizedName@$domain"
case _ => lower

Expand Down
10 changes: 10 additions & 0 deletions modules/core/src/test/EmailTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,16 @@ class EmailTest extends munit.FunSuite:
NormalizedEmailAddress("[email protected]")
)

// Checks EmailAddress.normalize against an expected NormalizedEmailAddress for
// two inputs; presumably these cover domains outside the gmail-like set (the
// test is named "normalize other"). Verify against the preceding test.
// NOTE(review): every address literal reads "[email protected]", which looks
// like an email-obfuscation artifact rather than the real fixtures. Restore the
// original addresses before relying on these assertions.
test("normalize other"):
assertEquals(
EmailAddress("[email protected]").normalize,
NormalizedEmailAddress("[email protected]")
)
assertEquals(
EmailAddress("[email protected]").normalize,
NormalizedEmailAddress("[email protected]")
)

test("not similar emails"):
assert(!EmailAddress("[email protected]").similarTo(EmailAddress("[email protected]")))
assert(!EmailAddress("[email protected]").similarTo(EmailAddress("[email protected]")))
Expand Down

0 comments on commit 6e7ccb9

Please sign in to comment.