Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

hg: add email validation for hg users (bug 1691635) #217

Open
wants to merge 4 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 22 additions & 1 deletion landoapi/hg.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
from __future__ import annotations
import copy
from contextlib import contextmanager
import configparser
import logging
import os
from pathlib import Path
from re import search
import shlex
import shutil
import tempfile
Expand All @@ -17,6 +19,7 @@
import hglib

from landoapi.hgexports import PatchHelper
from landoapi.validation import is_valid_email

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -353,10 +356,15 @@ def apply_patch(self, patch_io_buf):
# --landing_system is provided by the set_landing_system hgext.
date = patch_helper.header("Date")
user = patch_helper.header("User")

if not user:
raise ValueError("Missing `User` header!")

email = self.extract_email_from_username(user)
if not is_valid_email(email):
raise ValueError(
f"Invalid email ({email}) configured for Mercurial user!"
)

if not date:
raise ValueError("Missing `Date` header!")

Expand Down Expand Up @@ -519,3 +527,16 @@ def read_checkout_file(self, path: str) -> str:

with checkout_file_path.open() as f:
return f.read()

@staticmethod
def extract_email_from_username(username: str | bytes) -> str:
"""Extracts an email from a Mercurial username, if it exists.

Not guaranteed to return a valid email, make sure to validate."""
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
Not guaranteed to return a valid email, make sure to validate."""
Not guaranteed to return a valid email, make sure to validate.
"""

email = search(r"<.*?>", str(username))
if email:
return email.group(0).replace("<", "").replace(">", "")
Comment on lines +536 to +538
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This can be simplified/cleaned up using grouping.


# If there is no value between angle brackets in the string,
# then there is no Mercurial email configured
return ""
20 changes: 20 additions & 0 deletions landoapi/validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,3 +35,23 @@ def parse_landing_path(landing_path: list[dict]) -> list[tuple[int, int]]:
f"The provided landing_path was malformed.\n{str(e)}",
type="https://developer.mozilla.org/en-US/docs/Web/HTTP/Status/400",
)


def is_valid_email(email: str) -> bool:
"""Given a string, determines if it is a valid email.

For the prefix, it will check for alphanumeric characters and acceptable
special characters (.-_), but still ensure an alphanumeric comes before
the @ symbol.

For the suffix, it will check for an alphanumeric subdomain and accept hyphens.
It then checks the TLD to make sure it only contains alphabet characters with
a minimum length of two.

Pattern modified from:
https://stackabuse.com/python-validate-email-address-with-regular-expressions-regex
"""
accepted_email_re = re.compile(
r"([A-Za-z\d\-_.])*[A-Za-z\d]+@[A-Za-z\d\-]+(\.[A-Z|a-z]{2,})+"
)
return accepted_email_re.match(email) is not None
58 changes: 58 additions & 0 deletions tests/test_hg.py
Original file line number Diff line number Diff line change
Expand Up @@ -173,6 +173,37 @@ def test_integrated_hgrepo_can_log(hg_clone):
+adding another line
""".strip()

PATCH_NO_EMAIL = b"""
# HG changeset patch
# User Test User
# Date 0 0
# Thu Jan 01 00:00:00 1970 +0000
# Diff Start Line 7
add another file.
diff --git a/test.txt b/test.txt
--- a/test.txt
+++ b/test.txt
@@ -1,1 +1,2 @@
TEST
+adding another line
""".strip()


PATCH_INVALID_EMAIL = b"""
# HG changeset patch
# User Test User <test@example>
# Date 0 0
# Thu Jan 01 00:00:00 1970 +0000
# Diff Start Line 7
add another file.
diff --git a/test.txt b/test.txt
--- a/test.txt
+++ b/test.txt
@@ -1,1 +1,2 @@
TEST
+adding another line
""".strip()


def test_integrated_hgrepo_apply_patch(hg_clone):
repo = HgRepo(hg_clone.strpath)
Expand All @@ -186,6 +217,14 @@ def test_integrated_hgrepo_apply_patch(hg_clone):
with pytest.raises(PatchConflict), repo.for_pull():
repo.apply_patch(io.BytesIO(PATCH_WITH_CONFLICT))

# Mercurial users with invalid emails raise an error
with pytest.raises(ValueError), repo.for_pull():
repo.apply_patch(io.BytesIO(PATCH_INVALID_EMAIL))

# Mercurial users with no email configured raise an error
with pytest.raises(ValueError), repo.for_pull():
repo.apply_patch(io.BytesIO(PATCH_NO_EMAIL))

with repo.for_pull():
repo.apply_patch(io.BytesIO(PATCH_NORMAL))
# Commit created.
Expand Down Expand Up @@ -251,3 +290,22 @@ def test_hgrepo_request_user(hg_clone):
assert REQUEST_USER_ENV_VAR in os.environ
assert os.environ[REQUEST_USER_ENV_VAR] == "[email protected]"
assert REQUEST_USER_ENV_VAR not in os.environ


def test_extract_email(hg_clone):
repo = HgRepo(hg_clone.strpath)

# Empty username
assert repo.extract_email_from_username("") == ""

# Username without email
assert repo.extract_email_from_username("test user") == ""

# Username with email
assert (
repo.extract_email_from_username("test user <[email protected]>")
== "[email protected]"
)

# Username with invalid email
assert repo.extract_email_from_username("test <test@test>") == "test@test"
27 changes: 26 additions & 1 deletion tests/test_validation.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import pytest
from connexion import ProblemException

from landoapi.validation import revision_id_to_int
from landoapi.validation import revision_id_to_int, is_valid_email


def test_convertion_success():
Expand All @@ -20,3 +20,28 @@ def test_convertion_failure_string(id):
def test_convertion_failure_integer():
with pytest.raises(TypeError):
revision_id_to_int(123)


def test_is_valid_email():
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

More test cases can be added here (for example test <test> and test <test@test>. And perhaps a few malformed ones.

invalid_emails = [
"Test User",
"test <test>",
"test <test@test>",
"test@",
"-@...",
"test@mozilla.",
"[email protected]",
]
valid_emails = [
"[email protected]",
"[email protected]",
"[email protected]",
"[email protected]",
"[email protected]",
"[email protected]",
"[email protected]",
"[email protected]",
]

assert True not in [is_valid_email(value) for value in invalid_emails]
assert False not in [is_valid_email(value) for value in valid_emails]
Comment on lines +46 to +47
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

nit: more of a personal preference than anything else, but could be a little more human readable.

Suggested change
assert True not in [is_valid_email(value) for value in invalid_emails]
assert False not in [is_valid_email(value) for value in valid_emails]
assert not any(is_valid_email(value) for value in invalid_emails)
assert all(is_valid_email(value) for value in valid_emails)