Skip to content

Commit

Permalink
Merge pull request #8613 from cfpb/fix/cfpb-url-matching
Browse files Browse the repository at this point in the history
Fix link markup for certain non-cf.gov URLs
  • Loading branch information
chosak authored Oct 21, 2024
2 parents 8882bf2 + 6d16ab9 commit 6c8ee51
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 2 deletions.
33 changes: 33 additions & 0 deletions cfgov/core/tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from core.templatetags.svg_icon import svg_icon
from core.utils import (
ASK_CFPB_LINKS,
NON_CFPB_LINKS,
add_link_markup,
extract_answers_from_request,
format_file_size,
Expand Down Expand Up @@ -277,3 +278,35 @@ def test_link_with_whitespace_in_text(self):

expected_tag = BeautifulSoup(expected_html, "html.parser")
self.assertEqual(add_link_markup(tag, path), str(expected_tag))

def test_non_cfpb_links(self):
    """Check which URLs the NON_CFPB_LINKS pattern does and does not match.

    URLs on consumerfinance.gov, cfpb.gov, or localhost (with or without
    a subdomain, port, or path) must not match; URLs on any other host
    must match, even when a CFPB domain name appears in their path or as
    the tail of a longer hostname.
    """
    # Hosts that belong to CFPB or to a local development server.
    internal_urls = (
        "http://consumerfinance.gov",
        "https://consumerfinance.gov",
        "http://cfpb.gov",
        "https://cfpb.gov",
        "http://www.consumerfinance.gov",
        "https://www.consumerfinance.gov",
        "http://localhost",
        "https://localhost",
        "http://localhost:8000",
        "http://content.localhost:8000",
        "https://www.consumerfinance.gov/foo/bar/",
    )
    # Hosts that are not CFPB, including ones that merely mention a CFPB
    # domain somewhere other than the end of the hostname.
    external_urls = (
        "https://example.com/page/",
        "https://example.com/foo/www.consumerfinance.gov/bar/",
        "http://example.com/foo/cfpb.gov/bar/",
        "https://subdomain.example.com:1234",
        "https://subdomain.example.com:1234/foo/",
        "http://notconsumerfinance.gov",
    )

    # Pair each URL group with the assertion it must satisfy; internal
    # URLs are checked first, then external ones.
    expectations = (
        (self.assertFalse, internal_urls),
        (self.assertTrue, external_urls),
    )

    for check, urls in expectations:
        for url in urls:
            with self.subTest(url=url):
                check(NON_CFPB_LINKS.match(url))
14 changes: 12 additions & 2 deletions cfgov/core/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,18 @@


# Matches http(s) URLs whose host is NOT consumerfinance.gov, cfpb.gov,
# localhost, or a subdomain of one of those. Only the host (and optional
# port) is inspected, so a CFPB domain name appearing in the path of another
# site's URL does not count. The match consumes just the scheme prefix; use
# the result as a yes/no test rather than reading the matched text.
NON_CFPB_LINKS = re.compile(
    # HTTP or HTTPS
    r"https?:\/\/"
    # Negative lookahead: don't match anything that matches what follows
    r"(?!"
    # Match any subdomains (labels may contain hyphens)
    r"(?:[\w-]+\.)*"
    # Match consumerfinance.gov, cfpb.gov, or localhost
    r"(?:consumerfinance\.gov|cfpb\.gov|localhost)"
    # Match a port number, if provided
    r"(?::\d+)?"
    # The host must end here (path, query, fragment, or end of string);
    # without this, hosts that merely start with a CFPB domain, such as
    # consumerfinance.gov.example.com, would be treated as CFPB links
    r"(?:[/?#]|$)"
    r")"
)

LINK_PATTERN = re.compile(
Expand Down

0 comments on commit 6c8ee51

Please sign in to comment.