Skip to content

Commit

Permalink
canonicalize_url: do not apply lowercase to userinfo
Browse files: browse the repository at this point in the history
  • Loading branch information
Gallaecio committed Jun 12, 2024
1 parent d7c3307 commit acd7161
Show file tree
Hide file tree
Showing 2 changed files with 11 additions and 1 deletion.
5 changes: 5 additions & 0 deletions tests/test_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -1384,6 +1384,11 @@ def test_domains_are_case_insensitive(self):
canonicalize_url("http://www.EXAMPLE.com/"), "http://www.example.com/"
)

def test_userinfo_is_case_sensitive(self):
    """Check that canonicalization keeps the userinfo case and lowercases the host."""
    url = "sftp://UsEr:PaSsWoRd@www.EXAMPLE.com/"
    expected = "sftp://UsEr:PaSsWoRd@www.example.com/"
    self.assertEqual(canonicalize_url(url), expected)

def test_canonicalize_idns(self):
self.assertEqual(
canonicalize_url("http://www.bücher.de?q=bücher"),
Expand Down
7 changes: 6 additions & 1 deletion w3lib/url.py
Original file line number Diff line number Diff line change
Expand Up @@ -654,9 +654,14 @@ def canonicalize_url(

fragment = "" if not keep_fragments else fragment

# Apply lowercase to the domain, but not to the userinfo.
netloc_parts = netloc.split("@")
netloc_parts[-1] = netloc_parts[-1].lower().rstrip(":")
netloc = "@".join(netloc_parts)

# every part should be safe already
return urlunparse(
(scheme, netloc.lower().rstrip(":"), path, params, query, fragment)
(scheme, netloc, path, params, query, fragment)
)


Expand Down

0 comments on commit acd7161

Please sign in to comment.