From 2b578479b96aa3deeeb8bac313a02b5cf3cb1aff Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Tue, 11 Jun 2019 12:45:35 +0200 Subject: [PATCH] [2.7] bpo-36742: Fix urlparse.urlsplit() error message for Unicode URL (GH-13937) If urlparse.urlsplit() detects an invalid netloc according to NFKC normalization, the error message type is now str rather than unicode, and repr() is used to format the URL, to prevent an error when displaying the error message. Signed-off-by: Peter Korsgaard --- Lib/test/test_urlparse.py | 9 +++++++++ Lib/urlparse.py | 5 +++-- .../NEWS.d/next/Library/2019-06-10-12-02-45.bpo-36742.UEdHXJ.rst | 3 +++ 3 files changed, 15 insertions(+), 2 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2019-06-10-12-02-45.bpo-36742.UEdHXJ.rst diff --git a/Lib/test/test_urlparse.py b/Lib/test/test_urlparse.py index 857ed96d92..86c4a0595c 100644 --- a/Lib/test/test_urlparse.py +++ b/Lib/test/test_urlparse.py @@ -656,6 +656,15 @@ class UrlParseTestCase(unittest.TestCase): with self.assertRaises(ValueError): urlparse.urlsplit(url) + # check error message: invalid netloc must be formated with repr() + # to get an ASCII error message + with self.assertRaises(ValueError) as cm: + urlparse.urlsplit(u'http://example.com\uFF03@bing.com') + self.assertEqual(str(cm.exception), + "netloc u'example.com\\uff03@bing.com' contains invalid characters " + "under NFKC normalization") + self.assertIsInstance(cm.exception.args[0], str) + def test_main(): test_support.run_unittest(UrlParseTestCase) diff --git a/Lib/urlparse.py b/Lib/urlparse.py index 6834f3c179..798b467b60 100644 --- a/Lib/urlparse.py +++ b/Lib/urlparse.py @@ -180,8 +180,9 @@ def _checknetloc(netloc): return for c in '/?#@:': if c in netloc2: - raise ValueError(u"netloc '" + netloc + u"' contains invalid " + - u"characters under NFKC normalization") + raise ValueError("netloc %r contains invalid characters " + "under NFKC normalization" + % netloc) def urlsplit(url, scheme='', allow_fragments=True): """Parse a URL into 5 
components: diff --git a/Misc/NEWS.d/next/Library/2019-06-10-12-02-45.bpo-36742.UEdHXJ.rst b/Misc/NEWS.d/next/Library/2019-06-10-12-02-45.bpo-36742.UEdHXJ.rst new file mode 100644 index 0000000000..3ba774056f --- /dev/null +++ b/Misc/NEWS.d/next/Library/2019-06-10-12-02-45.bpo-36742.UEdHXJ.rst @@ -0,0 +1,3 @@ +:func:`urlparse.urlsplit` error message for invalid ``netloc`` according to +NFKC normalization is now a :class:`str` string, rather than a +:class:`unicode` string, to prevent error when displaying the error. -- 2.11.0