From c8bf67a50f73b0682fe3e1d73dc8fccff642fcea Mon Sep 17 00:00:00 2001 From: htsedebenham Date: Sat, 22 Jul 2023 13:45:31 +0100 Subject: [PATCH 1/5] Write failing test --- Lib/test/test_email/data/msg_47.txt | 14 ++++++++++++++ Lib/test/test_email/test_parser.py | 11 ++++++++++- 2 files changed, 24 insertions(+), 1 deletion(-) create mode 100644 Lib/test/test_email/data/msg_47.txt diff --git a/Lib/test/test_email/data/msg_47.txt b/Lib/test/test_email/data/msg_47.txt new file mode 100644 index 00000000000000..bb48b47d96baf8 --- /dev/null +++ b/Lib/test/test_email/data/msg_47.txt @@ -0,0 +1,14 @@ +Date: 01 Jan 2001 00:01+0000 +From: arthur@example.example +MIME-Version: 1.0 +Content-Type: multipart/mixed; boundary=foo + +--foo +Content-Type: text/plain +bar + +--foo +Content-Type: text/html +

baz

+ +--foo-- \ No newline at end of file diff --git a/Lib/test/test_email/test_parser.py b/Lib/test/test_email/test_parser.py index 06c86408ab52b9..6fd517a66d30a7 100644 --- a/Lib/test/test_email/test_parser.py +++ b/Lib/test/test_email/test_parser.py @@ -3,7 +3,7 @@ import unittest from email.message import Message, EmailMessage from email.policy import default -from test.test_email import TestEmailBase +from test.test_email import openfile, TestEmailBase class TestCustomMessage(TestEmailBase): @@ -67,6 +67,15 @@ def test_only_split_on_cr_lf(self): ]) self.assertEqual(msg.get_payload(), "") + def test_headers_only_multipart(self): + with openfile('msg_47.txt', encoding="utf-8") as fp: + msgdata = fp.read() + + parser = email.parser.Parser(policy=email.policy.default) + parsed_msg = parser.parsestr(msgdata, headersonly=True) + + self.assertEqual(parsed_msg.defects, []) + class MyMessage(EmailMessage): pass From 3ef320b5d230703bb3d59ab9a152f7fc7570ba90 Mon Sep 17 00:00:00 2001 From: htsedebenham Date: Sat, 22 Jul 2023 13:46:24 +0100 Subject: [PATCH 2/5] Move test to test_email.py --- Lib/test/test_email/test_email.py | 10 ++++++++++ Lib/test/test_email/test_parser.py | 8 -------- 2 files changed, 10 insertions(+), 8 deletions(-) diff --git a/Lib/test/test_email/test_email.py b/Lib/test/test_email/test_email.py index b4f3a2481976e8..cdb6ef1275e520 100644 --- a/Lib/test/test_email/test_email.py +++ b/Lib/test/test_email/test_email.py @@ -3712,6 +3712,16 @@ def test_bytes_header_parser(self): self.assertIsInstance(msg.get_payload(), str) self.assertIsInstance(msg.get_payload(decode=True), bytes) + def test_header_parser_multipart_is_valid(self): + # Don't flag valid multipart emails as having defects + with openfile('msg_47.txt', encoding="utf-8") as fp: + msgdata = fp.read() + + parser = email.parser.Parser(policy=email.policy.default) + parsed_msg = parser.parsestr(msgdata, headersonly=True) + + self.assertEqual(parsed_msg.defects, []) + def 
test_bytes_parser_does_not_close_file(self): with openfile('msg_02.txt', 'rb') as fp: email.parser.BytesParser().parse(fp) diff --git a/Lib/test/test_email/test_parser.py b/Lib/test/test_email/test_parser.py index 6fd517a66d30a7..496cc1291e7a79 100644 --- a/Lib/test/test_email/test_parser.py +++ b/Lib/test/test_email/test_parser.py @@ -67,14 +67,6 @@ def test_only_split_on_cr_lf(self): ]) self.assertEqual(msg.get_payload(), "") - def test_headers_only_multipart(self): - with openfile('msg_47.txt', encoding="utf-8") as fp: - msgdata = fp.read() - - parser = email.parser.Parser(policy=email.policy.default) - parsed_msg = parser.parsestr(msgdata, headersonly=True) - - self.assertEqual(parsed_msg.defects, []) class MyMessage(EmailMessage): pass From 037bd31c1949f27b90690a884112d401b1f23e57 Mon Sep 17 00:00:00 2001 From: htsedebenham Date: Sat, 22 Jul 2023 13:46:34 +0100 Subject: [PATCH 3/5] Ignore MultipartInvariantViolationDefect for message body if only considering the headers --- Lib/email/feedparser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/email/feedparser.py b/Lib/email/feedparser.py index 53d71f50225152..06d6b4a3afcd07 100644 --- a/Lib/email/feedparser.py +++ b/Lib/email/feedparser.py @@ -189,7 +189,7 @@ def close(self): assert not self._msgstack # Look for final set of defects if root.get_content_maintype() == 'multipart' \ - and not root.is_multipart(): + and not root.is_multipart() and not self._headersonly: defect = errors.MultipartInvariantViolationDefect() self.policy.handle_defect(root, defect) return root From 9d75f98dfec49dd21874ae9bef586446754ff3f5 Mon Sep 17 00:00:00 2001 From: htsedebenham Date: Sat, 22 Jul 2023 13:46:43 +0100 Subject: [PATCH 4/5] Tidy imports --- Lib/test/test_email/test_parser.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/Lib/test/test_email/test_parser.py b/Lib/test/test_email/test_parser.py index 496cc1291e7a79..06c86408ab52b9 100644 --- a/Lib/test/test_email/test_parser.py 
+++ b/Lib/test/test_email/test_parser.py @@ -3,7 +3,7 @@ import unittest from email.message import Message, EmailMessage from email.policy import default -from test.test_email import openfile, TestEmailBase +from test.test_email import TestEmailBase class TestCustomMessage(TestEmailBase): @@ -67,7 +67,6 @@ def test_only_split_on_cr_lf(self): ]) self.assertEqual(msg.get_payload(), "") - class MyMessage(EmailMessage): pass From 11a79b27201a67ce333a40195ff608255982673e Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Sat, 22 Jul 2023 13:09:29 +0000 Subject: [PATCH 5/5] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../Library/2023-07-22-13-09-28.gh-issue-106186.EIsUNG.rst | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2023-07-22-13-09-28.gh-issue-106186.EIsUNG.rst diff --git a/Misc/NEWS.d/next/Library/2023-07-22-13-09-28.gh-issue-106186.EIsUNG.rst b/Misc/NEWS.d/next/Library/2023-07-22-13-09-28.gh-issue-106186.EIsUNG.rst new file mode 100644 index 00000000000000..07fdcc96fa38a6 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2023-07-22-13-09-28.gh-issue-106186.EIsUNG.rst @@ -0,0 +1,3 @@ +Do not report the ``MultipartInvariantViolationDefect`` defect +when the :class:`email.parser.Parser` class is used +to parse emails with ``headersonly=True``.