- Refactor ContentContainer to Vertical layout with fixed header + scrollable content
- Change EnvelopeHeader to ScrollableContainer for long recipient lists
- Parse headers from message content (fixes empty To: field from himalaya)
- Strip all email headers, MIME boundaries, base64 blocks from body display
- Add 22 unit tests for header parsing and content stripping
- Cancelled meeting emails now render with empty body as expected
376 lines
13 KiB
Python
376 lines
13 KiB
Python
"""Unit tests for email header parsing from message content.

Run with:
    pytest tests/test_header_parsing.py -v
"""

import sys
from pathlib import Path

import pytest

# Add project root to path for proper imports.
# The root is taken to be the directory two levels above this file, so the
# ``src.`` package imports used by the fixtures below resolve when the tests
# are run directly.
project_root = str(Path(__file__).parent.parent)
if project_root not in sys.path:
    # Prepend so the in-tree package wins over any other copy on sys.path.
    sys.path.insert(0, project_root)
# Sample cancelled meeting email from himalaya (message 114).
#
# Layout of the fixture:
#   1. a simplified From/To/Cc/Subject summary block,
#   2. a blank line,
#   3. the raw message headers (including folded, multi-line headers),
#   4. a multipart/alternative body with an empty text/plain part and a
#      base64-encoded text/calendar part.
#
# NOTE(review): folded header continuation lines are written with a single
# leading space (RFC 5322 folding); the exact original whitespace could not be
# recovered from the captured source -- confirm against the real message.
CANCELLED_MEETING_EMAIL = """From: Marshall <unknown>, Cody <john.marshall@corteva.com>
To: Ruttencutter <unknown>, Chris <chris.ruttencutter@corteva.com>, Dake <unknown>, Ryan <ryan.dake@corteva.com>, Smith <unknown>, James <james.l.smith@corteva.com>, Santana <unknown>, Jonatas <jonatas.santana@corteva.com>
Cc: Bendt <unknown>, Timothy <timothy.bendt@corteva.com>
Subject: Canceled: Technical Refinement

Received: from CY8PR17MB7060.namprd17.prod.outlook.com (2603:10b6:930:6d::6)
 by PH7PR17MB7149.namprd17.prod.outlook.com with HTTPS; Fri, 19 Dec 2025
 19:12:45 +0000
Received: from SA6PR17MB7362.namprd17.prod.outlook.com (2603:10b6:806:411::6)
 by CY8PR17MB7060.namprd17.prod.outlook.com (2603:10b6:930:6d::6) with
 Microsoft SMTP Server (version=TLS1_2,
 cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.20.9434.8; Fri, 19 Dec
 2025 19:12:42 +0000
From: "Marshall, Cody" <john.marshall@corteva.com>
To: "Ruttencutter, Chris" <chris.ruttencutter@corteva.com>, "Dake, Ryan"
 <ryan.dake@corteva.com>, "Smith, James" <james.l.smith@corteva.com>,
 "Santana, Jonatas" <jonatas.santana@corteva.com>
CC: "Bendt, Timothy" <timothy.bendt@corteva.com>
Subject: Canceled: Technical Refinement
Thread-Topic: Technical Refinement
Thread-Index: AdoSeicQGeYQHp7iHUWAUBWrOGskKw==
Importance: high
X-Priority: 1
Date: Fri, 19 Dec 2025 19:12:42 +0000
Message-ID:
 <SA6PR17MB7362D5E1A906728B63A804D2E4A9ASA6PR17MB7362.namprd17.prod.outlook.com>
Accept-Language: en-US
Content-Language: en-US
X-MS-Exchange-Organization-AuthAs: Internal
X-MS-Exchange-Organization-AuthMechanism: 04
Content-Type: multipart/alternative;
 boundary="_002_SA6PR17MB7362D5E1A906728B63A804D2E4A9ASA6PR17MB7362namp_"
MIME-Version: 1.0

--_002_SA6PR17MB7362D5E1A906728B63A804D2E4A9ASA6PR17MB7362namp_
Content-Type: text/plain; charset="us-ascii"


--_002_SA6PR17MB7362D5E1A906728B63A804D2E4A9ASA6PR17MB7362namp_
Content-Type: text/calendar; charset="utf-8"; method=CANCEL
Content-Transfer-Encoding: base64

QkVHSU46VkNBTEVOREFSDQpNRVRIT0Q6Q0FOQ0VMDQpQUk9ESUQ6TWljcm9zb2Z0IEV4Y2hhbmdl
IFNlcnZlciAyMDEwDQpWRVJTSU9OOjIuMA0KQkVHSU46VlRJTUVaT05FDQpUWklEOkNlbnRyYWwg
U3RhbmRhcmQgVGltZQ0KQkVHSU46U1RBTkRBUkQNCkRUU1RBUlQ6MTYwMTAxMDFUMDIwMDAwDQpU

--_002_SA6PR17MB7362D5E1A906728B63A804D2E4A9ASA6PR17MB7362namp_--
"""
class TestParseHeadersFromContent:
    """Tests for _parse_headers_from_content method.

    The assertions below expect the parser to return a dict keyed by
    lower-cased header name (``from``, ``to``, ``cc``, ``subject``, ``date``)
    and to ignore every other header.
    """

    @pytest.fixture
    def parser(self):
        """Create a ContentContainer and return its header-parsing method."""
        # We need to create a minimal instance just for the parsing method.
        # Import here to avoid circular imports.
        from src.mail.widgets.ContentContainer import ContentContainer

        container = ContentContainer()
        return container._parse_headers_from_content

    def test_parse_simple_headers(self, parser):
        """Test parsing simple email headers."""
        content = """From: John Doe <john@example.com>
To: Jane Smith <jane@example.com>
Subject: Test Email
Date: Mon, 29 Dec 2025 10:00:00 +0000

This is the body of the email.
"""
        headers = parser(content)

        assert headers["from"] == "John Doe <john@example.com>"
        assert headers["to"] == "Jane Smith <jane@example.com>"
        assert headers["subject"] == "Test Email"
        assert headers["date"] == "Mon, 29 Dec 2025 10:00:00 +0000"

    def test_parse_multiple_recipients(self, parser):
        """Test parsing headers with multiple recipients."""
        content = """From: sender@example.com
To: user1@example.com, user2@example.com, user3@example.com
Subject: Multi-recipient email
Date: 2025-12-29

Body here.
"""
        headers = parser(content)

        assert (
            headers["to"] == "user1@example.com, user2@example.com, user3@example.com"
        )

    def test_parse_with_cc(self, parser):
        """Test parsing headers including CC."""
        content = """From: sender@example.com
To: recipient@example.com
CC: cc1@example.com, cc2@example.com
Subject: Email with CC
Date: 2025-12-29

Body.
"""
        headers = parser(content)

        assert headers["to"] == "recipient@example.com"
        assert headers["cc"] == "cc1@example.com, cc2@example.com"

    def test_parse_multiline_to_header(self, parser):
        """Test parsing To header that spans multiple lines."""
        # Continuation lines start with whitespace (RFC 5322 header folding).
        # NOTE(review): the indentation width here was reconstructed --
        # confirm it matches what the parser is expected to accept.
        content = """From: sender@example.com
To: First User <first@example.com>,
    Second User <second@example.com>,
    Third User <third@example.com>
Subject: Multi-line To
Date: 2025-12-29

Body.
"""
        headers = parser(content)

        # Should combine continuation lines
        assert "First User" in headers["to"]
        assert "Second User" in headers["to"]
        assert "Third User" in headers["to"]

    def test_parse_with_name_and_email(self, parser):
        """Test parsing headers with display names and email addresses."""
        content = """From: Renovate Bot (SA @renovate-bot-sa) <gitlab@example.com>
To: Bendt <unknown>, Timothy <timothy.bendt@example.com>
Subject: Test Subject
Date: 2025-12-29 02:07+00:00

Body content.
"""
        headers = parser(content)

        assert (
            headers["from"] == "Renovate Bot (SA @renovate-bot-sa) <gitlab@example.com>"
        )
        assert "Timothy <timothy.bendt@example.com>" in headers["to"]
        assert "Bendt <unknown>" in headers["to"]

    def test_parse_empty_content(self, parser):
        """Test parsing empty content."""
        headers = parser("")
        assert headers == {}

    def test_parse_no_headers(self, parser):
        """Test parsing content with no recognizable headers."""
        content = """This is just body content
without any headers.
"""
        headers = parser(content)
        assert headers == {}

    def test_parse_ignores_unknown_headers(self, parser):
        """Test that unknown headers are ignored."""
        content = """From: sender@example.com
X-Custom-Header: some value
To: recipient@example.com
Message-ID: <123@example.com>
Subject: Test
Date: 2025-12-29

Body.
"""
        headers = parser(content)

        # Should only have the recognized headers
        assert set(headers.keys()) == {"from", "to", "subject", "date"}
        assert "X-Custom-Header" not in headers
        assert "Message-ID" not in headers

    def test_parse_real_himalaya_output(self, parser):
        """Test parsing actual himalaya message read output format."""
        content = """From: Renovate Bot (SA @renovate-bot-sa) <gitlab@gitlab.research.corteva.com>
To: Bendt <unknown>, Timothy <timothy.bendt@corteva.com>
Subject: Re: Fabric3 Monorepo | chore(deps): update vitest monorepo to v4 (major) (!6861)

Renovate Bot (SA) pushed new commits to merge request !6861<https://gitlab.research.corteva.com/granular/fabric/fabric3/-/merge_requests/6861>

* f96fec2b...2fb2ae10 <https://gitlab.research.corteva.com/granular/fabric/fabric3/-/compare/f96fec2b...2fb2ae10> - 2 commits from branch `main`
"""
        headers = parser(content)

        assert (
            headers["from"]
            == "Renovate Bot (SA @renovate-bot-sa) <gitlab@gitlab.research.corteva.com>"
        )
        assert headers["to"] == "Bendt <unknown>, Timothy <timothy.bendt@corteva.com>"
        assert "Fabric3 Monorepo" in headers["subject"]

    def test_parse_cancelled_meeting_headers(self, parser):
        """Test parsing headers from a cancelled meeting email."""
        headers = parser(CANCELLED_MEETING_EMAIL)

        # Should extract the first occurrence of headers (simplified format)
        assert "Canceled: Technical Refinement" in headers.get("subject", "")
        assert "corteva.com" in headers.get("to", "")
        assert "cc" in headers  # Should have CC
class TestStripHeadersFromContent:
    """Tests for _strip_headers_from_content method.

    The assertions below expect the stripper to remove email headers, MIME
    boundary markers, base64 blocks and vCalendar data, leaving only the
    human-readable body text.
    """

    @pytest.fixture
    def stripper(self):
        """Create a ContentContainer and return its header-stripping method."""
        # Imported inside the fixture, like the other fixtures in this module.
        from src.mail.widgets.ContentContainer import ContentContainer

        container = ContentContainer()
        return container._strip_headers_from_content

    def test_strip_simple_headers(self, stripper):
        """Test stripping simple headers from content."""
        content = """From: sender@example.com
To: recipient@example.com
Subject: Test

This is the body.
"""
        result = stripper(content)

        assert "From:" not in result
        assert "To:" not in result
        assert "Subject:" not in result
        assert "This is the body" in result

    def test_strip_mime_boundaries(self, stripper):
        """Test stripping MIME boundary markers."""
        content = """From: sender@example.com
Subject: Test

--boundary123456789
Content-Type: text/plain

Hello world

--boundary123456789--
"""
        result = stripper(content)

        assert "--boundary" not in result
        assert "Hello world" in result

    def test_strip_base64_content(self, stripper):
        """Test stripping base64 encoded content."""
        content = """From: sender@example.com
Subject: Test

--boundary
Content-Type: text/calendar
Content-Transfer-Encoding: base64

QkVHSU46VkNBTEVOREFSDQpNRVRIT0Q6Q0FOQ0VMDQpQUk9ESUQ6TWljcm9zb2Z0
IEV4Y2hhbmdlIFNlcnZlciAyMDEwDQpWRVJTSU9OOjIuMA0KQkVHSU46VlRJTUVa

--boundary--
"""
        result = stripper(content)

        # Should not contain base64 content.
        # One marker per base64 line, so a stripper that only drops the first
        # line is still caught. Both markers are literal substrings of the
        # fixture above (the previous second marker was not, which made that
        # assertion pass unconditionally).
        assert "QkVHSU46" not in result
        assert "WRVJTSU9OOjIuMA" not in result

    def test_strip_cancelled_meeting_email(self, stripper):
        """Test stripping a cancelled meeting email - should result in empty/minimal content."""
        result = stripper(CANCELLED_MEETING_EMAIL)

        # Should not contain headers
        assert "From:" not in result
        assert "To:" not in result
        assert "Subject:" not in result
        assert "Received:" not in result
        assert "Content-Type:" not in result

        # Should not contain MIME boundaries
        assert "--_002_" not in result

        # Should not contain base64
        assert "QkVHSU46" not in result

        # The result should be empty or just whitespace since the text/plain
        # part is empty; a small tolerance (< 50 chars) is allowed for residue.
        assert len(result.strip()) < 50

    def test_strip_vcalendar_content(self, stripper):
        """Test stripping vCalendar/ICS content."""
        content = """From: sender@example.com
Subject: Meeting

BEGIN:VCALENDAR
METHOD:REQUEST
VERSION:2.0
BEGIN:VEVENT
SUMMARY:Team Meeting
END:VEVENT
END:VCALENDAR
"""
        result = stripper(content)

        assert "BEGIN:VCALENDAR" not in result
        assert "END:VCALENDAR" not in result
        assert "VEVENT" not in result
class TestFormatRecipients:
    """Tests for _format_recipients method.

    The inputs exercised are a plain string, a ``{"name", "addr"}`` dict, a
    list of such dicts, and empty values (``None``, ``""``, ``[]``).
    """

    @pytest.fixture
    def formatter(self):
        """Create a ContentContainer and return its recipient-formatting method."""
        # Imported inside the fixture, like the other fixtures in this module.
        from src.mail.widgets.ContentContainer import ContentContainer

        container = ContentContainer()
        return container._format_recipients

    def test_format_string_recipient(self, formatter):
        """Test formatting a string recipient."""
        result = formatter("user@example.com")
        assert result == "user@example.com"

    def test_format_dict_recipient(self, formatter):
        """Test formatting a dict recipient."""
        result = formatter({"name": "John Doe", "addr": "john@example.com"})
        assert result == "John Doe <john@example.com>"

    def test_format_dict_recipient_name_only(self, formatter):
        """Test formatting a dict with name only."""
        result = formatter({"name": "John Doe", "addr": ""})
        assert result == "John Doe"

    def test_format_dict_recipient_addr_only(self, formatter):
        """Test formatting a dict with addr only."""
        result = formatter({"name": None, "addr": "john@example.com"})
        assert result == "john@example.com"

    def test_format_dict_recipient_empty(self, formatter):
        """Test formatting an empty dict recipient."""
        result = formatter({"name": None, "addr": ""})
        assert result == ""

    def test_format_list_recipients(self, formatter):
        """Test formatting a list of recipients."""
        result = formatter(
            [
                {"name": "John", "addr": "john@example.com"},
                {"name": "Jane", "addr": "jane@example.com"},
            ]
        )
        assert result == "John <john@example.com>, Jane <jane@example.com>"

    def test_format_empty(self, formatter):
        """Test formatting empty input."""
        assert formatter(None) == ""
        assert formatter("") == ""
        assert formatter([]) == ""