"""Unit tests for email header parsing from message content. Run with: pytest tests/test_header_parsing.py -v """ import pytest import sys from pathlib import Path # Add project root to path for proper imports project_root = str(Path(__file__).parent.parent) if project_root not in sys.path: sys.path.insert(0, project_root) # Sample cancelled meeting email from himalaya (message 114) CANCELLED_MEETING_EMAIL = """From: Marshall , Cody To: Ruttencutter , Chris , Dake , Ryan , Smith , James , Santana , Jonatas Cc: Bendt , Timothy Subject: Canceled: Technical Refinement Received: from CY8PR17MB7060.namprd17.prod.outlook.com (2603:10b6:930:6d::6) by PH7PR17MB7149.namprd17.prod.outlook.com with HTTPS; Fri, 19 Dec 2025 19:12:45 +0000 Received: from SA6PR17MB7362.namprd17.prod.outlook.com (2603:10b6:806:411::6) by CY8PR17MB7060.namprd17.prod.outlook.com (2603:10b6:930:6d::6) with Microsoft SMTP Server (version=TLS1_2, cipher=TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384) id 15.20.9434.8; Fri, 19 Dec 2025 19:12:42 +0000 From: "Marshall, Cody" To: "Ruttencutter, Chris" , "Dake, Ryan" , "Smith, James" , "Santana, Jonatas" CC: "Bendt, Timothy" Subject: Canceled: Technical Refinement Thread-Topic: Technical Refinement Thread-Index: AdoSeicQGeYQHp7iHUWAUBWrOGskKw== Importance: high X-Priority: 1 Date: Fri, 19 Dec 2025 19:12:42 +0000 Message-ID: Accept-Language: en-US Content-Language: en-US X-MS-Exchange-Organization-AuthAs: Internal X-MS-Exchange-Organization-AuthMechanism: 04 Content-Type: multipart/alternative; boundary="_002_SA6PR17MB7362D5E1A906728B63A804D2E4A9ASA6PR17MB7362namp_" MIME-Version: 1.0 --_002_SA6PR17MB7362D5E1A906728B63A804D2E4A9ASA6PR17MB7362namp_ Content-Type: text/plain; charset="us-ascii" --_002_SA6PR17MB7362D5E1A906728B63A804D2E4A9ASA6PR17MB7362namp_ Content-Type: text/calendar; charset="utf-8"; method=CANCEL Content-Transfer-Encoding: base64 QkVHSU46VkNBTEVOREFSDQpNRVRIT0Q6Q0FOQ0VMDQpQUk9ESUQ6TWljcm9zb2Z0IEV4Y2hhbmdl IFNlcnZlciAyMDEwDQpWRVJTSU9OOjIuMA0KQkVHSU46VlRJTUVaT05FDQpUWklEOkNlbnRyYWwg U3RhbmRhcmQgVGltZQ0KQkVHSU46U1RBTkRBUkQNCkRUU1RBUlQ6MTYwMTAxMDFUMDIwMDAwDQpU --_002_SA6PR17MB7362D5E1A906728B63A804D2E4A9ASA6PR17MB7362namp_-- """ class TestParseHeadersFromContent: """Tests for _parse_headers_from_content method.""" @pytest.fixture def parser(self): """Create a ContentContainer instance for testing.""" # We need to create a minimal instance just for the parsing method # Import here to avoid circular imports from src.mail.widgets.ContentContainer import ContentContainer container = ContentContainer() return container._parse_headers_from_content def test_parse_simple_headers(self, parser): """Test parsing simple email headers.""" content = """From: John Doe To: Jane Smith Subject: Test Email Date: Mon, 29 Dec 2025 10:00:00 +0000 This is the body of the email. """ headers = parser(content) assert headers["from"] == "John Doe " assert headers["to"] == "Jane Smith " assert headers["subject"] == "Test Email" assert headers["date"] == "Mon, 29 Dec 2025 10:00:00 +0000" def test_parse_multiple_recipients(self, parser): """Test parsing headers with multiple recipients.""" content = """From: sender@example.com To: user1@example.com, user2@example.com, user3@example.com Subject: Multi-recipient email Date: 2025-12-29 Body here. """ headers = parser(content) assert ( headers["to"] == "user1@example.com, user2@example.com, user3@example.com" ) def test_parse_with_cc(self, parser): """Test parsing headers including CC.""" content = """From: sender@example.com To: recipient@example.com CC: cc1@example.com, cc2@example.com Subject: Email with CC Date: 2025-12-29 Body. """ headers = parser(content) assert headers["to"] == "recipient@example.com" assert headers["cc"] == "cc1@example.com, cc2@example.com" def test_parse_multiline_to_header(self, parser): """Test parsing To header that spans multiple lines.""" content = """From: sender@example.com To: First User , Second User , Third User Subject: Multi-line To Date: 2025-12-29 Body. """ headers = parser(content) # Should combine continuation lines assert "First User" in headers["to"] assert "Second User" in headers["to"] assert "Third User" in headers["to"] def test_parse_with_name_and_email(self, parser): """Test parsing headers with display names and email addresses.""" content = """From: Renovate Bot (SA @renovate-bot-sa) To: Bendt , Timothy Subject: Test Subject Date: 2025-12-29 02:07+00:00 Body content. """ headers = parser(content) assert ( headers["from"] == "Renovate Bot (SA @renovate-bot-sa) " ) assert "Timothy " in headers["to"] assert "Bendt " in headers["to"] def test_parse_empty_content(self, parser): """Test parsing empty content.""" headers = parser("") assert headers == {} def test_parse_no_headers(self, parser): """Test parsing content with no recognizable headers.""" content = """This is just body content without any headers. """ headers = parser(content) assert headers == {} def test_parse_ignores_unknown_headers(self, parser): """Test that unknown headers are ignored.""" content = """From: sender@example.com X-Custom-Header: some value To: recipient@example.com Message-ID: <123@example.com> Subject: Test Date: 2025-12-29 Body. """ headers = parser(content) # Should only have the recognized headers assert set(headers.keys()) == {"from", "to", "subject", "date"} assert "X-Custom-Header" not in headers assert "Message-ID" not in headers def test_parse_real_himalaya_output(self, parser): """Test parsing actual himalaya message read output format.""" content = """From: Renovate Bot (SA @renovate-bot-sa) To: Bendt , Timothy Subject: Re: Fabric3 Monorepo | chore(deps): update vitest monorepo to v4 (major) (!6861) Renovate Bot (SA) pushed new commits to merge request !6861 * f96fec2b...2fb2ae10 - 2 commits from branch `main` """ headers = parser(content) assert ( headers["from"] == "Renovate Bot (SA @renovate-bot-sa) " ) assert headers["to"] == "Bendt , Timothy " assert "Fabric3 Monorepo" in headers["subject"] def test_parse_cancelled_meeting_headers(self, parser): """Test parsing headers from a cancelled meeting email.""" headers = parser(CANCELLED_MEETING_EMAIL) # Should extract the first occurrence of headers (simplified format) assert "Canceled: Technical Refinement" in headers.get("subject", "") assert "corteva.com" in headers.get("to", "") assert "cc" in headers # Should have CC class TestStripHeadersFromContent: """Tests for _strip_headers_from_content method.""" @pytest.fixture def stripper(self): """Create a ContentContainer instance for testing.""" from src.mail.widgets.ContentContainer import ContentContainer container = ContentContainer() return container._strip_headers_from_content def test_strip_simple_headers(self, stripper): """Test stripping simple headers from content.""" content = """From: sender@example.com To: recipient@example.com Subject: Test This is the body. """ result = stripper(content) assert "From:" not in result assert "To:" not in result assert "Subject:" not in result assert "This is the body" in result def test_strip_mime_boundaries(self, stripper): """Test stripping MIME boundary markers.""" content = """From: sender@example.com Subject: Test --boundary123456789 Content-Type: text/plain Hello world --boundary123456789-- """ result = stripper(content) assert "--boundary" not in result assert "Hello world" in result def test_strip_base64_content(self, stripper): """Test stripping base64 encoded content.""" content = """From: sender@example.com Subject: Test --boundary Content-Type: text/calendar Content-Transfer-Encoding: base64 QkVHSU46VkNBTEVOREFSDQpNRVRIT0Q6Q0FOQ0VMDQpQUk9ESUQ6TWljcm9zb2Z0 IEV4Y2hhbmdlIFNlcnZlciAyMDEwDQpWRVJTSU9OOjIuMA0KQkVHSU46VlRJTUVa --boundary-- """ result = stripper(content) # Should not contain base64 content assert "QkVHSU46" not in result assert "VKNTVU9OOjIuMA" not in result def test_strip_cancelled_meeting_email(self, stripper): """Test stripping a cancelled meeting email - should result in empty/minimal content.""" result = stripper(CANCELLED_MEETING_EMAIL) # Should not contain headers assert "From:" not in result assert "To:" not in result assert "Subject:" not in result assert "Received:" not in result assert "Content-Type:" not in result # Should not contain MIME boundaries assert "--_002_" not in result # Should not contain base64 assert "QkVHSU46" not in result # The result should be empty or just whitespace since the text/plain part is empty assert result.strip() == "" or len(result.strip()) < 50 def test_strip_vcalendar_content(self, stripper): """Test stripping vCalendar/ICS content.""" content = """From: sender@example.com Subject: Meeting BEGIN:VCALENDAR METHOD:REQUEST VERSION:2.0 BEGIN:VEVENT SUMMARY:Team Meeting END:VEVENT END:VCALENDAR """ result = stripper(content) assert "BEGIN:VCALENDAR" not in result assert "END:VCALENDAR" not in result assert "VEVENT" not in result class TestFormatRecipients: """Tests for _format_recipients method.""" @pytest.fixture def formatter(self): """Create a ContentContainer instance for testing.""" from src.mail.widgets.ContentContainer import ContentContainer container = ContentContainer() return container._format_recipients def test_format_string_recipient(self, formatter): """Test formatting a string recipient.""" result = formatter("user@example.com") assert result == "user@example.com" def test_format_dict_recipient(self, formatter): """Test formatting a dict recipient.""" result = formatter({"name": "John Doe", "addr": "john@example.com"}) assert result == "John Doe " def test_format_dict_recipient_name_only(self, formatter): """Test formatting a dict with name only.""" result = formatter({"name": "John Doe", "addr": ""}) assert result == "John Doe" def test_format_dict_recipient_addr_only(self, formatter): """Test formatting a dict with addr only.""" result = formatter({"name": None, "addr": "john@example.com"}) assert result == "john@example.com" def test_format_dict_recipient_empty(self, formatter): """Test formatting an empty dict recipient.""" result = formatter({"name": None, "addr": ""}) assert result == "" def test_format_list_recipients(self, formatter): """Test formatting a list of recipients.""" result = formatter( [ {"name": "John", "addr": "john@example.com"}, {"name": "Jane", "addr": "jane@example.com"}, ] ) assert result == "John , Jane " def test_format_empty(self, formatter): """Test formatting empty input.""" assert formatter(None) == "" assert formatter("") == "" assert formatter([]) == ""