Files
luk/utils/mail_utils/maildir.py
2025-05-14 15:11:24 -06:00

271 lines
12 KiB
Python

"""
Maildir operations for handling local mail storage.
"""
import os
import email
import base64
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.mime.base import MIMEBase
from email import encoders
import time
import aiohttp
import re
from utils.calendar_utils import truncate_id
from utils.mail_utils.helpers import safe_filename, ensure_directory_exists, format_datetime, format_mime_date
async def save_mime_to_maildir_async(maildir_path, message, attachments_dir, headers, progress, dry_run=False, download_attachments=False):
    """
    Save a message from Microsoft Graph API to a Maildir.

    The message is stored as ``<message id>.eml`` under ``cur`` when the
    message's ``isRead`` flag is true and under ``new`` otherwise. If a file
    with that name already exists in either directory the message is skipped.

    Args:
        maildir_path (str): Path to the Maildir.
        message (dict): Message data from Microsoft Graph API.
        attachments_dir (str): Path to save attachments.
        headers (dict): Headers including authentication.
        progress: Progress instance for updating progress bars; its
            ``console.print`` is used for the dry-run notice.
        dry_run (bool): If True, don't actually save files. No directories
            are created and attachments are not downloaded in this mode.
        download_attachments (bool): If True, download email attachments.

    Returns:
        None
    """
    message_id = message.get('id', '')
    file_name = f"{message_id}.eml"

    # Skip the message if it is already present in either new or cur.
    new_path = os.path.join(maildir_path, 'new', file_name)
    cur_path = os.path.join(maildir_path, 'cur', file_name)
    if os.path.exists(new_path) or os.path.exists(cur_path):
        return

    # Build the MIME message. Attachment download writes files to disk, so it
    # is suppressed in dry-run mode to keep the dry run free of side effects.
    mime_msg = await create_mime_message_async(
        message,
        headers,
        attachments_dir,
        progress,
        download_attachments and not dry_run,
    )

    if dry_run:
        progress.console.print(f"[DRY-RUN] Would save message: {message.get('subject', 'No Subject')}")
        return

    # Determine target directory based on read status.
    target_dir = os.path.join(maildir_path, 'cur' if message.get('isRead', False) else 'new')
    tmp_dir = os.path.join(maildir_path, 'tmp')
    ensure_directory_exists(target_dir)
    ensure_directory_exists(tmp_dir)

    # Write into tmp/ first and move into place afterwards. A crash during
    # the write then never leaves a truncated .eml in new/ or cur/, which the
    # existence check above would otherwise treat as "already saved" forever.
    tmp_path = os.path.join(tmp_dir, file_name)
    try:
        with open(tmp_path, 'wb') as f:
            f.write(mime_msg.as_bytes())
        os.replace(tmp_path, os.path.join(target_dir, file_name))
    finally:
        # After a successful replace the temp file is gone; this only fires
        # when the write or the move failed part-way.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
def _format_graph_address(recipient):
    """
    Format a Microsoft Graph recipient object as an address header value.

    Args:
        recipient (dict | None): Object shaped like
            ``{'emailAddress': {'name': ..., 'address': ...}}``. ``None`` and
            missing/null inner fields are tolerated.

    Returns:
        str: ``"Name <address>"`` with the display name quoted/encoded as
        needed, or an empty string when neither name nor address is present.
    """
    from email.utils import formataddr

    email_address = (recipient or {}).get('emailAddress') or {}
    name = email_address.get('name') or ''
    address = email_address.get('address') or ''
    if not name and not address:
        return ''
    try:
        # formataddr quotes display names containing specials such as commas
        # ("Doe, John"), which would otherwise split into two addresses.
        return formataddr((name, address))
    except UnicodeError:
        # formataddr rejects non-ASCII addresses; fall back to the plain form.
        return f"{name} <{address}>".strip()


async def create_mime_message_async(message, headers, attachments_dir, progress, download_attachments=False):
    """
    Create a MIME message from Microsoft Graph API message data.

    Headers (Message-ID, Subject, From, To, Cc, Date) are taken from
    ``message``. The body is fetched with a separate request for the
    message's ``body`` and ``bodyPreview``; when that request fails or
    returns no usable content, ``fetch_mime_content`` is used instead.

    Args:
        message (dict): Message data from Microsoft Graph API.
        headers (dict): Headers including authentication.
        attachments_dir (str): Path to save attachments.
        progress: Progress instance for updating progress bars.
        download_attachments (bool): If True, download email attachments.
            Otherwise an ``X-Attachments-Skipped`` header is added.

    Returns:
        MIMEMultipart: The MIME message.
    """
    # Create a new MIMEMultipart message
    mime_msg = MIMEMultipart()

    # Message headers
    mime_msg['Message-ID'] = message.get('id', '')
    subject = message.get('subject')
    # An explicit null subject should get the same default as a missing one.
    mime_msg['Subject'] = 'No Subject' if subject is None else subject

    # Sender information ('from' may be an explicit null in the payload)
    sender = _format_graph_address(message.get('from'))
    if sender:
        mime_msg['From'] = sender

    # Recipients
    for header_name, key in (('To', 'toRecipients'), ('Cc', 'ccRecipients')):
        formatted = [
            entry
            for entry in (_format_graph_address(r) for r in (message.get(key) or []))
            if entry
        ]
        if formatted:
            mime_msg[header_name] = ', '.join(formatted)

    # Date - format_mime_date is expected to produce an RFC 5322 date string
    received_datetime = message.get('receivedDateTime', '')
    if received_datetime:
        mime_msg['Date'] = format_mime_date(received_datetime)

    # First try the direct body content approach
    message_id = message.get('id', '')
    try:
        body_url = f"https://graph.microsoft.com/v1.0/me/messages/{message_id}?$select=body,bodyPreview"
        async with aiohttp.ClientSession() as session:
            async with session.get(body_url, headers=headers) as response:
                if response.status == 200:
                    body_data = await response.json()
                    body = body_data.get('body') or {}
                    body_content = body.get('content', '')
                    body_type = body.get('contentType') or 'text'
                    body_preview = body_data.get('bodyPreview', '')

                    if body_content:
                        if body_type.lower() == 'html':
                            # Add both a tag-stripped plain text version and
                            # the original HTML.
                            plain_text = re.sub(r'<br\s*/?>', '\n', body_content)
                            plain_text = re.sub(r'<[^>]*>', '', plain_text)
                            mime_msg.attach(MIMEText(plain_text, 'plain'))
                            mime_msg.attach(MIMEText(body_content, 'html'))
                        else:
                            # Just plain text
                            mime_msg.attach(MIMEText(body_content, 'plain'))
                    elif body_preview:
                        # Use preview if we have it
                        mime_msg.attach(MIMEText(f"{body_preview}\n\n[Message preview only. Full content not available.]", 'plain'))
                    else:
                        # Fallback to MIME content
                        progress.console.print(f"No direct body content for message {truncate_id(message_id)}, trying MIME content...")
                        await fetch_mime_content(mime_msg, message_id, headers, progress)
                else:
                    progress.console.print(f"Failed to get message body: {response.status}. Trying MIME content...")
                    await fetch_mime_content(mime_msg, message_id, headers, progress)
    except Exception as e:
        # Best effort: any failure on the direct path falls back to the raw
        # MIME download rather than aborting the whole message.
        progress.console.print(f"Error getting message body: {e}. Trying MIME content...")
        await fetch_mime_content(mime_msg, message_id, headers, progress)

    # Handle attachments only if we want to download them
    if download_attachments:
        await add_attachments_async(mime_msg, message, headers, attachments_dir, progress)
    else:
        # Add a header to indicate attachment info was skipped
        mime_msg['X-Attachments-Skipped'] = 'True'

    return mime_msg
def _extract_bodies_from_raw_mime(raw_bytes):
    """
    Parse a raw RFC 822 message and return its decoded text bodies.

    Args:
        raw_bytes (bytes): Complete MIME document (headers and body).

    Returns:
        tuple: ``(plain_text, html_text)``; each is a ``str`` with transfer
        encoding and charset already decoded, or ``None`` when the message
        has no body part of that type.
    """
    from email import message_from_bytes, policy

    parsed = message_from_bytes(raw_bytes, policy=policy.default)
    plain_part = parsed.get_body(preferencelist=('plain',))
    html_part = parsed.get_body(preferencelist=('html',))
    plain_text = plain_part.get_content() if plain_part is not None else None
    html_text = html_part.get_content() if html_part is not None else None
    return plain_text, html_text


async def fetch_mime_content(mime_msg, message_id, headers, progress):
    """
    Fetch and add MIME content to a message when direct body access fails.

    Downloads the message's raw MIME document from the ``/$value`` endpoint,
    parses it, and attaches the decoded text/plain and text/html bodies to
    ``mime_msg``. When only HTML is present a tag-stripped plain text version
    is attached as well. On any failure a plain text part describing the
    error is attached instead, so the caller always ends up with a body.

    Args:
        mime_msg (MIMEMultipart): The message to add content to.
        message_id (str): Message ID.
        headers (dict): Headers including authentication.
        progress: Progress instance for updating progress bars.

    Returns:
        None
    """
    # Fallback to getting the MIME content
    message_content_url = f"https://graph.microsoft.com/v1.0/me/messages/{message_id}/$value"
    try:
        async with aiohttp.ClientSession() as session:
            async with session.get(message_content_url, headers=headers) as response:
                if response.status != 200:
                    error_text = await response.text()
                    progress.console.print(f"Failed to get MIME content: {response.status} {error_text}")
                    mime_msg.attach(MIMEText(f"Failed to retrieve message body: HTTP {response.status}", 'plain'))
                    return
                # Read bytes, not text: the payload is a MIME document whose
                # parts carry their own charsets and transfer encodings.
                raw_bytes = await response.read()

        plain_text, html_text = _extract_bodies_from_raw_mime(raw_bytes)

        if html_text is not None:
            if plain_text is None:
                # Simple HTML to text conversion
                plain_text = re.sub(r'<br\s*/?>', '\n', html_text)
                plain_text = re.sub(r'<[^>]*>', '', plain_text)
            mime_msg.attach(MIMEText(plain_text, 'plain'))
            mime_msg.attach(MIMEText(html_text, 'html'))
        elif plain_text is not None:
            mime_msg.attach(MIMEText(plain_text, 'plain'))
        else:
            # Just use the raw content as text if nothing else works
            mime_msg.attach(MIMEText(raw_bytes.decode('utf-8', errors='replace'), 'plain'))
            progress.console.print(f"Using raw content for message {message_id} - no body tags found")
    except Exception as e:
        # Best effort: record the failure in the message body instead of
        # propagating, matching the error-as-body convention used above.
        progress.console.print(f"Error retrieving MIME content: {e}")
        mime_msg.attach(MIMEText(f"Failed to retrieve message body: {str(e)}", 'plain'))
async def add_attachments_async(mime_msg, message, headers, attachments_dir, progress):
    """
    Add attachments to a MIME message.

    Lists the message's attachments via Microsoft Graph. Each attachment that
    carries inline ``contentBytes`` is written to
    ``<attachments_dir>/<message id>/`` and attached to ``mime_msg``;
    attachments without ``contentBytes`` are reported and skipped.
    ``X-Attachment-*`` headers describing every listed attachment are added
    to ``mime_msg``.

    Args:
        mime_msg (MIMEMultipart): The MIME message to add attachments to.
        message (dict): Message data from Microsoft Graph API.
        headers (dict): Headers including authentication.
        attachments_dir (str): Path to save attachments.
        progress: Progress instance for updating progress bars.

    Returns:
        None
    """
    message_id = message.get('id', '')

    # Get attachments list
    attachments_url = f"https://graph.microsoft.com/v1.0/me/messages/{message_id}/attachments"
    async with aiohttp.ClientSession() as session:
        async with session.get(attachments_url, headers=headers) as response:
            if response.status != 200:
                # Report instead of failing silently so missing attachments
                # can be traced back to the request that failed.
                progress.console.print(f"Failed to list attachments for message {message_id}: {response.status}")
                return
            attachments_data = await response.json()

    attachments = attachments_data.get('value') or []
    if not attachments:
        return

    # Create a directory for this message's attachments
    message_attachments_dir = os.path.join(attachments_dir, message_id)
    ensure_directory_exists(message_attachments_dir)

    # Add a header with attachment count
    mime_msg['X-Attachment-Count'] = str(len(attachments))

    used_disk_names = set()
    for idx, attachment in enumerate(attachments):
        # 'name' / 'contentType' may be present but null in the payload.
        attachment_name = safe_filename(attachment.get('name') or 'attachment')
        attachment_type = attachment.get('contentType') or 'application/octet-stream'
        maintype, _, subtype = attachment_type.partition('/')
        if not maintype or not subtype:
            # MIMEBase needs both halves; fall back to the generic binary type.
            attachment_type = 'application/octet-stream'
            maintype, subtype = 'application', 'octet-stream'

        # Add attachment info to headers for reference
        mime_msg[f'X-Attachment-{idx+1}-Name'] = attachment_name
        mime_msg[f'X-Attachment-{idx+1}-Type'] = attachment_type

        if 'contentBytes' not in attachment:
            progress.console.print(f"Skipping attachment with no content: {attachment_name}")
            continue

        attachment_content = base64.b64decode(attachment['contentBytes'])

        # Save attachment to disk. Several attachments on one message can
        # share a name (e.g. inline images); prefix repeats with their
        # position so later ones do not overwrite earlier ones.
        disk_name = attachment_name
        if disk_name in used_disk_names:
            disk_name = f"{idx+1}_{attachment_name}"
        used_disk_names.add(disk_name)
        attachment_path = os.path.join(message_attachments_dir, disk_name)
        with open(attachment_path, 'wb') as f:
            f.write(attachment_content)

        # Add to MIME message
        attachment_part = MIMEBase(maintype, subtype)
        attachment_part.set_payload(attachment_content)
        encoders.encode_base64(attachment_part)
        # Passing filename as a parameter lets the email package quote or
        # RFC 2231-encode it as required.
        attachment_part.add_header('Content-Disposition', 'attachment', filename=attachment_name)
        mime_msg.attach(attachment_part)
        progress.console.print(f"Downloaded attachment: {attachment_name}")