add vdir sync feature

This commit is contained in:
Tim Bendt
2025-07-15 23:39:53 -04:00
parent df4c49c3ef
commit 1f306fffd7
9 changed files with 1212 additions and 521 deletions

View File

@@ -1,6 +1,7 @@
"""
Maildir operations for handling local mail storage.
"""
import os
import email
import base64
@@ -11,11 +12,30 @@ from email import encoders
import time
import aiohttp
import re
import logging
# Suppress HTTP library debug logging
logging.getLogger("aiohttp").setLevel(logging.ERROR)
logging.getLogger("aiohttp.access").setLevel(logging.ERROR)
from src.utils.calendar_utils import truncate_id
from src.utils.mail_utils.helpers import safe_filename, ensure_directory_exists, format_datetime, format_mime_date
from src.utils.mail_utils.helpers import (
safe_filename,
ensure_directory_exists,
format_datetime,
format_mime_date,
)
async def save_mime_to_maildir_async(
    maildir_path,
    message,
    attachments_dir,
    headers,
    progress,
    dry_run=False,
    download_attachments=False,
):
    """
    Save a message from Microsoft Graph API to a Maildir.

    The message is stored as ``<id>.eml`` under ``cur`` when the message's
    ``isRead`` flag is true and under ``new`` otherwise. A message whose
    ``.eml`` file already exists in either folder is left untouched.

    Args:
        maildir_path: Root directory of the Maildir (parent of ``new``/``cur``).
        message: Message dict; ``id``, ``isRead`` and ``subject`` are read here.
        attachments_dir: Passed through to ``create_mime_message_async``.
        headers: HTTP headers passed through to ``create_mime_message_async``.
        progress: Object exposing ``console.print`` for status output.
        dry_run: When true, nothing is written by this function; a
            ``[DRY-RUN]`` line is printed instead.
        download_attachments: Passed through to ``create_mime_message_async``.

    Returns:
        None
    """
    message_id = message.get("id", "")
    if not message_id:
        # Without an id every such message would be written to the same
        # ".eml" file and silently shadow the others.
        progress.console.print(
            f"Skipping message without an id: {message.get('subject', 'No Subject')}"
        )
        return

    # Check if the file already exists in either new or cur
    file_name = f"{message_id}.eml"
    new_path = os.path.join(maildir_path, "new", file_name)
    cur_path = os.path.join(maildir_path, "cur", file_name)
    if os.path.exists(new_path) or os.path.exists(cur_path):
        return  # Skip if already exists

    # Create MIME email
    # NOTE(review): this still runs in dry-run mode, so any side effects of
    # create_mime_message_async (e.g. attachment handling) are not suppressed
    # here - confirm that is intended.
    mime_msg = await create_mime_message_async(
        message, headers, attachments_dir, progress, download_attachments
    )

    if dry_run:
        progress.console.print(
            f"[DRY-RUN] Would save message: {message.get('subject', 'No Subject')}"
        )
        return

    # Determine target directory based on read status
    target_dir = os.path.join(
        maildir_path, "cur" if message.get("isRead", False) else "new"
    )
    tmp_dir = os.path.join(maildir_path, "tmp")
    ensure_directory_exists(target_dir)
    ensure_directory_exists(tmp_dir)

    # Write to tmp/ first and move into place afterwards: a crash mid-write
    # must not leave a truncated .eml behind, because the existence check
    # above would then skip that message on every later run.
    final_path = os.path.join(target_dir, file_name)
    tmp_path = os.path.join(tmp_dir, file_name)
    try:
        with open(tmp_path, "wb") as f:
            f.write(mime_msg.as_bytes())
        os.replace(tmp_path, final_path)
    finally:
        # Only present if the write or the move failed.
        if os.path.exists(tmp_path):
            os.remove(tmp_path)
def _format_graph_address(entry):
    """Format a Graph recipient dict (``{"emailAddress": {...}}``) as a header address."""
    from email.utils import formataddr

    email_address = (entry or {}).get("emailAddress") or {}
    name = email_address.get("name") or ""
    address = email_address.get("address") or ""
    try:
        # formataddr quotes/encodes the display name, so names containing
        # commas or quotes cannot break a comma-joined recipient list.
        return formataddr((name, address))
    except UnicodeError:
        # formataddr rejects non-ASCII addresses; keep the plain form then.
        return f"{name} <{address}>".strip()


async def create_mime_message_async(
    message, headers, attachments_dir, progress, download_attachments=False
):
    """
    Create a MIME message from Microsoft Graph API message data.

    Args:
        message: Message dict; ``id``, ``subject``, ``from``, ``toRecipients``,
            ``ccRecipients`` and ``receivedDateTime`` are read here.
        headers: HTTP headers sent with the Graph requests.
        attachments_dir: Passed through to ``add_attachments_async``.
        progress: Object exposing ``console.print`` for status output.
        download_attachments: When true, attachments are added via
            ``add_attachments_async``; otherwise an
            ``X-Attachments-Skipped`` header is set.

    Returns:
        The populated ``MIMEMultipart`` message.
    """
    mime_msg = MIMEMultipart()

    # Message headers
    mime_msg["Message-ID"] = message.get("id", "")
    mime_msg["Subject"] = message.get("subject", "No Subject")

    # Sender information ("from" may be present but null)
    sender_entry = message.get("from") or {}
    if sender_entry.get("emailAddress"):
        mime_msg["From"] = _format_graph_address(sender_entry)

    # Recipients
    to_recipients = message.get("toRecipients") or []
    cc_recipients = message.get("ccRecipients") or []

    if to_recipients:
        mime_msg["To"] = ", ".join(_format_graph_address(r) for r in to_recipients)

    if cc_recipients:
        mime_msg["Cc"] = ", ".join(_format_graph_address(r) for r in cc_recipients)

    # Date - format_mime_date is used to get an RFC 5322 compliant value
    received_datetime = message.get("receivedDateTime", "")
    if received_datetime:
        mime_msg["Date"] = format_mime_date(received_datetime)

    # First try the direct body content approach
    message_id = message.get("id", "")
    try:
        # First get the message with body content
        body_url = f"https://graph.microsoft.com/v1.0/me/messages/{message_id}?$select=body,bodyPreview"
        # NOTE(review): the next three lines (session, GET, status check) were
        # hidden in the reviewed view and are reconstructed from the visible
        # else-branches - confirm against the real file.
        async with aiohttp.ClientSession() as session:
            async with session.get(body_url, headers=headers) as response:
                if response.status == 200:
                    body_data = await response.json()

                    # Get body content ("body"/"contentType" may be null)
                    body = body_data.get("body") or {}
                    body_content = body.get("content", "")
                    body_type = body.get("contentType") or "text"
                    body_preview = body_data.get("bodyPreview", "")

                    # If we have body content, use it
                    if body_content:
                        if body_type.lower() == "html":
                            # Add both HTML and plain text versions;
                            # the plain text is a simple tag-stripping pass.
                            plain_text = re.sub(r"<br\s*/?>", "\n", body_content)
                            plain_text = re.sub(r"<[^>]*>", "", plain_text)

                            mime_msg.attach(MIMEText(plain_text, "plain"))
                            mime_msg.attach(MIMEText(body_content, "html"))
                        else:
                            # Just plain text
                            mime_msg.attach(MIMEText(body_content, "plain"))
                    elif body_preview:
                        # Use preview if we have it
                        mime_msg.attach(
                            MIMEText(
                                f"{body_preview}\n\n[Message preview only. Full content not available.]",
                                "plain",
                            )
                        )
                    else:
                        # Fallback to MIME content
                        progress.console.print(
                            f"No direct body content for message {truncate_id(message_id)}, trying MIME content..."
                        )
                        await fetch_mime_content(
                            mime_msg, message_id, headers, progress
                        )
                else:
                    progress.console.print(
                        f"Failed to get message body: {response.status}. Trying MIME content..."
                    )
                    await fetch_mime_content(mime_msg, message_id, headers, progress)
    except Exception as e:
        # Best effort: any failure above falls back to the raw MIME download.
        progress.console.print(
            f"Error getting message body: {e}. Trying MIME content..."
        )
        await fetch_mime_content(mime_msg, message_id, headers, progress)

    # Handle attachments only if we want to download them
    if download_attachments:
        await add_attachments_async(
            mime_msg, message, headers, attachments_dir, progress
        )
    else:
        # Add a header to indicate attachment info was skipped
        mime_msg["X-Attachments-Skipped"] = "True"

    return mime_msg
async def fetch_mime_content(mime_msg, message_id, headers, progress):
    """
    Fetch and add MIME content to a message when direct body access fails.

    Downloads ``/me/messages/{message_id}/$value`` and attaches text parts to
    ``mime_msg`` using, in order: the contents of a ``<body>`` element, the
    first chunk following a ``Content-Type: text/html`` header, or the raw
    payload as plain text. On an HTTP or other error a plain-text failure
    notice is attached instead.

    Args:
        mime_msg: MIME message the parts are attached to.
        message_id: Graph message id used to build the request URL.
        headers: HTTP headers sent with the request.
        progress: Object exposing ``console.print`` for status output.
    """

    def strip_tags(markup):
        # Simple HTML to text conversion: <br> becomes a newline, other tags vanish.
        return re.sub(r"<[^>]*>", "", re.sub(r"<br\s*/?>", "\n", markup))

    # Fallback to getting the MIME content
    url = f"https://graph.microsoft.com/v1.0/me/messages/{message_id}/$value"

    try:
        async with aiohttp.ClientSession() as session:
            async with session.get(url, headers=headers) as response:
                # NOTE(review): the status check was hidden in the reviewed
                # view; reconstructed from the visible failure branch.
                if response.status != 200:
                    error_text = await response.text()
                    progress.console.print(
                        f"Failed to get MIME content: {response.status} {error_text}"
                    )
                    mime_msg.attach(
                        MIMEText(
                            f"Failed to retrieve message body: HTTP {response.status}",
                            "plain",
                        )
                    )
                    return

                full_content = await response.text()

                # Check for body tags
                body_match = re.search(
                    r"<body[^>]*>(.*?)</body>",
                    full_content,
                    re.DOTALL | re.IGNORECASE,
                )
                if body_match:
                    # Plain text first, then the HTML part.
                    mime_msg.attach(MIMEText(strip_tags(body_match.group(1)), "plain"))
                    # NOTE(review): the HTML part is the whole downloaded
                    # payload, not just the matched body - confirm intended.
                    mime_msg.attach(MIMEText(full_content, "html"))
                    return

                # Fallback - content between a text/html header and the next blank line
                html_parts = re.findall(
                    r"Content-Type: text/html.*?\r?\n\r?\n(.*?)(?:\r?\n\r?\n|$)",
                    full_content,
                    re.DOTALL | re.IGNORECASE,
                )
                if html_parts:
                    first_html = html_parts[0]
                    mime_msg.attach(MIMEText(first_html, "html"))
                    # Also make plain text version
                    mime_msg.attach(MIMEText(strip_tags(first_html), "plain"))
                    return

                # Just use the raw content as text if nothing else works
                mime_msg.attach(MIMEText(full_content, "plain"))
                progress.console.print(
                    f"Using raw content for message {message_id} - no body tags found"
                )
    except Exception as e:
        progress.console.print(f"Error retrieving MIME content: {e}")
        mime_msg.attach(MIMEText(f"Failed to retrieve message body: {str(e)}", "plain"))
async def add_attachments_async(mime_msg, message, headers, attachments_dir, progress):
    """
    Download a message's attachments and add them to the MIME message.

    Each attachment that carries ``contentBytes`` is written to disk and
    attached to ``mime_msg``; ``X-Attachment-*`` headers record the count and
    each attachment's name and type.

    Args:
        mime_msg: MIME message the attachments and headers are added to.
        message: Message dict; only ``id`` is read here.
        headers: HTTP headers sent with the Graph request.
        attachments_dir: Base directory under which attachments are saved.
        progress: Object exposing ``console.print`` for status output.

    Returns:
        None
    """
    message_id = message.get("id", "")

    # Get attachments list
    attachments_url = (
        f"https://graph.microsoft.com/v1.0/me/messages/{message_id}/attachments"
    )

    async with aiohttp.ClientSession() as session:
        async with session.get(attachments_url, headers=headers) as response:
            # NOTE(review): this status check was hidden in the reviewed view
            # and is reconstructed - confirm against the real file.
            if response.status != 200:
                return

            attachments_data = await response.json()
            attachments = attachments_data.get("value", [])

            if not attachments:
                return

            # NOTE(review): the definition of message_attachments_dir was
            # hidden in the reviewed view; a per-message subdirectory is
            # assumed here - confirm against the real file.
            message_attachments_dir = os.path.join(attachments_dir, message_id)
            ensure_directory_exists(message_attachments_dir)

            # Add a header with attachment count
            mime_msg["X-Attachment-Count"] = str(len(attachments))

            names_on_disk = set()
            for idx, attachment in enumerate(attachments):
                # "name"/"contentType" may be present but null.
                attachment_name = safe_filename(attachment.get("name") or "attachment")
                attachment_type = (
                    attachment.get("contentType") or "application/octet-stream"
                )
                maintype, _, subtype = attachment_type.partition("/")
                if not maintype or not subtype:
                    # A type without "/" cannot be split into the two
                    # arguments MIMEBase needs; fall back to the generic type.
                    attachment_type = "application/octet-stream"
                    maintype, subtype = "application", "octet-stream"

                # Add attachment info to headers for reference
                mime_msg[f"X-Attachment-{idx + 1}-Name"] = attachment_name
                mime_msg[f"X-Attachment-{idx + 1}-Type"] = attachment_type

                encoded_content = attachment.get("contentBytes")
                if encoded_content is None:
                    progress.console.print(
                        f"Skipping attachment with no content: {attachment_name}"
                    )
                    continue

                attachment_content = base64.b64decode(encoded_content)

                # Attachments of one message can share a name; prefix repeats
                # with their position so they do not overwrite each other.
                disk_name = attachment_name
                if disk_name in names_on_disk:
                    disk_name = f"{idx + 1}_{attachment_name}"
                names_on_disk.add(disk_name)

                # Save attachment to disk
                attachment_path = os.path.join(message_attachments_dir, disk_name)
                with open(attachment_path, "wb") as f:
                    f.write(attachment_content)

                # Add to MIME message
                attachment_part = MIMEBase(maintype, subtype)
                attachment_part.set_payload(attachment_content)
                encoders.encode_base64(attachment_part)
                # Let add_header quote/encode the filename parameter.
                attachment_part.add_header(
                    "Content-Disposition", "attachment", filename=attachment_name
                )
                mime_msg.attach(attachment_part)

                progress.console.print(f"Downloaded attachment: {attachment_name}")