wip refactoring

Tim Bendt
2025-05-12 17:19:34 -06:00
parent d75f16c25d
commit 7123ff1f43
13 changed files with 1258 additions and 421 deletions

utils/calendar_utils.py (normal file, 300 lines added)

@@ -0,0 +1,300 @@
"""
Utility module for handling calendar events and iCalendar operations.
"""
import re
import os
from datetime import datetime, timedelta
from dateutil import parser
from dateutil.tz import UTC
import glob
def truncate_id(text, first=8, last=8):
"""
Truncate long IDs or filenames to show just the first and last few characters.
Args:
text: The ID or filename to truncate
first: Number of characters to keep from the beginning
last: Number of characters to keep from the end
Returns:
Truncated string with ellipsis in the middle
"""
if not text or len(text) <= first + last + 3:
return text
return f"{text[:first]}...{text[-last:]}"
def clean_text(text):
"""
Clean text by collapsing runs of 3 or more consecutive underscores,
which can hurt readability, down to two underscores.
Args:
text: Text to clean
Returns:
Cleaned text
"""
if not text:
return ""
# Replace 3 or more consecutive underscores with 2 underscores
return re.sub(r'_{3,}', '__', text)
def escape_ical_text(text):
"""
Escape text for iCalendar format according to RFC 5545.
Args:
text: Text to escape
Returns:
Escaped text
"""
if not text:
return ""
# First clean multiple underscores
text = clean_text(text)
text = text.replace("\\", "\\\\")
text = text.replace("\n", "\\n")
text = text.replace(",", "\\,")
text = text.replace(";", "\\;")
return text
async def fetch_calendar_events(headers, days_back=1, days_forward=6, fetch_function=None,
start_date=None, end_date=None):
"""
Fetch calendar events from Microsoft Graph API.
Args:
headers: Authentication headers for Microsoft Graph API
days_back: Number of days to look back (default: 1)
days_forward: Number of days to look forward (default: 6)
fetch_function: Async function to use for fetching data (default: None)
Should accept URL and headers as parameters
start_date: Optional explicit start date (datetime object)
end_date: Optional explicit end date (datetime object)
Returns:
Tuple of (events list, total_events count)
"""
if fetch_function is None:
raise ValueError("fetch_function is required for API calls")
# Calculate date range
if start_date is None:
start_date = datetime.now().replace(hour=0, minute=0, second=0) - timedelta(days=days_back)
if end_date is None:
end_of_today = datetime.now().replace(hour=23, minute=59, second=59)
end_date = end_of_today + timedelta(days=days_forward)
# Build the API URL
event_base_url = f"https://graph.microsoft.com/v1.0/me/calendarView?startDateTime={start_date.isoformat()}&endDateTime={end_date.isoformat()}"
calendar_url = f"{event_base_url}&$top=100&$select=start,end,id,iCalUId,subject,bodyPreview,webLink,location,recurrence,showAs,responseStatus,onlineMeeting,lastModifiedDateTime"
# Fetch total count for progress reporting (if needed)
total_event_url = f"{event_base_url}&$count=true&$select=id"
try:
total_response = await fetch_function(total_event_url, headers)
total_events = total_response.get('@odata.count', 0)
except Exception as e:
print(f"Error fetching total events count: {e}")
total_events = 0
# Fetch all calendar events, handling pagination
events = []
while calendar_url:
try:
response_data = await fetch_function(calendar_url, headers)
if response_data:
events.extend(response_data.get('value', []))
# Get the next page URL from @odata.nextLink
calendar_url = response_data.get('@odata.nextLink')
else:
print("Received empty response from calendar API")
break
except Exception as e:
print(f"Error fetching calendar events: {e}")
break
# Only return the events and total_events
return events, total_events
def write_event_to_ical(f, event, start, end):
"""
Write a single event to an iCalendar file.
Args:
f: File-like object to write to
event: Dictionary containing event data
start: Start datetime with timezone information
end: End datetime with timezone information
"""
# Preserve the original timezones
start_tz = start.tzinfo
end_tz = end.tzinfo
f.write(f"BEGIN:VEVENT\nSUMMARY:{escape_ical_text(event['subject'])}\n")
# Handle multi-line description properly
description = event.get('bodyPreview', '')
if description:
escaped_description = escape_ical_text(description)
f.write(f"DESCRIPTION:{escaped_description}\n")
f.write(f"UID:{event.get('iCalUId', '')}\n")
f.write(f"LOCATION:{escape_ical_text(event.get('location', {}).get('displayName', ''))}\n")
f.write(f"CLASS:{event.get('showAs', '')}\n")
f.write(f"STATUS:{event.get('responseStatus', {}).get('response', '')}\n")
if 'onlineMeeting' in event and event['onlineMeeting']:
f.write(f"URL:{event.get('onlineMeeting', {}).get('joinUrl', '')}\n")
# Write start and end times with timezone info in iCalendar format
if start.tzinfo == UTC:
f.write(f"DTSTART:{start.strftime('%Y%m%dT%H%M%SZ')}\n")
else:
tz_name = start_tz.tzname(None) if start_tz else 'UTC'
f.write(f"DTSTART;TZID={tz_name}:{start.strftime('%Y%m%dT%H%M%S')}\n")
if end.tzinfo == UTC:
f.write(f"DTEND:{end.strftime('%Y%m%dT%H%M%SZ')}\n")
else:
tz_name = end_tz.tzname(None) if end_tz else 'UTC'
f.write(f"DTEND;TZID={tz_name}:{end.strftime('%Y%m%dT%H%M%S')}\n")
# Handle recurrence rules
if 'recurrence' in event and event['recurrence']:
for rule in event['recurrence']:
if rule.startswith('RRULE'):
rule_parts = rule.split(';')
new_rule_parts = []
for part in rule_parts:
if part.startswith('UNTIL='):
until_value = part.split('=')[1]
until_date = parser.isoparse(until_value)
if start.tzinfo is not None and until_date.tzinfo is None:
until_date = until_date.replace(tzinfo=start.tzinfo)
new_rule_parts.append(f"UNTIL={until_date.strftime('%Y%m%dT%H%M%SZ')}")
else:
new_rule_parts.append(part)
rule = ';'.join(new_rule_parts)
f.write(f"{rule}\n")
f.write("END:VEVENT\n")
def save_events_to_vdir(events, org_vdir_path, progress, task_id, dry_run=False):
"""
Save events to vdir format (one file per event).
Args:
events: List of event dictionaries
org_vdir_path: Path to save the event files
progress: Progress object for updating UI
task_id: Task ID for progress tracking
dry_run: If True, don't actually write files
Returns:
Number of events processed
"""
if dry_run:
progress.console.print(f"[DRY-RUN] Would save {len(events)} events to vdir format in {org_vdir_path}")
return len(events)
os.makedirs(org_vdir_path, exist_ok=True)
progress.console.print(f"Saving events to vdir format in {org_vdir_path}...")
# Create a dictionary to track existing files and their metadata
existing_files = {}
for file_path in glob.glob(os.path.join(org_vdir_path, "*.ics")):
file_name = os.path.basename(file_path)
file_mod_time = os.path.getmtime(file_path)
existing_files[file_name] = {
'path': file_path,
'mtime': file_mod_time
}
processed_files = set()
for event in events:
progress.advance(task_id)
if 'start' not in event or 'end' not in event:
continue
# Parse start and end times with timezone information
start = parser.isoparse(event['start']['dateTime'])
end = parser.isoparse(event['end']['dateTime'])
uid = event.get('iCalUId', '')
if not uid:
# Generate a unique ID if none exists
uid = f"outlook-{event.get('id', '')}"
# Create a filename based on the UID
safe_filename = re.sub(r'[^\w\-]', '_', uid) + ".ics"
event_path = os.path.join(org_vdir_path, safe_filename)
processed_files.add(safe_filename)
# Check if we need to update this file
should_update = True
if safe_filename in existing_files:
# Only update if the event has been modified since the file was last updated
if 'lastModifiedDateTime' in event:
last_modified = parser.isoparse(event['lastModifiedDateTime']).timestamp()
file_mtime = existing_files[safe_filename]['mtime']
if last_modified <= file_mtime:
should_update = False
progress.console.print(f"Skipping unchanged event: {event['subject']}")
if should_update:
with open(event_path, 'w') as f:
f.write("BEGIN:VCALENDAR\nVERSION:2.0\n")
write_event_to_ical(f, event, start, end)
f.write("END:VCALENDAR\n")
# Remove files for events that no longer exist in the calendar view
for file_name in existing_files:
if file_name not in processed_files:
progress.console.print(f"Removing obsolete event file: {truncate_id(file_name)}")
os.remove(existing_files[file_name]['path'])
progress.console.print(f"Saved {len(events)} events to {org_vdir_path}")
return len(events)
def save_events_to_file(events, output_file, progress, task_id, dry_run=False):
"""
Save all events to a single iCalendar file.
Args:
events: List of event dictionaries
output_file: Path to the output file
progress: Progress object for updating UI
task_id: Task ID for progress tracking
dry_run: If True, don't actually write the file
Returns:
Number of events processed
"""
if dry_run:
progress.console.print(f"[DRY-RUN] Would save events to {output_file}")
return len(events)
os.makedirs(os.path.dirname(output_file), exist_ok=True)
progress.console.print(f"Saving events to {output_file}...")
with open(output_file, 'w') as f:
f.write("BEGIN:VCALENDAR\nVERSION:2.0\n")
for event in events:
progress.advance(task_id)
if 'start' in event and 'end' in event:
# Parse start and end times with timezone information
start = parser.isoparse(event['start']['dateTime'])
end = parser.isoparse(event['end']['dateTime'])
write_event_to_ical(f, event, start, end)
f.write("END:VCALENDAR\n")
progress.console.print(f"Saved events to {output_file}")
return len(events)
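
Usage note: the functions above are dependency-injected. fetch_calendar_events accepts any async fetch_function(url, headers) that returns decoded JSON, and the save helpers only need an object exposing console.print and advance(task_id), which matches rich.progress.Progress. Below is a minimal sketch under those assumptions; the GRAPH_TOKEN environment variable, the aiohttp-based fetcher, and the vdir path are illustrative and not part of this commit.

import asyncio
import os

import aiohttp
from rich.progress import Progress

from utils.calendar_utils import fetch_calendar_events, save_events_to_vdir

async def fetch_json(url, headers):
    # Matches the fetch_function contract: take a URL and headers, return parsed JSON.
    async with aiohttp.ClientSession() as session:
        async with session.get(url, headers=headers) as response:
            response.raise_for_status()
            return await response.json()

async def main():
    # Assumed: a valid Microsoft Graph bearer token in GRAPH_TOKEN.
    headers = {"Authorization": f"Bearer {os.environ['GRAPH_TOKEN']}"}
    events, total = await fetch_calendar_events(
        headers, days_back=1, days_forward=6, fetch_function=fetch_json
    )
    with Progress() as progress:
        task_id = progress.add_task(f"Saving {len(events)} of {total} events", total=len(events))
        save_events_to_vdir(
            events, os.path.expanduser("~/.calendars/outlook"),
            progress, task_id,
            dry_run=True,  # report what would be written instead of creating .ics files
        )

if __name__ == "__main__":
    asyncio.run(main())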

utils/mail_utils/__init__.py (normal file, 3 lines added)

@@ -0,0 +1,3 @@
"""
Mail utilities module for email operations.
"""

utils/mail_utils/helpers.py (normal file, 114 lines added)

@@ -0,0 +1,114 @@
"""
Mail utility helper functions.
"""
import os
import json
import time
from datetime import datetime
def truncate_id(message_id, length=8):
"""
Truncate a message ID to a reasonable length for display.
Args:
message_id (str): The message ID to truncate.
length (int): The number of characters to keep.
Returns:
str: The truncated message ID.
"""
if not message_id:
return ""
if len(message_id) <= length:
return message_id
return f"{message_id[:length]}..."
def load_last_sync_timestamp():
"""
Load the last synchronization timestamp from a file.
Returns:
float: The timestamp of the last synchronization, or 0 if not available.
"""
try:
with open('sync_timestamp.json', 'r') as f:
data = json.load(f)
return data.get('timestamp', 0)
except (FileNotFoundError, json.JSONDecodeError):
return 0
def save_sync_timestamp():
"""
Save the current timestamp as the last synchronization timestamp.
Returns:
None
"""
current_time = time.time()
with open('sync_timestamp.json', 'w') as f:
json.dump({'timestamp': current_time}, f)
def format_datetime(dt_str, format_string="%m/%d %I:%M %p"):
"""
Format a datetime string from ISO format.
Args:
dt_str (str): ISO format datetime string.
format_string (str): Format string for the output.
Returns:
str: Formatted datetime string.
"""
if not dt_str:
return ""
try:
dt = datetime.fromisoformat(dt_str.replace('Z', '+00:00'))
return dt.strftime(format_string)
except (ValueError, AttributeError):
return dt_str
def safe_filename(filename):
"""
Convert a string to a safe filename.
Args:
filename (str): Original filename.
Returns:
str: Safe filename with invalid characters replaced.
"""
invalid_chars = '<>:"/\\|?*'
for char in invalid_chars:
filename = filename.replace(char, '_')
return filename
def ensure_directory_exists(directory):
"""
Ensure that a directory exists, creating it if necessary.
Args:
directory (str): The directory path to check/create.
Returns:
None
"""
if not os.path.exists(directory):
os.makedirs(directory)
def parse_maildir_name(filename):
"""
Parse a Maildir filename to extract components.
Args:
filename (str): The maildir filename.
Returns:
tuple: (message_id, flags) components of the filename.
"""
# Maildir filename format: unique-id:flags
if ':' in filename:
message_id, flags = filename.split(':', 1)
else:
message_id = filename
flags = ''
return message_id, flags
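
These helpers are pure functions (apart from the sync-timestamp pair), so they can be exercised directly. A quick sketch; the sample values below are made up for illustration.

from utils.mail_utils.helpers import (
    truncate_id, format_datetime, safe_filename, parse_maildir_name,
)

print(truncate_id("AAMkAGI2TG93AAA="))                 # "AAMkAGI2..."
print(format_datetime("2025-05-12T17:19:34Z"))         # "05/12 05:19 PM"
print(safe_filename('Re: "Q2 plan" <draft>.eml'))      # 'Re_ _Q2 plan_ _draft_.eml'
print(parse_maildir_name("1650000000.M1P2.host:2,S"))  # ('1650000000.M1P2.host', '2,S')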

utils/mail_utils/maildir.py (normal file, 270 lines added)

@@ -0,0 +1,270 @@
"""
Maildir operations for handling local mail storage.
"""
import os
import email
import base64
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.mime.base import MIMEBase
from email import encoders
import time
import aiohttp
import re
from utils.calendar_utils import truncate_id
from utils.mail_utils.helpers import safe_filename, ensure_directory_exists, format_datetime
async def save_mime_to_maildir_async(maildir_path, message, attachments_dir, headers, progress, dry_run=False, download_attachments=False):
"""
Save a message from Microsoft Graph API to a Maildir.
Args:
maildir_path (str): Path to the Maildir.
message (dict): Message data from Microsoft Graph API.
attachments_dir (str): Path to save attachments.
headers (dict): Headers including authentication.
progress: Progress instance for updating progress bars.
dry_run (bool): If True, don't actually save files.
download_attachments (bool): If True, download email attachments.
Returns:
None
"""
message_id = message.get('id', '')
# Determine target directory based on read status
target_dir = os.path.join(maildir_path, 'cur' if message.get('isRead', False) else 'new')
ensure_directory_exists(target_dir)
# Check if the file already exists in either new or cur
new_path = os.path.join(maildir_path, 'new', f"{message_id}.eml")
cur_path = os.path.join(maildir_path, 'cur', f"{message_id}.eml")
if os.path.exists(new_path) or os.path.exists(cur_path):
return # Skip if already exists
# Create MIME email
mime_msg = await create_mime_message_async(message, headers, attachments_dir, progress, download_attachments)
# Only save file if not in dry run mode
if not dry_run:
with open(os.path.join(target_dir, f"{message_id}.eml"), 'wb') as f:
f.write(mime_msg.as_bytes())
else:
progress.console.print(f"[DRY-RUN] Would save message: {message.get('subject', 'No Subject')}")
async def create_mime_message_async(message, headers, attachments_dir, progress, download_attachments=False):
"""
Create a MIME message from Microsoft Graph API message data.
Args:
message (dict): Message data from Microsoft Graph API.
headers (dict): Headers including authentication.
attachments_dir (str): Path to save attachments.
progress: Progress instance for updating progress bars.
download_attachments (bool): If True, download email attachments.
Returns:
MIMEMultipart: The MIME message.
"""
# Create a new MIMEMultipart message
mime_msg = MIMEMultipart()
# Message headers
mime_msg['Message-ID'] = message.get('id', '')
mime_msg['Subject'] = message.get('subject', 'No Subject')
# Sender information
sender = message.get('from', {}).get('emailAddress', {})
if sender:
mime_msg['From'] = f"{sender.get('name', '')} <{sender.get('address', '')}>".strip()
# Recipients
to_recipients = message.get('toRecipients', [])
cc_recipients = message.get('ccRecipients', [])
if to_recipients:
to_list = [f"{r.get('emailAddress', {}).get('name', '')} <{r.get('emailAddress', {}).get('address', '')}>".strip() for r in to_recipients]
mime_msg['To'] = ', '.join(to_list)
if cc_recipients:
cc_list = [f"{r.get('emailAddress', {}).get('name', '')} <{r.get('emailAddress', {}).get('address', '')}>".strip() for r in cc_recipients]
mime_msg['Cc'] = ', '.join(cc_list)
# Date
received_datetime = message.get('receivedDateTime', '')
if received_datetime:
mime_msg['Date'] = received_datetime
# First try the direct body content approach
message_id = message.get('id', '')
try:
# First get the message with body content
body_url = f"https://graph.microsoft.com/v1.0/me/messages/{message_id}?$select=body,bodyPreview"
async with aiohttp.ClientSession() as session:
async with session.get(body_url, headers=headers) as response:
if response.status == 200:
body_data = await response.json()
# Get body content
body_content = body_data.get('body', {}).get('content', '')
body_type = body_data.get('body', {}).get('contentType', 'text')
body_preview = body_data.get('bodyPreview', '')
# If we have body content, use it
if body_content:
if body_type.lower() == 'html':
# Add both HTML and plain text versions
# Plain text conversion
plain_text = re.sub(r'<br\s*/?>', '\n', body_content)
plain_text = re.sub(r'<[^>]*>', '', plain_text)
mime_msg.attach(MIMEText(plain_text, 'plain'))
mime_msg.attach(MIMEText(body_content, 'html'))
else:
# Just plain text
mime_msg.attach(MIMEText(body_content, 'plain'))
elif body_preview:
# Use preview if we have it
mime_msg.attach(MIMEText(f"{body_preview}\n\n[Message preview only. Full content not available.]", 'plain'))
else:
# Fallback to MIME content
progress.console.print(f"No direct body content for message {truncate_id(message_id)}, trying MIME content...")
await fetch_mime_content(mime_msg, message_id, headers, progress)
else:
progress.console.print(f"Failed to get message body: {response.status}. Trying MIME content...")
await fetch_mime_content(mime_msg, message_id, headers, progress)
except Exception as e:
progress.console.print(f"Error getting message body: {e}. Trying MIME content...")
await fetch_mime_content(mime_msg, message_id, headers, progress)
# Handle attachments only if we want to download them
if download_attachments:
await add_attachments_async(mime_msg, message, headers, attachments_dir, progress)
else:
# Add a header to indicate attachment info was skipped
mime_msg['X-Attachments-Skipped'] = 'True'
return mime_msg
async def fetch_mime_content(mime_msg, message_id, headers, progress):
"""
Fetch and add MIME content to a message when direct body access fails.
Args:
mime_msg (MIMEMultipart): The message to add content to.
message_id (str): Message ID.
headers (dict): Headers including authentication.
progress: Progress instance for updating progress bars.
"""
# Fallback to getting the MIME content
message_content_url = f"https://graph.microsoft.com/v1.0/me/messages/{message_id}/$value"
try:
async with aiohttp.ClientSession() as session:
async with session.get(message_content_url, headers=headers) as response:
if response.status == 200:
full_content = await response.text()
# Check for body tags
body_match = re.search(r'<body[^>]*>(.*?)</body>', full_content, re.DOTALL | re.IGNORECASE)
if body_match:
body_content = body_match.group(1)
# Simple HTML to text conversion
body_text = re.sub(r'<br\s*/?>', '\n', body_content)
body_text = re.sub(r'<[^>]*>', '', body_text)
# Add the plain text body
mime_msg.attach(MIMEText(body_text, 'plain'))
# Also add the HTML body
mime_msg.attach(MIMEText(full_content, 'html'))
else:
# Fallback - try to find content between Content-Type: text/html and next boundary
html_parts = re.findall(r'Content-Type: text/html.*?\r?\n\r?\n(.*?)(?:\r?\n\r?\n|$)',
full_content, re.DOTALL | re.IGNORECASE)
if html_parts:
html_content = html_parts[0]
mime_msg.attach(MIMEText(html_content, 'html'))
# Also make plain text version
plain_text = re.sub(r'<br\s*/?>', '\n', html_content)
plain_text = re.sub(r'<[^>]*>', '', plain_text)
mime_msg.attach(MIMEText(plain_text, 'plain'))
else:
# Just use the raw content as text if nothing else works
mime_msg.attach(MIMEText(full_content, 'plain'))
progress.console.print(f"Using raw content for message {message_id} - no body tags found")
else:
error_text = await response.text()
progress.console.print(f"Failed to get MIME content: {response.status} {error_text}")
mime_msg.attach(MIMEText(f"Failed to retrieve message body: HTTP {response.status}", 'plain'))
except Exception as e:
progress.console.print(f"Error retrieving MIME content: {e}")
mime_msg.attach(MIMEText(f"Failed to retrieve message body: {str(e)}", 'plain'))
async def add_attachments_async(mime_msg, message, headers, attachments_dir, progress):
"""
Add attachments to a MIME message.
Args:
mime_msg (MIMEMultipart): The MIME message to add attachments to.
message (dict): Message data from Microsoft Graph API.
headers (dict): Headers including authentication.
attachments_dir (str): Path to save attachments.
progress: Progress instance for updating progress bars.
Returns:
None
"""
message_id = message.get('id', '')
# Get attachments list
attachments_url = f"https://graph.microsoft.com/v1.0/me/messages/{message_id}/attachments"
async with aiohttp.ClientSession() as session:
async with session.get(attachments_url, headers=headers) as response:
if response.status != 200:
return
attachments_data = await response.json()
attachments = attachments_data.get('value', [])
if not attachments:
return
# Create a directory for this message's attachments
message_attachments_dir = os.path.join(attachments_dir, message_id)
ensure_directory_exists(message_attachments_dir)
# Add a header with attachment count
mime_msg['X-Attachment-Count'] = str(len(attachments))
for idx, attachment in enumerate(attachments):
attachment_name = safe_filename(attachment.get('name', 'attachment'))
attachment_type = attachment.get('contentType', 'application/octet-stream')
# Add attachment info to headers for reference
mime_msg[f'X-Attachment-{idx+1}-Name'] = attachment_name
mime_msg[f'X-Attachment-{idx+1}-Type'] = attachment_type
attachment_part = MIMEBase(*attachment_type.split('/', 1))
# Get attachment content
if 'contentBytes' in attachment:
attachment_content = base64.b64decode(attachment['contentBytes'])
# Save attachment to disk
attachment_path = os.path.join(message_attachments_dir, attachment_name)
with open(attachment_path, 'wb') as f:
f.write(attachment_content)
# Add to MIME message
attachment_part.set_payload(attachment_content)
encoders.encode_base64(attachment_part)
attachment_part.add_header('Content-Disposition', f'attachment; filename="{attachment_name}"')
mime_msg.attach(attachment_part)
progress.console.print(f"Downloaded attachment: {attachment_name}")
else:
progress.console.print(f"Skipping attachment with no content: {attachment_name}")