add vdir sync feature

Tim Bendt
2025-07-15 23:39:53 -04:00
parent df4c49c3ef
commit 1f306fffd7
9 changed files with 1212 additions and 521 deletions

View File

@@ -1,15 +1,30 @@
"""
Authentication module for Microsoft Graph API.
"""
import os
import msal
import logging
from rich import print
from rich.panel import Panel
# Comprehensive logging suppression for authentication-related libraries
logging.getLogger("msal").setLevel(logging.ERROR)
logging.getLogger("urllib3").setLevel(logging.ERROR)
logging.getLogger("requests").setLevel(logging.ERROR)
logging.getLogger("requests_oauthlib").setLevel(logging.ERROR)
logging.getLogger("aiohttp").setLevel(logging.ERROR)
logging.getLogger("aiohttp.access").setLevel(logging.ERROR)
logging.getLogger("asyncio").setLevel(logging.ERROR)
logging.getLogger("azure").setLevel(logging.ERROR)
logging.getLogger("azure.core").setLevel(logging.ERROR)
def ensure_directory_exists(path):
if not os.path.exists(path):
os.makedirs(path)
def get_access_token(scopes):
"""
Authenticate with Microsoft Graph API and obtain an access token.
@@ -26,43 +41,57 @@ def get_access_token(scopes):
Exception: If authentication fails.
"""
# Read Azure app credentials from environment variables
client_id = os.getenv('AZURE_CLIENT_ID')
tenant_id = os.getenv('AZURE_TENANT_ID')
client_id = os.getenv("AZURE_CLIENT_ID")
tenant_id = os.getenv("AZURE_TENANT_ID")
if not client_id or not tenant_id:
raise ValueError("Please set the AZURE_CLIENT_ID and AZURE_TENANT_ID environment variables.")
raise ValueError(
"Please set the AZURE_CLIENT_ID and AZURE_TENANT_ID environment variables."
)
# Token cache
cache = msal.SerializableTokenCache()
cache_file = 'token_cache.bin'
cache_file = "token_cache.bin"
if os.path.exists(cache_file):
cache.deserialize(open(cache_file, 'r').read())
cache.deserialize(open(cache_file, "r").read())
# Authentication
authority = f'https://login.microsoftonline.com/{tenant_id}'
app = msal.PublicClientApplication(client_id, authority=authority, token_cache=cache)
authority = f"https://login.microsoftonline.com/{tenant_id}"
app = msal.PublicClientApplication(
client_id, authority=authority, token_cache=cache
)
accounts = app.get_accounts()
if accounts:
token_response = app.acquire_token_silent(scopes, account=accounts[0])
else:
flow = app.initiate_device_flow(scopes=scopes)
if 'user_code' not in flow:
if "user_code" not in flow:
raise Exception("Failed to create device flow")
print(Panel(flow['message'], border_style="magenta", padding=2, title="MSAL Login Flow Link"))
print(
Panel(
flow["message"],
border_style="magenta",
padding=2,
title="MSAL Login Flow Link",
)
)
token_response = app.acquire_token_by_device_flow(flow)
if 'access_token' not in token_response:
if "access_token" not in token_response:
raise Exception("Failed to acquire token")
# Save token cache
with open(cache_file, 'w') as f:
with open(cache_file, "w") as f:
f.write(cache.serialize())
access_token = token_response['access_token']
headers = {'Authorization': f'Bearer {access_token}', 'Prefer': 'outlook.body-content-type="text",IdType="ImmutableId"'}
access_token = token_response["access_token"]
headers = {
"Authorization": f"Bearer {access_token}",
"Prefer": 'outlook.body-content-type="text",IdType="ImmutableId"',
}
return access_token, headers
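
For orientation, here is a minimal sketch of how this module might be driven end to end; the import path (graph.auth), the scope names, and the /me call are assumptions for illustration, not part of this commit.

# Hypothetical usage sketch; import path and scope names are assumed.
import asyncio
import aiohttp

from graph.auth import get_access_token  # assumed module path


async def main():
    # Scopes are illustrative; the real CLI may request different ones.
    _token, headers = get_access_token(["Calendars.ReadWrite", "Mail.ReadWrite"])

    # The returned headers already carry the bearer token and the Prefer
    # header, so they can be passed straight to any Graph request.
    async with aiohttp.ClientSession() as session:
        async with session.get(
            "https://graph.microsoft.com/v1.0/me", headers=headers
        ) as resp:
            print(resp.status, await resp.json())


if __name__ == "__main__":
    asyncio.run(main())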

View File

@@ -3,9 +3,13 @@ Calendar operations for Microsoft Graph API.
"""
import os
import json
import re
import glob
from datetime import datetime, timedelta
from dateutil import parser
from .client import fetch_with_aiohttp
from .client import fetch_with_aiohttp, post_with_aiohttp, delete_with_aiohttp
async def fetch_calendar_events(
@@ -40,7 +44,7 @@ async def fetch_calendar_events(
calendar_url = (
f"https://graph.microsoft.com/v1.0/me/calendarView?"
f"startDateTime={start_date_str}&endDateTime={end_date_str}&"
f"$select=id,subject,organizer,start,end,location,isAllDay,showAs,sensitivity&$count=true"
f"$select=id,subject,organizer,start,end,location,isAllDay,showAs,sensitivity,iCalUId,lastModifiedDateTime&$count=true"
)
events = []
@@ -59,3 +63,408 @@ async def fetch_calendar_events(
# Return events and total count
total_count = response_data.get("@odata.count", len(events))
return events, total_count
def parse_ical_file(file_path):
"""
Parse a single iCalendar file and extract event data.
Args:
file_path (str): Path to the .ics file
Returns:
dict: Event data or None if parsing fails
"""
try:
with open(file_path, "r", encoding="utf-8") as f:
content = f.read()
event_data = {}
in_event = False
for line in content.split("\n"):
line = line.strip()
if line == "BEGIN:VEVENT":
in_event = True
continue
elif line == "END:VEVENT":
break
elif not in_event:
continue
if ":" in line:
key, value = line.split(":", 1)
# Handle special cases
if key == "UID":
event_data["uid"] = value
elif key == "SUMMARY":
event_data["subject"] = (
value.replace("\\,", ",")
.replace("\\;", ";")
.replace("\\n", "\n")
)
elif key.startswith("DTSTART"):
event_data["start"] = _parse_ical_datetime(key, value)
elif key.startswith("DTEND"):
event_data["end"] = _parse_ical_datetime(key, value)
elif key == "LOCATION":
event_data["location"] = value.replace("\\,", ",").replace(
"\\;", ";"
)
elif key == "DESCRIPTION":
event_data["description"] = (
value.replace("\\,", ",")
.replace("\\;", ";")
.replace("\\n", "\n")
)
# Get file modification time for tracking local changes
event_data["local_mtime"] = os.path.getmtime(file_path)
event_data["local_file"] = file_path
return event_data if "uid" in event_data else None
except Exception as e:
print(f"Error parsing {file_path}: {e}")
return None
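
As a rough illustration of the input this parser expects, the sketch below writes a minimal, unfolded VEVENT to a temporary file and runs it through parse_ical_file; the import path and all field values are invented.

# Illustrative only; import path and VEVENT content are made up.
import tempfile

from graph.calendar import parse_ical_file  # assumed module path

SAMPLE_ICS = "\n".join(
    [
        "BEGIN:VCALENDAR",
        "BEGIN:VEVENT",
        "UID:abc-123@example.com",
        "SUMMARY:Team sync\\, weekly",
        "DTSTART:20250716T140000Z",
        "DTEND:20250716T143000Z",
        "LOCATION:Room 4",
        "END:VEVENT",
        "END:VCALENDAR",
    ]
)

with tempfile.NamedTemporaryFile("w", suffix=".ics", delete=False) as f:
    f.write(SAMPLE_ICS)
    path = f.name

event = parse_ical_file(path)
# Expected shape: uid, subject (iCal escapes unfolded), start/end as datetimes,
# plus the local_mtime and local_file bookkeeping fields.
print(event["uid"], event["subject"], event["start"], event["end"])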
def _parse_ical_datetime(key, value):
"""Parse iCalendar datetime format."""
try:
if "TZID=" in key:
# Extract timezone info if present
tz_part = (
key.split("TZID=")[1].split(":")[0]
if ":" in key
else key.split("TZID=")[1]
)
# For now, treat as naive datetime and let dateutil handle it
return parser.parse(value.replace("Z", ""))
elif value.endswith("Z"):
# UTC time
return parser.parse(value)
else:
# Naive datetime
return parser.parse(value.replace("Z", ""))
except Exception:
return None
def get_local_calendar_events(vdir_path):
"""
Get all local calendar events from vdir format.
Args:
vdir_path (str): Path to vdir calendar directory
Returns:
dict: Dictionary mapping UIDs to event data
"""
local_events = {}
if not os.path.exists(vdir_path):
return local_events
ics_files = glob.glob(os.path.join(vdir_path, "*.ics"))
for file_path in ics_files:
event_data = parse_ical_file(file_path)
if event_data and "uid" in event_data:
local_events[event_data["uid"]] = event_data
return local_events
async def create_calendar_event(headers, event_data):
"""
Create a new calendar event on Microsoft Graph.
Args:
headers (dict): Authentication headers
event_data (dict): Event data from local file
Returns:
dict: Created event response or None if failed
"""
try:
# Convert local event data to Microsoft Graph format
graph_event = {
"subject": event_data.get("subject", "Untitled Event"),
"start": {"dateTime": event_data["start"].isoformat(), "timeZone": "UTC"},
"end": {"dateTime": event_data["end"].isoformat(), "timeZone": "UTC"},
}
if event_data.get("location"):
graph_event["location"] = {"displayName": event_data["location"]}
if event_data.get("description"):
graph_event["body"] = {
"contentType": "text",
"content": event_data["description"],
}
# Create the event
create_url = "https://graph.microsoft.com/v1.0/me/events"
status = await post_with_aiohttp(create_url, headers, graph_event)
if status == 201:
return graph_event
else:
print(f"Failed to create event: HTTP {status}")
return None
except Exception as e:
print(f"Error creating event: {e}")
return None
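
For a sense of the payload shape, the conversion above turns a parsed local event into roughly the following Graph event body (all values invented):

# Invented values; this mirrors the mapping performed by create_calendar_event.
graph_event = {
    "subject": "Team sync, weekly",
    "start": {"dateTime": "2025-07-16T14:00:00", "timeZone": "UTC"},
    "end": {"dateTime": "2025-07-16T14:30:00", "timeZone": "UTC"},
    "location": {"displayName": "Room 4"},
    "body": {"contentType": "text", "content": "Agenda in the wiki"},
}
# POSTed to https://graph.microsoft.com/v1.0/me/events; a 201 response
# indicates the event was created.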
async def delete_calendar_event_by_uid(headers, ical_uid):
"""
Delete a calendar event by its iCalUId.
Args:
headers (dict): Authentication headers
ical_uid (str): The iCalUId of the event to delete
Returns:
bool: True if deleted successfully, False otherwise
"""
try:
# First, find the event by iCalUId
search_url = f"https://graph.microsoft.com/v1.0/me/events?$filter=iCalUId eq '{ical_uid}'"
response = await fetch_with_aiohttp(search_url, headers)
events = response.get("value", [])
if not events:
print(f"Event with UID {ical_uid} not found on server")
return False
# Delete the event using its Graph ID
event_id = events[0]["id"]
delete_url = f"https://graph.microsoft.com/v1.0/me/events/{event_id}"
status = await delete_with_aiohttp(delete_url, headers)
if status == 204:
print(f"Successfully deleted event with UID {ical_uid}")
return True
else:
print(f"Failed to delete event: HTTP {status}")
return False
except Exception as e:
print(f"Error deleting event: {e}")
return False
def get_sync_timestamp_file(vdir_path):
"""Get the path to the sync timestamp file."""
return os.path.join(vdir_path, ".sync_timestamp")
def get_last_sync_time(vdir_path):
"""
Get the timestamp of the last sync.
Args:
vdir_path (str): Path to vdir calendar directory
Returns:
float: Unix timestamp of last sync, or 0 if never synced
"""
timestamp_file = get_sync_timestamp_file(vdir_path)
if os.path.exists(timestamp_file):
try:
with open(timestamp_file, "r") as f:
return float(f.read().strip())
except (ValueError, IOError):
return 0
return 0
def update_sync_timestamp(vdir_path):
"""
Update the sync timestamp to current time.
Args:
vdir_path (str): Path to vdir calendar directory
"""
timestamp_file = get_sync_timestamp_file(vdir_path)
try:
with open(timestamp_file, "w") as f:
f.write(str(datetime.now().timestamp()))
except IOError as e:
print(f"Warning: Could not update sync timestamp: {e}")
def detect_deleted_events(vdir_path):
"""
Detect events that have been deleted from vdir since last sync.
Uses sync state and file modification times to determine deletions.
Args:
vdir_path (str): Path to vdir calendar directory
Returns:
list: List of UIDs that were deleted locally
"""
if not os.path.exists(vdir_path):
return []
state_file = os.path.join(vdir_path, ".sync_state.json")
last_sync_time = get_last_sync_time(vdir_path)
# Load previous sync state
previous_state = {}
if os.path.exists(state_file):
try:
with open(state_file, "r") as f:
previous_state = json.load(f)
except Exception:
return []
if not previous_state:
return [] # No previous state to compare against
# Get current local events
current_local_events = get_local_calendar_events(vdir_path)
deleted_events = []
# Check each event from previous state
for uid in previous_state:
if uid not in current_local_events:
# Event is no longer in local files
# Check if the vdir has been modified since last sync
# This ensures we only delete events that were intentionally removed
vdir_mtime = os.path.getmtime(vdir_path)
if vdir_mtime > last_sync_time:
deleted_events.append(uid)
return deleted_events
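
To make the deletion rule concrete, here is a throwaway demonstration; the import path, UIDs, and file contents are all invented.

# Demonstration sketch with a temporary vdir; import path and UIDs are assumed.
import json
import os
import tempfile
import time

from graph.calendar import detect_deleted_events  # assumed module path

vdir = tempfile.mkdtemp()

# The previous sync knew about two events...
with open(os.path.join(vdir, ".sync_state.json"), "w") as f:
    json.dump({"abc-123": time.time(), "def-456": time.time()}, f)

# ...and recorded when it finished (a minute ago, for this demo).
with open(os.path.join(vdir, ".sync_timestamp"), "w") as f:
    f.write(str(time.time() - 60))

# Only one of the two events is still present on disk.
with open(os.path.join(vdir, "abc-123.ics"), "w") as f:
    f.write("BEGIN:VEVENT\nUID:abc-123\nEND:VEVENT\n")

# def-456 is in the previous state but has no .ics file, and the directory
# mtime is newer than the last sync, so it is reported as a local deletion.
print(detect_deleted_events(vdir))  # expected: ['def-456']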
async def sync_local_calendar_changes(
headers, vdir_path, progress, task_id, dry_run=False
):
"""
Sync local calendar changes (new events and deletions) to Microsoft Graph.
Args:
headers (dict): Authentication headers
vdir_path (str): Path to local vdir calendar directory
progress: Progress instance for updates
task_id: Progress task ID
dry_run (bool): If True, only report what would be done
Returns:
tuple: (created_count, deleted_count)
"""
if not os.path.exists(vdir_path):
progress.console.print(
f"[yellow]Local calendar directory not found: {vdir_path}[/yellow]"
)
return 0, 0
# Track state file for knowing what was previously synced
state_file = os.path.join(vdir_path, ".sync_state.json")
# Load previous sync state
previous_state = {}
if os.path.exists(state_file):
try:
with open(state_file, "r") as f:
previous_state = json.load(f)
except Exception as e:
progress.console.print(f"[yellow]Could not load sync state: {e}[/yellow]")
# Detect deleted events using enhanced detection
deleted_events = detect_deleted_events(vdir_path)
# Get current local events
current_local_events = get_local_calendar_events(vdir_path)
# Get current remote events to avoid duplicates
try:
remote_events, _ = await fetch_calendar_events(
headers, days_back=30, days_forward=90
)
remote_uids = {
event.get("iCalUId", event.get("id", "")) for event in remote_events
}
except Exception as e:
progress.console.print(f"[red]Error fetching remote events: {e}[/red]")
return 0, 0
created_count = 0
deleted_count = 0
# Find new local events (not in previous state and not on server)
new_local_events = []
for uid, event_data in current_local_events.items():
if uid not in previous_state and uid not in remote_uids:
# This is a new local event
new_local_events.append((uid, event_data))
progress.update(task_id, total=len(new_local_events) + len(deleted_events))
# Handle deletions FIRST to clean up server before adding new events
for uid in deleted_events:
if dry_run:
progress.console.print(f"[DRY-RUN] Would delete event with UID: {uid}")
else:
result = await delete_calendar_event_by_uid(headers, uid)
if result:
deleted_count += 1
progress.console.print(f"[green]Deleted event with UID: {uid}[/green]")
else:
progress.console.print(
f"[red]Failed to delete event with UID: {uid}[/red]"
)
progress.advance(task_id)
# Create new events on server
for uid, event_data in new_local_events:
if dry_run:
progress.console.print(
f"[DRY-RUN] Would create event: {event_data.get('subject', 'Untitled')}"
)
else:
result = await create_calendar_event(headers, event_data)
if result:
created_count += 1
progress.console.print(
f"[green]Created event: {event_data.get('subject', 'Untitled')}[/green]"
)
else:
progress.console.print(
f"[red]Failed to create event: {event_data.get('subject', 'Untitled')}[/red]"
)
progress.advance(task_id)
# Update sync state and timestamp
if not dry_run:
new_state = {
uid: event_data.get("local_mtime", 0)
for uid, event_data in current_local_events.items()
}
try:
with open(state_file, "w") as f:
json.dump(new_state, f, indent=2)
# Update sync timestamp to mark when this sync completed
update_sync_timestamp(vdir_path)
except Exception as e:
progress.console.print(f"[yellow]Could not save sync state: {e}[/yellow]")
if created_count > 0 or deleted_count > 0:
progress.console.print(
f"[cyan]Local calendar sync completed: {created_count} created, {deleted_count} deleted[/cyan]"
)
return created_count, deleted_count
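
A rough sketch of how this entry point might be wired into a CLI command; the import paths, the vdir location, and the scope list are assumptions.

# Hypothetical wiring; import paths, scopes, and the vdir location are assumed.
import asyncio
import os

from rich.progress import Progress

from graph.auth import get_access_token  # assumed module path
from graph.calendar import sync_local_calendar_changes  # assumed module path


async def main():
    _token, headers = get_access_token(["Calendars.ReadWrite"])
    vdir_path = os.path.expanduser("~/.calendars/work")  # placeholder path

    with Progress() as progress:
        task_id = progress.add_task("Syncing local calendar changes", total=None)
        created, deleted = await sync_local_calendar_changes(
            headers, vdir_path, progress, task_id, dry_run=True
        )
    print(f"{created} created, {deleted} deleted")


if __name__ == "__main__":
    asyncio.run(main())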

View File

@@ -1,16 +1,41 @@
"""
HTTP client for Microsoft Graph API.
"""
import aiohttp
import asyncio
import logging
import orjson
# Define a global semaphore for throttling
semaphore = asyncio.Semaphore(4)
# Suppress debug logging from HTTP libraries
logging.getLogger("aiohttp").setLevel(logging.ERROR)
logging.getLogger("aiohttp.access").setLevel(logging.ERROR)
logging.getLogger("urllib3").setLevel(logging.ERROR)
logging.getLogger("asyncio").setLevel(logging.ERROR)
# Define a global semaphore for throttling - reduced for better compliance
semaphore = asyncio.Semaphore(2)
async def _handle_throttling_retry(func, *args, max_retries=3):
"""Handle 429 throttling with exponential backoff retry."""
for attempt in range(max_retries):
try:
return await func(*args)
except Exception as e:
if "429" in str(e) and attempt < max_retries - 1:
wait_time = (2**attempt) + 1  # Exponential backoff: 2, 3, 5 seconds
print(
f"Rate limited, waiting {wait_time}s before retry {attempt + 1}/{max_retries}"
)
await asyncio.sleep(wait_time)
continue
raise e
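
With this formula the waits work out to 2, 3, and 5 seconds for attempts 0 through 2, though with max_retries=3 only the first two sleeps are ever taken before the error is re-raised. As a demonstration, the sketch below wraps a coroutine that fails once with a 429-style error; _handle_throttling_retry is a private helper, so calling it directly like this is for illustration only and the import path is assumed.

# Demonstration only; the import path is assumed.
import asyncio

from graph.client import _handle_throttling_retry  # assumed module path

calls = {"n": 0}


async def flaky_fetch(url):
    # Fail with a throttling-shaped error on the first call, then succeed.
    calls["n"] += 1
    if calls["n"] == 1:
        raise Exception(f"Failed to fetch {url}: 429 throttled")
    return {"value": []}


async def main():
    # The first attempt raises, the handler sleeps 2 s, the retry succeeds.
    result = await _handle_throttling_retry(flaky_fetch, "https://example.invalid")
    print(result)


asyncio.run(main())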
async def fetch_with_aiohttp(url, headers):
"""
Fetch data from Microsoft Graph API.
Fetch data from Microsoft Graph API with throttling and retry logic.
Args:
url (str): The URL to fetch data from.
@@ -20,23 +45,36 @@ async def fetch_with_aiohttp(url, headers):
dict: JSON response data.
Raises:
Exception: If the request fails.
Exception: If the request fails after retries.
"""
return await _handle_throttling_retry(_fetch_impl, url, headers)
async def _fetch_impl(url, headers):
"""Internal fetch implementation."""
async with semaphore:
async with aiohttp.ClientSession() as session:
async with session.get(url, headers=headers) as response:
if response.status != 200:
raise Exception(f"Failed to fetch {url}: {response.status} {await response.text()}")
if response.status == 429:
# Let the retry handler deal with throttling
raise Exception(
f"Failed to fetch {url}: {response.status} {await response.text()}"
)
elif response.status != 200:
raise Exception(
f"Failed to fetch {url}: {response.status} {await response.text()}"
)
raw_bytes = await response.read()
content_length = response.headers.get('Content-Length')
content_length = response.headers.get("Content-Length")
if content_length and len(raw_bytes) != int(content_length):
print("Warning: Incomplete response received!")
return None
return orjson.loads(raw_bytes)
async def post_with_aiohttp(url, headers, json_data):
"""
Post data to Microsoft Graph API.
Post data to Microsoft Graph API with throttling and retry logic.
Args:
url (str): The URL to post data to.
@@ -46,14 +84,24 @@ async def post_with_aiohttp(url, headers, json_data):
Returns:
int: HTTP status code.
"""
return await _handle_throttling_retry(_post_impl, url, headers, json_data)
async def _post_impl(url, headers, json_data):
"""Internal post implementation."""
async with semaphore:
async with aiohttp.ClientSession() as session:
async with session.post(url, headers=headers, json=json_data) as response:
if response.status == 429:
raise Exception(
f"Failed to post {url}: {response.status} {await response.text()}"
)
return response.status
async def patch_with_aiohttp(url, headers, json_data):
"""
Patch data to Microsoft Graph API.
Patch data to Microsoft Graph API with throttling and retry logic.
Args:
url (str): The URL to patch data to.
@@ -63,14 +111,24 @@ async def patch_with_aiohttp(url, headers, json_data):
Returns:
int: HTTP status code.
"""
return await _handle_throttling_retry(_patch_impl, url, headers, json_data)
async def _patch_impl(url, headers, json_data):
"""Internal patch implementation."""
async with semaphore:
async with aiohttp.ClientSession() as session:
async with session.patch(url, headers=headers, json=json_data) as response:
if response.status == 429:
raise Exception(
f"Failed to patch {url}: {response.status} {await response.text()}"
)
return response.status
async def delete_with_aiohttp(url, headers):
"""
Delete data from Microsoft Graph API.
Delete data from Microsoft Graph API with throttling and retry logic.
Args:
url (str): The URL to delete data from.
@@ -79,7 +137,51 @@ async def delete_with_aiohttp(url, headers):
Returns:
int: HTTP status code.
"""
return await _handle_throttling_retry(_delete_impl, url, headers)
async def _delete_impl(url, headers):
"""Internal delete implementation."""
async with semaphore:
async with aiohttp.ClientSession() as session:
async with session.delete(url, headers=headers) as response:
if response.status == 429:
raise Exception(
f"Failed to delete {url}: {response.status} {await response.text()}"
)
return response.status
async def batch_with_aiohttp(requests, headers):
"""
Execute multiple requests in a single batch call to Microsoft Graph API with throttling and retry logic.
Args:
requests (list): List of request dictionaries with 'id', 'method', 'url', and optional 'body' keys.
headers (dict): Headers including authentication.
Returns:
dict: Batch response with individual request responses.
"""
return await _handle_throttling_retry(_batch_impl, requests, headers)
async def _batch_impl(requests, headers):
"""Internal batch implementation."""
batch_url = "https://graph.microsoft.com/v1.0/$batch"
batch_data = {"requests": requests}
async with semaphore:
async with aiohttp.ClientSession() as session:
async with session.post(
batch_url, headers=headers, json=batch_data
) as response:
if response.status == 429:
raise Exception(
f"Batch request failed: {response.status} {await response.text()}"
)
elif response.status != 200:
raise Exception(
f"Batch request failed: {response.status} {await response.text()}"
)
return await response.json()
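
For reference, a minimal sketch of driving batch_with_aiohttp directly; the request list mirrors the shape used by the mail operations below, and the import paths and message ids are invented.

# Hypothetical usage; import paths and message ids are invented.
import asyncio

from graph.auth import get_access_token  # assumed module path
from graph.client import batch_with_aiohttp  # assumed module path


async def main():
    _token, headers = get_access_token(["Mail.ReadWrite"])

    # Up to 20 sub-requests per call (the Graph $batch limit used below).
    requests = [
        {
            "id": "1",
            "method": "PATCH",
            "url": "/me/messages/AAMkAD-example-id",
            "body": {"isRead": True},
            "headers": {"Content-Type": "application/json"},
        },
        {
            "id": "2",
            "method": "DELETE",
            "url": "/me/messages/AAMkAD-another-id",
        },
    ]
    response = await batch_with_aiohttp(requests, headers)
    for sub in response.get("responses", []):
        print(sub["id"], sub["status"])


asyncio.run(main())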

View File

@@ -5,6 +5,7 @@ Mail operations for Microsoft Graph API.
import os
import re
import glob
import asyncio
from typing import Set
import aiohttp
@@ -13,6 +14,7 @@ from .client import (
patch_with_aiohttp,
post_with_aiohttp,
delete_with_aiohttp,
batch_with_aiohttp,
)
@@ -73,9 +75,18 @@ async def fetch_mail_async(
new_files = set(glob.glob(os.path.join(new_dir, "*.eml*")))
cur_files = set(glob.glob(os.path.join(cur_dir, "*.eml*")))
for filename in Set.union(cur_files, new_files):
message_id = filename.split(".")[0].split("/")[
-1
# Get local message IDs (filename without extension)
local_msg_ids = set()
for filename in set.union(cur_files, new_files):
message_id = os.path.basename(filename).split(".")[
0
] # Extract the Message-ID from the filename
local_msg_ids.add(message_id)
# Delete local files that no longer exist on server
for filename in set.union(cur_files, new_files):
message_id = os.path.basename(filename).split(".")[
0
] # Extract the Message-ID from the filename
if message_id not in inbox_msg_ids:
if not dry_run:
@@ -84,7 +95,18 @@ async def fetch_mail_async(
else:
progress.console.print(f"[DRY-RUN] Would delete {filename} from inbox")
for message in messages:
# Filter messages to only include those not already local
messages_to_download = [msg for msg in messages if msg["id"] not in local_msg_ids]
progress.console.print(
f"Found {len(messages)} total messages on server, {len(local_msg_ids)} already local"
)
progress.console.print(f"Downloading {len(messages_to_download)} new messages")
# Update progress to reflect only the messages we actually need to download
progress.update(task_id, total=len(messages_to_download), completed=0)
for message in messages_to_download:
progress.console.print(
f"Processing message: {message.get('subject', 'No Subject')}", end="\r"
)
@@ -97,14 +119,19 @@ async def fetch_mail_async(
dry_run,
download_attachments,
)
progress.update(task_id, advance=0.5)
progress.update(task_id, completed=len(messages))
progress.console.print(f"\nFinished saving {len(messages)} messages.")
progress.update(task_id, advance=1)
progress.update(task_id, completed=len(messages_to_download))
progress.console.print(
f"\nFinished downloading {len(messages_to_download)} new messages."
)
progress.console.print(
f"Total messages on server: {len(messages)}, Already local: {len(local_msg_ids)}"
)
async def archive_mail_async(maildir_path, headers, progress, task_id, dry_run=False):
"""
Archive mail from Maildir to Microsoft Graph API archive folder.
Archive mail from Maildir to Microsoft Graph API archive folder using batch operations.
Args:
maildir_path (str): Path to the Maildir.
@@ -125,8 +152,14 @@ async def archive_mail_async(maildir_path, headers, progress, task_id, dry_run=F
glob.glob(os.path.join(archive_dir, "**", "*.eml*"), recursive=True)
)
if not archive_files:
progress.update(task_id, total=0, completed=0)
progress.console.print("No messages to archive")
return
progress.update(task_id, total=len(archive_files))
# Get archive folder ID from server
folder_response = await fetch_with_aiohttp(
"https://graph.microsoft.com/v1.0/me/mailFolders", headers
)
@@ -143,44 +176,115 @@ async def archive_mail_async(maildir_path, headers, progress, task_id, dry_run=F
if not archive_folder_id:
raise Exception("No folder named 'Archive' or 'Archives' found on the server.")
for filepath in archive_files:
message_id = os.path.basename(filepath).split(".")[
0
] # Extract the Message-ID from the filename
# Process files in batches of 20 (Microsoft Graph batch limit)
batch_size = 20
successful_moves = []
for i in range(0, len(archive_files), batch_size):
batch_files = archive_files[i : i + batch_size]
# Add small delay between batches to respect API limits
if i > 0:
await asyncio.sleep(0.5)
if not dry_run:
status = await post_with_aiohttp(
f"https://graph.microsoft.com/v1.0/me/messages/{message_id}/microsoft.graph.move",
headers,
{"destinationId": archive_folder_id},
)
if status == 201: # 201 Created indicates successful move
os.remove(
filepath
) # Remove the local file since it's now archived on server
progress.console.print(f"Moved message to 'Archive': {message_id}")
elif status == 404:
os.remove(
filepath
) # Remove the file from local archive if not found on server
progress.console.print(
f"Message not found on server, removed local copy: {message_id}"
)
else:
progress.console.print(
f"Failed to move message to 'Archive': {message_id}, status: {status}"
# Prepare batch requests
batch_requests = []
for idx, filepath in enumerate(batch_files):
message_id = os.path.basename(filepath).split(".")[0]
batch_requests.append(
{
"id": str(idx + 1),
"method": "POST",
"url": f"/me/messages/{message_id}/microsoft.graph.move",
"body": {"destinationId": archive_folder_id},
"headers": {"Content-Type": "application/json"},
}
)
try:
# Execute batch request
batch_response = await batch_with_aiohttp(batch_requests, headers)
# Process batch results
for response in batch_response.get("responses", []):
request_id = (
int(response["id"]) - 1
) # Convert back to 0-based index
filepath = batch_files[request_id]
message_id = os.path.basename(filepath).split(".")[0]
status = response["status"]
if status == 201: # 201 Created indicates successful move
os.remove(
filepath
) # Remove the local file since it's now archived on server
successful_moves.append(message_id)
progress.console.print(
f"Moved message to 'Archive': {message_id}"
)
elif status == 404:
os.remove(
filepath
) # Remove the file from local archive if not found on server
progress.console.print(
f"Message not found on server, removed local copy: {message_id}"
)
else:
progress.console.print(
f"Failed to move message to 'Archive': {message_id}, status: {status}"
)
except Exception as e:
progress.console.print(f"Batch archive request failed: {str(e)}")
# Fall back to individual requests for this batch
for filepath in batch_files:
message_id = os.path.basename(filepath).split(".")[0]
try:
status = await post_with_aiohttp(
f"https://graph.microsoft.com/v1.0/me/messages/{message_id}/microsoft.graph.move",
headers,
{"destinationId": archive_folder_id},
)
if status == 201:
os.remove(filepath)
successful_moves.append(message_id)
progress.console.print(
f"Moved message to 'Archive' (fallback): {message_id}"
)
elif status == 404:
os.remove(filepath)
progress.console.print(
f"Message not found on server, removed local copy: {message_id}"
)
else:
progress.console.print(
f"Failed to move message to 'Archive': {message_id}, status: {status}"
)
except Exception as individual_error:
progress.console.print(
f"Failed to archive {message_id}: {str(individual_error)}"
)
else:
progress.console.print(
f"[DRY-RUN] Would move message to 'Archive' folder: {message_id}"
)
progress.advance(task_id)
# Dry run - just log what would be done
for filepath in batch_files:
message_id = os.path.basename(filepath).split(".")[0]
progress.console.print(
f"[DRY-RUN] Would move message to 'Archive' folder: {message_id}"
)
progress.advance(task_id, len(batch_files))
if not dry_run:
progress.console.print(
f"Successfully archived {len(successful_moves)} messages in batches"
)
return
async def delete_mail_async(maildir_path, headers, progress, task_id, dry_run=False):
"""
Delete mail from Maildir and Microsoft Graph API.
Delete mail from Maildir and Microsoft Graph API using batch operations.
Args:
maildir_path (str): Path to the Maildir.
@@ -194,22 +298,99 @@ async def delete_mail_async(maildir_path, headers, progress, task_id, dry_run=Fa
"""
trash_dir = os.path.join(maildir_path, ".Trash", "cur")
trash_files = set(glob.glob(os.path.join(trash_dir, "*.eml*")))
if not trash_files:
progress.update(task_id, total=0, completed=0)
progress.console.print("No messages to delete")
return
progress.update(task_id, total=len(trash_files))
for filepath in trash_files:
message_id = os.path.basename(filepath).split(".")[
0
] # Extract the Message-ID from the filename
# Process files in batches of 20 (Microsoft Graph batch limit)
batch_size = 20
trash_files_list = list(trash_files)
successful_deletes = []
for i in range(0, len(trash_files_list), batch_size):
batch_files = trash_files_list[i : i + batch_size]
# Add small delay between batches to respect API limits
if i > 0:
await asyncio.sleep(0.5)
if not dry_run:
progress.console.print(f"Moving message to trash: {message_id}")
status = await delete_with_aiohttp(
f"https://graph.microsoft.com/v1.0/me/messages/{message_id}", headers
)
if status == 204 or status == 404:
os.remove(filepath) # Remove the file from local trash
# Prepare batch requests
batch_requests = []
for idx, filepath in enumerate(batch_files):
message_id = os.path.basename(filepath).split(".")[0]
batch_requests.append(
{
"id": str(idx + 1),
"method": "DELETE",
"url": f"/me/messages/{message_id}",
}
)
try:
# Execute batch request
batch_response = await batch_with_aiohttp(batch_requests, headers)
# Process batch results
for response in batch_response.get("responses", []):
request_id = (
int(response["id"]) - 1
) # Convert back to 0-based index
filepath = batch_files[request_id]
message_id = os.path.basename(filepath).split(".")[0]
status = response["status"]
if (
status == 204 or status == 404
): # 204 No Content or 404 Not Found (already deleted)
os.remove(filepath) # Remove the file from local trash
successful_deletes.append(message_id)
progress.console.print(f"Deleted message: {message_id}")
else:
progress.console.print(
f"Failed to delete message: {message_id}, status: {status}"
)
except Exception as e:
progress.console.print(f"Batch delete request failed: {str(e)}")
# Fall back to individual requests for this batch
for filepath in batch_files:
message_id = os.path.basename(filepath).split(".")[0]
try:
status = await delete_with_aiohttp(
f"https://graph.microsoft.com/v1.0/me/messages/{message_id}",
headers,
)
if status == 204 or status == 404:
os.remove(filepath)
successful_deletes.append(message_id)
progress.console.print(
f"Deleted message (fallback): {message_id}"
)
else:
progress.console.print(
f"Failed to delete message: {message_id}, status: {status}"
)
except Exception as individual_error:
progress.console.print(
f"Failed to delete {message_id}: {str(individual_error)}"
)
else:
progress.console.print(f"[DRY-RUN] Would delete message: {message_id}")
progress.advance(task_id)
# Dry run - just log what would be done
for filepath in batch_files:
message_id = os.path.basename(filepath).split(".")[0]
progress.console.print(f"[DRY-RUN] Would delete message: {message_id}")
progress.advance(task_id, len(batch_files))
if not dry_run:
progress.console.print(
f"Successfully deleted {len(successful_deletes)} messages in batches"
)
async def get_inbox_count_async(headers):
@@ -231,7 +412,7 @@ async def synchronize_maildir_async(
maildir_path, headers, progress, task_id, dry_run=False
):
"""
Synchronize Maildir with Microsoft Graph API.
Synchronize Maildir with Microsoft Graph API using batch operations.
Args:
maildir_path (str): Path to the Maildir.
@@ -258,32 +439,123 @@ async def synchronize_maildir_async(
cur_files = set(glob.glob(os.path.join(cur_dir, "*.eml*")))
moved_to_cur = [os.path.basename(f) for f in cur_files - new_files]
progress.update(task_id, total=len(moved_to_cur))
for filename in moved_to_cur:
# TODO: this isn't scalable, we should use a more efficient way to check if the file was modified
if os.path.getmtime(os.path.join(cur_dir, filename)) < last_sync:
progress.update(task_id, advance=1)
continue
message_id = re.sub(
r"\:2.+", "", filename.split(".")[0]
) # Extract the Message-ID from the filename
if not dry_run:
status = await patch_with_aiohttp(
f"https://graph.microsoft.com/v1.0/me/messages/{message_id}",
headers,
{"isRead": True},
)
if status == 404:
os.remove(os.path.join(cur_dir, filename))
# Filter out files that haven't been modified since last sync
files_to_process = []
for filename in moved_to_cur:
if os.path.getmtime(os.path.join(cur_dir, filename)) >= last_sync:
files_to_process.append(filename)
if not files_to_process:
progress.update(task_id, total=0, completed=0)
progress.console.print("No messages to mark as read")
# Save timestamp even if no work was done
if not dry_run:
save_sync_timestamp()
return
progress.update(task_id, total=len(files_to_process))
# Process files in batches of 20 (Microsoft Graph batch limit)
batch_size = 20
successful_reads = []
for i in range(0, len(files_to_process), batch_size):
batch_files = files_to_process[i : i + batch_size]
# Add small delay between batches to respect API limits
if i > 0:
await asyncio.sleep(0.5)
if not dry_run:
# Prepare batch requests
batch_requests = []
for idx, filename in enumerate(batch_files):
message_id = re.sub(r"\:2.+", "", filename.split(".")[0])
batch_requests.append(
{
"id": str(idx + 1),
"method": "PATCH",
"url": f"/me/messages/{message_id}",
"body": {"isRead": True},
"headers": {"Content-Type": "application/json"},
}
)
try:
# Execute batch request
batch_response = await batch_with_aiohttp(batch_requests, headers)
# Process batch results
for response in batch_response.get("responses", []):
request_id = (
int(response["id"]) - 1
) # Convert back to 0-based index
filename = batch_files[request_id]
message_id = re.sub(r"\:2.+", "", filename.split(".")[0])
status = response["status"]
if status == 200: # 200 OK indicates successful update
successful_reads.append(message_id)
progress.console.print(
f"Marked message as read: {truncate_id(message_id)}"
)
elif status == 404:
os.remove(
os.path.join(cur_dir, filename)
) # Remove file if message doesn't exist on server
progress.console.print(
f"Message not found on server, removed local copy: {truncate_id(message_id)}"
)
else:
progress.console.print(
f"Failed to mark message as read: {truncate_id(message_id)}, status: {status}"
)
except Exception as e:
progress.console.print(f"Batch read-status request failed: {str(e)}")
# Fall back to individual requests for this batch
for filename in batch_files:
message_id = re.sub(r"\:2.+", "", filename.split(".")[0])
try:
status = await patch_with_aiohttp(
f"https://graph.microsoft.com/v1.0/me/messages/{message_id}",
headers,
{"isRead": True},
)
if status == 200:
successful_reads.append(message_id)
progress.console.print(
f"Marked message as read (fallback): {truncate_id(message_id)}"
)
elif status == 404:
os.remove(os.path.join(cur_dir, filename))
progress.console.print(
f"Message not found on server, removed local copy: {truncate_id(message_id)}"
)
else:
progress.console.print(
f"Failed to mark message as read: {truncate_id(message_id)}, status: {status}"
)
except Exception as individual_error:
progress.console.print(
f"Failed to update read status for {truncate_id(message_id)}: {str(individual_error)}"
)
else:
progress.console.print(
f"[DRY-RUN] Would mark message as read: {truncate_id(message_id)}"
)
progress.advance(task_id)
# Dry run - just log what would be done
for filename in batch_files:
message_id = re.sub(r"\:2.+", "", filename.split(".")[0])
progress.console.print(
f"[DRY-RUN] Would mark message as read: {truncate_id(message_id)}"
)
progress.advance(task_id, len(batch_files))
# Save the current sync timestamp
if not dry_run:
save_sync_timestamp()
progress.console.print(
f"Successfully marked {len(successful_reads)} messages as read in batches"
)
else:
progress.console.print("[DRY-RUN] Would save sync timestamp.")