add vdir sync feature

Tim Bendt
2025-07-15 23:39:53 -04:00
parent df4c49c3ef
commit 1f306fffd7
9 changed files with 1212 additions and 521 deletions

View File

@@ -1,15 +1,30 @@
"""
Authentication module for Microsoft Graph API.
"""
import os
import msal
import logging
from rich import print
from rich.panel import Panel
# Comprehensive logging suppression for authentication-related libraries
logging.getLogger("msal").setLevel(logging.ERROR)
logging.getLogger("urllib3").setLevel(logging.ERROR)
logging.getLogger("requests").setLevel(logging.ERROR)
logging.getLogger("requests_oauthlib").setLevel(logging.ERROR)
logging.getLogger("aiohttp").setLevel(logging.ERROR)
logging.getLogger("aiohttp.access").setLevel(logging.ERROR)
logging.getLogger("asyncio").setLevel(logging.ERROR)
logging.getLogger("azure").setLevel(logging.ERROR)
logging.getLogger("azure.core").setLevel(logging.ERROR)
def ensure_directory_exists(path):
if not os.path.exists(path):
os.makedirs(path)
def get_access_token(scopes):
"""
Authenticate with Microsoft Graph API and obtain an access token.
@@ -26,43 +41,57 @@ def get_access_token(scopes):
Exception: If authentication fails.
"""
# Read Azure app credentials from environment variables
client_id = os.getenv('AZURE_CLIENT_ID')
tenant_id = os.getenv('AZURE_TENANT_ID')
client_id = os.getenv("AZURE_CLIENT_ID")
tenant_id = os.getenv("AZURE_TENANT_ID")
if not client_id or not tenant_id:
raise ValueError("Please set the AZURE_CLIENT_ID and AZURE_TENANT_ID environment variables.")
raise ValueError(
"Please set the AZURE_CLIENT_ID and AZURE_TENANT_ID environment variables."
)
# Token cache
cache = msal.SerializableTokenCache()
cache_file = 'token_cache.bin'
cache_file = "token_cache.bin"
if os.path.exists(cache_file):
cache.deserialize(open(cache_file, 'r').read())
cache.deserialize(open(cache_file, "r").read())
# Authentication
authority = f'https://login.microsoftonline.com/{tenant_id}'
app = msal.PublicClientApplication(client_id, authority=authority, token_cache=cache)
authority = f"https://login.microsoftonline.com/{tenant_id}"
app = msal.PublicClientApplication(
client_id, authority=authority, token_cache=cache
)
accounts = app.get_accounts()
if accounts:
token_response = app.acquire_token_silent(scopes, account=accounts[0])
else:
flow = app.initiate_device_flow(scopes=scopes)
if 'user_code' not in flow:
if "user_code" not in flow:
raise Exception("Failed to create device flow")
print(Panel(flow['message'], border_style="magenta", padding=2, title="MSAL Login Flow Link"))
print(
Panel(
flow["message"],
border_style="magenta",
padding=2,
title="MSAL Login Flow Link",
)
)
token_response = app.acquire_token_by_device_flow(flow)
if 'access_token' not in token_response:
if "access_token" not in token_response:
raise Exception("Failed to acquire token")
# Save token cache
with open(cache_file, 'w') as f:
with open(cache_file, "w") as f:
f.write(cache.serialize())
access_token = token_response['access_token']
headers = {'Authorization': f'Bearer {access_token}', 'Prefer': 'outlook.body-content-type="text",IdType="ImmutableId"'}
access_token = token_response["access_token"]
headers = {
"Authorization": f"Bearer {access_token}",
"Prefer": 'outlook.body-content-type="text",IdType="ImmutableId"',
}
return access_token, headers
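
For orientation, here is a minimal sketch of how this module might be driven end to end; the import path (graph.auth), the scope names, and the /me call are assumptions for illustration, not part of this commit.

# Hypothetical usage sketch; import path and scope names are assumed.
import asyncio
import aiohttp

from graph.auth import get_access_token  # assumed module path


async def main():
    # Scopes are illustrative; the real CLI may request different ones.
    _token, headers = get_access_token(["Calendars.ReadWrite", "Mail.ReadWrite"])

    # The returned headers already carry the bearer token and the Prefer
    # header, so they can be passed straight to any Graph request.
    async with aiohttp.ClientSession() as session:
        async with session.get(
            "https://graph.microsoft.com/v1.0/me", headers=headers
        ) as resp:
            print(resp.status, await resp.json())


if __name__ == "__main__":
    asyncio.run(main())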

View File

@@ -3,9 +3,13 @@ Calendar operations for Microsoft Graph API.
"""
import os
import json
import re
import glob
from datetime import datetime, timedelta
from dateutil import parser
from .client import fetch_with_aiohttp
from .client import fetch_with_aiohttp, post_with_aiohttp, delete_with_aiohttp
async def fetch_calendar_events(
@@ -40,7 +44,7 @@ async def fetch_calendar_events(
calendar_url = (
f"https://graph.microsoft.com/v1.0/me/calendarView?"
f"startDateTime={start_date_str}&endDateTime={end_date_str}&"
f"$select=id,subject,organizer,start,end,location,isAllDay,showAs,sensitivity&$count=true"
f"$select=id,subject,organizer,start,end,location,isAllDay,showAs,sensitivity,iCalUId,lastModifiedDateTime&$count=true"
)
events = []
@@ -59,3 +63,408 @@ async def fetch_calendar_events(
# Return events and total count
total_count = response_data.get("@odata.count", len(events))
return events, total_count
def parse_ical_file(file_path):
"""
Parse a single iCalendar file and extract event data.
Args:
file_path (str): Path to the .ics file
Returns:
dict: Event data or None if parsing fails
"""
try:
with open(file_path, "r", encoding="utf-8") as f:
content = f.read()
event_data = {}
in_event = False
for line in content.split("\n"):
line = line.strip()
if line == "BEGIN:VEVENT":
in_event = True
continue
elif line == "END:VEVENT":
break
elif not in_event:
continue
if ":" in line:
key, value = line.split(":", 1)
# Handle special cases
if key == "UID":
event_data["uid"] = value
elif key == "SUMMARY":
event_data["subject"] = (
value.replace("\\,", ",")
.replace("\\;", ";")
.replace("\\n", "\n")
)
elif key.startswith("DTSTART"):
event_data["start"] = _parse_ical_datetime(key, value)
elif key.startswith("DTEND"):
event_data["end"] = _parse_ical_datetime(key, value)
elif key == "LOCATION":
event_data["location"] = value.replace("\\,", ",").replace(
"\\;", ";"
)
elif key == "DESCRIPTION":
event_data["description"] = (
value.replace("\\,", ",")
.replace("\\;", ";")
.replace("\\n", "\n")
)
# Get file modification time for tracking local changes
event_data["local_mtime"] = os.path.getmtime(file_path)
event_data["local_file"] = file_path
return event_data if "uid" in event_data else None
except Exception as e:
print(f"Error parsing {file_path}: {e}")
return None
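
As a rough illustration of the input this parser expects, the sketch below writes a minimal, unfolded VEVENT to a temporary file and runs it through parse_ical_file; the import path and all field values are invented.

# Illustrative only; import path and VEVENT content are made up.
import tempfile

from graph.calendar import parse_ical_file  # assumed module path

SAMPLE_ICS = "\n".join(
    [
        "BEGIN:VCALENDAR",
        "BEGIN:VEVENT",
        "UID:abc-123@example.com",
        "SUMMARY:Team sync\\, weekly",
        "DTSTART:20250716T140000Z",
        "DTEND:20250716T143000Z",
        "LOCATION:Room 4",
        "END:VEVENT",
        "END:VCALENDAR",
    ]
)

with tempfile.NamedTemporaryFile("w", suffix=".ics", delete=False) as f:
    f.write(SAMPLE_ICS)
    path = f.name

event = parse_ical_file(path)
# Expected shape: uid, subject (iCal escapes unfolded), start/end as datetimes,
# plus the local_mtime and local_file bookkeeping fields.
print(event["uid"], event["subject"], event["start"], event["end"])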
def _parse_ical_datetime(key, value):
"""Parse iCalendar datetime format."""
try:
if "TZID=" in key:
# Extract timezone info if present
tz_part = (
key.split("TZID=")[1].split(":")[0]
if ":" in key
else key.split("TZID=")[1]
)
# For now, treat as naive datetime and let dateutil handle it
return parser.parse(value.replace("Z", ""))
elif value.endswith("Z"):
# UTC time
return parser.parse(value)
else:
# Naive datetime
return parser.parse(value.replace("Z", ""))
except Exception:
return None
def get_local_calendar_events(vdir_path):
"""
Get all local calendar events from vdir format.
Args:
vdir_path (str): Path to vdir calendar directory
Returns:
dict: Dictionary mapping UIDs to event data
"""
local_events = {}
if not os.path.exists(vdir_path):
return local_events
ics_files = glob.glob(os.path.join(vdir_path, "*.ics"))
for file_path in ics_files:
event_data = parse_ical_file(file_path)
if event_data and "uid" in event_data:
local_events[event_data["uid"]] = event_data
return local_events
async def create_calendar_event(headers, event_data):
"""
Create a new calendar event on Microsoft Graph.
Args:
headers (dict): Authentication headers
event_data (dict): Event data from local file
Returns:
dict: Created event response or None if failed
"""
try:
# Convert local event data to Microsoft Graph format
graph_event = {
"subject": event_data.get("subject", "Untitled Event"),
"start": {"dateTime": event_data["start"].isoformat(), "timeZone": "UTC"},
"end": {"dateTime": event_data["end"].isoformat(), "timeZone": "UTC"},
}
if event_data.get("location"):
graph_event["location"] = {"displayName": event_data["location"]}
if event_data.get("description"):
graph_event["body"] = {
"contentType": "text",
"content": event_data["description"],
}
# Create the event
create_url = "https://graph.microsoft.com/v1.0/me/events"
status = await post_with_aiohttp(create_url, headers, graph_event)
if status == 201:
return graph_event
else:
print(f"Failed to create event: HTTP {status}")
return None
except Exception as e:
print(f"Error creating event: {e}")
return None
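
For a sense of the payload shape, the conversion above turns a parsed local event into roughly the following Graph event body (all values invented):

# Invented values; this mirrors the mapping performed by create_calendar_event.
graph_event = {
    "subject": "Team sync, weekly",
    "start": {"dateTime": "2025-07-16T14:00:00", "timeZone": "UTC"},
    "end": {"dateTime": "2025-07-16T14:30:00", "timeZone": "UTC"},
    "location": {"displayName": "Room 4"},
    "body": {"contentType": "text", "content": "Agenda in the wiki"},
}
# POSTed to https://graph.microsoft.com/v1.0/me/events; a 201 response
# indicates the event was created.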
async def delete_calendar_event_by_uid(headers, ical_uid):
"""
Delete a calendar event by its iCalUId.
Args:
headers (dict): Authentication headers
ical_uid (str): The iCalUId of the event to delete
Returns:
bool: True if deleted successfully, False otherwise
"""
try:
# First, find the event by iCalUId
search_url = f"https://graph.microsoft.com/v1.0/me/events?$filter=iCalUId eq '{ical_uid}'"
response = await fetch_with_aiohttp(search_url, headers)
events = response.get("value", [])
if not events:
print(f"Event with UID {ical_uid} not found on server")
return False
# Delete the event using its Graph ID
event_id = events[0]["id"]
delete_url = f"https://graph.microsoft.com/v1.0/me/events/{event_id}"
status = await delete_with_aiohttp(delete_url, headers)
if status == 204:
print(f"Successfully deleted event with UID {ical_uid}")
return True
else:
print(f"Failed to delete event: HTTP {status}")
return False
except Exception as e:
print(f"Error deleting event: {e}")
return False
def get_sync_timestamp_file(vdir_path):
"""Get the path to the sync timestamp file."""
return os.path.join(vdir_path, ".sync_timestamp")
def get_last_sync_time(vdir_path):
"""
Get the timestamp of the last sync.
Args:
vdir_path (str): Path to vdir calendar directory
Returns:
float: Unix timestamp of last sync, or 0 if never synced
"""
timestamp_file = get_sync_timestamp_file(vdir_path)
if os.path.exists(timestamp_file):
try:
with open(timestamp_file, "r") as f:
return float(f.read().strip())
except (ValueError, IOError):
return 0
return 0
def update_sync_timestamp(vdir_path):
"""
Update the sync timestamp to current time.
Args:
vdir_path (str): Path to vdir calendar directory
"""
timestamp_file = get_sync_timestamp_file(vdir_path)
try:
with open(timestamp_file, "w") as f:
f.write(str(datetime.now().timestamp()))
except IOError as e:
print(f"Warning: Could not update sync timestamp: {e}")
def detect_deleted_events(vdir_path):
"""
Detect events that have been deleted from vdir since last sync.
Uses sync state and file modification times to determine deletions.
Args:
vdir_path (str): Path to vdir calendar directory
Returns:
list: List of UIDs that were deleted locally
"""
if not os.path.exists(vdir_path):
return []
state_file = os.path.join(vdir_path, ".sync_state.json")
last_sync_time = get_last_sync_time(vdir_path)
# Load previous sync state
previous_state = {}
if os.path.exists(state_file):
try:
with open(state_file, "r") as f:
previous_state = json.load(f)
except Exception:
return []
if not previous_state:
return [] # No previous state to compare against
# Get current local events
current_local_events = get_local_calendar_events(vdir_path)
deleted_events = []
# Check each event from previous state
for uid in previous_state:
if uid not in current_local_events:
# Event is no longer in local files
# Check if the vdir has been modified since last sync
# This ensures we only delete events that were intentionally removed
vdir_mtime = os.path.getmtime(vdir_path)
if vdir_mtime > last_sync_time:
deleted_events.append(uid)
return deleted_events
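
To make the deletion rule concrete, here is a throwaway demonstration; the import path, UIDs, and file contents are all invented.

# Demonstration sketch with a temporary vdir; import path and UIDs are assumed.
import json
import os
import tempfile
import time

from graph.calendar import detect_deleted_events  # assumed module path

vdir = tempfile.mkdtemp()

# The previous sync knew about two events...
with open(os.path.join(vdir, ".sync_state.json"), "w") as f:
    json.dump({"abc-123": time.time(), "def-456": time.time()}, f)

# ...and recorded when it finished (a minute ago, for this demo).
with open(os.path.join(vdir, ".sync_timestamp"), "w") as f:
    f.write(str(time.time() - 60))

# Only one of the two events is still present on disk.
with open(os.path.join(vdir, "abc-123.ics"), "w") as f:
    f.write("BEGIN:VEVENT\nUID:abc-123\nEND:VEVENT\n")

# def-456 is in the previous state but has no .ics file, and the directory
# mtime is newer than the last sync, so it is reported as a local deletion.
print(detect_deleted_events(vdir))  # expected: ['def-456']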
async def sync_local_calendar_changes(
headers, vdir_path, progress, task_id, dry_run=False
):
"""
Sync local calendar changes (new events and deletions) to Microsoft Graph.
Args:
headers (dict): Authentication headers
vdir_path (str): Path to local vdir calendar directory
progress: Progress instance for updates
task_id: Progress task ID
dry_run (bool): If True, only report what would be done
Returns:
tuple: (created_count, deleted_count)
"""
if not os.path.exists(vdir_path):
progress.console.print(
f"[yellow]Local calendar directory not found: {vdir_path}[/yellow]"
)
return 0, 0
# Track state file for knowing what was previously synced
state_file = os.path.join(vdir_path, ".sync_state.json")
# Load previous sync state
previous_state = {}
if os.path.exists(state_file):
try:
with open(state_file, "r") as f:
previous_state = json.load(f)
except Exception as e:
progress.console.print(f"[yellow]Could not load sync state: {e}[/yellow]")
# Detect deleted events using enhanced detection
deleted_events = detect_deleted_events(vdir_path)
# Get current local events
current_local_events = get_local_calendar_events(vdir_path)
# Get current remote events to avoid duplicates
try:
remote_events, _ = await fetch_calendar_events(
headers, days_back=30, days_forward=90
)
remote_uids = {
event.get("iCalUId", event.get("id", "")) for event in remote_events
}
except Exception as e:
progress.console.print(f"[red]Error fetching remote events: {e}[/red]")
return 0, 0
created_count = 0
deleted_count = 0
# Find new local events (not in previous state and not on server)
new_local_events = []
for uid, event_data in current_local_events.items():
if uid not in previous_state and uid not in remote_uids:
# This is a new local event
new_local_events.append((uid, event_data))
progress.update(task_id, total=len(new_local_events) + len(deleted_events))
# Handle deletions FIRST to clean up server before adding new events
for uid in deleted_events:
if dry_run:
progress.console.print(f"[DRY-RUN] Would delete event with UID: {uid}")
else:
result = await delete_calendar_event_by_uid(headers, uid)
if result:
deleted_count += 1
progress.console.print(f"[green]Deleted event with UID: {uid}[/green]")
else:
progress.console.print(
f"[red]Failed to delete event with UID: {uid}[/red]"
)
progress.advance(task_id)
# Create new events on server
for uid, event_data in new_local_events:
if dry_run:
progress.console.print(
f"[DRY-RUN] Would create event: {event_data.get('subject', 'Untitled')}"
)
else:
result = await create_calendar_event(headers, event_data)
if result:
created_count += 1
progress.console.print(
f"[green]Created event: {event_data.get('subject', 'Untitled')}[/green]"
)
else:
progress.console.print(
f"[red]Failed to create event: {event_data.get('subject', 'Untitled')}[/red]"
)
progress.advance(task_id)
# Update sync state and timestamp
if not dry_run:
new_state = {
uid: event_data.get("local_mtime", 0)
for uid, event_data in current_local_events.items()
}
try:
with open(state_file, "w") as f:
json.dump(new_state, f, indent=2)
# Update sync timestamp to mark when this sync completed
update_sync_timestamp(vdir_path)
except Exception as e:
progress.console.print(f"[yellow]Could not save sync state: {e}[/yellow]")
if created_count > 0 or deleted_count > 0:
progress.console.print(
f"[cyan]Local calendar sync completed: {created_count} created, {deleted_count} deleted[/cyan]"
)
return created_count, deleted_count
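
A rough sketch of how this entry point might be wired into a CLI command; the import paths, the vdir location, and the scope list are assumptions.

# Hypothetical wiring; import paths, scopes, and the vdir location are assumed.
import asyncio
import os

from rich.progress import Progress

from graph.auth import get_access_token  # assumed module path
from graph.calendar import sync_local_calendar_changes  # assumed module path


async def main():
    _token, headers = get_access_token(["Calendars.ReadWrite"])
    vdir_path = os.path.expanduser("~/.calendars/work")  # placeholder path

    with Progress() as progress:
        task_id = progress.add_task("Syncing local calendar changes", total=None)
        created, deleted = await sync_local_calendar_changes(
            headers, vdir_path, progress, task_id, dry_run=True
        )
    print(f"{created} created, {deleted} deleted")


if __name__ == "__main__":
    asyncio.run(main())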

View File

@@ -1,16 +1,41 @@
"""
HTTP client for Microsoft Graph API.
"""
import aiohttp
import asyncio
import logging
import orjson
# Define a global semaphore for throttling
semaphore = asyncio.Semaphore(4)
# Suppress debug logging from HTTP libraries
logging.getLogger("aiohttp").setLevel(logging.ERROR)
logging.getLogger("aiohttp.access").setLevel(logging.ERROR)
logging.getLogger("urllib3").setLevel(logging.ERROR)
logging.getLogger("asyncio").setLevel(logging.ERROR)
# Define a global semaphore for throttling - reduced for better compliance
semaphore = asyncio.Semaphore(2)
async def _handle_throttling_retry(func, *args, max_retries=3):
"""Handle 429 throttling with exponential backoff retry."""
for attempt in range(max_retries):
try:
return await func(*args)
except Exception as e:
if "429" in str(e) and attempt < max_retries - 1:
wait_time = (2**attempt) + 1  # Exponential backoff: 2, 3, 5 seconds
print(
f"Rate limited, waiting {wait_time}s before retry {attempt + 1}/{max_retries}"
)
await asyncio.sleep(wait_time)
continue
raise e
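
With this formula the waits work out to 2, 3, and 5 seconds for attempts 0 through 2, though with max_retries=3 only the first two sleeps are ever taken before the error is re-raised. As a demonstration, the sketch below wraps a coroutine that fails once with a 429-style error; _handle_throttling_retry is a private helper, so calling it directly like this is for illustration only and the import path is assumed.

# Demonstration only; the import path is assumed.
import asyncio

from graph.client import _handle_throttling_retry  # assumed module path

calls = {"n": 0}


async def flaky_fetch(url):
    # Fail with a throttling-shaped error on the first call, then succeed.
    calls["n"] += 1
    if calls["n"] == 1:
        raise Exception(f"Failed to fetch {url}: 429 throttled")
    return {"value": []}


async def main():
    # The first attempt raises, the handler sleeps 2 s, the retry succeeds.
    result = await _handle_throttling_retry(flaky_fetch, "https://example.invalid")
    print(result)


asyncio.run(main())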
async def fetch_with_aiohttp(url, headers):
"""
Fetch data from Microsoft Graph API.
Fetch data from Microsoft Graph API with throttling and retry logic.
Args:
url (str): The URL to fetch data from.
@@ -20,23 +45,36 @@ async def fetch_with_aiohttp(url, headers):
dict: JSON response data.
Raises:
Exception: If the request fails.
Exception: If the request fails after retries.
"""
return await _handle_throttling_retry(_fetch_impl, url, headers)
async def _fetch_impl(url, headers):
"""Internal fetch implementation."""
async with semaphore:
async with aiohttp.ClientSession() as session:
async with session.get(url, headers=headers) as response:
if response.status != 200:
raise Exception(f"Failed to fetch {url}: {response.status} {await response.text()}")
if response.status == 429:
# Let the retry handler deal with throttling
raise Exception(
f"Failed to fetch {url}: {response.status} {await response.text()}"
)
elif response.status != 200:
raise Exception(
f"Failed to fetch {url}: {response.status} {await response.text()}"
)
raw_bytes = await response.read()
content_length = response.headers.get('Content-Length')
content_length = response.headers.get("Content-Length")
if content_length and len(raw_bytes) != int(content_length):
print("Warning: Incomplete response received!")
return None
return orjson.loads(raw_bytes)
async def post_with_aiohttp(url, headers, json_data):
"""
Post data to Microsoft Graph API.
Post data to Microsoft Graph API with throttling and retry logic.
Args:
url (str): The URL to post data to.
@@ -46,14 +84,24 @@ async def post_with_aiohttp(url, headers, json_data):
Returns:
int: HTTP status code.
"""
return await _handle_throttling_retry(_post_impl, url, headers, json_data)
async def _post_impl(url, headers, json_data):
"""Internal post implementation."""
async with semaphore:
async with aiohttp.ClientSession() as session:
async with session.post(url, headers=headers, json=json_data) as response:
if response.status == 429:
raise Exception(
f"Failed to post {url}: {response.status} {await response.text()}"
)
return response.status
async def patch_with_aiohttp(url, headers, json_data):
"""
Patch data to Microsoft Graph API.
Patch data to Microsoft Graph API with throttling and retry logic.
Args:
url (str): The URL to patch data to.
@@ -63,14 +111,24 @@ async def patch_with_aiohttp(url, headers, json_data):
Returns:
int: HTTP status code.
"""
return await _handle_throttling_retry(_patch_impl, url, headers, json_data)
async def _patch_impl(url, headers, json_data):
"""Internal patch implementation."""
async with semaphore:
async with aiohttp.ClientSession() as session:
async with session.patch(url, headers=headers, json=json_data) as response:
if response.status == 429:
raise Exception(
f"Failed to patch {url}: {response.status} {await response.text()}"
)
return response.status
async def delete_with_aiohttp(url, headers):
"""
Delete data from Microsoft Graph API.
Delete data from Microsoft Graph API with throttling and retry logic.
Args:
url (str): The URL to delete data from.
@@ -79,7 +137,51 @@ async def delete_with_aiohttp(url, headers):
Returns:
int: HTTP status code.
"""
return await _handle_throttling_retry(_delete_impl, url, headers)
async def _delete_impl(url, headers):
"""Internal delete implementation."""
async with semaphore:
async with aiohttp.ClientSession() as session:
async with session.delete(url, headers=headers) as response:
if response.status == 429:
raise Exception(
f"Failed to delete {url}: {response.status} {await response.text()}"
)
return response.status
async def batch_with_aiohttp(requests, headers):
"""
Execute multiple requests in a single batch call to Microsoft Graph API with throttling and retry logic.
Args:
requests (list): List of request dictionaries with 'id', 'method', 'url', and optional 'body' keys.
headers (dict): Headers including authentication.
Returns:
dict: Batch response with individual request responses.
"""
return await _handle_throttling_retry(_batch_impl, requests, headers)
async def _batch_impl(requests, headers):
"""Internal batch implementation."""
batch_url = "https://graph.microsoft.com/v1.0/$batch"
batch_data = {"requests": requests}
async with semaphore:
async with aiohttp.ClientSession() as session:
async with session.post(
batch_url, headers=headers, json=batch_data
) as response:
if response.status == 429:
raise Exception(
f"Batch request failed: {response.status} {await response.text()}"
)
elif response.status != 200:
raise Exception(
f"Batch request failed: {response.status} {await response.text()}"
)
return await response.json()
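
For reference, a minimal sketch of driving batch_with_aiohttp directly; the request list mirrors the shape used by the mail operations below, and the import paths and message ids are invented.

# Hypothetical usage; import paths and message ids are invented.
import asyncio

from graph.auth import get_access_token  # assumed module path
from graph.client import batch_with_aiohttp  # assumed module path


async def main():
    _token, headers = get_access_token(["Mail.ReadWrite"])

    # Up to 20 sub-requests per call (the Graph $batch limit used below).
    requests = [
        {
            "id": "1",
            "method": "PATCH",
            "url": "/me/messages/AAMkAD-example-id",
            "body": {"isRead": True},
            "headers": {"Content-Type": "application/json"},
        },
        {
            "id": "2",
            "method": "DELETE",
            "url": "/me/messages/AAMkAD-another-id",
        },
    ]
    response = await batch_with_aiohttp(requests, headers)
    for sub in response.get("responses", []):
        print(sub["id"], sub["status"])


asyncio.run(main())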

View File

@@ -5,6 +5,7 @@ Mail operations for Microsoft Graph API.
import os
import re
import glob
import asyncio
from typing import Set
import aiohttp
@@ -13,6 +14,7 @@ from .client import (
patch_with_aiohttp,
post_with_aiohttp,
delete_with_aiohttp,
batch_with_aiohttp,
)
@@ -73,9 +75,18 @@ async def fetch_mail_async(
new_files = set(glob.glob(os.path.join(new_dir, "*.eml*")))
cur_files = set(glob.glob(os.path.join(cur_dir, "*.eml*")))
for filename in Set.union(cur_files, new_files):
message_id = filename.split(".")[0].split("/")[
-1
# Get local message IDs (filename without extension)
local_msg_ids = set()
for filename in set.union(cur_files, new_files):
message_id = os.path.basename(filename).split(".")[
0
] # Extract the Message-ID from the filename
local_msg_ids.add(message_id)
# Delete local files that no longer exist on server
for filename in set.union(cur_files, new_files):
message_id = os.path.basename(filename).split(".")[
0
] # Extract the Message-ID from the filename
if message_id not in inbox_msg_ids:
if not dry_run:
@@ -84,7 +95,18 @@ async def fetch_mail_async(
else:
progress.console.print(f"[DRY-RUN] Would delete {filename} from inbox")
for message in messages:
# Filter messages to only include those not already local
messages_to_download = [msg for msg in messages if msg["id"] not in local_msg_ids]
progress.console.print(
f"Found {len(messages)} total messages on server, {len(local_msg_ids)} already local"
)
progress.console.print(f"Downloading {len(messages_to_download)} new messages")
# Update progress to reflect only the messages we actually need to download
progress.update(task_id, total=len(messages_to_download), completed=0)
for message in messages_to_download:
progress.console.print(
f"Processing message: {message.get('subject', 'No Subject')}", end="\r"
)
@@ -97,14 +119,19 @@ async def fetch_mail_async(
dry_run,
download_attachments,
)
progress.update(task_id, advance=0.5)
progress.update(task_id, completed=len(messages))
progress.console.print(f"\nFinished saving {len(messages)} messages.")
progress.update(task_id, advance=1)
progress.update(task_id, completed=len(messages_to_download))
progress.console.print(
f"\nFinished downloading {len(messages_to_download)} new messages."
)
progress.console.print(
f"Total messages on server: {len(messages)}, Already local: {len(local_msg_ids)}"
)
async def archive_mail_async(maildir_path, headers, progress, task_id, dry_run=False):
"""
Archive mail from Maildir to Microsoft Graph API archive folder.
Archive mail from Maildir to Microsoft Graph API archive folder using batch operations.
Args:
maildir_path (str): Path to the Maildir.
@@ -125,8 +152,14 @@ async def archive_mail_async(maildir_path, headers, progress, task_id, dry_run=F
glob.glob(os.path.join(archive_dir, "**", "*.eml*"), recursive=True)
)
if not archive_files:
progress.update(task_id, total=0, completed=0)
progress.console.print("No messages to archive")
return
progress.update(task_id, total=len(archive_files))
# Get archive folder ID from server
folder_response = await fetch_with_aiohttp(
"https://graph.microsoft.com/v1.0/me/mailFolders", headers
)
@@ -143,44 +176,115 @@ async def archive_mail_async(maildir_path, headers, progress, task_id, dry_run=F
if not archive_folder_id:
raise Exception("No folder named 'Archive' or 'Archives' found on the server.")
for filepath in archive_files:
message_id = os.path.basename(filepath).split(".")[
0
] # Extract the Message-ID from the filename
# Process files in batches of 20 (Microsoft Graph batch limit)
batch_size = 20
successful_moves = []
for i in range(0, len(archive_files), batch_size):
batch_files = archive_files[i : i + batch_size]
# Add small delay between batches to respect API limits
if i > 0:
await asyncio.sleep(0.5)
if not dry_run:
status = await post_with_aiohttp(
f"https://graph.microsoft.com/v1.0/me/messages/{message_id}/microsoft.graph.move",
headers,
{"destinationId": archive_folder_id},
)
if status == 201: # 201 Created indicates successful move
os.remove(
filepath
) # Remove the local file since it's now archived on server
progress.console.print(f"Moved message to 'Archive': {message_id}")
elif status == 404:
os.remove(
filepath
) # Remove the file from local archive if not found on server
progress.console.print(
f"Message not found on server, removed local copy: {message_id}"
)
else:
progress.console.print(
f"Failed to move message to 'Archive': {message_id}, status: {status}"
# Prepare batch requests
batch_requests = []
for idx, filepath in enumerate(batch_files):
message_id = os.path.basename(filepath).split(".")[0]
batch_requests.append(
{
"id": str(idx + 1),
"method": "POST",
"url": f"/me/messages/{message_id}/microsoft.graph.move",
"body": {"destinationId": archive_folder_id},
"headers": {"Content-Type": "application/json"},
}
)
try:
# Execute batch request
batch_response = await batch_with_aiohttp(batch_requests, headers)
# Process batch results
for response in batch_response.get("responses", []):
request_id = (
int(response["id"]) - 1
) # Convert back to 0-based index
filepath = batch_files[request_id]
message_id = os.path.basename(filepath).split(".")[0]
status = response["status"]
if status == 201: # 201 Created indicates successful move
os.remove(
filepath
) # Remove the local file since it's now archived on server
successful_moves.append(message_id)
progress.console.print(
f"Moved message to 'Archive': {message_id}"
)
elif status == 404:
os.remove(
filepath
) # Remove the file from local archive if not found on server
progress.console.print(
f"Message not found on server, removed local copy: {message_id}"
)
else:
progress.console.print(
f"Failed to move message to 'Archive': {message_id}, status: {status}"
)
except Exception as e:
progress.console.print(f"Batch archive request failed: {str(e)}")
# Fall back to individual requests for this batch
for filepath in batch_files:
message_id = os.path.basename(filepath).split(".")[0]
try:
status = await post_with_aiohttp(
f"https://graph.microsoft.com/v1.0/me/messages/{message_id}/microsoft.graph.move",
headers,
{"destinationId": archive_folder_id},
)
if status == 201:
os.remove(filepath)
successful_moves.append(message_id)
progress.console.print(
f"Moved message to 'Archive' (fallback): {message_id}"
)
elif status == 404:
os.remove(filepath)
progress.console.print(
f"Message not found on server, removed local copy: {message_id}"
)
else:
progress.console.print(
f"Failed to move message to 'Archive': {message_id}, status: {status}"
)
except Exception as individual_error:
progress.console.print(
f"Failed to archive {message_id}: {str(individual_error)}"
)
else:
progress.console.print(
f"[DRY-RUN] Would move message to 'Archive' folder: {message_id}"
)
progress.advance(task_id)
# Dry run - just log what would be done
for filepath in batch_files:
message_id = os.path.basename(filepath).split(".")[0]
progress.console.print(
f"[DRY-RUN] Would move message to 'Archive' folder: {message_id}"
)
progress.advance(task_id, len(batch_files))
if not dry_run:
progress.console.print(
f"Successfully archived {len(successful_moves)} messages in batches"
)
return
async def delete_mail_async(maildir_path, headers, progress, task_id, dry_run=False):
"""
Delete mail from Maildir and Microsoft Graph API.
Delete mail from Maildir and Microsoft Graph API using batch operations.
Args:
maildir_path (str): Path to the Maildir.
@@ -194,22 +298,99 @@ async def delete_mail_async(maildir_path, headers, progress, task_id, dry_run=Fa
"""
trash_dir = os.path.join(maildir_path, ".Trash", "cur")
trash_files = set(glob.glob(os.path.join(trash_dir, "*.eml*")))
if not trash_files:
progress.update(task_id, total=0, completed=0)
progress.console.print("No messages to delete")
return
progress.update(task_id, total=len(trash_files))
for filepath in trash_files:
message_id = os.path.basename(filepath).split(".")[
0
] # Extract the Message-ID from the filename
# Process files in batches of 20 (Microsoft Graph batch limit)
batch_size = 20
trash_files_list = list(trash_files)
successful_deletes = []
for i in range(0, len(trash_files_list), batch_size):
batch_files = trash_files_list[i : i + batch_size]
# Add small delay between batches to respect API limits
if i > 0:
await asyncio.sleep(0.5)
if not dry_run:
progress.console.print(f"Moving message to trash: {message_id}")
status = await delete_with_aiohttp(
f"https://graph.microsoft.com/v1.0/me/messages/{message_id}", headers
)
if status == 204 or status == 404:
os.remove(filepath) # Remove the file from local trash
# Prepare batch requests
batch_requests = []
for idx, filepath in enumerate(batch_files):
message_id = os.path.basename(filepath).split(".")[0]
batch_requests.append(
{
"id": str(idx + 1),
"method": "DELETE",
"url": f"/me/messages/{message_id}",
}
)
try:
# Execute batch request
batch_response = await batch_with_aiohttp(batch_requests, headers)
# Process batch results
for response in batch_response.get("responses", []):
request_id = (
int(response["id"]) - 1
) # Convert back to 0-based index
filepath = batch_files[request_id]
message_id = os.path.basename(filepath).split(".")[0]
status = response["status"]
if (
status == 204 or status == 404
): # 204 No Content or 404 Not Found (already deleted)
os.remove(filepath) # Remove the file from local trash
successful_deletes.append(message_id)
progress.console.print(f"Deleted message: {message_id}")
else:
progress.console.print(
f"Failed to delete message: {message_id}, status: {status}"
)
except Exception as e:
progress.console.print(f"Batch delete request failed: {str(e)}")
# Fall back to individual requests for this batch
for filepath in batch_files:
message_id = os.path.basename(filepath).split(".")[0]
try:
status = await delete_with_aiohttp(
f"https://graph.microsoft.com/v1.0/me/messages/{message_id}",
headers,
)
if status == 204 or status == 404:
os.remove(filepath)
successful_deletes.append(message_id)
progress.console.print(
f"Deleted message (fallback): {message_id}"
)
else:
progress.console.print(
f"Failed to delete message: {message_id}, status: {status}"
)
except Exception as individual_error:
progress.console.print(
f"Failed to delete {message_id}: {str(individual_error)}"
)
else:
progress.console.print(f"[DRY-RUN] Would delete message: {message_id}")
progress.advance(task_id)
# Dry run - just log what would be done
for filepath in batch_files:
message_id = os.path.basename(filepath).split(".")[0]
progress.console.print(f"[DRY-RUN] Would delete message: {message_id}")
progress.advance(task_id, len(batch_files))
if not dry_run:
progress.console.print(
f"Successfully deleted {len(successful_deletes)} messages in batches"
)
async def get_inbox_count_async(headers):
@@ -231,7 +412,7 @@ async def synchronize_maildir_async(
maildir_path, headers, progress, task_id, dry_run=False
):
"""
Synchronize Maildir with Microsoft Graph API.
Synchronize Maildir with Microsoft Graph API using batch operations.
Args:
maildir_path (str): Path to the Maildir.
@@ -258,32 +439,123 @@ async def synchronize_maildir_async(
cur_files = set(glob.glob(os.path.join(cur_dir, "*.eml*")))
moved_to_cur = [os.path.basename(f) for f in cur_files - new_files]
progress.update(task_id, total=len(moved_to_cur))
for filename in moved_to_cur:
# TODO: this isn't scalable, we should use a more efficient way to check if the file was modified
if os.path.getmtime(os.path.join(cur_dir, filename)) < last_sync:
progress.update(task_id, advance=1)
continue
message_id = re.sub(
r"\:2.+", "", filename.split(".")[0]
) # Extract the Message-ID from the filename
if not dry_run:
status = await patch_with_aiohttp(
f"https://graph.microsoft.com/v1.0/me/messages/{message_id}",
headers,
{"isRead": True},
)
if status == 404:
os.remove(os.path.join(cur_dir, filename))
# Filter out files that haven't been modified since last sync
files_to_process = []
for filename in moved_to_cur:
if os.path.getmtime(os.path.join(cur_dir, filename)) >= last_sync:
files_to_process.append(filename)
if not files_to_process:
progress.update(task_id, total=0, completed=0)
progress.console.print("No messages to mark as read")
# Save timestamp even if no work was done
if not dry_run:
save_sync_timestamp()
return
progress.update(task_id, total=len(files_to_process))
# Process files in batches of 20 (Microsoft Graph batch limit)
batch_size = 20
successful_reads = []
for i in range(0, len(files_to_process), batch_size):
batch_files = files_to_process[i : i + batch_size]
# Add small delay between batches to respect API limits
if i > 0:
await asyncio.sleep(0.5)
if not dry_run:
# Prepare batch requests
batch_requests = []
for idx, filename in enumerate(batch_files):
message_id = re.sub(r"\:2.+", "", filename.split(".")[0])
batch_requests.append(
{
"id": str(idx + 1),
"method": "PATCH",
"url": f"/me/messages/{message_id}",
"body": {"isRead": True},
"headers": {"Content-Type": "application/json"},
}
)
try:
# Execute batch request
batch_response = await batch_with_aiohttp(batch_requests, headers)
# Process batch results
for response in batch_response.get("responses", []):
request_id = (
int(response["id"]) - 1
) # Convert back to 0-based index
filename = batch_files[request_id]
message_id = re.sub(r"\:2.+", "", filename.split(".")[0])
status = response["status"]
if status == 200: # 200 OK indicates successful update
successful_reads.append(message_id)
progress.console.print(
f"Marked message as read: {truncate_id(message_id)}"
)
elif status == 404:
os.remove(
os.path.join(cur_dir, filename)
) # Remove file if message doesn't exist on server
progress.console.print(
f"Message not found on server, removed local copy: {truncate_id(message_id)}"
)
else:
progress.console.print(
f"Failed to mark message as read: {truncate_id(message_id)}, status: {status}"
)
except Exception as e:
progress.console.print(f"Batch read-status request failed: {str(e)}")
# Fall back to individual requests for this batch
for filename in batch_files:
message_id = re.sub(r"\:2.+", "", filename.split(".")[0])
try:
status = await patch_with_aiohttp(
f"https://graph.microsoft.com/v1.0/me/messages/{message_id}",
headers,
{"isRead": True},
)
if status == 200:
successful_reads.append(message_id)
progress.console.print(
f"Marked message as read (fallback): {truncate_id(message_id)}"
)
elif status == 404:
os.remove(os.path.join(cur_dir, filename))
progress.console.print(
f"Message not found on server, removed local copy: {truncate_id(message_id)}"
)
else:
progress.console.print(
f"Failed to mark message as read: {truncate_id(message_id)}, status: {status}"
)
except Exception as individual_error:
progress.console.print(
f"Failed to update read status for {truncate_id(message_id)}: {str(individual_error)}"
)
else:
progress.console.print(
f"[DRY-RUN] Would mark message as read: {truncate_id(message_id)}"
)
progress.advance(task_id)
# Dry run - just log what would be done
for filename in batch_files:
message_id = re.sub(r"\:2.+", "", filename.split(".")[0])
progress.console.print(
f"[DRY-RUN] Would mark message as read: {truncate_id(message_id)}"
)
progress.advance(task_id, len(batch_files))
# Save the current sync timestamp
if not dry_run:
save_sync_timestamp()
progress.console.print(
f"Successfully marked {len(successful_reads)} messages as read in batches"
)
else:
progress.console.print("[DRY-RUN] Would save sync timestamp.")