Bendt
2025-12-18 22:11:47 -05:00
parent 0ed7800575
commit a41d59e529
26 changed files with 4187 additions and 373 deletions


@@ -0,0 +1,5 @@
"""Khal service package for calendar operations."""
from .client import KhalClient
__all__ = ["KhalClient"]

src/services/khal/client.py Normal file

@@ -0,0 +1,332 @@
"""Khal CLI client for calendar operations.
This module provides a client that uses the khal CLI tool to interact with
calendar data stored in vdir format.
"""
import subprocess
import logging
from datetime import datetime, date, timedelta
from typing import Optional, List
from src.calendar.backend import CalendarBackend, Event
logger = logging.getLogger(__name__)
class KhalClient(CalendarBackend):
"""Calendar backend using khal CLI."""
def __init__(self, config_path: Optional[str] = None):
"""Initialize the Khal client.
Args:
config_path: Optional path to khal config file
"""
self.config_path = config_path
def _run_khal(
self, args: List[str], capture_output: bool = True
) -> subprocess.CompletedProcess:
"""Run a khal command.
Args:
args: Command arguments (after 'khal')
capture_output: Whether to capture stdout/stderr
Returns:
CompletedProcess result
"""
cmd = ["khal"] + args
if self.config_path:
cmd.extend(["-c", self.config_path])
logger.debug(f"Running khal command: {' '.join(cmd)}")
return subprocess.run(
cmd,
capture_output=capture_output,
text=True,
)
def _parse_event_line(
self, line: str, day_header_date: Optional[date] = None
) -> Optional[Event]:
"""Parse a single event line from khal list output.
Expected format: title|start-time|end-time|start|end|location|uid|description|organizer|url|categories|status|repeat-symbol
Args:
line: The line to parse
day_header_date: Current day being parsed (from day headers)
Returns:
Event if successfully parsed, None otherwise
"""
# Skip empty lines and day headers
if not line or "|" not in line:
return None
parts = line.split("|")
if len(parts) < 5:
return None
try:
title = parts[0].strip()
start_str = parts[3].strip() # Full datetime
end_str = parts[4].strip() # Full datetime
location = parts[5].strip() if len(parts) > 5 else ""
uid = parts[6].strip() if len(parts) > 6 else ""
description = parts[7].strip() if len(parts) > 7 else ""
organizer = parts[8].strip() if len(parts) > 8 else ""
url = parts[9].strip() if len(parts) > 9 else ""
categories = parts[10].strip() if len(parts) > 10 else ""
status = parts[11].strip() if len(parts) > 11 else ""
recurring_symbol = parts[12].strip() if len(parts) > 12 else ""
# Parse datetimes (format: YYYY-MM-DD HH:MM)
start = datetime.strptime(start_str, "%Y-%m-%d %H:%M")
end = datetime.strptime(end_str, "%Y-%m-%d %H:%M")
# Check for all-day events (typically start at 00:00 and end at 00:00 next day)
all_day = (
start.hour == 0
and start.minute == 0
and end.hour == 0
and end.minute == 0
and (end.date() - start.date()).days >= 1
)
# Check if event is recurring (repeat symbol is typically a loop arrow)
recurring = bool(recurring_symbol)
return Event(
uid=uid or f"{title}_{start_str}",
title=title,
start=start,
end=end,
location=location,
description=description,
organizer=organizer,
url=url,
categories=categories,
status=status,
all_day=all_day,
recurring=recurring,
)
except (ValueError, IndexError) as e:
logger.warning(f"Failed to parse event line '{line}': {e}")
return None
def get_events(
self,
start_date: date,
end_date: date,
calendar: Optional[str] = None,
) -> List[Event]:
"""Get events in a date range.
Args:
start_date: Start of range (inclusive)
end_date: End of range (inclusive)
calendar: Optional calendar name to filter by
Returns:
List of events in the range, sorted by start time
"""
# Format dates for khal
start_str = start_date.strftime("%Y-%m-%d")
# Add one day to end_date to make it inclusive
end_dt = end_date + timedelta(days=1)
end_str = end_dt.strftime("%Y-%m-%d")
# Build command
# Format: title|start-time|end-time|start|end|location|uid|description|organizer|url|categories|status|repeat-symbol
format_str = "{title}|{start-time}|{end-time}|{start}|{end}|{location}|{uid}|{description}|{organizer}|{url}|{categories}|{status}|{repeat-symbol}"
args = ["list", "-f", format_str, start_str, end_str]
if calendar:
args.extend(["-a", calendar])
result = self._run_khal(args)
if result.returncode != 0:
logger.error(f"khal list failed: {result.stderr}")
return []
events = []
current_day: Optional[date] = None
for line in result.stdout.strip().split("\n"):
line = line.strip()
if not line:
continue
# Check for day headers (e.g., "Today, 2025-12-18" or "Monday, 2025-12-22")
if ", " in line and "|" not in line:
try:
# Extract date from header
date_part = line.split(", ")[-1]
current_day = datetime.strptime(date_part, "%Y-%m-%d").date()
except ValueError:
pass
continue
event = self._parse_event_line(line, current_day)
if event:
events.append(event)
# Sort by start time
events.sort(key=lambda e: e.start)
return events
def get_event(self, uid: str) -> Optional[Event]:
"""Get a single event by UID.
Args:
uid: Event unique identifier
Returns:
Event if found, None otherwise
"""
# khal doesn't have a direct "get by uid" command
# We search for it instead
result = self._run_khal(["search", uid])
if result.returncode != 0 or not result.stdout.strip():
return None
# khal's search output is free-form text rather than a structured record,
# so we can't reliably reconstruct an Event from it here.
# This is a known limitation of the CLI backend.
return None
def get_calendars(self) -> List[str]:
"""Get list of available calendar names.
Returns:
List of calendar names
"""
result = self._run_khal(["printcalendars"])
if result.returncode != 0:
logger.error(f"khal printcalendars failed: {result.stderr}")
return []
calendars = []
for line in result.stdout.strip().split("\n"):
line = line.strip()
if line:
calendars.append(line)
return calendars
def create_event(
self,
title: str,
start: datetime,
end: datetime,
calendar: Optional[str] = None,
location: Optional[str] = None,
description: Optional[str] = None,
all_day: bool = False,
) -> Event:
"""Create a new event.
Args:
title: Event title
start: Start datetime
end: End datetime
calendar: Calendar to add event to
location: Event location
description: Event description
all_day: Whether this is an all-day event
Returns:
The created event
"""
# Build khal new command
# Format: khal new [-a calendar] start end title [:: description] [-l location]
if all_day:
start_str = start.strftime("%Y-%m-%d")
end_str = end.strftime("%Y-%m-%d")
else:
start_str = start.strftime("%Y-%m-%d %H:%M")
end_str = end.strftime("%H:%M") # End time only if same day
if end.date() != start.date():
end_str = end.strftime("%Y-%m-%d %H:%M")
args = ["new"]
if calendar:
args.extend(["-a", calendar])
if location:
args.extend(["-l", location])
args.extend([start_str, end_str, title])
if description:
args.extend(["::", description])
result = self._run_khal(args)
if result.returncode != 0:
raise RuntimeError(f"Failed to create event: {result.stderr}")
# Return a constructed event (khal doesn't return the created event)
return Event(
uid=f"new_{title}_{start.isoformat()}",
title=title,
start=start,
end=end,
location=location or "",
description=description or "",
calendar=calendar or "",
all_day=all_day,
)
def delete_event(self, uid: str) -> bool:
"""Delete an event.
Args:
uid: Event unique identifier
Returns:
True if deleted successfully
"""
# khal edit with --delete flag
# This is tricky because khal edit is interactive
# We might need to use khal's Python API directly for this
logger.warning("delete_event not fully implemented for khal CLI")
return False
def update_event(
self,
uid: str,
title: Optional[str] = None,
start: Optional[datetime] = None,
end: Optional[datetime] = None,
location: Optional[str] = None,
description: Optional[str] = None,
) -> Optional[Event]:
"""Update an existing event.
Args:
uid: Event unique identifier
title: New title (if provided)
start: New start time (if provided)
end: New end time (if provided)
location: New location (if provided)
description: New description (if provided)
Returns:
Updated event if successful, None otherwise
"""
# khal edit is interactive, so this is limited via CLI
logger.warning("update_event not fully implemented for khal CLI")
return None
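# A minimal usage sketch (illustrative, not part of this module); the calendar
# name "work" is an assumption:
#
#   from datetime import date, timedelta
#   from src.services.khal import KhalClient
#
#   client = KhalClient()
#   week = client.get_events(date.today(), date.today() + timedelta(days=7), calendar="work")
#   for event in week:
#       print(event.start, event.title)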


@@ -5,10 +5,11 @@ Mail operations for Microsoft Graph API.
import os
import re
import glob
import json
import asyncio
from email.parser import Parser
from email.utils import getaddresses
from typing import List, Dict, Any
from typing import List, Dict, Any, Set
from .client import (
fetch_with_aiohttp,
@@ -27,6 +28,7 @@ async def fetch_mail_async(
task_id,
dry_run=False,
download_attachments=False,
is_cancelled=None,
):
"""
Fetch mail from Microsoft Graph API and save to Maildir.
@@ -39,6 +41,7 @@ async def fetch_mail_async(
task_id: ID of the task in the progress bar.
dry_run (bool): If True, don't actually make changes.
download_attachments (bool): If True, download email attachments.
is_cancelled (callable, optional): Callback that returns True if task should stop.
Returns:
None
@@ -105,8 +108,14 @@ async def fetch_mail_async(
# Update progress to reflect only the messages we actually need to download
progress.update(task_id, total=len(messages_to_download), completed=0)
downloaded_count = 0
for message in messages_to_download:
# Check if task was cancelled/disabled
if is_cancelled and is_cancelled():
progress.console.print("Task cancelled, stopping inbox fetch")
break
progress.console.print(
f"Processing message: {message.get('subject', 'No Subject')}", end="\r"
)
@@ -120,44 +129,92 @@ async def fetch_mail_async(
download_attachments,
)
progress.update(task_id, advance=1)
progress.update(task_id, completed=len(messages_to_download))
progress.console.print(
f"\nFinished downloading {len(messages_to_download)} new messages."
)
downloaded_count += 1
progress.update(task_id, completed=downloaded_count)
progress.console.print(f"\nFinished downloading {downloaded_count} new messages.")
progress.console.print(
f"Total messages on server: {len(messages)}, Already local: {len(local_msg_ids)}"
)
async def archive_mail_async(maildir_path, headers, progress, task_id, dry_run=False):
def _get_archive_sync_state_path(maildir_path: str) -> str:
"""Get the path to the archive sync state file."""
return os.path.join(maildir_path, ".Archive", ".sync_state.json")
def _load_archive_sync_state(maildir_path: str) -> Set[str]:
"""Load the set of message IDs that have been synced to server."""
state_path = _get_archive_sync_state_path(maildir_path)
if os.path.exists(state_path):
try:
with open(state_path, "r") as f:
data = json.load(f)
return set(data.get("synced_to_server", []))
except Exception:
pass
return set()
def _save_archive_sync_state(maildir_path: str, synced_ids: Set[str]) -> None:
"""Save the set of message IDs that have been synced to server."""
state_path = _get_archive_sync_state_path(maildir_path)
os.makedirs(os.path.dirname(state_path), exist_ok=True)
with open(state_path, "w") as f:
json.dump({"synced_to_server": list(synced_ids)}, f, indent=2)
async def archive_mail_async(
maildir_path, headers, progress, task_id, dry_run=False, is_cancelled=None
):
"""
Archive mail from Maildir to Microsoft Graph API archive folder using batch operations.
Messages are moved to the server's Archive folder, but local copies are kept.
A sync state file tracks which messages have already been synced to avoid
re-processing them on subsequent runs.
Args:
maildir_path (str): Path to the Maildir.
headers (dict): Headers including authentication.
progress: Progress instance for updating progress bars.
task_id: ID of the task in the progress bar.
dry_run (bool): If True, don't actually make changes.
is_cancelled (callable, optional): Callback that returns True if task should stop.
Returns:
None
"""
# Check both possible archive folder names locally
# Load already-synced message IDs
synced_ids = _load_archive_sync_state(maildir_path)
# Check both possible archive folder names locally (prefer .Archive)
archive_files = []
for archive_folder_name in [".Archives", ".Archive"]:
for archive_folder_name in [".Archive", ".Archives"]:
archive_dir = os.path.join(maildir_path, archive_folder_name)
if os.path.exists(archive_dir):
archive_files.extend(
glob.glob(os.path.join(archive_dir, "**", "*.eml*"), recursive=True)
)
if not archive_files:
# Filter out already-synced messages
files_to_sync = []
for filepath in archive_files:
message_id = os.path.basename(filepath).split(".")[0]
if message_id not in synced_ids:
files_to_sync.append(filepath)
if not files_to_sync:
progress.update(task_id, total=0, completed=0)
progress.console.print("No messages to archive")
progress.console.print(
f"No new messages to archive ({len(archive_files)} already synced)"
)
return
progress.update(task_id, total=len(archive_files))
progress.update(task_id, total=len(files_to_sync))
progress.console.print(
f"Found {len(files_to_sync)} new messages to sync to server Archive"
)
# Get archive folder ID from server
folder_response = await fetch_with_aiohttp(
@@ -179,9 +236,15 @@ async def archive_mail_async(maildir_path, headers, progress, task_id, dry_run=F
# Process files in batches of 20 (Microsoft Graph batch limit)
batch_size = 20
successful_moves = []
newly_synced_ids: Set[str] = set()
for i in range(0, len(archive_files), batch_size):
batch_files = archive_files[i : i + batch_size]
for i in range(0, len(files_to_sync), batch_size):
# Check if task was cancelled/disabled
if is_cancelled and is_cancelled():
progress.console.print("Task cancelled, stopping archive sync")
break
batch_files = files_to_sync[i : i + batch_size]
# Add small delay between batches to respect API limits
if i > 0:
@@ -216,23 +279,22 @@ async def archive_mail_async(maildir_path, headers, progress, task_id, dry_run=F
status = response["status"]
if status == 201: # 201 Created indicates successful move
os.remove(
filepath
) # Remove the local file since it's now archived on server
# Keep local file, just mark as synced
newly_synced_ids.add(message_id)
successful_moves.append(message_id)
progress.console.print(
f"Moved message to 'Archive': {message_id}"
f"Moved message to server Archive: {message_id}"
)
elif status == 404:
os.remove(
filepath
) # Remove the file from local archive if not found on server
# Message not in Inbox (maybe already archived or deleted on server)
# Mark as synced so we don't retry, but keep local copy
newly_synced_ids.add(message_id)
progress.console.print(
f"Message not found on server, removed local copy: {message_id}"
f"Message not in Inbox (already archived?): {message_id}"
)
else:
progress.console.print(
f"Failed to move message to 'Archive': {message_id}, status: {status}"
f"Failed to move message to Archive: {message_id}, status: {status}"
)
except Exception as e:
@@ -247,19 +309,19 @@ async def archive_mail_async(maildir_path, headers, progress, task_id, dry_run=F
{"destinationId": archive_folder_id},
)
if status == 201:
os.remove(filepath)
newly_synced_ids.add(message_id)
successful_moves.append(message_id)
progress.console.print(
f"Moved message to 'Archive' (fallback): {message_id}"
f"Moved message to server Archive (fallback): {message_id}"
)
elif status == 404:
os.remove(filepath)
newly_synced_ids.add(message_id)
progress.console.print(
f"Message not found on server, removed local copy: {message_id}"
f"Message not in Inbox (already archived?): {message_id}"
)
else:
progress.console.print(
f"Failed to move message to 'Archive': {message_id}, status: {status}"
f"Failed to move message to Archive: {message_id}, status: {status}"
)
except Exception as individual_error:
progress.console.print(
@@ -270,18 +332,184 @@ async def archive_mail_async(maildir_path, headers, progress, task_id, dry_run=F
for filepath in batch_files:
message_id = os.path.basename(filepath).split(".")[0]
progress.console.print(
f"[DRY-RUN] Would move message to 'Archive' folder: {message_id}"
f"[DRY-RUN] Would move message to server Archive: {message_id}"
)
progress.advance(task_id, len(batch_files))
if not dry_run:
# Save sync state after each batch for resilience
if not dry_run and newly_synced_ids:
synced_ids.update(newly_synced_ids)
_save_archive_sync_state(maildir_path, synced_ids)
# Final summary
if not dry_run and successful_moves:
progress.console.print(
f"Successfully archived {len(successful_moves)} messages in batches"
f"Successfully synced {len(successful_moves)} messages to server Archive (kept local copies)"
)
return
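# For context: the batched requests whose responses are handled above (their
# construction is elided from this hunk) follow Microsoft Graph's JSON
# batching format, roughly:
#   POST https://graph.microsoft.com/v1.0/$batch
#   {"requests": [{"id": "1", "method": "POST",
#                  "url": "/me/messages/<message-id>/move",
#                  "headers": {"Content-Type": "application/json"},
#                  "body": {"destinationId": "<archive-folder-id>"}},
#                 ...]}
# Each sub-response with status 201 means that message was moved.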
async def fetch_archive_mail_async(
maildir_path,
attachments_dir,
headers,
progress,
task_id,
dry_run=False,
download_attachments=False,
max_messages=None,
is_cancelled=None,
):
"""
Fetch archived mail from the Microsoft Graph API Archive folder and save it to the local .Archive Maildir.
Args:
maildir_path (str): Path to the Maildir.
attachments_dir (str): Path to save attachments.
headers (dict): Headers including authentication.
progress: Progress instance for updating progress bars.
task_id: ID of the task in the progress bar.
dry_run (bool): If True, don't actually make changes.
download_attachments (bool): If True, download email attachments.
max_messages (int, optional): Maximum number of messages to fetch. None = all.
is_cancelled (callable, optional): Callback that returns True if task should stop.
Returns:
None
"""
from src.utils.mail_utils.maildir import save_mime_to_maildir_async
# Use the well-known 'archive' folder name
mail_url = "https://graph.microsoft.com/v1.0/me/mailFolders/archive/messages?$top=100&$orderby=receivedDateTime desc&$select=id,subject,from,toRecipients,ccRecipients,receivedDateTime,isRead"
messages = []
# Fetch the total count of messages in the archive
archive_info_url = "https://graph.microsoft.com/v1.0/me/mailFolders/archive"
try:
response = await fetch_with_aiohttp(archive_info_url, headers)
total_messages = response.get("totalItemCount", 0) if response else 0
except Exception as e:
progress.console.print(f"Error fetching archive folder info: {e}")
total_messages = 0
# Apply max_messages limit if specified
effective_total = (
min(total_messages, max_messages) if max_messages else total_messages
)
progress.update(task_id, total=effective_total)
progress.console.print(
f"Archive folder has {total_messages} messages"
+ (f", fetching up to {max_messages}" if max_messages else "")
)
# Fetch messages from archive
fetched_count = 0
while mail_url:
try:
response_data = await fetch_with_aiohttp(mail_url, headers)
except Exception as e:
progress.console.print(f"Error fetching archive messages: {e}")
break
batch = response_data.get("value", []) if response_data else []
# Apply max_messages limit
if max_messages and fetched_count + len(batch) > max_messages:
batch = batch[: max_messages - fetched_count]
messages.extend(batch)
fetched_count += len(batch)
break
messages.extend(batch)
fetched_count += len(batch)
progress.advance(task_id, len(batch))
# Get the next page URL from @odata.nextLink
mail_url = response_data.get("@odata.nextLink") if response_data else None
# Set up local archive directory paths
archive_dir = os.path.join(maildir_path, ".Archive")
cur_dir = os.path.join(archive_dir, "cur")
new_dir = os.path.join(archive_dir, "new")
# Ensure directories exist
os.makedirs(cur_dir, exist_ok=True)
os.makedirs(new_dir, exist_ok=True)
os.makedirs(os.path.join(archive_dir, "tmp"), exist_ok=True)
# Get local message IDs in archive
cur_files = set(glob.glob(os.path.join(cur_dir, "*.eml*")))
new_files = set(glob.glob(os.path.join(new_dir, "*.eml*")))
local_msg_ids = set()
for filename in set.union(cur_files, new_files):
message_id = os.path.basename(filename).split(".")[0]
local_msg_ids.add(message_id)
# Filter messages to only include those not already local
messages_to_download = [msg for msg in messages if msg["id"] not in local_msg_ids]
progress.console.print(
f"Found {len(messages)} messages on server Archive, {len(local_msg_ids)} already local"
)
progress.console.print(
f"Downloading {len(messages_to_download)} new archived messages"
)
# Update progress to reflect only the messages we actually need to download
progress.update(task_id, total=len(messages_to_download), completed=0)
# Load sync state once, we'll update it incrementally
synced_ids = _load_archive_sync_state(maildir_path) if not dry_run else set()
downloaded_count = 0
for message in messages_to_download:
# Check if task was cancelled/disabled
if is_cancelled and is_cancelled():
progress.console.print("Task cancelled, stopping archive fetch")
break
progress.console.print(
f"Processing archived message: {message.get('subject', 'No Subject')[:50]}",
end="\r",
)
# Save to .Archive folder instead of main maildir
await save_mime_to_maildir_async(
archive_dir, # Use archive_dir instead of maildir_path
message,
attachments_dir,
headers,
progress,
dry_run,
download_attachments,
)
progress.update(task_id, advance=1)
downloaded_count += 1
# Update sync state after each message for resilience
# This ensures we don't try to re-upload this message in archive_mail_async
if not dry_run:
synced_ids.add(message["id"])
_save_archive_sync_state(maildir_path, synced_ids)
progress.update(task_id, completed=downloaded_count)
progress.console.print(
f"\nFinished downloading {downloaded_count} archived messages."
)
progress.console.print(
f"Total in server Archive: {total_messages}, Already local: {len(local_msg_ids)}"
)
# Also add any messages we already had locally (from the full server list)
# to ensure they're marked as synced
if not dry_run and messages:
for msg in messages:
synced_ids.add(msg["id"])
_save_archive_sync_state(maildir_path, synced_ids)
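# A minimal sketch of wiring up is_cancelled (the surrounding names here are
# illustrative, not from this codebase):
#
#   import threading
#   cancel_flag = threading.Event()
#   await fetch_archive_mail_async(
#       maildir_path, attachments_dir, headers, progress, task_id,
#       is_cancelled=cancel_flag.is_set,
#   )
#   # Calling cancel_flag.set() from another thread stops the loop at the
#   # next message boundary.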
async def delete_mail_async(maildir_path, headers, progress, task_id, dry_run=False):
"""
Delete mail from Maildir and Microsoft Graph API using batch operations.