Bendt
2025-12-18 22:11:47 -05:00
parent 0ed7800575
commit a41d59e529
26 changed files with 4187 additions and 373 deletions


@@ -0,0 +1,5 @@
"""Khal service package for calendar operations."""
from .client import KhalClient
__all__ = ["KhalClient"]

src/services/khal/client.py Normal file

@@ -0,0 +1,332 @@
"""Khal CLI client for calendar operations.
This module provides a client that uses the khal CLI tool to interact with
calendar data stored in vdir format.
"""
import subprocess
import logging
from datetime import datetime, date, timedelta
from typing import Optional, List
from src.calendar.backend import CalendarBackend, Event
logger = logging.getLogger(__name__)
class KhalClient(CalendarBackend):
"""Calendar backend using khal CLI."""
def __init__(self, config_path: Optional[str] = None):
"""Initialize the Khal client.
Args:
config_path: Optional path to khal config file
"""
self.config_path = config_path
def _run_khal(
self, args: List[str], capture_output: bool = True
) -> subprocess.CompletedProcess:
"""Run a khal command.
Args:
args: Command arguments (after 'khal')
capture_output: Whether to capture stdout/stderr
Returns:
CompletedProcess result
"""
cmd = ["khal"] + args
if self.config_path:
cmd.extend(["-c", self.config_path])
logger.debug(f"Running khal command: {' '.join(cmd)}")
return subprocess.run(
cmd,
capture_output=capture_output,
text=True,
)
def _parse_event_line(
self, line: str, day_header_date: Optional[date] = None
) -> Optional[Event]:
"""Parse a single event line from khal list output.
Expected format: title|start-time|end-time|start|end|location|uid|description|organizer|url|categories|status|repeat-symbol
Args:
line: The line to parse
day_header_date: Current day being parsed (from day headers)
Returns:
Event if successfully parsed, None otherwise
"""
# Skip empty lines and day headers
if not line or "|" not in line:
return None
parts = line.split("|")
if len(parts) < 5:
return None
try:
title = parts[0].strip()
start_str = parts[3].strip() # Full datetime
end_str = parts[4].strip() # Full datetime
location = parts[5].strip() if len(parts) > 5 else ""
uid = parts[6].strip() if len(parts) > 6 else ""
description = parts[7].strip() if len(parts) > 7 else ""
organizer = parts[8].strip() if len(parts) > 8 else ""
url = parts[9].strip() if len(parts) > 9 else ""
categories = parts[10].strip() if len(parts) > 10 else ""
status = parts[11].strip() if len(parts) > 11 else ""
recurring_symbol = parts[12].strip() if len(parts) > 12 else ""
# Parse datetimes (format: YYYY-MM-DD HH:MM)
start = datetime.strptime(start_str, "%Y-%m-%d %H:%M")
end = datetime.strptime(end_str, "%Y-%m-%d %H:%M")
# Check for all-day events (typically start at 00:00 and end at 00:00 next day)
all_day = (
start.hour == 0
and start.minute == 0
and end.hour == 0
and end.minute == 0
and (end.date() - start.date()).days >= 1
)
# Check if event is recurring (repeat symbol is typically a loop arrow)
recurring = bool(recurring_symbol)
return Event(
uid=uid or f"{title}_{start_str}",
title=title,
start=start,
end=end,
location=location,
description=description,
organizer=organizer,
url=url,
categories=categories,
status=status,
all_day=all_day,
recurring=recurring,
)
except (ValueError, IndexError) as e:
logger.warning(f"Failed to parse event line '{line}': {e}")
return None
def get_events(
self,
start_date: date,
end_date: date,
calendar: Optional[str] = None,
) -> List[Event]:
"""Get events in a date range.
Args:
start_date: Start of range (inclusive)
end_date: End of range (inclusive)
calendar: Optional calendar name to filter by
Returns:
List of events in the range, sorted by start time
"""
# Format dates for khal
start_str = start_date.strftime("%Y-%m-%d")
# Add one day to end_date to make it inclusive
end_dt = end_date + timedelta(days=1)
end_str = end_dt.strftime("%Y-%m-%d")
# Build command
# Format: title|start-time|end-time|start|end|location|uid|description|organizer|url|categories|status|repeat-symbol
format_str = "{title}|{start-time}|{end-time}|{start}|{end}|{location}|{uid}|{description}|{organizer}|{url}|{categories}|{status}|{repeat-symbol}"
args = ["list", "-f", format_str, start_str, end_str]
if calendar:
args.extend(["-a", calendar])
result = self._run_khal(args)
if result.returncode != 0:
logger.error(f"khal list failed: {result.stderr}")
return []
events = []
current_day: Optional[date] = None
for line in result.stdout.strip().split("\n"):
line = line.strip()
if not line:
continue
# Check for day headers (e.g., "Today, 2025-12-18" or "Monday, 2025-12-22")
if ", " in line and "|" not in line:
try:
# Extract date from header
date_part = line.split(", ")[-1]
current_day = datetime.strptime(date_part, "%Y-%m-%d").date()
except ValueError:
pass
continue
event = self._parse_event_line(line, current_day)
if event:
events.append(event)
# Sort by start time
events.sort(key=lambda e: e.start)
return events
def get_event(self, uid: str) -> Optional[Event]:
"""Get a single event by UID.
Args:
uid: Event unique identifier
Returns:
Event if found, None otherwise
"""
# khal doesn't have a direct "get by uid" command
# We search for it instead
result = self._run_khal(["search", uid])
if result.returncode != 0 or not result.stdout.strip():
return None
# khal's search output is free-form text rather than a structured record,
# so we can't reliably reconstruct an Event from it here.
# This is a known limitation of the CLI backend.
return None
def get_calendars(self) -> List[str]:
"""Get list of available calendar names.
Returns:
List of calendar names
"""
result = self._run_khal(["printcalendars"])
if result.returncode != 0:
logger.error(f"khal printcalendars failed: {result.stderr}")
return []
calendars = []
for line in result.stdout.strip().split("\n"):
line = line.strip()
if line:
calendars.append(line)
return calendars
def create_event(
self,
title: str,
start: datetime,
end: datetime,
calendar: Optional[str] = None,
location: Optional[str] = None,
description: Optional[str] = None,
all_day: bool = False,
) -> Event:
"""Create a new event.
Args:
title: Event title
start: Start datetime
end: End datetime
calendar: Calendar to add event to
location: Event location
description: Event description
all_day: Whether this is an all-day event
Returns:
The created event
"""
# Build khal new command
# Format: khal new [-a calendar] start end title [:: description] [-l location]
if all_day:
start_str = start.strftime("%Y-%m-%d")
end_str = end.strftime("%Y-%m-%d")
else:
start_str = start.strftime("%Y-%m-%d %H:%M")
end_str = end.strftime("%H:%M") # End time only if same day
if end.date() != start.date():
end_str = end.strftime("%Y-%m-%d %H:%M")
args = ["new"]
if calendar:
args.extend(["-a", calendar])
if location:
args.extend(["-l", location])
args.extend([start_str, end_str, title])
if description:
args.extend(["::", description])
result = self._run_khal(args)
if result.returncode != 0:
raise RuntimeError(f"Failed to create event: {result.stderr}")
# Return a constructed event (khal doesn't return the created event)
return Event(
uid=f"new_{title}_{start.isoformat()}",
title=title,
start=start,
end=end,
location=location or "",
description=description or "",
calendar=calendar or "",
all_day=all_day,
)
def delete_event(self, uid: str) -> bool:
"""Delete an event.
Args:
uid: Event unique identifier
Returns:
True if deleted successfully
"""
# khal edit with --delete flag
# This is tricky because khal edit is interactive
# We might need to use khal's Python API directly for this
logger.warning("delete_event not fully implemented for khal CLI")
return False
def update_event(
self,
uid: str,
title: Optional[str] = None,
start: Optional[datetime] = None,
end: Optional[datetime] = None,
location: Optional[str] = None,
description: Optional[str] = None,
) -> Optional[Event]:
"""Update an existing event.
Args:
uid: Event unique identifier
title: New title (if provided)
start: New start time (if provided)
end: New end time (if provided)
location: New location (if provided)
description: New description (if provided)
Returns:
Updated event if successful, None otherwise
"""
# khal edit is interactive, so this is limited via CLI
logger.warning("update_event not fully implemented for khal CLI")
return None
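# A minimal usage sketch (illustrative, not part of this module); the calendar
# name "work" is an assumption:
#
#   from datetime import date, timedelta
#   from src.services.khal import KhalClient
#
#   client = KhalClient()
#   week = client.get_events(date.today(), date.today() + timedelta(days=7), calendar="work")
#   for event in week:
#       print(event.start, event.title)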


@@ -5,10 +5,11 @@ Mail operations for Microsoft Graph API.
import os
import re
import glob
import json
import asyncio
from email.parser import Parser
from email.utils import getaddresses
from typing import List, Dict, Any
from typing import List, Dict, Any, Set
from .client import (
fetch_with_aiohttp,
@@ -27,6 +28,7 @@ async def fetch_mail_async(
task_id,
dry_run=False,
download_attachments=False,
is_cancelled=None,
):
"""
Fetch mail from Microsoft Graph API and save to Maildir.
@@ -39,6 +41,7 @@ async def fetch_mail_async(
task_id: ID of the task in the progress bar.
dry_run (bool): If True, don't actually make changes.
download_attachments (bool): If True, download email attachments.
is_cancelled (callable, optional): Callback that returns True if task should stop.
Returns:
None
@@ -105,8 +108,14 @@ async def fetch_mail_async(
# Update progress to reflect only the messages we actually need to download
progress.update(task_id, total=len(messages_to_download), completed=0)
downloaded_count = 0
for message in messages_to_download:
# Check if task was cancelled/disabled
if is_cancelled and is_cancelled():
progress.console.print("Task cancelled, stopping inbox fetch")
break
progress.console.print(
f"Processing message: {message.get('subject', 'No Subject')}", end="\r"
)
@@ -120,44 +129,92 @@ async def fetch_mail_async(
download_attachments,
)
progress.update(task_id, advance=1)
progress.update(task_id, completed=len(messages_to_download))
progress.console.print(
f"\nFinished downloading {len(messages_to_download)} new messages."
)
downloaded_count += 1
progress.update(task_id, completed=downloaded_count)
progress.console.print(f"\nFinished downloading {downloaded_count} new messages.")
progress.console.print(
f"Total messages on server: {len(messages)}, Already local: {len(local_msg_ids)}"
)
async def archive_mail_async(maildir_path, headers, progress, task_id, dry_run=False):
def _get_archive_sync_state_path(maildir_path: str) -> str:
"""Get the path to the archive sync state file."""
return os.path.join(maildir_path, ".Archive", ".sync_state.json")
def _load_archive_sync_state(maildir_path: str) -> Set[str]:
"""Load the set of message IDs that have been synced to server."""
state_path = _get_archive_sync_state_path(maildir_path)
if os.path.exists(state_path):
try:
with open(state_path, "r") as f:
data = json.load(f)
return set(data.get("synced_to_server", []))
except Exception:
pass
return set()
def _save_archive_sync_state(maildir_path: str, synced_ids: Set[str]) -> None:
"""Save the set of message IDs that have been synced to server."""
state_path = _get_archive_sync_state_path(maildir_path)
os.makedirs(os.path.dirname(state_path), exist_ok=True)
with open(state_path, "w") as f:
json.dump({"synced_to_server": list(synced_ids)}, f, indent=2)
async def archive_mail_async(
maildir_path, headers, progress, task_id, dry_run=False, is_cancelled=None
):
"""
Archive mail from Maildir to Microsoft Graph API archive folder using batch operations.
Messages are moved to the server's Archive folder, but local copies are kept.
A sync state file tracks which messages have already been synced to avoid
re-processing them on subsequent runs.
Args:
maildir_path (str): Path to the Maildir.
headers (dict): Headers including authentication.
progress: Progress instance for updating progress bars.
task_id: ID of the task in the progress bar.
dry_run (bool): If True, don't actually make changes.
is_cancelled (callable, optional): Callback that returns True if task should stop.
Returns:
None
"""
# Check both possible archive folder names locally
# Load already-synced message IDs
synced_ids = _load_archive_sync_state(maildir_path)
# Check both possible archive folder names locally (prefer .Archive)
archive_files = []
for archive_folder_name in [".Archives", ".Archive"]:
for archive_folder_name in [".Archive", ".Archives"]:
archive_dir = os.path.join(maildir_path, archive_folder_name)
if os.path.exists(archive_dir):
archive_files.extend(
glob.glob(os.path.join(archive_dir, "**", "*.eml*"), recursive=True)
)
if not archive_files:
# Filter out already-synced messages
files_to_sync = []
for filepath in archive_files:
message_id = os.path.basename(filepath).split(".")[0]
if message_id not in synced_ids:
files_to_sync.append(filepath)
if not files_to_sync:
progress.update(task_id, total=0, completed=0)
progress.console.print("No messages to archive")
progress.console.print(
f"No new messages to archive ({len(archive_files)} already synced)"
)
return
progress.update(task_id, total=len(archive_files))
progress.update(task_id, total=len(files_to_sync))
progress.console.print(
f"Found {len(files_to_sync)} new messages to sync to server Archive"
)
# Get archive folder ID from server
folder_response = await fetch_with_aiohttp(
@@ -179,9 +236,15 @@ async def archive_mail_async(maildir_path, headers, progress, task_id, dry_run=F
# Process files in batches of 20 (Microsoft Graph batch limit)
batch_size = 20
successful_moves = []
newly_synced_ids: Set[str] = set()
for i in range(0, len(archive_files), batch_size):
batch_files = archive_files[i : i + batch_size]
for i in range(0, len(files_to_sync), batch_size):
# Check if task was cancelled/disabled
if is_cancelled and is_cancelled():
progress.console.print("Task cancelled, stopping archive sync")
break
batch_files = files_to_sync[i : i + batch_size]
# Add small delay between batches to respect API limits
if i > 0:
@@ -216,23 +279,22 @@ async def archive_mail_async(maildir_path, headers, progress, task_id, dry_run=F
status = response["status"]
if status == 201: # 201 Created indicates successful move
os.remove(
filepath
) # Remove the local file since it's now archived on server
# Keep local file, just mark as synced
newly_synced_ids.add(message_id)
successful_moves.append(message_id)
progress.console.print(
f"Moved message to 'Archive': {message_id}"
f"Moved message to server Archive: {message_id}"
)
elif status == 404:
os.remove(
filepath
) # Remove the file from local archive if not found on server
# Message not in Inbox (maybe already archived or deleted on server)
# Mark as synced so we don't retry, but keep local copy
newly_synced_ids.add(message_id)
progress.console.print(
f"Message not found on server, removed local copy: {message_id}"
f"Message not in Inbox (already archived?): {message_id}"
)
else:
progress.console.print(
f"Failed to move message to 'Archive': {message_id}, status: {status}"
f"Failed to move message to Archive: {message_id}, status: {status}"
)
except Exception as e:
@@ -247,19 +309,19 @@ async def archive_mail_async(maildir_path, headers, progress, task_id, dry_run=F
{"destinationId": archive_folder_id},
)
if status == 201:
os.remove(filepath)
newly_synced_ids.add(message_id)
successful_moves.append(message_id)
progress.console.print(
f"Moved message to 'Archive' (fallback): {message_id}"
f"Moved message to server Archive (fallback): {message_id}"
)
elif status == 404:
os.remove(filepath)
newly_synced_ids.add(message_id)
progress.console.print(
f"Message not found on server, removed local copy: {message_id}"
f"Message not in Inbox (already archived?): {message_id}"
)
else:
progress.console.print(
f"Failed to move message to 'Archive': {message_id}, status: {status}"
f"Failed to move message to Archive: {message_id}, status: {status}"
)
except Exception as individual_error:
progress.console.print(
@@ -270,18 +332,184 @@ async def archive_mail_async(maildir_path, headers, progress, task_id, dry_run=F
for filepath in batch_files:
message_id = os.path.basename(filepath).split(".")[0]
progress.console.print(
f"[DRY-RUN] Would move message to 'Archive' folder: {message_id}"
f"[DRY-RUN] Would move message to server Archive: {message_id}"
)
progress.advance(task_id, len(batch_files))
if not dry_run:
# Save sync state after each batch for resilience
if not dry_run and newly_synced_ids:
synced_ids.update(newly_synced_ids)
_save_archive_sync_state(maildir_path, synced_ids)
# Final summary
if not dry_run and successful_moves:
progress.console.print(
f"Successfully archived {len(successful_moves)} messages in batches"
f"Successfully synced {len(successful_moves)} messages to server Archive (kept local copies)"
)
return
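# For context: the batched requests whose responses are handled above (their
# construction is elided from this hunk) follow Microsoft Graph's JSON
# batching format, roughly:
#   POST https://graph.microsoft.com/v1.0/$batch
#   {"requests": [{"id": "1", "method": "POST",
#                  "url": "/me/messages/<message-id>/move",
#                  "headers": {"Content-Type": "application/json"},
#                  "body": {"destinationId": "<archive-folder-id>"}},
#                 ...]}
# Each sub-response with status 201 means that message was moved.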
async def fetch_archive_mail_async(
maildir_path,
attachments_dir,
headers,
progress,
task_id,
dry_run=False,
download_attachments=False,
max_messages=None,
is_cancelled=None,
):
"""
Fetch archived mail from the Microsoft Graph API Archive folder and save it to the local .Archive Maildir.
Args:
maildir_path (str): Path to the Maildir.
attachments_dir (str): Path to save attachments.
headers (dict): Headers including authentication.
progress: Progress instance for updating progress bars.
task_id: ID of the task in the progress bar.
dry_run (bool): If True, don't actually make changes.
download_attachments (bool): If True, download email attachments.
max_messages (int, optional): Maximum number of messages to fetch. None = all.
is_cancelled (callable, optional): Callback that returns True if task should stop.
Returns:
None
"""
from src.utils.mail_utils.maildir import save_mime_to_maildir_async
# Use the well-known 'archive' folder name
mail_url = "https://graph.microsoft.com/v1.0/me/mailFolders/archive/messages?$top=100&$orderby=receivedDateTime desc&$select=id,subject,from,toRecipients,ccRecipients,receivedDateTime,isRead"
messages = []
# Fetch the total count of messages in the archive
archive_info_url = "https://graph.microsoft.com/v1.0/me/mailFolders/archive"
try:
response = await fetch_with_aiohttp(archive_info_url, headers)
total_messages = response.get("totalItemCount", 0) if response else 0
except Exception as e:
progress.console.print(f"Error fetching archive folder info: {e}")
total_messages = 0
# Apply max_messages limit if specified
effective_total = (
min(total_messages, max_messages) if max_messages else total_messages
)
progress.update(task_id, total=effective_total)
progress.console.print(
f"Archive folder has {total_messages} messages"
+ (f", fetching up to {max_messages}" if max_messages else "")
)
# Fetch messages from archive
fetched_count = 0
while mail_url:
try:
response_data = await fetch_with_aiohttp(mail_url, headers)
except Exception as e:
progress.console.print(f"Error fetching archive messages: {e}")
break
batch = response_data.get("value", []) if response_data else []
# Apply max_messages limit
if max_messages and fetched_count + len(batch) > max_messages:
batch = batch[: max_messages - fetched_count]
messages.extend(batch)
fetched_count += len(batch)
break
messages.extend(batch)
fetched_count += len(batch)
progress.advance(task_id, len(batch))
# Get the next page URL from @odata.nextLink
mail_url = response_data.get("@odata.nextLink") if response_data else None
# Set up local archive directory paths
archive_dir = os.path.join(maildir_path, ".Archive")
cur_dir = os.path.join(archive_dir, "cur")
new_dir = os.path.join(archive_dir, "new")
# Ensure directories exist
os.makedirs(cur_dir, exist_ok=True)
os.makedirs(new_dir, exist_ok=True)
os.makedirs(os.path.join(archive_dir, "tmp"), exist_ok=True)
# Get local message IDs in archive
cur_files = set(glob.glob(os.path.join(cur_dir, "*.eml*")))
new_files = set(glob.glob(os.path.join(new_dir, "*.eml*")))
local_msg_ids = set()
for filename in set.union(cur_files, new_files):
message_id = os.path.basename(filename).split(".")[0]
local_msg_ids.add(message_id)
# Filter messages to only include those not already local
messages_to_download = [msg for msg in messages if msg["id"] not in local_msg_ids]
progress.console.print(
f"Found {len(messages)} messages on server Archive, {len(local_msg_ids)} already local"
)
progress.console.print(
f"Downloading {len(messages_to_download)} new archived messages"
)
# Update progress to reflect only the messages we actually need to download
progress.update(task_id, total=len(messages_to_download), completed=0)
# Load sync state once, we'll update it incrementally
synced_ids = _load_archive_sync_state(maildir_path) if not dry_run else set()
downloaded_count = 0
for message in messages_to_download:
# Check if task was cancelled/disabled
if is_cancelled and is_cancelled():
progress.console.print("Task cancelled, stopping archive fetch")
break
progress.console.print(
f"Processing archived message: {message.get('subject', 'No Subject')[:50]}",
end="\r",
)
# Save to .Archive folder instead of main maildir
await save_mime_to_maildir_async(
archive_dir, # Use archive_dir instead of maildir_path
message,
attachments_dir,
headers,
progress,
dry_run,
download_attachments,
)
progress.update(task_id, advance=1)
downloaded_count += 1
# Update sync state after each message for resilience
# This ensures we don't try to re-upload this message in archive_mail_async
if not dry_run:
synced_ids.add(message["id"])
_save_archive_sync_state(maildir_path, synced_ids)
progress.update(task_id, completed=downloaded_count)
progress.console.print(
f"\nFinished downloading {downloaded_count} archived messages."
)
progress.console.print(
f"Total in server Archive: {total_messages}, Already local: {len(local_msg_ids)}"
)
# Also add any messages we already had locally (from the full server list)
# to ensure they're marked as synced
if not dry_run and messages:
for msg in messages:
synced_ids.add(msg["id"])
_save_archive_sync_state(maildir_path, synced_ids)
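# A minimal sketch of wiring up is_cancelled (the surrounding names here are
# illustrative, not from this codebase):
#
#   import threading
#   cancel_flag = threading.Event()
#   await fetch_archive_mail_async(
#       maildir_path, attachments_dir, headers, progress, task_id,
#       is_cancelled=cancel_flag.is_set,
#   )
#   # Calling cancel_flag.set() from another thread stops the loop at the
#   # next message boundary.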
async def delete_mail_async(maildir_path, headers, progress, task_id, dry_run=False):
"""
Delete mail from Maildir and Microsoft Graph API using batch operations.