WIP

src/services/khal/__init__.py (new file)
@@ -0,0 +1,5 @@
"""Khal service package for calendar operations."""

from .client import KhalClient

__all__ = ["KhalClient"]

src/services/khal/client.py (new file)
@@ -0,0 +1,332 @@
"""Khal CLI client for calendar operations.

This module provides a client that uses the khal CLI tool to interact with
calendar data stored in vdir format.
"""

import subprocess
import logging
from datetime import datetime, date, timedelta
from typing import Optional, List

from src.calendar.backend import CalendarBackend, Event

logger = logging.getLogger(__name__)


class KhalClient(CalendarBackend):
    """Calendar backend using khal CLI."""

    def __init__(self, config_path: Optional[str] = None):
        """Initialize the Khal client.

        Args:
            config_path: Optional path to khal config file
        """
        self.config_path = config_path

    def _run_khal(
        self, args: List[str], capture_output: bool = True
    ) -> subprocess.CompletedProcess:
        """Run a khal command.

        Args:
            args: Command arguments (after 'khal')
            capture_output: Whether to capture stdout/stderr

        Returns:
            CompletedProcess result
        """
        # -c is a global khal option and must precede the subcommand,
        # so build the command as: khal [-c CONFIG] <subcommand> ...
        cmd = ["khal"]
        if self.config_path:
            cmd.extend(["-c", self.config_path])
        cmd.extend(args)

        logger.debug(f"Running khal command: {' '.join(cmd)}")

        return subprocess.run(
            cmd,
            capture_output=capture_output,
            text=True,
        )
    def _parse_event_line(
        self, line: str, day_header_date: Optional[date] = None
    ) -> Optional[Event]:
        """Parse a single event line from khal list output.

        Expected format:
        title|start-time|end-time|start|end|location|uid|description|organizer|url|categories|status|recurring

        Args:
            line: The line to parse
            day_header_date: Current day being parsed (from day headers)

        Returns:
            Event if successfully parsed, None otherwise
        """
        # Skip empty lines and day headers
        if not line or "|" not in line:
            return None

        parts = line.split("|")
        if len(parts) < 5:
            return None

        try:
            title = parts[0].strip()
            start_str = parts[3].strip()  # Full datetime
            end_str = parts[4].strip()  # Full datetime
            location = parts[5].strip() if len(parts) > 5 else ""
            uid = parts[6].strip() if len(parts) > 6 else ""
            description = parts[7].strip() if len(parts) > 7 else ""
            organizer = parts[8].strip() if len(parts) > 8 else ""
            url = parts[9].strip() if len(parts) > 9 else ""
            categories = parts[10].strip() if len(parts) > 10 else ""
            status = parts[11].strip() if len(parts) > 11 else ""
            recurring_symbol = parts[12].strip() if len(parts) > 12 else ""

            # Parse datetimes (format: YYYY-MM-DD HH:MM)
            start = datetime.strptime(start_str, "%Y-%m-%d %H:%M")
            end = datetime.strptime(end_str, "%Y-%m-%d %H:%M")

            # All-day events typically start at 00:00 and end at 00:00 the next day
            all_day = (
                start.hour == 0
                and start.minute == 0
                and end.hour == 0
                and end.minute == 0
                and (end.date() - start.date()).days >= 1
            )

            # A non-empty repeat symbol (typically a loop arrow) marks a recurring event
            recurring = bool(recurring_symbol)

            return Event(
                uid=uid or f"{title}_{start_str}",
                title=title,
                start=start,
                end=end,
                location=location,
                description=description,
                organizer=organizer,
                url=url,
                categories=categories,
                status=status,
                all_day=all_day,
                recurring=recurring,
            )
        except (ValueError, IndexError) as e:
            logger.warning(f"Failed to parse event line '{line}': {e}")
            return None
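
For reference, a minimal sketch of an input line in the format this parser expects (field values are illustrative, not captured khal output, and the snippet assumes the Event dataclass accepts the keyword arguments used above):

    # Illustrative pipe-delimited khal list line; organizer, url, categories
    # and the trailing repeat symbol are left empty
    line = (
        "Team sync|09:00|09:30|2025-12-18 09:00|2025-12-18 09:30"
        "|Room 4|abc123@example.com|Weekly catch-up||||CONFIRMED|"
    )
    event = KhalClient()._parse_event_line(line)
    assert event is not None and event.title == "Team sync"
    assert event.all_day is False and event.recurring is False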
    def get_events(
        self,
        start_date: date,
        end_date: date,
        calendar: Optional[str] = None,
    ) -> List[Event]:
        """Get events in a date range.

        Args:
            start_date: Start of range (inclusive)
            end_date: End of range (inclusive)
            calendar: Optional calendar name to filter by

        Returns:
            List of events in the range, sorted by start time
        """
        # Format dates for khal
        start_str = start_date.strftime("%Y-%m-%d")
        # Add one day to end_date to make the range inclusive
        end_dt = end_date + timedelta(days=1)
        end_str = end_dt.strftime("%Y-%m-%d")

        # Build command
        # Format: title|start-time|end-time|start|end|location|uid|description|organizer|url|categories|status|recurring
        format_str = "{title}|{start-time}|{end-time}|{start}|{end}|{location}|{uid}|{description}|{organizer}|{url}|{categories}|{status}|{repeat-symbol}"
        args = ["list", "-f", format_str, start_str, end_str]

        if calendar:
            args.extend(["-a", calendar])

        result = self._run_khal(args)

        if result.returncode != 0:
            logger.error(f"khal list failed: {result.stderr}")
            return []

        events = []
        current_day: Optional[date] = None

        for line in result.stdout.strip().split("\n"):
            line = line.strip()
            if not line:
                continue

            # Day headers look like "Today, 2025-12-18" or "Monday, 2025-12-22"
            if ", " in line and "|" not in line:
                try:
                    # Extract the date from the header
                    date_part = line.split(", ")[-1]
                    current_day = datetime.strptime(date_part, "%Y-%m-%d").date()
                except ValueError:
                    pass
                continue

            event = self._parse_event_line(line, current_day)
            if event:
                events.append(event)

        # Sort by start time
        events.sort(key=lambda e: e.start)

        return events
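
A minimal usage sketch (the calendar name is an assumption for illustration; KhalClient() falls back to khal's default config discovery):

    from datetime import date, timedelta

    client = KhalClient()
    upcoming = client.get_events(
        start_date=date.today(),
        end_date=date.today() + timedelta(days=7),
        calendar="personal",
    )
    for event in upcoming:
        print(event.start, event.title)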
    def get_event(self, uid: str) -> Optional[Event]:
        """Get a single event by UID.

        Args:
            uid: Event unique identifier

        Returns:
            Event if found, None otherwise
        """
        # khal doesn't have a direct "get by uid" command,
        # so we search for the UID instead
        result = self._run_khal(["search", uid])

        if result.returncode != 0 or not result.stdout.strip():
            return None

        # khal search uses a human-readable layout rather than the
        # pipe-delimited format above, so its output would need separate
        # parsing. Until that is implemented, lookups always return None;
        # this is a known limitation of the CLI backend.
        return None
    def get_calendars(self) -> List[str]:
        """Get list of available calendar names.

        Returns:
            List of calendar names
        """
        result = self._run_khal(["printcalendars"])

        if result.returncode != 0:
            logger.error(f"khal printcalendars failed: {result.stderr}")
            return []

        calendars = []
        for line in result.stdout.strip().split("\n"):
            line = line.strip()
            if line:
                calendars.append(line)

        return calendars
    def create_event(
        self,
        title: str,
        start: datetime,
        end: datetime,
        calendar: Optional[str] = None,
        location: Optional[str] = None,
        description: Optional[str] = None,
        all_day: bool = False,
    ) -> Event:
        """Create a new event.

        Args:
            title: Event title
            start: Start datetime
            end: End datetime
            calendar: Calendar to add event to
            location: Event location
            description: Event description
            all_day: Whether this is an all-day event

        Returns:
            The created event
        """
        # Build the khal new command
        # Format: khal new [-a calendar] [-l location] start end title [:: description]
        if all_day:
            start_str = start.strftime("%Y-%m-%d")
            end_str = end.strftime("%Y-%m-%d")
        else:
            start_str = start.strftime("%Y-%m-%d %H:%M")
            if end.date() == start.date():
                end_str = end.strftime("%H:%M")  # End time only if same day
            else:
                end_str = end.strftime("%Y-%m-%d %H:%M")

        args = ["new"]
        if calendar:
            args.extend(["-a", calendar])
        if location:
            args.extend(["-l", location])

        args.extend([start_str, end_str, title])

        if description:
            args.extend(["::", description])

        result = self._run_khal(args)

        if result.returncode != 0:
            raise RuntimeError(f"Failed to create event: {result.stderr}")

        # Return a constructed event (khal doesn't echo back the created event)
        return Event(
            uid=f"new_{title}_{start.isoformat()}",
            title=title,
            start=start,
            end=end,
            location=location or "",
            description=description or "",
            calendar=calendar or "",
            all_day=all_day,
        )
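
For example, a call like the following (values illustrative) builds roughly the CLI invocation shown in the trailing comment:

    client.create_event(
        title="Dentist",
        start=datetime(2025, 12, 19, 14, 0),
        end=datetime(2025, 12, 19, 15, 0),
        calendar="personal",
        location="Main St 1",
    )
    # Roughly equivalent to:
    #   khal new -a personal -l "Main St 1" "2025-12-19 14:00" "15:00" Dentist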
    def delete_event(self, uid: str) -> bool:
        """Delete an event.

        Args:
            uid: Event unique identifier

        Returns:
            True if deleted successfully
        """
        # Deleting via khal edit is tricky because the command is interactive;
        # we might need to use khal's Python API directly for this
        logger.warning("delete_event not fully implemented for khal CLI")
        return False

    def update_event(
        self,
        uid: str,
        title: Optional[str] = None,
        start: Optional[datetime] = None,
        end: Optional[datetime] = None,
        location: Optional[str] = None,
        description: Optional[str] = None,
    ) -> Optional[Event]:
        """Update an existing event.

        Args:
            uid: Event unique identifier
            title: New title (if provided)
            start: New start time (if provided)
            end: New end time (if provided)
            location: New location (if provided)
            description: New description (if provided)

        Returns:
            Updated event if successful, None otherwise
        """
        # khal edit is interactive, so this is limited via the CLI
        logger.warning("update_event not fully implemented for khal CLI")
        return None
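
One possible workaround, sketched under the assumption that the calendar's vdir collection stores each event in its own <uid>.ics file (khal writes events it creates this way; files authored by other clients may be named differently), is to remove the file directly:

    import os

    def delete_event_from_vdir(collection_dir: str, uid: str) -> bool:
        """Best-effort delete of an event file from a vdir collection."""
        path = os.path.join(collection_dir, f"{uid}.ics")
        if os.path.exists(path):
            os.remove(path)
            return True
        return False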

@@ -5,10 +5,11 @@ Mail operations for Microsoft Graph API.
 import os
 import re
 import glob
+import json
 import asyncio
 from email.parser import Parser
 from email.utils import getaddresses
-from typing import List, Dict, Any
+from typing import List, Dict, Any, Set
 
 from .client import (
     fetch_with_aiohttp,
@@ -27,6 +28,7 @@ async def fetch_mail_async(
     task_id,
     dry_run=False,
     download_attachments=False,
+    is_cancelled=None,
 ):
     """
     Fetch mail from Microsoft Graph API and save to Maildir.
@@ -39,6 +41,7 @@ async def fetch_mail_async(
         task_id: ID of the task in the progress bar.
         dry_run (bool): If True, don't actually make changes.
         download_attachments (bool): If True, download email attachments.
+        is_cancelled (callable, optional): Callback that returns True if task should stop.
 
     Returns:
         None
@@ -105,8 +108,14 @@ async def fetch_mail_async(
 
     # Update progress to reflect only the messages we actually need to download
     progress.update(task_id, total=len(messages_to_download), completed=0)
+    downloaded_count = 0
 
     for message in messages_to_download:
+        # Check if task was cancelled/disabled
+        if is_cancelled and is_cancelled():
+            progress.console.print("Task cancelled, stopping inbox fetch")
+            break
+
         progress.console.print(
             f"Processing message: {message.get('subject', 'No Subject')}", end="\r"
         )
@@ -120,44 +129,92 @@ async def fetch_mail_async(
             download_attachments,
         )
         progress.update(task_id, advance=1)
-    progress.update(task_id, completed=len(messages_to_download))
-    progress.console.print(
-        f"\nFinished downloading {len(messages_to_download)} new messages."
-    )
+        downloaded_count += 1
+
+    progress.update(task_id, completed=downloaded_count)
+    progress.console.print(f"\nFinished downloading {downloaded_count} new messages.")
     progress.console.print(
         f"Total messages on server: {len(messages)}, Already local: {len(local_msg_ids)}"
     )
 
 
-async def archive_mail_async(maildir_path, headers, progress, task_id, dry_run=False):
+def _get_archive_sync_state_path(maildir_path: str) -> str:
+    """Get the path to the archive sync state file."""
+    return os.path.join(maildir_path, ".Archive", ".sync_state.json")
+
+
+def _load_archive_sync_state(maildir_path: str) -> Set[str]:
+    """Load the set of message IDs that have been synced to server."""
+    state_path = _get_archive_sync_state_path(maildir_path)
+    if os.path.exists(state_path):
+        try:
+            with open(state_path, "r") as f:
+                data = json.load(f)
+            return set(data.get("synced_to_server", []))
+        except Exception:
+            pass
+    return set()
+
+
+def _save_archive_sync_state(maildir_path: str, synced_ids: Set[str]) -> None:
+    """Save the set of message IDs that have been synced to server."""
+    state_path = _get_archive_sync_state_path(maildir_path)
+    os.makedirs(os.path.dirname(state_path), exist_ok=True)
+    with open(state_path, "w") as f:
+        json.dump({"synced_to_server": list(synced_ids)}, f, indent=2)
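
The persisted state is deliberately tiny; a round trip shows the file's shape (message IDs are illustrative):

    import json

    state = {"synced_to_server": ["AAMkAGI2THVSAAA=", "AAMkAGI2THVTAAA="]}
    # Exactly what _save_archive_sync_state writes to .Archive/.sync_state.json
    print(json.dumps(state, indent=2))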
+
+
+async def archive_mail_async(
+    maildir_path, headers, progress, task_id, dry_run=False, is_cancelled=None
+):
     """
     Archive mail from Maildir to Microsoft Graph API archive folder using batch operations.
 
+    Messages are moved to the server's Archive folder, but local copies are kept.
+    A sync state file tracks which messages have already been synced to avoid
+    re-processing them on subsequent runs.
+
     Args:
         maildir_path (str): Path to the Maildir.
         headers (dict): Headers including authentication.
         progress: Progress instance for updating progress bars.
         task_id: ID of the task in the progress bar.
         dry_run (bool): If True, don't actually make changes.
+        is_cancelled (callable, optional): Callback that returns True if task should stop.
 
     Returns:
         None
     """
-    # Check both possible archive folder names locally
+    # Load already-synced message IDs
+    synced_ids = _load_archive_sync_state(maildir_path)
+
+    # Check both possible archive folder names locally (prefer .Archive)
     archive_files = []
-    for archive_folder_name in [".Archives", ".Archive"]:
+    for archive_folder_name in [".Archive", ".Archives"]:
         archive_dir = os.path.join(maildir_path, archive_folder_name)
         if os.path.exists(archive_dir):
            archive_files.extend(
                glob.glob(os.path.join(archive_dir, "**", "*.eml*"), recursive=True)
            )
 
-    if not archive_files:
+    # Filter out already-synced messages
+    files_to_sync = []
+    for filepath in archive_files:
+        message_id = os.path.basename(filepath).split(".")[0]
+        if message_id not in synced_ids:
+            files_to_sync.append(filepath)
+
+    if not files_to_sync:
         progress.update(task_id, total=0, completed=0)
-        progress.console.print("No messages to archive")
+        progress.console.print(
+            f"No new messages to archive ({len(archive_files)} already synced)"
+        )
         return
 
-    progress.update(task_id, total=len(archive_files))
+    progress.update(task_id, total=len(files_to_sync))
+    progress.console.print(
+        f"Found {len(files_to_sync)} new messages to sync to server Archive"
+    )
 
     # Get archive folder ID from server
     folder_response = await fetch_with_aiohttp(
@@ -179,9 +236,15 @@ async def archive_mail_async(maildir_path, headers, progress, task_id, dry_run=F
     # Process files in batches of 20 (Microsoft Graph batch limit)
     batch_size = 20
     successful_moves = []
+    newly_synced_ids: Set[str] = set()
 
-    for i in range(0, len(archive_files), batch_size):
-        batch_files = archive_files[i : i + batch_size]
+    for i in range(0, len(files_to_sync), batch_size):
+        # Check if task was cancelled/disabled
+        if is_cancelled and is_cancelled():
+            progress.console.print("Task cancelled, stopping archive sync")
+            break
+
+        batch_files = files_to_sync[i : i + batch_size]
 
         # Add small delay between batches to respect API limits
         if i > 0:
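
For orientation, the batching above maps onto Microsoft Graph JSON batching: up to 20 sub-requests POSTed to the $batch endpoint. A sketch of the payload shape (the /move endpoint and the 20-request limit are Graph API behavior; the variable names and IDs are illustrative):

    # Illustrative $batch payload for moving a batch of messages to Archive
    batch_payload = {
        "requests": [
            {
                "id": str(n),
                "method": "POST",
                "url": f"/me/messages/{message_id}/move",
                "headers": {"Content-Type": "application/json"},
                "body": {"destinationId": archive_folder_id},
            }
            for n, message_id in enumerate(batch_message_ids, start=1)
        ]
    }
    # POSTed to https://graph.microsoft.com/v1.0/$batch; each entry in the
    # response's "responses" list carries the per-message status handled below.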
@@ -216,23 +279,22 @@ async def archive_mail_async(maildir_path, headers, progress, task_id, dry_run=F
                     status = response["status"]
 
                     if status == 201:  # 201 Created indicates successful move
-                        os.remove(
-                            filepath
-                        )  # Remove the local file since it's now archived on server
+                        # Keep local file, just mark as synced
+                        newly_synced_ids.add(message_id)
                         successful_moves.append(message_id)
                         progress.console.print(
-                            f"Moved message to 'Archive': {message_id}"
+                            f"Moved message to server Archive: {message_id}"
                         )
                     elif status == 404:
-                        os.remove(
-                            filepath
-                        )  # Remove the file from local archive if not found on server
+                        # Message not in Inbox (maybe already archived or deleted on server)
+                        # Mark as synced so we don't retry, but keep local copy
+                        newly_synced_ids.add(message_id)
                         progress.console.print(
-                            f"Message not found on server, removed local copy: {message_id}"
+                            f"Message not in Inbox (already archived?): {message_id}"
                         )
                     else:
                         progress.console.print(
-                            f"Failed to move message to 'Archive': {message_id}, status: {status}"
+                            f"Failed to move message to Archive: {message_id}, status: {status}"
                         )
 
         except Exception as e:
@@ -247,19 +309,19 @@ async def archive_mail_async(maildir_path, headers, progress, task_id, dry_run=F
                         {"destinationId": archive_folder_id},
                     )
                     if status == 201:
-                        os.remove(filepath)
+                        newly_synced_ids.add(message_id)
                         successful_moves.append(message_id)
                         progress.console.print(
-                            f"Moved message to 'Archive' (fallback): {message_id}"
+                            f"Moved message to server Archive (fallback): {message_id}"
                         )
                     elif status == 404:
-                        os.remove(filepath)
+                        newly_synced_ids.add(message_id)
                         progress.console.print(
-                            f"Message not found on server, removed local copy: {message_id}"
+                            f"Message not in Inbox (already archived?): {message_id}"
                         )
                     else:
                         progress.console.print(
-                            f"Failed to move message to 'Archive': {message_id}, status: {status}"
+                            f"Failed to move message to Archive: {message_id}, status: {status}"
                        )
                 except Exception as individual_error:
                     progress.console.print(
@@ -270,18 +332,184 @@ async def archive_mail_async(maildir_path, headers, progress, task_id, dry_run=F
             for filepath in batch_files:
                 message_id = os.path.basename(filepath).split(".")[0]
                 progress.console.print(
-                    f"[DRY-RUN] Would move message to 'Archive' folder: {message_id}"
+                    f"[DRY-RUN] Would move message to server Archive: {message_id}"
                 )
 
         progress.advance(task_id, len(batch_files))
 
-    if not dry_run:
+        # Save sync state after each batch for resilience
+        if not dry_run and newly_synced_ids:
+            synced_ids.update(newly_synced_ids)
+            _save_archive_sync_state(maildir_path, synced_ids)
+
+    # Final summary
+    if not dry_run and successful_moves:
         progress.console.print(
-            f"Successfully archived {len(successful_moves)} messages in batches"
+            f"Successfully synced {len(successful_moves)} messages to server Archive (kept local copies)"
         )
     return
 
 
+async def fetch_archive_mail_async(
+    maildir_path,
+    attachments_dir,
+    headers,
+    progress,
+    task_id,
+    dry_run=False,
+    download_attachments=False,
+    max_messages=None,
+    is_cancelled=None,
+):
+    """
+    Fetch archived mail from Microsoft Graph API Archive folder and save to local .Archive Maildir.
+
+    Args:
+        maildir_path (str): Path to the Maildir.
+        attachments_dir (str): Path to save attachments.
+        headers (dict): Headers including authentication.
+        progress: Progress instance for updating progress bars.
+        task_id: ID of the task in the progress bar.
+        dry_run (bool): If True, don't actually make changes.
+        download_attachments (bool): If True, download email attachments.
+        max_messages (int, optional): Maximum number of messages to fetch. None = all.
+        is_cancelled (callable, optional): Callback that returns True if task should stop.
+
+    Returns:
+        None
+    """
+    from src.utils.mail_utils.maildir import save_mime_to_maildir_async
+
+    # Use the well-known 'archive' folder name
+    mail_url = "https://graph.microsoft.com/v1.0/me/mailFolders/archive/messages?$top=100&$orderby=receivedDateTime desc&$select=id,subject,from,toRecipients,ccRecipients,receivedDateTime,isRead"
+    messages = []
+
+    # Fetch the total count of messages in the archive
+    archive_info_url = "https://graph.microsoft.com/v1.0/me/mailFolders/archive"
+
+    try:
+        response = await fetch_with_aiohttp(archive_info_url, headers)
+        total_messages = response.get("totalItemCount", 0) if response else 0
+    except Exception as e:
+        progress.console.print(f"Error fetching archive folder info: {e}")
+        total_messages = 0
+
+    # Apply max_messages limit if specified
+    effective_total = (
+        min(total_messages, max_messages) if max_messages else total_messages
+    )
+    progress.update(task_id, total=effective_total)
+    progress.console.print(
+        f"Archive folder has {total_messages} messages"
+        + (f", fetching up to {max_messages}" if max_messages else "")
+    )
+
+    # Fetch messages from archive
+    fetched_count = 0
+    while mail_url:
+        try:
+            response_data = await fetch_with_aiohttp(mail_url, headers)
+        except Exception as e:
+            progress.console.print(f"Error fetching archive messages: {e}")
+            break
+
+        batch = response_data.get("value", []) if response_data else []
+
+        # Apply max_messages limit
+        if max_messages and fetched_count + len(batch) > max_messages:
+            batch = batch[: max_messages - fetched_count]
+            messages.extend(batch)
+            fetched_count += len(batch)
+            break
+
+        messages.extend(batch)
+        fetched_count += len(batch)
+        progress.advance(task_id, len(batch))
+
+        # Get the next page URL from @odata.nextLink
+        mail_url = response_data.get("@odata.nextLink") if response_data else None
+
+    # Set up local archive directory paths
+    archive_dir = os.path.join(maildir_path, ".Archive")
+    cur_dir = os.path.join(archive_dir, "cur")
+    new_dir = os.path.join(archive_dir, "new")
+
+    # Ensure directories exist
+    os.makedirs(cur_dir, exist_ok=True)
+    os.makedirs(new_dir, exist_ok=True)
+    os.makedirs(os.path.join(archive_dir, "tmp"), exist_ok=True)
+
+    # Get local message IDs in archive
+    cur_files = set(glob.glob(os.path.join(cur_dir, "*.eml*")))
+    new_files = set(glob.glob(os.path.join(new_dir, "*.eml*")))
+
+    local_msg_ids = set()
+    for filename in set.union(cur_files, new_files):
+        message_id = os.path.basename(filename).split(".")[0]
+        local_msg_ids.add(message_id)
+
+    # Filter messages to only include those not already local
+    messages_to_download = [msg for msg in messages if msg["id"] not in local_msg_ids]
+
+    progress.console.print(
+        f"Found {len(messages)} messages on server Archive, {len(local_msg_ids)} already local"
+    )
+    progress.console.print(
+        f"Downloading {len(messages_to_download)} new archived messages"
+    )
+
+    # Update progress to reflect only the messages we actually need to download
+    progress.update(task_id, total=len(messages_to_download), completed=0)
+
+    # Load sync state once; we'll update it incrementally
+    synced_ids = _load_archive_sync_state(maildir_path) if not dry_run else set()
+    downloaded_count = 0
+
+    for message in messages_to_download:
+        # Check if task was cancelled/disabled
+        if is_cancelled and is_cancelled():
+            progress.console.print("Task cancelled, stopping archive fetch")
+            break
+
+        progress.console.print(
+            f"Processing archived message: {message.get('subject', 'No Subject')[:50]}",
+            end="\r",
+        )
+        # Save to the .Archive folder instead of the main maildir
+        await save_mime_to_maildir_async(
+            archive_dir,  # Use archive_dir instead of maildir_path
+            message,
+            attachments_dir,
+            headers,
+            progress,
+            dry_run,
+            download_attachments,
+        )
+        progress.update(task_id, advance=1)
+        downloaded_count += 1
+
+        # Update sync state after each message for resilience.
+        # This ensures we don't try to re-upload this message in archive_mail_async
+        if not dry_run:
+            synced_ids.add(message["id"])
+            _save_archive_sync_state(maildir_path, synced_ids)
+
+    progress.update(task_id, completed=downloaded_count)
+    progress.console.print(
+        f"\nFinished downloading {downloaded_count} archived messages."
+    )
+    progress.console.print(
+        f"Total in server Archive: {total_messages}, Already local: {len(local_msg_ids)}"
+    )
+
+    # Also add any messages we already had locally (from the full server list)
+    # to ensure they're marked as synced
+    if not dry_run and messages:
+        for msg in messages:
+            synced_ids.add(msg["id"])
+        _save_archive_sync_state(maildir_path, synced_ids)
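
A hedged sketch of how the fetch and push sides might be wired together (the wrapper name and task descriptions are illustrative; fetching first keeps the shared sync state coherent before pushing):

    async def sync_archive(maildir_path, attachments_dir, headers, progress):
        # Pull the server-side Archive first so sync state reflects the server
        fetch_task = progress.add_task("fetch archive")
        await fetch_archive_mail_async(
            maildir_path, attachments_dir, headers, progress, fetch_task,
            max_messages=500,
        )
        # Then push locally archived mail the server hasn't seen yet
        push_task = progress.add_task("push archive")
        await archive_mail_async(maildir_path, headers, progress, push_task)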
 
 
 async def delete_mail_async(maildir_path, headers, progress, task_id, dry_run=False):
     """
     Delete mail from Maildir and Microsoft Graph API using batch operations.
 