WIP: keep local copies when archiving, track archive sync state, fetch server Archive, support cancellation
@@ -5,10 +5,11 @@ Mail operations for Microsoft Graph API.
 import os
 import re
 import glob
+import json
 import asyncio
 from email.parser import Parser
 from email.utils import getaddresses
-from typing import List, Dict, Any
+from typing import List, Dict, Any, Set
 
 from .client import (
     fetch_with_aiohttp,
@@ -27,6 +28,7 @@ async def fetch_mail_async(
     task_id,
     dry_run=False,
     download_attachments=False,
+    is_cancelled=None,
 ):
     """
     Fetch mail from Microsoft Graph API and save to Maildir.
@@ -39,6 +41,7 @@ async def fetch_mail_async(
         task_id: ID of the task in the progress bar.
         dry_run (bool): If True, don't actually make changes.
         download_attachments (bool): If True, download email attachments.
+        is_cancelled (callable, optional): Callback that returns True if task should stop.
 
     Returns:
         None
@@ -105,8 +108,14 @@ async def fetch_mail_async(
 
     # Update progress to reflect only the messages we actually need to download
     progress.update(task_id, total=len(messages_to_download), completed=0)
+    downloaded_count = 0
 
     for message in messages_to_download:
+        # Check if task was cancelled/disabled
+        if is_cancelled and is_cancelled():
+            progress.console.print("Task cancelled, stopping inbox fetch")
+            break
+
         progress.console.print(
             f"Processing message: {message.get('subject', 'No Subject')}", end="\r"
         )
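Note: `is_cancelled` is just a zero-argument callable polled between messages. A minimal sketch of how a caller might wire it up; the `threading.Event` and the commented call site are illustrative, not part of this commit:

    import threading

    stop_event = threading.Event()  # set from a signal handler or UI to request a stop

    # Hypothetical call site: the fetch loop polls the callable between messages.
    # await fetch_mail_async(..., task_id=task_id, is_cancelled=stop_event.is_set)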
@@ -120,44 +129,92 @@ async def fetch_mail_async(
             download_attachments,
         )
         progress.update(task_id, advance=1)
-    progress.update(task_id, completed=len(messages_to_download))
-    progress.console.print(
-        f"\nFinished downloading {len(messages_to_download)} new messages."
-    )
+        downloaded_count += 1
+
+    progress.update(task_id, completed=downloaded_count)
+    progress.console.print(f"\nFinished downloading {downloaded_count} new messages.")
     progress.console.print(
         f"Total messages on server: {len(messages)}, Already local: {len(local_msg_ids)}"
     )
 
 
-async def archive_mail_async(maildir_path, headers, progress, task_id, dry_run=False):
+def _get_archive_sync_state_path(maildir_path: str) -> str:
+    """Get the path to the archive sync state file."""
+    return os.path.join(maildir_path, ".Archive", ".sync_state.json")
+
+
+def _load_archive_sync_state(maildir_path: str) -> Set[str]:
+    """Load the set of message IDs that have been synced to server."""
+    state_path = _get_archive_sync_state_path(maildir_path)
+    if os.path.exists(state_path):
+        try:
+            with open(state_path, "r") as f:
+                data = json.load(f)
+                return set(data.get("synced_to_server", []))
+        except Exception:
+            pass
+    return set()
+
+
+def _save_archive_sync_state(maildir_path: str, synced_ids: Set[str]) -> None:
+    """Save the set of message IDs that have been synced to server."""
+    state_path = _get_archive_sync_state_path(maildir_path)
+    os.makedirs(os.path.dirname(state_path), exist_ok=True)
+    with open(state_path, "w") as f:
+        json.dump({"synced_to_server": list(synced_ids)}, f, indent=2)
+
+
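For reference, the state file these helpers round-trip is a flat JSON document. A small usage sketch; the Maildir path and message IDs are illustrative:

    # .Archive/.sync_state.json looks like:
    #   {"synced_to_server": ["AAMkAGI2...id1", "AAMkAGI2...id2"]}
    synced = _load_archive_sync_state("/home/user/Maildir")  # missing/corrupt file -> empty set
    synced.add("AAMkAGI2...id3")
    _save_archive_sync_state("/home/user/Maildir", synced)   # rewrites the whole file each time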
+async def archive_mail_async(
+    maildir_path, headers, progress, task_id, dry_run=False, is_cancelled=None
+):
     """
     Archive mail from Maildir to Microsoft Graph API archive folder using batch operations.
 
+    Messages are moved to the server's Archive folder, but local copies are kept.
+    A sync state file tracks which messages have already been synced to avoid
+    re-processing them on subsequent runs.
+
     Args:
         maildir_path (str): Path to the Maildir.
         headers (dict): Headers including authentication.
         progress: Progress instance for updating progress bars.
         task_id: ID of the task in the progress bar.
         dry_run (bool): If True, don't actually make changes.
+        is_cancelled (callable, optional): Callback that returns True if task should stop.
 
     Returns:
         None
     """
-    # Check both possible archive folder names locally
+    # Load already-synced message IDs
+    synced_ids = _load_archive_sync_state(maildir_path)
+
+    # Check both possible archive folder names locally (prefer .Archive)
     archive_files = []
-    for archive_folder_name in [".Archives", ".Archive"]:
+    for archive_folder_name in [".Archive", ".Archives"]:
         archive_dir = os.path.join(maildir_path, archive_folder_name)
         if os.path.exists(archive_dir):
             archive_files.extend(
                 glob.glob(os.path.join(archive_dir, "**", "*.eml*"), recursive=True)
             )
 
-    if not archive_files:
+    # Filter out already-synced messages
+    files_to_sync = []
+    for filepath in archive_files:
+        message_id = os.path.basename(filepath).split(".")[0]
+        if message_id not in synced_ids:
+            files_to_sync.append(filepath)
+
+    if not files_to_sync:
         progress.update(task_id, total=0, completed=0)
-        progress.console.print("No messages to archive")
+        progress.console.print(
+            f"No new messages to archive ({len(archive_files)} already synced)"
+        )
         return
 
-    progress.update(task_id, total=len(archive_files))
+    progress.update(task_id, total=len(files_to_sync))
+    progress.console.print(
+        f"Found {len(files_to_sync)} new messages to sync to server Archive"
+    )
 
     # Get archive folder ID from server
     folder_response = await fetch_with_aiohttp(
@@ -179,9 +236,15 @@ async def archive_mail_async(maildir_path, headers, progress, task_id, dry_run=F
     # Process files in batches of 20 (Microsoft Graph batch limit)
     batch_size = 20
     successful_moves = []
+    newly_synced_ids: Set[str] = set()
 
-    for i in range(0, len(archive_files), batch_size):
-        batch_files = archive_files[i : i + batch_size]
+    for i in range(0, len(files_to_sync), batch_size):
+        # Check if task was cancelled/disabled
+        if is_cancelled and is_cancelled():
+            progress.console.print("Task cancelled, stopping archive sync")
+            break
+
+        batch_files = files_to_sync[i : i + batch_size]
 
         # Add small delay between batches to respect API limits
         if i > 0:
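For context, a Graph `$batch` request for these moves looks roughly like the sketch below (Graph caps a batch at 20 sub-requests, hence `batch_size = 20`). The folder ID, message IDs, and `batch_ids` variable are illustrative, not the module's actual payload builder:

    archive_folder_id = "AQMkAD..."               # illustrative folder ID
    batch_ids = ["AAMkAGI2...1", "AAMkAGI2...2"]  # message IDs in the current batch (<= 20)
    batch_payload = {
        "requests": [
            {
                "id": str(n),                          # used to match each sub-response
                "method": "POST",
                "url": f"/me/messages/{mid}/move",     # relative to the v1.0 root
                "headers": {"Content-Type": "application/json"},
                "body": {"destinationId": archive_folder_id},
            }
            for n, mid in enumerate(batch_ids, start=1)
        ]
    }
    # POSTed to https://graph.microsoft.com/v1.0/$batch; each entry in the response's
    # "responses" array carries the per-message "status" the code below inspects.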
@@ -216,23 +279,22 @@ async def archive_mail_async(maildir_path, headers, progress, task_id, dry_run=F
                     status = response["status"]
 
                     if status == 201:  # 201 Created indicates successful move
-                        os.remove(
-                            filepath
-                        )  # Remove the local file since it's now archived on server
+                        # Keep local file, just mark as synced
+                        newly_synced_ids.add(message_id)
                         successful_moves.append(message_id)
                         progress.console.print(
-                            f"Moved message to 'Archive': {message_id}"
+                            f"Moved message to server Archive: {message_id}"
                         )
                     elif status == 404:
-                        os.remove(
-                            filepath
-                        )  # Remove the file from local archive if not found on server
+                        # Message not in Inbox (maybe already archived or deleted on server)
+                        # Mark as synced so we don't retry, but keep local copy
+                        newly_synced_ids.add(message_id)
                         progress.console.print(
-                            f"Message not found on server, removed local copy: {message_id}"
+                            f"Message not in Inbox (already archived?): {message_id}"
                         )
                     else:
                         progress.console.print(
-                            f"Failed to move message to 'Archive': {message_id}, status: {status}"
+                            f"Failed to move message to Archive: {message_id}, status: {status}"
                         )
 
             except Exception as e:
@@ -247,19 +309,19 @@ async def archive_mail_async(maildir_path, headers, progress, task_id, dry_run=F
                         {"destinationId": archive_folder_id},
                     )
                     if status == 201:
-                        os.remove(filepath)
+                        newly_synced_ids.add(message_id)
                         successful_moves.append(message_id)
                         progress.console.print(
-                            f"Moved message to 'Archive' (fallback): {message_id}"
+                            f"Moved message to server Archive (fallback): {message_id}"
                         )
                     elif status == 404:
-                        os.remove(filepath)
+                        newly_synced_ids.add(message_id)
                         progress.console.print(
-                            f"Message not found on server, removed local copy: {message_id}"
+                            f"Message not in Inbox (already archived?): {message_id}"
                         )
                     else:
                         progress.console.print(
-                            f"Failed to move message to 'Archive': {message_id}, status: {status}"
+                            f"Failed to move message to Archive: {message_id}, status: {status}"
                         )
                 except Exception as individual_error:
                     progress.console.print(
@@ -270,18 +332,184 @@ async def archive_mail_async(maildir_path, headers, progress, task_id, dry_run=F
             for filepath in batch_files:
                 message_id = os.path.basename(filepath).split(".")[0]
                 progress.console.print(
-                    f"[DRY-RUN] Would move message to 'Archive' folder: {message_id}"
+                    f"[DRY-RUN] Would move message to server Archive: {message_id}"
                 )
 
         progress.advance(task_id, len(batch_files))
 
-    if not dry_run:
+        # Save sync state after each batch for resilience
+        if not dry_run and newly_synced_ids:
+            synced_ids.update(newly_synced_ids)
+            _save_archive_sync_state(maildir_path, synced_ids)
+
+    # Final summary
+    if not dry_run and successful_moves:
         progress.console.print(
-            f"Successfully archived {len(successful_moves)} messages in batches"
+            f"Successfully synced {len(successful_moves)} messages to server Archive (kept local copies)"
        )
     return
 
 
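The fallback branch above retries moves one at a time. In raw aiohttp terms, each retry is a single POST like this sketch; the function name and session handling are illustrative, and `headers` is assumed to carry the bearer token:

    import aiohttp

    async def move_message(message_id: str, folder_id: str, headers: dict) -> int:
        """Move one message into a folder; Graph returns 201 Created on success."""
        url = f"https://graph.microsoft.com/v1.0/me/messages/{message_id}/move"
        async with aiohttp.ClientSession() as session:
            async with session.post(
                url, headers=headers, json={"destinationId": folder_id}
            ) as resp:
                return resp.status  # 404 if the message ID can't be found anymore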
+async def fetch_archive_mail_async(
+    maildir_path,
+    attachments_dir,
+    headers,
+    progress,
+    task_id,
+    dry_run=False,
+    download_attachments=False,
+    max_messages=None,
+    is_cancelled=None,
+):
+    """
+    Fetch archived mail from Microsoft Graph API Archive folder and save to local .Archive Maildir.
+
+    Args:
+        maildir_path (str): Path to the Maildir.
+        attachments_dir (str): Path to save attachments.
+        headers (dict): Headers including authentication.
+        progress: Progress instance for updating progress bars.
+        task_id: ID of the task in the progress bar.
+        dry_run (bool): If True, don't actually make changes.
+        download_attachments (bool): If True, download email attachments.
+        max_messages (int, optional): Maximum number of messages to fetch. None = all.
+        is_cancelled (callable, optional): Callback that returns True if task should stop.
+
+    Returns:
+        None
+    """
+    from src.utils.mail_utils.maildir import save_mime_to_maildir_async
+
+    # Use the well-known 'archive' folder name
+    mail_url = "https://graph.microsoft.com/v1.0/me/mailFolders/archive/messages?$top=100&$orderby=receivedDateTime desc&$select=id,subject,from,toRecipients,ccRecipients,receivedDateTime,isRead"
+    messages = []
+
+    # Fetch the total count of messages in the archive
+    archive_info_url = "https://graph.microsoft.com/v1.0/me/mailFolders/archive"
+
+    try:
+        response = await fetch_with_aiohttp(archive_info_url, headers)
+        total_messages = response.get("totalItemCount", 0) if response else 0
+    except Exception as e:
+        progress.console.print(f"Error fetching archive folder info: {e}")
+        total_messages = 0
+
+    # Apply max_messages limit if specified
+    effective_total = (
+        min(total_messages, max_messages) if max_messages else total_messages
+    )
+    progress.update(task_id, total=effective_total)
+    progress.console.print(
+        f"Archive folder has {total_messages} messages"
+        + (f", fetching up to {max_messages}" if max_messages else "")
+    )
+
+    # Fetch messages from archive
+    fetched_count = 0
+    while mail_url:
+        try:
+            response_data = await fetch_with_aiohttp(mail_url, headers)
+        except Exception as e:
+            progress.console.print(f"Error fetching archive messages: {e}")
+            break
+
+        batch = response_data.get("value", []) if response_data else []
+
+        # Apply max_messages limit
+        if max_messages and fetched_count + len(batch) > max_messages:
+            batch = batch[: max_messages - fetched_count]
+            messages.extend(batch)
+            fetched_count += len(batch)
+            break
+
+        messages.extend(batch)
+        fetched_count += len(batch)
+        progress.advance(task_id, len(batch))
+
+        # Get the next page URL from @odata.nextLink
+        mail_url = response_data.get("@odata.nextLink") if response_data else None
+
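The listing loop above is the standard Graph `@odata.nextLink` paging pattern. Stripped of the progress and limit handling, and assuming `fetch_with_aiohttp` returns the parsed JSON page, it reduces to:

    # Inside an async function, with mail_url and headers as set up above.
    messages, url = [], mail_url
    while url:
        page = await fetch_with_aiohttp(url, headers)
        if not page:
            break
        messages.extend(page.get("value", []))  # current page of messages
        url = page.get("@odata.nextLink")       # absent on the last page, ending the loop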
+    # Set up local archive directory paths
+    archive_dir = os.path.join(maildir_path, ".Archive")
+    cur_dir = os.path.join(archive_dir, "cur")
+    new_dir = os.path.join(archive_dir, "new")
+
+    # Ensure directories exist
+    os.makedirs(cur_dir, exist_ok=True)
+    os.makedirs(new_dir, exist_ok=True)
+    os.makedirs(os.path.join(archive_dir, "tmp"), exist_ok=True)
+
+    # Get local message IDs in archive
+    cur_files = set(glob.glob(os.path.join(cur_dir, "*.eml*")))
+    new_files = set(glob.glob(os.path.join(new_dir, "*.eml*")))
+
+    local_msg_ids = set()
+    for filename in set.union(cur_files, new_files):
+        message_id = os.path.basename(filename).split(".")[0]
+        local_msg_ids.add(message_id)
+
+    # Filter messages to only include those not already local
+    messages_to_download = [msg for msg in messages if msg["id"] not in local_msg_ids]
+
+    progress.console.print(
+        f"Found {len(messages)} messages on server Archive, {len(local_msg_ids)} already local"
+    )
+    progress.console.print(
+        f"Downloading {len(messages_to_download)} new archived messages"
+    )
+
+    # Update progress to reflect only the messages we actually need to download
+    progress.update(task_id, total=len(messages_to_download), completed=0)
+
+    # Load sync state once, we'll update it incrementally
+    synced_ids = _load_archive_sync_state(maildir_path) if not dry_run else set()
+    downloaded_count = 0
+
+    for message in messages_to_download:
+        # Check if task was cancelled/disabled
+        if is_cancelled and is_cancelled():
+            progress.console.print("Task cancelled, stopping archive fetch")
+            break
+
+        progress.console.print(
+            f"Processing archived message: {message.get('subject', 'No Subject')[:50]}",
+            end="\r",
+        )
+        # Save to .Archive folder instead of main maildir
+        await save_mime_to_maildir_async(
+            archive_dir,  # Use archive_dir instead of maildir_path
+            message,
+            attachments_dir,
+            headers,
+            progress,
+            dry_run,
+            download_attachments,
+        )
+        progress.update(task_id, advance=1)
+        downloaded_count += 1
+
+        # Update sync state after each message for resilience
+        # This ensures we don't try to re-upload this message in archive_mail_async
+        if not dry_run:
+            synced_ids.add(message["id"])
+            _save_archive_sync_state(maildir_path, synced_ids)
+
+    progress.update(task_id, completed=downloaded_count)
+    progress.console.print(
+        f"\nFinished downloading {downloaded_count} archived messages."
+    )
+    progress.console.print(
+        f"Total in server Archive: {total_messages}, Already local: {len(local_msg_ids)}"
+    )
+
+    # Also add any messages we already had locally (from the full server list)
+    # to ensure they're marked as synced
+    if not dry_run and messages:
+        for msg in messages:
+            synced_ids.add(msg["id"])
+        _save_archive_sync_state(maildir_path, synced_ids)
+
+
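One convention worth calling out: every function here recovers the Graph message ID from the Maildir filename's first dot-separated token, so stored names must start with the ID and the ID itself must not contain a dot. A sketch of that convention; the filename is illustrative:

    import os

    filename = "AAMkAGI2THVSAAA=.eml.gz"  # illustrative: "<graph_id>.eml[.gz]"
    message_id = os.path.basename(filename).split(".")[0]
    assert message_id == "AAMkAGI2THVSAAA="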
 async def delete_mail_async(maildir_path, headers, progress, task_id, dry_run=False):
     """
     Delete mail from Maildir and Microsoft Graph API using batch operations.