Try a simple shell script and fix archive handling

This commit is contained in:
Tim Bendt
2025-07-15 22:13:46 -04:00
parent f7474a3805
commit df4c49c3ef
18 changed files with 1273 additions and 389 deletions

View File

@@ -16,6 +16,7 @@ from src.services.microsoft_graph.mail import (
)
from src.services.microsoft_graph.auth import get_access_token
# Function to create Maildir structure
def create_maildir_structure(base_path):
"""
@@ -34,7 +35,18 @@ def create_maildir_structure(base_path):
ensure_directory_exists(os.path.join(base_path, ".Trash", "cur"))
async def fetch_calendar_async(headers, progress, task_id, dry_run, vdir_path, ics_path, org_name, days_back, days_forward, continue_iteration):
async def fetch_calendar_async(
headers,
progress,
task_id,
dry_run,
vdir_path,
ics_path,
org_name,
days_back,
days_forward,
continue_iteration,
):
"""
Fetch calendar events and save them in the appropriate format.
@@ -73,8 +85,7 @@ async def fetch_calendar_async(headers, progress, task_id, dry_run, vdir_path, i
progress.console.print(
f"[cyan]Saving events to vdir: {org_vdir_path}[/cyan]"
)
save_events_to_vdir(events, org_vdir_path,
progress, task_id, dry_run)
save_events_to_vdir(events, org_vdir_path, progress, task_id, dry_run)
progress.console.print(
f"[green]Finished saving events to vdir: {org_vdir_path}[/green]"
)
@@ -97,7 +108,8 @@ async def fetch_calendar_async(headers, progress, task_id, dry_run, vdir_path, i
else:
progress.console.print(
f"[DRY-RUN] Would save {len(events)} events to {
'vdir format' if vdir_path else 'single ICS file'}"
'vdir format' if vdir_path else 'single ICS file'
}"
)
progress.update(task_id, advance=len(events))
@@ -108,17 +120,22 @@ async def fetch_calendar_async(headers, progress, task_id, dry_run, vdir_path, i
next_end_date = next_start_date + timedelta(days=days_forward)
progress.console.print(
f"\nCurrent date range: {next_start_date.strftime(
'%Y-%m-%d')} to {next_end_date.strftime('%Y-%m-%d')}"
f"\nCurrent date range: {next_start_date.strftime('%Y-%m-%d')} to {
next_end_date.strftime('%Y-%m-%d')
}"
)
user_response = click.prompt(
"\nContinue to iterate? [y/N]", default="N").strip().lower()
user_response = (
click.prompt("\nContinue to iterate? [y/N]", default="N")
.strip()
.lower()
)
while user_response == "y":
progress.console.print(
f"\nFetching events for {next_start_date.strftime(
'%Y-%m-%d')} to {next_end_date.strftime('%Y-%m-%d')}..."
f"\nFetching events for {next_start_date.strftime('%Y-%m-%d')} to {
next_end_date.strftime('%Y-%m-%d')
}..."
)
# Reset the progress bar for the new fetch
@@ -152,7 +169,12 @@ async def fetch_calendar_async(headers, progress, task_id, dry_run, vdir_path, i
else:
progress.console.print(
f"[DRY-RUN] Would save {len(next_events)} events to {
'vdir format' if vdir_path else 'output_ics/outlook_events_' + next_start_date.strftime('%Y%m%d') + '.ics'}"
'vdir format'
if vdir_path
else 'output_ics/outlook_events_'
+ next_start_date.strftime('%Y%m%d')
+ '.ics'
}"
)
progress.update(task_id, advance=len(next_events))
@@ -161,11 +183,15 @@ async def fetch_calendar_async(headers, progress, task_id, dry_run, vdir_path, i
next_end_date = next_start_date + timedelta(days=days_forward)
progress.console.print(
f"\nNext date range would be: {next_start_date.strftime(
'%Y-%m-%d')} to {next_end_date.strftime('%Y-%m-%d')}"
f"\nNext date range would be: {
next_start_date.strftime('%Y-%m-%d')
} to {next_end_date.strftime('%Y-%m-%d')}"
)
user_response = (
click.prompt("\nContinue to iterate? [y/N]", default="N")
.strip()
.lower()
)
user_response = click.prompt(
"\nContinue to iterate? [y/N]", default="N").strip().lower()
return events
except Exception as e:
@@ -179,17 +205,23 @@ async def fetch_calendar_async(headers, progress, task_id, dry_run, vdir_path, i
return []
async def _sync_outlook_data(dry_run, vdir, icsfile, org, days_back, days_forward, continue_iteration, download_attachments):
async def _sync_outlook_data(
dry_run,
vdir,
icsfile,
org,
days_back,
days_forward,
continue_iteration,
download_attachments,
):
"""Synchronize data from external sources."""
# Expand the user home directory in vdir path
vdir = os.path.expanduser(vdir)
# Save emails to Maildir
maildir_path = (
os.getenv("MAILDIR_PATH", os.path.expanduser(
"~/Mail")) + f"/{org}"
)
maildir_path = os.getenv("MAILDIR_PATH", os.path.expanduser("~/Mail")) + f"/{org}"
attachments_dir = os.path.join(maildir_path, "attachments")
ensure_directory_exists(attachments_dir)
create_maildir_structure(maildir_path)
@@ -210,27 +242,28 @@ async def _sync_outlook_data(dry_run, vdir, icsfile, org, days_back, days_forwar
with progress:
task_fetch = progress.add_task("[green]Syncing Inbox...", total=0)
task_calendar = progress.add_task(
"[cyan]Fetching calendar...", total=0)
task_calendar = progress.add_task("[cyan]Fetching calendar...", total=0)
task_read = progress.add_task("[blue]Marking as read...", total=0)
task_archive = progress.add_task("[yellow]Archiving mail...", total=0)
task_delete = progress.add_task("[red]Deleting mail...", total=0)
# Stage 1: Synchronize local changes (read, archive, delete) to the server
progress.console.print("[bold cyan]Step 1: Syncing local changes to server...[/bold cyan]")
progress.console.print(
"[bold cyan]Step 1: Syncing local changes to server...[/bold cyan]"
)
await asyncio.gather(
synchronize_maildir_async(
maildir_path, headers, progress, task_read, dry_run
),
archive_mail_async(maildir_path, headers,
progress, task_archive, dry_run),
delete_mail_async(maildir_path, headers,
progress, task_delete, dry_run),
archive_mail_async(maildir_path, headers, progress, task_archive, dry_run),
delete_mail_async(maildir_path, headers, progress, task_delete, dry_run),
)
progress.console.print("[bold green]Step 1: Local changes synced.[/bold green]")
# Stage 2: Fetch new data from the server
progress.console.print("\n[bold cyan]Step 2: Fetching new data from server...[/bold cyan]")
progress.console.print(
"\n[bold cyan]Step 2: Fetching new data from server...[/bold cyan]"
)
await asyncio.gather(
fetch_mail_async(
maildir_path,
@@ -241,7 +274,18 @@ async def _sync_outlook_data(dry_run, vdir, icsfile, org, days_back, days_forwar
dry_run,
download_attachments,
),
fetch_calendar_async(headers, progress, task_calendar, dry_run, vdir, icsfile, org, days_back, days_forward, continue_iteration),
fetch_calendar_async(
headers,
progress,
task_calendar,
dry_run,
vdir,
icsfile,
org,
days_back,
days_forward,
continue_iteration,
),
)
progress.console.print("[bold green]Step 2: New data fetched.[/bold green]")
click.echo("Sync complete.")
@@ -291,5 +335,123 @@ async def _sync_outlook_data(dry_run, vdir, icsfile, org, days_back, days_forwar
help="Download email attachments",
default=False,
)
def sync(dry_run, vdir, icsfile, org, days_back, days_forward, continue_iteration, download_attachments):
asyncio.run(_sync_outlook_data(dry_run, vdir, icsfile, org, days_back, days_forward, continue_iteration, download_attachments))
@click.option(
"--daemon",
is_flag=True,
help="Run in daemon mode.",
default=False,
)
def sync(
    dry_run,
    vdir,
    icsfile,
    org,
    days_back,
    days_forward,
    continue_iteration,
    download_attachments,
    daemon,
):
    """CLI entry point: run a single sync, or loop forever when --daemon is set.

    All options are forwarded unchanged to either ``daemon_mode`` or
    ``_sync_outlook_data``; the only branch point is the ``daemon`` flag.
    """
    shared_args = (
        dry_run,
        vdir,
        icsfile,
        org,
        days_back,
        days_forward,
        continue_iteration,
        download_attachments,
    )
    # Pick the coroutine factory once, then drive it with asyncio.run.
    runner = daemon_mode if daemon else _sync_outlook_data
    asyncio.run(runner(*shared_args))
async def daemon_mode(
    dry_run,
    vdir,
    icsfile,
    org,
    days_back,
    days_forward,
    continue_iteration,
    download_attachments,
):
    """
    Run the script in daemon mode, periodically syncing emails.

    Performs a full sync every ``sync_interval`` seconds; between full syncs,
    every ``check_interval`` seconds it compares the server inbox count with
    the local Maildir count and triggers an early full sync on mismatch.

    Args mirror ``_sync_outlook_data`` and are forwarded unchanged.
    This coroutine never returns (infinite loop); cancel it externally.
    """
    from src.services.microsoft_graph.mail import get_inbox_count_async
    import time

    sync_interval = 300  # seconds between forced full syncs (5 minutes)
    check_interval = 10  # seconds between quick inbox-count checks
    last_sync_time = time.time() - sync_interval  # force an immediate first sync

    while True:
        if time.time() - last_sync_time >= sync_interval:
            # NOTE(review): click.echo prints "[green]...[/green]" literally —
            # these rich-style tags are not rendered by click. Kept for parity.
            click.echo("[green]Performing full sync...[/green]")
            await _sync_outlook_data(
                dry_run,
                vdir,
                icsfile,
                org,
                days_back,
                days_forward,
                continue_iteration,
                download_attachments,
            )
            last_sync_time = time.time()
        else:
            click.echo("[cyan]Checking for new messages...[/cyan]")
            # Authenticate and get an access token (token itself is unused;
            # only the ready-made headers are needed for the count request).
            scopes = ["https://graph.microsoft.com/Mail.Read"]
            _access_token, headers = get_access_token(scopes)
            remote_message_count = await get_inbox_count_async(headers)
            # CONSISTENCY FIX: honor MAILDIR_PATH like _sync_outlook_data does,
            # instead of hard-coding ~/Mail — otherwise the daemon counts a
            # different directory than the one the sync writes to.
            maildir_path = (
                os.getenv("MAILDIR_PATH", os.path.expanduser("~/Mail")) + f"/{org}"
            )
            # Count local .eml messages across both Maildir message folders.
            local_message_count = sum(
                1
                for subdir in ("new", "cur")
                for f in os.listdir(os.path.join(maildir_path, subdir))
                if ".eml" in f
            )
            if remote_message_count != local_message_count:
                click.echo(
                    f"[yellow]New messages detected ({remote_message_count} / {local_message_count}), performing full sync...[/yellow]"
                )
                await _sync_outlook_data(
                    dry_run,
                    vdir,
                    icsfile,
                    org,
                    days_back,
                    days_forward,
                    continue_iteration,
                    download_attachments,
                )
                last_sync_time = time.time()
            else:
                click.echo("[green]No new messages detected.[/green]")
        # BUG FIX: time.sleep() blocks the entire asyncio event loop, starving
        # any concurrent tasks; asyncio.sleep yields control while waiting.
        await asyncio.sleep(check_interval)

View File

@@ -116,8 +116,15 @@ async def archive_mail_async(maildir_path, headers, progress, task_id, dry_run=F
Returns:
None
"""
archive_dir = os.path.join(maildir_path, ".Archives")
archive_files = glob.glob(os.path.join(archive_dir, "**", "*.eml*"), recursive=True)
# Check both possible archive folder names locally
archive_files = []
for archive_folder_name in [".Archives", ".Archive"]:
archive_dir = os.path.join(maildir_path, archive_folder_name)
if os.path.exists(archive_dir):
archive_files.extend(
glob.glob(os.path.join(archive_dir, "**", "*.eml*"), recursive=True)
)
progress.update(task_id, total=len(archive_files))
folder_response = await fetch_with_aiohttp(
@@ -128,13 +135,13 @@ async def archive_mail_async(maildir_path, headers, progress, task_id, dry_run=F
(
folder.get("id")
for folder in folders
if folder.get("displayName", "").lower() == "archive"
if folder.get("displayName", "").lower() in ["archive", "archives"]
),
None,
)
if not archive_folder_id:
raise Exception("No folder named 'Archive' found on the server.")
raise Exception("No folder named 'Archive' or 'Archives' found on the server.")
for filepath in archive_files:
message_id = os.path.basename(filepath).split(".")[
@@ -147,17 +154,22 @@ async def archive_mail_async(maildir_path, headers, progress, task_id, dry_run=F
headers,
{"destinationId": archive_folder_id},
)
if status != 201: # 201 Created indicates success
progress.console.print(
f"Failed to move message to 'Archive': {message_id}, {status}"
)
if status == 404:
os.remove(filepath) # Remove the file from local archive if not found
if status == 201: # 201 Created indicates successful move
os.remove(
filepath
) # Remove the local file since it's now archived on server
progress.console.print(f"Moved message to 'Archive': {message_id}")
elif status == 404:
os.remove(
filepath
) # Remove the file from local archive if not found on server
progress.console.print(
f"Message not found on server, removed local copy: {message_id}"
)
elif status == 204:
progress.console.print(f"Moved message to 'Archive': {message_id}")
else:
progress.console.print(
f"Failed to move message to 'Archive': {message_id}, status: {status}"
)
else:
progress.console.print(
f"[DRY-RUN] Would move message to 'Archive' folder: {message_id}"
@@ -200,6 +212,21 @@ async def delete_mail_async(maildir_path, headers, progress, task_id, dry_run=Fa
progress.advance(task_id)
async def get_inbox_count_async(headers):
    """
    Get the number of messages in the inbox.

    Queries the Microsoft Graph inbox mail-folder resource and reads its
    ``totalItemCount`` field, defaulting to 0 when the field is absent
    (e.g. on an error payload).

    Args:
        headers (dict): Headers including authentication.

    Returns:
        int: The number of messages in the inbox.
    """
    # Graph mailFolder resource; totalItemCount covers read + unread messages.
    inbox_url = "https://graph.microsoft.com/v1.0/me/mailFolders/inbox"
    response = await fetch_with_aiohttp(inbox_url, headers)
    return response.get("totalItemCount", 0)
async def synchronize_maildir_async(
maildir_path, headers, progress, task_id, dry_run=False
):
@@ -217,10 +244,10 @@ async def synchronize_maildir_async(
None
"""
from src.utils.mail_utils.helpers import (
load_last_sync_timestamp,
save_sync_timestamp,
truncate_id,
)
load_last_sync_timestamp,
save_sync_timestamp,
truncate_id,
)
last_sync = load_last_sync_timestamp()