adding UV

Tim Bendt
2025-05-08 12:02:24 -06:00
parent 7e42644224
commit eba883a465
10 changed files with 856 additions and 126 deletions


@@ -1,22 +1,26 @@
+import glob
+import json
 import os
 import re
-from typing import Set
-import msal
-import json
-import glob
-from datetime import datetime
-from dateutil import parser
-from dateutil.tz import UTC
-import time
+from datetime import datetime, timedelta
 from email.message import EmailMessage
 from email.utils import format_datetime
+from typing import Set
+from dateutil import parser
+from dateutil.tz import UTC
 from rich import print
-from rich.progress import Progress, SpinnerColumn, MofNCompleteColumn
 from rich.panel import Panel
+import time
-import html2text
-import asyncio
-import argparse
+from rich.progress import Progress, SpinnerColumn, MofNCompleteColumn
 import aiohttp
+import argparse
+import asyncio
+import html2text
+import msal
+import orjson

# Filepath for caching timestamp
cache_timestamp_file = 'cache_timestamp.json'
@@ -44,32 +48,39 @@ args = arg_parser.parse_args()
dry_run = args.dry_run
# Define a global semaphore for throttling
semaphore = asyncio.Semaphore(4)
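# Note: each aiohttp helper below acquires this semaphore first, so at most four
# Graph API requests are in flight at once; a simple client-side throttle.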
async def fetch_with_aiohttp(url, headers):
-    async with aiohttp.ClientSession() as session:
-        async with session.get(url, headers=headers) as response:
-            if response.status != 200:
-                raise Exception(f"Failed to fetch {url}: {response.status} {await response.text()}")
-            return await response.json()
+    async with semaphore:
+        async with aiohttp.ClientSession() as session:
+            async with session.get(url, headers=headers) as response:
+                if response.status != 200:
+                    raise Exception(f"Failed to fetch {url}: {response.status} {await response.text()}")
+                raw_bytes = await response.read()
+                content_length = response.headers.get('Content-Length')
+                if content_length and len(raw_bytes) != int(content_length):
+                    print("Warning: Incomplete response received!")
+                    return None
+                return orjson.loads(raw_bytes)
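# Note: reading the raw bytes and comparing against Content-Length guards against
# truncated responses; orjson.loads() then parses the body, presumably chosen over
# the stdlib json for speed on large message pages.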
async def post_with_aiohttp(url, headers, json_data):
-    async with aiohttp.ClientSession() as session:
-        async with session.post(url, headers=headers, json=json_data) as response:
-            if response.status != 201:
-                raise Exception(f"Failed to post to {url}: {response.status} {await response.text()}")
-            return await response.json()
+    async with semaphore:
+        async with aiohttp.ClientSession() as session:
+            async with session.post(url, headers=headers, json=json_data) as response:
+                return response.status

async def patch_with_aiohttp(url, headers, json_data):
-    async with aiohttp.ClientSession() as session:
-        async with session.patch(url, headers=headers, json=json_data) as response:
-            if response.status != 200:
-                raise Exception(f"Failed to patch {url}: {response.status} {await response.text()}")
-            return await response.json()
+    async with semaphore:
+        async with aiohttp.ClientSession() as session:
+            async with session.patch(url, headers=headers, json=json_data) as response:
+                return response.status

async def delete_with_aiohttp(url, headers):
-    async with aiohttp.ClientSession() as session:
-        async with session.delete(url, headers=headers) as response:
-            if response.status != 204:
-                raise Exception(f"Failed to delete {url}: {response.status} {await response.text()}")
+    async with semaphore:
+        async with aiohttp.ClientSession() as session:
+            async with session.delete(url, headers=headers) as response:
+                return response.status
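# Note: the post/patch/delete helpers now return the bare HTTP status instead of
# raising or parsing JSON, so callers can branch on 404 ("already gone") themselves.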
async def synchronize_maildir_async(maildir_path, headers, progress, task_id):
    last_sync = load_last_sync_timestamp()
@@ -78,19 +89,26 @@ async def synchronize_maildir_async(maildir_path, headers, progress, task_id):
    # Find messages moved from "new" to "cur" and mark them as read
    new_dir = os.path.join(maildir_path, 'new')
    cur_dir = os.path.join(maildir_path, 'cur')
-    new_files = set(glob.glob(os.path.join(new_dir, '*.eml')))
-    cur_files = set(glob.glob(os.path.join(cur_dir, '*.eml')))
+    new_files = set(glob.glob(os.path.join(new_dir, '*.eml*')))
+    cur_files = set(glob.glob(os.path.join(cur_dir, '*.eml*')))
    moved_to_cur = [os.path.basename(f) for f in cur_files - new_files]
    progress.update(task_id, total=len(moved_to_cur))
    for filename in moved_to_cur:
-        message_id = filename.split('.')[0]  # Extract the Message-ID from the filename
+        # TODO: this isn't scalable, we should use a more efficient way to check if the file was modified
+        if os.path.getmtime(os.path.join(cur_dir, filename)) < last_sync:
+            progress.update(task_id, advance=1)
+            continue
+        message_id = re.sub(r"\:2.+", "", filename.split('.')[0])  # Extract the Message-ID from the filename
        if not dry_run:
-            await patch_with_aiohttp(
+            status = await patch_with_aiohttp(
                f'https://graph.microsoft.com/v1.0/me/messages/{message_id}',
                headers,
                {'isRead': True}
            )
+            if status == 404:
+                os.remove(os.path.join(cur_dir, filename))
        else:
            progress.console.print(f"[DRY-RUN] Would mark message as read: {message_id}")
        progress.advance(task_id)
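# Note: Maildir clients append an info suffix of the form ":2,<flags>" when a message
# lands in cur, hence the '*.eml*' globs and the re.sub() stripping everything from
# ":2" onward to recover the Graph message id.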
@@ -102,18 +120,23 @@ async def synchronize_maildir_async(maildir_path, headers, progress, task_id):
progress.console.print("[DRY-RUN] Would save sync timestamp.")
async def fetch_mail_async(maildir_path, attachments_dir, headers, progress, task_id):
mail_url = 'https://graph.microsoft.com/v1.0/me/mailFolders/inbox/messages?$top=100&$orderby=receivedDateTime asc&$select=id,subject,from,toRecipients,ccRecipients,receivedDateTime,isRead,body,attachments'
mail_url = 'https://graph.microsoft.com/v1.0/me/mailFolders/inbox/messages?$top=100&$orderby=receivedDateTime asc&$select=id,subject,from,toRecipients,ccRecipients,receivedDateTime,isRead'
messages = []
# Fetch the total count of messages in the inbox
inbox_url = 'https://graph.microsoft.com/v1.0/me/mailFolders/inbox'
response = await fetch_with_aiohttp(inbox_url, headers)
total_messages = response.get('totalItemCount', 0)
progress.update(task_id, total=total_messages)
while mail_url:
response_data = await fetch_with_aiohttp(mail_url, headers)
try:
response_data = await fetch_with_aiohttp(mail_url, headers)
except Exception as e:
progress.console.print(f"Error fetching messages: {e}")
continue
messages.extend(response_data.get('value', []))
progress.advance(task_id, len(response_data.get('value', [])))
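# Note: body and attachments were dropped from $select, presumably because
# save_mime_to_maildir_async now downloads each message's full MIME payload instead.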
@@ -124,8 +147,8 @@ async def fetch_mail_async(maildir_path, attachments_dir, headers, progress, tas
    progress.update(task_id, completed=(len(messages) / 2))
    new_dir = os.path.join(maildir_path, 'new')
    cur_dir = os.path.join(maildir_path, 'cur')
-    new_files = set(glob.glob(os.path.join(new_dir, '*.eml')))
-    cur_files = set(glob.glob(os.path.join(cur_dir, '*.eml')))
+    new_files = set(glob.glob(os.path.join(new_dir, '*.eml*')))
+    cur_files = set(glob.glob(os.path.join(cur_dir, '*.eml*')))
    for filename in Set.union(cur_files, new_files):
        message_id = filename.split('.')[0].split('/')[-1]  # Extract the Message-ID from the filename
@@ -138,14 +161,14 @@ async def fetch_mail_async(maildir_path, attachments_dir, headers, progress, tas
    for message in messages:
        progress.console.print(f"Processing message: {message.get('subject', 'No Subject')}", end='\r')
-        save_email_to_maildir(maildir_path, message, attachments_dir, progress)
+        await save_mime_to_maildir_async(maildir_path, message, attachments_dir, headers, progress)
        progress.update(task_id, advance=0.5)
    progress.update(task_id, completed=len(messages))
    progress.console.print(f"\nFinished saving {len(messages)} messages.")

async def archive_mail_async(maildir_path, headers, progress, task_id):
    archive_dir = os.path.join(maildir_path, '.Archives')
-    archive_files = glob.glob(os.path.join(archive_dir, '**', '*.eml'), recursive=True)
+    archive_files = glob.glob(os.path.join(archive_dir, '**', '*.eml*'), recursive=True)
    progress.update(task_id, total=len(archive_files))
    folder_response = await fetch_with_aiohttp('https://graph.microsoft.com/v1.0/me/mailFolders', headers)
@@ -157,17 +180,20 @@ async def archive_mail_async(maildir_path, headers, progress, task_id):
    for filepath in archive_files:
        message_id = os.path.basename(filepath).split('.')[0]  # Extract the Message-ID from the filename
        progress.console.print(f"Moving message to 'Archive' folder: {message_id}")
        if not dry_run:
-            response = await post_with_aiohttp(
-                f'https://graph.microsoft.com/v1.0/me/messages/{message_id}/move',
+            status = await post_with_aiohttp(
+                f'https://graph.microsoft.com/v1.0/me/messages/{message_id}/microsoft.graph.move',
                headers,
                {'destinationId': archive_folder_id}
            )
-            if response.status_code != 201:  # 201 Created indicates success
-                progress.console.print(f"Failed to move message to 'Archive': {message_id}, {response.status_code}, {response.text}")
-                if response.status_code == 404:
-                    os.remove(filepath)  # Remove the file from local archive if not found on server
+            if status != 201:  # 201 Created indicates success
+                progress.console.print(f"Failed to move message to 'Archive': {message_id}, {status}")
+                if status == 404:
+                    os.remove(filepath)  # Remove the file from local archive if not found on server
+                    progress.console.print(f"Message not found on server, removed local copy: {message_id}")
+                elif status == 204:
+                    progress.console.print(f"Moved message to 'Archive': {message_id}")
        else:
            progress.console.print(f"[DRY-RUN] Would move message to 'Archive' folder: {message_id}")
        progress.advance(task_id)
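# Note: the Graph move action answers 201 Created on success; a 404 is treated as
# "message no longer on the server", so the stale local copy is pruned to stay in sync.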
@@ -175,106 +201,134 @@ async def archive_mail_async(maildir_path, headers, progress, task_id):
async def delete_mail_async(maildir_path, headers, progress, task_id):
    trash_dir = os.path.join(maildir_path, '.Trash', 'cur')
-    trash_files = set(glob.glob(os.path.join(trash_dir, '*.eml')))
+    trash_files = set(glob.glob(os.path.join(trash_dir, '*.eml*')))
    progress.update(task_id, total=len(trash_files))
    for filepath in trash_files:
        message_id = os.path.basename(filepath).split('.')[0]  # Extract the Message-ID from the filename
        if not dry_run:
            progress.console.print(f"Moving message to trash: {message_id}")
-            await delete_with_aiohttp(
+            status = await delete_with_aiohttp(
                f'https://graph.microsoft.com/v1.0/me/messages/{message_id}',
                headers
            )
-            os.remove(filepath)  # Remove the file from local trash
+            if status == 204 or status == 404:
+                os.remove(filepath)  # Remove the file from local trash
        else:
            progress.console.print(f"[DRY-RUN] Would delete message: {message_id}")
        progress.advance(task_id)
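# Note: DELETE returns 204 No Content on success; accepting 404 as well makes the
# trash sweep idempotent when a message was already deleted server-side.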
async def fetch_calendar_async(headers, progress, task_id):
-    total_event_url = 'https://graph.microsoft.com/v1.0/me/events?$count=true'
+    yesterday = datetime.now().replace(hour=0, minute=0, second=0) - timedelta(days=1)
+    end_of_today = datetime.now().replace(hour=23, minute=59, second=59)
+    six_days_future = end_of_today + timedelta(days=6)
+    # example https://graph.microsoft.com/v1.0/me/calendarView?startDateTime=2025-05-06T00:00:00&endDateTime=2025-05-13T23:59:59.999999&$count=true&$select=id
+    event_base_url = f"https://graph.microsoft.com/v1.0/me/calendarView?startDateTime={yesterday.isoformat()}&endDateTime={six_days_future.isoformat()}"
+    total_event_url = f"{event_base_url}&$count=true&$select=id"
    total = await fetch_with_aiohttp(total_event_url, headers)
-    total_events = total.get('@odata.count', 0)
+    total_events = total.get('@odata.count', 0) + 1
    progress.update(task_id, total=total_events)
-    calendar_url = 'https://graph.microsoft.com/v1.0/me/events?$top=100&$orderby=start/dateTime asc'
+    calendar_url = f"{event_base_url}&$top=100&$select=start,end,iCalUid,subject,bodyPreview,webLink,location,recurrence,showAs,responseStatus,onlineMeeting"
    events = []
    if total_events > 100:
        progress.update(task_id, total=total_events + total_events % 100)
    while calendar_url:
        response_data = await fetch_with_aiohttp(calendar_url, headers)
        events.extend(response_data.get('value', []))
-        progress.advance(task_id, len(response_data.get('value', [])))
+        progress.advance(task_id, 1)
        # Get the next page URL from @odata.nextLink
        calendar_url = response_data.get('@odata.nextLink')
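# Note: calendarView returns the individual occurrences of recurring events inside
# the startDateTime/endDateTime window, which is presumably why the old /me/events
# listing (and its $expand=instances workaround) is removed below.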
-async def download_calendar_events(headers, progress, task_id):
-    # Fetch the total count of events in the calendar
-    total_event_url = 'https://graph.microsoft.com/v1.0/me/events?$count=true'
-    total = await fetch_with_aiohttp(total_event_url, headers)
+    output_file = f'output_ics/outlook_events_latest.ics'
+    if not dry_run:
+        os.makedirs(os.path.dirname(output_file), exist_ok=True)
+        progress.console.print(f"Saving events to {output_file}...")
+        with open(output_file, 'w') as f:
+            f.write("BEGIN:VCALENDAR\nVERSION:2.0\n")
+            for event in events:
+                progress.advance(task_id)
+                if 'start' in event and 'end' in event:
+                    start = parser.isoparse(event['start']['dateTime']).astimezone(UTC)
+                    end = parser.isoparse(event['end']['dateTime']).astimezone(UTC)
+                    f.write(f"BEGIN:VEVENT\nSUMMARY:{event['subject']}\nDESCRIPTION:{event.get('bodyPreview', '')}\n")
+                    f.write(f"UID:{event.get('iCalUId', '')}\n")
+                    f.write(f"LOCATION:{event.get('location', {})['displayName']}\n")
+                    f.write(f"CLASS:{event.get('showAs', '')}\n")
+                    f.write(f"STATUS:{event.get('responseStatus', {})['response']}\n")
+                    if 'onlineMeeting' in event and event['onlineMeeting']:
+                        f.write(f"URL:{event.get('onlineMeeting', {}).get('joinUrl', '')}\n")
+                    f.write(f"DTSTART:{start.strftime('%Y%m%dT%H%M%S')}\n")
+                    f.write(f"DTEND:{end.strftime('%Y%m%dT%H%M%S')}\n")
+                    if 'recurrence' in event and event['recurrence']:  # Check if 'recurrence' exists and is not None
+                        for rule in event['recurrence']:
+                            if rule.startswith('RRULE'):
+                                rule_parts = rule.split(';')
+                                new_rule_parts = []
+                                for part in rule_parts:
+                                    if part.startswith('UNTIL='):
+                                        until_value = part.split('=')[1]
+                                        until_date = parser.isoparse(until_value)
+                                        if start.tzinfo is not None and until_date.tzinfo is None:
+                                            until_date = until_date.replace(tzinfo=UTC)
+                                        new_rule_parts.append(f"UNTIL={until_date.strftime('%Y%m%dT%H%M%SZ')}")
+                                    else:
+                                        new_rule_parts.append(part)
+                                rule = ';'.join(new_rule_parts)
+                                f.write(f"{rule}\n")
+                    f.write("END:VEVENT\n")
+            f.write("END:VCALENDAR\n")
+        progress.console.print(f"Saved events to {output_file}")
+    else:
+        progress.console.print(f"[DRY-RUN] Would save events to {output_file}")
-    total_events = total.get('@odata.count', 0)
-    progress.update(task_id, total=total_events)
-    print(f"Total events in calendar: {total_events}")
-    # Fetch events with pagination and expand recurring events
-    events_url = 'https://graph.microsoft.com/v1.0/me/events?$top=100&$expand=instances'
-    events = []
-    progress.console.print("Fetching Calendar events...")
-    while events_url:
-        response_data = await fetch_with_aiohttp(events_url, headers)
-        events.extend(response_data.get('value', []))
-        events_url = response_data.get('@odata.nextLink')
-        progress.advance(task_id, len(response_data.get('value', [])))
-    # Save events to a file in iCalendar format
-    output_file = f'output_ics/outlook_events_latest.ics'
-    if not dry_run:
-        os.makedirs(os.path.dirname(output_file), exist_ok=True)
-        progress.console.print(f"Saving events to {output_file}...")
-        with open(output_file, 'w') as f:
-            f.write("BEGIN:VCALENDAR\nVERSION:2.0\n")
-            for event in events:
-                if 'start' in event and 'end' in event:
-                    start = parser.isoparse(event['start']['dateTime'])
-                    end = parser.isoparse(event['end']['dateTime'])
-                    f.write(f"BEGIN:VEVENT\nSUMMARY:{event['subject']}\n")
-                    f.write(f"DTSTART:{start.strftime('%Y%m%dT%H%M%S')}\n")
-                    f.write(f"DTEND:{end.strftime('%Y%m%dT%H%M%S')}\n")
-                    if 'recurrence' in event and event['recurrence']:  # Check if 'recurrence' exists and is not None
-                        for rule in event['recurrence']:
-                            if rule.startswith('RRULE'):
-                                rule_parts = rule.split(';')
-                                new_rule_parts = []
-                                for part in rule_parts:
-                                    if part.startswith('UNTIL='):
-                                        until_value = part.split('=')[1]
-                                        until_date = parser.isoparse(until_value)
-                                        if start.tzinfo is not None and until_date.tzinfo is None:
-                                            until_date = until_date.replace(tzinfo=UTC)
-                                        new_rule_parts.append(f"UNTIL={until_date.strftime('%Y%m%dT%H%M%SZ')}")
-                                    else:
-                                        new_rule_parts.append(part)
-                                rule = ';'.join(new_rule_parts)
-                                f.write(f"{rule}\n")
-                    f.write("END:VEVENT\n")
-            f.write("END:VCALENDAR\n")
-        progress.console.print(f"Saved events to {output_file}")
-    else:
-        progress.console.print(f"[DRY-RUN] Would save events to {output_file}")
# Function to check if the cache is still valid
def is_cache_valid():
    if 'timestamp' in cache_timestamp and 'max_age' in cache_timestamp:
        current_time = time.time()
        cache_expiry_time = cache_timestamp['timestamp'] + cache_timestamp['max_age']
        return current_time < cache_expiry_time
    return False

# Function to create Maildir structure
def create_maildir_structure(base_path):
    os.makedirs(os.path.join(base_path, 'cur'), exist_ok=True)
    os.makedirs(os.path.join(base_path, 'new'), exist_ok=True)
    os.makedirs(os.path.join(base_path, 'tmp'), exist_ok=True)
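# Note: in the Maildir convention, messages are delivered to new/ and moved to cur/
# once a client has seen them; the sync logic above keys off exactly that transition.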
+async def save_mime_to_maildir_async(maildir_path, email_data, attachments_dir, headers, progress):
+    # Create a new EmailMessage object
+    msg = EmailMessage()
+    # Determine the directory based on isRead
+    target_dir = 'cur' if email_data.get('isRead', False) else 'new'
+    id = email_data.get('id', '')
+    if not id:
+        progress.console.print(f"Message ID not found. Skipping save.")
+        return
+    email_filename = f"{id}.eml"
+    email_filepath = os.path.join(maildir_path, target_dir, email_filename)
+    # Check if the file already exists
+    if os.path.exists(email_filepath):
+        progress.console.print(f"Message {id} already exists in {target_dir}. Skipping save.")
+        return
+    # Fetch the full MIME payload from the API
+    mime_url = f'https://graph.microsoft.com/v1.0/me/messages/{id}/$value'
+    try:
+        async with aiohttp.ClientSession() as session:
+            async with session.get(mime_url, headers=headers) as response:
+                if response.status != 200:
+                    raise Exception(f"Failed to fetch MIME payload for {id}: {response.status} {await response.text()}")
+                mime_payload = await response.text()
+        # Save the MIME payload to the Maildir
+        os.makedirs(os.path.dirname(email_filepath), exist_ok=True)
+        with open(email_filepath, 'w') as f:
+            f.write(mime_payload)
+        progress.console.print(f"Saved message {id} to {target_dir}.")
+    except Exception as e:
+        progress.console.print(f"Failed to save message {id}: {e}")
def save_email_to_maildir(maildir_path, email_data, attachments_dir, progress):
    # Create a new EmailMessage object
    msg = EmailMessage()
@@ -402,7 +456,7 @@ async def main():
        f.write(cache.serialize())
    access_token = token_response['access_token']
-    headers = {'Authorization': f'Bearer {access_token}'}
+    headers = {'Authorization': f'Bearer {access_token}', 'Prefer': 'outlook.body-content-type="text"'}
    accounts = app.get_accounts()
    if not accounts:
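# Note: the added Prefer header asks Graph to return message bodies as plain text
# rather than HTML, presumably to reduce reliance on html2text conversion.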