wip refactoring

Tim Bendt
2025-05-12 17:19:34 -06:00
parent d75f16c25d
commit 7123ff1f43
13 changed files with 1258 additions and 421 deletions

View File

@@ -0,0 +1,3 @@
"""
Microsoft Graph API module for interacting with Microsoft 365 services.
"""

View File

@@ -0,0 +1,64 @@
"""
Authentication module for Microsoft Graph API.
"""
import os
import msal
def get_access_token(scopes):
"""
Authenticate with Microsoft Graph API and obtain an access token.
Args:
scopes (list): List of scopes to request.
Returns:
tuple: (access_token, headers) where access_token is the token string
and headers is a dict with Authorization header.
Raises:
ValueError: If environment variables are missing.
Exception: If authentication fails.
"""
# Read Azure app credentials from environment variables
client_id = os.getenv('AZURE_CLIENT_ID')
tenant_id = os.getenv('AZURE_TENANT_ID')
if not client_id or not tenant_id:
raise ValueError("Please set the AZURE_CLIENT_ID and AZURE_TENANT_ID environment variables.")
# Token cache
cache = msal.SerializableTokenCache()
cache_file = 'token_cache.bin'
if os.path.exists(cache_file):
cache.deserialize(open(cache_file, 'r').read())
# Authentication
authority = f'https://login.microsoftonline.com/{tenant_id}'
app = msal.PublicClientApplication(client_id, authority=authority, token_cache=cache)
accounts = app.get_accounts()
if accounts:
token_response = app.acquire_token_silent(scopes, account=accounts[0])
else:
flow = app.initiate_device_flow(scopes=scopes)
if 'user_code' not in flow:
raise Exception("Failed to create device flow")
from rich import print
from rich.panel import Panel
print(Panel(flow['message'], border_style="magenta", padding=2, title="MSAL Login Flow Link"))
token_response = app.acquire_token_by_device_flow(flow)
if 'access_token' not in token_response:
raise Exception("Failed to acquire token")
# Save token cache
with open(cache_file, 'w') as f:
f.write(cache.serialize())
access_token = token_response['access_token']
headers = {'Authorization': f'Bearer {access_token}', 'Prefer': 'outlook.body-content-type="text"'}
return access_token, headers
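
For reference, a minimal usage sketch (the import path apis.microsoft_graph.auth is inferred from the package layout, and the scope names are illustrative assumptions, not taken from this commit):

from apis.microsoft_graph.auth import get_access_token

# Mail.ReadWrite / Calendars.Read are assumed example scopes
access_token, headers = get_access_token(['Mail.ReadWrite', 'Calendars.Read'])
# headers can be passed straight to the client helpers later in this commit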

View File

@@ -0,0 +1,56 @@
"""
Calendar operations for Microsoft Graph API.
"""
import os
from datetime import datetime, timedelta
from apis.microsoft_graph.client import fetch_with_aiohttp
async def fetch_calendar_events(headers, days_back=1, days_forward=6, start_date=None, end_date=None):
"""
Fetch calendar events from Microsoft Graph API.
Args:
headers (dict): Headers including authentication.
days_back (int): Number of days to look back.
days_forward (int): Number of days to look forward.
start_date (datetime): Optional start date, overrides days_back if provided.
end_date (datetime): Optional end date, overrides days_forward if provided.
Returns:
tuple: (events, total_count) where events is a list of event dictionaries
and total_count is the total number of events.
"""
# Calculate date range
if start_date is None:
start_date = datetime.now() - timedelta(days=days_back)
if end_date is None:
end_date = start_date + timedelta(days=days_forward)
# Format dates for API
start_date_str = start_date.strftime('%Y-%m-%dT00:00:00Z')
end_date_str = end_date.strftime('%Y-%m-%dT23:59:59Z')
# Prepare the API query
calendar_url = (
f'https://graph.microsoft.com/v1.0/me/calendarView?'
f'startDateTime={start_date_str}&endDateTime={end_date_str}&'
f'$select=id,subject,organizer,start,end,location,isAllDay,showAs,sensitivity'
)
events = []
# Make the API request
response_data = await fetch_with_aiohttp(calendar_url, headers)
events.extend(response_data.get('value', []))
# Check if there are more events (pagination)
next_link = response_data.get('@odata.nextLink')
while next_link:
response_data = await fetch_with_aiohttp(next_link, headers)
events.extend(response_data.get('value', []))
next_link = response_data.get('@odata.nextLink')
# Return events and total count
return events, len(events)
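
A hedged sketch of calling this from a script (the import path apis.microsoft_graph.calendar and the Calendars.Read scope are assumptions based on the package layout):

import asyncio
from apis.microsoft_graph.auth import get_access_token
from apis.microsoft_graph.calendar import fetch_calendar_events

async def main():
    _, headers = get_access_token(['Calendars.Read'])  # assumed scope
    events, count = await fetch_calendar_events(headers, days_back=0, days_forward=7)
    for event in events:
        # start is a dateTimeTimeZone object in Graph responses
        print(event['start']['dateTime'], event['subject'])
    print(f"{count} events in the next week")

asyncio.run(main())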

View File

@@ -0,0 +1,85 @@
"""
HTTP client for Microsoft Graph API.
"""
import aiohttp
import asyncio
import orjson
# Define a global semaphore for throttling
semaphore = asyncio.Semaphore(4)
async def fetch_with_aiohttp(url, headers):
"""
Fetch data from Microsoft Graph API.
Args:
url (str): The URL to fetch data from.
headers (dict): Headers including authentication.
Returns:
dict: JSON response data.
Raises:
Exception: If the request fails.
"""
async with semaphore:
async with aiohttp.ClientSession() as session:
async with session.get(url, headers=headers) as response:
if response.status != 200:
raise Exception(f"Failed to fetch {url}: {response.status} {await response.text()}")
raw_bytes = await response.read()
content_length = response.headers.get('Content-Length')
if content_length and len(raw_bytes) != int(content_length):
print("Warning: Incomplete response received!")
return None
return orjson.loads(raw_bytes)
async def post_with_aiohttp(url, headers, json_data):
"""
Post data to Microsoft Graph API.
Args:
url (str): The URL to post data to.
headers (dict): Headers including authentication.
json_data (dict): JSON data to post.
Returns:
int: HTTP status code.
"""
async with semaphore:
async with aiohttp.ClientSession() as session:
async with session.post(url, headers=headers, json=json_data) as response:
return response.status
async def patch_with_aiohttp(url, headers, json_data):
"""
Patch data to Microsoft Graph API.
Args:
url (str): The URL to patch data to.
headers (dict): Headers including authentication.
json_data (dict): JSON data to patch.
Returns:
int: HTTP status code.
"""
async with semaphore:
async with aiohttp.ClientSession() as session:
async with session.patch(url, headers=headers, json=json_data) as response:
return response.status
async def delete_with_aiohttp(url, headers):
"""
Delete data from Microsoft Graph API.
Args:
url (str): The URL to delete data from.
headers (dict): Headers including authentication.
Returns:
int: HTTP status code.
"""
async with semaphore:
async with aiohttp.ClientSession() as session:
async with session.delete(url, headers=headers) as response:
return response.status
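
Since all four helpers share the module-level Semaphore(4), callers can fan out requests with asyncio.gather and at most four will hit the API at once. A minimal sketch (the URL list is a placeholder):

import asyncio
from apis.microsoft_graph.client import fetch_with_aiohttp

async def fetch_many(urls, headers):
    # gather schedules every coroutine immediately, but each one
    # waits on the shared semaphore before opening a connection
    return await asyncio.gather(*(fetch_with_aiohttp(url, headers) for url in urls))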

View File

@@ -0,0 +1,204 @@
"""
Mail operations for Microsoft Graph API.
"""
import os
import re
import glob
from typing import Set
import aiohttp
from apis.microsoft_graph.client import fetch_with_aiohttp, patch_with_aiohttp, post_with_aiohttp, delete_with_aiohttp
async def fetch_mail_async(maildir_path, attachments_dir, headers, progress, task_id, dry_run=False, download_attachments=False):
"""
Fetch mail from Microsoft Graph API and save to Maildir.
Args:
maildir_path (str): Path to the Maildir.
attachments_dir (str): Path to save attachments.
headers (dict): Headers including authentication.
progress: Progress instance for updating progress bars.
task_id: ID of the task in the progress bar.
dry_run (bool): If True, don't actually make changes.
download_attachments (bool): If True, download email attachments.
Returns:
None
"""
from utils.mail_utils.maildir import save_mime_to_maildir_async
from utils.mail_utils.helpers import truncate_id
mail_url = 'https://graph.microsoft.com/v1.0/me/mailFolders/inbox/messages?$top=100&$orderby=receivedDateTime asc&$select=id,subject,from,toRecipients,ccRecipients,receivedDateTime,isRead'
messages = []
# Fetch the total count of messages in the inbox
inbox_url = 'https://graph.microsoft.com/v1.0/me/mailFolders/inbox'
response = await fetch_with_aiohttp(inbox_url, headers)
total_messages = response.get('totalItemCount', 0)
progress.update(task_id, total=total_messages)
while mail_url:
try:
response_data = await fetch_with_aiohttp(mail_url, headers)
except Exception as e:
progress.console.print(f"Error fetching messages: {e}")
continue
messages.extend(response_data.get('value', []))
progress.advance(task_id, len(response_data.get('value', [])))
# Get the next page URL from @odata.nextLink
mail_url = response_data.get('@odata.nextLink')
inbox_msg_ids = set(message['id'] for message in messages)
progress.update(task_id, completed=(len(messages) / 2))
new_dir = os.path.join(maildir_path, 'new')
cur_dir = os.path.join(maildir_path, 'cur')
new_files = set(glob.glob(os.path.join(new_dir, '*.eml*')))
cur_files = set(glob.glob(os.path.join(cur_dir, '*.eml*')))
for filename in Set.union(cur_files, new_files):
message_id = filename.split('.')[0].split('/')[-1] # Extract the Message-ID from the filename
if (message_id not in inbox_msg_ids):
if not dry_run:
progress.console.print(f"Deleting {filename} from inbox")
os.remove(filename)
else:
progress.console.print(f"[DRY-RUN] Would delete {filename} from inbox")
for message in messages:
progress.console.print(f"Processing message: {message.get('subject', 'No Subject')}", end='\r')
await save_mime_to_maildir_async(maildir_path, message, attachments_dir, headers, progress, dry_run, download_attachments)
progress.update(task_id, advance=0.5)
progress.update(task_id, completed=len(messages))
progress.console.print(f"\nFinished saving {len(messages)} messages.")

async def archive_mail_async(maildir_path, headers, progress, task_id, dry_run=False):
    """
    Archive mail from Maildir to Microsoft Graph API archive folder.

    Args:
        maildir_path (str): Path to the Maildir.
        headers (dict): Headers including authentication.
        progress: Progress instance for updating progress bars.
        task_id: ID of the task in the progress bar.
        dry_run (bool): If True, don't actually make changes.

    Returns:
        None
    """
    archive_dir = os.path.join(maildir_path, '.Archives')
    archive_files = glob.glob(os.path.join(archive_dir, '**', '*.eml*'), recursive=True)
    progress.update(task_id, total=len(archive_files))

    # Resolve the server-side 'Archive' folder ID
    folder_response = await fetch_with_aiohttp('https://graph.microsoft.com/v1.0/me/mailFolders', headers)
    folders = folder_response.get('value', [])
    archive_folder_id = next((folder.get('id') for folder in folders if folder.get('displayName', '').lower() == 'archive'), None)
    if not archive_folder_id:
        raise Exception("No folder named 'Archive' found on the server.")

    for filepath in archive_files:
        message_id = os.path.basename(filepath).split('.')[0]  # Extract the Message-ID from the filename
        if not dry_run:
            status = await post_with_aiohttp(
                f'https://graph.microsoft.com/v1.0/me/messages/{message_id}/microsoft.graph.move',
                headers,
                {'destinationId': archive_folder_id}
            )
            if status in (201, 204):  # 201 Created indicates success
                progress.console.print(f"Moved message to 'Archive': {message_id}")
            elif status == 404:
                # Message no longer exists on the server; drop the local copy
                os.remove(filepath)
                progress.console.print(f"Message not found on server, removed local copy: {message_id}")
            else:
                progress.console.print(f"Failed to move message to 'Archive': {message_id}, {status}")
        else:
            progress.console.print(f"[DRY-RUN] Would move message to 'Archive' folder: {message_id}")
        progress.advance(task_id)

async def delete_mail_async(maildir_path, headers, progress, task_id, dry_run=False):
    """
    Delete mail from Maildir and Microsoft Graph API.

    Args:
        maildir_path (str): Path to the Maildir.
        headers (dict): Headers including authentication.
        progress: Progress instance for updating progress bars.
        task_id: ID of the task in the progress bar.
        dry_run (bool): If True, don't actually make changes.

    Returns:
        None
    """
    trash_dir = os.path.join(maildir_path, '.Trash', 'cur')
    trash_files = set(glob.glob(os.path.join(trash_dir, '*.eml*')))
    progress.update(task_id, total=len(trash_files))

    for filepath in trash_files:
        message_id = os.path.basename(filepath).split('.')[0]  # Extract the Message-ID from the filename
        if not dry_run:
            progress.console.print(f"Moving message to trash: {message_id}")
            status = await delete_with_aiohttp(
                f'https://graph.microsoft.com/v1.0/me/messages/{message_id}',
                headers
            )
            # 204 No Content means deleted; 404 means already gone server-side
            if status in (204, 404):
                os.remove(filepath)  # Remove the file from local trash
        else:
            progress.console.print(f"[DRY-RUN] Would delete message: {message_id}")
        progress.advance(task_id)

async def synchronize_maildir_async(maildir_path, headers, progress, task_id, dry_run=False):
    """
    Synchronize Maildir with Microsoft Graph API.

    Args:
        maildir_path (str): Path to the Maildir.
        headers (dict): Headers including authentication.
        progress: Progress instance for updating progress bars.
        task_id: ID of the task in the progress bar.
        dry_run (bool): If True, don't actually make changes.

    Returns:
        None
    """
    from utils.mail_utils.helpers import load_last_sync_timestamp, save_sync_timestamp, truncate_id

    last_sync = load_last_sync_timestamp()

    # Find messages moved from "new" to "cur" and mark them as read
    new_dir = os.path.join(maildir_path, 'new')
    cur_dir = os.path.join(maildir_path, 'cur')
    new_files = set(glob.glob(os.path.join(new_dir, '*.eml*')))
    cur_files = set(glob.glob(os.path.join(cur_dir, '*.eml*')))
    moved_to_cur = [os.path.basename(f) for f in cur_files - new_files]
    progress.update(task_id, total=len(moved_to_cur))

    for filename in moved_to_cur:
        # TODO: this isn't scalable, we should use a more efficient way to check if the file was modified
        if os.path.getmtime(os.path.join(cur_dir, filename)) < last_sync:
            progress.update(task_id, advance=1)
            continue
        # Extract the Message-ID by stripping the extension and the maildir ":2,<flags>" info suffix
        message_id = re.sub(r":2.+", "", filename.split('.')[0])
        if not dry_run:
            status = await patch_with_aiohttp(
                f'https://graph.microsoft.com/v1.0/me/messages/{message_id}',
                headers,
                {'isRead': True}
            )
            if status == 404:
                # Message is gone on the server; drop the stale local copy
                os.remove(os.path.join(cur_dir, filename))
        else:
            progress.console.print(f"[DRY-RUN] Would mark message as read: {truncate_id(message_id)}")
        progress.advance(task_id)

    # Save the current sync timestamp
    if not dry_run:
        save_sync_timestamp()
    else:
        progress.console.print("[DRY-RUN] Would save sync timestamp.")