wip refactoring
fetch_outlook.py
@@ -1,460 +1,188 @@
"""
Fetch and synchronize emails and calendar events from Microsoft Outlook (Graph API).
"""
import argparse
import asyncio
import base64
import glob
import json
import os
import re
import time

from datetime import datetime, timedelta
from email.message import EmailMessage
from email.utils import format_datetime

from dateutil import parser
from dateutil.tz import UTC
from rich import print
from rich.panel import Panel
from rich.progress import Progress, SpinnerColumn, MofNCompleteColumn

import aiohttp
import html2text
import msal
import orjson

# Filepath for caching timestamp
cache_timestamp_file = 'cache_timestamp.json'

# Filepath for sync timestamp
sync_timestamp_file = 'sync_timestamp.json'


def load_last_sync_timestamp():
    """Return the last sync timestamp, or 0 if none has been saved yet."""
    if os.path.exists(sync_timestamp_file):
        with open(sync_timestamp_file, 'r') as f:
            return json.load(f).get('last_sync', 0)
    return 0


def save_sync_timestamp():
    """Persist the current time as the last sync timestamp."""
    with open(sync_timestamp_file, 'w') as f:
        json.dump({'last_sync': time.time()}, f)
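
# sync_timestamp.json holds a single POSIX timestamp, e.g. {"last_sync": 1715000000.0}.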

# Import the refactored modules
from apis.microsoft_graph.auth import get_access_token
from apis.microsoft_graph.mail import fetch_mail_async, archive_mail_async, delete_mail_async, synchronize_maildir_async
from apis.microsoft_graph.calendar import fetch_calendar_events
from utils.calendar_utils import save_events_to_vdir, save_events_to_file
from utils.mail_utils.helpers import ensure_directory_exists

# Command-line arguments
arg_parser = argparse.ArgumentParser(description="Fetch and synchronize emails.")
arg_parser.add_argument("--dry-run", action="store_true", help="Run in dry-run mode without making changes.", default=False)
arg_parser.add_argument("--vdir", help="Output calendar events in vdir format to the specified directory (each event in its own file).", default=None)
arg_parser.add_argument("--icsfile", help="Output calendar events into this ICS file path.", default=None)
arg_parser.add_argument("--org", help="Organization name for the subfolder that stores emails and calendar events.", default="corteva")
arg_parser.add_argument("--days-back", type=int, help="Number of days to look back for calendar events.", default=1)
arg_parser.add_argument("--days-forward", type=int, help="Number of days to look forward for calendar events.", default=6)
arg_parser.add_argument("--continue-iteration", action="store_true", help="Enable interactive mode to continue fetching more date ranges.", default=False)
arg_parser.add_argument("--download-attachments", action="store_true", help="Download email attachments.", default=False)
args = arg_parser.parse_args()
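
# Example invocations (illustrative paths/values):
#   python fetch_outlook.py --dry-run
#   python fetch_outlook.py --org corteva --vdir ~/.calendars --days-back 1 --days-forward 6
#   python fetch_outlook.py --icsfile output_ics --download-attachments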

# Convenience flag from the parsed arguments
dry_run = args.dry_run

# Global semaphore throttling concurrent Graph API requests
semaphore = asyncio.Semaphore(4)


async def fetch_with_aiohttp(url, headers):
    async with semaphore:
        async with aiohttp.ClientSession() as session:
            async with session.get(url, headers=headers) as response:
                if response.status != 200:
                    raise Exception(f"Failed to fetch {url}: {response.status} {await response.text()}")
                raw_bytes = await response.read()
                content_length = response.headers.get('Content-Length')
                if content_length and len(raw_bytes) != int(content_length):
                    print("Warning: Incomplete response received!")
                    return None
                return orjson.loads(raw_bytes)


async def post_with_aiohttp(url, headers, json_data):
    async with semaphore:
        async with aiohttp.ClientSession() as session:
            async with session.post(url, headers=headers, json=json_data) as response:
                return response.status


async def patch_with_aiohttp(url, headers, json_data):
    async with semaphore:
        async with aiohttp.ClientSession() as session:
            async with session.patch(url, headers=headers, json=json_data) as response:
                return response.status


async def delete_with_aiohttp(url, headers):
    async with semaphore:
        async with aiohttp.ClientSession() as session:
            async with session.delete(url, headers=headers) as response:
                return response.status
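
# Each helper above opens a fresh ClientSession per request and acquires the shared
# semaphore first, so at most four Graph requests are in flight at once. A retry
# wrapper could compose with them like this (hypothetical helper, not part of this
# module):
#
#   async def fetch_with_retry(url, headers, attempts=3):
#       for attempt in range(attempts):
#           try:
#               return await fetch_with_aiohttp(url, headers)
#           except Exception:
#               if attempt == attempts - 1:
#                   raise
#               await asyncio.sleep(2 ** attempt)  # exponential backoff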


async def synchronize_maildir_async(maildir_path, headers, progress, task_id):
    last_sync = load_last_sync_timestamp()

    # Find messages moved from "new" to "cur" and mark them as read
    new_dir = os.path.join(maildir_path, 'new')
    cur_dir = os.path.join(maildir_path, 'cur')
    new_files = set(glob.glob(os.path.join(new_dir, '*.eml*')))
    cur_files = set(glob.glob(os.path.join(cur_dir, '*.eml*')))

    moved_to_cur = [os.path.basename(f) for f in cur_files - new_files]
    progress.update(task_id, total=len(moved_to_cur))
    for filename in moved_to_cur:
        # TODO: checking mtimes file-by-file isn't scalable; find a more efficient change check
        if os.path.getmtime(os.path.join(cur_dir, filename)) < last_sync:
            progress.update(task_id, advance=1)
            continue
        # Extract the Message-ID from the filename, dropping any Maildir ":2,..." flag suffix
        message_id = re.sub(r"\:2.+", "", filename.split('.')[0])
        if not dry_run:
            status = await patch_with_aiohttp(
                f'https://graph.microsoft.com/v1.0/me/messages/{message_id}',
                headers,
                {'isRead': True}
            )
            if status == 404:
                # The message no longer exists on the server; drop the local copy
                os.remove(os.path.join(cur_dir, filename))
        else:
            progress.console.print(f"[DRY-RUN] Would mark message as read: {message_id}")
        progress.advance(task_id)

    # Save the current sync timestamp
    if not dry_run:
        save_sync_timestamp()
    else:
        progress.console.print("[DRY-RUN] Would save sync timestamp.")


async def fetch_mail_async(maildir_path, attachments_dir, headers, progress, task_id):
    mail_url = 'https://graph.microsoft.com/v1.0/me/mailFolders/inbox/messages?$top=100&$orderby=receivedDateTime asc&$select=id,subject,from,toRecipients,ccRecipients,receivedDateTime,isRead'
    messages = []

    # Fetch the total count of messages in the inbox
    inbox_url = 'https://graph.microsoft.com/v1.0/me/mailFolders/inbox'

    response = await fetch_with_aiohttp(inbox_url, headers)

    total_messages = response.get('totalItemCount', 0)
    progress.update(task_id, total=total_messages)

    while mail_url:
        try:
            response_data = await fetch_with_aiohttp(mail_url, headers)
        except Exception as e:
            # Stop paging on a persistent error instead of retrying the same URL forever
            progress.console.print(f"Error fetching messages: {e}")
            break
        messages.extend(response_data.get('value', []))
        progress.advance(task_id, len(response_data.get('value', [])))

        # Get the next page URL from @odata.nextLink
        mail_url = response_data.get('@odata.nextLink')

    inbox_msg_ids = set(message['id'] for message in messages)
    # The first half of the bar covered listing; the second half covers saving
    progress.update(task_id, completed=(len(messages) / 2))
    new_dir = os.path.join(maildir_path, 'new')
    cur_dir = os.path.join(maildir_path, 'cur')
    new_files = set(glob.glob(os.path.join(new_dir, '*.eml*')))
    cur_files = set(glob.glob(os.path.join(cur_dir, '*.eml*')))

    for filename in cur_files | new_files:
        message_id = filename.split('.')[0].split('/')[-1]  # Extract the Message-ID from the filename
        if message_id not in inbox_msg_ids:
            if not dry_run:
                progress.console.print(f"Deleting {filename} from inbox")
                os.remove(filename)
            else:
                progress.console.print(f"[DRY-RUN] Would delete {filename} from inbox")

    for message in messages:
        progress.console.print(f"Processing message: {message.get('subject', 'No Subject')}", end='\r')
        await save_mime_to_maildir_async(maildir_path, message, attachments_dir, headers, progress)
        progress.update(task_id, advance=0.5)
    progress.update(task_id, completed=len(messages))
    progress.console.print(f"\nFinished saving {len(messages)} messages.")


async def archive_mail_async(maildir_path, headers, progress, task_id):
    archive_dir = os.path.join(maildir_path, '.Archives')
    archive_files = glob.glob(os.path.join(archive_dir, '**', '*.eml*'), recursive=True)
    progress.update(task_id, total=len(archive_files))

    folder_response = await fetch_with_aiohttp('https://graph.microsoft.com/v1.0/me/mailFolders', headers)
    folders = folder_response.get('value', [])
    archive_folder_id = next((folder.get('id') for folder in folders if folder.get('displayName', '').lower() == 'archive'), None)

    if not archive_folder_id:
        raise Exception("No folder named 'Archive' found on the server.")

    for filepath in archive_files:
        message_id = os.path.basename(filepath).split('.')[0]  # Extract the Message-ID from the filename

        if not dry_run:
            status = await post_with_aiohttp(
                f'https://graph.microsoft.com/v1.0/me/messages/{message_id}/microsoft.graph.move',
                headers,
                {'destinationId': archive_folder_id}
            )
            if status in (201, 204):  # 201 Created indicates a successful move
                progress.console.print(f"Moved message to 'Archive': {message_id}")
            elif status == 404:
                os.remove(filepath)  # Remove the file from the local archive if not found on the server
                progress.console.print(f"Message not found on server, removed local copy: {message_id}")
            else:
                progress.console.print(f"Failed to move message to 'Archive': {message_id}, {status}")
        else:
            progress.console.print(f"[DRY-RUN] Would move message to 'Archive' folder: {message_id}")
        progress.advance(task_id)


async def delete_mail_async(maildir_path, headers, progress, task_id):
    trash_dir = os.path.join(maildir_path, '.Trash', 'cur')
    trash_files = set(glob.glob(os.path.join(trash_dir, '*.eml*')))
    progress.update(task_id, total=len(trash_files))

    for filepath in trash_files:
        message_id = os.path.basename(filepath).split('.')[0]  # Extract the Message-ID from the filename
        if not dry_run:
            progress.console.print(f"Deleting message from server: {message_id}")
            status = await delete_with_aiohttp(
                f'https://graph.microsoft.com/v1.0/me/messages/{message_id}',
                headers
            )
            if status == 204 or status == 404:
                os.remove(filepath)  # Remove the file from local trash
        else:
            progress.console.print(f"[DRY-RUN] Would delete message: {message_id}")
        progress.advance(task_id)
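
# Graph returns 204 No Content for a successful DELETE; a 404 means the message is
# already gone server-side, so the local copy is removed in both cases above.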

vdir_path = args.vdir
ics_path = args.icsfile
org_name = args.org
days_back = args.days_back
days_forward = args.days_forward
continue_iteration = args.continue_iteration
download_attachments = args.download_attachments


# Previous inline implementation (superseded by the refactored version below,
# which delegates to apis.microsoft_graph.calendar.fetch_calendar_events):
async def fetch_calendar_async(headers, progress, task_id):
    yesterday = datetime.now().replace(hour=0, minute=0, second=0) - timedelta(days=1)
    end_of_today = datetime.now().replace(hour=23, minute=59, second=59)
    six_days_future = end_of_today + timedelta(days=6)
    # example: https://graph.microsoft.com/v1.0/me/calendarView?startDateTime=2025-05-06T00:00:00&endDateTime=2025-05-13T23:59:59.999999&$count=true&$select=id
    event_base_url = f"https://graph.microsoft.com/v1.0/me/calendarView?startDateTime={yesterday.isoformat()}&endDateTime={six_days_future.isoformat()}"
    total_event_url = f"{event_base_url}&$count=true&$select=id"

    total = await fetch_with_aiohttp(total_event_url, headers)

    total_events = total.get('@odata.count', 0) + 1
    progress.update(task_id, total=total_events)
    calendar_url = f"{event_base_url}&$top=100&$select=start,end,iCalUid,subject,bodyPreview,webLink,location,recurrence,showAs,responseStatus,onlineMeeting"
    events = []
    if total_events > 100:
        progress.update(task_id, total=total_events + total_events % 100)
    while calendar_url:
        response_data = await fetch_with_aiohttp(calendar_url, headers)
        events.extend(response_data.get('value', []))
        progress.advance(task_id, 1)

        # Get the next page URL from @odata.nextLink
        calendar_url = response_data.get('@odata.nextLink')

    output_file = 'output_ics/outlook_events_latest.ics'
    if not dry_run:
        os.makedirs(os.path.dirname(output_file), exist_ok=True)
        progress.console.print(f"Saving events to {output_file}...")
        with open(output_file, 'w') as f:
            f.write("BEGIN:VCALENDAR\nVERSION:2.0\n")
            for event in events:
                progress.advance(task_id)
                if 'start' in event and 'end' in event:
                    start = parser.isoparse(event['start']['dateTime']).astimezone(UTC)
                    end = parser.isoparse(event['end']['dateTime']).astimezone(UTC)
                    f.write(f"BEGIN:VEVENT\nSUMMARY:{event['subject']}\nDESCRIPTION:{event.get('bodyPreview', '')}\n")
                    f.write(f"UID:{event.get('iCalUId', '')}\n")
                    f.write(f"LOCATION:{event.get('location', {}).get('displayName', '')}\n")
                    f.write(f"CLASS:{event.get('showAs', '')}\n")
                    f.write(f"STATUS:{event.get('responseStatus', {}).get('response', '')}\n")
                    if 'onlineMeeting' in event and event['onlineMeeting']:
                        f.write(f"URL:{event.get('onlineMeeting', {}).get('joinUrl', '')}\n")
                    f.write(f"DTSTART:{start.strftime('%Y%m%dT%H%M%S')}\n")
                    f.write(f"DTEND:{end.strftime('%Y%m%dT%H%M%S')}\n")
                    if 'recurrence' in event and event['recurrence']:  # 'recurrence' may be absent or None
                        for rule in event['recurrence']:
                            if rule.startswith('RRULE'):
                                # Normalize UNTIL to UTC so it matches the timezone-aware DTSTART
                                rule_parts = rule.split(';')
                                new_rule_parts = []
                                for part in rule_parts:
                                    if part.startswith('UNTIL='):
                                        until_value = part.split('=')[1]
                                        until_date = parser.isoparse(until_value)
                                        if start.tzinfo is not None and until_date.tzinfo is None:
                                            until_date = until_date.replace(tzinfo=UTC)
                                        new_rule_parts.append(f"UNTIL={until_date.strftime('%Y%m%dT%H%M%SZ')}")
                                    else:
                                        new_rule_parts.append(part)
                                rule = ';'.join(new_rule_parts)
                            f.write(f"{rule}\n")
                    f.write("END:VEVENT\n")
            f.write("END:VCALENDAR\n")
        progress.console.print(f"Saved events to {output_file}")
    else:
        progress.console.print(f"[DRY-RUN] Would save events to {output_file}")


async def fetch_calendar_async(headers, progress, task_id):
    """
    Fetch calendar events and save them in the appropriate format.

    Args:
        headers: Authentication headers for Microsoft Graph API
        progress: Progress instance for updating progress bars
        task_id: ID of the task in the progress bar

    Returns:
        List of event dictionaries

    Raises:
        Exception: If there's an error fetching or saving events
    """
    try:
        # Use the utility function to fetch calendar events
        progress.console.print("[cyan]Fetching events from Microsoft Graph API...[/cyan]")
        events, total_events = await fetch_calendar_events(
            headers=headers,
            days_back=days_back,
            days_forward=days_forward
        )

        progress.console.print(f"[cyan]Got {len(events)} events from API (reported total: {total_events})[/cyan]")

        # Update progress bar with total events
        progress.update(task_id, total=total_events)

        # Save events to the appropriate format
        if not dry_run:
            if vdir_path:
                # Create an org-specific directory within the vdir path
                org_vdir_path = os.path.join(vdir_path, org_name)
                progress.console.print(f"[cyan]Saving events to vdir: {org_vdir_path}[/cyan]")
                save_events_to_vdir(events, org_vdir_path, progress, task_id, dry_run)
                progress.console.print(f"[green]Finished saving events to vdir: {org_vdir_path}[/green]")
            elif ics_path:
                # Save to a single ICS file in the output directory
                progress.console.print(f"[cyan]Saving events to ICS file: {ics_path}/events_latest.ics[/cyan]")
                save_events_to_file(events, f"{ics_path}/events_latest.ics", progress, task_id, dry_run)
                progress.console.print("[green]Finished saving events to ICS file[/green]")
            else:
                # No destination specified
                progress.console.print("[yellow]Warning: No destination path (--vdir or --icsfile) specified for calendar events.[/yellow]")
        else:
            progress.console.print(f"[DRY-RUN] Would save {len(events)} events to {'vdir format' if vdir_path else 'a single ICS file'}")
        progress.update(task_id, advance=len(events))

        # Interactive mode: ask whether to continue with the next date range
        if continue_iteration:
            # Move to the next date range
            next_start_date = datetime.now() - timedelta(days=days_back)
            next_end_date = next_start_date + timedelta(days=days_forward)

            progress.console.print(f"\nCurrent date range: {next_start_date.strftime('%Y-%m-%d')} to {next_end_date.strftime('%Y-%m-%d')}")

            user_response = input("\nContinue to iterate? [y/N]: ").strip().lower()

            while user_response == 'y':
                progress.console.print(f"\nFetching events for {next_start_date.strftime('%Y-%m-%d')} to {next_end_date.strftime('%Y-%m-%d')}...")

                # Reset the progress bar for the new fetch
                progress.update(task_id, completed=0, total=0)

                # Fetch events for the next date range
                next_events, next_total_events = await fetch_calendar_events(
                    headers=headers,
                    days_back=0,
                    days_forward=days_forward,
                    start_date=next_start_date,
                    end_date=next_end_date
                )

                # Update progress bar with total events
                progress.update(task_id, total=next_total_events)

                if not dry_run:
                    if vdir_path:
                        save_events_to_vdir(next_events, org_vdir_path, progress, task_id, dry_run)
                    else:
                        save_events_to_file(next_events, f'output_ics/outlook_events_{next_start_date.strftime("%Y%m%d")}.ics',
                                            progress, task_id, dry_run)
                else:
                    # Build the destination label first to avoid nesting quotes inside the f-string
                    target = 'vdir format' if vdir_path else f'output_ics/outlook_events_{next_start_date.strftime("%Y%m%d")}.ics'
                    progress.console.print(f"[DRY-RUN] Would save {len(next_events)} events to {target}")
                progress.update(task_id, advance=len(next_events))

                # Calculate the next date range
                next_start_date = next_end_date
                next_end_date = next_start_date + timedelta(days=days_forward)

                progress.console.print(f"\nNext date range would be: {next_start_date.strftime('%Y-%m-%d')} to {next_end_date.strftime('%Y-%m-%d')}")
                user_response = input("\nContinue to iterate? [y/N]: ").strip().lower()

        return events
    except Exception as e:
        progress.console.print(f"[red]Error fetching or saving calendar events: {str(e)}[/red]")
        import traceback
        progress.console.print(f"[red]{traceback.format_exc()}[/red]")
        progress.update(task_id, completed=True)
        return []
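
# For reference, each VEVENT written by the old inline ICS writer above comes out
# roughly like this (illustrative values):
#   BEGIN:VEVENT
#   SUMMARY:Team sync
#   UID:<iCalUId from Graph>
#   DTSTART:20250506T140000
#   DTEND:20250506T143000
#   END:VEVENT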


# Function to create the Maildir structure (previous version; superseded by the
# refactored version at the end of this section)
def create_maildir_structure(base_path):
    os.makedirs(os.path.join(base_path, 'cur'), exist_ok=True)
    os.makedirs(os.path.join(base_path, 'new'), exist_ok=True)
    os.makedirs(os.path.join(base_path, 'tmp'), exist_ok=True)


async def save_mime_to_maildir_async(maildir_path, email_data, attachments_dir, headers, progress):
    # Determine the target directory based on isRead
    target_dir = 'cur' if email_data.get('isRead', False) else 'new'
    message_id = email_data.get('id', '')
    if not message_id:
        progress.console.print("Message ID not found. Skipping save.")
        return
    email_filename = f"{message_id}.eml"
    email_filepath = os.path.join(maildir_path, target_dir, email_filename)

    # Check if the file already exists
    if os.path.exists(email_filepath):
        progress.console.print(f"Message {message_id} already exists in {target_dir}. Skipping save.")
        return

    # Fetch the full MIME payload from the API
    mime_url = f'https://graph.microsoft.com/v1.0/me/messages/{message_id}/$value'
    try:
        async with aiohttp.ClientSession() as session:
            async with session.get(mime_url, headers=headers) as response:
                if response.status != 200:
                    raise Exception(f"Failed to fetch MIME payload for {message_id}: {response.status} {await response.text()}")
                mime_payload = await response.text()

        # Save the MIME payload to the Maildir
        os.makedirs(os.path.dirname(email_filepath), exist_ok=True)
        with open(email_filepath, 'w') as f:
            f.write(mime_payload)
        progress.console.print(f"Saved message {message_id} to {target_dir}.")

    except Exception as e:
        progress.console.print(f"Failed to save message {message_id}: {e}")


def save_email_to_maildir(maildir_path, email_data, attachments_dir, progress):
    # Create a new EmailMessage object
    msg = EmailMessage()

    received_datetime = email_data.get('receivedDateTime', '')
    if received_datetime:
        parsed_datetime = parser.isoparse(received_datetime)
        msg['Date'] = format_datetime(parsed_datetime)
    else:
        msg['Date'] = ''

    msg['Message-ID'] = email_data.get('id', '')
    msg['Subject'] = email_data.get('subject', 'No Subject')
    msg['From'] = email_data.get('from', {}).get('emailAddress', {}).get('address', 'unknown@unknown.com')
    msg['To'] = ', '.join([recipient['emailAddress']['address'] for recipient in email_data.get('toRecipients', [])])
    msg['Cc'] = ', '.join([recipient['emailAddress']['address'] for recipient in email_data.get('ccRecipients', [])])

    # Convert the email body from HTML to Markdown
    body_html = email_data.get('body', {}).get('content', '')
    if email_data.get('body', {}).get('contentType', '').lower() == 'html':
        markdown_converter = html2text.HTML2Text()
        markdown_converter.ignore_images = True
        markdown_converter.ignore_links = True
        body_markdown = markdown_converter.handle(body_html)
    else:
        body_markdown = body_html

    # Remove everything between any alphanumeric BannerStart and BannerEnd markers
    body_markdown = re.sub(r'\w+BannerStart.*?\w+BannerEnd', '', body_markdown, flags=re.DOTALL)
    msg.set_content(body_markdown)

    # Download attachments
    progress.console.print(f"Downloading attachments for message: {msg['Message-ID']}")
    for attachment in email_data.get('attachments', []):
        attachment_name = attachment.get('name', 'unknown')
        attachment_content = attachment.get('contentBytes')
        if attachment_content:
            attachment_path = os.path.join(attachments_dir, attachment_name)
            if not dry_run:
                # contentBytes is base64-encoded; decode it before writing to disk
                attachment_bytes = base64.b64decode(attachment_content)
                with open(attachment_path, 'wb') as f:
                    f.write(attachment_bytes)
                msg.add_attachment(attachment_bytes, maintype='application', subtype='octet-stream', filename=attachment_name)
            else:
                progress.console.print(f"[DRY-RUN] Would save attachment to {attachment_path}")

    # Determine the target directory based on isRead
    target_dir = 'cur' if email_data.get('isRead', False) else 'new'
    email_filename = f"{msg['Message-ID']}.eml"
    email_filepath = os.path.join(maildir_path, target_dir, email_filename)

    # Check if the file already exists in any subfolder
    for root, _, files in os.walk(maildir_path):
        if email_filename in files:
            progress.console.print(f"Message {msg['Message-ID']} already exists in {root}. Skipping save.")
            return

    # Save the email to the Maildir
    if not dry_run:
        with open(email_filepath, 'w') as f:
            f.write(msg.as_string())
        progress.console.print(f"Saved message {msg['Message-ID']}")
    else:
        progress.console.print(f"[DRY-RUN] Would save message {msg['Message-ID']}")


def create_maildir_structure(base_path):
    """
    Create the standard Maildir directory structure.

    Args:
        base_path (str): Base path for the Maildir.

    Returns:
        None
    """
    ensure_directory_exists(os.path.join(base_path, 'cur'))
    ensure_directory_exists(os.path.join(base_path, 'new'))
    ensure_directory_exists(os.path.join(base_path, 'tmp'))
    ensure_directory_exists(os.path.join(base_path, '.Archives'))
    ensure_directory_exists(os.path.join(base_path, '.Trash', 'cur'))
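
# Resulting Maildir layout (sketch):
#   <maildir>/new         unread messages
#   <maildir>/cur         read messages
#   <maildir>/tmp         delivery staging (standard Maildir)
#   <maildir>/.Archives   messages queued for server-side archiving
#   <maildir>/.Trash/cur  messages queued for server-side deletion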


async def main():
    """
    Main function to run the script.

    Returns:
        None
    """
    # Save emails to a Maildir under the org-specific subfolder
    maildir_path = os.getenv('MAILDIR_PATH', os.path.expanduser('~/Mail')) + f"/{org_name}"
    attachments_dir = os.path.join(maildir_path, 'attachments')
    ensure_directory_exists(attachments_dir)
    create_maildir_structure(maildir_path)

    # Read Azure app credentials from environment variables
    client_id = os.getenv('AZURE_CLIENT_ID')
    tenant_id = os.getenv('AZURE_TENANT_ID')

    if not client_id or not tenant_id:
        raise ValueError("Please set the AZURE_CLIENT_ID and AZURE_TENANT_ID environment variables.")

    # Token cache
    cache = msal.SerializableTokenCache()
    cache_file = 'token_cache.bin'

    if os.path.exists(cache_file):
        cache.deserialize(open(cache_file, 'r').read())

    # Authentication
    authority = f'https://login.microsoftonline.com/{tenant_id}'
    # Define scopes for Microsoft Graph API
    scopes = ['https://graph.microsoft.com/Calendars.Read', 'https://graph.microsoft.com/Mail.ReadWrite']

    app = msal.PublicClientApplication(client_id, authority=authority, token_cache=cache)
    accounts = app.get_accounts()

    if accounts:
        token_response = app.acquire_token_silent(scopes, account=accounts[0])
    else:
        flow = app.initiate_device_flow(scopes=scopes)
        if 'user_code' not in flow:
            raise Exception("Failed to create device flow")
        print(Panel(flow['message'], border_style="magenta", padding=2, title="MSAL Login Flow Link"))
        token_response = app.acquire_token_by_device_flow(flow)

    if not token_response or 'access_token' not in token_response:
        raise Exception("Failed to acquire token")

    # Save token cache
    with open(cache_file, 'w') as f:
        f.write(cache.serialize())

    access_token = token_response['access_token']
    headers = {'Authorization': f'Bearer {access_token}', 'Prefer': 'outlook.body-content-type="text"'}
    accounts = app.get_accounts()

    if not accounts:
        raise Exception("No accounts found")

    # Authenticate and get access token (refactored path; supersedes the inline MSAL flow above)
    access_token, headers = get_access_token(scopes)

    # Set up the progress bars
    progress = Progress(
        SpinnerColumn(),
        MofNCompleteColumn(),
        *Progress.get_default_columns()
    )

    with progress:
        task_fetch = progress.add_task("[green]Syncing Inbox...", total=0)
        task_calendar = progress.add_task("[cyan]Fetching calendar...", total=0)

@@ -463,10 +191,10 @@ async def main():
        task_delete = progress.add_task("[red]Deleting mail...", total=0)

        # Run all five sync tasks concurrently; gather propagates the first exception
        await asyncio.gather(
            synchronize_maildir_async(maildir_path, headers, progress, task_read, dry_run),
            archive_mail_async(maildir_path, headers, progress, task_archive, dry_run),
            delete_mail_async(maildir_path, headers, progress, task_delete, dry_run),
            fetch_mail_async(maildir_path, attachments_dir, headers, progress, task_fetch, dry_run, download_attachments),
            fetch_calendar_async(headers, progress, task_calendar)
        )
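
# Not shown in this hunk: the module presumably ends with the usual entry point,
# e.g. `if __name__ == '__main__': asyncio.run(main())`.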