diff --git a/fetch_outlook.py b/fetch_outlook.py
index c40db90..3982712 100644
--- a/fetch_outlook.py
+++ b/fetch_outlook.py
@@ -1,4 +1,5 @@
 import os
+import re
 import msal
 import requests
 import json
@@ -6,7 +7,9 @@ from datetime import datetime
 from dateutil import parser
 from dateutil.tz import UTC
 from email.message import EmailMessage
+from email.utils import format_datetime
 import time
+import html2text
 
 # Filepath for caching timestamp
 cache_timestamp_file = 'cache_timestamp.json'
@@ -33,22 +36,39 @@ def create_maildir_structure(base_path):
     os.makedirs(os.path.join(base_path, 'new'), exist_ok=True)
     os.makedirs(os.path.join(base_path, 'tmp'), exist_ok=True)
 
-# Function to save email to Maildir format
+# Function to save email to Maildir format with Markdown conversion
 def save_email_to_maildir(maildir_path, email_data):
     # Create a new EmailMessage object
     msg = EmailMessage()
+    received_datetime = email_data.get('receivedDateTime', '')
 
     # Add required headers
-    msg['Date'] = email_data.get('receivedDateTime', '')  # Use the receivedDateTime field
+    if received_datetime:
+        # Parse the ISO 8601 datetime and convert it to RFC 5322 format
+        parsed_datetime = parser.isoparse(received_datetime)
+        msg['Date'] = format_datetime(parsed_datetime)
+    else:
+        msg['Date'] = ''  # Leave empty if no receivedDateTime is available
+
     msg['Message-ID'] = email_data.get('id', '')  # Use the unique ID of the message
     msg['Subject'] = email_data.get('subject', 'No Subject')  # Default to 'No Subject' if missing
     msg['From'] = email_data.get('from', {}).get('emailAddress', {}).get('address', 'unknown@unknown.com')
     msg['To'] = ', '.join([recipient['emailAddress']['address'] for recipient in email_data.get('toRecipients', [])])
 
-    # Add the email body
-    body = email_data.get('body', {}).get('content', '')
-    msg.set_content(body)
+    # Convert the email body from HTML to Markdown
+    body_html = email_data.get('body', {}).get('content', '')
+    if email_data.get('body', {}).get('contentType', '').lower() == 'html':
+        markdown_converter = html2text.HTML2Text()
+        markdown_converter.ignore_images = True
+        markdown_converter.ignore_links = False  # Keep links in the Markdown output
+        body_markdown = markdown_converter.handle(body_html)
+    else:
+        body_markdown = body_html  # Use plain text if the body is not HTML
+    # Strip "...BannerStart ... BannerEnd" blocks before the body is stored; doing it after set_content() has no effect
+    body_markdown = re.sub(r'\w+BannerStart.*?\w+BannerEnd', '', body_markdown, flags=re.DOTALL)
 
+    # Add the converted Markdown body to the email
+    msg.set_content(body_markdown)
     # Save the email to the Maildir 'new' folder
     email_filename = f"{msg['Message-ID'] or email_data.get('id', 'unknown')}.eml"
     email_filepath = os.path.join(maildir_path, 'new', email_filename)
@@ -170,7 +190,7 @@ print(f"\nFinished processing {len(messages)} messages.")
 # Fetch events with pagination and expand recurring events
 events_url = 'https://graph.microsoft.com/v1.0/me/events?$top=100&$expand=instances'
 events = []
-print("Fetching events...")
+print("Fetching Calendar events...")
 while events_url:
     response = requests.get(events_url, headers=headers)
     response_data = response.json()