This commit is contained in:
Tim Bendt
2025-04-23 07:14:53 -06:00
parent 382a181720
commit 4770bcb459

View File

@@ -1,4 +1,5 @@
import os import os
import re
import msal import msal
import requests import requests
import json import json
@@ -6,7 +7,9 @@ from datetime import datetime
from dateutil import parser from dateutil import parser
from dateutil.tz import UTC from dateutil.tz import UTC
from email.message import EmailMessage from email.message import EmailMessage
from email.utils import format_datetime
import time import time
import html2text
# Filepath for caching timestamp # Filepath for caching timestamp
cache_timestamp_file = 'cache_timestamp.json' cache_timestamp_file = 'cache_timestamp.json'
@@ -33,22 +36,38 @@ def create_maildir_structure(base_path):
os.makedirs(os.path.join(base_path, 'new'), exist_ok=True) os.makedirs(os.path.join(base_path, 'new'), exist_ok=True)
os.makedirs(os.path.join(base_path, 'tmp'), exist_ok=True) os.makedirs(os.path.join(base_path, 'tmp'), exist_ok=True)
# Function to save email to Maildir format # Function to save email to Maildir format with Markdown conversion
def save_email_to_maildir(maildir_path, email_data): def save_email_to_maildir(maildir_path, email_data):
# Create a new EmailMessage object # Create a new EmailMessage object
msg = EmailMessage() msg = EmailMessage()
received_datetime = email_data.get('receivedDateTime', '')
# Add required headers # Add required headers
msg['Date'] = email_data.get('receivedDateTime', '') # Use the receivedDateTime field if received_datetime:
# Parse the ISO 8601 datetime and convert it to RFC 5322 format
parsed_datetime = parser.isoparse(received_datetime)
msg['Date'] = format_datetime(parsed_datetime)
else:
msg['Date'] = '' # Leave empty if no receivedDateTime is available
msg['Message-ID'] = email_data.get('id', '') # Use the unique ID of the message msg['Message-ID'] = email_data.get('id', '') # Use the unique ID of the message
msg['Subject'] = email_data.get('subject', 'No Subject') # Default to 'No Subject' if missing msg['Subject'] = email_data.get('subject', 'No Subject') # Default to 'No Subject' if missing
msg['From'] = email_data.get('from', {}).get('emailAddress', {}).get('address', 'unknown@unknown.com') msg['From'] = email_data.get('from', {}).get('emailAddress', {}).get('address', 'unknown@unknown.com')
msg['To'] = ', '.join([recipient['emailAddress']['address'] for recipient in email_data.get('toRecipients', [])]) msg['To'] = ', '.join([recipient['emailAddress']['address'] for recipient in email_data.get('toRecipients', [])])
# Add the email body # Convert the email body from HTML to Markdown
body = email_data.get('body', {}).get('content', '') body_html = email_data.get('body', {}).get('content', '')
msg.set_content(body) if email_data.get('body', {}).get('contentType', '').lower() == 'html':
markdown_converter = html2text.HTML2Text()
markdown_converter.ignore_images = True
markdown_converter.ignore_links = False # Keep links in the Markdown output
body_markdown = markdown_converter.handle(body_html)
else:
body_markdown = body_html # Use plain text if the body is not HTML
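# Strip "...BannerStart ... BannerEnd" blocks BEFORE attaching the body; set_content() copies the string, so cleaning it afterwards never reaches the saved message
body_markdown = re.sub(r'\w+BannerStart.*?\w+BannerEnd', '', body_markdown, flags=re.DOTALL)
# Add the converted Markdown body to the email
msg.set_content(body_markdown)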
# Save the email to the Maildir 'new' folder # Save the email to the Maildir 'new' folder
email_filename = f"{msg['Message-ID'] or email_data.get('id', 'unknown')}.eml" email_filename = f"{msg['Message-ID'] or email_data.get('id', 'unknown')}.eml"
email_filepath = os.path.join(maildir_path, 'new', email_filename) email_filepath = os.path.join(maildir_path, 'new', email_filename)
@@ -170,7 +189,7 @@ print(f"\nFinished processing {len(messages)} messages.")
# Fetch events with pagination and expand recurring events # Fetch events with pagination and expand recurring events
events_url = 'https://graph.microsoft.com/v1.0/me/events?$top=100&$expand=instances' events_url = 'https://graph.microsoft.com/v1.0/me/events?$top=100&$expand=instances'
events = [] events = []
print("Fetching events...") print("Fetching Calendar events...")
while events_url: while events_url:
response = requests.get(events_url, headers=headers) response = requests.get(events_url, headers=headers)
response_data = response.json() response_data = response.json()