This commit is contained in:
Tim Bendt
2025-04-23 07:14:53 -06:00
parent 382a181720
commit 4770bcb459

View File

@@ -1,4 +1,5 @@
import os
import re
import msal
import requests
import json
@@ -6,7 +7,9 @@ from datetime import datetime
from dateutil import parser
from dateutil.tz import UTC
from email.message import EmailMessage
from email.utils import format_datetime
import time
import html2text
# Filepath for caching timestamp
cache_timestamp_file = 'cache_timestamp.json'
@@ -33,22 +36,38 @@ def create_maildir_structure(base_path):
os.makedirs(os.path.join(base_path, 'new'), exist_ok=True)
os.makedirs(os.path.join(base_path, 'tmp'), exist_ok=True)
# Function to save email to Maildir format
# Function to save email to Maildir format with Markdown conversion
def save_email_to_maildir(maildir_path, email_data):
# Create a new EmailMessage object
msg = EmailMessage()
received_datetime = email_data.get('receivedDateTime', '')
# Add required headers
msg['Date'] = email_data.get('receivedDateTime', '') # Use the receivedDateTime field
if received_datetime:
# Parse the ISO 8601 datetime and convert it to RFC 5322 format
parsed_datetime = parser.isoparse(received_datetime)
msg['Date'] = format_datetime(parsed_datetime)
else:
msg['Date'] = '' # Leave empty if no receivedDateTime is available
msg['Message-ID'] = email_data.get('id', '') # Use the unique ID of the message
msg['Subject'] = email_data.get('subject', 'No Subject') # Default to 'No Subject' if missing
msg['From'] = email_data.get('from', {}).get('emailAddress', {}).get('address', 'unknown@unknown.com')
msg['To'] = ', '.join([recipient['emailAddress']['address'] for recipient in email_data.get('toRecipients', [])])
# Add the email body
body = email_data.get('body', {}).get('content', '')
msg.set_content(body)
# Build the message body: HTML bodies are converted to Markdown, anything
# else is used verbatim as plain text.
body_info = email_data.get('body', {})
body_html = body_info.get('content', '')
if body_info.get('contentType', '').lower() == 'html':
    markdown_converter = html2text.HTML2Text()
    markdown_converter.ignore_images = True
    markdown_converter.ignore_links = False  # Keep links in the Markdown output
    body_markdown = markdown_converter.handle(body_html)
else:
    body_markdown = body_html  # Use plain text if the body is not HTML
# Strip "<Something>BannerStart ... <Something>BannerEnd" sections.
# This has to happen BEFORE set_content(): the message stores the text it is
# given at that point, so a substitution made afterwards never reaches the
# saved email.
body_markdown = re.sub(r'\w+BannerStart.*?\w+BannerEnd', '', body_markdown, flags=re.DOTALL)
# Add the cleaned Markdown body to the email
msg.set_content(body_markdown)
# Save the email to the Maildir 'new' folder
email_filename = f"{msg['Message-ID'] or email_data.get('id', 'unknown')}.eml"
email_filepath = os.path.join(maildir_path, 'new', email_filename)
@@ -170,7 +189,7 @@ print(f"\nFinished processing {len(messages)} messages.")
# Fetch events with pagination and expand recurring events
events_url = 'https://graph.microsoft.com/v1.0/me/events?$top=100&$expand=instances'
events = []
print("Fetching events...")
print("Fetching Calendar events...")
while events_url:
response = requests.get(events_url, headers=headers)
response_data = response.json()