wip
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
import os
|
||||
import re
|
||||
import msal
|
||||
import requests
|
||||
import json
|
||||
@@ -6,7 +7,9 @@ from datetime import datetime
|
||||
from dateutil import parser
|
||||
from dateutil.tz import UTC
|
||||
from email.message import EmailMessage
|
||||
from email.utils import format_datetime
|
||||
import time
|
||||
import html2text
|
||||
|
||||
# Filepath for caching timestamp
|
||||
cache_timestamp_file = 'cache_timestamp.json'
|
||||
@@ -33,22 +36,38 @@ def create_maildir_structure(base_path):
|
||||
os.makedirs(os.path.join(base_path, 'new'), exist_ok=True)
|
||||
os.makedirs(os.path.join(base_path, 'tmp'), exist_ok=True)
|
||||
|
||||
# Function to save email to Maildir format
|
||||
# Function to save email to Maildir format with Markdown conversion
|
||||
def save_email_to_maildir(maildir_path, email_data):
|
||||
# Create a new EmailMessage object
|
||||
msg = EmailMessage()
|
||||
|
||||
received_datetime = email_data.get('receivedDateTime', '')
|
||||
# Add required headers
|
||||
msg['Date'] = email_data.get('receivedDateTime', '') # Use the receivedDateTime field
|
||||
if received_datetime:
|
||||
# Parse the ISO 8601 datetime and convert it to RFC 5322 format
|
||||
parsed_datetime = parser.isoparse(received_datetime)
|
||||
msg['Date'] = format_datetime(parsed_datetime)
|
||||
else:
|
||||
msg['Date'] = '' # Leave empty if no receivedDateTime is available
|
||||
|
||||
msg['Message-ID'] = email_data.get('id', '') # Use the unique ID of the message
|
||||
msg['Subject'] = email_data.get('subject', 'No Subject') # Default to 'No Subject' if missing
|
||||
msg['From'] = email_data.get('from', {}).get('emailAddress', {}).get('address', 'unknown@unknown.com')
|
||||
msg['To'] = ', '.join([recipient['emailAddress']['address'] for recipient in email_data.get('toRecipients', [])])
|
||||
|
||||
# Add the email body
|
||||
body = email_data.get('body', {}).get('content', '')
|
||||
msg.set_content(body)
|
||||
# Convert the email body from HTML to Markdown
|
||||
body_html = email_data.get('body', {}).get('content', '')
|
||||
if email_data.get('body', {}).get('contentType', '').lower() == 'html':
|
||||
markdown_converter = html2text.HTML2Text()
|
||||
markdown_converter.ignore_images = True
|
||||
markdown_converter.ignore_links = False # Keep links in the Markdown output
|
||||
body_markdown = markdown_converter.handle(body_html)
|
||||
else:
|
||||
body_markdown = body_html # Use plain text if the body is not HTML
|
||||
|
||||
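# Add the converted Markdown body to the email. NOTE(review): the banner-stripping re.sub that follows runs after set_content(), so the body already stored in msg still contains the banner text; the substitution probably needs to happen before set_content() - confirm.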
|
||||
msg.set_content(body_markdown)
|
||||
body_markdown = re.sub(r'\w+BannerStart.*?\w+BannerEnd', '', body_markdown, flags=re.DOTALL)
|
||||
# Save the email to the Maildir 'new' folder
|
||||
email_filename = f"{msg['Message-ID'] or email_data.get('id', 'unknown')}.eml"
|
||||
email_filepath = os.path.join(maildir_path, 'new', email_filename)
|
||||
@@ -170,7 +189,7 @@ print(f"\nFinished processing {len(messages)} messages.")
|
||||
# Fetch events with pagination and expand recurring events
|
||||
events_url = 'https://graph.microsoft.com/v1.0/me/events?$top=100&$expand=instances'
|
||||
events = []
|
||||
print("Fetching events...")
|
||||
print("Fetching Calendar events...")
|
||||
while events_url:
|
||||
response = requests.get(events_url, headers=headers)
|
||||
response_data = response.json()
|
||||
|
||||
Reference in New Issue
Block a user