wip
This commit is contained in:
@@ -1,4 +1,5 @@
|
|||||||
import os
|
import os
|
||||||
|
import re
|
||||||
import msal
|
import msal
|
||||||
import requests
|
import requests
|
||||||
import json
|
import json
|
||||||
@@ -6,7 +7,9 @@ from datetime import datetime
|
|||||||
from dateutil import parser
|
from dateutil import parser
|
||||||
from dateutil.tz import UTC
|
from dateutil.tz import UTC
|
||||||
from email.message import EmailMessage
|
from email.message import EmailMessage
|
||||||
|
from email.utils import format_datetime
|
||||||
import time
|
import time
|
||||||
|
import html2text
|
||||||
|
|
||||||
# Filepath for caching timestamp
|
# Filepath for caching timestamp
|
||||||
cache_timestamp_file = 'cache_timestamp.json'
|
cache_timestamp_file = 'cache_timestamp.json'
|
||||||
@@ -33,22 +36,38 @@ def create_maildir_structure(base_path):
|
|||||||
os.makedirs(os.path.join(base_path, 'new'), exist_ok=True)
|
os.makedirs(os.path.join(base_path, 'new'), exist_ok=True)
|
||||||
os.makedirs(os.path.join(base_path, 'tmp'), exist_ok=True)
|
os.makedirs(os.path.join(base_path, 'tmp'), exist_ok=True)
|
||||||
|
|
||||||
# Function to save email to Maildir format
|
# Function to save email to Maildir format with Markdown conversion
|
||||||
def save_email_to_maildir(maildir_path, email_data):
|
def save_email_to_maildir(maildir_path, email_data):
|
||||||
# Create a new EmailMessage object
|
# Create a new EmailMessage object
|
||||||
msg = EmailMessage()
|
msg = EmailMessage()
|
||||||
|
|
||||||
|
received_datetime = email_data.get('receivedDateTime', '')
|
||||||
# Add required headers
|
# Add required headers
|
||||||
msg['Date'] = email_data.get('receivedDateTime', '') # Use the receivedDateTime field
|
if received_datetime:
|
||||||
|
# Parse the ISO 8601 datetime and convert it to RFC 5322 format
|
||||||
|
parsed_datetime = parser.isoparse(received_datetime)
|
||||||
|
msg['Date'] = format_datetime(parsed_datetime)
|
||||||
|
else:
|
||||||
|
msg['Date'] = '' # Leave empty if no receivedDateTime is available
|
||||||
|
|
||||||
msg['Message-ID'] = email_data.get('id', '') # Use the unique ID of the message
|
msg['Message-ID'] = email_data.get('id', '') # Use the unique ID of the message
|
||||||
msg['Subject'] = email_data.get('subject', 'No Subject') # Default to 'No Subject' if missing
|
msg['Subject'] = email_data.get('subject', 'No Subject') # Default to 'No Subject' if missing
|
||||||
msg['From'] = email_data.get('from', {}).get('emailAddress', {}).get('address', 'unknown@unknown.com')
|
msg['From'] = email_data.get('from', {}).get('emailAddress', {}).get('address', 'unknown@unknown.com')
|
||||||
msg['To'] = ', '.join([recipient['emailAddress']['address'] for recipient in email_data.get('toRecipients', [])])
|
msg['To'] = ', '.join([recipient['emailAddress']['address'] for recipient in email_data.get('toRecipients', [])])
|
||||||
|
|
||||||
# Add the email body
|
# Convert the email body from HTML to Markdown
|
||||||
body = email_data.get('body', {}).get('content', '')
|
body_html = email_data.get('body', {}).get('content', '')
|
||||||
msg.set_content(body)
|
if email_data.get('body', {}).get('contentType', '').lower() == 'html':
|
||||||
|
markdown_converter = html2text.HTML2Text()
|
||||||
|
markdown_converter.ignore_images = True
|
||||||
|
markdown_converter.ignore_links = False # Keep links in the Markdown output
|
||||||
|
body_markdown = markdown_converter.handle(body_html)
|
||||||
|
else:
|
||||||
|
body_markdown = body_html # Use plain text if the body is not HTML
|
||||||
|
|
||||||
|
# Add the converted Markdown body to the email
|
||||||
|
msg.set_content(body_markdown)
|
||||||
|
body_markdown = re.sub(r'\w+BannerStart.*?\w+BannerEnd', '', body_markdown, flags=re.DOTALL)
|
||||||
# Save the email to the Maildir 'new' folder
|
# Save the email to the Maildir 'new' folder
|
||||||
email_filename = f"{msg['Message-ID'] or email_data.get('id', 'unknown')}.eml"
|
email_filename = f"{msg['Message-ID'] or email_data.get('id', 'unknown')}.eml"
|
||||||
email_filepath = os.path.join(maildir_path, 'new', email_filename)
|
email_filepath = os.path.join(maildir_path, 'new', email_filename)
|
||||||
@@ -170,7 +189,7 @@ print(f"\nFinished processing {len(messages)} messages.")
|
|||||||
# Fetch events with pagination and expand recurring events
|
# Fetch events with pagination and expand recurring events
|
||||||
events_url = 'https://graph.microsoft.com/v1.0/me/events?$top=100&$expand=instances'
|
events_url = 'https://graph.microsoft.com/v1.0/me/events?$top=100&$expand=instances'
|
||||||
events = []
|
events = []
|
||||||
print("Fetching events...")
|
print("Fetching Calendar events...")
|
||||||
while events_url:
|
while events_url:
|
||||||
response = requests.get(events_url, headers=headers)
|
response = requests.get(events_url, headers=headers)
|
||||||
response_data = response.json()
|
response_data = response.json()
|
||||||
|
|||||||
Reference in New Issue
Block a user