basic sync of trash and read status

This commit is contained in:
Tim Bendt
2025-04-23 12:30:39 -06:00
parent 4770bcb459
commit 59372b91ad
2 changed files with 108 additions and 19 deletions

1
.gitignore vendored
View File

@@ -7,3 +7,4 @@ output_markdown_files/output_6.md
token_cache.bin token_cache.bin
output_ics/outlook_events_latest.ics output_ics/outlook_events_latest.ics
cache_timestamp.json cache_timestamp.json
sync_timestamp.json

View File

@@ -3,6 +3,7 @@ import re
import msal import msal
import requests import requests
import json import json
import glob
from datetime import datetime from datetime import datetime
from dateutil import parser from dateutil import parser
from dateutil.tz import UTC from dateutil.tz import UTC
@@ -14,6 +15,62 @@ import html2text
# Filepath for caching timestamp # Filepath for caching timestamp
cache_timestamp_file = 'cache_timestamp.json' cache_timestamp_file = 'cache_timestamp.json'
# Filepath for sync timestamp
sync_timestamp_file = 'sync_timestamp.json'


# Function to load the last sync timestamp
def load_last_sync_timestamp():
    """Return the ``last_sync`` value stored in ``sync_timestamp_file``.

    Returns:
        The stored ``last_sync`` value, or ``0`` when the file does not
        exist, does not contain valid JSON, does not contain a JSON object,
        or has no ``last_sync`` key.
    """
    # EAFP: open directly instead of checking os.path.exists() first, so a
    # file removed between the check and the open cannot raise.
    try:
        with open(sync_timestamp_file, 'r') as f:
            data = json.load(f)
    except FileNotFoundError:
        return 0
    except ValueError as exc:
        # json.JSONDecodeError (empty/corrupt file) is a ValueError subclass.
        print(f"Ignoring unreadable sync timestamp file {sync_timestamp_file}: {exc}")
        return 0

    if not isinstance(data, dict):
        return 0
    return data.get('last_sync', 0)
# Function to save the current sync timestamp
def save_sync_timestamp():
    """Overwrite ``sync_timestamp_file`` with the current time.

    The file receives a single JSON object whose ``last_sync`` key holds
    the value of ``time.time()`` at the moment of the call.
    """
    with open(sync_timestamp_file, 'w') as handle:
        record = {'last_sync': time.time()}
        handle.write(json.dumps(record))
# Function to synchronize maildir with the server
def _message_ids_in(directory):
    """Return the sorted, unique message IDs of the ``*.eml`` files in *directory*."""
    # glob.escape() keeps characters such as "[" in the maildir path from
    # being interpreted as glob patterns.
    pattern = os.path.join(glob.escape(directory), '*.eml')
    ids = set()
    for path in glob.glob(pattern):
        # The file name up to the first "." is used as the message ID.
        message_id = os.path.basename(path).split('.')[0]
        if message_id:  # a file named just ".eml" carries no usable ID
            ids.add(message_id)
    return sorted(ids)


def synchronize_maildir(maildir_path, headers):
    """Push local read and trash state of the maildir to the server.

    Every ``*.eml`` file found in ``cur`` (and not also present in ``new``)
    is PATCHed with ``isRead: True``; every ``*.eml`` file found in
    ``.Trash/cur`` is DELETEd. Failures are printed and do not stop the
    remaining messages from being processed. The sync timestamp is saved
    at the end regardless of individual failures.

    Args:
        maildir_path: Root directory containing ``new``, ``cur`` and
            ``.Trash/cur``.
        headers: HTTP headers passed unchanged to every request.
    """
    base_url = 'https://graph.microsoft.com/v1.0/me/messages'
    request_timeout = 30  # seconds; without it a stalled connection hangs forever

    # Find messages that are in "cur" and mark them as read. File names are
    # compared (not full paths): paths from different directories never
    # match, which made the original set difference a no-op.
    still_new = set(_message_ids_in(os.path.join(maildir_path, 'new')))
    for message_id in _message_ids_in(os.path.join(maildir_path, 'cur')):
        if message_id in still_new:
            continue
        print(f"Marking message as read: {message_id}")
        try:
            response = requests.patch(
                f'{base_url}/{message_id}',
                headers=headers,
                json={'isRead': True},
                timeout=request_timeout,
            )
        except requests.RequestException as exc:
            print(f"Failed to mark message as read: {message_id}, {exc}")
            continue
        if response.status_code != 200:
            print(f"Failed to mark message as read: {message_id}, {response.status_code}, {response.text}")

    # Find messages moved to ".Trash/cur" and delete them on the server
    for message_id in _message_ids_in(os.path.join(maildir_path, '.Trash', 'cur')):
        print(f"Moving message to trash: {message_id}")
        try:
            response = requests.delete(
                f'{base_url}/{message_id}',
                headers=headers,
                timeout=request_timeout,
            )
        except requests.RequestException as exc:
            print(f"Failed to move message to trash: {message_id}, {exc}")
            continue
        # 204 No Content indicates success. 404 is accepted as well: the
        # local trash file is left in place after a delete, so later runs
        # re-issue the request for a message the server no longer has.
        if response.status_code not in (204, 404):
            print(f"Failed to move message to trash: {message_id}, {response.status_code}, {response.text}")

    # Save the current sync timestamp
    save_sync_timestamp()
# Load cached timestamp if it exists # Load cached timestamp if it exists
if os.path.exists(cache_timestamp_file): if os.path.exists(cache_timestamp_file):
with open(cache_timestamp_file, 'r') as f: with open(cache_timestamp_file, 'r') as f:
@@ -36,43 +93,63 @@ def create_maildir_structure(base_path):
os.makedirs(os.path.join(base_path, 'new'), exist_ok=True) os.makedirs(os.path.join(base_path, 'new'), exist_ok=True)
os.makedirs(os.path.join(base_path, 'tmp'), exist_ok=True) os.makedirs(os.path.join(base_path, 'tmp'), exist_ok=True)
# Function to save email to Maildir format with Markdown conversion
def _recipient_addresses(recipients):
    """Join the ``emailAddress.address`` values of *recipients* with ", "."""
    addresses = []
    for recipient in recipients or []:
        address = (recipient.get('emailAddress') or {}).get('address')
        if address:
            addresses.append(address)
    return ', '.join(addresses)


def save_email_to_maildir(maildir_path, email_data, attachments_dir):
    """Save one message dict as ``<id>.eml`` in the Maildir, plus its attachments.

    The message goes to ``cur`` when ``isRead`` is truthy and to ``new``
    otherwise. If a file with the same name already exists anywhere under
    *maildir_path*, nothing is written. An HTML body is converted to
    Markdown, and text between ``...BannerStart`` and ``...BannerEnd``
    markers is removed.

    Args:
        maildir_path: Root of the Maildir (parent of ``new`` and ``cur``).
        email_data: Message dict; the keys read are ``id``, ``subject``,
            ``from``, ``toRecipients``, ``ccRecipients``,
            ``receivedDateTime``, ``isRead``, ``body`` and ``attachments``.
        attachments_dir: Directory that receives a decoded copy of every
            attachment carrying ``contentBytes``.
    """
    # Local import so this block does not depend on the file's import section.
    import base64

    message_id = str(email_data.get('id') or '')
    email_filename = f"{message_id}.eml"

    # Check for an existing copy first, so a message that is already stored
    # costs no conversion work and rewrites no attachment files.
    for root, _, files in os.walk(maildir_path):
        if email_filename in files:
            print(f"Message {message_id} already exists in {root}. Skipping save.")
            return

    # Create a new EmailMessage object
    msg = EmailMessage()

    received_datetime = email_data.get('receivedDateTime', '')
    if received_datetime:
        # Parse the ISO 8601 datetime and convert it to RFC 5322 format
        msg['Date'] = format_datetime(parser.isoparse(received_datetime))
    # No timestamp: leave the header out rather than emit an empty "Date:".

    msg['Message-ID'] = message_id
    # "or" fallbacks also cover keys that are present but null.
    msg['Subject'] = email_data.get('subject') or 'No Subject'
    sender = (email_data.get('from') or {}).get('emailAddress') or {}
    msg['From'] = sender.get('address') or 'unknown@unknown.com'
    to_header = _recipient_addresses(email_data.get('toRecipients'))
    if to_header:
        msg['To'] = to_header
    cc_header = _recipient_addresses(email_data.get('ccRecipients'))
    if cc_header:
        msg['Cc'] = cc_header

    # Convert the email body from HTML to Markdown
    body = email_data.get('body') or {}
    body_markdown = body.get('content') or ''
    if (body.get('contentType') or '').lower() == 'html':
        markdown_converter = html2text.HTML2Text()
        markdown_converter.ignore_images = True
        markdown_converter.ignore_links = False
        body_markdown = markdown_converter.handle(body_markdown)

    # Remove lines between any alphanumeric BannerStart and BannerEnd
    body_markdown = re.sub(r'\w+BannerStart.*?\w+BannerEnd', '', body_markdown, flags=re.DOTALL)
    msg.set_content(body_markdown)

    # Download attachments
    os.makedirs(attachments_dir, exist_ok=True)
    for attachment in email_data.get('attachments') or []:
        encoded = attachment.get('contentBytes')
        if not encoded:
            continue

        # basename() drops directory parts so a crafted name cannot write
        # outside attachments_dir.
        attachment_name = os.path.basename(attachment.get('name') or '')
        if attachment_name in ('', '.', '..'):
            attachment_name = 'unknown'

        # contentBytes is base64 text; decode it to the real file bytes.
        try:
            payload = base64.b64decode(encoded)
        except ValueError as exc:  # binascii.Error is a ValueError subclass
            print(f"Skipping attachment {attachment_name}: invalid base64 content ({exc})")
            continue

        # add_attachment() needs an explicit MIME type for bytes content.
        mime_type = (attachment.get('contentType') or '').split(';')[0].strip()
        maintype, _, subtype = mime_type.partition('/')
        if not maintype or not subtype:
            maintype, subtype = 'application', 'octet-stream'

        with open(os.path.join(attachments_dir, attachment_name), 'wb') as f:
            f.write(payload)
        msg.add_attachment(payload, maintype=maintype, subtype=subtype, filename=attachment_name)

    # Determine the directory based on isRead
    target_dir = 'cur' if email_data.get('isRead', False) else 'new'
    os.makedirs(os.path.join(maildir_path, target_dir), exist_ok=True)
    email_filepath = os.path.join(maildir_path, target_dir, email_filename)

    # Save the email to the Maildir
    with open(email_filepath, 'w', encoding='utf-8') as f:
        f.write(msg.as_string())
    print(f"Saved message {message_id} to {email_filepath}")
# Read Azure app credentials from environment variables # Read Azure app credentials from environment variables
client_id = os.getenv('AZURE_CLIENT_ID') client_id = os.getenv('AZURE_CLIENT_ID')
@@ -81,6 +158,8 @@ tenant_id = os.getenv('AZURE_TENANT_ID')
if not client_id or not tenant_id: if not client_id or not tenant_id:
raise ValueError("Please set the AZURE_CLIENT_ID and AZURE_TENANT_ID environment variables.") raise ValueError("Please set the AZURE_CLIENT_ID and AZURE_TENANT_ID environment variables.")
# Token cache # Token cache
cache = msal.SerializableTokenCache() cache = msal.SerializableTokenCache()
cache_file = 'token_cache.bin' cache_file = 'token_cache.bin'
@@ -99,7 +178,7 @@ else:
# Authentication # Authentication
authority = f'https://login.microsoftonline.com/{tenant_id}' authority = f'https://login.microsoftonline.com/{tenant_id}'
scopes = ['https://graph.microsoft.com/Calendars.Read', 'https://graph.microsoft.com/Mail.Read'] scopes = ['https://graph.microsoft.com/Calendars.Read', 'https://graph.microsoft.com/Mail.ReadWrite']
app = msal.PublicClientApplication(client_id, authority=authority, token_cache=cache) app = msal.PublicClientApplication(client_id, authority=authority, token_cache=cache)
accounts = app.get_accounts() accounts = app.get_accounts()
@@ -126,7 +205,13 @@ accounts = app.get_accounts()
if not accounts: if not accounts:
raise Exception("No accounts found") raise Exception("No accounts found")
mail_url = 'https://graph.microsoft.com/v1.0/me/mailFolders/inbox/messages?$top=100&$orderby=receivedDateTime asc'
# Call the synchronization function before fetching mail
print("Synchronizing maildir with server...")
synchronize_maildir(maildir_path=os.getenv('MAILDIR_PATH', os.path.expanduser('~/Mail')) + "/corteva", headers=headers)
print("Synchronization complete.")
mail_url = 'https://graph.microsoft.com/v1.0/me/mailFolders/inbox/messages?$top=100&$orderby=receivedDateTime asc&$select=id,subject,from,toRecipients,ccRecipients,receivedDateTime,isRead,body,attachments'
messages = [] messages = []
print("Fetching mail...") print("Fetching mail...")
@@ -174,14 +259,17 @@ while mail_url:
mail_url = response_data.get('@odata.nextLink') mail_url = response_data.get('@odata.nextLink')
print("\nFinished fetching mail.") print("\nFinished fetching mail. Now saving them to maildir.")
maildir_path = os.getenv('MAILDIR_PATH', os.path.expanduser('~/Mail')) + "/corteva/INBOX" # Save emails to Maildir
maildir_path = os.getenv('MAILDIR_PATH', os.path.expanduser('~/Mail')) + "/corteva"
attachments_dir = os.path.join(maildir_path, 'attachments')
os.makedirs(attachments_dir, exist_ok=True)
create_maildir_structure(maildir_path) create_maildir_structure(maildir_path)
for message in messages: for message in messages:
print(f"Processing message: {message.get('subject', 'No Subject')}", end='\r') print(f"Processing message: {message.get('subject', 'No Subject')}", end='\r')
save_email_to_maildir(maildir_path, message) save_email_to_maildir(maildir_path, message, attachments_dir)
print(f"\nFinished processing {len(messages)} messages.") print(f"\nFinished processing {len(messages)} messages.")