basic sync of trash and read status

This commit is contained in:
Tim Bendt
2025-04-23 12:30:39 -06:00
parent 4770bcb459
commit 59372b91ad
2 changed files with 108 additions and 19 deletions

1
.gitignore vendored
View File

@@ -7,3 +7,4 @@ output_markdown_files/output_6.md
token_cache.bin
output_ics/outlook_events_latest.ics
cache_timestamp.json
sync_timestamp.json

View File

@@ -3,6 +3,7 @@ import re
import msal
import requests
import json
import glob
from datetime import datetime
from dateutil import parser
from dateutil.tz import UTC
@@ -14,6 +15,62 @@ import html2text
# Filepath of the JSON file holding the cached timestamp (loaded further down
# in this script when the file exists). Relative, so it resolves against the
# current working directory.
cache_timestamp_file = 'cache_timestamp.json'
# Filepath of the JSON file holding the time of the last maildir sync, stored
# under the 'last_sync' key by save_sync_timestamp() and read back by
# load_last_sync_timestamp(). Also relative to the current working directory.
sync_timestamp_file = 'sync_timestamp.json'
# Function to load the last sync timestamp
def load_last_sync_timestamp(path=None):
    """Return the ``last_sync`` value stored in the sync timestamp file.

    Args:
        path: Optional file to read instead of the module-level
            ``sync_timestamp_file``.

    Returns:
        The stored ``last_sync`` value, or ``0`` when the file is missing,
        is not valid JSON, is not a JSON object, or has no ``last_sync`` key.
    """
    target = sync_timestamp_file if path is None else path
    try:
        # Open directly instead of checking os.path.exists() first: the file
        # could disappear between the check and the open.
        with open(target, 'r') as f:
            data = json.load(f)
    except FileNotFoundError:
        return 0
    except ValueError as exc:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors.
        # An empty or truncated file must not abort the whole sync; treat it
        # the same as "never synced".
        print(f"Ignoring unreadable sync timestamp file {target}: {exc}")
        return 0
    if not isinstance(data, dict):
        return 0
    return data.get('last_sync', 0)
# Function to save the current sync timestamp
def save_sync_timestamp(path=None):
    """Record the current time as the moment of the last sync.

    Writes ``{"last_sync": <time.time()>}`` as JSON.

    Args:
        path: Optional file to write instead of the module-level
            ``sync_timestamp_file``.
    """
    target = sync_timestamp_file if path is None else path
    # Write to a sibling temp file and rename it into place so that an
    # interrupted run can never leave a half-written (unparseable) file.
    tmp_path = f"{target}.tmp"
    with open(tmp_path, 'w') as f:
        json.dump({'last_sync': time.time()}, f)
    os.replace(tmp_path, target)
# Function to synchronize maildir with the server
_GRAPH_MESSAGES_URL = 'https://graph.microsoft.com/v1.0/me/messages'
_GRAPH_REQUEST_TIMEOUT = 30  # seconds; without a timeout a stalled connection hangs the sync


def _eml_message_ids(directory):
    """Return the set of message ids (filename minus ``.eml``) found in *directory*."""
    return {
        os.path.splitext(os.path.basename(path))[0]
        for path in glob.glob(os.path.join(directory, '*.eml'))
    }


def _send_message_update(method, message_id, headers, expected_status, action, **kwargs):
    """Send one request for *message_id*; print a failure line and return False on error."""
    try:
        response = requests.request(
            method,
            f'{_GRAPH_MESSAGES_URL}/{message_id}',
            headers=headers,
            timeout=_GRAPH_REQUEST_TIMEOUT,
            **kwargs,
        )
    except requests.RequestException as exc:
        # A network failure on one message must not abort the rest of the run.
        print(f"Failed to {action}: {message_id}, {exc}")
        return False
    if response.status_code != expected_status:
        print(f"Failed to {action}: {message_id}, {response.status_code}, {response.text}")
        return False
    return True


def synchronize_maildir(maildir_path, headers):
    """Push local Maildir state (read / trashed) to the server.

    Every ``*.eml`` message present in ``<maildir_path>/cur`` but not in
    ``<maildir_path>/new`` is PATCHed with ``isRead: true``; every ``*.eml``
    message in ``<maildir_path>/.Trash/cur`` is DELETEd. The id sent to the
    server is the filename without its ``.eml`` suffix. Failures are printed
    and do not stop the run. The sync timestamp is saved at the end.

    NOTE: the saved timestamp is not used to limit the work, so every matching
    file is re-sent on each run.

    Args:
        maildir_path: Root directory of the local Maildir.
        headers: HTTP headers passed unchanged to every request.
    """
    # Compare by message id, not by full path: paths under new/ and cur/ can
    # never be equal, so a path-based set difference removes nothing.
    new_ids = _eml_message_ids(os.path.join(maildir_path, 'new'))
    cur_ids = _eml_message_ids(os.path.join(maildir_path, 'cur'))
    for message_id in sorted(cur_ids - new_ids):
        print(f"Marking message as read: {message_id}")
        _send_message_update(
            'PATCH', message_id, headers, 200, 'mark message as read',
            json={'isRead': True},
        )

    # Messages filed under .Trash/cur are deleted on the server.
    trash_ids = _eml_message_ids(os.path.join(maildir_path, '.Trash', 'cur'))
    for message_id in sorted(trash_ids):
        print(f"Moving message to trash: {message_id}")
        # 204 No Content indicates success for DELETE.
        _send_message_update('DELETE', message_id, headers, 204, 'move message to trash')

    # Save the current sync timestamp
    save_sync_timestamp()
# Load cached timestamp if it exists
if os.path.exists(cache_timestamp_file):
with open(cache_timestamp_file, 'r') as f:
@@ -36,43 +93,63 @@ def create_maildir_structure(base_path):
os.makedirs(os.path.join(base_path, 'new'), exist_ok=True)
os.makedirs(os.path.join(base_path, 'tmp'), exist_ok=True)
# Function to save email to Maildir format with Markdown conversion
def _recipient_addresses(recipients):
    """Join the ``emailAddress.address`` values of *recipients* with ', ', skipping blanks."""
    addresses = (
        (recipient.get('emailAddress') or {}).get('address')
        for recipient in recipients or []
    )
    return ', '.join(address for address in addresses if address)


def _safe_attachment_name(name):
    """Reduce a server-supplied attachment name to a bare filename."""
    # The name comes from the email, so strip any directory part to keep the
    # write inside attachments_dir.
    candidate = os.path.basename(name or '')
    return candidate if candidate not in ('', '.', '..') else 'unknown'


def save_email_to_maildir(maildir_path, email_data, attachments_dir=None):
    """Write one message dict as ``<id>.eml`` into the Maildir.

    The message goes to ``cur`` when ``isRead`` is true, otherwise to ``new``.
    HTML bodies are converted to Markdown and ``...BannerStart ... BannerEnd``
    spans are removed. Attachments that carry ``contentBytes`` are decoded,
    written to ``attachments_dir`` and attached to the saved message. If a
    file with the same name already exists anywhere under ``maildir_path``
    nothing is written.

    Args:
        maildir_path: Root directory of the Maildir.
        email_data: Message dict as returned by the mail API.
        attachments_dir: Directory for extracted attachments. Defaults to
            ``<maildir_path>/attachments``.
    """
    import base64

    if attachments_dir is None:
        attachments_dir = os.path.join(maildir_path, 'attachments')

    # Create a new EmailMessage object
    msg = EmailMessage()

    # Add required headers
    received_datetime = email_data.get('receivedDateTime', '')
    if received_datetime:
        # Parse the ISO 8601 datetime and convert it to RFC 5322 format
        msg['Date'] = format_datetime(parser.isoparse(received_datetime))
    else:
        msg['Date'] = ''
    msg['Message-ID'] = email_data.get('id', '')
    msg['Subject'] = email_data.get('subject', 'No Subject')
    msg['From'] = email_data.get('from', {}).get('emailAddress', {}).get('address', 'unknown@unknown.com')
    msg['To'] = _recipient_addresses(email_data.get('toRecipients', []))
    msg['Cc'] = _recipient_addresses(email_data.get('ccRecipients', []))

    # Check if the file already exists in any subfolder. Done before any
    # attachment is written so a skipped message has no side effects.
    email_filename = f"{msg['Message-ID']}.eml"
    for root, _, files in os.walk(maildir_path):
        if email_filename in files:
            print(f"Message {msg['Message-ID']} already exists in {root}. Skipping save.")
            return

    # Convert the email body from HTML to Markdown
    body = email_data.get('body', {})
    body_content = body.get('content', '')
    if body.get('contentType', '').lower() == 'html':
        markdown_converter = html2text.HTML2Text()
        markdown_converter.ignore_images = True
        markdown_converter.ignore_links = False
        body_markdown = markdown_converter.handle(body_content)
    else:
        body_markdown = body_content

    # Remove lines between any alphanumeric BannerStart and BannerEnd
    body_markdown = re.sub(r'\w+BannerStart.*?\w+BannerEnd', '', body_markdown, flags=re.DOTALL)
    msg.set_content(body_markdown)

    # Download attachments
    for attachment in email_data.get('attachments', []):
        encoded_content = attachment.get('contentBytes')
        if not encoded_content:
            continue
        attachment_name = _safe_attachment_name(attachment.get('name'))
        try:
            # contentBytes is base64 text; decode it to get the real file bytes.
            payload = base64.b64decode(encoded_content)
        except ValueError as exc:  # binascii.Error is a ValueError
            print(f"Skipping undecodable attachment {attachment_name}: {exc}")
            continue
        os.makedirs(attachments_dir, exist_ok=True)
        with open(os.path.join(attachments_dir, attachment_name), 'wb') as f:
            f.write(payload)
        # add_attachment() needs an explicit maintype/subtype for bytes.
        content_type = (attachment.get('contentType') or '').split(';')[0].strip()
        maintype, _, subtype = content_type.partition('/')
        if not maintype or not subtype:
            maintype, subtype = 'application', 'octet-stream'
        msg.add_attachment(payload, maintype=maintype, subtype=subtype, filename=attachment_name)

    # Determine the directory based on isRead
    target_dir = 'cur' if email_data.get('isRead', False) else 'new'
    email_filepath = os.path.join(maildir_path, target_dir, email_filename)

    # Save the email to the Maildir
    os.makedirs(os.path.dirname(email_filepath), exist_ok=True)
    with open(email_filepath, 'w', encoding='utf-8') as f:
        f.write(msg.as_string())
    print(f"Saved message {msg['Message-ID']} to {email_filepath}")
# Read Azure app credentials from environment variables
client_id = os.getenv('AZURE_CLIENT_ID')
@@ -81,6 +158,8 @@ tenant_id = os.getenv('AZURE_TENANT_ID')
if not client_id or not tenant_id:
raise ValueError("Please set the AZURE_CLIENT_ID and AZURE_TENANT_ID environment variables.")
# Token cache
cache = msal.SerializableTokenCache()
cache_file = 'token_cache.bin'
@@ -99,7 +178,7 @@ else:
# Authentication
authority = f'https://login.microsoftonline.com/{tenant_id}'
scopes = ['https://graph.microsoft.com/Calendars.Read', 'https://graph.microsoft.com/Mail.Read']
scopes = ['https://graph.microsoft.com/Calendars.Read', 'https://graph.microsoft.com/Mail.ReadWrite']
app = msal.PublicClientApplication(client_id, authority=authority, token_cache=cache)
accounts = app.get_accounts()
@@ -126,7 +205,13 @@ accounts = app.get_accounts()
if not accounts:
raise Exception("No accounts found")
mail_url = 'https://graph.microsoft.com/v1.0/me/mailFolders/inbox/messages?$top=100&$orderby=receivedDateTime asc'
# Call the synchronization function before fetching mail
print("Synchronizing maildir with server...")
synchronize_maildir(maildir_path=os.getenv('MAILDIR_PATH', os.path.expanduser('~/Mail')) + "/corteva", headers=headers)
print("Synchronization complete.")
mail_url = 'https://graph.microsoft.com/v1.0/me/mailFolders/inbox/messages?$top=100&$orderby=receivedDateTime asc&$select=id,subject,from,toRecipients,ccRecipients,receivedDateTime,isRead,body,attachments'
messages = []
print("Fetching mail...")
@@ -174,14 +259,17 @@ while mail_url:
mail_url = response_data.get('@odata.nextLink')
print("\nFinished fetching mail.")
print("\nFinished fetching mail. Now saving them to maildir.")
maildir_path = os.getenv('MAILDIR_PATH', os.path.expanduser('~/Mail')) + "/corteva/INBOX"
# Save emails to Maildir
maildir_path = os.getenv('MAILDIR_PATH', os.path.expanduser('~/Mail')) + "/corteva"
attachments_dir = os.path.join(maildir_path, 'attachments')
os.makedirs(attachments_dir, exist_ok=True)
create_maildir_structure(maildir_path)
for message in messages:
print(f"Processing message: {message.get('subject', 'No Subject')}", end='\r')
save_email_to_maildir(maildir_path, message)
save_email_to_maildir(maildir_path, message, attachments_dir)
print(f"\nFinished processing {len(messages)} messages.")