basic sync of trash and read status
This commit is contained in:
126
fetch_outlook.py
126
fetch_outlook.py
@@ -3,6 +3,7 @@ import re
|
||||
import msal
|
||||
import requests
|
||||
import json
|
||||
import glob
|
||||
from datetime import datetime
|
||||
from dateutil import parser
|
||||
from dateutil.tz import UTC
|
||||
@@ -14,6 +15,62 @@ import html2text
|
||||
# Filepath for caching timestamp
|
||||
cache_timestamp_file = 'cache_timestamp.json'
|
||||
|
||||
|
||||
# Filepath for sync timestamp
|
||||
sync_timestamp_file = 'sync_timestamp.json'
|
||||
|
||||
# Function to load the last sync timestamp
|
||||
def load_last_sync_timestamp():
    """Return the timestamp stored by the last completed sync, or 0.

    Reads the ``last_sync`` value from the JSON file named by the
    module-level ``sync_timestamp_file``.

    Returns:
        The stored ``last_sync`` value, or ``0`` when the file does not
        exist, cannot be parsed as JSON, is not a JSON object, or has no
        ``last_sync`` key.
    """
    # Open directly instead of checking os.path.exists() first: the file can
    # disappear between the check and the open.
    try:
        with open(sync_timestamp_file, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except FileNotFoundError:
        return 0
    except ValueError:
        # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors.
        # A truncated or corrupt file is treated as "never synced" so that a
        # single bad write cannot make every later run crash.
        return 0

    if not isinstance(data, dict):
        return 0
    return data.get('last_sync', 0)
|
||||
|
||||
# Function to save the current sync timestamp
|
||||
def save_sync_timestamp():
    """Record the current time as the last sync timestamp.

    Writes ``{"last_sync": <time.time()>}`` as JSON to the file named by the
    module-level ``sync_timestamp_file``.
    """
    # Write to a sibling temp file and swap it into place so an interrupted
    # run never leaves a half-written timestamp file behind.
    tmp_path = f'{sync_timestamp_file}.tmp'
    with open(tmp_path, 'w', encoding='utf-8') as f:
        json.dump({'last_sync': time.time()}, f)
    os.replace(tmp_path, sync_timestamp_file)
|
||||
|
||||
# Function to synchronize maildir with the server
|
||||
def synchronize_maildir(maildir_path, headers):
    """Push local read/trash state from the Maildir to Microsoft Graph.

    Two passes are made over ``*.eml`` files, whose base name (without the
    ``.eml`` suffix) is used as the Graph message id:

    1. Every file in ``<maildir_path>/cur`` that has no same-named file in
       ``<maildir_path>/new`` is marked read on the server (``PATCH`` with
       ``isRead: true``).
    2. Every file in ``<maildir_path>/.Trash/cur`` is deleted on the server
       (``DELETE``).

    Failures are printed and the loop continues with the next message.
    When both passes are done the sync timestamp is saved.

    Args:
        maildir_path: Root directory containing ``new``, ``cur`` and
            ``.Trash/cur``.
        headers: HTTP headers passed to every Graph request (presumably
            carrying the bearer token -- built by the caller).
    """
    messages_url = 'https://graph.microsoft.com/v1.0/me/messages'
    request_timeout = 30  # seconds; without it a stalled connection hangs forever

    def eml_names(*subdirs):
        """Return the set of ``*.eml`` base names in a Maildir subfolder."""
        pattern = os.path.join(maildir_path, *subdirs, '*.eml')
        return {os.path.basename(path) for path in glob.glob(pattern)}

    def message_id_of(filename):
        # Strip only the final ".eml" so ids containing a dot stay intact.
        return os.path.splitext(filename)[0]

    # Find messages moved from "new" to "cur" and mark them as read.
    # Compare base names: full paths from two different directories never
    # match, which would make the set difference a no-op.
    moved_to_cur = sorted(eml_names('cur') - eml_names('new'))
    for filename in moved_to_cur:
        message_id = message_id_of(filename)
        print(f"Marking message as read: {message_id}")
        try:
            response = requests.patch(
                f'{messages_url}/{message_id}',
                headers=headers,
                json={'isRead': True},
                timeout=request_timeout,
            )
        except requests.RequestException as exc:
            # One unreachable request must not abort the rest of the sync.
            print(f"Failed to mark message as read: {message_id}, {exc}")
            continue
        if response.status_code != 200:
            print(f"Failed to mark message as read: {message_id}, {response.status_code}, {response.text}")

    # Find messages moved to ".Trash/cur" and delete them on the server
    for filename in sorted(eml_names('.Trash', 'cur')):
        message_id = message_id_of(filename)
        print(f"Moving message to trash: {message_id}")
        try:
            response = requests.delete(
                f'{messages_url}/{message_id}',
                headers=headers,
                timeout=request_timeout,
            )
        except requests.RequestException as exc:
            print(f"Failed to move message to trash: {message_id}, {exc}")
            continue
        if response.status_code == 404:
            # The local file stays in .Trash/cur after a successful delete,
            # so later runs see it again; a 404 here most likely means the
            # message is already gone rather than a real failure.
            print(f"Message already removed on server: {message_id}")
        elif response.status_code != 204:  # 204 No Content indicates success
            print(f"Failed to move message to trash: {message_id}, {response.status_code}, {response.text}")

    # Save the current sync timestamp
    save_sync_timestamp()
|
||||
|
||||
|
||||
# Load cached timestamp if it exists
|
||||
if os.path.exists(cache_timestamp_file):
|
||||
with open(cache_timestamp_file, 'r') as f:
|
||||
@@ -36,43 +93,63 @@ def create_maildir_structure(base_path):
|
||||
os.makedirs(os.path.join(base_path, 'new'), exist_ok=True)
|
||||
os.makedirs(os.path.join(base_path, 'tmp'), exist_ok=True)
|
||||
|
||||
# Function to save email to Maildir format with Markdown conversion
|
||||
def save_email_to_maildir(maildir_path, email_data, attachments_dir):
    """Write one Microsoft Graph message dict into the Maildir as ``<id>.eml``.

    The message body is converted from HTML to Markdown when its content
    type is ``html``, any ``...BannerStart ... ...BannerEnd`` span is
    removed, and attachments that carry ``contentBytes`` are both written to
    ``attachments_dir`` and attached to the saved message. Read messages go
    to ``cur``, unread ones to ``new``. If a file with the same name already
    exists anywhere under ``maildir_path`` nothing is written.

    Args:
        maildir_path: Maildir root; ``new`` and ``cur`` must already exist.
        email_data: Message dict as returned by the Graph messages endpoint.
        attachments_dir: Existing directory that receives decoded attachments.
    """
    import base64
    import mimetypes

    message_id = email_data.get('id', '')
    email_filename = f"{message_id}.eml"

    # Check for an existing copy first so that an already-saved message costs
    # no conversion work and does not rewrite its attachments on every run.
    for root, _, files in os.walk(maildir_path):
        if email_filename in files:
            print(f"Message {message_id} already exists in {root}. Skipping save.")
            return

    def addresses(field):
        """Join the addresses of a Graph recipient list, skipping blanks."""
        found = []
        for recipient in email_data.get(field) or []:
            address = (recipient.get('emailAddress') or {}).get('address')
            if address:
                found.append(address)
        return ', '.join(found)

    msg = EmailMessage()

    received_datetime = email_data.get('receivedDateTime', '')
    if received_datetime:
        # Graph sends ISO 8601; the Date header needs RFC 5322 format.
        msg['Date'] = format_datetime(parser.isoparse(received_datetime))
    else:
        msg['Date'] = ''

    subject = email_data.get('subject')
    sender = (email_data.get('from') or {}).get('emailAddress') or {}
    msg['Message-ID'] = message_id
    msg['Subject'] = 'No Subject' if subject is None else subject
    msg['From'] = sender.get('address', 'unknown@unknown.com')
    msg['To'] = addresses('toRecipients')
    msg['Cc'] = addresses('ccRecipients')

    # Convert the email body from HTML to Markdown
    body = email_data.get('body') or {}
    body_html = body.get('content') or ''
    if (body.get('contentType') or '').lower() == 'html':
        markdown_converter = html2text.HTML2Text()
        markdown_converter.ignore_images = True
        markdown_converter.ignore_links = False
        body_markdown = markdown_converter.handle(body_html)
    else:
        body_markdown = body_html

    # Remove lines between any alphanumeric BannerStart and BannerEnd
    body_markdown = re.sub(r'\w+BannerStart.*?\w+BannerEnd', '', body_markdown, flags=re.DOTALL)
    msg.set_content(body_markdown)

    # Save attachments to disk and attach them to the message
    for attachment in email_data.get('attachments') or []:
        attachment_content = attachment.get('contentBytes')
        if not attachment_content:
            continue

        # The name comes from the server: keep only the final path component
        # so it cannot point outside attachments_dir.
        attachment_name = os.path.basename(attachment.get('name') or '')
        if attachment_name in ('', '.', '..'):
            attachment_name = 'unknown'

        # contentBytes is base64 text; decode it to get the real file bytes.
        try:
            payload = base64.b64decode(attachment_content)
        except ValueError:  # binascii.Error is a ValueError
            print(f"Skipping attachment with undecodable content: {attachment_name}")
            continue

        with open(os.path.join(attachments_dir, attachment_name), 'wb') as f:
            f.write(payload)

        # add_attachment() requires maintype/subtype for bytes payloads.
        content_type = (
            attachment.get('contentType')
            or mimetypes.guess_type(attachment_name)[0]
            or 'application/octet-stream'
        )
        maintype, _, subtype = content_type.split(';')[0].strip().partition('/')
        if not maintype or not subtype or maintype in ('message', 'multipart'):
            maintype, subtype = 'application', 'octet-stream'
        msg.add_attachment(payload, maintype=maintype, subtype=subtype, filename=attachment_name)

    # Determine the directory based on isRead
    target_dir = 'cur' if email_data.get('isRead', False) else 'new'
    email_filepath = os.path.join(maildir_path, target_dir, email_filename)

    # Write bytes so the result does not depend on the locale's text encoding.
    with open(email_filepath, 'wb') as f:
        f.write(msg.as_bytes())
    print(f"Saved message {message_id} to {email_filepath}")
|
||||
|
||||
# Read Azure app credentials from environment variables
|
||||
client_id = os.getenv('AZURE_CLIENT_ID')
|
||||
@@ -81,6 +158,8 @@ tenant_id = os.getenv('AZURE_TENANT_ID')
|
||||
if not client_id or not tenant_id:
|
||||
raise ValueError("Please set the AZURE_CLIENT_ID and AZURE_TENANT_ID environment variables.")
|
||||
|
||||
|
||||
|
||||
# Token cache
|
||||
cache = msal.SerializableTokenCache()
|
||||
cache_file = 'token_cache.bin'
|
||||
@@ -99,7 +178,7 @@ else:
|
||||
|
||||
# Authentication
|
||||
authority = f'https://login.microsoftonline.com/{tenant_id}'
|
||||
scopes = ['https://graph.microsoft.com/Calendars.Read', 'https://graph.microsoft.com/Mail.Read']
|
||||
scopes = ['https://graph.microsoft.com/Calendars.Read', 'https://graph.microsoft.com/Mail.ReadWrite']
|
||||
|
||||
app = msal.PublicClientApplication(client_id, authority=authority, token_cache=cache)
|
||||
accounts = app.get_accounts()
|
||||
@@ -126,7 +205,13 @@ accounts = app.get_accounts()
|
||||
|
||||
if not accounts:
|
||||
raise Exception("No accounts found")
|
||||
mail_url = 'https://graph.microsoft.com/v1.0/me/mailFolders/inbox/messages?$top=100&$orderby=receivedDateTime asc'
|
||||
|
||||
# Call the synchronization function before fetching mail
|
||||
print("Synchronizing maildir with server...")
|
||||
synchronize_maildir(maildir_path=os.getenv('MAILDIR_PATH', os.path.expanduser('~/Mail')) + "/corteva", headers=headers)
|
||||
print("Synchronization complete.")
|
||||
|
||||
mail_url = 'https://graph.microsoft.com/v1.0/me/mailFolders/inbox/messages?$top=100&$orderby=receivedDateTime asc&$select=id,subject,from,toRecipients,ccRecipients,receivedDateTime,isRead,body,attachments'
|
||||
messages = []
|
||||
print("Fetching mail...")
|
||||
|
||||
@@ -174,14 +259,17 @@ while mail_url:
|
||||
mail_url = response_data.get('@odata.nextLink')
|
||||
|
||||
|
||||
print("\nFinished fetching mail.")
|
||||
print("\nFinished fetching mail. Now saving them to maildir.")
|
||||
|
||||
maildir_path = os.getenv('MAILDIR_PATH', os.path.expanduser('~/Mail')) + "/corteva/INBOX"
|
||||
# Save emails to Maildir
|
||||
maildir_path = os.getenv('MAILDIR_PATH', os.path.expanduser('~/Mail')) + "/corteva"
|
||||
attachments_dir = os.path.join(maildir_path, 'attachments')
|
||||
os.makedirs(attachments_dir, exist_ok=True)
|
||||
create_maildir_structure(maildir_path)
|
||||
|
||||
for message in messages:
|
||||
print(f"Processing message: {message.get('subject', 'No Subject')}", end='\r')
|
||||
save_email_to_maildir(maildir_path, message)
|
||||
save_email_to_maildir(maildir_path, message, attachments_dir)
|
||||
|
||||
print(f"\nFinished processing {len(messages)} messages.")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user