basic sync of trash and read status
This commit is contained in:
1
.gitignore
vendored
1
.gitignore
vendored
@@ -7,3 +7,4 @@ output_markdown_files/output_6.md
|
|||||||
token_cache.bin
|
token_cache.bin
|
||||||
output_ics/outlook_events_latest.ics
|
output_ics/outlook_events_latest.ics
|
||||||
cache_timestamp.json
|
cache_timestamp.json
|
||||||
|
sync_timestamp.json
|
||||||
|
|||||||
126
fetch_outlook.py
126
fetch_outlook.py
@@ -3,6 +3,7 @@ import re
|
|||||||
import msal
|
import msal
|
||||||
import requests
|
import requests
|
||||||
import json
|
import json
|
||||||
|
import glob
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
from dateutil import parser
|
from dateutil import parser
|
||||||
from dateutil.tz import UTC
|
from dateutil.tz import UTC
|
||||||
@@ -14,6 +15,62 @@ import html2text
|
|||||||
# Filepath for caching timestamp
|
# Filepath for caching timestamp
|
||||||
cache_timestamp_file = 'cache_timestamp.json'
|
cache_timestamp_file = 'cache_timestamp.json'
|
||||||
|
|
||||||
|
|
||||||
|
# Filepath for sync timestamp
|
||||||
|
sync_timestamp_file = 'sync_timestamp.json'
|
||||||
|
|
||||||
|
# Function to load the last sync timestamp
|
||||||
|
def load_last_sync_timestamp():
    """Return the time of the last recorded sync as a Unix timestamp.

    Reads the ``last_sync`` entry from the JSON file named by the
    module-level ``sync_timestamp_file``.

    Returns:
        The stored number, or ``0`` when the file does not exist, is not
        valid JSON, or does not contain a numeric ``last_sync`` entry.
    """
    try:
        with open(sync_timestamp_file, 'r') as f:
            data = json.load(f)
    except (FileNotFoundError, ValueError):
        # json.JSONDecodeError is a ValueError: a truncated or corrupt
        # file is treated the same as "never synced" instead of crashing.
        return 0

    if not isinstance(data, dict):
        return 0

    last_sync = data.get('last_sync', 0)
    # bool is an int subclass, so it has to be rejected explicitly.
    if isinstance(last_sync, bool) or not isinstance(last_sync, (int, float)):
        return 0
    return last_sync
|
||||||
|
|
||||||
|
# Function to save the current sync timestamp
|
||||||
|
def save_sync_timestamp():
    """Record the current time as the moment of the last sync.

    Writes ``{"last_sync": <time.time()>}`` as JSON to the file named by
    the module-level ``sync_timestamp_file``.

    The JSON is first written to ``<sync_timestamp_file>.tmp`` and then
    moved over the real file with ``os.replace``, so the real file is
    never opened for truncation and an interrupted write only affects the
    temporary file.
    """
    tmp_path = f'{sync_timestamp_file}.tmp'
    with open(tmp_path, 'w') as f:
        json.dump({'last_sync': time.time()}, f)
    os.replace(tmp_path, sync_timestamp_file)
|
||||||
|
|
||||||
|
# Function to synchronize maildir with the server
|
||||||
|
def _eml_names(directory):
    """Return the set of ``*.eml`` file names (not paths) found in *directory*."""
    return {os.path.basename(path) for path in glob.glob(os.path.join(directory, '*.eml'))}


def _graph_message_request(method, message_id, headers, ok_statuses, **kwargs):
    """Send one Graph request for a message.

    Returns ``None`` when the response status is in *ok_statuses*, otherwise
    a short error description suitable for printing.
    """
    url = f'https://graph.microsoft.com/v1.0/me/messages/{message_id}'
    try:
        # A timeout keeps one stalled connection from hanging the whole sync.
        response = requests.request(method, url, headers=headers, timeout=30, **kwargs)
    except requests.RequestException as exc:
        return str(exc)
    if response.status_code in ok_statuses:
        return None
    return f"{response.status_code}, {response.text}"


def synchronize_maildir(maildir_path, headers):
    """Push local read/trash state of a maildir to the server.

    Every ``*.eml`` file in ``<maildir_path>/cur`` whose name is not also
    present in ``<maildir_path>/new`` is marked as read on the server, and
    every ``*.eml`` file in ``<maildir_path>/.Trash/cur`` is deleted on the
    server. The part of the file name before the first ``.`` is used as the
    message ID. Failures are printed and do not stop the remaining
    messages. The sync timestamp is saved at the end; it is not used here
    to filter which files are processed.

    Args:
        maildir_path: Root directory of the maildir.
        headers: HTTP headers (including authorization) sent with every
            request.
    """
    # NOTE(review): mail clients commonly rename files to "<name>:2,<flags>"
    # when moving them into cur; such names would not match "*.eml" - confirm
    # against the client in use.

    # Compare file names, not full paths: paths from two different
    # directories can never be equal, which made the difference a no-op.
    new_names = _eml_names(os.path.join(maildir_path, 'new'))
    cur_names = _eml_names(os.path.join(maildir_path, 'cur'))

    # Find messages moved from "new" to "cur" and mark them as read
    for filename in sorted(cur_names - new_names):
        message_id = filename.split('.')[0]  # Extract the Message-ID from the filename
        print(f"Marking message as read: {message_id}")
        error = _graph_message_request('PATCH', message_id, headers, (200,), json={'isRead': True})
        if error is not None:
            print(f"Failed to mark message as read: {message_id}, {error}")

    # Find messages moved to ".Trash/cur" and delete them on the server
    trash_names = _eml_names(os.path.join(maildir_path, '.Trash', 'cur'))
    for filename in sorted(trash_names):
        message_id = filename.split('.')[0]  # Extract the Message-ID from the filename
        print(f"Moving message to trash: {message_id}")
        # 204 No Content indicates success. 404 means the server no longer
        # has a message under this ID (for example it was deleted by an
        # earlier run), which is already the desired end state.
        error = _graph_message_request('DELETE', message_id, headers, (204, 404))
        if error is not None:
            print(f"Failed to move message to trash: {message_id}, {error}")

    # Save the current sync timestamp
    save_sync_timestamp()
|
||||||
|
|
||||||
|
|
||||||
# Load cached timestamp if it exists
|
# Load cached timestamp if it exists
|
||||||
if os.path.exists(cache_timestamp_file):
|
if os.path.exists(cache_timestamp_file):
|
||||||
with open(cache_timestamp_file, 'r') as f:
|
with open(cache_timestamp_file, 'r') as f:
|
||||||
@@ -36,43 +93,63 @@ def create_maildir_structure(base_path):
|
|||||||
os.makedirs(os.path.join(base_path, 'new'), exist_ok=True)
|
os.makedirs(os.path.join(base_path, 'new'), exist_ok=True)
|
||||||
os.makedirs(os.path.join(base_path, 'tmp'), exist_ok=True)
|
os.makedirs(os.path.join(base_path, 'tmp'), exist_ok=True)
|
||||||
|
|
||||||
# Function to save email to Maildir format with Markdown conversion
|
def _attach_files(msg, attachments, attachments_dir):
    """Decode inline attachments, write them to *attachments_dir* and add them to *msg*."""
    # Imported locally so this block does not depend on the file's import list.
    import base64
    import mimetypes

    for attachment in attachments:
        encoded = attachment.get('contentBytes')
        if not encoded:
            continue  # nothing inline to save for this attachment

        # The name comes from the email itself: keep only the final path
        # component so it cannot point outside attachments_dir.
        raw_name = str(attachment.get('name') or 'unknown').replace('\\', '/')
        name = os.path.basename(raw_name)
        if name in ('', '.', '..'):
            name = 'unknown'

        # contentBytes is base64 text; decode it to get the real file bytes.
        try:
            payload = base64.b64decode(encoded)
        except ValueError as exc:  # binascii.Error is a ValueError
            print(f"Skipping attachment {name}: invalid base64 data ({exc})")
            continue

        with open(os.path.join(attachments_dir, name), 'wb') as f:
            f.write(payload)

        # add_attachment() needs an explicit MIME type for bytes content.
        content_type = (
            attachment.get('contentType')
            or mimetypes.guess_type(name)[0]
            or 'application/octet-stream'
        )
        maintype, _, subtype = content_type.split(';')[0].strip().partition('/')
        if not maintype or not subtype or maintype in ('multipart', 'message'):
            maintype, subtype = 'application', 'octet-stream'
        msg.add_attachment(payload, maintype=maintype, subtype=subtype, filename=name)


def save_email_to_maildir(maildir_path, email_data, attachments_dir):
    """Save one fetched message as ``<Message-ID>.eml`` inside a maildir.

    The message is written to ``cur`` when ``email_data['isRead']`` is true
    and to ``new`` otherwise. If a file with the same name already exists
    anywhere below *maildir_path*, nothing is written (including
    attachments) and the function returns. HTML bodies are converted to
    Markdown, and text between ``...BannerStart`` and ``...BannerEnd``
    markers is removed.

    Args:
        maildir_path: Root directory of the maildir.
        email_data: Message dictionary as returned by the mail API; the keys
            read here are ``id``, ``subject``, ``from``, ``toRecipients``,
            ``ccRecipients``, ``receivedDateTime``, ``isRead``, ``body`` and
            ``attachments``.
        attachments_dir: Directory that receives the decoded attachment
            files. Files with the same name overwrite each other.
    """
    # Create a new EmailMessage object
    msg = EmailMessage()

    received_datetime = email_data.get('receivedDateTime', '')
    if received_datetime:
        parsed_datetime = parser.isoparse(received_datetime)
        msg['Date'] = format_datetime(parsed_datetime)
    else:
        msg['Date'] = ''

    msg['Message-ID'] = email_data.get('id', '')
    msg['Subject'] = email_data.get('subject', 'No Subject')
    msg['From'] = email_data.get('from', {}).get('emailAddress', {}).get('address', 'unknown@unknown.com')
    msg['To'] = ', '.join([recipient['emailAddress']['address'] for recipient in email_data.get('toRecipients', [])])
    msg['Cc'] = ', '.join([recipient['emailAddress']['address'] for recipient in email_data.get('ccRecipients', [])])

    # Determine the directory based on isRead
    target_dir = 'cur' if email_data.get('isRead', False) else 'new'
    email_filename = f"{msg['Message-ID']}.eml"
    email_filepath = os.path.join(maildir_path, target_dir, email_filename)

    # Check if the file already exists in any subfolder. Done before the body
    # and attachments are processed so skipped messages cost no extra work.
    for root, _, files in os.walk(maildir_path):
        if email_filename in files:
            print(f"Message {msg['Message-ID']} already exists in {root}. Skipping save.")
            return

    # Convert the email body from HTML to Markdown
    body_html = email_data.get('body', {}).get('content', '')
    if email_data.get('body', {}).get('contentType', '').lower() == 'html':
        markdown_converter = html2text.HTML2Text()
        markdown_converter.ignore_images = True
        markdown_converter.ignore_links = False
        body_markdown = markdown_converter.handle(body_html)
    else:
        body_markdown = body_html

    # Remove lines between any alphanumeric BannerStart and BannerEnd
    body_markdown = re.sub(r'\w+BannerStart.*?\w+BannerEnd', '', body_markdown, flags=re.DOTALL)
    msg.set_content(body_markdown)

    # Download attachments
    _attach_files(msg, email_data.get('attachments', []), attachments_dir)

    # Save the email to the Maildir
    with open(email_filepath, 'w') as f:
        f.write(msg.as_string())
    print(f"Saved message {msg['Message-ID']} to {email_filepath}")
|
||||||
|
|
||||||
# Read Azure app credentials from environment variables
|
# Read Azure app credentials from environment variables
|
||||||
client_id = os.getenv('AZURE_CLIENT_ID')
|
client_id = os.getenv('AZURE_CLIENT_ID')
|
||||||
@@ -81,6 +158,8 @@ tenant_id = os.getenv('AZURE_TENANT_ID')
|
|||||||
if not client_id or not tenant_id:
|
if not client_id or not tenant_id:
|
||||||
raise ValueError("Please set the AZURE_CLIENT_ID and AZURE_TENANT_ID environment variables.")
|
raise ValueError("Please set the AZURE_CLIENT_ID and AZURE_TENANT_ID environment variables.")
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Token cache
|
# Token cache
|
||||||
cache = msal.SerializableTokenCache()
|
cache = msal.SerializableTokenCache()
|
||||||
cache_file = 'token_cache.bin'
|
cache_file = 'token_cache.bin'
|
||||||
@@ -99,7 +178,7 @@ else:
|
|||||||
|
|
||||||
# Authentication
|
# Authentication
|
||||||
authority = f'https://login.microsoftonline.com/{tenant_id}'
|
authority = f'https://login.microsoftonline.com/{tenant_id}'
|
||||||
scopes = ['https://graph.microsoft.com/Calendars.Read', 'https://graph.microsoft.com/Mail.Read']
|
scopes = ['https://graph.microsoft.com/Calendars.Read', 'https://graph.microsoft.com/Mail.ReadWrite']
|
||||||
|
|
||||||
app = msal.PublicClientApplication(client_id, authority=authority, token_cache=cache)
|
app = msal.PublicClientApplication(client_id, authority=authority, token_cache=cache)
|
||||||
accounts = app.get_accounts()
|
accounts = app.get_accounts()
|
||||||
@@ -126,7 +205,13 @@ accounts = app.get_accounts()
|
|||||||
|
|
||||||
if not accounts:
|
if not accounts:
|
||||||
raise Exception("No accounts found")
|
raise Exception("No accounts found")
|
||||||
# Push local read/trash changes to the server before fetching mail
print("Synchronizing maildir with server...")
synchronize_maildir(maildir_path=os.getenv('MAILDIR_PATH', os.path.expanduser('~/Mail')) + "/corteva", headers=headers)
print("Synchronization complete.")

# Attachments are a relationship of a message rather than a plain property,
# so they are requested with $expand instead of being listed in $select.
mail_url = 'https://graph.microsoft.com/v1.0/me/mailFolders/inbox/messages?$top=100&$orderby=receivedDateTime asc&$select=id,subject,from,toRecipients,ccRecipients,receivedDateTime,isRead,body&$expand=attachments'
|
||||||
messages = []
|
messages = []
|
||||||
print("Fetching mail...")
|
print("Fetching mail...")
|
||||||
|
|
||||||
@@ -174,14 +259,17 @@ while mail_url:
|
|||||||
mail_url = response_data.get('@odata.nextLink')
|
mail_url = response_data.get('@odata.nextLink')
|
||||||
|
|
||||||
|
|
||||||
print("\nFinished fetching mail.")
|
print("\nFinished fetching mail. Now saving them to maildir.")
|
||||||
|
|
||||||
maildir_path = os.getenv('MAILDIR_PATH', os.path.expanduser('~/Mail')) + "/corteva/INBOX"
|
# Save emails to Maildir
|
||||||
|
maildir_path = os.getenv('MAILDIR_PATH', os.path.expanduser('~/Mail')) + "/corteva"
|
||||||
|
attachments_dir = os.path.join(maildir_path, 'attachments')
|
||||||
|
os.makedirs(attachments_dir, exist_ok=True)
|
||||||
create_maildir_structure(maildir_path)
|
create_maildir_structure(maildir_path)
|
||||||
|
|
||||||
for message in messages:
|
for message in messages:
|
||||||
print(f"Processing message: {message.get('subject', 'No Subject')}", end='\r')
|
print(f"Processing message: {message.get('subject', 'No Subject')}", end='\r')
|
||||||
save_email_to_maildir(maildir_path, message)
|
save_email_to_maildir(maildir_path, message, attachments_dir)
|
||||||
|
|
||||||
print(f"\nFinished processing {len(messages)} messages.")
|
print(f"\nFinished processing {len(messages)} messages.")
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user