diff --git a/.gitignore b/.gitignore index 1880e23..3ad97b5 100644 --- a/.gitignore +++ b/.gitignore @@ -6,3 +6,4 @@ output_markdown_files/output_5.md output_markdown_files/output_6.md token_cache.bin output_ics/outlook_events_latest.ics +cache_timestamp.json diff --git a/.vscode/launch.json b/.vscode/launch.json index e0121b7..5a341ae 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -1,15 +1,12 @@ { - // Use IntelliSense to learn about possible attributes. - // Hover to view descriptions of existing attributes. - // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 - "version": "0.2.0", - "configurations": [ - { - "name": "Python Debugger: Current File", - "type": "debugpy", - "request": "launch", - "program": "${file}", - "console": "integratedTerminal" - } - ] -} \ No newline at end of file + "version": "0.2.0", + "configurations": [ + { + "name": "Python Debugger: Current File", + "type": "debugpy", + "request": "launch", + "program": "${file}", + "console": "integratedTerminal" + } + ] +} diff --git a/fetch_outlook.py b/fetch_outlook.py new file mode 100644 index 0000000..c40db90 --- /dev/null +++ b/fetch_outlook.py @@ -0,0 +1,210 @@ +import os +import msal +import requests +import json +from datetime import datetime +from dateutil import parser +from dateutil.tz import UTC +from email.message import EmailMessage +import time + +# Filepath for caching timestamp +cache_timestamp_file = 'cache_timestamp.json' + +# Load cached timestamp if it exists +if os.path.exists(cache_timestamp_file): + with open(cache_timestamp_file, 'r') as f: + cache_timestamp = json.load(f) +else: + cache_timestamp = {} + +# Function to check if the cache is still valid +def is_cache_valid(): + if 'timestamp' in cache_timestamp and 'max_age' in cache_timestamp: + current_time = time.time() + cache_expiry_time = cache_timestamp['timestamp'] + cache_timestamp['max_age'] + return current_time < cache_expiry_time + return False + + +# Function to create 
Maildir structure +def create_maildir_structure(base_path): + os.makedirs(os.path.join(base_path, 'cur'), exist_ok=True) + os.makedirs(os.path.join(base_path, 'new'), exist_ok=True) + os.makedirs(os.path.join(base_path, 'tmp'), exist_ok=True) + +# Function to save email to Maildir format +def save_email_to_maildir(maildir_path, email_data): + # Create a new EmailMessage object + msg = EmailMessage() + + # Add required headers + msg['Date'] = email_data.get('receivedDateTime', '') # Use the receivedDateTime field + msg['Message-ID'] = email_data.get('id', '') # Use the unique ID of the message + msg['Subject'] = email_data.get('subject', 'No Subject') # Default to 'No Subject' if missing + msg['From'] = email_data.get('from', {}).get('emailAddress', {}).get('address', 'unknown@unknown.com') + msg['To'] = ', '.join([recipient['emailAddress']['address'] for recipient in email_data.get('toRecipients', [])]) + + # Add the email body + body = email_data.get('body', {}).get('content', '') + msg.set_content(body) + + # Save the email to the Maildir 'new' folder + email_filename = f"{msg['Message-ID'] or email_data.get('id', 'unknown')}.eml" + email_filepath = os.path.join(maildir_path, 'new', email_filename) + with open(email_filepath, 'w') as f: + f.write(msg.as_string()) + +# Read Azure app credentials from environment variables +client_id = os.getenv('AZURE_CLIENT_ID') +tenant_id = os.getenv('AZURE_TENANT_ID') + +if not client_id or not tenant_id: + raise ValueError("Please set the AZURE_CLIENT_ID and AZURE_TENANT_ID environment variables.") + +# Token cache +cache = msal.SerializableTokenCache() +cache_file = 'token_cache.bin' + +if os.path.exists(cache_file): + cache.deserialize(open(cache_file, 'r').read()) + +# Filepath for caching ETag +etag_cache_file = 'etag_cache.json' +# Load cached ETag if it exists +if os.path.exists(etag_cache_file): + with open(etag_cache_file, 'r') as f: + etag_cache = json.load(f) +else: + etag_cache = {} + +# Authentication +authority = 
f'https://login.microsoftonline.com/{tenant_id}' +scopes = ['https://graph.microsoft.com/Calendars.Read', 'https://graph.microsoft.com/Mail.Read'] + +app = msal.PublicClientApplication(client_id, authority=authority, token_cache=cache) +accounts = app.get_accounts() + +if accounts: + token_response = app.acquire_token_silent(scopes, account=accounts[0]) +else: + flow = app.initiate_device_flow(scopes=scopes) + if 'user_code' not in flow: + raise Exception("Failed to create device flow") + print(flow['message']) + token_response = app.acquire_token_by_device_flow(flow) + +if 'access_token' not in token_response: + raise Exception("Failed to acquire token") + +# Save token cache +with open(cache_file, 'w') as f: + f.write(cache.serialize()) + +access_token = token_response['access_token'] +headers = {'Authorization': f'Bearer {access_token}'} +accounts = app.get_accounts() + +if not accounts: + raise Exception("No accounts found") +mail_url = 'https://graph.microsoft.com/v1.0/me/mailFolders/inbox/messages?$top=100&$orderby=receivedDateTime asc' +messages = [] +print("Fetching mail...") + +# Fetch the total count of messages in the inbox +inbox_url = 'https://graph.microsoft.com/v1.0/me/mailFolders/inbox' +response = requests.get(inbox_url, headers=headers) + +if response.status_code != 200: + raise Exception(f"Failed to fetch inbox details: {response.status_code} {response.text}") + +total_messages = response.json().get('totalItemCount', 0) +print(f"Total messages in inbox: {total_messages}") + +while mail_url: + if is_cache_valid(): + print("Using cached messages...") + break # No need to fetch further, cache is still valid + + response = requests.get(mail_url, headers=headers) + + if response.status_code != 200: + raise Exception(f"Failed to fetch mail: {response.status_code} {response.text}") + + # Parse the Cache-Control header to get the max-age value + cache_control = response.headers.get('Cache-Control', '') + max_age = 0 + if 'max-age=' in cache_control: + 
max_age = int(cache_control.split('max-age=')[1].split(',')[0]) + + # Update the cache timestamp and max-age + cache_timestamp['timestamp'] = time.time() + cache_timestamp['max_age'] = max_age + with open(cache_timestamp_file, 'w') as f: + json.dump(cache_timestamp, f) + + # Process the response + response_data = response.json() + messages.extend(response_data.get('value', [])) # Add the current page of messages to the list + + # Calculate and display progress percentage + progress = (len(messages) / total_messages) * 100 if total_messages > 0 else 0 + print(f"Fetched {len(messages)} of {total_messages} messages ({progress:.2f}%)", end='\r') + + # Get the next page URL from @odata.nextLink + mail_url = response_data.get('@odata.nextLink') + + +print("\nFinished fetching mail.") + +maildir_path = os.getenv('MAILDIR_PATH', os.path.expanduser('~/Mail')) + "/corteva/INBOX" +create_maildir_structure(maildir_path) + +for message in messages: + print(f"Processing message: {message.get('subject', 'No Subject')}", end='\r') + save_email_to_maildir(maildir_path, message) + +print(f"\nFinished processing {len(messages)} messages.") + + +# Fetch events with pagination and expand recurring events +events_url = 'https://graph.microsoft.com/v1.0/me/events?$top=100&$expand=instances' +events = [] +print("Fetching events...") +while events_url: + response = requests.get(events_url, headers=headers) + response_data = response.json() + events.extend(response_data.get('value', [])) + print(f"Fetched {len(events)} events so far...", end='\r') + events_url = response_data.get('@odata.nextLink') + +# Save events to a file in iCalendar format +output_file = f'output_ics/outlook_events_latest.ics' +print(f"Saving events to {output_file}...") +with open(output_file, 'w') as f: + f.write("BEGIN:VCALENDAR\nVERSION:2.0\n") + for event in events: + if 'start' in event and 'end' in event: + start = parser.isoparse(event['start']['dateTime']) + end = parser.isoparse(event['end']['dateTime']) + 
f.write(f"BEGIN:VEVENT\nSUMMARY:{event['subject']}\n") + f.write(f"DTSTART:{start.strftime('%Y%m%dT%H%M%S')}\n") + f.write(f"DTEND:{end.strftime('%Y%m%dT%H%M%S')}\n") + if 'recurrence' in event and event['recurrence']: # Check if 'recurrence' exists and is not None + for rule in event['recurrence']: + if rule.startswith('RRULE'): + rule_parts = rule.split(';') + new_rule_parts = [] + for part in rule_parts: + if part.startswith('UNTIL='): + until_value = part.split('=')[1] + until_date = parser.isoparse(until_value) + if start.tzinfo is not None and until_date.tzinfo is None: + until_date = until_date.replace(tzinfo=UTC) + new_rule_parts.append(f"UNTIL={until_date.strftime('%Y%m%dT%H%M%SZ')}") + else: + new_rule_parts.append(part) + rule = ';'.join(new_rule_parts) + f.write(f"{rule}\n") + f.write("END:VEVENT\n") + f.write("END:VCALENDAR\n") diff --git a/fetch_outlook_events.py b/fetch_outlook_events.py deleted file mode 100644 index 420df1c..0000000 --- a/fetch_outlook_events.py +++ /dev/null @@ -1,89 +0,0 @@ -import os -import msal -import requests -import json -from datetime import datetime -from dateutil import parser -from dateutil.tz import UTC - -# Read Azure app credentials from environment variables -client_id = os.getenv('AZURE_CLIENT_ID') -tenant_id = os.getenv('AZURE_TENANT_ID') - -if not client_id or not tenant_id: - raise ValueError("Please set the AZURE_CLIENT_ID and AZURE_TENANT_ID environment variables.") - -# Token cache -cache = msal.SerializableTokenCache() -cache_file = 'token_cache.bin' - -if os.path.exists(cache_file): - cache.deserialize(open(cache_file, 'r').read()) - -# Authentication -authority = f'https://login.microsoftonline.com/{tenant_id}' -scopes = ['https://graph.microsoft.com/Calendars.Read'] - -app = msal.PublicClientApplication(client_id, authority=authority, token_cache=cache) -accounts = app.get_accounts() - -if accounts: - token_response = app.acquire_token_silent(scopes, account=accounts[0]) -else: - flow = 
app.initiate_device_flow(scopes=scopes) - if 'user_code' not in flow: - raise Exception("Failed to create device flow") - print(flow['message']) - token_response = app.acquire_token_by_device_flow(flow) - -if 'access_token' not in token_response: - raise Exception("Failed to acquire token") - -# Save token cache -with open(cache_file, 'w') as f: - f.write(cache.serialize()) - -access_token = token_response['access_token'] - -# Fetch events with pagination and expand recurring events -headers = {'Authorization': f'Bearer {access_token}'} -events_url = 'https://graph.microsoft.com/v1.0/me/events?$top=100&$expand=instances' -events = [] -print("Fetching events...") -while events_url: - response = requests.get(events_url, headers=headers) - response_data = response.json() - events.extend(response_data.get('value', [])) - print(f"Fetched {len(events)} events so far...", end='\r') - events_url = response_data.get('@odata.nextLink') - -# Save events to a file in iCalendar format -output_file = f'output_ics/outlook_events_latest.ics' -print(f"Saving events to {output_file}...") -with open(output_file, 'w') as f: - f.write("BEGIN:VCALENDAR\nVERSION:2.0\n") - for event in events: - if 'start' in event and 'end' in event: - start = parser.isoparse(event['start']['dateTime']) - end = parser.isoparse(event['end']['dateTime']) - f.write(f"BEGIN:VEVENT\nSUMMARY:{event['subject']}\n") - f.write(f"DTSTART:{start.strftime('%Y%m%dT%H%M%S')}\n") - f.write(f"DTEND:{end.strftime('%Y%m%dT%H%M%S')}\n") - if 'recurrence' in event and event['recurrence']: # Check if 'recurrence' exists and is not None - for rule in event['recurrence']: - if rule.startswith('RRULE'): - rule_parts = rule.split(';') - new_rule_parts = [] - for part in rule_parts: - if part.startswith('UNTIL='): - until_value = part.split('=')[1] - until_date = parser.isoparse(until_value) - if start.tzinfo is not None and until_date.tzinfo is None: - until_date = until_date.replace(tzinfo=UTC) - 
new_rule_parts.append(f"UNTIL={until_date.strftime('%Y%m%dT%H%M%SZ')}") - else: - new_rule_parts.append(part) - rule = ';'.join(new_rule_parts) - f.write(f"{rule}\n") - f.write("END:VEVENT\n") - f.write("END:VCALENDAR\n")