basic working script

1  .gitignore (vendored)
@@ -6,3 +6,4 @@ output_markdown_files/output_5.md
 output_markdown_files/output_6.md
 token_cache.bin
 output_ics/outlook_events_latest.ics
+cache_timestamp.json

25  .vscode/launch.json (vendored)
@@ -1,15 +1,12 @@
 {
-    // Use IntelliSense to learn about possible attributes.
-    // Hover to view descriptions of existing attributes.
-    // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387
     "version": "0.2.0",
     "configurations": [
         {
             "name": "Python Debugger: Current File",
-            "type": "debugpy",
+            "type": "python",
             "request": "launch",
             "program": "${file}",
             "console": "integratedTerminal"
         }
     ]
 }

210  fetch_outlook.py (new file)
@@ -0,0 +1,210 @@
import os
import msal
import requests
import json
from datetime import datetime
from dateutil import parser
from dateutil.tz import UTC
from email.message import EmailMessage
import time

# Filepath for caching timestamp
cache_timestamp_file = 'cache_timestamp.json'

# Load cached timestamp if it exists
if os.path.exists(cache_timestamp_file):
    with open(cache_timestamp_file, 'r') as f:
        cache_timestamp = json.load(f)
else:
    cache_timestamp = {}

# Function to check if the cache is still valid
def is_cache_valid():
    if 'timestamp' in cache_timestamp and 'max_age' in cache_timestamp:
        current_time = time.time()
        cache_expiry_time = cache_timestamp['timestamp'] + cache_timestamp['max_age']
        return current_time < cache_expiry_time
    return False
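
# The timestamp/max_age pair is refreshed from the Cache-Control header of
# each mail response (see the fetch loop below).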


# Function to create Maildir structure
def create_maildir_structure(base_path):
    os.makedirs(os.path.join(base_path, 'cur'), exist_ok=True)
    os.makedirs(os.path.join(base_path, 'new'), exist_ok=True)
    os.makedirs(os.path.join(base_path, 'tmp'), exist_ok=True)
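
# (Maildir background: each message is a single file; 'tmp' holds in-progress
# writes, 'new' holds unseen deliveries, and 'cur' holds messages the mail
# client has already seen.)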

# Function to save email to Maildir format
def save_email_to_maildir(maildir_path, email_data):
    # Create a new EmailMessage object
    msg = EmailMessage()

    # Add required headers
    msg['Date'] = email_data.get('receivedDateTime', '')  # Use the receivedDateTime field
    msg['Message-ID'] = email_data.get('id', '')  # Use the unique ID of the message
    msg['Subject'] = email_data.get('subject', 'No Subject')  # Default to 'No Subject' if missing
    msg['From'] = email_data.get('from', {}).get('emailAddress', {}).get('address', 'unknown@unknown.com')
    msg['To'] = ', '.join([recipient['emailAddress']['address'] for recipient in email_data.get('toRecipients', [])])

    # Add the email body
    body = email_data.get('body', {}).get('content', '')
    msg.set_content(body)

    # Save the email to the Maildir 'new' folder
    email_filename = f"{msg['Message-ID'] or email_data.get('id', 'unknown')}.eml"
    email_filepath = os.path.join(maildir_path, 'new', email_filename)
    with open(email_filepath, 'w') as f:
        f.write(msg.as_string())
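
# Note: Graph returns receivedDateTime as an ISO 8601 string and 'id' as an
# opaque identifier, so the Date and Message-ID headers above are not strictly
# RFC 5322-formatted; most Maildir readers tolerate this, but strict parsers
# may not.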

# Read Azure app credentials from environment variables
client_id = os.getenv('AZURE_CLIENT_ID')
tenant_id = os.getenv('AZURE_TENANT_ID')

if not client_id or not tenant_id:
    raise ValueError("Please set the AZURE_CLIENT_ID and AZURE_TENANT_ID environment variables.")

# Token cache
cache = msal.SerializableTokenCache()
cache_file = 'token_cache.bin'

if os.path.exists(cache_file):
    cache.deserialize(open(cache_file, 'r').read())
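
# msal.SerializableTokenCache does not persist itself; the script serializes
# it back to disk after authentication (see below).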

# Filepath for caching ETag
etag_cache_file = 'etag_cache.json'
# Load cached ETag if it exists
if os.path.exists(etag_cache_file):
    with open(etag_cache_file, 'r') as f:
        etag_cache = json.load(f)
else:
    etag_cache = {}
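
# (etag_cache is loaded but not consulted anywhere below yet; presumably
# groundwork for conditional If-None-Match requests.)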

# Authentication
authority = f'https://login.microsoftonline.com/{tenant_id}'
scopes = ['https://graph.microsoft.com/Calendars.Read', 'https://graph.microsoft.com/Mail.Read']

app = msal.PublicClientApplication(client_id, authority=authority, token_cache=cache)
accounts = app.get_accounts()

if accounts:
    token_response = app.acquire_token_silent(scopes, account=accounts[0])
else:
    flow = app.initiate_device_flow(scopes=scopes)
    if 'user_code' not in flow:
        raise Exception("Failed to create device flow")
    print(flow['message'])
    token_response = app.acquire_token_by_device_flow(flow)
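
# Device code flow: flow['message'] tells the user which code to enter at the
# verification URL; acquire_token_by_device_flow() blocks and polls until the
# sign-in completes or the flow expires.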

# acquire_token_silent() returns None when no cached token satisfies the scopes
if not token_response or 'access_token' not in token_response:
    raise Exception("Failed to acquire token")

# Save token cache
with open(cache_file, 'w') as f:
    f.write(cache.serialize())

access_token = token_response['access_token']
headers = {'Authorization': f'Bearer {access_token}'}
accounts = app.get_accounts()

if not accounts:
    raise Exception("No accounts found")

mail_url = 'https://graph.microsoft.com/v1.0/me/mailFolders/inbox/messages?$top=100&$orderby=receivedDateTime asc'
messages = []
print("Fetching mail...")

# Fetch the total count of messages in the inbox
inbox_url = 'https://graph.microsoft.com/v1.0/me/mailFolders/inbox'
response = requests.get(inbox_url, headers=headers)

if response.status_code != 200:
    raise Exception(f"Failed to fetch inbox details: {response.status_code} {response.text}")

total_messages = response.json().get('totalItemCount', 0)
print(f"Total messages in inbox: {total_messages}")

while mail_url:
    if is_cache_valid():
        print("Using cached messages...")
        break  # No need to fetch further, cache is still valid

    response = requests.get(mail_url, headers=headers)

    if response.status_code != 200:
        raise Exception(f"Failed to fetch mail: {response.status_code} {response.text}")

    # Parse the Cache-Control header to get the max-age value
    cache_control = response.headers.get('Cache-Control', '')
    max_age = 0
    if 'max-age=' in cache_control:
        max_age = int(cache_control.split('max-age=')[1].split(',')[0])
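    # e.g. 'Cache-Control: private, max-age=300' -> max_age = 300; the naive
    # split works because max-age takes a bare integer value.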

    # Update the cache timestamp and max-age
    cache_timestamp['timestamp'] = time.time()
    cache_timestamp['max_age'] = max_age
    with open(cache_timestamp_file, 'w') as f:
        json.dump(cache_timestamp, f)

    # Process the response
    response_data = response.json()
    messages.extend(response_data.get('value', []))  # Add the current page of messages to the list

    # Calculate and display progress percentage
    progress = (len(messages) / total_messages) * 100 if total_messages > 0 else 0
    print(f"Fetched {len(messages)} of {total_messages} messages ({progress:.2f}%)", end='\r')

    # Get the next page URL from @odata.nextLink
    mail_url = response_data.get('@odata.nextLink')
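    # Graph pages results: '@odata.nextLink' carries the full URL of the next
    # page and is absent on the last page, which ends the loop.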


print("\nFinished fetching mail.")

maildir_path = os.getenv('MAILDIR_PATH', os.path.expanduser('~/Mail')) + "/corteva/INBOX"
create_maildir_structure(maildir_path)

for message in messages:
    print(f"Processing message: {message.get('subject', 'No Subject')}", end='\r')
    save_email_to_maildir(maildir_path, message)

print(f"\nFinished processing {len(messages)} messages.")


# Fetch events with pagination and expand recurring events
events_url = 'https://graph.microsoft.com/v1.0/me/events?$top=100&$expand=instances'
events = []
print("Fetching events...")
while events_url:
    response = requests.get(events_url, headers=headers)
    response_data = response.json()
    events.extend(response_data.get('value', []))
    print(f"Fetched {len(events)} events so far...", end='\r')
    events_url = response_data.get('@odata.nextLink')

# Save events to a file in iCalendar format
output_file = 'output_ics/outlook_events_latest.ics'
print(f"Saving events to {output_file}...")
with open(output_file, 'w') as f:
    f.write("BEGIN:VCALENDAR\nVERSION:2.0\n")
    for event in events:
        if 'start' in event and 'end' in event:
            start = parser.isoparse(event['start']['dateTime'])
            end = parser.isoparse(event['end']['dateTime'])
            f.write(f"BEGIN:VEVENT\nSUMMARY:{event['subject']}\n")
            f.write(f"DTSTART:{start.strftime('%Y%m%dT%H%M%S')}\n")
            f.write(f"DTEND:{end.strftime('%Y%m%dT%H%M%S')}\n")
            if 'recurrence' in event and event['recurrence']:  # Check if 'recurrence' exists and is not None
                for rule in event['recurrence']:
                    if rule.startswith('RRULE'):
                        rule_parts = rule.split(';')
                        new_rule_parts = []
                        for part in rule_parts:
                            if part.startswith('UNTIL='):
                                until_value = part.split('=')[1]
                                until_date = parser.isoparse(until_value)
                                if start.tzinfo is not None and until_date.tzinfo is None:
                                    until_date = until_date.replace(tzinfo=UTC)
                                new_rule_parts.append(f"UNTIL={until_date.strftime('%Y%m%dT%H%M%SZ')}")
                            else:
                                new_rule_parts.append(part)
                        rule = ';'.join(new_rule_parts)
                        f.write(f"{rule}\n")
            f.write("END:VEVENT\n")
    f.write("END:VCALENDAR\n")

@@ -1,89 +0,0 @@
import os
import msal
import requests
import json
from datetime import datetime
from dateutil import parser
from dateutil.tz import UTC

# Read Azure app credentials from environment variables
client_id = os.getenv('AZURE_CLIENT_ID')
tenant_id = os.getenv('AZURE_TENANT_ID')

if not client_id or not tenant_id:
    raise ValueError("Please set the AZURE_CLIENT_ID and AZURE_TENANT_ID environment variables.")

# Token cache
cache = msal.SerializableTokenCache()
cache_file = 'token_cache.bin'

if os.path.exists(cache_file):
    cache.deserialize(open(cache_file, 'r').read())

# Authentication
authority = f'https://login.microsoftonline.com/{tenant_id}'
scopes = ['https://graph.microsoft.com/Calendars.Read']

app = msal.PublicClientApplication(client_id, authority=authority, token_cache=cache)
accounts = app.get_accounts()

if accounts:
    token_response = app.acquire_token_silent(scopes, account=accounts[0])
else:
    flow = app.initiate_device_flow(scopes=scopes)
    if 'user_code' not in flow:
        raise Exception("Failed to create device flow")
    print(flow['message'])
    token_response = app.acquire_token_by_device_flow(flow)

if 'access_token' not in token_response:
    raise Exception("Failed to acquire token")

# Save token cache
with open(cache_file, 'w') as f:
    f.write(cache.serialize())

access_token = token_response['access_token']

# Fetch events with pagination and expand recurring events
headers = {'Authorization': f'Bearer {access_token}'}
events_url = 'https://graph.microsoft.com/v1.0/me/events?$top=100&$expand=instances'
events = []
print("Fetching events...")
while events_url:
    response = requests.get(events_url, headers=headers)
    response_data = response.json()
    events.extend(response_data.get('value', []))
    print(f"Fetched {len(events)} events so far...", end='\r')
    events_url = response_data.get('@odata.nextLink')

# Save events to a file in iCalendar format
output_file = f'output_ics/outlook_events_latest.ics'
print(f"Saving events to {output_file}...")
with open(output_file, 'w') as f:
    f.write("BEGIN:VCALENDAR\nVERSION:2.0\n")
    for event in events:
        if 'start' in event and 'end' in event:
            start = parser.isoparse(event['start']['dateTime'])
            end = parser.isoparse(event['end']['dateTime'])
            f.write(f"BEGIN:VEVENT\nSUMMARY:{event['subject']}\n")
            f.write(f"DTSTART:{start.strftime('%Y%m%dT%H%M%S')}\n")
            f.write(f"DTEND:{end.strftime('%Y%m%dT%H%M%S')}\n")
            if 'recurrence' in event and event['recurrence']:  # Check if 'recurrence' exists and is not None
                for rule in event['recurrence']:
                    if rule.startswith('RRULE'):
                        rule_parts = rule.split(';')
                        new_rule_parts = []
                        for part in rule_parts:
                            if part.startswith('UNTIL='):
                                until_value = part.split('=')[1]
                                until_date = parser.isoparse(until_value)
                                if start.tzinfo is not None and until_date.tzinfo is None:
                                    until_date = until_date.replace(tzinfo=UTC)
                                new_rule_parts.append(f"UNTIL={until_date.strftime('%Y%m%dT%H%M%SZ')}")
                            else:
                                new_rule_parts.append(part)
                        rule = ';'.join(new_rule_parts)
                        f.write(f"{rule}\n")
            f.write("END:VEVENT\n")
    f.write("END:VCALENDAR\n")