Files
luk/benchmark_list_update.py
2025-05-14 15:11:24 -06:00

316 lines
12 KiB
Python
Executable File

#!/usr/bin/env python3
"""
Benchmark script to compare two approaches for updating envelopes list in maildir_gtd.
This script compares:
1. Using .pop() to remove items from ListView
2. Using refresh_list_view() to rebuild the entire ListView
It tests with different numbers of envelopes (100, 1000, 2000) and measures:
- Time to remove a single item
- Time to remove multiple items in sequence
- Memory usage
"""
import sys
import os
import time
import random
import gc
import tracemalloc
from datetime import datetime, timedelta, UTC
from typing import List, Dict, Any, Callable, Tuple
import json
# Add parent directory to path so we can import modules correctly
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
# Import required classes and functions
from textual.widgets import ListView, ListItem, Label
from textual.app import App, ComposeResult
from textual.containers import Vertical
# Import our application's modules
from maildir_gtd.app import MessageStore
from maildir_gtd.utils import group_envelopes_by_date
# Mock class to simulate the ListView behavior
class MockListView:
    """Minimal list-backed stand-in for a ListView.

    Provides only the operations the benchmark code calls: ``append``,
    ``pop``, ``clear`` and ``len()``. Entries live in ``items``; ``index``
    is initialised to 0 and is not changed by any method of this class.
    """

    def __init__(self):
        self.index = 0
        self.items = []

    def append(self, item):
        """Add ``item`` at the end of ``items``."""
        self.items.append(item)

    def pop(self, idx=None):
        """Remove and return the entry at ``idx``, or the last entry when ``idx`` is None."""
        position = -1 if idx is None else idx
        return self.items.pop(position)

    def clear(self):
        """Discard all entries by rebinding ``items`` to a new empty list."""
        self.items = list()

    def __len__(self):
        """Return the number of entries currently held."""
        return len(self.items)
# Helper functions to generate test data
def generate_envelope(idx: int) -> Dict[str, Any]:
"""Generate a synthetic envelope with predictable data."""
now = datetime.now(UTC)
# Distribute dates over the last 60 days to create realistic grouping
date = now - timedelta(days=random.randint(0, 60),
hours=random.randint(0, 23),
minutes=random.randint(0, 59))
return {
"id": str(idx),
"subject": f"Test Subject {idx}",
"from": {"addr": f"sender{idx}@example.com"},
"to": {"addr": f"recipient{idx}@example.com"},
"date": date.strftime("%Y-%m-%d %H:%M"),
"cc": {},
"type": "message"
}
def generate_test_envelopes(count: int) -> List[Dict[str, Any]]:
    """Return ``count`` synthetic envelopes, numbered 1 through ``count``."""
    envelopes: List[Dict[str, Any]] = []
    for number in range(1, count + 1):
        envelopes.append(generate_envelope(number))
    return envelopes
# Benchmark functions
def benchmark_pop_approach(store: MessageStore, list_view: MockListView, indices_to_remove: List[int]) -> float:
    """Time removing messages with ``store.remove`` followed by ``list_view.pop``.

    Args:
        store: Message store; ``store.envelopes[idx]["id"]`` is read for each
            index and passed (as an int) to ``store.remove``.
        list_view: List view from which the entry at the same index is popped.
        indices_to_remove: Positions in ``store.envelopes`` of the messages to remove.

    Returns:
        Elapsed time in seconds for all removals.
    """
    # perf_counter is monotonic and high-resolution; time.time() can report
    # 0.0 for short runs and may jump if the system clock is adjusted.
    start_time = time.perf_counter()
    # Remove from the highest index to the lowest so earlier removals cannot
    # shift the positions of the ones still to be processed.
    for idx in sorted(indices_to_remove, reverse=True):
        msg_id = int(store.envelopes[idx]["id"])
        store.remove(msg_id)
        # NOTE(review): assumes list_view positions line up one-to-one with
        # store.envelopes positions - confirm against how the view is built.
        list_view.pop(idx)
    return time.perf_counter() - start_time
def benchmark_refresh_approach(store: MessageStore, list_view: MockListView, indices_to_remove: List[int]) -> float:
    """Time removing messages and rebuilding the whole list view after each one.

    Args:
        store: Message store; ``store.envelopes[idx]["id"]`` is read for each
            index and passed (as an int) to ``store.remove``.
        list_view: List view that is cleared and repopulated after every removal.
        indices_to_remove: Positions in ``store.envelopes`` of the messages to remove.

    Returns:
        Elapsed time in seconds for all removals and rebuilds.
    """
    # perf_counter is monotonic and high-resolution; time.time() can report
    # 0.0 for short runs and may jump if the system clock is adjusted.
    start_time = time.perf_counter()
    # Resolve every message id before the first removal: the indices refer to
    # the store as it was handed in, and would go stale if store.remove()
    # changes the positions of the remaining envelopes.
    msg_ids = [int(store.envelopes[idx]["id"]) for idx in indices_to_remove]
    for msg_id in msg_ids:
        store.remove(msg_id)
        # Simulate refresh_list_view by clearing and rebuilding the list.
        list_view.clear()
        for item in store.envelopes:
            if not item:
                # Skip None / empty entries.
                continue
            if item.get("type") == "header":
                list_view.append(f"Header: {item['label']}")
            else:
                list_view.append(f"Email: {item.get('subject', '')}")
    return time.perf_counter() - start_time
def run_memory_benchmark(func: Callable[..., Any], *args: Any) -> Tuple[Any, int, int]:
    """Call ``func(*args)`` while tracemalloc is tracing and report memory use.

    Args:
        func: Callable to execute.
        *args: Positional arguments forwarded to ``func``.

    Returns:
        A tuple ``(result, current, peak)``: the return value of ``func`` and
        the two values from ``tracemalloc.get_traced_memory()`` (in bytes),
        read right after ``func`` returns.

    Raises:
        Whatever ``func`` raises. Tracing is stopped in that case too, so a
        failing benchmark does not leave tracemalloc enabled for later code.
    """
    tracemalloc.start()
    try:
        result = func(*args)
        current, peak = tracemalloc.get_traced_memory()
    finally:
        tracemalloc.stop()
    return result, current, peak
def _build_list_view(store) -> MockListView:
    """Create a MockListView with one entry per non-empty item of ``store.envelopes``."""
    view = MockListView()
    for item in store.envelopes:
        if not item:
            continue
        if item.get("type") == "header":
            view.append(f"Header: {item['label']}")
        else:
            view.append(f"Email: {item.get('subject', '')}")
    return view


def _fresh_store_and_view(envelopes):
    """Load a copy of ``envelopes`` into a new MessageStore and build its list view."""
    store = MessageStore()
    # NOTE(review): this is a shallow copy, so the envelope dicts are shared
    # between stores - fine as long as MessageStore does not mutate them.
    store.load(envelopes.copy())
    return store, _build_list_view(store)


def _speedup(slower: float, faster: float) -> float:
    """Return ``slower / faster`` without dividing by zero for unmeasurably short runs."""
    if faster > 0:
        return slower / faster
    return float("inf") if slower > 0 else 1.0


def _print_measurement(label: str, seconds: float, current: int, peak: int) -> None:
    """Print one timing/memory result line for the approach named ``label``."""
    print(f" {label} approach: {seconds*1000:.2f} ms (Memory - Current: {current/1024:.1f} KB, Peak: {peak/1024:.1f} KB)")


def _print_winner(pop_time: float, refresh_time: float, scope: str) -> None:
    """Print which approach was faster for ``scope`` and by what factor."""
    if pop_time < refresh_time:
        print(f" 🥇 Pop is {_speedup(refresh_time, pop_time):.1f}x faster for {scope}")
    else:
        print(f" 🥇 Refresh is {_speedup(pop_time, refresh_time):.1f}x faster for {scope}")


def run_benchmark(envelope_count: int, num_operations: int = 10) -> Dict[str, Any]:
    """Run the single- and multi-removal benchmarks for one data-set size.

    Args:
        envelope_count: Number of synthetic envelopes to generate.
        num_operations: Number of messages removed in the multi-operation
            run; reduced (with a warning) when fewer messages are available.

    Returns:
        A dict with ``envelope_count``, ``num_operations`` and two sub-dicts,
        ``single_operation`` and ``multiple_operations``, each holding
        ``pop_time_ms``, ``refresh_time_ms``, ``pop_memory_kb`` and
        ``refresh_memory_kb`` (peak traced memory) for that run.

    Note:
        Timings are taken inside run_memory_benchmark, i.e. while tracemalloc
        is tracing, so absolute times include the tracing overhead.
    """
    banner = "=" * 50
    print(f"\n{banner}")
    print(f"Running benchmark with {envelope_count} envelopes")
    print(banner)

    # Generate test data
    envelopes = generate_test_envelopes(envelope_count)

    # Independent store + list view for each approach
    pop_store, pop_list_view = _fresh_store_and_view(envelopes)
    refresh_store, refresh_list_view = _fresh_store_and_view(envelopes)

    # Only real messages may be removed (skip headers and empty slots)
    valid_indices = [
        idx
        for idx, item in enumerate(pop_store.envelopes)
        if item and item.get("type") != "header"
    ]
    if len(valid_indices) < num_operations:
        num_operations = len(valid_indices)
        print(f"Warning: Only {num_operations} valid messages available for removal")
    indices_to_remove = random.sample(valid_indices, num_operations)
    # Slice instead of indexing so an empty selection does not raise IndexError.
    single_index = indices_to_remove[:1]

    # Single operation benchmark
    print("\n🔹 Single operation benchmark (removing 1 item):")

    gc.collect()  # Ensure clean state
    single_pop_time, single_pop_current, single_pop_peak = run_memory_benchmark(
        benchmark_pop_approach, pop_store, pop_list_view, single_index
    )
    _print_measurement("Pop", single_pop_time, single_pop_current, single_pop_peak)

    gc.collect()  # Ensure clean state
    single_refresh_time, single_refresh_current, single_refresh_peak = run_memory_benchmark(
        benchmark_refresh_approach, refresh_store, refresh_list_view, single_index
    )
    _print_measurement("Refresh", single_refresh_time, single_refresh_current, single_refresh_peak)

    _print_winner(single_pop_time, single_refresh_time, "single operation")

    # Reset for multi-operation benchmark
    gc.collect()
    pop_store, pop_list_view = _fresh_store_and_view(envelopes)
    refresh_store, refresh_list_view = _fresh_store_and_view(envelopes)

    # Multiple operations benchmark
    print(f"\n🔹 Multiple operations benchmark (removing {num_operations} items):")

    gc.collect()
    multi_pop_time, multi_pop_current, multi_pop_peak = run_memory_benchmark(
        benchmark_pop_approach, pop_store, pop_list_view, indices_to_remove
    )
    _print_measurement("Pop", multi_pop_time, multi_pop_current, multi_pop_peak)

    gc.collect()
    multi_refresh_time, multi_refresh_current, multi_refresh_peak = run_memory_benchmark(
        benchmark_refresh_approach, refresh_store, refresh_list_view, indices_to_remove
    )
    _print_measurement("Refresh", multi_refresh_time, multi_refresh_current, multi_refresh_peak)

    _print_winner(multi_pop_time, multi_refresh_time, "multiple operations")

    # Each section reports the peak memory of its own run; the single-run
    # figures are kept in separate variables so the multi run cannot
    # overwrite them.
    return {
        "envelope_count": envelope_count,
        "num_operations": num_operations,
        "single_operation": {
            "pop_time_ms": single_pop_time * 1000,
            "refresh_time_ms": single_refresh_time * 1000,
            "pop_memory_kb": single_pop_peak / 1024,
            "refresh_memory_kb": single_refresh_peak / 1024,
        },
        "multiple_operations": {
            "pop_time_ms": multi_pop_time * 1000,
            "refresh_time_ms": multi_refresh_time * 1000,
            "pop_memory_kb": multi_pop_peak / 1024,
            "refresh_memory_kb": multi_refresh_peak / 1024,
        },
    }
def _time_ratio(numerator_ms: float, denominator_ms: float) -> float:
    """Return ``numerator_ms / denominator_ms`` without dividing by zero.

    A zero denominator gives ``inf`` (or 1.0 when both values are zero), so
    an unmeasurably short timing cannot crash the report.
    """
    if denominator_ms > 0:
        return numerator_ms / denominator_ms
    return float("inf") if numerator_ms > 0 else 1.0


def main():
    """Run the benchmark for each data-set size and print a summary.

    Prints a per-size results table, the per-size speed ratios between the
    two approaches, and a recommendation based on the average ratios.
    """
    print("\n📊 MAILDIR GTD LIST UPDATE BENCHMARK 📊")
    print("Comparing .pop() vs refresh_list_view() approaches")
    print("=" * 60)

    # Define test cases
    envelope_counts = [100, 1000, 2000]
    results = [run_benchmark(count) for count in envelope_counts]

    # Print summary
    print("\n" + "=" * 60)
    print("📊 BENCHMARK SUMMARY")
    print("=" * 60)

    # Console table formatting
    print(f"{'Size':<10} | {'Single Op (pop)':<15} | {'Single Op (refresh)':<20} | {'Multi Op (pop)':<15} | {'Multi Op (refresh)':<20}")
    print("-" * 90)
    for result in results:
        count = result["envelope_count"]
        single_pop = f"{result['single_operation']['pop_time_ms']:.2f} ms"
        single_refresh = f"{result['single_operation']['refresh_time_ms']:.2f} ms"
        multi_pop = f"{result['multiple_operations']['pop_time_ms']:.2f} ms"
        multi_refresh = f"{result['multiple_operations']['refresh_time_ms']:.2f} ms"
        print(f"{count:<10} | {single_pop:<15} | {single_refresh:<20} | {multi_pop:<15} | {multi_refresh:<20}")

    # Display conclusions; the ratios (refresh time / pop time) are collected
    # here so the recommendation below can reuse them.
    print("\n🔍 CONCLUSIONS:")
    single_ratios = []
    multi_ratios = []
    for result in results:
        count = result["envelope_count"]
        single = result["single_operation"]
        multi = result["multiple_operations"]
        single_ratio = _time_ratio(single["refresh_time_ms"], single["pop_time_ms"])
        multi_ratio = _time_ratio(multi["refresh_time_ms"], multi["pop_time_ms"])
        single_ratios.append(single_ratio)
        multi_ratios.append(multi_ratio)

        print(f"\nFor {count} envelopes:")
        if single_ratio > 1:
            print(f"- Single operation: .pop() is {single_ratio:.1f}x faster")
        else:
            inverse = _time_ratio(single["pop_time_ms"], single["refresh_time_ms"])
            print(f"- Single operation: refresh_list_view() is {inverse:.1f}x faster")
        if multi_ratio > 1:
            print(f"- Multiple operations: .pop() is {multi_ratio:.1f}x faster")
        else:
            inverse = _time_ratio(multi["pop_time_ms"], multi["refresh_time_ms"])
            print(f"- Multiple operations: refresh_list_view() is {inverse:.1f}x faster")

    print("\n🔑 RECOMMENDATION:")
    # Calculate average performance difference across all tests
    avg_single_ratio = sum(single_ratios) / len(single_ratios)
    avg_multi_ratio = sum(multi_ratios) / len(multi_ratios)
    if avg_single_ratio > 1 and avg_multi_ratio > 1:
        print("The .pop() approach is generally faster, but consider the following:")
        print("- .pop() risks index misalignment issues with the message_store")
        print("- refresh_list_view() ensures UI and data structure stay synchronized")
        print("- The performance difference may not be noticeable to users")
        print("👉 Recommendation: Use refresh_list_view() for reliability unless performance becomes a real issue")
    else:
        print("The refresh_list_view() approach is not only safer but also performs competitively:")
        print("- It ensures perfect synchronization between UI and data model")
        print("- It eliminates the risk of index misalignment")
        print("👉 Recommendation: Use refresh_list_view() approach as it's more reliable and performs well")
# Run the benchmark only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()