basically refactored the email viewer

2025-05-14 15:11:24 -06:00
parent 5c9ad69309
commit fc57e201a2
20 changed files with 1348 additions and 575 deletions
--- a/benchmark_list_update.py
+++ b/benchmark_list_update.py
@@ -0,0 +1,315 @@
+#!/usr/bin/env python3
+"""
+Benchmark script to compare two approaches for updating envelopes list in maildir_gtd.
+This script compares:
+1. Using .pop() to remove items from ListView
+2. Using refresh_list_view() to rebuild the entire ListView
+
+It tests with different numbers of envelopes (100, 1000, 2000) and measures:
+- Time to remove a single item
+- Time to remove multiple items in sequence
+- Memory usage
+"""
+
+import sys
+import os
+import time
+import random
+import gc
+import tracemalloc
+from datetime import datetime, timedelta, UTC
+from typing import List, Dict, Any, Callable, Tuple
+import json
+
+# Add parent directory to path so we can import modules correctly
+sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
+
+# Import required classes and functions
+from textual.widgets import ListView, ListItem, Label
+from textual.app import App, ComposeResult
+from textual.containers import Vertical
+
+# Import our application's modules
+from maildir_gtd.app import MessageStore
+from maildir_gtd.utils import group_envelopes_by_date
+
+# Mock class to simulate the ListView behavior
+class MockListView:
+    """Minimal stand-in for a ListView, backed by a plain Python list.
+
+    Only the operations the benchmark needs are provided: append, pop,
+    clear and len().
+    """
+
+    def __init__(self):
+        self.index = 0
+        self.items = list()
+
+    def __len__(self):
+        return len(self.items)
+
+    def append(self, item):
+        """Add *item* to the end of the list."""
+        self.items.append(item)
+
+    def pop(self, idx=None):
+        """Remove and return the entry at *idx* (the last entry when omitted)."""
+        position = -1 if idx is None else idx
+        return self.items.pop(position)
+
+    def clear(self):
+        """Drop every entry by rebinding ``items`` to a new empty list."""
+        self.items = list()
+
+# Helper functions to generate test data
+def generate_envelope(idx: int) -> Dict[str, Any]:
+    """Build one synthetic envelope dict for use as benchmark input.
+
+    The id, subject and addresses are derived from *idx*. The date is a
+    random moment between now and roughly 60 days ago (UTC), so that a batch
+    of envelopes is spread over many different dates.
+    """
+    reference = datetime.now(UTC)
+
+    # Draw the random offsets in days, hours, minutes order.
+    days_back = random.randint(0, 60)
+    hours_back = random.randint(0, 23)
+    minutes_back = random.randint(0, 59)
+    stamp = reference - timedelta(days=days_back, hours=hours_back, minutes=minutes_back)
+
+    envelope: Dict[str, Any] = {
+        "id": str(idx),
+        "subject": f"Test Subject {idx}",
+        "from": {"addr": f"sender{idx}@example.com"},
+        "to": {"addr": f"recipient{idx}@example.com"},
+        "date": stamp.strftime("%Y-%m-%d %H:%M"),
+        "cc": {},
+        "type": "message",
+    }
+    return envelope
+
+def generate_test_envelopes(count: int) -> List[Dict[str, Any]]:
+    """Return *count* synthetic envelopes whose ids run from 1 to *count*."""
+    envelope_ids = range(1, count + 1)
+    return list(map(generate_envelope, envelope_ids))
+
+# Benchmark functions
+def benchmark_pop_approach(store: MessageStore, list_view: MockListView, indices_to_remove: List[int]) -> float:
+    """Time the incremental approach: remove each message, then pop its row.
+
+    Args:
+        store: Message store whose ``envelopes`` list is indexed by the values
+            in *indices_to_remove*; each addressed entry must carry an "id".
+        list_view: List view kept in step with the store by popping the same
+            index that was removed from the store.
+        indices_to_remove: Positions in ``store.envelopes`` to remove.
+
+    Returns:
+        Elapsed time in seconds for all removals.
+    """
+    # perf_counter() is monotonic and uses the highest-resolution clock
+    # available; time.time() can be too coarse for sub-millisecond work.
+    start_time = time.perf_counter()
+
+    # Remove from highest to lowest so that a removal never shifts the
+    # position of an entry that is still waiting to be removed.
+    for idx in sorted(indices_to_remove, reverse=True):
+        msg_id = int(store.envelopes[idx]["id"])
+        store.remove(msg_id)
+        list_view.pop(idx)
+
+    return time.perf_counter() - start_time
+
+def benchmark_refresh_approach(store: MessageStore, list_view: MockListView, indices_to_remove: List[int]) -> float:
+    """Time the rebuild approach: remove each message, then rebuild the list.
+
+    After every removal the list view is cleared and refilled from
+    ``store.envelopes``, which simulates what refresh_list_view() does.
+
+    Args:
+        store: Message store whose ``envelopes`` list is indexed by the values
+            in *indices_to_remove*; each addressed entry must carry an "id".
+        list_view: List view that is rebuilt from scratch after each removal.
+        indices_to_remove: Positions in ``store.envelopes`` to remove.
+
+    Returns:
+        Elapsed time in seconds for all removals and rebuilds.
+    """
+    # perf_counter() is monotonic and uses the highest-resolution clock
+    # available; time.time() can be too coarse for sub-millisecond work.
+    start_time = time.perf_counter()
+
+    # Process indices from highest to lowest, exactly like the pop benchmark.
+    # NOTE(review): if store.remove() compacts store.envelopes, walking the
+    # indices in arbitrary order would address the wrong entry (or run off
+    # the end) after the first removal - confirm against MessageStore.
+    for idx in sorted(indices_to_remove, reverse=True):
+        msg_id = int(store.envelopes[idx]["id"])
+        store.remove(msg_id)
+
+        # Simulate refresh_list_view by clearing and rebuilding the list
+        list_view.clear()
+        for item in store.envelopes:
+            if not item:
+                continue  # falsy entries (e.g. None) get no row
+            if item.get("type") == "header":
+                list_view.append(f"Header: {item['label']}")
+            else:
+                list_view.append(f"Email: {item.get('subject', '')}")
+
+    return time.perf_counter() - start_time
+
+def run_memory_benchmark(func, *args):
+    """Call ``func(*args)`` with tracemalloc enabled and report memory use.
+
+    Args:
+        func: Callable to run.
+        *args: Positional arguments forwarded to *func*.
+
+    Returns:
+        A ``(result, current, peak)`` tuple: the return value of *func*, and
+        the traced memory size in bytes at the end of the call and at its
+        peak, as reported by ``tracemalloc.get_traced_memory()``.
+
+    Raises:
+        Whatever *func* raises; tracing is stopped before it propagates.
+    """
+    tracemalloc.start()
+    try:
+        result = func(*args)
+        current, peak = tracemalloc.get_traced_memory()
+    finally:
+        # Always stop tracing, otherwise a failing func would leave
+        # tracemalloc enabled (and slowing things down) for later code.
+        tracemalloc.stop()
+    return result, current, peak
+
+def _build_mock_list_view(store):
+    """Return a MockListView with one display string per non-empty store entry."""
+    view = MockListView()
+    for item in store.envelopes:
+        if not item:
+            continue  # falsy entries (e.g. None) get no row
+        if item.get("type") == "header":
+            view.append(f"Header: {item['label']}")
+        else:
+            view.append(f"Email: {item.get('subject', '')}")
+    return view
+
+
+def _fresh_store_and_view(envelopes):
+    """Load a copy of *envelopes* into a new MessageStore and build its list view."""
+    store = MessageStore()
+    # NOTE(review): list.copy() is shallow, so the envelope dicts are shared
+    # between stores - confirm MessageStore.load() does not mutate them.
+    store.load(envelopes.copy())
+    return store, _build_mock_list_view(store)
+
+
+def _speedup(slower, faster):
+    """Return slower / faster without dividing by a timing that measured as zero."""
+    if faster > 0:
+        return slower / faster
+    return float("inf") if slower > 0 else 1.0
+
+
+def run_benchmark(envelope_count: int, num_operations: int = 10):
+    """Run the pop and refresh benchmarks for one envelope count.
+
+    Two measurements are taken for each approach, each on freshly loaded
+    stores: removing a single message, and removing *num_operations* messages.
+    Results are printed as they are produced.
+
+    Note that the timed functions run while tracemalloc is tracing, so the
+    absolute timings include tracing overhead; both approaches are measured
+    under the same conditions.
+
+    Args:
+        envelope_count: Number of synthetic envelopes to generate.
+        num_operations: Number of messages to remove in the multi-operation
+            benchmark; reduced (with a warning) when fewer are available.
+
+    Returns:
+        A dict with "envelope_count", "num_operations" and, under
+        "single_operation" and "multiple_operations", the pop/refresh times
+        in milliseconds and peak traced memory in KB.
+
+    Raises:
+        ValueError: If there is no message to remove.
+    """
+    print(f"\n{'=' * 50}")
+    print(f"Running benchmark with {envelope_count} envelopes")
+    print(f"{'=' * 50}")
+
+    # Generate test data
+    envelopes = generate_test_envelopes(envelope_count)
+
+    # One independent store + list view per approach
+    pop_store, pop_list_view = _fresh_store_and_view(envelopes)
+    refresh_store, refresh_list_view = _fresh_store_and_view(envelopes)
+
+    # Only real messages may be removed: skip headers and empty entries
+    valid_indices = [
+        idx
+        for idx, item in enumerate(pop_store.envelopes)
+        if item and item.get("type") != "header"
+    ]
+
+    if len(valid_indices) < num_operations:
+        num_operations = len(valid_indices)
+        print(f"Warning: Only {num_operations} valid messages available for removal")
+
+    if num_operations < 1:
+        raise ValueError("run_benchmark needs at least one message to remove")
+
+    indices_to_remove = random.sample(valid_indices, num_operations)
+
+    # Single operation benchmark
+    print("\n🔹 Single operation benchmark (removing 1 item):")
+
+    gc.collect()  # Ensure clean state
+    single_pop_time, single_pop_current, single_pop_peak = run_memory_benchmark(
+        benchmark_pop_approach, pop_store, pop_list_view, [indices_to_remove[0]]
+    )
+    print(f"  Pop approach:     {single_pop_time*1000:.2f} ms  (Memory - Current: {single_pop_current/1024:.1f} KB, Peak: {single_pop_peak/1024:.1f} KB)")
+
+    gc.collect()  # Ensure clean state
+    single_refresh_time, single_refresh_current, single_refresh_peak = run_memory_benchmark(
+        benchmark_refresh_approach, refresh_store, refresh_list_view, [indices_to_remove[0]]
+    )
+    print(f"  Refresh approach: {single_refresh_time*1000:.2f} ms  (Memory - Current: {single_refresh_current/1024:.1f} KB, Peak: {single_refresh_peak/1024:.1f} KB)")
+
+    # Determine which is better for single operation
+    if single_pop_time < single_refresh_time:
+        print(f"  🥇 Pop is {_speedup(single_refresh_time, single_pop_time):.1f}x faster for single operation")
+    else:
+        print(f"  🥇 Refresh is {_speedup(single_pop_time, single_refresh_time):.1f}x faster for single operation")
+
+    # Reset for multi-operation benchmark: the stores above have already
+    # lost one message, so start again from the full data set.
+    gc.collect()
+    pop_store, pop_list_view = _fresh_store_and_view(envelopes)
+    refresh_store, refresh_list_view = _fresh_store_and_view(envelopes)
+
+    # Multiple operations benchmark
+    print(f"\n🔹 Multiple operations benchmark (removing {num_operations} items):")
+
+    gc.collect()
+    multi_pop_time, multi_pop_current, multi_pop_peak = run_memory_benchmark(
+        benchmark_pop_approach, pop_store, pop_list_view, indices_to_remove
+    )
+    print(f"  Pop approach:     {multi_pop_time*1000:.2f} ms  (Memory - Current: {multi_pop_current/1024:.1f} KB, Peak: {multi_pop_peak/1024:.1f} KB)")
+
+    gc.collect()
+    multi_refresh_time, multi_refresh_current, multi_refresh_peak = run_memory_benchmark(
+        benchmark_refresh_approach, refresh_store, refresh_list_view, indices_to_remove
+    )
+    print(f"  Refresh approach: {multi_refresh_time*1000:.2f} ms  (Memory - Current: {multi_refresh_current/1024:.1f} KB, Peak: {multi_refresh_peak/1024:.1f} KB)")
+
+    # Determine which is better for multiple operations
+    if multi_pop_time < multi_refresh_time:
+        print(f"  🥇 Pop is {_speedup(multi_refresh_time, multi_pop_time):.1f}x faster for multiple operations")
+    else:
+        print(f"  🥇 Refresh is {_speedup(multi_pop_time, multi_refresh_time):.1f}x faster for multiple operations")
+
+    # Each section reports the peaks measured for that section; the single
+    # and multi measurements are kept in separate variables so the later run
+    # cannot overwrite the earlier one.
+    return {
+        "envelope_count": envelope_count,
+        "num_operations": num_operations,
+        "single_operation": {
+            "pop_time_ms": single_pop_time * 1000,
+            "refresh_time_ms": single_refresh_time * 1000,
+            "pop_memory_kb": single_pop_peak / 1024,
+            "refresh_memory_kb": single_refresh_peak / 1024,
+        },
+        "multiple_operations": {
+            "pop_time_ms": multi_pop_time * 1000,
+            "refresh_time_ms": multi_refresh_time * 1000,
+            "pop_memory_kb": multi_pop_peak / 1024,
+            "refresh_memory_kb": multi_refresh_peak / 1024,
+        },
+    }
+
+def _time_ratio(numerator_ms, denominator_ms):
+    """Return numerator / denominator, tolerating a timing that measured as zero."""
+    if denominator_ms > 0:
+        return numerator_ms / denominator_ms
+    return float("inf") if numerator_ms > 0 else 1.0
+
+
+def main():
+    """Run the benchmark for each envelope count and print the report.
+
+    The report consists of a timing table, a per-size conclusion naming the
+    faster approach, and an overall recommendation based on the average
+    refresh-to-pop time ratio.
+    """
+    print("\n📊 MAILDIR GTD LIST UPDATE BENCHMARK 📊")
+    print("Comparing .pop() vs refresh_list_view() approaches")
+    print("=" * 60)
+
+    # Define test cases
+    envelope_counts = [100, 1000, 2000]
+    results = [run_benchmark(count) for count in envelope_counts]
+
+    # Print summary
+    print("\n" + "=" * 60)
+    print("📊 BENCHMARK SUMMARY")
+    print("=" * 60)
+
+    # Console table formatting
+    print(f"{'Size':<10} | {'Single Op (pop)':<15} | {'Single Op (refresh)':<20} | {'Multi Op (pop)':<15} | {'Multi Op (refresh)':<20}")
+    print("-" * 90)
+
+    for result in results:
+        count = result["envelope_count"]
+        single_pop = f"{result['single_operation']['pop_time_ms']:.2f} ms"
+        single_refresh = f"{result['single_operation']['refresh_time_ms']:.2f} ms"
+        multi_pop = f"{result['multiple_operations']['pop_time_ms']:.2f} ms"
+        multi_refresh = f"{result['multiple_operations']['refresh_time_ms']:.2f} ms"
+
+        print(f"{count:<10} | {single_pop:<15} | {single_refresh:<20} | {multi_pop:<15} | {multi_refresh:<20}")
+
+    # Compute every speedup once, in both directions, so that neither the
+    # conclusions nor the recommendation divide by a zero timing.
+    speedups = []
+    for result in results:
+        single = result['single_operation']
+        multi = result['multiple_operations']
+        speedups.append({
+            "single_pop": _time_ratio(single['refresh_time_ms'], single['pop_time_ms']),
+            "single_refresh": _time_ratio(single['pop_time_ms'], single['refresh_time_ms']),
+            "multi_pop": _time_ratio(multi['refresh_time_ms'], multi['pop_time_ms']),
+            "multi_refresh": _time_ratio(multi['pop_time_ms'], multi['refresh_time_ms']),
+        })
+
+    # Display conclusions
+    print("\n🔍 CONCLUSIONS:")
+    for result, speedup in zip(results, speedups):
+        count = result["envelope_count"]
+
+        print(f"\nFor {count} envelopes:")
+
+        if speedup["single_pop"] > 1:
+            print(f"- Single operation: .pop() is {speedup['single_pop']:.1f}x faster")
+        else:
+            print(f"- Single operation: refresh_list_view() is {speedup['single_refresh']:.1f}x faster")
+
+        if speedup["multi_pop"] > 1:
+            print(f"- Multiple operations: .pop() is {speedup['multi_pop']:.1f}x faster")
+        else:
+            print(f"- Multiple operations: refresh_list_view() is {speedup['multi_refresh']:.1f}x faster")
+
+    print("\n🔑 RECOMMENDATION:")
+    # Calculate average performance difference across all tests
+    avg_single_ratio = sum(s["single_pop"] for s in speedups) / len(speedups)
+    avg_multi_ratio = sum(s["multi_pop"] for s in speedups) / len(speedups)
+
+    if avg_single_ratio > 1 and avg_multi_ratio > 1:
+        print("The .pop() approach is generally faster, but consider the following:")
+        print("- .pop() risks index misalignment issues with the message_store")
+        print("- refresh_list_view() ensures UI and data structure stay synchronized")
+        print("- The performance difference may not be noticeable to users")
+        print("👉 Recommendation: Use refresh_list_view() for reliability unless performance becomes a real issue")
+    else:
+        print("The refresh_list_view() approach is not only safer but also performs competitively:")
+        print("- It ensures perfect synchronization between UI and data model")
+        print("- It eliminates the risk of index misalignment")
+        print("👉 Recommendation: Use refresh_list_view() approach as it's more reliable and performs well")
+
+# Run the benchmark only when this file is executed as a script, not on import.
+if __name__ == "__main__":
+    main()