#!/usr/bin/env python3
"""
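Benchmark script to compare two approaches for updating the envelope list in maildir_gtd.
This script compares:
1. Using .pop() to remove items from ListView
2. Using refresh_list_view() to rebuild the entire ListView

Note: both approaches are exercised against MockListView, a plain-list stand-in
defined in this file, not against a live Textual ListView widget, so the numbers
reflect list and MessageStore work only, not widget rendering.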

It tests with different numbers of envelopes (100, 1000, 2000) and measures:
- Time to remove a single item
- Time to remove multiple items in sequence
- Memory usage
"""

import sys
import os
import time
import random
import gc
import tracemalloc
from datetime import datetime, timedelta, UTC
from typing import List, Dict, Any, Callable, Tuple
import json

# Add parent directory to path so we can import modules correctly
sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))

# Import required classes and functions
from textual.widgets import ListView, ListItem, Label
from textual.app import App, ComposeResult
from textual.containers import Vertical

# Import our application's modules
from maildir_gtd.app import MessageStore
from maildir_gtd.utils import group_envelopes_by_date

# Mock class to simulate the ListView behavior
class MockListView:
    """Minimal list-backed stand-in for a ListView.

    Only the operations the benchmarks exercise are provided: ``append``,
    ``pop``, ``clear`` and ``len()``.
    """

    def __init__(self):
        # ``index`` starts at 0; no method of this class reads or updates it.
        self.index = 0
        self.items = []

    def __len__(self):
        """Return how many items are currently stored."""
        return len(self.items)

    def append(self, item):
        """Add ``item`` at the end of the list."""
        self.items.append(item)

    def pop(self, idx=None):
        """Remove and return the item at ``idx`` (the last item when ``idx`` is None)."""
        position = -1 if idx is None else idx
        return self.items.pop(position)

    def clear(self):
        """Drop all items by rebinding ``items`` to a new empty list."""
        self.items = []

# Helper functions to generate test data
def generate_envelope(idx: int) -> Dict[str, Any]:
    """Generate a synthetic envelope with predictable data."""
    now = datetime.now(UTC)
    # Distribute dates over the last 60 days to create realistic grouping
    date = now - timedelta(days=random.randint(0, 60),
                           hours=random.randint(0, 23),
                           minutes=random.randint(0, 59))

    return {
        "id": str(idx),
        "subject": f"Test Subject {idx}",
        "from": {"addr": f"sender{idx}@example.com"},
        "to": {"addr": f"recipient{idx}@example.com"},
        "date": date.strftime("%Y-%m-%d %H:%M"),
        "cc": {},
        "type": "message"
    }

def generate_test_envelopes(count: int) -> List[Dict[str, Any]]:
    """Return ``count`` synthetic envelopes, built for indices 1 through ``count``.

    A ``count`` of zero or less yields an empty list.
    """
    return [generate_envelope(offset + 1) for offset in range(count)]

# Benchmark functions
def benchmark_pop_approach(store: MessageStore, list_view: MockListView, indices_to_remove: List[int]) -> float:
    """Time removing messages by popping the matching rows from the list view.

    For each index the message id is read from ``store.envelopes[idx]["id"]``,
    passed as an int to ``store.remove`` and the same position is popped from
    ``list_view``.

    Args:
        store: Message store to remove from; it is mutated.
        list_view: List view to pop rows from; it is mutated. Its rows are
            assumed to line up one-to-one with ``store.envelopes`` (the caller
            is responsible for that).
        indices_to_remove: Positions in ``store.envelopes`` of the messages
            to remove.

    Returns:
        Elapsed time in seconds for the whole removal sequence.
    """
    # perf_counter() is monotonic and uses the highest-resolution clock
    # available. time.time() can be too coarse for work this short and may
    # yield 0.0, which breaks the ratio calculations done on the result.
    start_time = time.perf_counter()

    # Highest index first, so a removal never shifts a position that is still
    # waiting to be processed.
    for idx in sorted(indices_to_remove, reverse=True):
        msg_id = int(store.envelopes[idx]["id"])
        store.remove(msg_id)
        list_view.pop(idx)

    return time.perf_counter() - start_time

def benchmark_refresh_approach(store: MessageStore, list_view: MockListView, indices_to_remove: List[int]) -> float:
    """Time removing messages by rebuilding the whole list view after each one.

    For each index the message id is read from ``store.envelopes[idx]["id"]``
    and passed as an int to ``store.remove``; the list view is then cleared
    and refilled from ``store.envelopes``.

    Args:
        store: Message store to remove from; it is mutated.
        list_view: List view that is cleared and repopulated after every
            removal.
        indices_to_remove: Positions in ``store.envelopes`` of the messages
            to remove.

    Returns:
        Elapsed time in seconds for the whole removal sequence.
    """
    # perf_counter() is monotonic and uses the highest-resolution clock
    # available. time.time() can be too coarse and may yield 0.0, which
    # breaks the ratio calculations done on the result.
    start_time = time.perf_counter()

    # Highest index first, matching the pop benchmark. In caller order, a
    # removal that shifts store.envelopes would make later indices point at
    # the wrong entry (or past the end).
    for idx in sorted(indices_to_remove, reverse=True):
        msg_id = int(store.envelopes[idx]["id"])
        store.remove(msg_id)

        # Simulate refresh_list_view by clearing and rebuilding the list
        list_view.clear()
        for item in store.envelopes:
            if not item:
                continue  # falsy entries (e.g. None) get no row
            if item.get("type") == "header":
                list_view.append(f"Header: {item['label']}")
            else:
                list_view.append(f"Email: {item.get('subject', '')}")

    return time.perf_counter() - start_time

def run_memory_benchmark(func, *args):
    """Call ``func(*args)`` with tracemalloc enabled and report memory figures.

    Args:
        func: Callable to run.
        *args: Positional arguments forwarded to ``func``.

    Returns:
        ``(result, current, peak)``: the return value of ``func`` followed by
        the current and peak traced memory sizes reported by
        ``tracemalloc.get_traced_memory()``.

    Raises:
        Whatever ``func`` raises; tracing is stopped before it propagates.
    """
    tracemalloc.start()
    try:
        result = func(*args)
        current, peak = tracemalloc.get_traced_memory()
    finally:
        # Always switch tracing off, even when func fails, so a crashed run
        # does not leave tracemalloc active for later measurements.
        tracemalloc.stop()
    return result, current, peak

def _build_list_view(store):
    """Return a MockListView with one display string per non-empty entry of ``store.envelopes``."""
    list_view = MockListView()
    for item in store.envelopes:
        if not item:
            continue  # falsy entries (e.g. None) get no row
        if item.get("type") == "header":
            list_view.append(f"Header: {item['label']}")
        else:
            list_view.append(f"Email: {item.get('subject', '')}")
    return list_view


def _fresh_fixture(envelopes):
    """Load a shallow copy of ``envelopes`` into a new MessageStore and build its list view."""
    store = MessageStore()
    store.load(envelopes.copy())
    return store, _build_list_view(store)


def _speedup(slower, faster):
    """Return ``slower / faster`` without dividing by a zero timing."""
    if faster > 0:
        return slower / faster
    # A zero denominator means the faster side was too quick to measure.
    return float("inf") if slower > 0 else 1.0


def _run_phase(fixtures, indices, label):
    """Measure both approaches on ``indices``, print the outcome and return it.

    ``fixtures`` is ``((pop_store, pop_view), (refresh_store, refresh_view))``.
    The returned dict holds times in milliseconds and peak traced memory in KB.
    """
    (pop_store, pop_list_view), (refresh_store, refresh_list_view) = fixtures

    gc.collect()  # Ensure clean state
    pop_time, pop_current, pop_peak = run_memory_benchmark(
        benchmark_pop_approach, pop_store, pop_list_view, indices
    )
    print(f"  Pop approach:     {pop_time*1000:.2f} ms  (Memory - Current: {pop_current/1024:.1f} KB, Peak: {pop_peak/1024:.1f} KB)")

    gc.collect()  # Ensure clean state
    refresh_time, refresh_current, refresh_peak = run_memory_benchmark(
        benchmark_refresh_approach, refresh_store, refresh_list_view, indices
    )
    print(f"  Refresh approach: {refresh_time*1000:.2f} ms  (Memory - Current: {refresh_current/1024:.1f} KB, Peak: {refresh_peak/1024:.1f} KB)")

    if pop_time < refresh_time:
        print(f"  🥇 Pop is {_speedup(refresh_time, pop_time):.1f}x faster for {label}")
    else:
        print(f"  🥇 Refresh is {_speedup(pop_time, refresh_time):.1f}x faster for {label}")

    return {
        "pop_time_ms": pop_time * 1000,
        "refresh_time_ms": refresh_time * 1000,
        "pop_memory_kb": pop_peak / 1024,
        "refresh_memory_kb": refresh_peak / 1024,
    }


def run_benchmark(envelope_count: int, num_operations: int = 10):
    """Run the single- and multi-removal benchmarks for one list size.

    Generates ``envelope_count`` synthetic envelopes, picks random removable
    positions, then measures both approaches twice: once removing a single
    message and once removing ``num_operations`` messages. Each phase works
    on freshly loaded stores. Progress and results are printed.

    NOTE: each timing is taken while tracemalloc is tracing, because the
    timed function runs inside ``run_memory_benchmark``.

    Args:
        envelope_count: Number of synthetic envelopes to generate.
        num_operations: Number of removals for the multi-operation phase;
            reduced (with a warning) when fewer messages are available.

    Returns:
        A dict with ``envelope_count``, ``num_operations`` and two
        sub-dicts, ``single_operation`` and ``multiple_operations``, each
        holding ``pop_time_ms``, ``refresh_time_ms``, ``pop_memory_kb`` and
        ``refresh_memory_kb`` for that phase.
    """
    print(f"\n{'=' * 50}")
    print(f"Running benchmark with {envelope_count} envelopes")
    print(f"{'=' * 50}")

    # Generate test data
    envelopes = generate_test_envelopes(envelope_count)

    # One store/list-view pair per approach
    fixtures = (_fresh_fixture(envelopes), _fresh_fixture(envelopes))

    # Only real messages may be removed: skip headers and falsy placeholders.
    # Later stores are loaded from the same envelopes, so these positions are
    # assumed to stay valid for them as well.
    probe_store = fixtures[0][0]
    valid_indices = [
        idx
        for idx, item in enumerate(probe_store.envelopes)
        if item and item.get("type") != "header"
    ]

    if len(valid_indices) < num_operations:
        num_operations = len(valid_indices)
        print(f"Warning: Only {num_operations} valid messages available for removal")

    indices_to_remove = random.sample(valid_indices, num_operations)

    # Single operation benchmark. The slice keeps this safe when nothing is
    # removable (indices_to_remove[0] would raise IndexError).
    print("\n🔹 Single operation benchmark (removing 1 item):")
    single = _run_phase(fixtures, indices_to_remove[:1], "single operation")

    # Reset for multi-operation benchmark
    gc.collect()
    fixtures = (_fresh_fixture(envelopes), _fresh_fixture(envelopes))

    # Multiple operations benchmark
    print(f"\n🔹 Multiple operations benchmark (removing {num_operations} items):")
    multi = _run_phase(fixtures, indices_to_remove, "multiple operations")

    # Each phase reports its own memory peaks; they are no longer overwritten
    # by the later phase.
    return {
        "envelope_count": envelope_count,
        "num_operations": num_operations,
        "single_operation": single,
        "multiple_operations": multi,
    }

def main():
    """Run the benchmark at several list sizes and print a comparison report.

    Runs ``run_benchmark`` for 100, 1000 and 2000 envelopes, then prints a
    timing table, a per-size conclusion and an overall recommendation based
    on the average refresh/pop time ratios.
    """

    def refresh_over_pop(timings):
        """Return refresh time / pop time, tolerating a zero pop timing."""
        refresh_ms = timings["refresh_time_ms"]
        pop_ms = timings["pop_time_ms"]
        if pop_ms > 0:
            return refresh_ms / pop_ms
        # Pop was too fast to measure: treat it as infinitely faster, unless
        # refresh was unmeasurable too, in which case call it a tie.
        return float("inf") if refresh_ms > 0 else 1.0

    def inverse(ratio):
        """Return 1 / ratio, mapping a zero ratio to infinity."""
        return 1 / ratio if ratio > 0 else float("inf")

    print("\n📊 MAILDIR GTD LIST UPDATE BENCHMARK 📊")
    print("Comparing .pop() vs refresh_list_view() approaches")
    print("=" * 60)

    # Define test cases
    envelope_counts = [100, 1000, 2000]
    results = []

    for count in envelope_counts:
        results.append(run_benchmark(count))

    # Print summary
    print("\n" + "=" * 60)
    print("📊 BENCHMARK SUMMARY")
    print("=" * 60)

    # Console table formatting
    print(f"{'Size':<10} | {'Single Op (pop)':<15} | {'Single Op (refresh)':<20} | {'Multi Op (pop)':<15} | {'Multi Op (refresh)':<20}")
    print("-" * 90)

    for result in results:
        count = result["envelope_count"]
        single_pop = f"{result['single_operation']['pop_time_ms']:.2f} ms"
        single_refresh = f"{result['single_operation']['refresh_time_ms']:.2f} ms"
        multi_pop = f"{result['multiple_operations']['pop_time_ms']:.2f} ms"
        multi_refresh = f"{result['multiple_operations']['refresh_time_ms']:.2f} ms"

        print(f"{count:<10} | {single_pop:<15} | {single_refresh:<20} | {multi_pop:<15} | {multi_refresh:<20}")

    # Ratios above 1 mean pop was faster. Compute them once and reuse them
    # for the conclusions and the recommendation.
    ratios = [
        (
            result["envelope_count"],
            refresh_over_pop(result["single_operation"]),
            refresh_over_pop(result["multiple_operations"]),
        )
        for result in results
    ]

    # Display conclusions
    print("\n🔍 CONCLUSIONS:")
    for count, single_ratio, multi_ratio in ratios:
        print(f"\nFor {count} envelopes:")

        if single_ratio > 1:
            print(f"- Single operation: .pop() is {single_ratio:.1f}x faster")
        else:
            print(f"- Single operation: refresh_list_view() is {inverse(single_ratio):.1f}x faster")

        if multi_ratio > 1:
            print(f"- Multiple operations: .pop() is {multi_ratio:.1f}x faster")
        else:
            print(f"- Multiple operations: refresh_list_view() is {inverse(multi_ratio):.1f}x faster")

    print("\n🔑 RECOMMENDATION:")
    # Calculate average performance difference across all tests
    avg_single_ratio = sum(single for _, single, _ in ratios) / len(ratios)
    avg_multi_ratio = sum(multi for _, _, multi in ratios) / len(ratios)

    if avg_single_ratio > 1 and avg_multi_ratio > 1:
        print("The .pop() approach is generally faster, but consider the following:")
        print("- .pop() risks index misalignment issues with the message_store")
        print("- refresh_list_view() ensures UI and data structure stay synchronized")
        print("- The performance difference may not be noticeable to users")
        print("👉 Recommendation: Use refresh_list_view() for reliability unless performance becomes a real issue")
    else:
        print("The refresh_list_view() approach is not only safer but also performs competitively:")
        print("- It ensures perfect synchronization between UI and data model")
        print("- It eliminates the risk of index misalignment")
        print("👉 Recommendation: Use refresh_list_view() approach as it's more reliable and performs well")

# Run the benchmark suite only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()