Files
luk/sweep_tasks.py
2025-08-20 08:30:54 -04:00

382 lines
13 KiB
Python
Executable File
Raw Blame History

This file contains invisible Unicode characters

This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/usr/bin/env python3
"""
Godspeed Task Sweeper - Consolidate incomplete tasks from markdown files.
This script recursively searches through directories (like 2024/, 2025/, etc.)
and moves all incomplete tasks from markdown files into the Godspeed Inbox.md file.
"""
import argparse
import re
import shutil
import uuid
from datetime import datetime
from pathlib import Path
from typing import List, Optional, Set, Tuple
class TaskSweeper:
    """Sweeps incomplete tasks from markdown files into Godspeed Inbox.

    Tasks are handled as ``(local_id, status, title, notes)`` tuples where
    ``status`` is one of ``"incomplete"``, ``"complete"`` or ``"cleared"``.
    When the project's ``GodspeedSync`` engine can be imported it is used for
    parsing and formatting; otherwise a built-in fallback is used.
    """

    # Matches patterns like: - [ ] Task title <!-- id:abc123 -->
    _TASK_PATTERN = re.compile(
        r"^\s*-\s*\[([xX\s\-])\]\s*(.+?)(?:\s*<!--\s*id:(\w+)\s*-->)?\s*$"
    )

    # Checkbox markup used by the fallback formatter, keyed by task status.
    _CHECKBOXES = {"incomplete": "[ ]", "complete": "[x]", "cleared": "[-]"}

    def __init__(self, notes_dir: Path, godspeed_dir: Path, dry_run: bool = False):
        """Configure the sweeper.

        Args:
            notes_dir: Root directory that is searched recursively for ``*.md``.
            godspeed_dir: Directory holding ``Inbox.md``; files inside it are
                never swept.
            dry_run: When true, report what would happen without touching
                any file.
        """
        self.notes_dir = Path(notes_dir)
        self.godspeed_dir = Path(godspeed_dir)
        self.dry_run = dry_run
        self.inbox_file = self.godspeed_dir / "Inbox.md"
        # Import the sync engine for consistent parsing
        try:
            from src.services.godspeed.sync import GodspeedSync

            self.sync_engine = GodspeedSync(None, godspeed_dir)
        except ImportError:
            # Fallback parsing if import fails
            self.sync_engine = None

    def _parse_task_line_fallback(
        self, line: str
    ) -> Optional[Tuple[str, str, str, str]]:
        """Fallback task parsing if sync engine not available.

        Args:
            line: A single markdown line.

        Returns:
            ``(local_id, status, title, notes)`` with ``notes`` always empty,
            or ``None`` when the line is not a checkbox task.
        """
        match = self._TASK_PATTERN.match(line.strip())
        if not match:
            return None
        checkbox, title_and_notes, local_id = match.groups()
        # Determine status
        if checkbox.lower() == "x":
            status = "complete"
        elif checkbox == "-":
            status = "cleared"
        else:
            status = "incomplete"
        # Extract title (remove any inline notes after <!--)
        title = title_and_notes.split("<!--")[0].strip()
        # Generate ID if missing
        if not local_id:
            local_id = str(uuid.uuid4())[:8]
        return local_id, status, title, ""

    def _format_task(self, local_id: str, status: str, title: str, notes: str) -> str:
        """Render one task as markdown, preferring the sync engine's format."""
        if self.sync_engine:
            return self.sync_engine._format_task_line(local_id, status, title, notes)
        # Fallback formatting
        formatted = f"- {self._CHECKBOXES[status]} {title} <!-- id:{local_id} -->"
        if notes:
            formatted += f"\n {notes}"
        return formatted

    def _parse_markdown_file(self, file_path: Path) -> Tuple[List[Tuple], List[str]]:
        """Parse a markdown file and extract tasks and non-task content.

        Args:
            file_path: Markdown file to read.

        Returns:
            ``(tasks, non_task_lines)``. Both are empty when the file does not
            exist or cannot be read. ``non_task_lines`` have trailing
            whitespace stripped.
        """
        if not file_path.exists():
            return [], []
        try:
            with open(file_path, "r", encoding="utf-8") as f:
                lines = f.readlines()
        except (OSError, UnicodeDecodeError) as e:
            print(f" ⚠️ Error reading {file_path}: {e}")
            return [], []

        tasks: List[Tuple] = []
        non_task_lines: List[str] = []
        i = 0
        while i < len(lines):
            line = lines[i].rstrip()
            # Check if this line looks like a task
            if line.strip().startswith("- ["):
                # Try to parse with sync engine first
                if self.sync_engine:
                    # Collect potential multi-line task: every following
                    # non-blank line up to the next checkbox line.
                    task_block = [line]
                    j = i + 1
                    while (
                        j < len(lines)
                        and lines[j].strip()
                        and not lines[j].strip().startswith("- [")
                    ):
                        task_block.append(lines[j].rstrip())
                        j += 1
                    parsed = self.sync_engine._parse_task_line("\n".join(task_block))
                    if parsed:
                        tasks.append(parsed)
                        i = j  # Skip the lines we've processed
                        continue
                # Fallback parsing
                parsed = self._parse_task_line_fallback(line)
                if parsed:
                    tasks.append(parsed)
                    i += 1
                    continue
            # Not a task, keep as regular content
            non_task_lines.append(line)
            i += 1
        return tasks, non_task_lines

    def _write_tasks_to_file(self, file_path: Path, tasks: List[Tuple]):
        """Append tasks to a markdown file, creating it (and parents) if needed.

        Existing content is kept and separated from the new tasks by one
        blank line. Does nothing when ``tasks`` is empty.
        """
        if not tasks:
            return
        file_path.parent.mkdir(parents=True, exist_ok=True)
        # Read existing content if file exists
        existing_content = ""
        if file_path.exists():
            with open(file_path, "r", encoding="utf-8") as f:
                existing_content = f.read()
        # Format new tasks
        new_block = "\n".join(self._format_task(*task) for task in tasks) + "\n"
        # Combine with existing content
        if existing_content.strip():
            new_content = existing_content.rstrip() + "\n\n" + new_block
        else:
            new_content = new_block
        with open(file_path, "w", encoding="utf-8") as f:
            f.write(new_content)

    def _clean_file(self, file_path: Path, non_task_lines: List[str]):
        """Remove tasks from original file, keeping only non-task content.

        If nothing but whitespace would remain, the file is deleted instead.
        In dry-run mode nothing is touched and the messages say so.
        """
        if all(not line.strip() for line in non_task_lines):
            # File would be empty, delete it
            if self.dry_run:
                print(f" 🗑️ Would delete empty file: {file_path}")
            else:
                file_path.unlink()
                print(f" 🗑️ Deleted empty file: {file_path}")
            return
        # Write back non-task content
        cleaned_content = "\n".join(non_task_lines).strip()
        if cleaned_content:
            cleaned_content += "\n"
        if self.dry_run:
            print(f" ✂️ Would clean file (remove tasks): {file_path}")
        else:
            with open(file_path, "w", encoding="utf-8") as f:
                f.write(cleaned_content)
            print(f" ✂️ Cleaned file (removed tasks): {file_path}")

    def find_markdown_files(self) -> List[Path]:
        """Find all markdown files in the notes directory, excluding Godspeed directory.

        Returns:
            Sorted paths (as produced by ``notes_dir.rglob``) of every ``*.md``
            file that is neither inside the Godspeed directory nor inside a
            hidden file/directory below ``notes_dir``.
        """
        # Compare resolved paths so that relative/absolute spellings, ".."
        # segments and symlinks cannot defeat the exclusion of the Inbox dir.
        godspeed_root = self.godspeed_dir.resolve()
        markdown_files = []
        for md_file in self.notes_dir.rglob("*.md"):
            # Skip files in the Godspeed directory
            if godspeed_root in md_file.resolve().parents:
                continue
            # Skip hidden files and directories. Only components *below* the
            # notes root count; the root itself may legitimately be hidden
            # or be spelled "..".
            relative_parts = md_file.relative_to(self.notes_dir).parts
            if any(part.startswith(".") for part in relative_parts):
                continue
            markdown_files.append(md_file)
        return sorted(markdown_files)

    def sweep_tasks(self) -> dict:
        """Sweep incomplete tasks from all markdown files into Inbox.

        Incomplete tasks get a ``From: <relative path>`` line appended to
        their notes and are appended to the Inbox. Completed/cleared tasks
        stay in their source file (re-appended after the non-task content).
        Source files are only rewritten after the Inbox has been written, so
        a failed Inbox write cannot lose tasks.

        Returns:
            Dict with ``swept_tasks`` (count), ``processed_files`` (relative
            paths as strings) and ``inbox_file`` (string path).
        """
        print(f"🧹 Sweeping incomplete tasks from: {self.notes_dir}")
        print(f"📥 Target Inbox: {self.inbox_file}")
        print(f"🔍 Dry run: {self.dry_run}")
        print("=" * 60)
        markdown_files = self.find_markdown_files()
        print(f"\n📁 Found {len(markdown_files)} markdown files to process")

        swept_tasks: List[Tuple] = []
        processed_files: List[str] = []
        # (file, remaining lines) pairs to rewrite once the Inbox is safe.
        pending_cleanups: List[Tuple[Path, List[str]]] = []

        for file_path in markdown_files:
            rel_path = file_path.relative_to(self.notes_dir)
            print(f"\n📄 Processing: {rel_path}")
            tasks, non_task_lines = self._parse_markdown_file(file_path)
            if not tasks:
                print(" No tasks found")
                continue

            # Separate incomplete tasks from completed/cleared ones
            incomplete_tasks = [task for task in tasks if task[1] == "incomplete"]
            complete_tasks = [task for task in tasks if task[1] != "incomplete"]

            if incomplete_tasks:
                print(f" 🔄 Found {len(incomplete_tasks)} incomplete tasks:")
                source_notes = f"From: {rel_path}"
                for local_id, status, title, notes in incomplete_tasks:
                    print(f"{title}")
                    if notes:
                        print(f" Notes: {notes}")
                    # Add source info to notes
                    combined_notes = (
                        f"{notes}\n{source_notes}" if notes else source_notes
                    )
                    swept_tasks.append((local_id, status, title, combined_notes))
                processed_files.append(str(rel_path))

            if complete_tasks:
                print(
                    f" ✅ Keeping {len(complete_tasks)} completed/cleared tasks in place"
                )

            # Files without incomplete tasks are left untouched.
            if not incomplete_tasks:
                continue

            # Reconstruct remaining content (non-tasks + completed tasks)
            remaining_content = list(non_task_lines)
            if complete_tasks:
                remaining_content.append("")  # Empty line before tasks
                remaining_content.extend(
                    self._format_task(*task) for task in complete_tasks
                )
            pending_cleanups.append((file_path, remaining_content))

        # Write swept tasks to Inbox
        if swept_tasks:
            print(f"\n📥 Writing {len(swept_tasks)} tasks to Inbox...")
            if not self.dry_run:
                self._write_tasks_to_file(self.inbox_file, swept_tasks)
                print(f" ✅ Inbox updated: {self.inbox_file}")

        # Clean the original files only now that the tasks are stored.
        for file_path, remaining_content in pending_cleanups:
            self._clean_file(file_path, remaining_content)

        # Summary
        print("\n" + "=" * 60)
        print("📊 SWEEP SUMMARY:")
        print(f" • Files processed: {len(processed_files)}")
        print(f" • Tasks swept: {len(swept_tasks)}")
        print(f" • Target: {self.inbox_file}")
        if self.dry_run:
            print("\n⚠️ DRY RUN - No files were actually modified")
            print(" Run without --dry-run to perform the sweep")
        return {
            "swept_tasks": len(swept_tasks),
            "processed_files": processed_files,
            "inbox_file": str(self.inbox_file),
        }
def main(argv=None):
    """Command-line entry point for the task sweeper.

    Args:
        argv: Optional list of argument strings (without the program name).
            ``None`` (the default) makes argparse read ``sys.argv[1:]``.

    Returns:
        Process exit status: ``0`` on success, ``1`` on invalid arguments or
        any error raised during the sweep.
    """
    parser = argparse.ArgumentParser(
        description="Sweep incomplete tasks from markdown files into Godspeed Inbox",
        epilog="""
Examples:
python sweep_tasks.py ~/Documents/Notes ~/Documents/Godspeed
python sweep_tasks.py . ./godspeed --dry-run
python sweep_tasks.py ~/Notes ~/Notes/godspeed --dry-run
""",
        formatter_class=argparse.RawDescriptionHelpFormatter,
    )
    parser.add_argument(
        "notes_dir",
        type=Path,
        help="Root directory containing markdown files with tasks (e.g., ~/Documents/Notes)",
    )
    parser.add_argument(
        "godspeed_dir",
        type=Path,
        help="Godspeed sync directory where Inbox.md will be created (e.g., ~/Documents/Godspeed)",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="Show what would be done without making changes",
    )
    args = parser.parse_args(argv)

    # A quoted "~" reaches us unexpanded by the shell; expand it here.
    notes_dir = args.notes_dir.expanduser()
    godspeed_dir = args.godspeed_dir.expanduser()

    # Validate directories
    if not notes_dir.exists():
        print(f"❌ Notes directory does not exist: {notes_dir}")
        return 1
    if not notes_dir.is_dir():
        print(f"❌ Notes path is not a directory: {notes_dir}")
        return 1
    # Godspeed directory will be created if it doesn't exist, but an existing
    # non-directory at that path can never hold Inbox.md.
    if godspeed_dir.exists() and not godspeed_dir.is_dir():
        print(f"❌ Godspeed path is not a directory: {godspeed_dir}")
        return 1

    try:
        sweeper = TaskSweeper(notes_dir, godspeed_dir, args.dry_run)
        result = sweeper.sweep_tasks()
        if result["swept_tasks"] > 0:
            print(f"\n🎉 Successfully swept {result['swept_tasks']} tasks!")
            if not args.dry_run:
                print("💡 Next steps:")
                print(f" 1. Review tasks in: {result['inbox_file']}")
                print(" 2. Run 'godspeed upload' to sync to API")
                print(" 3. Organize tasks into appropriate lists in Godspeed app")
        else:
            print("\n✨ No incomplete tasks found to sweep.")
        return 0
    except Exception as e:
        # Top-level boundary: report the failure and signal it via exit code.
        print(f"❌ Error during sweep: {e}")
        import traceback

        traceback.print_exc()
        return 1
# Script entry point. SystemExit is raised directly because the bare exit()
# helper is injected by the `site` module and is missing under `python -S`
# or in frozen/embedded interpreters.
if __name__ == "__main__":
    raise SystemExit(main())