Commit 555d69b2 authored by Jan Reimes
Browse files

Add progress bar for AI workspace process command

Added a Rich progress bar to track document processing when running the
'workspace process' command. The progress display shows:
- Total documents to process
- Current document being processed
- Pipeline stage (extraction, classification, embedding, summarization)
- Progress bar with percentage
- Time elapsed and estimated time remaining

Implementation:
- Added Rich Progress imports to cli/ai.py
- Created a progress context with multiple columns for detailed tracking
- Added a progress_callback function that updates the task description dynamically
- Passed the callback to process_all() for real-time updates
- Marked the task as completed after all documents have been processed

This provides clear visual feedback during long-running operations,
helping users understand processing status and estimated completion time.
parent bc7f4ddd
Loading
Loading
Loading
Loading
+36 −8
Original line number Diff line number Diff line
@@ -9,6 +9,7 @@ from typing import Annotated

import typer
from rich.console import Console
from rich.progress import Progress, SpinnerColumn, TextColumn, BarColumn, TaskProgressColumn, TimeRemainingColumn, TimeElapsedColumn
from rich.table import Table

from tdoc_crawler.ai import (
@@ -540,15 +541,42 @@ def workspace_process(
            console.print(f"[yellow]No active members found in workspace '{normalize_workspace_name(workspace)}'[/yellow]")
        return

    # Process documents
    # Create progress bar for tracking document processing
    with Progress(
        SpinnerColumn(),
        TextColumn("[progress.description]{task.description}", justify="right"),
        BarColumn(),
        TaskProgressColumn(),
        TimeRemainingColumn(elapsed_when_finished=True),
        TimeElapsedColumn(),
        console=console,
        refresh_per_second=10,
    ) as progress:
        # Create main task
        task = progress.add_task(
            f"[cyan]Processing workspace '{normalize_workspace_name(workspace)}'[/cyan]",
            total=len(document_ids),
        )

        # Create progress callback
        def progress_callback(stage: PipelineStage, doc_id: str) -> None:
            """Show the current document and pipeline stage in the task description.

            Args:
                stage: Stage being reported; its ``value`` is rendered with
                    underscores turned into spaces and then title-cased.
                doc_id: Identifier of the document being reported.
            """
            # NOTE(review): this callback only rewrites the description; it does
            # not advance the task's completed count — confirm that is intended.
            readable_stage = " ".join(stage.value.split("_")).title()
            label = f"[cyan]Processing {doc_id}[/cyan] [dim]- {readable_stage}[/dim]"
            progress.update(task, description=label)

        # Process documents with progress tracking
        results = process_all(
            document_ids=document_ids,
            checkout_base=manager.root,
            new_only=new_only,
            force_rerun=force_rerun,
            workspace=workspace,
            progress_callback=progress_callback,
        )

        # Update progress to completed
        progress.update(task, completed=len(document_ids))

    if json_output:
        typer.echo(
            json.dumps(