Loading src/tdoc_crawler/cli/ai.py +24 −39 Original line number Diff line number Diff line Loading @@ -23,7 +23,6 @@ from tdoc_ai import ( get_status, make_workspace_member, normalize_workspace_name, process_document, query_graph, set_active_workspace, summarize_document, Loading @@ -45,7 +44,6 @@ from tdoc_crawler.cli.args import ( AgendaPatternExcludeOption, AgendaPatternOption, CheckoutBaseOption, CheckoutPathOption, ConvertDocumentArgument, ConvertOutputOption, EmbeddingBackendOption, Loading @@ -57,7 +55,6 @@ from tdoc_crawler.cli.args import ( ProcessAllOption, ProcessForceOption, ProcessNewOnlyOption, ProcessTDocIdOption, QueryArgument, QueryOption, SourcePatternExcludeOption, Loading Loading @@ -216,9 +213,7 @@ def ai_query( @ai_app.command("process") def ai_process( document_id: ProcessTDocIdOption = None, workspace: WorkspaceNameOption = None, checkout_path: CheckoutPathOption = None, checkout_base: CheckoutBaseOption = None, process_all_flag: ProcessAllOption = False, new_only: ProcessNewOnlyOption = False, Loading @@ -226,11 +221,19 @@ def ai_process( accelerate: EmbeddingBackendOption = "torch", json_output: JsonOutputOption = False, ) -> None: """Process a single document or all documents through the AI pipeline.""" """Process all documents in a workspace through the AI pipeline. Processing runs in three phases: Phase 1: CLASSIFY → EXTRACT (create markdown artifacts) Phase 2: EMBED (generate vector embeddings) Phase 3: GRAPH (build knowledge graph) Failed documents in one phase are not processed in later phases, keeping logs clean and errors contained. """ workspace = workspace or "default" config = AiConfig.from_env(embedding_backend=accelerate) if process_all_flag: # Process all documents in workspace manager = _get_cache_manager() checkout_root = Path(checkout_base) if checkout_base else manager.root Loading @@ -246,24 +249,6 @@ def ai_process( typer.echo(json.dumps(result)) else: console.print(f"[green]Processed {len(result)} documents in workspace {workspace}[/green]") elif document_id: # Process single document manager = _get_cache_manager() resolved_checkout = Path(checkout_path) if checkout_path else manager.checkout_dir / document_id result = process_document( document_id, workspace=workspace, checkout_path=resolved_checkout, force_rerun=force, config=config, ) if json_output: typer.echo(json.dumps(result)) else: console.print(f"[green]Processed {document_id}[/green]") else: console.print("[red]Error: Must specify --tdoc-id or --all[/red]") raise typer.Exit(1) @ai_app.command("status") Loading Loading
src/tdoc_crawler/cli/ai.py +24 −39 Original line number Diff line number Diff line Loading @@ -23,7 +23,6 @@ from tdoc_ai import ( get_status, make_workspace_member, normalize_workspace_name, process_document, query_graph, set_active_workspace, summarize_document, Loading @@ -45,7 +44,6 @@ from tdoc_crawler.cli.args import ( AgendaPatternExcludeOption, AgendaPatternOption, CheckoutBaseOption, CheckoutPathOption, ConvertDocumentArgument, ConvertOutputOption, EmbeddingBackendOption, Loading @@ -57,7 +55,6 @@ from tdoc_crawler.cli.args import ( ProcessAllOption, ProcessForceOption, ProcessNewOnlyOption, ProcessTDocIdOption, QueryArgument, QueryOption, SourcePatternExcludeOption, Loading Loading @@ -216,9 +213,7 @@ def ai_query( @ai_app.command("process") def ai_process( document_id: ProcessTDocIdOption = None, workspace: WorkspaceNameOption = None, checkout_path: CheckoutPathOption = None, checkout_base: CheckoutBaseOption = None, process_all_flag: ProcessAllOption = False, new_only: ProcessNewOnlyOption = False, Loading @@ -226,11 +221,19 @@ def ai_process( accelerate: EmbeddingBackendOption = "torch", json_output: JsonOutputOption = False, ) -> None: """Process a single document or all documents through the AI pipeline.""" """Process all documents in a workspace through the AI pipeline. Processing runs in three phases: Phase 1: CLASSIFY → EXTRACT (create markdown artifacts) Phase 2: EMBED (generate vector embeddings) Phase 3: GRAPH (build knowledge graph) Failed documents in one phase are not processed in later phases, keeping logs clean and errors contained. """ workspace = workspace or "default" config = AiConfig.from_env(embedding_backend=accelerate) if process_all_flag: # Process all documents in workspace manager = _get_cache_manager() checkout_root = Path(checkout_base) if checkout_base else manager.root Loading @@ -246,24 +249,6 @@ def ai_process( typer.echo(json.dumps(result)) else: console.print(f"[green]Processed {len(result)} documents in workspace {workspace}[/green]") elif document_id: # Process single document manager = _get_cache_manager() resolved_checkout = Path(checkout_path) if checkout_path else manager.checkout_dir / document_id result = process_document( document_id, workspace=workspace, checkout_path=resolved_checkout, force_rerun=force, config=config, ) if json_output: typer.echo(json.dumps(result)) else: console.print(f"[green]Processed {document_id}[/green]") else: console.print("[red]Error: Must specify --tdoc-id or --all[/red]") raise typer.Exit(1) @ai_app.command("status") Loading