Loading src/tdoc_crawler/cli/ai.py +16 −33 Original line number Diff line number Diff line Loading @@ -8,10 +8,8 @@ import platform from datetime import UTC, datetime from functools import cache from pathlib import Path from typing import Annotated import typer from rich.console import Console from rich.progress import BarColumn, Progress, SpinnerColumn, TaskProgressColumn, TextColumn, TimeRemainingColumn from rich.table import Table from tdoc_ai import ( Loading Loading @@ -53,19 +51,18 @@ from tdoc_crawler.cli.args import ( EmbeddingTopKOption, EndDateOption, GraphQueryArgument, GraphQueryOption, JsonOutputOption, ProcessAllOption, ProcessForceOption, ProcessLimitOption, ProcessNewOnlyOption, QueryArgument, QueryOption, QueryMaxWordsOption, SourcePatternExcludeOption, SourcePatternOption, StartDateOption, StatusTDocIdOption, SummarizeDocumentArgument, SummarizeFormatOption, SummarizeWordsOption, TitlePatternExcludeOption, TitlePatternOption, Loading @@ -86,12 +83,13 @@ from tdoc_crawler.cli.args import ( ) from tdoc_crawler.config import CacheManager, resolve_cache_manager from tdoc_crawler.database import TDocDatabase from tdoc_crawler.logging import get_console from tdoc_crawler.tdocs.models import TDocQueryConfig from tdoc_crawler.utils.date_parser import parse_partial_date from tdoc_crawler.utils.normalization import resolve_release_to_full_version ai_app = typer.Typer(help="AI document processing commands") console = Console() console = get_console() _logger = logging.getLogger(__name__) Loading Loading @@ -120,25 +118,14 @@ def resolve_workspace(workspace: str | None) -> str: @ai_app.command("summarize") def ai_summarize( document_id: SummarizeDocumentArgument, words: SummarizeWordsOption = 200, output_format: SummarizeFormatOption = "markdown", json_output: Annotated[bool, typer.Option("--json", help="Output as JSON (overrides --format)")] = False, max_words: SummarizeWordsOption = 200, ) -> None: """Summarize a single document with specified word count.""" try: result = summarize_document(document_id=document_id, words=words, fetch_remote=True) if json_output or output_format == "json": typer.echo(result.to_json()) elif output_format == "yaml": typer.echo(result.to_yaml()) else: result = summarize_document(document_id=document_id, max_words=max_words, fetch_remote=True) console.print(f"## Summary for {document_id}") console.print(result.summary) except Exception as exc: if json_output: typer.echo(json.dumps({"error": str(exc)}), err=True) else: console.print(f"[red]Error: {exc}[/red]") raise typer.Exit(1) from exc Loading Loading @@ -173,18 +160,16 @@ def ai_convert( @ai_app.command("query") def ai_query( query_arg: QueryArgument = None, query: QueryOption = None, query_text: QueryArgument, workspace: WorkspaceNameOption = None, top_k: EmbeddingTopKOption = 5, words: int = typer.Option(300, "--words", help="Target word count for LLM answer (default: 300)"), top_k: EmbeddingTopKOption = 8, max_words: QueryMaxWordsOption = 300, json_output: JsonOutputOption = False, ) -> None: """Search TDocs using semantic embeddings and knowledge graph (RAG + GraphRAG). Uses LLM to synthesize an answer from embedding and graph context. """ query_text = query or query_arg if not query_text: console.print("[red]Error: query is required (positional or --query).[/red]") raise typer.Exit(1) Loading @@ -200,7 +185,7 @@ def ai_query( query_text, workspace=resolved_workspace, embedding_chunks=embedding_results, max_words=words, max_words=max_words, query_level="advanced", # Always use LLM synthesis ) Loading Loading @@ -249,7 +234,7 @@ def ai_process( force: ProcessForceOption = False, accelerate: EmbeddingBackendOption = "torch", json_output: JsonOutputOption = False, limit: int | None = typer.Option(None, "--limit", "-l", help="Limit number of documents to process (for testing)"), limit: ProcessLimitOption = None, ) -> None: """Process all documents in a workspace through the AI pipeline. Loading Loading @@ -340,13 +325,11 @@ def ai_status( @ai_app.command("graph") def ai_graph( query_arg: GraphQueryArgument = None, query: GraphQueryOption = None, query_text: GraphQueryArgument = None, workspace: WorkspaceNameOption = None, json_output: JsonOutputOption = False, ) -> None: """Query the knowledge graph for a workspace.""" query_text = query or query_arg if not query_text: console.print("[red]Error: query is required (positional or --query).[/red]") raise typer.Exit(1) Loading Loading @@ -721,7 +704,7 @@ def workspace_process( new_only: WorkspaceProcessNewOnlyOption = True, force_rerun: WorkspaceProcessForceOption = False, json_output: JsonOutputOption = False, limit: int | None = typer.Option(None, "--limit", "-l", help="Limit number of documents to process (for testing)"), limit: ProcessLimitOption = None, ) -> None: """Process all active document members in a workspace through the AI pipeline. Loading src/tdoc_crawler/cli/args.py +3 −4 Original line number Diff line number Diff line Loading @@ -152,8 +152,7 @@ NoProgressOption = Annotated[ # Options - AI SummarizeDocumentArgument = Annotated[str, typer.Argument(help="Document ID to summarize")] SummarizeWordsOption = Annotated[int, typer.Option("--words", "-w", help="Target word count (default: 200)")] SummarizeFormatOption = Annotated[str, typer.Option("--format", "-f", help="Output format (markdown, json, yaml)")] SummarizeWordsOption = Annotated[int, typer.Option("--words", "-w", help="Target/Maximum word count (default: 200)")] JsonOutputOption = Annotated[bool, typer.Option("--json", help="Output as JSON")] ConvertDocumentArgument = Annotated[str, typer.Argument(help="Document ID to convert")] Loading @@ -163,9 +162,9 @@ ConvertOutputOption = Annotated[ ] QueryArgument = Annotated[str | None, typer.Argument(help="Semantic search query")] QueryOption = Annotated[str | None, typer.Option("--query", help="Semantic search query")] WorkspaceNameOption = Annotated[str | None, typer.Option("--workspace", "-w", help="Workspace name")] EmbeddingTopKOption = Annotated[int, typer.Option("--top-k", "-k", help="Number of embedding results to return")] QueryMaxWordsOption = Annotated[int, typer.Option("--words", help="Maximum word count for LLM answer (default: 300)")] _ = Annotated[str | None, typer.Option("--tdoc-id", "-t", help="TDoc ID to process")] EmbeddingBackendOption = Annotated[ Loading @@ -182,11 +181,11 @@ CheckoutBaseOption = Annotated[str | None, typer.Option("--checkout-base", help= ProcessAllOption = Annotated[bool, typer.Option("--all", help="Process all documents in workspace")] ProcessNewOnlyOption = Annotated[bool, typer.Option("--new-only", help="Process only new documents")] ProcessForceOption = Annotated[bool, typer.Option("--force", help="Force reprocessing")] ProcessLimitOption = Annotated[int | None, typer.Option("--limit", "-l", help="Limit number of documents to process (for testing)")] StatusTDocIdOption = Annotated[str | None, typer.Option("--tdoc-id", "-t", help="TDoc ID to check status for")] GraphQueryArgument = Annotated[str | None, typer.Argument(help="Graph query string")] GraphQueryOption = Annotated[str | None, typer.Option("--query", help="Graph query string")] WorkspaceNameArgument = Annotated[str, typer.Argument(help="Workspace name")] WorkspaceActivateArgument = Annotated[str, typer.Argument(help="Workspace name to activate")] Loading Loading
src/tdoc_crawler/cli/ai.py +16 −33 Original line number Diff line number Diff line Loading @@ -8,10 +8,8 @@ import platform from datetime import UTC, datetime from functools import cache from pathlib import Path from typing import Annotated import typer from rich.console import Console from rich.progress import BarColumn, Progress, SpinnerColumn, TaskProgressColumn, TextColumn, TimeRemainingColumn from rich.table import Table from tdoc_ai import ( Loading Loading @@ -53,19 +51,18 @@ from tdoc_crawler.cli.args import ( EmbeddingTopKOption, EndDateOption, GraphQueryArgument, GraphQueryOption, JsonOutputOption, ProcessAllOption, ProcessForceOption, ProcessLimitOption, ProcessNewOnlyOption, QueryArgument, QueryOption, QueryMaxWordsOption, SourcePatternExcludeOption, SourcePatternOption, StartDateOption, StatusTDocIdOption, SummarizeDocumentArgument, SummarizeFormatOption, SummarizeWordsOption, TitlePatternExcludeOption, TitlePatternOption, Loading @@ -86,12 +83,13 @@ from tdoc_crawler.cli.args import ( ) from tdoc_crawler.config import CacheManager, resolve_cache_manager from tdoc_crawler.database import TDocDatabase from tdoc_crawler.logging import get_console from tdoc_crawler.tdocs.models import TDocQueryConfig from tdoc_crawler.utils.date_parser import parse_partial_date from tdoc_crawler.utils.normalization import resolve_release_to_full_version ai_app = typer.Typer(help="AI document processing commands") console = Console() console = get_console() _logger = logging.getLogger(__name__) Loading Loading @@ -120,25 +118,14 @@ def resolve_workspace(workspace: str | None) -> str: @ai_app.command("summarize") def ai_summarize( document_id: SummarizeDocumentArgument, words: SummarizeWordsOption = 200, output_format: SummarizeFormatOption = "markdown", json_output: Annotated[bool, typer.Option("--json", help="Output as JSON (overrides --format)")] = False, max_words: SummarizeWordsOption = 200, ) -> None: """Summarize a single document with specified word count.""" try: result = summarize_document(document_id=document_id, words=words, fetch_remote=True) if json_output or output_format == "json": typer.echo(result.to_json()) elif output_format == "yaml": typer.echo(result.to_yaml()) else: result = summarize_document(document_id=document_id, max_words=max_words, fetch_remote=True) console.print(f"## Summary for {document_id}") console.print(result.summary) except Exception as exc: if json_output: typer.echo(json.dumps({"error": str(exc)}), err=True) else: console.print(f"[red]Error: {exc}[/red]") raise typer.Exit(1) from exc Loading Loading @@ -173,18 +160,16 @@ def ai_convert( @ai_app.command("query") def ai_query( query_arg: QueryArgument = None, query: QueryOption = None, query_text: QueryArgument, workspace: WorkspaceNameOption = None, top_k: EmbeddingTopKOption = 5, words: int = typer.Option(300, "--words", help="Target word count for LLM answer (default: 300)"), top_k: EmbeddingTopKOption = 8, max_words: QueryMaxWordsOption = 300, json_output: JsonOutputOption = False, ) -> None: """Search TDocs using semantic embeddings and knowledge graph (RAG + GraphRAG). Uses LLM to synthesize an answer from embedding and graph context. """ query_text = query or query_arg if not query_text: console.print("[red]Error: query is required (positional or --query).[/red]") raise typer.Exit(1) Loading @@ -200,7 +185,7 @@ def ai_query( query_text, workspace=resolved_workspace, embedding_chunks=embedding_results, max_words=words, max_words=max_words, query_level="advanced", # Always use LLM synthesis ) Loading Loading @@ -249,7 +234,7 @@ def ai_process( force: ProcessForceOption = False, accelerate: EmbeddingBackendOption = "torch", json_output: JsonOutputOption = False, limit: int | None = typer.Option(None, "--limit", "-l", help="Limit number of documents to process (for testing)"), limit: ProcessLimitOption = None, ) -> None: """Process all documents in a workspace through the AI pipeline. Loading Loading @@ -340,13 +325,11 @@ def ai_status( @ai_app.command("graph") def ai_graph( query_arg: GraphQueryArgument = None, query: GraphQueryOption = None, query_text: GraphQueryArgument = None, workspace: WorkspaceNameOption = None, json_output: JsonOutputOption = False, ) -> None: """Query the knowledge graph for a workspace.""" query_text = query or query_arg if not query_text: console.print("[red]Error: query is required (positional or --query).[/red]") raise typer.Exit(1) Loading Loading @@ -721,7 +704,7 @@ def workspace_process( new_only: WorkspaceProcessNewOnlyOption = True, force_rerun: WorkspaceProcessForceOption = False, json_output: JsonOutputOption = False, limit: int | None = typer.Option(None, "--limit", "-l", help="Limit number of documents to process (for testing)"), limit: ProcessLimitOption = None, ) -> None: """Process all active document members in a workspace through the AI pipeline. Loading
src/tdoc_crawler/cli/args.py +3 −4 Original line number Diff line number Diff line Loading @@ -152,8 +152,7 @@ NoProgressOption = Annotated[ # Options - AI SummarizeDocumentArgument = Annotated[str, typer.Argument(help="Document ID to summarize")] SummarizeWordsOption = Annotated[int, typer.Option("--words", "-w", help="Target word count (default: 200)")] SummarizeFormatOption = Annotated[str, typer.Option("--format", "-f", help="Output format (markdown, json, yaml)")] SummarizeWordsOption = Annotated[int, typer.Option("--words", "-w", help="Target/Maximum word count (default: 200)")] JsonOutputOption = Annotated[bool, typer.Option("--json", help="Output as JSON")] ConvertDocumentArgument = Annotated[str, typer.Argument(help="Document ID to convert")] Loading @@ -163,9 +162,9 @@ ConvertOutputOption = Annotated[ ] QueryArgument = Annotated[str | None, typer.Argument(help="Semantic search query")] QueryOption = Annotated[str | None, typer.Option("--query", help="Semantic search query")] WorkspaceNameOption = Annotated[str | None, typer.Option("--workspace", "-w", help="Workspace name")] EmbeddingTopKOption = Annotated[int, typer.Option("--top-k", "-k", help="Number of embedding results to return")] QueryMaxWordsOption = Annotated[int, typer.Option("--words", help="Maximum word count for LLM answer (default: 300)")] _ = Annotated[str | None, typer.Option("--tdoc-id", "-t", help="TDoc ID to process")] EmbeddingBackendOption = Annotated[ Loading @@ -182,11 +181,11 @@ CheckoutBaseOption = Annotated[str | None, typer.Option("--checkout-base", help= ProcessAllOption = Annotated[bool, typer.Option("--all", help="Process all documents in workspace")] ProcessNewOnlyOption = Annotated[bool, typer.Option("--new-only", help="Process only new documents")] ProcessForceOption = Annotated[bool, typer.Option("--force", help="Force reprocessing")] ProcessLimitOption = Annotated[int | None, typer.Option("--limit", "-l", help="Limit number of documents to process (for testing)")] StatusTDocIdOption = Annotated[str | None, typer.Option("--tdoc-id", "-t", help="TDoc ID to check status for")] GraphQueryArgument = Annotated[str | None, typer.Argument(help="Graph query string")] GraphQueryOption = Annotated[str | None, typer.Option("--query", help="Graph query string")] WorkspaceNameArgument = Annotated[str, typer.Argument(help="Workspace name")] WorkspaceActivateArgument = Annotated[str, typer.Argument(help="Workspace name to activate")] Loading