🔧 chore(ai): remove unused imports and consolidate options (f14be17c) · Commits · Jan Reimes / 3gpp-crawler

src/tdoc_crawler/cli/ai.py

+16 −33

Original line number	Diff line number	Diff line
		@@ -8,10 +8,8 @@ import platform
		from datetime import UTC, datetime
		from functools import cache
		from pathlib import Path
		from typing import Annotated

		import typer
		from rich.console import Console
		from rich.progress import BarColumn, Progress, SpinnerColumn, TaskProgressColumn, TextColumn, TimeRemainingColumn
		from rich.table import Table
		from tdoc_ai import (
		@@ -53,19 +51,18 @@ from tdoc_crawler.cli.args import (
		EmbeddingTopKOption,
		EndDateOption,
		GraphQueryArgument,
		GraphQueryOption,
		JsonOutputOption,
		ProcessAllOption,
		ProcessForceOption,
		ProcessLimitOption,
		ProcessNewOnlyOption,
		QueryArgument,
		QueryOption,
		QueryMaxWordsOption,
		SourcePatternExcludeOption,
		SourcePatternOption,
		StartDateOption,
		StatusTDocIdOption,
		SummarizeDocumentArgument,
		SummarizeFormatOption,
		SummarizeWordsOption,
		TitlePatternExcludeOption,
		TitlePatternOption,
		@@ -86,12 +83,13 @@ from tdoc_crawler.cli.args import (
		)
		from tdoc_crawler.config import CacheManager, resolve_cache_manager
		from tdoc_crawler.database import TDocDatabase
		from tdoc_crawler.logging import get_console
		from tdoc_crawler.tdocs.models import TDocQueryConfig
		from tdoc_crawler.utils.date_parser import parse_partial_date
		from tdoc_crawler.utils.normalization import resolve_release_to_full_version

		ai_app = typer.Typer(help="AI document processing commands")
		console = Console()
		console = get_console()
		_logger = logging.getLogger(__name__)


		@@ -120,25 +118,14 @@ def resolve_workspace(workspace: str | None) -> str:
		@ai_app.command("summarize")
		def ai_summarize(
		document_id: SummarizeDocumentArgument,
		words: SummarizeWordsOption = 200,
		output_format: SummarizeFormatOption = "markdown",
		json_output: Annotated[bool, typer.Option("--json", help="Output as JSON (overrides --format)")] = False,
		max_words: SummarizeWordsOption = 200,
		) -> None:
		"""Summarize a single document with specified word count."""
		try:
		result = summarize_document(document_id=document_id, words=words, fetch_remote=True)

		if json_output or output_format == "json":
		typer.echo(result.to_json())
		elif output_format == "yaml":
		typer.echo(result.to_yaml())
		else:
		result = summarize_document(document_id=document_id, max_words=max_words, fetch_remote=True)
		console.print(f"## Summary for {document_id}")
		console.print(result.summary)
		except Exception as exc:
		if json_output:
		typer.echo(json.dumps({"error": str(exc)}), err=True)
		else:
		console.print(f"[red]Error: {exc}[/red]")
		raise typer.Exit(1) from exc

		@@ -173,18 +160,16 @@ def ai_convert(

		@ai_app.command("query")
		def ai_query(
		query_arg: QueryArgument = None,
		query: QueryOption = None,
		query_text: QueryArgument,
		workspace: WorkspaceNameOption = None,
		top_k: EmbeddingTopKOption = 5,
		words: int = typer.Option(300, "--words", help="Target word count for LLM answer (default: 300)"),
		top_k: EmbeddingTopKOption = 8,
		max_words: QueryMaxWordsOption = 300,
		json_output: JsonOutputOption = False,
		) -> None:
		"""Search TDocs using semantic embeddings and knowledge graph (RAG + GraphRAG).

		Uses LLM to synthesize an answer from embedding and graph context.
		"""
		query_text = query or query_arg
		if not query_text:
		console.print("[red]Error: query is required (positional or --query).[/red]")
		raise typer.Exit(1)
		@@ -200,7 +185,7 @@ def ai_query(
		query_text,
		workspace=resolved_workspace,
		embedding_chunks=embedding_results,
		max_words=words,
		max_words=max_words,
		query_level="advanced", # Always use LLM synthesis
		)

		@@ -249,7 +234,7 @@ def ai_process(
		force: ProcessForceOption = False,
		accelerate: EmbeddingBackendOption = "torch",
		json_output: JsonOutputOption = False,
		limit: int | None = typer.Option(None, "--limit", "-l", help="Limit number of documents to process (for testing)"),
		limit: ProcessLimitOption = None,
		) -> None:
		"""Process all documents in a workspace through the AI pipeline.

		@@ -340,13 +325,11 @@ def ai_status(

		@ai_app.command("graph")
		def ai_graph(
		query_arg: GraphQueryArgument = None,
		query: GraphQueryOption = None,
		query_text: GraphQueryArgument = None,
		workspace: WorkspaceNameOption = None,
		json_output: JsonOutputOption = False,
		) -> None:
		"""Query the knowledge graph for a workspace."""
		query_text = query or query_arg
		if not query_text:
		console.print("[red]Error: query is required (positional or --query).[/red]")
		raise typer.Exit(1)
		@@ -721,7 +704,7 @@ def workspace_process(
		new_only: WorkspaceProcessNewOnlyOption = True,
		force_rerun: WorkspaceProcessForceOption = False,
		json_output: JsonOutputOption = False,
		limit: int | None = typer.Option(None, "--limit", "-l", help="Limit number of documents to process (for testing)"),
		limit: ProcessLimitOption = None,
		) -> None:
		"""Process all active document members in a workspace through the AI pipeline.

src/tdoc_crawler/cli/args.py

+3 −4

Original line number	Diff line number	Diff line
		@@ -152,8 +152,7 @@ NoProgressOption = Annotated[

		# Options - AI
		SummarizeDocumentArgument = Annotated[str, typer.Argument(help="Document ID to summarize")]
		SummarizeWordsOption = Annotated[int, typer.Option("--words", "-w", help="Target word count (default: 200)")]
		SummarizeFormatOption = Annotated[str, typer.Option("--format", "-f", help="Output format (markdown, json, yaml)")]
		SummarizeWordsOption = Annotated[int, typer.Option("--words", "-w", help="Target/Maximum word count (default: 200)")]
		JsonOutputOption = Annotated[bool, typer.Option("--json", help="Output as JSON")]

		ConvertDocumentArgument = Annotated[str, typer.Argument(help="Document ID to convert")]
		@@ -163,9 +162,9 @@ ConvertOutputOption = Annotated[
		]

		QueryArgument = Annotated[str | None, typer.Argument(help="Semantic search query")]
		QueryOption = Annotated[str | None, typer.Option("--query", help="Semantic search query")]
		WorkspaceNameOption = Annotated[str | None, typer.Option("--workspace", "-w", help="Workspace name")]
		EmbeddingTopKOption = Annotated[int, typer.Option("--top-k", "-k", help="Number of embedding results to return")]
		QueryMaxWordsOption = Annotated[int, typer.Option("--words", help="Maximum word count for LLM answer (default: 300)")]

		_ = Annotated[str | None, typer.Option("--tdoc-id", "-t", help="TDoc ID to process")]
		EmbeddingBackendOption = Annotated[
		@@ -182,11 +181,11 @@ CheckoutBaseOption = Annotated[str | None, typer.Option("--checkout-base", help=
		ProcessAllOption = Annotated[bool, typer.Option("--all", help="Process all documents in workspace")]
		ProcessNewOnlyOption = Annotated[bool, typer.Option("--new-only", help="Process only new documents")]
		ProcessForceOption = Annotated[bool, typer.Option("--force", help="Force reprocessing")]
		ProcessLimitOption = Annotated[int | None, typer.Option("--limit", "-l", help="Limit number of documents to process (for testing)")]

		StatusTDocIdOption = Annotated[str | None, typer.Option("--tdoc-id", "-t", help="TDoc ID to check status for")]

		GraphQueryArgument = Annotated[str | None, typer.Argument(help="Graph query string")]
		GraphQueryOption = Annotated[str | None, typer.Option("--query", help="Graph query string")]

		WorkspaceNameArgument = Annotated[str, typer.Argument(help="Workspace name")]
		WorkspaceActivateArgument = Annotated[str, typer.Argument(help="Workspace name to activate")]