# --- src/tdoc_crawler/cli/app.py (hunk: crawl-specs command) ---
# Relies on module-level names brought in by the import hunks of this diff:
# SpecCatalog, FunctionSpecSource, fetch_threegpp_metadata,
# fetch_whatthespec_metadata, collect_spec_numbers, spec_crawl_to_dict,
# print_spec_crawl_table, plus app/console/json/yaml/typer from the module.


@app.command("crawl-specs", rich_help_panel=HELP_PANEL_CRAWLING)
def crawl_specs(
    spec: SpecOption = None,
    spec_file: SpecFileOption = None,
    release: ReleaseOption = "latest",
    output_format: OutputFormatOption = OutputFormat.TABLE.value,
    cache_dir: CacheDirOption = DEFAULT_CACHE_DIR,
) -> None:
    """Crawl spec metadata from configured sources.

    Args:
        spec: Spec numbers given on the command line; forwarded to
            ``collect_spec_numbers``.
        spec_file: Optional file of spec numbers; forwarded likewise.
        release: Release selector passed through to ``SpecCatalog.crawl_specs``.
        output_format: Case-insensitive value of an ``OutputFormat`` member.
        cache_dir: Directory handed to ``database_path`` to locate the database.

    Raises:
        typer.Exit: With code 2 when the output format is not recognised.
    """
    # Validate the cheap option first so a bad format fails before
    # collect_spec_numbers potentially blocks reading stdin.
    try:
        output = OutputFormat(output_format.lower())
    except ValueError as exc:
        console.print("[red]Invalid output format; use table, json, or yaml")
        raise typer.Exit(code=2) from exc

    specs = collect_spec_numbers(spec, spec_file)

    sources = [
        FunctionSpecSource("3gpp", fetch_threegpp_metadata),
        FunctionSpecSource("whatthespec", fetch_whatthespec_metadata),
    ]

    db_path = database_path(cache_dir)
    with TDocDatabase(db_path) as database:
        catalog = SpecCatalog(database)
        results = catalog.crawl_specs(specs, release, sources)

    if not results:
        console.print("[yellow]No specs crawled[/yellow]")
        return

    if output is OutputFormat.JSON or output is OutputFormat.YAML:
        payload = [spec_crawl_to_dict(result) for result in results]
        if output is OutputFormat.JSON:
            text = json.dumps(payload, indent=2)
        else:
            text = yaml.dump(payload, sort_keys=False)
        # Serialized output is data, not Rich markup: disable tag parsing so
        # "[...]" inside values survives, and soft-wrap so long lines are not
        # re-broken (which would corrupt the document when piped).
        console.print(text, markup=False, soft_wrap=True)
    else:
        print_spec_crawl_table(results)


# Register command aliases
app.command("ct", rich_help_panel=HELP_PANEL_CRAWLING, hidden=True)(crawl_tdocs)
app.command("cm", rich_help_panel=HELP_PANEL_CRAWLING, hidden=True)(crawl_meetings)


# --- src/tdoc_crawler/cli/args.py (hunk: spec-related option aliases) ---
TDocIdArgument = Annotated[str, typer.Argument(help="TDoc identifier to download and open")]
CheckoutTDocIdsArgument = Annotated[
    list[str],
    typer.Argument(help="TDoc identifier(s) to checkout"),
]
ForceOption = Annotated[
    bool,
    typer.Option("--force", "-f", help="Re-download even if already checked out"),
]

# Options consumed by the crawl-specs command.
SpecOption = Annotated[
    list[str] | None,
    typer.Option("--spec", help="Spec number(s) (dotted or undotted)"),
]
SpecFileOption = Annotated[
    Path | None,
    typer.Option("--spec-file", help="File with spec numbers"),
]
ReleaseOption = Annotated[str, typer.Option("--release", help="Spec release selector")]

# NOTE(review): the two aliases below are not imported by the app.py hunk
# shown in this diff; presumably they serve a later checkout command - confirm.
DocOnlyOption = Annotated[
    bool,
    typer.Option("--doc-only/--no-doc-only", help="Attempt document-only download"),
]
CheckoutDirOption = Annotated[
    Path | None,
    typer.Option("--checkout-dir", help="Spec checkout base directory"),
]
# --- src/tdoc_crawler/cli/helpers.py (hunk: collect_spec_numbers) ---
# Relies on module-level names defined elsewhere in helpers.py:
# sys, Path, typer, console.


def collect_spec_numbers(specs: list[str] | None, spec_file: Path | None) -> list[str]:
    """Collect spec numbers from CLI options, stdin, and an optional file.

    Args:
        specs: Spec numbers provided on the command line. An item equal to
            "-" is replaced by the non-blank lines read from stdin.
        spec_file: Optional file containing spec numbers (one per line).

    Returns:
        Non-empty list of whitespace-stripped spec numbers: command-line and
        stdin entries first (in input order), then the file entries.

    Raises:
        typer.Exit: With code 2 when the file cannot be read or decoded, or
            when no spec number was supplied at all.
    """

    def nonblank_lines(text: str) -> list[str]:
        # One stripped entry per line, dropping lines that are only whitespace.
        return [stripped for line in text.splitlines() if (stripped := line.strip())]

    collected: list[str] = []

    for item in specs or []:
        if item == "-":
            collected.extend(nonblank_lines(sys.stdin.read()))
        elif stripped := item.strip():
            collected.append(stripped)

    if spec_file is not None:
        try:
            # utf-8-sig decodes plain UTF-8 identically but also drops a
            # leading BOM, which str.strip() would leave on the first entry.
            file_text = spec_file.read_text(encoding="utf-8-sig")
        except (OSError, UnicodeDecodeError) as exc:
            # UnicodeDecodeError is a ValueError, not an OSError, so it must be
            # listed explicitly to get the clean exit instead of a traceback.
            # markup=False: the message embeds the path, which may hold "[...]".
            console.print(f"Failed to read spec file: {exc}", style="red", markup=False)
            raise typer.Exit(code=2) from exc
        collected.extend(nonblank_lines(file_text))

    if not collected:
        console.print("[red]No spec numbers provided[/red]")
        raise typer.Exit(code=2)

    return collected


# --- src/tdoc_crawler/cli/printing.py (hunk: spec crawl output helpers) ---
# Relies on module-level names defined elsewhere in printing.py:
# Any, Table (rich.table), SpecCrawlResult (tdoc_crawler.specs), console.


def spec_crawl_to_dict(result: SpecCrawlResult) -> dict[str, Any]:
    """Convert SpecCrawlResult to dictionary for JSON/YAML output.

    Attribute values are copied as-is; each entry of ``result.sources`` is
    flattened to a dict of its name, status, versions and message.
    """
    source_rows = [
        {
            "source_name": outcome.source_name,
            "status": outcome.status,
            "versions": outcome.versions,
            "message": outcome.message,
        }
        for outcome in result.sources
    ]
    return {
        "spec_number": result.spec_number,
        "release": result.release,
        "status": result.status,
        "latest_version": result.latest_version,
        "sources": source_rows,
        "message": result.message,
    }


def print_spec_crawl_table(results: list[SpecCrawlResult]) -> None:
    """Print spec crawl results as formatted table.

    At most 100 rows are rendered. When the input is longer, the title says
    how many rows are shown out of the total instead of implying that every
    result is listed.
    """
    row_limit = 100
    shown = results[:row_limit]
    if len(shown) < len(results):
        title = f"Spec crawl results (showing {len(shown)} of {len(results)} rows)"
    else:
        title = f"Spec crawl results ({len(results)} rows)"

    table = Table(title=title)
    for header, style in (
        ("Spec", "cyan"),
        ("Status", "magenta"),
        ("Release", "yellow"),
        ("Latest", "green"),
        ("Sources", "blue"),
    ):
        table.add_column(header, style=style)

    for result in shown:
        # Compact "name:status" summary per source; "-" when there are none.
        sources = ", ".join(f"{outcome.source_name}:{outcome.status}" for outcome in result.sources) or "-"
        table.add_row(
            result.spec_number,
            result.status,
            result.release,
            result.latest_version or "-",
            sources,
        )

    console.print(table)
# src/tdoc_crawler/cli/app.py (hunk: crawl-specs command)
# Relies on module-level names brought in by the import hunks of this diff:
# SpecCatalog, FunctionSpecSource, fetch_threegpp_metadata,
# fetch_whatthespec_metadata, collect_spec_numbers, spec_crawl_to_dict,
# print_spec_crawl_table, plus app/console/json/yaml/typer from the module.


@app.command("crawl-specs", rich_help_panel=HELP_PANEL_CRAWLING)
def crawl_specs(
    spec: SpecOption = None,
    spec_file: SpecFileOption = None,
    release: ReleaseOption = "latest",
    output_format: OutputFormatOption = OutputFormat.TABLE.value,
    cache_dir: CacheDirOption = DEFAULT_CACHE_DIR,
) -> None:
    """Crawl spec metadata from configured sources.

    Args:
        spec: Spec numbers given on the command line; forwarded to
            ``collect_spec_numbers``.
        spec_file: Optional file of spec numbers; forwarded likewise.
        release: Release selector passed through to ``SpecCatalog.crawl_specs``.
        output_format: Case-insensitive value of an ``OutputFormat`` member.
        cache_dir: Directory handed to ``database_path`` to locate the database.

    Raises:
        typer.Exit: With code 2 when the output format is not recognised.
    """
    # Validate the cheap option first so a bad format fails before
    # collect_spec_numbers potentially blocks reading stdin.
    try:
        output = OutputFormat(output_format.lower())
    except ValueError as exc:
        console.print("[red]Invalid output format; use table, json, or yaml")
        raise typer.Exit(code=2) from exc

    specs = collect_spec_numbers(spec, spec_file)

    sources = [
        FunctionSpecSource("3gpp", fetch_threegpp_metadata),
        FunctionSpecSource("whatthespec", fetch_whatthespec_metadata),
    ]

    db_path = database_path(cache_dir)
    with TDocDatabase(db_path) as database:
        catalog = SpecCatalog(database)
        results = catalog.crawl_specs(specs, release, sources)

    if not results:
        console.print("[yellow]No specs crawled[/yellow]")
        return

    if output is OutputFormat.JSON or output is OutputFormat.YAML:
        payload = [spec_crawl_to_dict(result) for result in results]
        if output is OutputFormat.JSON:
            text = json.dumps(payload, indent=2)
        else:
            text = yaml.dump(payload, sort_keys=False)
        # Serialized output is data, not Rich markup: disable tag parsing so
        # "[...]" inside values survives, and soft-wrap so long lines are not
        # re-broken (which would corrupt the document when piped).
        console.print(text, markup=False, soft_wrap=True)
    else:
        print_spec_crawl_table(results)


# Register command aliases
app.command("ct", rich_help_panel=HELP_PANEL_CRAWLING, hidden=True)(crawl_tdocs)
app.command("cm", rich_help_panel=HELP_PANEL_CRAWLING, hidden=True)(crawl_meetings)
# src/tdoc_crawler/cli/args.py (hunk: spec-related option aliases)
# Each alias pairs a parameter type with its Typer argument/option metadata so
# command signatures can annotate parameters with a single name.

TDocIdArgument = Annotated[
    str,
    typer.Argument(help="TDoc identifier to download and open"),
]
CheckoutTDocIdsArgument = Annotated[
    list[str],
    typer.Argument(help="TDoc identifier(s) to checkout"),
]
ForceOption = Annotated[
    bool,
    typer.Option("--force", "-f", help="Re-download even if already checked out"),
]

# Options consumed by the crawl-specs command.
SpecOption = Annotated[
    list[str] | None,
    typer.Option("--spec", help="Spec number(s) (dotted or undotted)"),
]
SpecFileOption = Annotated[
    Path | None,
    typer.Option("--spec-file", help="File with spec numbers"),
]
ReleaseOption = Annotated[
    str,
    typer.Option("--release", help="Spec release selector"),
]

# NOTE(review): the two aliases below are not imported by the app.py hunk
# shown in this diff; presumably they serve a later checkout command - confirm.
DocOnlyOption = Annotated[
    bool,
    typer.Option("--doc-only/--no-doc-only", help="Attempt document-only download"),
]
CheckoutDirOption = Annotated[
    Path | None,
    typer.Option("--checkout-dir", help="Spec checkout base directory"),
]
src/tdoc_crawler/cli/helpers.py +37 −0 Original line number Diff line number Diff line Loading @@ -107,6 +107,43 @@ def parse_subgroups(values: list[str] | None) -> list[str] | None: return resolved def collect_spec_numbers(specs: list[str] | None, spec_file: Path | None) -> list[str]: """Collect spec numbers from CLI options and stdin. Args: specs: Spec numbers provided on the command line. Use "-" to read stdin. spec_file: Optional file containing spec numbers (one per line). Returns: List of spec numbers in input order. """ collected: list[str] = [] if specs: for item in specs: if item == "-": stdin_text = sys.stdin.read() collected.extend(line.strip() for line in stdin_text.splitlines() if line.strip()) else: stripped = item.strip() if stripped: collected.append(stripped) if spec_file is not None: try: file_text = spec_file.read_text(encoding="utf-8") except OSError as exc: console.print(f"[red]Failed to read spec file: {exc}") raise typer.Exit(code=2) from exc collected.extend(line.strip() for line in file_text.splitlines() if line.strip()) if not collected: console.print("[red]No spec numbers provided[/red]") raise typer.Exit(code=2) return collected def build_limits( limit_tdocs: int | None, limit_meetings: int | None, Loading
# src/tdoc_crawler/cli/printing.py (hunk: spec crawl output helpers)
# Relies on module-level names defined elsewhere in printing.py:
# Any, Table (rich.table), SpecCrawlResult (tdoc_crawler.specs), console.


def spec_crawl_to_dict(result: SpecCrawlResult) -> dict[str, Any]:
    """Convert SpecCrawlResult to dictionary for JSON/YAML output.

    Attribute values are copied as-is; each entry of ``result.sources`` is
    flattened to a dict of its name, status, versions and message.
    """
    source_rows = [
        {
            "source_name": outcome.source_name,
            "status": outcome.status,
            "versions": outcome.versions,
            "message": outcome.message,
        }
        for outcome in result.sources
    ]
    return {
        "spec_number": result.spec_number,
        "release": result.release,
        "status": result.status,
        "latest_version": result.latest_version,
        "sources": source_rows,
        "message": result.message,
    }


def print_spec_crawl_table(results: list[SpecCrawlResult]) -> None:
    """Print spec crawl results as formatted table.

    At most 100 rows are rendered. When the input is longer, the title says
    how many rows are shown out of the total instead of implying that every
    result is listed.
    """
    row_limit = 100
    shown = results[:row_limit]
    if len(shown) < len(results):
        title = f"Spec crawl results (showing {len(shown)} of {len(results)} rows)"
    else:
        title = f"Spec crawl results ({len(results)} rows)"

    table = Table(title=title)
    for header, style in (
        ("Spec", "cyan"),
        ("Status", "magenta"),
        ("Release", "yellow"),
        ("Latest", "green"),
        ("Sources", "blue"),
    ):
        table.add_column(header, style=style)

    for result in shown:
        # Compact "name:status" summary per source; "-" when there are none.
        sources = ", ".join(f"{outcome.source_name}:{outcome.status}" for outcome in result.sources) or "-"
        table.add_row(
            result.spec_number,
            result.status,
            result.release,
            result.latest_version or "-",
            sources,
        )

    console.print(table)