refactor(cli,http): centralize cache handling via CacheManager and pass shared... (1bf8bcb9) · Commits · Jan Reimes / 3gpp-crawler

src/tdoc_crawler/checkout.py

+20 −12

Original line number	Diff line number	Diff line
		@@ -67,6 +67,7 @@ def checkout_tdoc(
		checkout_dir: Path,
		*,
		force: bool = False,
		session: requests.Session | None = None,
		) -> Path:
		"""Download and extract a TDoc to the checkout folder.

		@@ -74,6 +75,7 @@ def checkout_tdoc(
		metadata: TDoc metadata containing URL and ID
		checkout_dir: Base checkout directory
		force: If True, re-download even if already exists
		session: Optional requests.Session to reuse for downloads

		Returns:
		Path to the extracted TDoc directory
		@@ -94,7 +96,7 @@ def checkout_tdoc(
		raise ValueError(f"TDoc {metadata.tdoc_id} has no URL")

		try:
		download_to_path(metadata.url, temp_zip_path)
		download_to_path(metadata.url, temp_zip_path, session=session)
		with zipfile.ZipFile(temp_zip_path) as archive:
		archive.extractall(checkout_path)
		logger.info(f"Checked out {metadata.tdoc_id} to {checkout_path}")
		@@ -105,13 +107,14 @@ def checkout_tdoc(
		return checkout_path


		def prepare_tdoc_file(metadata: TDocMetadata, cache_dir: Path, return_dir: bool = False) -> Path:
		def prepare_tdoc_file(metadata: TDocMetadata, cache_dir: Path, return_dir: bool = False, session: requests.Session | None = None) -> Path:
		"""Prepare TDoc file for opening (download and extract if needed).

		Args:
		metadata: TDoc metadata with download URL.
		cache_dir: Cache directory for downloads and extracted files.
		return_dir: When True and TDoc is a zip, return the extract directory.
		session: Optional requests.Session to reuse for downloads.

		Returns:
		Path to the downloaded file, or the extract directory when return_dir is True.
		@@ -134,7 +137,7 @@ def prepare_tdoc_file(metadata: TDocMetadata, cache_dir: Path, return_dir: bool
		return extract_dir if return_dir else files[0]
		shutil.rmtree(extract_dir)
		zip_path = downloads_dir / f"{metadata.tdoc_id}.zip"
		download_to_path(metadata.url, zip_path)
		download_to_path(metadata.url, zip_path, session=session)
		try:
		with zipfile.ZipFile(zip_path) as archive:
		archive.extractall(extract_dir)
		@@ -152,7 +155,7 @@ def prepare_tdoc_file(metadata: TDocMetadata, cache_dir: Path, return_dir: bool
		target_path = downloads_dir / target_name
		if not target_path.exists():
		try:
		download_to_path(metadata.url, target_path)
		download_to_path(metadata.url, target_path, session=session)
		except requests.exceptions.HTTPError as exc:
		status_code = exc.response.status_code if exc.response is not None else "unknown"
		raise FileNotFoundError(f"failed-to-download ({status_code}): {metadata.url}") from exc
		@@ -271,6 +274,7 @@ def checkout_tdocs(
		results: list[TDocMetadata],
		checkout_dir: Path,
		force: bool = False,
		session: requests.Session | None = None,
		) -> CheckoutResult:
		"""Checkout multiple TDoc files to the checkout directory.

		@@ -278,6 +282,7 @@ def checkout_tdocs(
		results: List of TDocMetadata to checkout
		checkout_dir: Base checkout directory
		force: If True, re-download even if already exists
		session: Optional requests.Session to reuse for downloads

		Returns:
		CheckoutResult with success/error counts
		@@ -294,7 +299,7 @@ def checkout_tdocs(
		try:
		if not metadata.url:
		raise ValueError("missing URL")
		checkout_tdoc(metadata, checkout_dir, force=force)
		checkout_tdoc(metadata, checkout_dir, force=force, session=session)
		success_count += 1
		except (FileNotFoundError, OSError, ValueError, zipfile.BadZipFile) as exc:
		errors.append(f"{metadata.tdoc_id}: {exc}")
		@@ -305,13 +310,17 @@ def checkout_tdocs(

		def checkout_meeting_tdocs(
		meetings: list[MeetingMetadata],
		cache_dir: Path,
		checkout_dir: Path,
		http_cache_path: Path,
		session: requests.Session | None = None,
		) -> CheckoutResult:
		"""Checkout TDoc files from a list of meetings.

		Args:
		meetings: List of MeetingMetadata to checkout TDocs from
		cache_dir: Cache directory for document list caching
		checkout_dir: Base checkout directory
		http_cache_path: Path to HTTP cache database
		session: Optional requests.Session to reuse for downloads

		Returns:
		CheckoutResult with success/error counts
		@@ -319,7 +328,6 @@ def checkout_meeting_tdocs(
		if not meetings:
		return CheckoutResult(success_count=0, error_count=0, errors=[])

		checkout_dir = cache_dir / "checkout"
		unique: dict[str, TDocMetadata] = {}
		errors: list[str] = []

		@@ -328,7 +336,7 @@ def checkout_meeting_tdocs(
		errors.append(f"{meeting.short_name}: no files URL")
		continue
		try:
		tdocs = fetch_meeting_document_list(meeting.meeting_id, cache_dir)
		tdocs = fetch_meeting_document_list(meeting.meeting_id, http_cache_path)
		except DocumentListError as exc:
		errors.append(f"{meeting.short_name}: {exc}")
		continue
		@@ -336,7 +344,7 @@ def checkout_meeting_tdocs(
		if metadata.tdoc_id not in unique:
		unique[metadata.tdoc_id] = metadata

		return checkout_tdocs(list(unique.values()), checkout_dir, force=False)
		return checkout_tdocs(list(unique.values()), checkout_dir, force=False, session=session)


		__all__ = [

src/tdoc_crawler/cli/app.py

+146 −101

Original line number	Diff line number	Diff line
		@@ -5,7 +5,6 @@ from __future__ import annotations
		import json
		import zipfile
		from datetime import datetime
		from pathlib import Path
		from typing import Annotated, Any, cast

		import typer
		@@ -78,10 +77,12 @@ from tdoc_crawler.cli.printing import (
		spec_query_to_dict,
		tdoc_to_dict,
		)
		from tdoc_crawler.config import DEFAULT_CACHE_DIR, CacheManager
		from tdoc_crawler.crawlers import MeetingCrawler, TDocCrawler
		from tdoc_crawler.credentials import resolve_credentials, set_credentials
		from tdoc_crawler.database import TDocDatabase, database_path
		from tdoc_crawler.database import TDocDatabase
		from tdoc_crawler.fetching import fetch_missing_tdocs
		from tdoc_crawler.http_client import create_cached_session
		from tdoc_crawler.logging import set_verbosity
		from tdoc_crawler.models import MeetingCrawlConfig, MeetingQueryConfig, OutputFormat, QueryConfig, SortOrder, TDocCrawlConfig
		from tdoc_crawler.models.specs import SpecQueryFilters
		@@ -93,7 +94,6 @@ load_dotenv()
		app = typer.Typer(help="TDoc crawler - crawl and query structured 3GPP metadata")
		console = get_console()

		DEFAULT_CACHE_DIR = Path.home() / ".tdoc-crawler"

		HELP_PANEL_MAIN = "Main Commands"
		HELP_PANEL_CRAWLING = "Crawling Commands"
		@@ -123,11 +123,14 @@ def crawl_tdocs(
		# Set logging verbosity early to ensure all log messages respect the configured level
		set_verbosity(verbosity)

		manager = CacheManager(cache_dir)
		manager.ensure_paths()

		subgroups = parse_subgroups(subgroup)
		working_groups = parse_working_groups(working_group, subgroups)
		limits = build_limits(limit_tdocs, limit_meetings, limit_meetings_per_wg, limit_wgs)
		config = TDocCrawlConfig(
		cache_dir=cache_dir,
		cache_dir=manager.root,
		working_groups=working_groups,
		subgroups=subgroups,
		meeting_ids=None,
		@@ -147,7 +150,7 @@ def crawl_tdocs(
		use_parallel_crawling=False,
		)

		db_path = database_path(config.cache_dir)
		db_path = manager.db_path

		# Build descriptive message
		scope_parts = []
		@@ -158,7 +161,7 @@ def crawl_tdocs(
		console.print(f"[cyan]Crawling TDocs ({', '.join(scope_parts)})[/cyan]")

		with TDocDatabase(db_path) as database:
		checkout_dir = cache_dir / "checkout"
		checkout_dir = manager.checkout_dir
		# Clear TDocs if requested
		if clear_tdocs:
		deleted_count = database.clear_tdocs()
		@@ -217,13 +220,17 @@ def crawl_tdocs(
		if checkout:
		checkout_limit = limit_tdocs if limit_tdocs and limit_tdocs > 0 else None
		query_config = QueryConfig(
		cache_dir=cache_dir,
		cache_dir=manager.root,
		working_groups=working_groups,
		limit=checkout_limit,
		order=SortOrder.DESC,
		)
		results = database.query_tdocs(query_config)
		checkout_result = checkout_tdocs(results, checkout_dir, force=False)

		# Use a shared session for checkout downloads
		with create_cached_session(manager.http_cache_path) as session:
		checkout_result = checkout_tdocs(results, checkout_dir, force=False, session=session)

		console.print(f"\n[cyan]Checked out {checkout_result.success_count} TDoc(s)[/cyan]")
		if checkout_result.error_count:
		console.print(f"[red]Failed: {checkout_result.error_count} TDoc(s)[/red]")
		@@ -266,12 +273,15 @@ def crawl_meetings(
		# Set logging verbosity early to ensure all log messages respect the configured level
		set_verbosity(verbosity)

		manager = CacheManager(cache_dir)
		manager.ensure_paths()

		subgroups = parse_subgroups(subgroup)
		working_groups = parse_working_groups(working_group, subgroups)
		limits = build_limits(None, limit_meetings, limit_meetings_per_wg, limit_wgs)
		set_credentials(eol_username, eol_password, prompt_credentials)
		config = MeetingCrawlConfig(
		cache_dir=cache_dir,
		cache_dir=manager.root,
		working_groups=working_groups,
		subgroups=subgroups,
		incremental=incremental,
		@@ -281,7 +291,7 @@ def crawl_meetings(
		credentials=None,
		)

		db_path = database_path(config.cache_dir)
		db_path = manager.db_path
		# Build descriptive message
		scope_parts = []
		if subgroups:
		@@ -291,7 +301,7 @@ def crawl_meetings(
		console.print(f"[cyan]Crawling meetings ({', '.join(scope_parts)})[/cyan]")

		with TDocDatabase(db_path) as database:
		checkout_dir = cache_dir / "checkout"
		checkout_dir = manager.checkout_dir
		# Clear all data if requested
		if clear_db:
		tdocs_count, meetings_count = database.clear_all_data()
		@@ -355,7 +365,7 @@ def crawl_meetings(

		if checkout:
		query_config = MeetingQueryConfig(
		cache_dir=cache_dir,
		cache_dir=manager.root,
		working_groups=working_groups,
		subgroups=subgroups,
		limit=limit_meetings if limit_meetings and limit_meetings > 0 else None,
		@@ -364,7 +374,9 @@ def crawl_meetings(
		)
		with TDocDatabase(db_path) as database:
		meetings = database.query_meetings(query_config)
		checkout_meeting_tdocs(meetings, cache_dir)

		with create_cached_session(manager.http_cache_path) as session:
		checkout_meeting_tdocs(meetings, manager.checkout_dir, manager.http_cache_path, session=session)


		@app.command("query-tdocs", rich_help_panel=HELP_PANEL_QUERY)
		@@ -387,6 +399,7 @@ def query_tdocs(
		) -> None:
		"""Query TDoc metadata from database."""
		set_verbosity(verbosity)
		manager = CacheManager(cache_dir)
		working_groups = parse_working_groups(working_group)
		try:
		start = datetime.fromisoformat(start_date) if start_date else None
		@@ -406,7 +419,7 @@ def query_tdocs(
		raise typer.Exit(code=2) from exc

		config = QueryConfig(
		cache_dir=cache_dir,
		cache_dir=manager.root,
		output_format=output_format,
		tdoc_ids=tdoc_ids,
		working_groups=working_groups,
		@@ -420,9 +433,9 @@ def query_tdocs(
		if not no_fetch:
		set_credentials(eol_username, eol_password, prompt=None)

		db_path = database_path(config.cache_dir)
		db_path = manager.db_path
		with TDocDatabase(db_path) as database:
		checkout_dir = cache_dir / "checkout"
		checkout_dir = manager.checkout_dir
		if clear_tdocs:
		deleted_count = database.clear_tdocs()
		console.print(f"[yellow]Cleared {deleted_count} TDocs from database[/yellow]")
		@@ -439,7 +452,9 @@ def query_tdocs(

		results = database.query_tdocs(config)
		if not no_fetch:
		result = fetch_missing_tdocs(database, config.cache_dir, config, results, None)
		# Use cached session for missing TDoc fetching
		with create_cached_session(manager.http_cache_path) as session:
		result = fetch_missing_tdocs(database, manager.root, config, results, session=session)
		if result.fetch_result and result.fetch_result.errors:
		console.print(f"[yellow]{len(result.fetch_result.errors)} issues detected during targeted crawl[/yellow]")
		for error in result.fetch_result.errors[:3]:
		@@ -451,7 +466,8 @@ def query_tdocs(
		return

		if checkout:
		checkout_tdocs(results, cache_dir / "checkout", force=False)
		with create_cached_session(manager.http_cache_path) as session:
		checkout_tdocs(results, manager.checkout_dir, force=False, session=session)

		if config.output_format is OutputFormat.JSON:
		console.print(json.dumps([tdoc_to_dict(result) for result in results], indent=2))
		@@ -477,6 +493,7 @@ def query_meetings(
		) -> None:
		"""Query meeting metadata from database."""
		set_verbosity(verbosity)
		manager = CacheManager(cache_dir)
		working_groups = parse_working_groups(working_group)
		subgroups = parse_subgroups(subgroup)
		try:
		@@ -486,7 +503,7 @@ def query_meetings(
		raise typer.Exit(code=2) from exc

		config = MeetingQueryConfig(
		cache_dir=cache_dir,
		cache_dir=manager.root,
		working_groups=working_groups,
		subgroups=subgroups,
		limit=limit,
		@@ -494,9 +511,9 @@ def query_meetings(
		include_without_files=include_without_files,
		)

		db_path = database_path(config.cache_dir)
		db_path = manager.db_path
		with TDocDatabase(db_path) as database:
		checkout_dir = cache_dir / "checkout"
		checkout_dir = manager.checkout_dir
		if clear_tdocs:
		deleted_count = database.clear_tdocs()
		console.print(f"[yellow]Cleared {deleted_count} TDocs from database[/yellow]")
		@@ -518,7 +535,8 @@ def query_meetings(
		return

		if checkout:
		checkout_meeting_tdocs(meetings, cache_dir)
		with create_cached_session(manager.http_cache_path) as session:
		checkout_meeting_tdocs(meetings, manager.checkout_dir, manager.http_cache_path, session=session)

		try:
		output = OutputFormat(output_format.lower())
		@@ -549,6 +567,7 @@ def query_specs(
		) -> None:
		"""Query spec metadata from database."""
		set_verbosity(verbosity)
		manager = CacheManager(cache_dir)
		specs = collect_spec_numbers(spec_numbers, spec_file)
		working_groups = parse_working_groups(working_group)
		wg_filter = working_groups[0].value if working_groups else None
		@@ -566,9 +585,9 @@ def query_specs(
		console.print("[red]Invalid output format; use table, json, or yaml")
		raise typer.Exit(code=2) from exc

		db_path = database_path(cache_dir)
		db_path = manager.db_path
		with TDocDatabase(db_path) as database:
		checkout_dir = cache_dir / "checkout"
		checkout_dir = manager.checkout_dir
		if clear_tdocs:
		deleted_count = database.clear_tdocs()
		console.print(f"[yellow]Cleared {deleted_count} TDocs from database[/yellow]")
		@@ -593,7 +612,7 @@ def query_specs(
		if checkout:
		spec_list = [result.spec_number for result in results]
		with TDocDatabase(db_path) as database:
		checkout_specs(spec_list, cache_dir / "checkout", database, release="latest")
		checkout_specs(spec_list, manager.checkout_dir, database, release="latest")

		if output is OutputFormat.JSON:
		console.print(json.dumps([spec_query_to_dict(result) for result in results], indent=2))
		@@ -616,19 +635,28 @@ def open_tdoc(
		"""Download, extract, and open a TDoc file."""
		set_verbosity(verbosity)
		set_credentials(eol_username, eol_password, prompt=None)
		manager = CacheManager(cache_dir)
		normalized_id = tdoc_id.strip().upper()
		config = QueryConfig(
		cache_dir=cache_dir,
		cache_dir=manager.root,
		tdoc_ids=[normalized_id],
		)

		db_path = database_path(cache_dir)
		db_path = manager.db_path
		with create_cached_session(manager.http_cache_path) as session:
		with TDocDatabase(db_path) as database:
		results = database.query_tdocs(config)

		credentials = resolve_credentials(eol_username, eol_password, prompt=None)
		result = fetch_missing_tdocs(
		database, cache_dir, config, results, credentials=credentials, full_metadata=full_metadata, use_whatthespec=use_whatthespec
		database,
		manager.root,
		config,
		results,
		credentials=credentials,
		full_metadata=full_metadata,
		use_whatthespec=use_whatthespec,
		session=session,
		)
		if result.fetch_result and result.fetch_result.errors:
		console.print(f"[yellow]{len(result.fetch_result.errors)} issues detected during targeted crawl[/yellow]")
		@@ -639,7 +667,7 @@ def open_tdoc(
		metadata = results[0]

		try:
		target_file = prepare_tdoc_file(metadata, cache_dir)
		target_file = prepare_tdoc_file(metadata, manager.root, session=session)
		except (FileNotFoundError, OSError, ValueError, zipfile.BadZipFile) as exc:
		console.print(f"[red]Failed to prepare TDoc {normalized_id}: {exc}")
		raise typer.Exit(code=1) from exc
		@@ -662,20 +690,28 @@ def checkout(
		"""Download and extract TDoc(s) to checkout folder."""
		set_verbosity(verbosity)
		set_credentials(eol_username, eol_password, prompt=None)
		manager = CacheManager(cache_dir)
		normalized_ids = [tid.strip().upper() for tid in tdoc_id]
		config = QueryConfig(
		cache_dir=cache_dir,
		cache_dir=manager.root,
		tdoc_ids=normalized_ids,
		)

		db_path = database_path(cache_dir)
		db_path = manager.db_path
		with create_cached_session(manager.http_cache_path) as session:
		with TDocDatabase(db_path) as database:
		results = database.query_tdocs(config)
		from tdoc_crawler.credentials import resolve_credentials

		credentials = resolve_credentials(eol_username, eol_password, prompt=None)
		result = fetch_missing_tdocs(
		database, cache_dir, config, results, credentials=credentials, full_metadata=full_metadata, use_whatthespec=use_whatthespec
		database,
		manager.root,
		config,
		results,
		credentials=credentials,
		full_metadata=full_metadata,
		use_whatthespec=use_whatthespec,
		session=session,
		)
		if result.fetch_result and result.fetch_result.errors:
		console.print(f"[yellow]{len(result.fetch_result.errors)} issues detected during targeted crawl[/yellow]")
		@@ -683,7 +719,7 @@ def checkout(
		if not results:
		raise typer.Exit(code=1)

		checkout_dir = cache_dir / "checkout"
		checkout_dir = manager.checkout_dir
		success_count = 0
		error_count = 0

		@@ -698,7 +734,7 @@ def checkout(

		for metadata in results:
		try:
		checkout_path = checkout_tdoc(metadata, checkout_dir, force=force)
		checkout_path = checkout_tdoc(metadata, checkout_dir, force=force, session=session)
		progress.console.print(f"[green]✓ {metadata.tdoc_id} → {checkout_path}")
		success_count += 1
		except (FileNotFoundError, OSError, ValueError, zipfile.BadZipFile) as exc:
		@@ -718,7 +754,8 @@ def stats(
		) -> None:
		"""Display database statistics."""
		set_verbosity(verbosity)
		db_path = database_path(cache_dir)
		manager = CacheManager(cache_dir)
		db_path = manager.db_path
		if not db_path.exists():
		console.print(f"[red]Database not found: {db_path}[/red]")
		raise typer.Exit(code=1)
		@@ -758,6 +795,7 @@ def crawl_specs(
		) -> None:
		"""Crawl spec metadata from configured sources."""
		set_verbosity(verbosity)
		manager = CacheManager(cache_dir)
		if spec_numbers is None:
		spec_numbers = []
		specs = collect_spec_numbers(spec_numbers, spec_file)
		@@ -769,9 +807,9 @@ def crawl_specs(

		sources = build_default_spec_sources()

		db_path = database_path(cache_dir)
		db_path = manager.db_path
		with TDocDatabase(db_path) as database:
		checkout_dir = cache_dir / "checkout"
		checkout_dir = manager.checkout_dir
		if clear_tdocs:
		deleted_count = database.clear_tdocs()
		console.print(f"[yellow]Cleared {deleted_count} TDocs from database[/yellow]")
		@@ -795,7 +833,12 @@ def crawl_specs(

		if checkout:
		with TDocDatabase(db_path) as database:
		checkout_specs([result.spec_number for result in results], cache_dir / "checkout", database, release=release)
		checkout_specs(
		[result.spec_number for result in results],
		manager.checkout_dir,
		database,
		release=release,
		)

		if output is OutputFormat.JSON:
		console.print(json.dumps([spec_crawl_to_dict(result) for result in results], indent=2))
		@@ -817,6 +860,7 @@ def checkout_spec(
		) -> None:
		"""Download and extract spec documents."""
		set_verbosity(verbosity)
		manager = CacheManager(cache_dir)
		if spec_numbers is None:
		spec_numbers = []
		specs = collect_spec_numbers(spec_numbers, spec_file)
		@@ -824,11 +868,11 @@ def checkout_spec(
		console.print("[red]No specs provided[/red]")
		raise typer.Exit(code=1)

		effective_checkout_dir = checkout_dir or (cache_dir / "checkout")
		effective_checkout_dir = checkout_dir or manager.checkout_dir

		sources = build_default_spec_sources()

		db_path = database_path(cache_dir)
		db_path = manager.db_path
		with TDocDatabase(db_path) as database:
		downloader = SpecDownloads(database)
		results = downloader.checkout_specs(specs, doc_only, effective_checkout_dir, release, sources=sources)
		@@ -848,11 +892,12 @@ def open_spec(
		"""Download and open a spec document."""
		set_verbosity(verbosity)
		normalized = spec.strip()
		checkout_dir = cache_dir / "checkout"
		manager = CacheManager(cache_dir)
		checkout_dir = manager.checkout_dir

		sources = build_default_spec_sources()

		db_path = database_path(cache_dir)
		db_path = manager.db_path
		with TDocDatabase(db_path) as database:
		downloader = SpecDownloads(database)
		try:

src/tdoc_crawler/cli/args.py

+21 −5

Original line number	Diff line number	Diff line
		@@ -19,10 +19,14 @@ IncrementalOption = Annotated[bool, typer.Option("--incremental/--full", help="T
		ClearTDocsOption = Annotated[bool, typer.Option("--clear-tdocs", help="Clear all TDocs before crawling")]
		ClearSpecsOption = Annotated[bool, typer.Option("--clear-specs", help="Clear all specs before crawling")]
		ClearDbOption = Annotated[bool, typer.Option("--clear-db", help="Clear all meetings and TDocs before crawling")]
		CheckoutOption = Annotated[bool, typer.Option("--checkout", help="Download and extract metadata results to checkout folder")]
		CheckoutOption = Annotated[
		bool, typer.Option("--checkout/--no-checkout", help="Download and extract metadata results to checkout folder", envvar="TDC_CHECKOUT")
		]
		LimitTDocsOption = Annotated[int | None, typer.Option("--limit-tdocs", help="Limit number of TDocs", envvar="TDC_LIMIT_TDOCS")]
		LimitMeetingsOption = Annotated[int | None, typer.Option("--limit-meetings", help="Limit meetings overall", envvar="TDC_LIMIT_MEETINGS")]
		LimitMeetingsPerWgOption = Annotated[int | None, typer.Option("--limit-meetings-per-wg", help="Limit meetings per working group")]
		LimitMeetingsPerWgOption = Annotated[
		int | None, typer.Option("--limit-meetings-per-wg", help="Limit meetings per working group", envvar="TDC_LIMIT_MEETINGS_PER_WG")
		]
		LimitWgsOption = Annotated[int | None, typer.Option("--limit-wgs", help="Limit number of working groups")]
		WorkersOption = Annotated[int, typer.Option("--workers", help="Number of parallel subinterpreter workers", envvar="TDC_WORKERS")]
		OverallTimeoutOption = Annotated[
		@@ -37,7 +41,9 @@ TDocIdsArgument = Annotated[list[str] | None, typer.Argument(help="TDoc identifi
		OutputFormatOption = Annotated[str, typer.Option("--output", "-o", help="Output format", envvar="TDC_OUTPUT")]

		FullMetadataOption = Annotated[bool, typer.Option("--full-metadata", help="Fetch full metadata instead of URL only")]
		UseWhatTheSpecOption = Annotated[bool, typer.Option("--use-whatthespec", help="Use WhatTheSpec API for fetching")]
		UseWhatTheSpecOption = Annotated[
		bool, typer.Option("--use-whatthespec/--no-use-whatthespec", help="Use WhatTheSpec API for fetching", envvar="TDC_USE_WHATTHESPEC")
		]
		LimitOption = Annotated[int | None, typer.Option("--limit", "-l", help="Maximum number of rows")]
		OrderOption = Annotated[str, typer.Option("--order", help="Sort order (asc|desc)")]
		StartDateOption = Annotated[str | None, typer.Option("--start-date", help="Filter from ISO timestamp", envvar="TDC_START_DATE")]
		@@ -64,6 +70,16 @@ ForceOption = Annotated[bool, typer.Option("--force", "-f", help="Re-download ev
		SpecOption = Annotated[list[str] | None, typer.Option("--spec", help="Spec number(s) (dotted or undotted)")]
		SpecArgument = Annotated[list[str] | None, typer.Argument(help="Spec number(s) to query (dotted or undotted)")]
		SpecFileOption = Annotated[Path | None, typer.Option("--spec-file", help="File with spec numbers")]
		ReleaseOption = Annotated[str, typer.Option("--release", help="Spec release selector")]
		ReleaseOption = Annotated[
		str,
		typer.Option(
		"--release",
		help=(
		"Spec release selector. Values: 'latest' (default), 'all', or explicit version. "
		"Version formats: 18.0.0, 18.1 (=18.1.x), 18 (=18.x.y). "
		"Prefixes supported: v18, v18.1, v18.1.2, Rel-18, rel18 (case-insensitive)."
		),
		),
		]
		DocOnlyOption = Annotated[bool, typer.Option("--doc-only/--no-doc-only", help="Attempt document-only download")]
		CheckoutDirOption = Annotated[Path | None, typer.Option("--checkout-dir", help="Spec checkout base directory", envvar="TDC_CHECKOUT_DIR")]
		CheckoutDirOption = Annotated[Path | None, typer.Option("--checkout-dir", help="Spec checkout base directory")]

src/tdoc_crawler/crawlers/meeting_doclist.py

+3 −3

Original line number	Diff line number	Diff line
		@@ -23,7 +23,7 @@ class DocumentListError(Exception):

		def fetch_meeting_document_list(
		meeting_id: int,
		cache_dir: Path,
		cache_path: Path,
		cache_ttl: int = 7200,
		cache_refresh_on_access: bool = True,
		timeout: int = 30,
		@@ -32,7 +32,7 @@ def fetch_meeting_document_list(

		Args:
		meeting_id: 3GPP meeting identifier
		cache_dir: Directory for HTTP cache storage
		cache_path: Path to HTTP cache SQLite database
		cache_ttl: HTTP cache TTL in seconds
		cache_refresh_on_access: Whether to refresh cache TTL on access
		timeout: Request timeout in seconds
		@@ -49,7 +49,7 @@ def fetch_meeting_document_list(

		# Create cached session (no credentials required)
		session = create_cached_session(
		cache_dir=cache_dir,
		cache_path=cache_path,
		ttl=cache_ttl,
		refresh_ttl_on_access=cache_refresh_on_access,
		max_retries=3,

src/tdoc_crawler/crawlers/parallel.py

+2 −4

File changed.

Preview size limit exceeded, changes collapsed.