cli/specs: update printing and downloads to reflect model changes; formatting and lint fixes (5f7e1a1e) · Commits · Jan Reimes / 3gpp-crawler

src/tdoc_crawler/cli/crawl.py

+1 −1

Original line number	Diff line number	Diff line
		@@ -173,7 +173,7 @@ def crawl_tdocs(
		results = database.query_tdocs(query_config)

		with create_cached_session(http_cache_enabled=http_cache_enabled) as session:
		checkout_result = checkout_tdocs(results, checkout_dir, force=False, session=session)
		checkout_result = checkout_tdocs(results, manager.checkout_dir, force=False, session=session)

		console.print(f"\n[cyan]Checked out {checkout_result.success_count} TDoc(s)[/cyan]")
		if checkout_result.error_count:

+6 −4

Original line number	Diff line number	Diff line
		@@ -9,6 +9,8 @@ from rich.table import Table
		from tdoc_crawler.database.specs import SpecCrawlResult
		from tdoc_crawler.logging import get_console
		from tdoc_crawler.meetings.models import MeetingMetadata
		from tdoc_crawler.models.subworking_groups import SUBTB_INDEX
		from tdoc_crawler.models.working_groups import WorkingGroup
		from tdoc_crawler.specs.models import SpecQueryResult
		from tdoc_crawler.tdocs.models import TDocMetadata

		@@ -32,7 +34,6 @@ def print_tdoc_table(results: list[TDocMetadata]) -> None:
		table.add_column("Title", style="yellow")
		table.add_column("Source(s)", style="magenta")
		table.add_column("Type", style="green")
		table.add_column("Meeting", style="cyan")
		table.add_column("Size (KB)", justify="right", style="blue")

		for result in results[:100]:
		@@ -42,7 +43,6 @@ def print_tdoc_table(results: list[TDocMetadata]) -> None:
		result.title or "-",
		result.source or "-",
		result.tdoc_type or "-",
		result.meeting_name or "-",
		size_kb,
		)

		@@ -72,10 +72,12 @@ def print_meeting_table(results: list[MeetingMetadata]) -> None:
		)
		or "-"
		)
		working_group = next((wg.value for wg in WorkingGroup if wg.tbid == meeting.tbid), "-")
		subgroup = SUBTB_INDEX[meeting.subtb].code if meeting.subtb in SUBTB_INDEX else "-"
		table.add_row(
		meeting.short_name,
		meeting.working_group,
		meeting.subgroup or "-",
		working_group,
		subgroup,
		date_range,
		meeting.location or "-",
		meeting.files_url or "-",

+6 −5

Original line number	Diff line number	Diff line
		@@ -169,7 +169,12 @@ class SpecDownloads:
		url = SPEC_URL_TEMPLATE.format(series=series, normalized=normalized, file_name=target.file_name)
		return url, target.file_name

		async def _attempt_doc_only_async(self, url: str, normalized: str, target_dir: Path) -> bool:
		def _download_full_zip(self, url: str, target_path: Path) -> None:
		"""Download full zip file, re-use session if already created for doc-only attempt."""
		self.session = download_to_file(url, target_path, session=self.session, close_session=False, cache_manager_name=self._cache_manager.name)

		@staticmethod
		async def _attempt_doc_only_async(url: str, normalized: str, target_dir: Path) -> bool:
		"""Attempt to download only the document file from remote zip."""
		try:
		async with HTTPZipReader(url) as reader:
		@@ -199,10 +204,6 @@ class SpecDownloads:
		_logger.warning("Doc-only download failed for %s: %s", url, exc)
		return False

		def _download_full_zip(self, url: str, target_path: Path) -> None:
		"""Download full zip file, re-use session if already created for doc-only attempt."""
		self.session = download_to_file(url, target_path, session=self.session, close_session=False, cache_manager_name=self._cache_manager.name)

		@staticmethod
		def _extract_zip(zip_file: Path, extract_dir: Path, keep_zip: bool = True) -> None:
		"""Extract zip file."""