Loading src/tdoc_crawler/cli/crawl.py +1 −1 Original line number Diff line number Diff line Loading @@ -173,7 +173,7 @@ def crawl_tdocs( results = database.query_tdocs(query_config) with create_cached_session(http_cache_enabled=http_cache_enabled) as session: checkout_result = checkout_tdocs(results, checkout_dir, force=False, session=session) checkout_result = checkout_tdocs(results, manager.checkout_dir, force=False, session=session) console.print(f"\n[cyan]Checked out {checkout_result.success_count} TDoc(s)[/cyan]") if checkout_result.error_count: Loading src/tdoc_crawler/cli/printing.py +6 −4 Original line number Diff line number Diff line Loading @@ -9,6 +9,8 @@ from rich.table import Table from tdoc_crawler.database.specs import SpecCrawlResult from tdoc_crawler.logging import get_console from tdoc_crawler.meetings.models import MeetingMetadata from tdoc_crawler.models.subworking_groups import SUBTB_INDEX from tdoc_crawler.models.working_groups import WorkingGroup from tdoc_crawler.specs.models import SpecQueryResult from tdoc_crawler.tdocs.models import TDocMetadata Loading @@ -32,7 +34,6 @@ def print_tdoc_table(results: list[TDocMetadata]) -> None: table.add_column("Title", style="yellow") table.add_column("Source(s)", style="magenta") table.add_column("Type", style="green") table.add_column("Meeting", style="cyan") table.add_column("Size (KB)", justify="right", style="blue") for result in results[:100]: Loading @@ -42,7 +43,6 @@ def print_tdoc_table(results: list[TDocMetadata]) -> None: result.title or "-", result.source or "-", result.tdoc_type or "-", result.meeting_name or "-", size_kb, ) Loading Loading @@ -72,10 +72,12 @@ def print_meeting_table(results: list[MeetingMetadata]) -> None: ) or "-" ) working_group = next((wg.value for wg in WorkingGroup if wg.tbid == meeting.tbid), "-") subgroup = SUBTB_INDEX[meeting.subtb].code if meeting.subtb in SUBTB_INDEX else "-" table.add_row( meeting.short_name, meeting.working_group, meeting.subgroup or "-", working_group, subgroup, date_range, meeting.location or "-", meeting.files_url or "-", Loading src/tdoc_crawler/specs/downloads.py +6 −5 Original line number Diff line number Diff line Loading @@ -169,7 +169,12 @@ class SpecDownloads: url = SPEC_URL_TEMPLATE.format(series=series, normalized=normalized, file_name=target.file_name) return url, target.file_name async def _attempt_doc_only_async(self, url: str, normalized: str, target_dir: Path) -> bool: def _download_full_zip(self, url: str, target_path: Path) -> None: """Download full zip file, re-use session if already created for doc-only attempt.""" self.session = download_to_file(url, target_path, session=self.session, close_session=False, cache_manager_name=self._cache_manager.name) @staticmethod async def _attempt_doc_only_async(url: str, normalized: str, target_dir: Path) -> bool: """Attempt to download only the document file from remote zip.""" try: async with HTTPZipReader(url) as reader: Loading Loading @@ -199,10 +204,6 @@ class SpecDownloads: _logger.warning("Doc-only download failed for %s: %s", url, exc) return False def _download_full_zip(self, url: str, target_path: Path) -> None: """Download full zip file, re-use session if already created for doc-only attempt.""" self.session = download_to_file(url, target_path, session=self.session, close_session=False, cache_manager_name=self._cache_manager.name) @staticmethod def _extract_zip(zip_file: Path, extract_dir: Path, keep_zip: bool = True) -> None: """Extract zip file.""" Loading Loading
src/tdoc_crawler/cli/crawl.py +1 −1 Original line number Diff line number Diff line Loading @@ -173,7 +173,7 @@ def crawl_tdocs( results = database.query_tdocs(query_config) with create_cached_session(http_cache_enabled=http_cache_enabled) as session: checkout_result = checkout_tdocs(results, checkout_dir, force=False, session=session) checkout_result = checkout_tdocs(results, manager.checkout_dir, force=False, session=session) console.print(f"\n[cyan]Checked out {checkout_result.success_count} TDoc(s)[/cyan]") if checkout_result.error_count: Loading
src/tdoc_crawler/cli/printing.py +6 −4 Original line number Diff line number Diff line Loading @@ -9,6 +9,8 @@ from rich.table import Table from tdoc_crawler.database.specs import SpecCrawlResult from tdoc_crawler.logging import get_console from tdoc_crawler.meetings.models import MeetingMetadata from tdoc_crawler.models.subworking_groups import SUBTB_INDEX from tdoc_crawler.models.working_groups import WorkingGroup from tdoc_crawler.specs.models import SpecQueryResult from tdoc_crawler.tdocs.models import TDocMetadata Loading @@ -32,7 +34,6 @@ def print_tdoc_table(results: list[TDocMetadata]) -> None: table.add_column("Title", style="yellow") table.add_column("Source(s)", style="magenta") table.add_column("Type", style="green") table.add_column("Meeting", style="cyan") table.add_column("Size (KB)", justify="right", style="blue") for result in results[:100]: Loading @@ -42,7 +43,6 @@ def print_tdoc_table(results: list[TDocMetadata]) -> None: result.title or "-", result.source or "-", result.tdoc_type or "-", result.meeting_name or "-", size_kb, ) Loading Loading @@ -72,10 +72,12 @@ def print_meeting_table(results: list[MeetingMetadata]) -> None: ) or "-" ) working_group = next((wg.value for wg in WorkingGroup if wg.tbid == meeting.tbid), "-") subgroup = SUBTB_INDEX[meeting.subtb].code if meeting.subtb in SUBTB_INDEX else "-" table.add_row( meeting.short_name, meeting.working_group, meeting.subgroup or "-", working_group, subgroup, date_range, meeting.location or "-", meeting.files_url or "-", Loading
src/tdoc_crawler/specs/downloads.py +6 −5 Original line number Diff line number Diff line Loading @@ -169,7 +169,12 @@ class SpecDownloads: url = SPEC_URL_TEMPLATE.format(series=series, normalized=normalized, file_name=target.file_name) return url, target.file_name async def _attempt_doc_only_async(self, url: str, normalized: str, target_dir: Path) -> bool: def _download_full_zip(self, url: str, target_path: Path) -> None: """Download full zip file, re-use session if already created for doc-only attempt.""" self.session = download_to_file(url, target_path, session=self.session, close_session=False, cache_manager_name=self._cache_manager.name) @staticmethod async def _attempt_doc_only_async(url: str, normalized: str, target_dir: Path) -> bool: """Attempt to download only the document file from remote zip.""" try: async with HTTPZipReader(url) as reader: Loading Loading @@ -199,10 +204,6 @@ class SpecDownloads: _logger.warning("Doc-only download failed for %s: %s", url, exc) return False def _download_full_zip(self, url: str, target_path: Path) -> None: """Download full zip file, re-use session if already created for doc-only attempt.""" self.session = download_to_file(url, target_path, session=self.session, close_session=False, cache_manager_name=self._cache_manager.name) @staticmethod def _extract_zip(zip_file: Path, extract_dir: Path, keep_zip: bool = True) -> None: """Extract zip file.""" Loading