Commit 0111d59d authored by Jan Reimes's avatar Jan Reimes
Browse files

refactor(cli): remove result limits in table printing functions

* Updated print_tdoc_table, print_meeting_table, print_spec_crawl_table, and print_spec_table to process all results without limiting to the first 100 entries.
* Improved data handling for meeting and spec results in CLI output.
* Added a local-checkout fast path to fetch_tdoc_files: unless force_download is set, previously crawled TDocs are served directly from the checkout directory without a WhatTheSpec lookup or network call.
parent 4e4f34d9
Loading
Loading
Loading
Loading
+4 −4
Original line number Diff line number Diff line
@@ -148,7 +148,7 @@ def print_tdoc_table(
        meeting_map: Optional map of meeting_id to MeetingMetadata for meeting info
    """
    rows = []
    for result in results[:100]:
    for result in results:
        size_kb = f"{result.file_size // 1024}" if result.file_size else "?"
        meeting = meeting_map.get(result.meeting_id) if meeting_map and result.meeting_id else None
        rows.append(
@@ -185,7 +185,7 @@ def print_tdoc_table(
def print_meeting_table(results: list[MeetingMetadata]) -> None:
    """Print meeting results as formatted table."""
    rows = []
    for meeting in results[:100]:
    for meeting in results:
        date_range = (
            " - ".join(
                filter(
@@ -261,7 +261,7 @@ def spec_query_to_dict(result: SpecQueryResult) -> dict[str, Any]:
def print_spec_crawl_table(results: list[SpecCrawlResult]) -> None:
    """Print spec crawl results as formatted table."""
    rows = []
    for result in results[:100]:
    for result in results:
        sources = ", ".join(f"{outcome.source_name}:{outcome.status}" for outcome in result.sources) or "-"
        rows.append(
            {
@@ -291,7 +291,7 @@ def print_spec_crawl_table(results: list[SpecCrawlResult]) -> None:
def print_spec_table(results: list[SpecQueryResult]) -> None:
    """Print spec query results as formatted table."""
    rows = []
    for result in results[:100]:
    for result in results:
        differences = ", ".join(sorted(result.source_differences.keys())) if result.source_differences else "-"
        rows.append(
            {
+25 −6
Original line number Diff line number Diff line
@@ -7,9 +7,13 @@ from pathlib import Path

from tdoc_crawler.config.settings import PathConfig
from tdoc_crawler.http_client import create_cached_session
from tdoc_crawler.logging import get_logger
from tdoc_crawler.models.workspaces import TDocNotFoundError
from tdoc_crawler.tdocs.operations.checkout import checkout_tdoc, get_checkout_path
from tdoc_crawler.tdocs.sources.whatthespec import resolve_via_whatthespec
from tdoc_crawler.workspaces.utils import resolve_tdoc_checkout_path

logger = get_logger(__name__)


@dataclass
@@ -31,10 +35,9 @@ def fetch_tdoc_files(document_id: str, force_download: bool = False) -> TDocFile
    """Fetch TDoc files from checkout or download from 3GPP FTP.

    Pipeline:
    1. Resolve TDoc ID to metadata via WhatTheSpec
    2. Calculate checkout path
    3. If not in checkout, download via checkout_tdoc
    4. Find available file types in checkout directory
    1. Check if TDoc already exists in local checkout (filesystem scan)
    2. If found, return immediately — no network call needed
    3. Otherwise resolve via WhatTheSpec and download if needed

    Args:
        document_id: TDoc identifier (e.g., "S4-260001")
@@ -46,17 +49,33 @@ def fetch_tdoc_files(document_id: str, force_download: bool = False) -> TDocFile
    Raises:
        TDocNotFoundError: If TDoc cannot be found or downloaded
    """
    checkout_dir = PathConfig().checkout_dir
    normalized_id = document_id.upper()

    # Step 1: Check local checkout first (covers previously crawled TDocs)
    if not force_download:
        existing_path = resolve_tdoc_checkout_path(normalized_id, checkout_dir)
        if existing_path is not None:
            files = _scan_checkout_dir(existing_path)
            if files.primary_path is not None:
                return files

    # Step 2: Resolve via WhatTheSpec and download if needed
    metadata = resolve_via_whatthespec(document_id)
    if metadata is None:
        raise TDocNotFoundError(f"TDoc {document_id} not found via WhatTheSpec")
        raise TDocNotFoundError(f"TDoc {document_id} not found via WhatTheSpec or local database")

    checkout_dir = PathConfig().checkout_dir
    checkout_path = get_checkout_path(metadata, checkout_dir)

    if not checkout_path.exists() or force_download:
        with create_cached_session() as session:
            checkout_tdoc(metadata, checkout_dir, force=force_download, session=session)

    return _scan_checkout_dir(checkout_path)


def _scan_checkout_dir(checkout_path: Path) -> TDocFiles:
    """Scan a checkout directory for available document files."""
    files = TDocFiles(checkout_dir=checkout_path)

    if checkout_path.is_dir():