Commit 1c5e761b authored by Jan Reimes
Browse files

feat(cli): enhance CLI with hybrid server checks and async handling

* Add ensure_hybrid_server_for_profile function to validate hybrid server availability.
* Update handle_clear_options calls in meetings, specs, and tdocs to run asynchronously.
* Replace resolve_spec_release with resolve_spec_release_from_db in members.py.
* Introduce normalize_spec_number_compact function for compact spec number formatting.
parent bcbaa943
Loading
Loading
Loading
Loading
+18 −0
Original line number Diff line number Diff line
@@ -7,6 +7,8 @@ from pathlib import Path
from rich.progress import BarColumn, MofNCompleteColumn, Progress, SpinnerColumn, TaskID, TextColumn

from tdoc_crawler.database.base import DocDatabase
from tdoc_crawler.extraction.hybrid_server import ensure_hybrid_server
from tdoc_crawler.extraction.profiles import ExtractionProfile
from tdoc_crawler.logging import get_console
from tdoc_crawler.specs.operations.checkout import clear_checkout_specs
from tdoc_crawler.tdocs.operations.checkout import clear_checkout_tdocs
@@ -82,8 +84,24 @@ def create_progress_bar(description: str, total: float = 100) -> tuple[Progress,
    return progress, task


def ensure_hybrid_server_for_profile(profile: ExtractionProfile) -> bool:
    """Ensure the hybrid server is available for extraction profiles that require it.

    ``ExtractionProfile.PDF_ONLY`` returns ``True`` immediately without
    contacting the server. For every other profile ``ensure_hybrid_server``
    is called with ``auto_start=True`` and its progress messages are echoed
    to the console in dim style.

    Args:
        profile: Extraction profile selected for the current run.

    Returns:
        ``True`` when the profile is ``PDF_ONLY`` or the returned status
        reports the server as running; ``False`` otherwise, after the
        failure reason has been printed to the console.
    """
    if profile == ExtractionProfile.PDF_ONLY:
        return True

    # Function-local import: rich is already a dependency of this module
    # (see the ``rich.progress`` import), only ``escape`` is new here.
    from rich.markup import escape

    def _report_progress(message: str) -> None:
        # Escape the free-form text so brackets in it (e.g. "[1/3]") are
        # printed literally instead of being parsed as Rich markup tags.
        console.print(f"[dim]{escape(str(message))}[/dim]")

    console.print(f"[dim]Ensuring hybrid server for profile '{profile.value}'...[/dim]")
    _, status = ensure_hybrid_server(auto_start=True, progress_callback=_report_progress)
    if status.running:
        return True

    error = status.error or "unknown startup failure"
    # Error strings frequently contain brackets ("[Errno 111] ..."); without
    # escaping Rich would swallow them as style tags or raise MarkupError on
    # an unmatched closing tag.
    console.print(f"[red]Hybrid server is not available: {escape(str(error))}[/red]")
    return False


# Public names exported by a star-import of this module.
__all__ = [
    "console",
    "create_progress_bar",
    "ensure_hybrid_server_for_profile",
    "handle_clear_options",
]
+9 −7
Original line number Diff line number Diff line
@@ -96,6 +96,7 @@ def crawl_meetings(
        scope_parts.append(f"working groups: {', '.join(wg.value for wg in working_groups)}")
    console.print(f"[cyan]Crawling meetings ({', '.join(scope_parts)})[/cyan]")

    asyncio.run(
        handle_clear_options(
            db_file,
            crawler_config.path.checkout_dir,
@@ -103,6 +104,7 @@ def crawl_meetings(
            clear_tdocs=clear_tdocs,
            clear_specs=clear_specs,
            clear_db=clear_db,
        ),
    )

    async def run_meeting_crawl() -> object:
+8 −6
Original line number Diff line number Diff line
@@ -57,12 +57,14 @@ def crawl_specs(

    sources = build_default_spec_sources()

    asyncio.run(
        handle_clear_options(
            crawler_config.path.db_file,
            crawler_config.path.checkout_dir,
            SpecDatabase,
            clear_tdocs=clear_tdocs,
            clear_specs=clear_specs,
        ),
    )

    async def crawl_specs_db() -> list:
+8 −6
Original line number Diff line number Diff line
@@ -128,12 +128,14 @@ def crawl_tdocs(

    console.print(f"[cyan]Crawling TDocs ({', '.join(scope_parts)})[/cyan]")

    asyncio.run(
        handle_clear_options(
            db_file,
            crawler_config.path.checkout_dir,
            TDocDatabase,
            clear_tdocs=clear_tdocs,
            clear_specs=clear_specs,
        ),
    )

    async def run_tdoc_crawl() -> tuple:
+2 −2
Original line number Diff line number Diff line
@@ -31,7 +31,6 @@ from tdoc_crawler.config import PathConfig
from tdoc_crawler.database.tdocs import TDocDatabase
from tdoc_crawler.models.base import OutputFormat, SortOrder
from tdoc_crawler.models.workspaces import SourceKind
from tdoc_crawler.specs.operations.checkout import resolve_spec_release
from tdoc_crawler.tdocs.models import TDocQueryConfig
from tdoc_crawler.utils.date_parser import parse_partial_date
from tdoc_crawler.workspaces import (
@@ -42,6 +41,7 @@ from tdoc_crawler.workspaces import (
    normalize_workspace_name,
    remove_workspace_member,
)
from tdoc_crawler.workspaces.utils import resolve_spec_release_from_db


def workspace_members(
@@ -78,7 +78,7 @@ def _resolve_spec_release_for_add(
    if source_kind != SourceKind.SPEC:
        return release
    try:
        resolved = asyncio.run(resolve_spec_release(item, release, auto_crawl=auto_crawl))
        resolved, _ = asyncio.run(resolve_spec_release_from_db(item, release))
        console.print(f"[dim]  Resolved {item} release '{release}'{resolved}[/dim]")
        return resolved
    except ValueError as exc:
Loading