Commit d36f6bf8 authored by Jan Reimes's avatar Jan Reimes
Browse files

♻️ refactor(cli): extract helpers to fix PLR0915 too-many-statements

- crawl.py: extract _build_scope_description, _print_crawl_result,
  _print_meeting_crawl_result to reduce crawl_tdocs/crawl_meetings
- query.py: extract _parse_date_range, remove dead graph_rag parameter
parent d85d38fd
Loading
Loading
Loading
Loading
+41 −29
Original line number Diff line number Diff line
@@ -57,6 +57,7 @@ from tdoc_crawler.meetings.models import MeetingCrawlConfig, MeetingMetadata, Me
from tdoc_crawler.meetings.operations.crawl import MeetingCrawler, MeetingCrawlResult
from tdoc_crawler.models.base import OutputFormat, SortOrder
from tdoc_crawler.models.subworking_groups import SUBTB_INDEX
from tdoc_crawler.models.working_groups import WorkingGroup
from tdoc_crawler.specs.operations.checkout import build_default_spec_sources, checkout_specs
from tdoc_crawler.tdocs.models import TDocCrawlConfig, TDocQueryConfig
from tdoc_crawler.tdocs.operations import TDocCrawler
@@ -75,6 +76,43 @@ def _parse_date(date_str: str | None, is_end: bool = False) -> date | None:
    return parse_partial_date(date_str, is_end=is_end)


def _build_scope_description(
    meetings: list[MeetingMetadata],
    subgroups: list[str] | None,
    working_groups: list[WorkingGroup],
) -> list[str]:
    scope_parts = []
    if meetings:
        unique_subgroups = {SUBTB_INDEX[m.subtb].code for m in meetings if m.subtb and m.subtb in SUBTB_INDEX}
        if unique_subgroups:
            scope_parts.append(f"subgroups: {', '.join(sorted(unique_subgroups))}")
        else:
            scope_parts.append(f"meetings: {len(meetings)} meeting(s)")
    elif subgroups:
        scope_parts.append(f"subgroups: {', '.join(subgroups)}")
    else:
        scope_parts.append(f"working groups: {', '.join(wg.value for wg in working_groups)}")
    return scope_parts


def _print_crawl_result(result: TDocCrawlResult, throughput: float) -> None:
    """Print a TDoc crawl summary: processed/inserted/updated counts plus throughput.

    When errors occurred, reports the total count and echoes at most the
    first five individual messages.
    """
    console.print(f"[green]Processed {result.processed} TDocs ({throughput:.1f} TDocs/sec)[/green]")
    console.print(f"[green]Inserted {result.inserted}, updated {result.updated}[/green]")
    if not result.errors:
        return
    console.print(f"[yellow]{len(result.errors)} issues detected[/yellow]")
    for err in result.errors[:5]:
        console.print(f"  - {err}")


def _print_meeting_crawl_result(result: MeetingCrawlResult) -> None:
    """Print a meeting crawl summary: processed/inserted/updated counts.

    When errors occurred, reports the total count and echoes at most the
    first five individual messages.
    """
    console.print(f"[green]Processed {result.processed} meetings[/green]")
    console.print(f"[green]Inserted {result.inserted}, updated {result.updated}[/green]")
    if not result.errors:
        return
    console.print(f"[yellow]{len(result.errors)} issues detected[/yellow]")
    for err in result.errors[:5]:
        console.print(f"  - {err}")


def crawl_tdocs(
    working_group: WorkingGroupOption = None,
    subgroup: SubgroupOption = None,
@@ -139,9 +177,6 @@ def crawl_tdocs(

    db_file = crawler_config.path.db_file

    scope_parts = []

    # Query actual meetings from database to show realistic scope
    async def fetch_meetings() -> list[MeetingMetadata]:
        async with MeetingDatabase(db_file) as meeting_db:
            query_config = MeetingQueryConfig(
@@ -156,19 +191,7 @@ def crawl_tdocs(
            return await meeting_db.query_meetings(query_config)

    meetings = asyncio.run(fetch_meetings())

    if meetings:
        # Extract unique subgroups from queried meetings
        unique_subgroups = {SUBTB_INDEX[m.subtb].code for m in meetings if m.subtb and m.subtb in SUBTB_INDEX}
        if unique_subgroups:
            scope_parts.append(f"subgroups: {', '.join(sorted(unique_subgroups))}")
        else:
            scope_parts.append(f"meetings: {len(meetings)} meeting(s)")
    # Fallback to input parameters if no meetings found in DB
    elif subgroups:
        scope_parts.append(f"subgroups: {', '.join(subgroups)}")
    else:
        scope_parts.append(f"working groups: {', '.join(wg.value for wg in working_groups)}")
    scope_parts = _build_scope_description(meetings, subgroups, working_groups)

    console.print(f"[cyan]Crawling TDocs ({', '.join(scope_parts)})[/cyan]")

@@ -245,13 +268,7 @@ def crawl_tdocs(
            return result, throughput

    result, throughput = asyncio.run(run_tdoc_crawl())

    console.print(f"[green]Processed {result.processed} TDocs ({throughput:.1f} TDocs/sec)[/green]")
    console.print(f"[green]Inserted {result.inserted}, updated {result.updated}[/green]")
    if result.errors:
        console.print(f"[yellow]{len(result.errors)} issues detected[/yellow]")
        for error in result.errors[:5]:
            console.print(f"  - {error}")
    _print_crawl_result(result, throughput)


def crawl_meetings(
@@ -348,12 +365,7 @@ def crawl_meetings(

    result = asyncio.run(run_meeting_crawl())

    console.print(f"[green]Processed {result.processed} meetings[/green]")
    console.print(f"[green]Inserted {result.inserted}, updated {result.updated}[/green]")
    if result.errors:
        console.print(f"[yellow]{len(result.errors)} issues detected[/yellow]")
        for error in result.errors[:5]:
            console.print(f"  - {error}")
    _print_meeting_crawl_result(result)

    if checkout:
        query_config = MeetingQueryConfig(
+15 −19
Original line number Diff line number Diff line
@@ -4,7 +4,6 @@ from __future__ import annotations

import asyncio
from datetime import UTC, datetime
from typing import Annotated

import typer

@@ -64,6 +63,20 @@ from tdoc_crawler.utils.parse import collect_spec_numbers, parse_subgroups, pars
HELP_PANEL = "Query Commands"


def _parse_date_range(start_date: str | None, end_date: str | None) -> tuple[datetime | None, datetime | None]:
    """Convert optional ISO-8601 date strings into aware UTC datetime bounds.

    The start bound snaps to midnight, the end bound to end-of-day; a
    ``None`` input yields a ``None`` bound. Prints an error and exits
    with code 2 on a malformed value.
    """
    start: datetime | None = None
    end: datetime | None = None
    if start_date:
        try:
            start = datetime.combine(parse_partial_date(start_date), datetime.min.time(), tzinfo=UTC)
        except ValueError as exc:
            console.print("[red]Invalid start date format; use ISO-8601")
            raise typer.Exit(code=2) from exc
    if end_date:
        try:
            end = datetime.combine(parse_partial_date(end_date, is_end=True), datetime.max.time(), tzinfo=UTC)
        except ValueError as exc:
            console.print("[red]Invalid end date format; use ISO-8601")
            raise typer.Exit(code=2) from exc
    return start, end


def query_tdocs(
    tdoc_ids: TDocIdsArgument = None,
    working_group: WorkingGroupOption = None,
@@ -84,7 +97,6 @@ def query_tdocs(
    title_ex: TitlePatternExcludeOption = None,
    agenda: AgendaPatternOption = None,
    agenda_ex: AgendaPatternExcludeOption = None,
    graph_rag: Annotated[bool, typer.Option("--graph-rag", help="Use hybrid graph-RAG search")] = False,
    verbosity: VerbosityOption = str(DEFAULT_VERBOSITY),
) -> None:
    """Query TDoc metadata from database."""
@@ -92,16 +104,7 @@ def query_tdocs(
    path_config = PathConfig(cache_dir=cache_dir) if cache_dir else PathConfig()

    working_groups = parse_working_groups(working_group)
    try:
        start = datetime.combine(parse_partial_date(start_date), datetime.min.time(), tzinfo=UTC) if start_date else None
    except ValueError as exc:
        console.print("[red]Invalid start date format; use ISO-8601")
        raise typer.Exit(code=2) from exc
    try:
        end = datetime.combine(parse_partial_date(end_date, is_end=True), datetime.max.time(), tzinfo=UTC) if end_date else None
    except ValueError as exc:
        console.print("[red]Invalid end date format; use ISO-8601")
        raise typer.Exit(code=2) from exc
    start, end = _parse_date_range(start_date, end_date)

    try:
        sort_order = SortOrder(order.lower())
@@ -177,13 +180,6 @@ def query_tdocs(

    meeting_map = asyncio.run(load_meeting_map())

    # If graph-rag flag is set, perform hybrid search
    # Disabled due to logical inconsistency - query_hybrid expects text query, not TDoc ID
    # The hybrid search function is designed for semantic search + graph expansion,
    # but was being incorrectly called with a TDoc ID instead of a text query.
    # This functionality has been disabled as it doesn't make logical sense in CLI context.
    pass

    if config.output_format is OutputFormat.TABLE:
        print_tdoc_table(results, meeting_map)
    else: