Commit fc06dce3 authored by Jan Reimes
Browse files

refactor(core): rename shadowing variables and add meeting helper

- config_app.py: rename path→target_dir, _check_path_exists→_check_dir_exists

- query.py: rename path→path_config (avoids shadowing pathlib.Path)

- sources.py: rename path→config_file in load_config_file, ConfigLoadError

- meetings.py: add get_meeting_short_name() sync helper for TDoc metadata

- test_targeted_fetch.py: fix infer_working_groups_from_ids empty list expectation
parent 265c8a82
Loading
Loading
Loading
Loading
+5 −5
Original line number Diff line number Diff line
@@ -80,12 +80,12 @@ def config_show(
    print(exporter.export(format))


def _check_path_exists(path: Path) -> tuple[bool, str]:
    """Check if a path exists or can be created."""
    if path.exists():
def _check_dir_exists(target_dir: Path) -> tuple[bool, str]:
    """Check if a directory exists or can be created."""
    if target_dir.exists():
        return True, ""
    try:
        path.parent.mkdir(parents=True, exist_ok=True)
        target_dir.parent.mkdir(parents=True, exist_ok=True)
        return True, " (will be created)"
    except PermissionError:
        return False, " (permission denied)"
@@ -99,7 +99,7 @@ def _validate_config_values(config: ThreeGPPConfig) -> list[tuple[str, str]]:

    # Check cache_dir
    cache_dir = config.path.cache_dir
    exists, detail = _check_path_exists(cache_dir)
    exists, detail = _check_dir_exists(cache_dir)
    if not exists:
        issues.append(("error", f"cache_dir does not exist and cannot be created: {cache_dir}{detail}"))
    elif detail:
+12 −12
Original line number Diff line number Diff line
@@ -89,7 +89,7 @@ def query_tdocs(
) -> None:
    """Query TDoc metadata from database."""
    set_verbosity(verbosity)
    path = PathConfig(cache_dir=cache_dir) if cache_dir else PathConfig()
    path_config = PathConfig(cache_dir=cache_dir) if cache_dir else PathConfig()

    working_groups = parse_working_groups(working_group)
    try:
@@ -131,10 +131,10 @@ def query_tdocs(
        agenda_pattern_exclude=agenda_ex,
    )

    db_file = path.db_file
    db_file = path_config.db_file
    handle_clear_options(
        db_file,
        path.checkout_dir,
        path_config.checkout_dir,
        TDocDatabase,
        clear_tdocs=clear_tdocs,
        clear_specs=clear_specs,
@@ -168,7 +168,7 @@ def query_tdocs(

    if checkout:
        with create_cached_session() as session:
            checkout_tdocs(results, path.checkout_dir, force=False, session=session)
            checkout_tdocs(results, path_config.checkout_dir, force=False, session=session)

    # Build meeting map for enriched output
    async def load_meeting_map() -> dict:
@@ -206,7 +206,7 @@ def query_meetings(
) -> None:
    """Query meeting metadata from database."""
    set_verbosity(verbosity)
    path = PathConfig(cache_dir=cache_dir) if cache_dir else PathConfig()
    path_config = PathConfig(cache_dir=cache_dir) if cache_dir else PathConfig()
    working_groups = parse_working_groups(working_group)
    subgroups = parse_subgroups(subgroup)
    try:
@@ -223,10 +223,10 @@ def query_meetings(
        include_without_files=include_without_files,
    )

    db_file = path.db_file
    db_file = path_config.db_file
    handle_clear_options(
        db_file,
        path.checkout_dir,
        path_config.checkout_dir,
        MeetingDatabase,
        clear_tdocs=clear_tdocs,
        clear_specs=clear_specs,
@@ -244,7 +244,7 @@ def query_meetings(

    if checkout:
        with create_cached_session() as session:
            checkout_meeting_tdocs(meetings, path.checkout_dir, path.http_cache_file, session=session)
            checkout_meeting_tdocs(meetings, path_config.checkout_dir, path_config.http_cache_file, session=session)

    try:
        output = OutputFormat(output_format.lower())
@@ -275,7 +275,7 @@ def query_specs(
) -> None:
    """Query spec metadata from database."""
    set_verbosity(verbosity)
    path = PathConfig(cache_dir=cache_dir) if cache_dir else PathConfig()
    path_config = PathConfig(cache_dir=cache_dir) if cache_dir else PathConfig()
    specs = collect_spec_numbers(spec_numbers, spec_file)
    working_groups = parse_working_groups(working_group)
    wg_filter = working_groups[0].value if working_groups else None
@@ -293,10 +293,10 @@ def query_specs(
        console.print("[red]Invalid output format; use table, json, jsonl, toon, or yaml")
        raise typer.Exit(code=2) from exc

    db_file = path.db_file
    db_file = path_config.db_file
    handle_clear_options(
        db_file,
        path.checkout_dir,
        path_config.checkout_dir,
        SpecDatabase,
        clear_tdocs=clear_tdocs,
        clear_specs=clear_specs,
@@ -317,7 +317,7 @@ def query_specs(

        async def load_specs_for_checkout() -> None:
            async with SpecDatabase(db_file) as database:
                checkout_specs(spec_list, path.checkout_dir, database, release="latest")
                checkout_specs(spec_list, path_config.checkout_dir, database, release="latest")

        asyncio.run(load_specs_for_checkout())

+17 −17
Original line number Diff line number Diff line
@@ -40,14 +40,14 @@ class ConfigLoadError(Exception):
    """Raised when a config file cannot be loaded or parsed.

    Attributes:
        file_path: Path to the file that failed to load.
        config_file: Path to the file that failed to load.
        reason: Human-readable explanation of the failure.
    """

    def __init__(self, file_path: Path, reason: str) -> None:
        self.file_path = file_path
    def __init__(self, config_file: Path, reason: str) -> None:
        self.config_file = config_file
        self.reason = reason
        super().__init__(f"Failed to load config from {file_path}: {reason}")
        super().__init__(f"Failed to load config from {config_file}: {reason}")


def _interpolate_env_vars(value: Any) -> Any:
@@ -145,14 +145,14 @@ def discover_config_files(cwd: Path | None = None) -> list[Path]:
    return files


def load_config_file(path: Path) -> dict[str, Any]:
def load_config_file(config_file: Path) -> dict[str, Any]:
    """Load a single config file and return its contents as a dict.

    Supports TOML (.toml), YAML (.yaml, .yml), and JSON (.json) formats.
    Environment variable interpolation is applied to string values.

    Args:
        path: Path to the config file.
        config_file: Path to the config file.

    Returns:
        Dictionary of config values. Empty dict if file not found.
@@ -160,49 +160,49 @@ def load_config_file(path: Path) -> dict[str, Any]:
    Raises:
        ConfigLoadError: If the file cannot be parsed or has permission issues.
    """
    if not path.is_file():
    if not config_file.is_file():
        return {}

    suffix = path.suffix.lower()
    suffix = config_file.suffix.lower()

    try:
        if suffix == ".toml":
            # For TOML, we need to interpolate BEFORE parsing since TOML
            # doesn't support ${VAR} syntax natively
            with path.open("r", encoding="utf-8") as f:
            with config_file.open("r", encoding="utf-8") as f:
                content = f.read()
            content = _interpolate_env_vars(content)
            data = tomllib.loads(content)
        elif suffix in {".yaml", ".yml"}:
            yaml = import_module("yaml")

            with path.open("r", encoding="utf-8") as f:
            with config_file.open("r", encoding="utf-8") as f:
                try:
                    data = yaml.safe_load(f)
                except yaml.scanner.ScannerError as e:
                    raise ConfigLoadError(path, f"Parse error: {e}") from e
                    raise ConfigLoadError(config_file, f"Parse error: {e}") from e
            if data is None:
                data = {}
            data = _interpolate_env_vars(data)
        elif suffix == ".json":
            with path.open("r", encoding="utf-8") as f:
            with config_file.open("r", encoding="utf-8") as f:
                data = json.load(f)
            data = _interpolate_env_vars(data)
        else:
            raise ConfigLoadError(path, f"Unsupported file format: {suffix}")
            raise ConfigLoadError(config_file, f"Unsupported file format: {suffix}")
    except FileNotFoundError:
        return {}
    except PermissionError as e:
        raise ConfigLoadError(path, f"Permission denied: {e}") from e
        raise ConfigLoadError(config_file, f"Permission denied: {e}") from e
    except (ValueError, json.JSONDecodeError) as e:
        raise ConfigLoadError(path, f"Parse error: {e}") from e
        raise ConfigLoadError(config_file, f"Parse error: {e}") from e
    except ImportError as e:
        if "yaml" in str(e):
            raise ConfigLoadError(path, "PyYAML is required for YAML files but is not installed") from e
            raise ConfigLoadError(config_file, "PyYAML is required for YAML files but is not installed") from e
        raise

    if not isinstance(data, dict):
        raise ConfigLoadError(path, f"Expected dict at root, got {type(data).__name__}")
        raise ConfigLoadError(config_file, f"Expected dict at root, got {type(data).__name__}")

    return data

+31 −1
Original line number Diff line number Diff line
"""Meeting database operations."""

import asyncio
from collections import defaultdict
from collections.abc import Callable, Iterable
from datetime import datetime

from tdoc_crawler.config.settings import PathConfig
from tdoc_crawler.database.base import DocDatabase
from tdoc_crawler.database.oxyde_models import CrawlLogEntry, MeetingMetadata, TDocMetadata
from tdoc_crawler.logging import get_logger
@@ -294,4 +296,32 @@ class MeetingDatabase(DocDatabase):
        return False


__all__ = ["MeetingDatabase"]
async def _resolve_meeting_short_name(meeting_id: int) -> str | None:
    """Fetch the short name of the meeting identified by ``meeting_id``.

    The lookup opens a ``MeetingDatabase`` on the database file of a
    default-constructed ``PathConfig``.

    Args:
        meeting_id: Meeting identifier.

    Returns:
        The meeting's short name (e.g., "SA4#134"), or None when the
        database returns no meeting for that ID.
    """
    database_path = PathConfig().db_file
    async with MeetingDatabase(database_path) as database:
        record = await database._get_meeting(meeting_id)
        if record is None:
            return None
        return record.short_name


def get_meeting_short_name(meeting_id: int) -> str | None:
    """Sync convenience wrapper to look up a meeting's short name by ID.

    Uses ``asyncio.run()`` — must not be called from an async context.

    Args:
        meeting_id: Meeting identifier.

    Returns:
        Meeting short name (e.g., "SA4#134") if found, None otherwise.
    """
    return asyncio.run(_resolve_meeting_short_name(meeting_id))


__all__ = ["MeetingDatabase", "get_meeting_short_name"]
+2 −2
Original line number Diff line number Diff line
@@ -60,10 +60,10 @@ class TestInferWorkingGroups:
        assert WorkingGroup.RAN in groups

    def test_infer_empty_list(self) -> None:
        """Return all working groups for empty input."""
        """Return empty list for empty input."""
        ids: list[str] = []
        groups = infer_working_groups_from_ids(ids)
        assert len(groups) == 3
        assert len(groups) == 0


@pytest.mark.asyncio