Commit 21eaa269 authored by Jan Reimes's avatar Jan Reimes
Browse files

feat(logging): add rich logging integration with configurable verbosity

parent a8590985
Loading
Loading
Loading
Loading
+175 −3
Original line number Diff line number Diff line
@@ -7,20 +7,30 @@ directory structure as the server.

from __future__ import annotations

import logging
import posixpath
import shutil
import zipfile
from contextlib import suppress
from dataclasses import dataclass
from pathlib import Path
from typing import TYPE_CHECKING, cast
from urllib.parse import urlparse

import requests

from tdoc_crawler.crawlers.meeting_doclist import DocumentListError, fetch_meeting_document_list
from tdoc_crawler.http_client import download_to_path
from tdoc_crawler.models import TDocMetadata
from tdoc_crawler.logging import get_logger
from tdoc_crawler.models import MeetingMetadata, TDocMetadata
from tdoc_crawler.specs.downloads import SpecDownloads
from tdoc_crawler.specs.sources.base import FunctionSpecSource, SpecSource
from tdoc_crawler.specs.sources.threegpp import fetch_threegpp_metadata
from tdoc_crawler.specs.sources.whatthespec import fetch_whatthespec_metadata

logger = logging.getLogger(__name__)
if TYPE_CHECKING:
    from tdoc_crawler.database import TDocDatabase

logger = get_logger(__name__)


def get_checkout_path(metadata: TDocMetadata, checkout_dir: Path) -> Path:
@@ -173,8 +183,170 @@ def get_checked_out_tdocs(checkout_dir: Path) -> list[str]:
    return tdoc_ids


def clear_checkout_tdocs(checkout_dir: Path) -> int:
    """Clear TDoc checkout entries from the checkout directory.

    Everything directly under the checkout root is deleted except the
    reserved "Specs" subdirectory, which is managed separately by
    clear_checkout_specs().

    Args:
        checkout_dir: Base checkout directory

    Returns:
        Number of entries removed
    """
    if not checkout_dir.exists():
        return 0

    # Snapshot the entries first so we never iterate a directory
    # while deleting from it.
    doomed = [path for path in checkout_dir.iterdir() if path.name != "Specs"]
    for path in doomed:
        if path.is_dir():
            shutil.rmtree(path)
        else:
            path.unlink()
    return len(doomed)


def clear_checkout_specs(checkout_dir: Path) -> int:
    """Clear spec checkout entries from the checkout directory.

    Deletes the entire "Specs" subtree below the checkout root, if any.

    Args:
        checkout_dir: Base checkout directory

    Returns:
        Number of entries removed (always 1 if Specs directory existed)
    """
    specs_dir = checkout_dir / "Specs"
    if specs_dir.exists():
        shutil.rmtree(specs_dir)
        return 1
    return 0


def checkout_specs(
    spec_numbers: list[str],
    checkout_dir: Path,
    database: TDocDatabase,
    release: str = "latest",
    doc_only: bool = False,
) -> list[Path]:
    """Checkout spec documents to the checkout directory.

    Thin convenience wrapper that wires the default spec sources into a
    SpecDownloads instance and delegates the actual work to it.

    Args:
        spec_numbers: List of spec numbers to checkout
        checkout_dir: Base checkout directory
        database: TDocDatabase instance for metadata lookup
        release: Release version to checkout
        doc_only: If True, download only document files instead of full zip

    Returns:
        List of paths to checked out specs
    """
    spec_sources = build_default_spec_sources()
    return SpecDownloads(database).checkout_specs(
        spec_numbers, doc_only, checkout_dir, release, sources=spec_sources
    )


def build_default_spec_sources() -> list[SpecSource]:
    """Build the default list of spec sources.

    Returns:
        List of SpecSource instances for fetching spec metadata
    """
    # (name, fetcher) pairs for each supported metadata backend.
    fetchers = [
        ("3gpp", fetch_threegpp_metadata),
        ("whatthespec", fetch_whatthespec_metadata),
    ]
    return [cast("SpecSource", FunctionSpecSource(name, fetch)) for name, fetch in fetchers]


@dataclass
class CheckoutResult:
    """Result of a checkout operation."""

    # Number of TDocs checked out successfully.
    success_count: int
    # Number of failed TDocs; always equals len(errors).
    error_count: int
    # Human-readable "<tdoc_id>: <reason>" messages, one per failure.
    errors: list[str]


def checkout_tdocs(
    results: list[TDocMetadata],
    checkout_dir: Path,
    force: bool = False,
) -> CheckoutResult:
    """Checkout multiple TDoc files to the checkout directory.

    Args:
        results: List of TDocMetadata to checkout
        checkout_dir: Base checkout directory
        force: If True, re-download even if already exists

    Returns:
        CheckoutResult with success/error counts
    """
    if not results:
        return CheckoutResult(success_count=0, error_count=0, errors=[])

    checkout_dir.mkdir(parents=True, exist_ok=True)
    success_count = 0
    errors: list[str] = []

    for metadata in results:
        try:
            if not metadata.url:
                raise ValueError("missing URL")
            checkout_tdoc(metadata, checkout_dir, force=force)
            success_count += 1
        # NOTE: FileNotFoundError is a subclass of OSError, so listing it
        # separately (as before) was redundant; OSError already covers it.
        except (OSError, ValueError, zipfile.BadZipFile) as exc:
            errors.append(f"{metadata.tdoc_id}: {exc}")

    return CheckoutResult(success_count=success_count, error_count=len(errors), errors=errors)


def checkout_meeting_tdocs(
    meetings: list[MeetingMetadata],
    cache_dir: Path,
) -> CheckoutResult:
    """Checkout TDoc files from a list of meetings.

    Args:
        meetings: List of MeetingMetadata to checkout TDocs from
        cache_dir: Cache directory for document list caching

    Returns:
        CheckoutResult with success/error counts. Meeting-level failures
        (missing files URL, document-list fetch errors) are included in
        the error count and messages alongside per-TDoc download errors.
    """
    if not meetings:
        return CheckoutResult(success_count=0, error_count=0, errors=[])

    checkout_dir = cache_dir / "checkout"
    unique: dict[str, TDocMetadata] = {}
    errors: list[str] = []

    for meeting in meetings:
        if not meeting.files_url:
            errors.append(f"{meeting.short_name}: no files URL")
            continue
        try:
            tdocs = fetch_meeting_document_list(meeting.meeting_id, cache_dir)
        except DocumentListError as exc:
            errors.append(f"{meeting.short_name}: {exc}")
            continue
        for metadata in tdocs:
            # First occurrence wins when the same TDoc appears in multiple meetings.
            unique.setdefault(metadata.tdoc_id, metadata)

    download_result = checkout_tdocs(list(unique.values()), checkout_dir, force=False)
    # BUG FIX: the meeting-level errors collected above were previously
    # discarded; merge them into the returned result so callers see them.
    return CheckoutResult(
        success_count=download_result.success_count,
        error_count=download_result.error_count + len(errors),
        errors=errors + download_result.errors,
    )


__all__ = [
    "CheckoutResult",
    "checkout_meeting_tdocs",
    "checkout_specs",
    "checkout_tdoc",
    "checkout_tdocs",
    "clear_checkout_specs",
    "clear_checkout_tdocs",
    "get_checked_out_tdocs",
    "get_checkout_path",
    "prepare_tdoc_file",
+77 −145

File changed.

Preview size limit exceeded, changes collapsed.

+6 −1
Original line number Diff line number Diff line
@@ -2,11 +2,16 @@

from __future__ import annotations

import logging
from pathlib import Path
from typing import Annotated

import typer

from tdoc_crawler.logging import DEFAULT_LEVEL as LOGGING_DEFAULT_LEVEL

DEFAULT_VERBOSITY = logging.getLevelName(LOGGING_DEFAULT_LEVEL)

CacheDirOption = Annotated[Path, typer.Option("--cache-dir", "-c", help="Cache directory", envvar="TDC_CACHE_DIR")]
WorkingGroupOption = Annotated[list[str] | None, typer.Option("--working-group", "-w", help="Filter by working group", envvar="TDC_WORKING_GROUP")]
SubgroupOption = Annotated[list[str] | None, typer.Option("--sub-group", "-s", help="Filter by sub-working group", envvar="TDC_SUB_GROUP")]
@@ -26,7 +31,7 @@ OverallTimeoutOption = Annotated[
]
MaxRetriesOption = Annotated[int, typer.Option("--max-retries", help="HTTP retry attempts", envvar="TDC_MAX_RETRIES")]
TimeoutOption = Annotated[int, typer.Option("--timeout", help="HTTP timeout seconds", envvar="TDC_TIMEOUT")]
VerboseOption = Annotated[bool, typer.Option("--verbose", "-v", help="Enable verbose logging", envvar="TDC_VERBOSE")]
VerbosityOption = Annotated[str, typer.Option("--verbosity", "-v", help="Logging verbosity level", envvar="TDC_VERBOSITY")]

TDocIdsArgument = Annotated[list[str] | None, typer.Argument(help="TDoc identifiers to query")]
OutputFormatOption = Annotated[str, typer.Option("--output", "-o", help="Output format", envvar="TDC_OUTPUT")]
+4 −7
Original line number Diff line number Diff line
@@ -2,7 +2,6 @@

from __future__ import annotations

import logging
import os
import subprocess
import sys
@@ -15,11 +14,12 @@ import typer
from tdoc_crawler.cli.console import get_console
from tdoc_crawler.crawlers import normalize_subgroup_alias, normalize_working_group_alias
from tdoc_crawler.database import TDocDatabase
from tdoc_crawler.logging import get_logger
from tdoc_crawler.models import CrawlLimits, HttpCacheConfig, MeetingQueryConfig, SortOrder, WorkingGroup
from tdoc_crawler.specs.normalization import normalize_portal_meeting_name
from tdoc_crawler.specs.normalization import expand_spec_ranges_batch, normalize_portal_meeting_name

console = get_console()
_logger = logging.getLogger(__name__)
_logger = get_logger(__name__)

DEFAULT_DATABASE_FILENAME = "tdoc_crawler.db"
DOWNLOAD_TIMEOUT = 60
@@ -130,9 +130,6 @@ def collect_spec_numbers(specs: list[str] | None, spec_file: Path | None) -> lis
    if not collected:
        return []

    # Import here to avoid circular imports
    from tdoc_crawler.specs.normalization import expand_spec_ranges_batch

    try:
        expanded = expand_spec_ranges_batch(collected)
    except ValueError as e:
@@ -239,7 +236,7 @@ def launch_file(path: Path) -> None:
        raise typer.Exit(code=1)
    try:
        if sys.platform.startswith("win"):
            os.startfile(path)  # type: ignore[attr-defined]  # noqa: S606
            os.startfile(path)  # noqa: S606
        elif sys.platform == "darwin":
            open_cmd = Path("/usr/bin/open")
            if open_cmd.exists():
+79 −0
Original line number Diff line number Diff line
"""Logging module with RichHandler integration.

Provides a factory function for creating loggers with RichHandler
configured for consistent console output across the application.
"""

import functools
import logging
from typing import Final

from rich.console import Console
from rich.logging import RichHandler

# Default logging level for all loggers; can be raised/lowered at runtime
# via set_verbosity().
DEFAULT_LEVEL: Final[int] = logging.WARNING

# Module-import side effect: build a single shared Console and RichHandler.
# markup=False so log messages are NOT interpreted as Rich markup tags;
# tracebacks_width=None lets rich tracebacks use the full console width.
_console: Final[Console] = Console()
_rich_handler: RichHandler = RichHandler(
    console=_console,
    show_time=True,
    show_path=False,
    rich_tracebacks=True,
    markup=False,
    tracebacks_width=None,
)

# Make RichHandler the ONLY root handler: clear removes any handlers that
# were installed before this module was imported (e.g. by basicConfig).
logging.root.handlers.clear()
logging.root.addHandler(_rich_handler)
logging.root.setLevel(DEFAULT_LEVEL)


@functools.cache
def get_logger(name: str) -> logging.Logger:
    """Get a logger instance with RichHandler output.

    The RichHandler is installed on the root logger at module import
    time (not on the first call to this function), so every logger
    returned here inherits Rich console output through the root.
    Results are memoized with functools.cache; note that
    logging.getLogger itself already returns the same instance for a
    given name, so the cache only avoids the registry lookup.

    Args:
        name: The logger name, typically __name__ of the calling module.

    Returns:
        A configured logger instance.

    Examples:
        >>> logger = get_logger(__name__)
        >>> logger.info("Starting operation")
        >>> logger.debug("Detailed diagnostic information")
    """
    return logging.getLogger(name)


def set_verbosity(level: int | str) -> None:
    """Set the logging level for all loggers.

    This updates the root logger level, which affects all child loggers
    created via get_logger().

    Args:
        level: The logging level (e.g., logging.DEBUG, logging.INFO, etc.)
                   Accepts integer values (0-50), logging constants, or string names of levels.

    Examples:
        >>> set_verbosity(logging.DEBUG)
        >>> set_verbosity(10)  # DEBUG level
    """
    if isinstance(level, str):
        level = level.upper()
        if level_int := logging._nameToLevel.get(level):
            level = level_int
        elif level.isnumeric():
            level = int(level)
        else:
            raise ValueError(f"Invalid logging level name: {level}")

    logging.root.setLevel(level)
Loading