Commit f77f2f73 authored by Jan Reimes
Browse files

refactor(domain): consolidate constants and update imports across core modules

parent 3b5c3c8c
Loading
Loading
Loading
Loading
+15 −12
Original line number Diff line number Diff line
@@ -13,16 +13,6 @@ from dotenv import load_dotenv
from rich.progress import BarColumn, MofNCompleteColumn, Progress, SpinnerColumn, TextColumn
from rich.table import Table

from tdoc_crawler.checkout import (
    build_default_spec_sources,
    checkout_meeting_tdocs,
    checkout_specs,
    checkout_tdoc,
    checkout_tdocs,
    clear_checkout_specs,
    clear_checkout_tdocs,
    prepare_tdoc_file,
)
from tdoc_crawler.cli.args import (
    DEFAULT_VERBOSITY,
    CacheDirOption,
@@ -78,14 +68,27 @@ from tdoc_crawler.cli.printing import (
)
from tdoc_crawler.cli.utils import launch_file
from tdoc_crawler.config import CacheManager
from tdoc_crawler.crawlers import MeetingCrawler, TDocCrawler
from tdoc_crawler.credentials import resolve_credentials, set_credentials
from tdoc_crawler.database import TDocDatabase
from tdoc_crawler.fetching import fetch_missing_tdocs
from tdoc_crawler.http_client import create_cached_session
from tdoc_crawler.logging import set_verbosity
from tdoc_crawler.meetings import MeetingCrawler
from tdoc_crawler.models import CrawlLimits, MeetingCrawlConfig, MeetingQueryConfig, OutputFormat, QueryConfig, SortOrder, SpecQueryFilters, TDocCrawlConfig
from tdoc_crawler.specs import SpecDatabase, SpecDownloads
from tdoc_crawler.specs.operations.checkout import (
    build_default_spec_sources,
    checkout_specs,
    clear_checkout_specs,
)
from tdoc_crawler.tdocs import TDocCrawler
from tdoc_crawler.tdocs.operations.checkout import (
    checkout_meeting_tdocs,
    checkout_tdoc,
    checkout_tdocs,
    clear_checkout_tdocs,
    prepare_tdoc_file,
)
from tdoc_crawler.tdocs.operations.fetch import fetch_missing_tdocs
from tdoc_crawler.utils.parse import collect_spec_numbers, parse_subgroups, parse_working_groups

# Import-time side effect: python-dotenv's load_dotenv() reads variables from a
# .env file (when one is found) into the process environment.
load_dotenv()
+17 −3
Original line number Diff line number Diff line
@@ -17,13 +17,27 @@ DEFAULT_MANAGER = "default"
_cache_managers: dict[str, CacheManager] = {}


def register_cache_manager(manager: CacheManager) -> None:
    """Register a cache manager instance under a given name."""
    if (name := manager.name) in _cache_managers:
def register_cache_manager(manager: CacheManager, force: bool = False) -> None:
    """Store ``manager`` in the module-level registry, keyed by ``manager.name``.

    Args:
        manager: The CacheManager instance to add to the registry.
        force: When True, an entry already registered under the same name is
            replaced instead of being treated as an error.

    Raises:
        ValueError: If the name is already registered and ``force`` is False.
    """
    name = manager.name
    # Happy path first: either the name is free, or the caller asked to overwrite.
    if name not in _cache_managers or force:
        _cache_managers[name] = manager
        return
    raise ValueError(f"Cache manager with name '{name}' is already registered.")


def reset_cache_managers() -> None:
    """Clear all registered cache managers.

    Empties the module-level ``_cache_managers`` dict in place (the dict object
    itself is kept; only its entries are removed).

    Primarily useful for testing to ensure clean state between tests.
    """
    _cache_managers.clear()


def resolve_cache_manager(name: str | None = None) -> CacheManager:
    """Resolve a cache manager by name, or return the default if name is None."""
    name = name or DEFAULT_MANAGER
+39 −0
Original line number Diff line number Diff line
"""Shared constants and registries."""

from __future__ import annotations

from tdoc_crawler.constants.patterns import (
    DATE_PATTERN,
    EXCLUDED_DIRS,
    EXCLUDED_DIRS_NORMALIZED,
    TDOC_PATTERN,
    TDOC_PATTERN_STR,
    TDOC_SUBDIRS,
    TDOC_SUBDIRS_NORMALIZED,
)
from tdoc_crawler.constants.registry import MEETING_CODE_REGISTRY
from tdoc_crawler.constants.urls import (
    LOGIN_URL,
    MEETINGS_BASE_URL,
    PORTAL_BASE_URL,
    SPEC_URL_TEMPLATE,
    TDOC_DOWNLOAD_URL,
    TDOC_VIEW_URL,
)

# Public names of this package: everything imported above from the
# ``patterns``, ``registry`` and ``urls`` submodules is re-exported here.
__all__ = [
    "DATE_PATTERN",
    "EXCLUDED_DIRS",
    "EXCLUDED_DIRS_NORMALIZED",
    "LOGIN_URL",
    "MEETINGS_BASE_URL",
    "MEETING_CODE_REGISTRY",
    "PORTAL_BASE_URL",
    "SPEC_URL_TEMPLATE",
    "TDOC_DOWNLOAD_URL",
    "TDOC_PATTERN",
    "TDOC_PATTERN_STR",
    "TDOC_SUBDIRS",
    "TDOC_SUBDIRS_NORMALIZED",
    "TDOC_VIEW_URL",
]
+27 −0
Original line number Diff line number Diff line
"""Regex patterns for parsing 3GPP data."""

from __future__ import annotations

import re
from typing import Final

# Group 1: one of R/S/C, then one of 1-6 or P, then 4-10 arbitrary characters.
# Group 2: the file extension (zip, txt or pdf). Compiled case-insensitively below.
TDOC_PATTERN_STR: Final[str] = r"([RSC][1-6P].{4,10})\.(zip|txt|pdf)"
TDOC_PATTERN: Final[re.Pattern[str]] = re.compile(TDOC_PATTERN_STR, flags=re.IGNORECASE)

# Matches 4-digit / 2-digit / 2-digit groups joined by an ASCII hyphen or any
# character in the U+2010..U+2015 range (Unicode hyphen and dash variants).
DATE_PATTERN: Final[re.Pattern[str]] = re.compile(
    r"(\d{4}[\-\u2010-\u2015]\d{2}[\-\u2010-\u2015]\d{2})",
)

# Directory names as spelled, plus an upper-cased set for case-insensitive lookups.
TDOC_SUBDIRS: Final[tuple[str, ...]] = (
    "Docs",
    "Documents",
    "Tdocs",
    "TDocs",
    "DOCS",
)
TDOC_SUBDIRS_NORMALIZED: Final[frozenset[str]] = frozenset(map(str.upper, TDOC_SUBDIRS))

EXCLUDED_DIRS: Final[tuple[str, ...]] = (
    "Inbox",
    "Draft",
    "Drafts",
    "Agenda",
    "Invitation",
    "Report",
)
EXCLUDED_DIRS_NORMALIZED: Final[frozenset[str]] = frozenset(map(str.upper, EXCLUDED_DIRS))

__all__ = [
    "DATE_PATTERN",
    "EXCLUDED_DIRS",
    "EXCLUDED_DIRS_NORMALIZED",
    "TDOC_PATTERN",
    "TDOC_PATTERN_STR",
    "TDOC_SUBDIRS",
    "TDOC_SUBDIRS_NORMALIZED",
]
+39 −0
Original line number Diff line number Diff line
"""Working group registries and code mappings."""

from __future__ import annotations

from typing import Final

MEETING_CODE_REGISTRY: Final[dict[str, list[tuple[str, str | None]]]] = {
    "RAN": [
        ("RP", "RP"),
        ("R1", "R1"),
        ("R2", "R2"),
        ("R3", "R3"),
        ("R4", "R4"),
        ("R5", "R5"),
        ("R6", "R6"),
    ],
    "SA": [
        ("SP", "SP"),
        ("S1", "S1"),
        ("S2", "S2"),
        ("S3", "S3"),
        ("S4", "S4"),
        ("S5", "S5"),
        ("S6", "S6"),
    ],
    "CT": [
        ("CP", "CP"),
        ("C1", "C1"),
        ("C2", "C2"),
        ("C3", "C3"),
        ("C4", "C4"),
        ("C5", "C5"),
        ("C6", "C6"),
    ],
}

__all__ = [
    "MEETING_CODE_REGISTRY",
]
Loading