Commit 154c3cd7 authored by Jan Reimes
Browse files

♻️ refactor(config): rename TDocCrawlerConfig to ThreeGPPConfig, deduplicate path defaults

Remove backward-compatible TDocCrawlerConfig alias now that all
consumers have been migrated.  Deduplicate _DEFAULT_* path constants
by sourcing them from CacheManager instead of duplicating in settings.
parent a575d180
Loading
Loading
Loading
Loading
+4 −4
Original line number Diff line number Diff line
@@ -46,7 +46,7 @@ from tdoc_crawler.cli.args import (
)
from tdoc_crawler.cli.formatting import format_output
from tdoc_crawler.cli.printing import print_spec_crawl_table, spec_crawl_to_dict
from tdoc_crawler.config import TDocCrawlerConfig
from tdoc_crawler.config import ThreeGPPConfig
from tdoc_crawler.credentials import set_credentials
from tdoc_crawler.database import MeetingDatabase, TDocDatabase
from tdoc_crawler.database.specs import SpecCrawlResult, SpecDatabase
@@ -147,7 +147,7 @@ def crawl_tdocs(
    """
    set_verbosity(verbosity)

    crawler_config = TDocCrawlerConfig.from_settings()
    crawler_config = ThreeGPPConfig.from_settings()
    # Override cache_dir if provided (deprecated but still supported)
    if cache_dir is not None:
        crawler_config.path.cache_dir = cache_dir
@@ -297,7 +297,7 @@ def crawl_meetings(
    set_verbosity(verbosity)
    set_credentials(eol_username, eol_password, prompt=prompt_credentials)

    crawler_config = TDocCrawlerConfig.from_settings()
    crawler_config = ThreeGPPConfig.from_settings()
    if cache_dir is not None:
        crawler_config.path.cache_dir = cache_dir
    crawler_config.ensure_paths()
@@ -401,7 +401,7 @@ def crawl_specs(
) -> None:
    """Crawl spec metadata from configured sources."""
    set_verbosity(verbosity)
    crawler_config = TDocCrawlerConfig.from_settings()
    crawler_config = ThreeGPPConfig.from_settings()
    if cache_dir is not None:
        crawler_config.path.cache_dir = cache_dir
    crawler_config.ensure_paths()
+0 −2
Original line number Diff line number Diff line
@@ -16,7 +16,6 @@ from tdoc_crawler.config.settings import (
    CredentialsConfig,
    HttpConfig,
    PathConfig,
    TDocCrawlerConfig,
    ThreeGPPConfig,
)
from tdoc_crawler.config.sources import (
@@ -42,7 +41,6 @@ __all__ = [
    "CredentialsConfig",
    "HttpConfig",
    "PathConfig",
    "TDocCrawlerConfig",
    "ThreeGPPConfig",
    "discover_config_files",
    "load_config_file",
+5 −0
Original line number Diff line number Diff line
@@ -108,6 +108,11 @@ class CacheManager:
        """Path to workspace registry JSON file."""
        return self._cache_dir / WORKSPACE_REGISTRY_FILENAME

    @classmethod
    def is_registered(cls) -> bool:
        """Check if a CacheManager instance has been registered."""
        return cls._instance is not None


def resolve_cache_manager() -> CacheManager:
    """Resolve the registered CacheManager instance.
+2 −2
Original line number Diff line number Diff line
@@ -87,7 +87,7 @@ class ConfigExporter:
        """Export to TOML with comments from field descriptions."""
        lines = ["# 3GPP Crawler Configuration", ""]
        lines.append("# This file was generated by `tdoc-crawler config init`")
        lines.append("# Default values are defined in TDocCrawlerConfig using Field(default=...)")
        lines.append("# Default values are defined in ThreeGPPConfig using Field(default=...)")
        lines.append("")

        current_section = None
@@ -125,7 +125,7 @@ class ConfigExporter:
        lines = [
            "# 3GPP Crawler Configuration",
            "# This file was generated by `tdoc-crawler config init`",
            "# Default values are defined in TDocCrawlerConfig using Field(default=...)",
            "# Default values are defined in ThreeGPPConfig using Field(default=...)",
            "",
        ]

+13 −21
Original line number Diff line number Diff line
@@ -17,16 +17,16 @@ from pathlib import Path
from pydantic import AliasChoices, Field, field_validator
from pydantic_settings import BaseSettings, SettingsConfigDict

from tdoc_crawler.config.cache_manager import (
    DEFAULT_CHECKOUT_DIRNAME,
    DEFAULT_DATABASE_FILENAME,
    DEFAULT_HTTP_CACHE_FILENAME,
    DEFAULT_WORKSPACES_DIRNAME,
)
from tdoc_crawler.config.env_vars import ConfigEnvVar
from tdoc_crawler.config.sources import discover_config_files, load_config_file, merge_configs

# Default values (not constants - just module-level defaults)
# Actual paths are resolved by CacheManager at runtime
_DEFAULT_CACHE_DIR_STR = "~/.3gpp-crawler"
_DEFAULT_DATABASE_FILENAME = "3gpp_crawler.db"
_DEFAULT_HTTP_CACHE_FILENAME = "http-cache.sqlite3"
_DEFAULT_CHECKOUT_DIRNAME = "checkout"
_DEFAULT_WORKSPACES_DIRNAME = "workspaces"


class PathConfig(BaseSettings):
@@ -44,37 +44,33 @@ class PathConfig(BaseSettings):
        description="Root cache directory for storing downloaded files and metadata",
    )
    # File and directory name settings. Each default is the shared constant
    # imported from tdoc_crawler.config.cache_manager; passing ``default=``
    # only once keeps every Field(...) call a valid expression (a repeated
    # keyword argument is a SyntaxError).
    db_filename: str = Field(
        default=DEFAULT_DATABASE_FILENAME,
        description="SQLite database filename for storing crawl metadata",
    )
    checkout_dirname: str = Field(
        default=DEFAULT_CHECKOUT_DIRNAME,
        description="Subdirectory name for checked-out documents",
    )
    workspaces_dirname: str = Field(
        default=DEFAULT_WORKSPACES_DIRNAME,
        description="Subdirectory name for workspace data (sources, wiki)",
    )

    @property
    def db_file(self) -> Path:
        """Path to the SQLite database file.

        Returns:
            ``cache_dir`` joined with ``DEFAULT_DATABASE_FILENAME``.
        """
        # NOTE(review): the configurable ``db_filename`` field is not consulted
        # here, so overriding it does not change this path - confirm intended.
        return self.cache_dir / DEFAULT_DATABASE_FILENAME

    @property
    def http_cache_file(self) -> Path:
        """Path to the HTTP cache database file.

        Returns:
            ``cache_dir`` joined with ``DEFAULT_HTTP_CACHE_FILENAME``.
        """
        return self.cache_dir / DEFAULT_HTTP_CACHE_FILENAME

    @property
    def checkout_dir(self) -> Path:
        """Path to the checkout directory for documents.

        Returns:
            ``cache_dir`` joined with ``DEFAULT_CHECKOUT_DIRNAME``.
        """
        # NOTE(review): the configurable ``checkout_dirname`` field is not
        # consulted here - confirm that ignoring an override is intended.
        return self.cache_dir / DEFAULT_CHECKOUT_DIRNAME

    @property
    def workspaces_dir(self) -> Path:
        """Path to the workspaces directory (separate from document checkouts).

        Returns:
            ``cache_dir`` joined with ``DEFAULT_WORKSPACES_DIRNAME``.
        """
        # NOTE(review): the configurable ``workspaces_dirname`` field is not
        # consulted here - confirm that ignoring an override is intended.
        return self.cache_dir / DEFAULT_WORKSPACES_DIRNAME

    @field_validator("cache_dir", mode="before")
    @classmethod
@@ -327,14 +323,10 @@ class ThreeGPPConfig(BaseSettings):
        return cls(**merged)


# Backward-compatible alias — remove after all consumers are updated
TDocCrawlerConfig = ThreeGPPConfig

__all__ = [
    "CrawlConfig",
    "CredentialsConfig",
    "HttpConfig",
    "PathConfig",
    "TDocCrawlerConfig",
    "ThreeGPPConfig",
]
Loading