Commit 7daefd6f authored by Jan Reimes
Browse files

refactor: clarify HTTP cache path as file, not directory

- Rename http_cache_dir property to http_cache_file across config and sources
- Update create_cached_session to accept file path instead of directory
- Clarify in docstrings that cache_dir parameter is the SQLite database file path
- Applies consistently across 3GPP and WhatTheSpec sources
parent 746603a0
Loading
Loading
Loading
Loading
+9 −5
Original line number Diff line number Diff line
@@ -73,14 +73,18 @@ class CacheManager:
        if ensure_paths:
            self.ensure_paths()

    def register(self) -> Self:
        """Register this instance as a cache manager under the given name."""
        register_cache_manager(self)
    def register(self, force: bool = True) -> Self:
        """Register this instance through ``register_cache_manager`` and return it.

        Args:
            force: Forwarded to ``register_cache_manager``. If True (default),
                overwrite an existing manager with the same name.

        Returns:
            This instance, so the call can be chained.
        """
        register_cache_manager(self, force=force)
        return self

    @property
    def http_cache_dir(self) -> Path:
        """Path to the HTTP client cache database."""
    def http_cache_file(self) -> Path:
        """Path to the HTTP client cache database file.

        The path is ``self.root`` joined with ``DEFAULT_HTTP_CACHE_FILENAME``.
        This property only builds the path; it does not create the file.
        """
        return self.root / DEFAULT_HTTP_CACHE_FILENAME

    @property
+3 −6
Original line number Diff line number Diff line
@@ -38,7 +38,7 @@ def download_to_path(url: str, destination: Path, session: requests.Session | No
    temp_session: requests.Session | None = None
    if session is None:
        manager = resolve_cache_manager(cache_manager_name)
        temp_session = create_cached_session(cache_dir=manager.http_cache_dir)
        temp_session = create_cached_session(manager.http_cache_file)
        active_session = temp_session
    else:
        active_session = session
@@ -78,19 +78,16 @@ def create_cached_session(

    Args:
        cache_dir: Path to the SQLite cache database file.
        ttl: Time-to-live for cached responses in seconds (default: 7200)
        refresh_ttl_on_access: Whether to refresh TTL on cache access (default: True)
        max_retries: Number of retry attempts for failed requests (default: 3)

    Returns:
        Configured requests.Session with caching enabled
    """
    # Ensure cache directory exists
    # Ensure parent directory exists (SQLite will create the file)
    cache_dir.parent.mkdir(parents=True, exist_ok=True)

    logger.debug(f"Creating cached HTTP session: cache_db={cache_dir}, ttl={ttl}s, refresh_on_access={refresh_ttl_on_access}, max_retries={max_retries}")

    # Create SQLite storage backend
    # Create SQLite storage backend (cache_dir is the database file path)
    storage = SyncSqliteStorage(
        database_path=str(cache_dir),
        default_ttl=ttl,
+5 −11
Original line number Diff line number Diff line
"""Spec catalog and download support."""
"""Spec catalog and download support.

Note: Avoid eager imports here to prevent circular dependencies.
Import SpecDatabase directly from tdoc_crawler.database.specs when needed.
"""

from tdoc_crawler.models import SpecQueryFilters, SpecQueryResult
from tdoc_crawler.specs.database import SpecCrawlResult, SpecCrawlSourceOutcome, SpecDatabase
from tdoc_crawler.specs.downloads import SpecDownloads
from tdoc_crawler.utils.normalization import normalize_portal_meeting_name, normalize_spec_number

__all__ = [
    "SpecCrawlResult",
    "SpecCrawlSourceOutcome",
    "SpecDatabase",
    "SpecDownloads",
    "SpecQueryFilters",
    "SpecQueryResult",
    "normalize_portal_meeting_name",
    "normalize_spec_number",
]
+16 −1
Original line number Diff line number Diff line
"""Specification data models."""
"""Specification data models.

This module contains the data models for specification metadata,
including the canonical spec identity, source records, versions,
and download outcomes.
"""

from dataclasses import dataclass, field
from datetime import datetime
@@ -84,3 +89,13 @@ class SpecQueryResult:
    status: str | None = None
    working_group: str | None = None
    source_differences: dict[str, dict[str, str | None]] = field(default_factory=dict)


# Explicit public API of this module: the names bound by a wildcard import.
__all__ = [
    "Specification",
    "SpecificationSourceRecord",
    "SpecificationVersion",
    "SpecificationDownload",
    "SpecQueryFilters",
    "SpecQueryResult",
]
+1 −1
Original line number Diff line number Diff line
@@ -39,7 +39,7 @@ def fetch_threegpp_metadata(spec_number: str, cache_manager_name: str | None = N
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
    }

    session = create_cached_session(resolve_cache_manager(cache_manager_name).root)
    session = create_cached_session(resolve_cache_manager(cache_manager_name).http_cache_file)
    response = session.get(url, timeout=30, allow_redirects=True, headers=headers)
    response.raise_for_status()

Loading