Commit 1443bc0e authored by Jan Reimes
Browse files

refactor(cli): reorganize imports in fetching.py and portal.py

* Consolidate import statements to avoid circular dependencies.
* Improve code readability by grouping related imports.
* Add TODO comment regarding cache_dir usage in PortalSession.
parent 72275696
Loading
Loading
Loading
Loading
+3 −12
Original line number Diff line number Diff line
@@ -7,14 +7,13 @@ from pathlib import Path

from pydantic import ValidationError

from tdoc_crawler.crawlers import TDocCrawlResult, fetch_tdoc_metadata
from tdoc_crawler.cli.console import get_console
from tdoc_crawler.cli.helpers import resolve_meeting_id
from tdoc_crawler.crawlers import TDocCrawlResult, WhatTheSpecResolutionError, extract_tdoc_url_from_portal, fetch_tdoc_metadata, resolve_via_whatthespec
from tdoc_crawler.credentials import resolve_credentials
from tdoc_crawler.database import TDocDatabase
from tdoc_crawler.models import HttpCacheConfig, PortalCredentials, QueryConfig, TDocMetadata

from .console import get_console
from .helpers import resolve_meeting_id

console = get_console()
_logger = logging.getLogger(__name__)

@@ -45,9 +44,6 @@ def fetch_tdoc(
    Raises:
        Exception: If fetching fails for any reason.
    """
    # Import here to avoid circular imports
    from tdoc_crawler.crawlers import extract_tdoc_url_from_portal, resolve_via_whatthespec

    if use_whatthespec:
        # Always use WhatTheSpec method (Method 3)
        _logger.debug(f"Fetching {tdoc_id} via WhatTheSpec API")
@@ -178,11 +174,6 @@ def _fetch_via_whatthespec(
        cache_dir: Cache directory path
        missing_ids: List of TDoc IDs to fetch
    """
    # Import at runtime to avoid circular dependency
    from tdoc_crawler.crawlers import (  # noqa: PLC0415
        WhatTheSpecResolutionError,
        resolve_via_whatthespec,
    )

    http_cache = HttpCacheConfig()

+1 −1
Original line number Diff line number Diff line
@@ -17,7 +17,7 @@ from urllib3.util import Retry
from tdoc_crawler.crawlers.constants import LOGIN_URL, PORTAL_BASE_URL, TDOC_DOWNLOAD_URL, TDOC_VIEW_URL
from tdoc_crawler.models.tdocs import TDocMetadata


# TODO: cache_dir appears to be unused in PortalSession. It should be used to cache
# HTTP requests to portal pages via the hishel package (is the wrong HTTPAdapter
# being used?). See tdoc_crawler.models.base.HttpCacheConfig.
def create_cached_session(cache_dir: Path, ttl: int = 7200, refresh_ttl_on_access: bool = True, max_retries: int = 3) -> requests.Session:
    """Create an HTTP session with caching."""
    # Create session
+3 −3
Original line number Diff line number Diff line
@@ -15,7 +15,8 @@ from pathlib import Path

from tdoc_crawler.models.base import HttpCacheConfig
from tdoc_crawler.models.tdocs import TDocMetadata

from tdoc_crawler.crawlers.portal import extract_tdoc_url_from_portal, fetch_tdoc_metadata
from tdoc_crawler.crawlers.whatthespec import resolve_via_whatthespec
logger = logging.getLogger(__name__)


@@ -46,8 +47,7 @@ def fetch_tdoc(
        Exception: If fetching fails for any reason.
    """
    # Import here to avoid circular imports
    from tdoc_crawler.crawlers.portal import extract_tdoc_url_from_portal, fetch_tdoc_metadata
    from tdoc_crawler.crawlers.whatthespec import resolve_via_whatthespec


    if use_whatthespec:
        # Always use WhatTheSpec method (Method 3)