Commit 5798d900 authored by Jan Reimes's avatar Jan Reimes
Browse files

feat(02-02): Migrate HTTP session and credentials to TDocCrawlerConfig

- Update resolve_ssl_verify() to accept optional http_config parameter
- Update create_cached_session() to accept optional http_config parameter
- Use http_config for SSL verification and cache settings when provided
- Update resolve_credentials() to accept optional credentials_config parameter
- Maintain resolution order: CLI > Config > Env > Prompt
- Add integration tests for HTTP session with config
- Add comprehensive credentials resolver tests
- Preserve backward compatibility for existing calls without config

Files modified:
- src/tdoc_crawler/http_client/session.py
- src/tdoc_crawler/credentials.py
- tests/test_http_client.py (new tests added)
- tests/test_credentials.py (new file)
parent 277bbda8
Loading
Loading
Loading
Loading
+22 −12
Original line number Diff line number Diff line
@@ -7,6 +7,7 @@ import sys

import typer

from tdoc_crawler.config.settings import CredentialsConfig
from tdoc_crawler.models.base import PortalCredentials


@@ -27,32 +28,41 @@ def set_credentials(username: str | None, password: str | None, prompt: bool | N


def resolve_credentials(
    username: str | None,
    password: str | None,
    username: str | None = None,
    password: str | None = None,
    prompt: bool | None = None,
    credentials_config: CredentialsConfig | None = None,
) -> PortalCredentials | None:
    """Resolve portal credentials from parameters, environment, or prompt.
    """Resolve portal credentials with TDocCrawlerConfig integration.

    Resolution order:
    Resolution order (per D-03):
    1. CLI parameters (username, password)
    2. Environment variables (TDC_EOL_USERNAME, TDC_EOL_PASSWORD)
    3. Interactive prompt (if TDC_EOL_PROMPT=true or prompt=True, and stdin is a TTY)
    2. credentials_config values (from config file)
    3. Environment variables (TDC_EOL_USERNAME, TDC_EOL_PASSWORD)
    4. Interactive prompt (if enabled)

    Args:
        username: CLI-provided username
        password: CLI-provided password
        prompt: Whether to prompt interactively. If None, reads from EOL_PROMPT env var.
        username: CLI-provided username (highest precedence)
        password: CLI-provided password (highest precedence)
        prompt: Whether to prompt interactively
        credentials_config: Optional CredentialsConfig from TDocCrawlerConfig

    Returns:
        PortalCredentials instance if resolved, None otherwise
    """
    resolved_username = username or os.getenv("TDC_EOL_USERNAME")
    resolved_password = password or os.getenv("TDC_EOL_PASSWORD")
    # Resolution order: CLI > Config > Env > Prompt
    resolved_username = username or (credentials_config.username if credentials_config else None) or os.getenv("TDC_EOL_USERNAME")
    resolved_password = password or (credentials_config.password if credentials_config else None) or os.getenv("TDC_EOL_PASSWORD")

    if resolved_username and resolved_password:
        return PortalCredentials(username=resolved_username, password=resolved_password)

    should_prompt = prompt if prompt is not None else os.getenv("TDC_EOL_PROMPT", "").lower() in ("true", "1", "yes")
    # Determine if we should prompt
    config_prompt = credentials_config.prompt if credentials_config else None
    should_prompt = prompt if prompt is not None else (config_prompt.lower() in ("true", "1", "yes") if config_prompt else None)
    if should_prompt is None:
        should_prompt = os.getenv("TDC_EOL_PROMPT", "").lower() in ("true", "1", "yes")

    if should_prompt and not sys.stdin.isatty():
        should_prompt = False

+32 −6
Original line number Diff line number Diff line
@@ -16,6 +16,7 @@ from truststore import SSLContext as TruststoreSSLContext
from urllib3.util.retry import Retry

from tdoc_crawler.config import resolve_cache_manager
from tdoc_crawler.config.settings import HttpConfig
from tdoc_crawler.constants.urls import BROWSER_HEADERS
from tdoc_crawler.logging import get_logger
from tdoc_crawler.models import HttpCacheConfig
@@ -80,17 +81,30 @@ class PoolConfig:
    retry_attempts: int = 3


def resolve_ssl_verify(verify: bool | str | None = None) -> bool | str:
def resolve_ssl_verify(
    verify: bool | str | None = None,
    http_config: HttpConfig | None = None,
) -> bool | str:
    """Resolve SSL verification behavior from explicit argument or environment.

    Resolution order:
    1. Explicit `verify` parameter
    2. http_config.verify_ssl if provided
    3. TDC_VERIFY_SSL environment variable
    4. Default (True)

    Args:
        verify: Optional explicit SSL verification mode.
        http_config: Optional HttpConfig from TDocCrawlerConfig. When provided,
                    settings are taken from here with fallback to env vars.

    Returns:
        Either a boolean verification flag or a certificate bundle path.
    """
    if verify is not None:
        resolved = verify
    elif http_config is not None:
        resolved = http_config.verify_ssl
    else:
        env_value = os.getenv("TDC_VERIFY_SSL")
        if env_value is None:
@@ -105,7 +119,7 @@ def resolve_ssl_verify(verify: bool | str | None = None) -> bool | str:
                resolved = True

    if resolved is False:
        logger.warning("SSL certificate verification is disabled (TDC_VERIFY_SSL=false). This is insecure.")
        logger.warning("SSL certificate verification is disabled. This is insecure.")

    return resolved

@@ -134,6 +148,7 @@ def download_to_file(
    http_cache_enabled: bool | None = None,
    pool_config: PoolConfig | None = None,
    verify: bool | str | None = None,
    http_config: HttpConfig | None = None,
) -> requests.Session | None:
    """Download a file from URL to destination path.

@@ -147,6 +162,8 @@ def download_to_file(
        http_cache_enabled: Whether to enable HTTP caching. If None, defaults to True.
        pool_config: Optional connection pool configuration.
        verify: SSL certificate verification mode. Can be bool or CA bundle path.
        http_config: Optional HttpConfig from TDocCrawlerConfig. When provided,
                    settings are taken from here with fallback to env vars.

    Raises:
        ValueError: If URL scheme is not supported
@@ -167,6 +184,7 @@ def download_to_file(
            http_cache_enabled=http_cache_enabled,
            pool_config=pool_config,
            verify=verify,
            http_config=http_config,
        )
        active_session = temp_session
    else:
@@ -197,6 +215,7 @@ def create_cached_session(
    http_cache_enabled: bool | None = None,
    pool_config: PoolConfig | None = None,
    verify: bool | str | None = None,
    http_config: HttpConfig | None = None,
) -> requests.Session:
    """Create a requests.Session with hishel caching enabled.

@@ -209,6 +228,8 @@ def create_cached_session(
                When provided, pool settings are applied to the active adapter
                (cache adapter when caching is enabled, HTTPAdapter otherwise).
        verify: SSL certificate verification mode. Can be bool or CA bundle path.
        http_config: Optional HttpConfig from TDocCrawlerConfig. When provided,
                    settings are taken from here with fallback to env vars.

    Returns:
        Configured requests.Session with caching enabled (unless disabled)
@@ -217,12 +238,17 @@ def create_cached_session(
    session = requests.Session()
    session.headers.update(BROWSER_HEADERS)

    verify_mode = resolve_ssl_verify(verify)
    # Use http_config for SSL if no explicit verify provided
    verify_mode = http_config.verify_ssl if verify is None and http_config is not None else resolve_ssl_verify(verify, http_config)
    ssl_context = _resolve_ssl_context(verify_mode)
    session.verify = verify_mode

    # Check if caching is disabled via parameter or environment variable
    # Check if caching is disabled via parameter, http_config, or environment variable
    if http_cache_enabled is None:
        if http_config is not None:
            http_cache_enabled = http_config.cache_enabled
        else:
            # Fallback to env var (backward compatibility)
            env_enabled = os.getenv("HTTP_CACHE_ENABLED", "").lower()
            http_cache_enabled = env_enabled not in ("false", "0", "no", "off", "f", "n")

+143 −0
Original line number Diff line number Diff line
"""Tests for credentials resolution with TDocCrawlerConfig integration."""

from __future__ import annotations

import pytest

from tdoc_crawler.config.settings import CredentialsConfig
from tdoc_crawler.credentials import resolve_credentials
from tdoc_crawler.models.base import PortalCredentials


class TestCredentialsWithConfig:
    """Test credentials resolution with CredentialsConfig."""

    def test_resolve_uses_config_credentials(self) -> None:
        """resolve_credentials uses credentials_config when no CLI params."""
        config = CredentialsConfig(username="config_user", password="config_pass")

        result = resolve_credentials(credentials_config=config)

        assert result is not None
        assert result.username == "config_user"
        assert result.password == "config_pass"

    def test_resolve_cli_overrides_config(self) -> None:
        """CLI parameters take precedence over credentials_config."""
        config = CredentialsConfig(username="config_user", password="config_pass")

        result = resolve_credentials(
            username="cli_user",
            password="cli_pass",
            credentials_config=config,
        )

        assert result.username == "cli_user"
        assert result.password == "cli_pass"

    def test_resolve_config_overrides_env(self, monkeypatch) -> None:
        """credentials_config takes precedence over env vars."""
        monkeypatch.setenv("TDC_EOL_USERNAME", "env_user")
        monkeypatch.setenv("TDC_EOL_PASSWORD", "env_pass")
        config = CredentialsConfig(username="config_user", password="config_pass")

        result = resolve_credentials(credentials_config=config)

        # Config should win over env
        assert result.username == "config_user"
        assert result.password == "config_pass"

    def test_resolve_env_when_no_config(self, monkeypatch) -> None:
        """Without credentials_config, falls back to env vars."""
        monkeypatch.setenv("TDC_EOL_USERNAME", "env_user")
        monkeypatch.setenv("TDC_EOL_PASSWORD", "env_pass")

        result = resolve_credentials()

        assert result is not None
        assert result.username == "env_user"
        assert result.password == "env_pass"

    def test_resolve_returns_none_when_no_creds(self) -> None:
        """Returns None when no credentials found anywhere."""
        result = resolve_credentials()

        assert result is None

    def test_resolve_config_prompt_flag(self) -> None:
        """credentials_config.prompt controls prompting."""
        config = CredentialsConfig(prompt="true")

        # Should attempt to prompt (but won't in test due to non-TTY)
        result = resolve_credentials(credentials_config=config)

        # Result is None because stdin is not a TTY in tests
        assert result is None


class TestCredentialsResolutionOrder:
    """Test resolution order: CLI > Config > Env > Prompt."""

    def test_full_resolution_order(self, monkeypatch) -> None:
        """Verify complete precedence chain."""
        monkeypatch.setenv("TDC_EOL_USERNAME", "env_user")
        monkeypatch.setenv("TDC_EOL_PASSWORD", "env_pass")

        # CLI should win over everything
        result = resolve_credentials(
            username="cli_user",
            password="cli_pass",
            credentials_config=CredentialsConfig(username="config_user", password="config_pass"),
        )

        assert result.username == "cli_user"
        assert result.password == "cli_pass"

    def test_config_beats_env(self, monkeypatch) -> None:
        """Config values should override env vars."""
        monkeypatch.setenv("TDC_EOL_USERNAME", "env_user")
        monkeypatch.setenv("TDC_EOL_PASSWORD", "env_pass")

        result = resolve_credentials(
            credentials_config=CredentialsConfig(username="config_user", password="config_pass"),
        )

        assert result.username == "config_user"
        assert result.password == "config_pass"

    def test_env_beats_prompt(self, monkeypatch) -> None:
        """Env vars should prevent prompting."""
        monkeypatch.setenv("TDC_EOL_USERNAME", "env_user")
        monkeypatch.setenv("TDC_EOL_PASSWORD", "env_pass")

        # Even with prompt=true, env vars should be used
        result = resolve_credentials(
            credentials_config=CredentialsConfig(prompt="true"),
        )

        assert result is not None
        assert result.username == "env_user"


class TestCredentialsBackwardCompatibility:
    """Test backward compatibility without credentials_config."""

    def test_resolve_with_no_config_works(self, monkeypatch) -> None:
        """resolve_credentials works without credentials_config parameter."""
        monkeypatch.setenv("TDC_EOL_USERNAME", "legacy_user")
        monkeypatch.setenv("TDC_EOL_PASSWORD", "legacy_pass")

        # Call without credentials_config (old API)
        result = resolve_credentials()

        assert result is not None
        assert result.username == "legacy_user"
        assert result.password == "legacy_pass"

    def test_resolve_with_cli_only(self) -> None:
        """CLI-only credentials still work."""
        result = resolve_credentials(username="direct_user", password="direct_pass")

        assert result is not None
        assert result.username == "direct_user"
        assert result.password == "direct_pass"
+90 −0
Original line number Diff line number Diff line
@@ -448,3 +448,93 @@ class TestSslContextAdapters:

if __name__ == "__main__":
    pytest.main([__file__, "-v"])


class TestHttpSessionWithConfig:
    """Test HTTP session functions with HttpConfig integration."""

    @pytest.fixture(autouse=True)
    def setup_cache(self) -> None:
        """Reset cache managers before each test."""
        reset_cache_managers()

    def test_resolve_ssl_verify_uses_http_config(self) -> None:
        """resolve_ssl_verify uses http_config.verify_ssl when no explicit verify."""
        from tdoc_crawler.config.settings import HttpConfig

        http_config = HttpConfig(verify_ssl=False)

        result = resolve_ssl_verify(http_config=http_config)

        assert result is False

    def test_resolve_ssl_verify_explicit_overrides_config(self) -> None:
        """Explicit verify parameter takes precedence over http_config."""
        from tdoc_crawler.config.settings import HttpConfig

        http_config = HttpConfig(verify_ssl=False)

        result = resolve_ssl_verify(verify=True, http_config=http_config)

        assert result is True

    def test_resolve_ssl_verify_fallback_to_env(self, monkeypatch) -> None:
        """Without http_config, falls back to env var."""
        monkeypatch.setenv("TDC_VERIFY_SSL", "false")

        result = resolve_ssl_verify()

        assert result is False

    def test_create_cached_session_uses_config_cache_enabled(self, test_cache_dir: Path) -> None:
        """create_cached_session respects http_config.cache_enabled=False."""
        from tdoc_crawler.config.settings import HttpConfig

        CacheManager(root_path=test_cache_dir, name="test_config_cache").register()
        http_config = HttpConfig(cache_enabled=False)

        session = create_cached_session(cache_manager_name="test_config_cache", http_config=http_config)

        # Session should be plain requests session (no cache adapter)
        assert session is not None
        # When caching is disabled, it should use HTTPAdapter, not CacheAdapter
        assert not isinstance(session.adapters["http://"], CacheAdapter)
        session.close()

    def test_create_cached_session_explicit_overrides_config(self, test_cache_dir: Path) -> None:
        """Explicit http_cache_enabled overrides http_config."""
        from tdoc_crawler.config.settings import HttpConfig

        CacheManager(root_path=test_cache_dir, name="test_explicit_override").register()
        http_config = HttpConfig(cache_enabled=False)

        # Explicit True should override config False
        session = create_cached_session(
            cache_manager_name="test_explicit_override",
            http_config=http_config,
            http_cache_enabled=True,
        )

        assert session is not None
        # Should have cache adapter since explicit True overrides config False
        assert isinstance(session.adapters["http://"], CacheAdapter)
        session.close()


class TestHttpConfigBackwardCompat:
    """Test backward compatibility without HttpConfig."""

    def test_resolve_ssl_verify_no_config_uses_default(self) -> None:
        """Without config or env, defaults to True."""
        result = resolve_ssl_verify()

        assert result is True

    def test_create_cached_session_no_config_works(self, test_cache_dir: Path) -> None:
        """create_cached_session works without http_config parameter."""
        CacheManager(root_path=test_cache_dir, name="test_no_config").register()
        session = create_cached_session(cache_manager_name="test_no_config")

        assert session is not None
        assert hasattr(session, "headers")
        session.close()