Commit bd5dc5e2 authored by Jan Reimes
Browse files

feat(02-03): Consolidate load_dotenv calls in CLI

- Move load_dotenv() to tdoc_app.py at module level
- Remove load_dotenv() from crawl.py, query.py, specs.py
- Add noqa comments for E402 (imports after load_dotenv)
- Fix __all__ export in tdoc_app.py (app -> tdoc_app)
- Add comment explaining why load_dotenv is at module level
- Create tests/test_cli_dotenv.py to verify consolidation

All CLI submodules now rely on tdoc_app.py loading .env first.
parent 5798d900
Loading
Loading
Loading
Loading
+0 −3
Original line number Diff line number Diff line
@@ -6,7 +6,6 @@ import asyncio
from datetime import date, datetime

import typer
from dotenv import load_dotenv

from tdoc_crawler.cli._shared import console, create_progress_bar, handle_clear_options
from tdoc_crawler.cli.args import (
@@ -67,8 +66,6 @@ from tdoc_crawler.tdocs.operations.crawl import TDocCrawlResult
from tdoc_crawler.utils.date_parser import parse_partial_date
from tdoc_crawler.utils.parse import collect_spec_numbers, parse_subgroups, parse_working_groups

load_dotenv()

HELP_PANEL = "Crawling Commands"


+0 −3
Original line number Diff line number Diff line
@@ -7,7 +7,6 @@ from datetime import UTC, datetime
from typing import Annotated

import typer
from dotenv import load_dotenv

from tdoc_crawler.cli._shared import console, handle_clear_options
from tdoc_crawler.cli.args import (
@@ -62,8 +61,6 @@ from tdoc_crawler.tdocs.operations.fetch import fetch_missing_tdocs
from tdoc_crawler.utils.date_parser import parse_partial_date
from tdoc_crawler.utils.parse import collect_spec_numbers, parse_subgroups, parse_working_groups

load_dotenv()

HELP_PANEL = "Query Commands"


+0 −3
Original line number Diff line number Diff line
@@ -7,7 +7,6 @@ from pathlib import Path
from typing import Annotated

import typer
from dotenv import load_dotenv

from tdoc_crawler.cli._shared import console
from tdoc_crawler.cli.args import (
@@ -29,8 +28,6 @@ from tdoc_crawler.specs.downloads import SpecDownloads
from tdoc_crawler.specs.operations.checkout import build_default_spec_sources
from tdoc_crawler.utils.parse import collect_spec_numbers

load_dotenv()

HELP_PANEL = "Main Commands"


+28 −24
Original line number Diff line number Diff line
@@ -2,16 +2,22 @@

from __future__ import annotations

import asyncio
import zipfile
from typing import Any, cast

import typer
# Load environment variables from the .env file before importing any other
# module that might read the environment at import time. This ensures .env
# values are available to TDocCrawlerConfig and other components.
from dotenv import load_dotenv
from rich.table import Table

from tdoc_crawler.cli._shared import console, create_progress_bar
from tdoc_crawler.cli.args import (
load_dotenv()

import asyncio  # noqa: E402
import zipfile  # noqa: E402
from typing import Any, cast  # noqa: E402

import typer  # noqa: E402
from rich.table import Table  # noqa: E402

from tdoc_crawler.cli._shared import console, create_progress_bar  # noqa: E402
from tdoc_crawler.cli.args import (  # noqa: E402
    CacheDirOption,
    CheckoutTDocIdsArgument,
    EolPasswordOption,
@@ -23,21 +29,19 @@ from tdoc_crawler.cli.args import (
    UseWhatTheSpecOption,
    VerbosityOption,
)
from tdoc_crawler.cli.crawl import crawl_meetings, crawl_tdocs
from tdoc_crawler.cli.query import query_meetings, query_tdocs
from tdoc_crawler.cli.utils import launch_file
from tdoc_crawler.config import CacheManager
from tdoc_crawler.credentials import set_credentials
from tdoc_crawler.database import MeetingDatabase, TDocDatabase
from tdoc_crawler.http_client import create_cached_session
from tdoc_crawler.logging import DEFAULT_LEVEL as DEFAULT_VERBOSITY
from tdoc_crawler.logging import set_verbosity
from tdoc_crawler.tdocs.models import TDocQueryConfig
from tdoc_crawler.tdocs.operations.checkout import checkout_tdoc, prepare_tdoc_file
from tdoc_crawler.tdocs.operations.fetch import fetch_missing_tdocs
from tdoc_crawler.utils.normalization import normalize_tdoc_id, normalize_tdoc_ids

load_dotenv()
from tdoc_crawler.cli.crawl import crawl_meetings, crawl_tdocs  # noqa: E402
from tdoc_crawler.cli.query import query_meetings, query_tdocs  # noqa: E402
from tdoc_crawler.cli.utils import launch_file  # noqa: E402
from tdoc_crawler.config import CacheManager  # noqa: E402
from tdoc_crawler.credentials import set_credentials  # noqa: E402
from tdoc_crawler.database import MeetingDatabase, TDocDatabase  # noqa: E402
from tdoc_crawler.http_client import create_cached_session  # noqa: E402
from tdoc_crawler.logging import DEFAULT_LEVEL as DEFAULT_VERBOSITY  # noqa: E402
from tdoc_crawler.logging import set_verbosity  # noqa: E402
from tdoc_crawler.tdocs.models import TDocQueryConfig  # noqa: E402
from tdoc_crawler.tdocs.operations.checkout import checkout_tdoc, prepare_tdoc_file  # noqa: E402
from tdoc_crawler.tdocs.operations.fetch import fetch_missing_tdocs  # noqa: E402
from tdoc_crawler.utils.normalization import normalize_tdoc_id, normalize_tdoc_ids  # noqa: E402

tdoc_app = typer.Typer(help="3GPP Crawler - TDocs and Meetings")

@@ -268,4 +272,4 @@ tdoc_app.command("qt", rich_help_panel=HELP_PANEL_QUERY, hidden=True)(query_tdoc
tdoc_app.command("qm", rich_help_panel=HELP_PANEL_QUERY, hidden=True)(query_meetings)


__all__ = ["app"]
__all__ = ["tdoc_app"]
+102 −0
Original line number Diff line number Diff line
"""Tests for load_dotenv consolidation in CLI entry point."""

from __future__ import annotations

import ast
from pathlib import Path

import pytest


class TestLoadDotenvConsolidation:
    """Test that load_dotenv is called once at CLI entry."""

    def test_no_duplicate_dotenv_calls(self) -> None:
        """Verify load_dotenv is not called in submodule CLI files."""
        cli_dir = Path(__file__).parent.parent / "src" / "tdoc_crawler" / "cli"
        submodule_files = [
            "crawl.py",
            "query.py",
            "specs.py",
        ]

        for filename in submodule_files:
            filepath = cli_dir / filename
            if filepath.exists():
                source = filepath.read_text()
                tree = ast.parse(source)

                # Check for load_dotenv import
                has_import = any(
                    isinstance(node, ast.ImportFrom) and node.module == "dotenv" and any(alias.name == "load_dotenv" for alias in node.names)
                    for node in ast.walk(tree)
                )

                # Check for load_dotenv() call
                has_call = any(isinstance(node, ast.Call) and isinstance(node.func, ast.Name) and node.func.id == "load_dotenv" for node in ast.walk(tree))

                assert not has_import, f"{filename} should not import load_dotenv"
                assert not has_call, f"{filename} should not call load_dotenv()"

    def test_tdoc_app_has_load_dotenv(self) -> None:
        """tdoc_app.py should import and call load_dotenv."""
        app_file = Path(__file__).parent.parent / "src" / "tdoc_crawler" / "cli" / "tdoc_app.py"
        source = app_file.read_text()
        tree = ast.parse(source)

        # Check for load_dotenv import
        has_import = any(
            isinstance(node, ast.ImportFrom) and node.module == "dotenv" and any(alias.name == "load_dotenv" for alias in node.names) for node in ast.walk(tree)
        )

        # Check for load_dotenv() call
        has_call = any(isinstance(node, ast.Call) and isinstance(node.func, ast.Name) and node.func.id == "load_dotenv" for node in ast.walk(tree))

        assert has_import, "tdoc_app.py should import load_dotenv"
        assert has_call, "tdoc_app.py should call load_dotenv()"


class TestBackwardCompatibility:
    """Test that existing .env patterns still work."""

    def test_existing_env_vars_still_work(self, monkeypatch) -> None:
        """Direct environment variables still take precedence."""
        test_path = Path.home() / ".explicit_cache_test"
        monkeypatch.setenv("TDC_CACHE_DIR", str(test_path))

        from tdoc_crawler.config import TDocCrawlerConfig

        config = TDocCrawlerConfig()

        assert config.path.cache_dir == test_path

    def test_http_cache_env_vars_work(self, monkeypatch) -> None:
        """HTTP_CACHE_TTL env var still works."""
        monkeypatch.setenv("HTTP_CACHE_TTL", "3600")

        from tdoc_crawler.config import TDocCrawlerConfig

        config = TDocCrawlerConfig()

        assert config.http.cache_ttl == 3600

    def test_dotenv_called_before_other_imports(self) -> None:
        """load_dotenv() is called at module level before other imports."""
        app_file = Path(__file__).parent.parent / "src" / "tdoc_crawler" / "cli" / "tdoc_app.py"
        source = app_file.read_text()

        # Find line numbers of key elements
        lines = source.split("\n")

        load_dotenv_line = -1
        typer_import_line = -1

        for i, line in enumerate(lines):
            if "load_dotenv()" in line and not line.strip().startswith("#"):
                load_dotenv_line = i
            if "import typer" in line and not line.strip().startswith("#"):
                typer_import_line = i

        assert load_dotenv_line > 0, "load_dotenv() call should exist"
        assert typer_import_line > 0, "import typer should exist"
        assert load_dotenv_line < typer_import_line, "load_dotenv() should be called before other imports"