Commit 625de3c2 authored by Jan Reimes
Browse files

test: update tests for refactored code, remove deprecated test suites

parent a229402d
Loading
Loading
Loading
Loading
+5 −7
Original line number Diff line number Diff line
@@ -16,12 +16,6 @@ from tdoc_crawler.meetings.models import MeetingMetadata
from tdoc_crawler.tdocs.models import TDocMetadata


def pytest_ignore_collect(collection_path: object, config: pytest.Config) -> bool:
    """Tell pytest to skip the legacy ``tests/ai`` suite during collection.

    The candidate path is stringified, lower-cased and normalised to forward
    slashes, so the check is case-insensitive and works for Windows-style
    separators as well.

    Args:
        collection_path: Path pytest is about to collect (any object whose
            ``str()`` is the path text).
        config: Pytest configuration object; not consulted by this check.

    Returns:
        True when the path is the ``tests/ai`` directory itself or anything
        beneath it, False otherwise.
    """
    normalized = str(collection_path).lower().replace("\\", "/")
    # The directory itself has no trailing slash, so test it separately.
    if normalized.endswith("/tests/ai"):
        return True
    return "/tests/ai/" in normalized


@pytest.fixture(autouse=True)
def _reset_cache_manager() -> None:
    """Reset CacheManager singleton before each test.
@@ -34,17 +28,21 @@ def _reset_cache_manager() -> None:


@pytest.fixture
def test_cache_dir(tmp_path: Path) -> Path:
def test_cache_dir(tmp_path: Path, monkeypatch: pytest.MonkeyPatch) -> Path:
    """Provide an isolated cache directory and export it via TDC_CACHE_DIR.

    A ``test-cache`` directory is created beneath pytest's per-test temporary
    path, and the ``TDC_CACHE_DIR`` environment variable is set to it through
    ``monkeypatch`` so CLI tests pick up the temp directory.

    Args:
        tmp_path: Pytest's tmp_path fixture
        monkeypatch: Pytest's monkeypatch fixture

    Returns:
        Path to the created test cache directory
    """
    target = tmp_path.joinpath("test-cache")
    target.mkdir(parents=True, exist_ok=True)
    monkeypatch.setenv("TDC_CACHE_DIR", str(target))
    return target


+0 −6
Original line number Diff line number Diff line
@@ -72,12 +72,6 @@ class TestCacheManager:
        manager = CacheManager(tmp_path)
        assert manager.workspace_registry_file == tmp_path / "workspaces.json"

    def test_is_registered(self, tmp_path: Path) -> None:
        """Check that is_registered() reports False before and True after register()."""
        # Before any register() call in this test, the class must report unregistered.
        assert CacheManager.is_registered() is False

        manager = CacheManager(tmp_path)
        manager.register()

        assert CacheManager.is_registered() is True


class TestCacheManagerIntegration:
    """Test CacheManager integration with actual filesystem."""
+180 −1
Original line number Diff line number Diff line
@@ -9,7 +9,12 @@ import pytest
from packaging.version import Version

from tdoc_crawler.tdocs.models import TDocMetadata
from tdoc_crawler.tdocs.operations.checkout import checkout_tdoc, get_checked_out_tdocs, get_checkout_path
from tdoc_crawler.tdocs.operations.checkout import (
    _resolve_corrected_url,
    checkout_tdoc,
    get_checked_out_tdocs,
    get_checkout_path,
)
from tdoc_crawler.workspaces import resolve_tdoc_checkout_path


@@ -198,6 +203,180 @@ class TestCheckoutTDoc:
        with pytest.raises(ValueError, match="unsupported-url-scheme"):
            checkout_tdoc(metadata, checkout_dir)

    @patch("tdoc_crawler.tdocs.operations.checkout._resolve_corrected_url")
    @patch("tdoc_crawler.tdocs.operations.checkout.download_to_file")
    def test_case_correct_fallback_on_bad_zip(
        self,
        mock_download_to_file: Mock,
        mock_resolve_url: Mock,
        sample_tdoc_metadata: TDocMetadata,
        checkout_dir: Path,
    ) -> None:
        """Checkout should retry with the case-corrected URL after an invalid first download.

        The patched downloader writes garbage on its first invocation and a
        real one-file zip archive on every later one; the patched resolver
        supplies the corrected URL.
        """
        mock_resolve_url.return_value = "https://www.3gpp.org/ftp/tsg_sa/SA4/S4-251234.zip"

        attempts = 0

        def fake_download(url: str, destination: Path, session: object = None) -> None:
            nonlocal attempts
            attempts += 1
            destination.parent.mkdir(parents=True, exist_ok=True)
            if attempts == 1:
                # First attempt: bytes that are not a zip archive.
                payload = b"not-a-zip"
            else:
                # Later attempts: a valid archive holding a single document.
                buffer = io.BytesIO()
                with zipfile.ZipFile(buffer, "w") as archive:
                    archive.writestr("doc.txt", "document content")
                payload = buffer.getvalue()
            destination.write_bytes(payload)

        mock_download_to_file.side_effect = fake_download

        result = checkout_tdoc(sample_tdoc_metadata, checkout_dir)

        expected_dir = get_checkout_path(sample_tdoc_metadata, checkout_dir)
        assert result == expected_dir
        assert (result / "doc.txt").exists()
        # Exactly one failed download followed by one successful retry.
        assert attempts == 2
        mock_resolve_url.assert_called_once()

    @patch("tdoc_crawler.tdocs.operations.checkout._resolve_corrected_url")
    @patch("tdoc_crawler.tdocs.operations.checkout.download_to_file")
    def test_fallback_propagates_when_no_corrected_url(
        self,
        mock_download_to_file: Mock,
        mock_resolve_url: Mock,
        sample_tdoc_metadata: TDocMetadata,
        checkout_dir: Path,
    ) -> None:
        """BadZipFile must surface when the resolver finds no case-corrected URL."""
        # The patched resolver reports that no alternative URL exists.
        mock_resolve_url.return_value = None

        def write_garbage(url: str, destination: Path, session: object = None) -> None:
            # Every download produces a file that is not a zip archive.
            parent_dir = destination.parent
            parent_dir.mkdir(parents=True, exist_ok=True)
            destination.write_bytes(b"not-a-zip")

        mock_download_to_file.side_effect = write_garbage

        with pytest.raises(zipfile.BadZipFile):
            checkout_tdoc(sample_tdoc_metadata, checkout_dir)


class TestResolveCorrectedUrl:
    """Tests for _resolve_corrected_url function.

    Every case patches ``requests.Session`` in the checkout module and feeds
    the function the same metadata record; only the mocked directory listing
    (or the error raised while fetching it) differs between cases. The shared
    setup lives in the private static helpers below.
    """

    @staticmethod
    def _metadata() -> TDocMetadata:
        """Build the TDocMetadata record used by every case in this class."""
        return TDocMetadata(
            tdoc_id="S4AA230110",
            url="https://www.3gpp.org/ftp/TSG_SA/WG4_CODEC/Docs/S4AA230110.zip",
            title="Test",
            meeting_id=123,
            source="Test",
            contact="test@example.com",
            tdoc_type="Test Type",
            for_purpose="Test Purpose",
            agenda_item_nbr=Version("1.0"),
            agenda_item_text="Test Agenda Item",
            status="available",
            meeting_name="Test Meeting",
            is_revision_of=None,
            file_size=None,
            date_created=None,
            validated=False,
            validation_failed=False,
        )

    @staticmethod
    def _listing_response(body: bytes) -> Mock:
        """Return a mock HTTP response whose ``content`` is ``body``."""
        response = Mock()
        response.content = body
        response.raise_for_status = Mock()
        return response

    @staticmethod
    def _install_session(mock_session_cls: Mock, mock_session: Mock) -> None:
        """Make the patched Session instance yield ``mock_session`` when entered.

        ``__exit__`` returns False so exceptions raised inside the ``with``
        body are not suppressed by the mock.
        """
        session_context = mock_session_cls.return_value
        session_context.__enter__ = Mock(return_value=mock_session)
        session_context.__exit__ = Mock(return_value=False)

    @patch("tdoc_crawler.tdocs.operations.checkout.requests.Session")
    def test_finds_case_insensitive_match(self, mock_session_cls: Mock) -> None:
        """Test that function finds a case-insensitive filename match in directory listing."""
        mock_session = Mock()
        mock_session.get.return_value = self._listing_response(
            b"""
        <html><body>
        <a href="S4aA230110.zip">S4aA230110.zip</a>
        <a href="OTHER.zip">OTHER.zip</a>
        </body></html>
        """
        )
        self._install_session(mock_session_cls, mock_session)

        result = _resolve_corrected_url(self._metadata())

        # The listing spells the file "S4aA..." while the metadata URL says "S4AA...".
        assert result == "https://www.3gpp.org/ftp/TSG_SA/WG4_CODEC/Docs/S4aA230110.zip"
        mock_session.get.assert_called_once_with(
            "https://www.3gpp.org/ftp/TSG_SA/WG4_CODEC/Docs/",
            timeout=60,
        )

    @patch("tdoc_crawler.tdocs.operations.checkout.requests.Session")
    def test_returns_none_when_no_match(self, mock_session_cls: Mock) -> None:
        """Test that function returns None when no case-insensitive match exists."""
        mock_session = Mock()
        mock_session.get.return_value = self._listing_response(
            b"<html><body><a href='OTHER.zip'>OTHER.zip</a></body></html>"
        )
        self._install_session(mock_session_cls, mock_session)

        result = _resolve_corrected_url(self._metadata())

        assert result is None

    @patch("tdoc_crawler.tdocs.operations.checkout.requests.Session")
    def test_returns_none_on_request_error(self, mock_session_cls: Mock) -> None:
        """Test that function returns None when directory listing request fails."""
        mock_session = Mock()
        # NOTE(review): this is the builtin ConnectionError, not
        # requests.exceptions.ConnectionError - confirm it matches what the
        # implementation is expected to handle.
        mock_session.get.side_effect = ConnectionError("network error")
        self._install_session(mock_session_cls, mock_session)

        result = _resolve_corrected_url(self._metadata())

        assert result is None


class TestGetCheckedOutTdocs:
    """Tests for get_checked_out_tdocs function."""
+23 −25
Original line number Diff line number Diff line
@@ -53,7 +53,7 @@ class TestCrawlCommand:

        result = runner.invoke(
            tdoc_app,
            ["--cache-dir", str(test_cache_dir), "crawl", "--limit-tdocs", "10"],
            ["crawl", "--limit-tdocs", "10"],
        )

        assert result.exit_code == 0
@@ -89,7 +89,7 @@ class TestCrawlCommand:

        result = runner.invoke(
            tdoc_app,
            ["--cache-dir", str(test_cache_dir), "crawl", "--working-group", "RAN"],
            ["crawl", "--working-group", "RAN"],
        )

        assert result.exit_code == 0
@@ -128,8 +128,6 @@ class TestCrawlMeetingsCommand:
            tdoc_app,
            [
                "crawl-meetings",
                "--cache-dir",
                str(test_cache_dir),
                "--no-prompt-credentials",
            ],
        )
@@ -157,7 +155,7 @@ class TestQueryCommand:

        result = runner.invoke(
            tdoc_app,
            ["--cache-dir", str(test_cache_dir), "query"],
            ["query"],
        )

        assert result.exit_code == 0
@@ -177,7 +175,7 @@ class TestQueryCommand:

        result = runner.invoke(
            tdoc_app,
            ["--cache-dir", str(test_cache_dir), "query", "--output", "table"],
            ["query", "--output", "table"],
        )

        assert result.exit_code == 0
@@ -197,7 +195,7 @@ class TestQueryCommand:

        result = runner.invoke(
            tdoc_app,
            ["--cache-dir", str(test_cache_dir), "query", "--output", "json"],
            ["query", "--output", "json"],
        )

        assert result.exit_code == 0
@@ -218,7 +216,7 @@ class TestQueryCommand:

        result = runner.invoke(
            tdoc_app,
            ["--cache-dir", str(test_cache_dir), "query", "R1-2301234"],
            ["query", "R1-2301234"],
        )

        assert result.exit_code == 0
@@ -240,7 +238,7 @@ class TestQueryMeetingsCommand:

        result = runner.invoke(
            tdoc_app,
            ["--cache-dir", str(test_cache_dir), "query-meetings"],
            ["query-meetings"],
        )

        assert result.exit_code == 0
@@ -259,7 +257,7 @@ class TestQueryMeetingsCommand:

        result = runner.invoke(
            tdoc_app,
            ["--cache-dir", str(test_cache_dir), "query-meetings", "-s", "SA4"],
            ["query-meetings", "-s", "SA4"],
        )

        assert result.exit_code == 0
@@ -280,7 +278,7 @@ class TestQueryMeetingsCommand:

        result = runner.invoke(
            tdoc_app,
            ["--cache-dir", str(test_cache_dir), "query-meetings", "-s", "S4"],
            ["query-meetings", "-s", "S4"],
        )

        assert result.exit_code == 0
@@ -301,7 +299,7 @@ class TestQueryMeetingsCommand:

        result = runner.invoke(
            tdoc_app,
            ["--cache-dir", str(test_cache_dir), "query-meetings", "-s", "RP"],
            ["query-meetings", "-s", "RP"],
        )

        assert result.exit_code == 0
@@ -322,7 +320,7 @@ class TestQueryMeetingsCommand:

        result = runner.invoke(
            tdoc_app,
            ["--cache-dir", str(test_cache_dir), "query-meetings", "-w", "SP"],
            ["query-meetings", "-w", "SP"],
        )

        assert result.exit_code == 0
@@ -343,7 +341,7 @@ class TestQueryMeetingsCommand:

        result = runner.invoke(
            tdoc_app,
            ["--cache-dir", str(test_cache_dir), "query-meetings", "-w", "SA", "-s", "S4"],
            ["query-meetings", "-w", "SA", "-s", "S4"],
        )

        assert result.exit_code == 0
@@ -374,7 +372,7 @@ class TestStatsCommand:

        result = runner.invoke(
            tdoc_app,
            ["--cache-dir", str(test_db_path.parent), "stats"],
            ["stats"],
        )

        assert result.exit_code == 0
@@ -388,7 +386,7 @@ class TestStatsCommand:
        """Test stats command with missing database."""
        result = runner.invoke(
            tdoc_app,
            ["--cache-dir", str(test_cache_dir), "stats"],
            ["stats"],
        )

        assert result.exit_code == 1
@@ -422,7 +420,7 @@ class TestOpenCommand:

        result = runner.invoke(
            tdoc_app,
            ["--cache-dir", str(test_cache_dir), "open", "R1-2301234"],
            ["open", "R1-2301234"],
        )

        assert result.exit_code == 0
@@ -442,7 +440,7 @@ class TestOpenCommand:

        result = runner.invoke(
            tdoc_app,
            ["--cache-dir", str(test_cache_dir), "open", "INVALID-ID"],
            ["open", "INVALID-ID"],
        )

        assert result.exit_code == 1
@@ -511,7 +509,7 @@ class TestOpenCommand:

        result = runner.invoke(
            tdoc_app,
            ["--cache-dir", str(test_cache_dir), "open", "S4-260001"],
            ["open", "S4-260001"],
        )

        assert result.exit_code == 0
@@ -578,7 +576,7 @@ class TestOpenCommand:

        result = runner.invoke(
            tdoc_app,
            ["--cache-dir", str(test_cache_dir), "open", "R1-260001"],
            ["open", "R1-260001"],
        )

        # Should succeed (exit code 0) and use whatthespec fallback
@@ -666,7 +664,7 @@ class TestCheckoutCommand:

        result = runner.invoke(
            tdoc_app,
            ["--cache-dir", str(test_cache_dir), "checkout", "S4-260001", "S4-260002"],
            ["checkout", "S4-260001", "S4-260002"],
        )

        assert result.exit_code == 0
@@ -709,7 +707,7 @@ class TestCheckoutCommand:

        result = runner.invoke(
            tdoc_app,
            ["--cache-dir", str(test_cache_dir), "checkout", "S4-999999"],
            ["checkout", "S4-999999"],
        )

        # Should exit with error code due to missing TDoc
@@ -864,7 +862,7 @@ class TestEnvironmentVariables:
        db_file = PathConfig(cache_dir=test_cache_dir).db_file
        db_file.touch()

        result = runner.invoke(tdoc_app, ["--cache-dir", str(test_cache_dir), "stats"])
        result = runner.invoke(tdoc_app, ["stats"])

        # Verify resolve_credentials was called
        # (Actual credential resolution happens via Typer's envvar parameter)
@@ -887,7 +885,7 @@ class TestEnvironmentVariables:
        db_file = PathConfig(cache_dir=test_cache_dir).db_file
        db_file.touch()

        result = runner.invoke(tdoc_app, ["--cache-dir", str(test_cache_dir), "stats"])
        result = runner.invoke(tdoc_app, ["stats"])

        assert result.exit_code == 0

@@ -930,6 +928,6 @@ class TestEnvironmentVariables:
        db_file = PathConfig(cache_dir=test_cache_dir).db_file
        db_file.touch()

        result = runner.invoke(tdoc_app, ["--cache-dir", str(test_cache_dir), "stats"])
        result = runner.invoke(tdoc_app, ["stats"])

        assert result.exit_code == 0

tests/test_config_defaults.py

deleted100644 → 0
+0 −42
Original line number Diff line number Diff line
"""Tests to verify configuration defaults alignment.

These tests ensure that .env.example documentation is consistent
and matches what the code actually uses.
"""

from __future__ import annotations

import re
from pathlib import Path


def test_embedding_model_env_comment_matches_value() -> None:
    """Verify .env.example TDC_AI_EMBEDDING_MODEL comment and value are consistent.

    The comment recommends a model, but if the actual value differs,
    users get confusing instructions about which model to use.

    The file is read from ``.env.example`` relative to the current working
    directory. If the section carries no ``# Recommended:`` line, or that line
    contains no ``provider/model`` token, only the presence of the variable is
    checked.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's locale encoding.
    env_example = Path(".env.example").read_text(encoding="utf-8")

    # Find the embedding model line and its preceding comment
    embed_section = re.search(
        r"# Embedding model[^\n]*\n"
        r"(?:# [^\n]*\n)?"  # Optional recommended line
        r"TDC_AI_EMBEDDING_MODEL=(\S+)",
        env_example,
    )
    assert embed_section, "TDC_AI_EMBEDDING_MODEL not found in .env.example"

    value = embed_section.group(1)

    # If there's a recommended line, extract it and check consistency
    recommended_match = re.search(r"# Recommended: ([^\n]+)", embed_section.group(0))
    if recommended_match is None:
        return

    # Extract model name from "provider/model:tag" format
    recommended_model = re.search(r"(\S+/\S+:?\S*)", recommended_match.group(1))
    if recommended_model is None:
        return

    assert value == recommended_model.group(1), (
        f".env.example embedding model comment ({recommended_model.group(1)}) differs from value ({value}). Update comment to match value."
    )
Loading