Commit 28ddf849 authored by Jan Reimes
Browse files

test(02-checkout-graph-deprecation-config-01): add empty folder detection tests

- Test resolve_tdoc_checkout_path with 3GPP FTP hierarchy
- Test _checkout_tdoc_if_needed with empty .ai-only folders
- Test re-download triggers for empty/non-existent folders
- Test populated folders don't trigger re-download
parent a9e9c1e8
Loading
Loading
Loading
Loading
+115 −0
Original line number Diff line number Diff line
@@ -10,6 +10,7 @@ from packaging.version import Version

from tdoc_crawler.tdocs.models import TDocMetadata
from tdoc_crawler.tdocs.operations.checkout import checkout_tdoc, get_checked_out_tdocs, get_checkout_path
from threegpp_ai.operations.workspaces import resolve_tdoc_checkout_path, _checkout_tdoc_if_needed


@pytest.fixture
@@ -223,3 +224,117 @@ class TestGetCheckedOutTdocs:
        result = get_checked_out_tdocs(checkout_dir)

        assert sorted(result) == ["R1-2301234", "S4-251234"]


class TestResolveTdocCheckoutPath:
    """Exercise resolve_tdoc_checkout_path against on-disk checkout trees."""

    def test_finds_tdoc_in_nested_docs_structure(self, tmp_path: Path) -> None:
        """A TDoc folder nested in an SA-style FTP hierarchy is located."""
        root = tmp_path / "checkout"

        # Build a 3GPP-FTP-style directory chain ending in the TDoc's own folder.
        expected = root.joinpath("TSG_SA", "WG4_CODEC", "TSGS4_131-bis-e", "Docs", "S4-250638")
        expected.mkdir(parents=True)
        expected.joinpath("S4-250638.docx").write_text("content")

        assert resolve_tdoc_checkout_path("S4-250638", root) == expected

    def test_finds_tdoc_in_ran_structure(self, tmp_path: Path) -> None:
        """A TDoc folder nested in a RAN-style hierarchy is located."""
        root = tmp_path / "checkout"

        # Same layout idea as the SA case, but under the TSG_RAN branch.
        expected = root.joinpath("TSG_RAN", "WG1_RH", "TSGR1_115", "Docs", "R1-2300001")
        expected.mkdir(parents=True)
        expected.joinpath("R1-2300001.pdf").write_text("content")

        assert resolve_tdoc_checkout_path("R1-2300001", root) == expected

    def test_returns_none_for_nonexistent_tdoc(self, tmp_path: Path) -> None:
        """An existing but empty checkout base yields None for any TDoc id."""
        root = tmp_path / "checkout"
        root.mkdir(parents=True)

        assert resolve_tdoc_checkout_path("S4-999999", root) is None


class TestCheckoutTdocIfNeeded:
    """Check when _checkout_tdoc_if_needed does or does not call checkout_tdoc."""

    def test_empty_folder_only_ai_subfolder_triggers_redownload(
        self,
        tmp_path: Path,
        sample_tdoc_metadata: TDocMetadata,
    ) -> None:
        """A TDoc folder holding nothing but ``.ai/`` must be checked out again."""
        root = tmp_path / "checkout"

        # Only the .ai artefacts exist; no document file sits beside them.
        tdoc_dir = root.joinpath("TSG_SA", "WG4", "TSGS4_131", "Docs", "S4-251234")
        processed_dir = tdoc_dir / ".ai"
        processed_dir.mkdir(parents=True)
        processed_dir.joinpath("processed.md").write_text("processed content")

        # Stand in for the real checkout; it reports the same folder back.
        with patch(
            "threegpp_ai.operations.workspaces.checkout_tdoc",
            return_value=tdoc_dir,
        ) as fake_checkout:
            outcome = _checkout_tdoc_if_needed("S4-251234", sample_tdoc_metadata, root)

            # A folder containing just .ai/ counts as empty, so checkout must run.
            fake_checkout.assert_called_once()
            assert outcome == tdoc_dir

    def test_populated_folder_does_not_trigger_redownload(
        self,
        tmp_path: Path,
        sample_tdoc_metadata: TDocMetadata,
    ) -> None:
        """A TDoc folder that still has a document file is reused as-is."""
        root = tmp_path / "checkout"

        # Lay out both a real document and the .ai artefacts next to it.
        tdoc_dir = root.joinpath("TSG_SA", "WG4", "TSGS4_131", "Docs", "S4-251234")
        processed_dir = tdoc_dir / ".ai"
        processed_dir.mkdir(parents=True)
        tdoc_dir.joinpath("S4-251234.docx").write_text("document content")
        processed_dir.joinpath("processed.md").write_text("processed content")

        # The patched checkout must stay untouched for a populated folder.
        with patch("threegpp_ai.operations.workspaces.checkout_tdoc") as fake_checkout:
            outcome = _checkout_tdoc_if_needed("S4-251234", sample_tdoc_metadata, root)

            fake_checkout.assert_not_called()
            assert outcome == tdoc_dir

    def test_nonexistent_folder_triggers_checkout(
        self,
        tmp_path: Path,
        sample_tdoc_metadata: TDocMetadata,
    ) -> None:
        """When path resolution finds nothing, checkout_tdoc is invoked."""
        root = tmp_path / "checkout"
        root.mkdir(parents=True)

        # Folder that the mocked checkout will claim to have produced.
        fresh_dir = root.joinpath("TSG_SA", "WG4", "TSGS4_131", "Docs", "S4-251234")
        fresh_dir.mkdir(parents=True)
        fresh_dir.joinpath("test.docx").write_text("content")

        # Force the "not found" branch by making the resolver return None.
        with (
            patch(
                "threegpp_ai.operations.workspaces.resolve_tdoc_checkout_path",
                return_value=None,
            ),
            patch(
                "threegpp_ai.operations.workspaces.checkout_tdoc",
                return_value=fresh_dir,
            ) as fake_checkout,
        ):
            outcome = _checkout_tdoc_if_needed("S4-251234", sample_tdoc_metadata, root)

            # No resolved folder means a checkout has to happen.
            fake_checkout.assert_called_once()
            assert outcome == fresh_dir