Commit 4f21ba72 authored by Jan Reimes
Browse files

feat(02-checkout-graph-deprecation-config-01): enhance classification error messages

- Include expected file types in warning
- Add re-download suggestion with CLI command
- Log folder contents for debugging
- Add test for improved error messages
parent e70e1df2
Loading
Loading
Loading
Loading
+17 −1
Original line number Diff line number Diff line
@@ -229,7 +229,23 @@ def classify_document_files(

    files = _scan_document_files(folder_path)
    if not files:
        logger.warning("No document files found in %s for document %s", folder_path, document_id)
        # Log folder contents for debugging
        try:
            folder_contents = list(folder_path.iterdir())
            contents_summary = [f.name for f in folder_contents]
        except (OSError, PermissionError):
            contents_summary = ["<cannot read folder>"]
        
        logger.warning(
            "No document files found in %s for document %s. "
            "Expected: .pdf/.docx/.xlsx/.pptx files. "
            "Folder contents: %s. "
            "Run 'tdoc-crawler checkout %s' to re-download if folder is empty.",
            folder_path,
            document_id,
            contents_summary,
            document_id,
        )
        return []

    if len(files) == 1:
+28 −0
Original line number Diff line number Diff line
@@ -338,3 +338,31 @@ class TestCheckoutTdocIfNeeded:
            # Should trigger checkout because folder doesn't exist
            mock_checkout.assert_called_once()
            assert result == new_path


class TestClassifyDocumentFiles:
    """Tests for the warning logged by ``classify_document_files``."""

    def test_empty_folder_warning_includes_redownload_suggestion(
        self,
        tmp_path: Path,
        caplog: pytest.LogCaptureFixture,
    ) -> None:
        """Check the warning logged for a folder without document files.

        The folder is not literally empty: it holds a single ``.ai``
        subfolder containing one markdown file, and no
        .pdf/.docx/.xlsx/.pptx files. The call is expected to return an
        empty list and to log a warning that names the expected file
        types, lists the folder contents, and suggests the re-download
        CLI command for the given document id.
        """
        from threegpp_ai.operations.classify import classify_document_files

        document_id = "S4-251234"
        folder_path = tmp_path / "empty_folder"
        # parents=True also creates folder_path itself, so a single mkdir
        # is enough to set up both directories.
        ai_folder = folder_path / ".ai"
        ai_folder.mkdir(parents=True)
        (ai_folder / "processed.md").write_text("content")

        with caplog.at_level("WARNING"):
            result = classify_document_files(document_id, folder_path)

        assert result == []

        expected_fragments = (
            "No document files found",
            ".pdf/.docx/.xlsx/.pptx",
            f"tdoc-crawler checkout {document_id}",
            # Asserting the listed entry (not just the "Folder contents:"
            # prefix) ensures the directory listing is really reported and
            # that the "<cannot read folder>" fallback was not taken.
            "Folder contents: ['.ai']",
        )
        for fragment in expected_fragments:
            assert fragment in caplog.text