Commit b26e1dad authored by Jan Reimes

fix(tests): fix test failures after 3gpp-ai removal

- Add CacheManager._instance reset autouse fixture in conftest.py
- Rewrite whatthespec tests to mock create_cached_session instead of
  requests.Session.get (code uses hishel CachedSession); see the sketch below
- Add monkeypatch.delenv for TDC_EOL_USERNAME/PASSWORD in
  test_credentials.py to prevent env var leakage
- Use requests.RequestException instead of plain Exception in
  test_meeting_document_list.py network error test
- Remove 3gpp-ai dependent tests from test_checkout.py and
  test_graph_errors.py
- Remove AiConfig test from test_config_defaults.py
parent b9171c26
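
The whatthespec diff itself is not included in this view, so here is a minimal sketch of the mocking approach described in the commit message. The patch target tdoc_crawler.whatthespec.create_cached_session, the get_spec_page helper, and the response fields are assumed names for illustration, not the repository's actual test code.

from unittest.mock import Mock, patch


def test_spec_lookup_patches_cached_session_factory() -> None:
    """Sketch: patch the hishel session factory, not requests.Session.get.

    The production code builds its own hishel CachedSession via
    create_cached_session(), so patching requests.Session.get never
    intercepts its traffic. All names here are assumptions.
    """
    fake_response = Mock(status_code=200, text="<html>TS 26.114</html>")
    fake_session = Mock()
    fake_session.get.return_value = fake_response

    # Patch the factory where the code under test looks it up.
    with patch(
        "tdoc_crawler.whatthespec.create_cached_session",
        return_value=fake_session,
    ):
        from tdoc_crawler.whatthespec import get_spec_page  # assumed helper

        get_spec_page("26.114")

    # Every request must have gone through the injected fake session.
    fake_session.get.assert_called_once()

The essential point, whatever the real helper names are, is that the patch target is the session factory the code actually calls, which is why patching requests.Session.get stopped working once the code moved to hishel's CachedSession.
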
conftest.py +12 −0
@@ -9,6 +9,7 @@ from pathlib import Path
import pytest
from packaging.version import Version

from tdoc_crawler.config import CacheManager
from tdoc_crawler.config.settings import PathConfig
from tdoc_crawler.database import MeetingDatabase, TDocDatabase
from tdoc_crawler.meetings.models import MeetingMetadata
@@ -21,6 +22,17 @@ def pytest_ignore_collect(collection_path: object, config: pytest.Config) -> boo
    return "/tests/ai/" in path_text or path_text.endswith("/tests/ai")


@pytest.fixture(autouse=True)
def _reset_cache_manager() -> None:
    """Reset CacheManager singleton before each test.

    The CLI callback registers a CacheManager singleton on every invocation.
    Without resetting between tests, the second test that invokes the CLI
    will fail with "CacheManager already registered".
    """
    CacheManager._instance = None


@pytest.fixture
def test_cache_dir(tmp_path: Path) -> Path:
    """Create a temporary cache directory for tests.
test_checkout.py +1 −106
@@ -7,11 +7,10 @@ from unittest.mock import Mock, patch

import pytest
from packaging.version import Version
from threegpp_ai.operations.classify import classify_document_files
from threegpp_ai.operations.workspaces import _checkout_tdoc_if_needed, resolve_tdoc_checkout_path

from tdoc_crawler.tdocs.models import TDocMetadata
from tdoc_crawler.tdocs.operations.checkout import checkout_tdoc, get_checked_out_tdocs, get_checkout_path
from tdoc_crawler.workspaces import resolve_tdoc_checkout_path


@pytest.fixture
@@ -261,107 +260,3 @@ class TestResolveTdocCheckoutPath:

        result = resolve_tdoc_checkout_path("S4-999999", checkout_base)
        assert result is None


class TestCheckoutTdocIfNeeded:
    """Tests for _checkout_tdoc_if_needed function behavior."""

    def test_empty_folder_only_ai_subfolder_triggers_redownload(
        self,
        tmp_path: Path,
        sample_tdoc_metadata: TDocMetadata,
    ) -> None:
        """Test that folder with only .ai subfolder triggers re-download."""
        checkout_base = tmp_path / "checkout"

        # Create folder structure with only .ai subfolder (simulating processed but deleted docs)
        tdoc_path = checkout_base / "TSG_SA" / "WG4" / "TSGS4_131" / "Docs" / "S4-251234"
        ai_folder = tdoc_path / ".ai"
        ai_folder.mkdir(parents=True)
        (ai_folder / "processed.md").write_text("processed content")

        # Mock checkout_tdoc to simulate re-download
        with patch("threegpp_ai.operations.workspaces.checkout_tdoc") as mock_checkout:
            mock_checkout.return_value = tdoc_path

            result = _checkout_tdoc_if_needed("S4-251234", sample_tdoc_metadata, checkout_base)

            # Should trigger re-download because folder only has .ai/ subfolder
            mock_checkout.assert_called_once()
            assert result == tdoc_path

    def test_populated_folder_does_not_trigger_redownload(
        self,
        tmp_path: Path,
        sample_tdoc_metadata: TDocMetadata,
    ) -> None:
        """Test that folder with actual document files does NOT trigger re-download."""
        checkout_base = tmp_path / "checkout"

        # Create folder with actual document files
        tdoc_path = checkout_base / "TSG_SA" / "WG4" / "TSGS4_131" / "Docs" / "S4-251234"
        tdoc_path.mkdir(parents=True)
        (tdoc_path / "S4-251234.docx").write_text("document content")
        ai_folder = tdoc_path / ".ai"
        ai_folder.mkdir(parents=True)
        (ai_folder / "processed.md").write_text("processed content")

        # Mock checkout_tdoc to verify it's NOT called
        with patch("threegpp_ai.operations.workspaces.checkout_tdoc") as mock_checkout:
            result = _checkout_tdoc_if_needed("S4-251234", sample_tdoc_metadata, checkout_base)

            # Should NOT trigger re-download because folder has actual files
            mock_checkout.assert_not_called()
            assert result == tdoc_path

    def test_nonexistent_folder_triggers_checkout(
        self,
        tmp_path: Path,
        sample_tdoc_metadata: TDocMetadata,
    ) -> None:
        """Test that non-existent folder triggers checkout."""
        checkout_base = tmp_path / "checkout"
        checkout_base.mkdir(parents=True)

        # Mock both resolve_tdoc_checkout_path (returns None) and checkout_tdoc
        with (
            patch("threegpp_ai.operations.workspaces.resolve_tdoc_checkout_path") as mock_resolve,
            patch("threegpp_ai.operations.workspaces.checkout_tdoc") as mock_checkout,
        ):
            mock_resolve.return_value = None  # Simulate non-existent folder
            new_path = checkout_base / "TSG_SA" / "WG4" / "TSGS4_131" / "Docs" / "S4-251234"
            new_path.mkdir(parents=True)
            (new_path / "test.docx").write_text("content")
            mock_checkout.return_value = new_path

            result = _checkout_tdoc_if_needed("S4-251234", sample_tdoc_metadata, checkout_base)

            # Should trigger checkout because folder doesn't exist
            mock_checkout.assert_called_once()
            assert result == new_path


class TestClassifyDocumentFiles:
    """Tests for classify_document_files error messages."""

    def test_empty_folder_warning_includes_redownload_suggestion(
        self,
        tmp_path: Path,
        caplog: pytest.LogCaptureFixture,
    ) -> None:
        """Test that empty folder warning includes re-download suggestion."""
        folder_path = tmp_path / "empty_folder"
        folder_path.mkdir(parents=True)
        # Add only .ai subfolder
        ai_folder = folder_path / ".ai"
        ai_folder.mkdir(parents=True)
        (ai_folder / "processed.md").write_text("content")

        with caplog.at_level("WARNING"):
            result = classify_document_files("S4-251234", folder_path)

        assert result == []
        assert "No document files found" in caplog.text
        assert ".pdf/.docx/.xlsx/.pptx" in caplog.text
        assert "tdoc-crawler checkout S4-251234" in caplog.text
        assert "Folder contents:" in caplog.text
test_config_defaults.py +0 −23
@@ -9,8 +9,6 @@ from __future__ import annotations
import re
from pathlib import Path

from threegpp_ai.config import AiConfig


def test_embedding_model_env_comment_matches_value() -> None:
    """Verify .env.example TDC_AI_EMBEDDING_MODEL comment and value are consistent.
@@ -42,24 +40,3 @@ def test_embedding_model_env_comment_matches_value() -> None:
            assert value == recommended_model.group(1), (
                f".env.example embedding model comment ({recommended_model.group(1)}) differs from value ({value}). Update comment to match value."
            )


def test_embedding_model_not_hardcoded_in_code() -> None:
    """Verify embedding model is read from config, not hardcoded.

    The embedding model should be configurable via TDC_AI_EMBEDDING_MODEL
    environment variable and stored in AiConfig.embedding_model.
    """
    # This test verifies the configuration structure exists
    # The actual default value is defined in AiConfig (or LightRAGConfig)
    # and TDC_AI_EMBEDDING_MODEL should be the documented way to override it

    # AiConfig should accept embedding_model via env var or config
    # If TDC_AI_EMBEDDING_MODEL is set, it should be used
    config = AiConfig()

    # Verify the config has the expected structure
    assert hasattr(config, "llm_model"), "AiConfig should have llm_model field"

    # Note: embedding_model field may not exist yet in AiConfig -
    # this test documents the expected configuration structure
test_credentials.py +5 −1
@@ -66,8 +66,12 @@ class TestCredentialsWithConfig:

        assert result is None

    def test_resolve_config_prompt_flag(self) -> None:
    def test_resolve_config_prompt_flag(self, monkeypatch: pytest.MonkeyPatch) -> None:
        """credentials_config.prompt controls prompting."""
        # Clear env vars that might provide credentials before prompting
        monkeypatch.delenv("TDC_EOL_USERNAME", raising=False)
        monkeypatch.delenv("TDC_EOL_PASSWORD", raising=False)

        config = CredentialsConfig(prompt="true")

        # Should attempt to prompt (but won't in test due to non-TTY)
test_graph_errors.py +4 −95
"""Tests for graph error handling and deprecated import verification.
"""Tests for deprecated import verification and datetime scoping.

This test module verifies:
1. No deprecated imports remain in the codebase (AiStorage, EmbeddingsManager, lancedb, etc.)
2. Graph error handling readiness for future LightRAG integration
3. Datetime scoping is correct in CLI operations
2. Datetime scoping is correct in CLI operations
"""

from __future__ import annotations

import sys
from datetime import UTC, datetime

import pytest
import threegpp_ai
import threegpp_ai.operations.workspaces as workspaces_ops
from threegpp_ai import cli
from threegpp_ai.operations.workspaces import (
    checkout_spec_to_workspace,
    checkout_tdoc_to_workspace,
    ensure_ai_subfolder,
)

from tdoc_crawler.utils.date_parser import parse_partial_date


class TestDeprecatedImports:
    """Verify deprecated modules are not imported in the codebase."""

    def test_no_aistorage_import(self) -> None:
        """AiStorage deprecated class should not be imported."""
        # Verify AiStorage not in module namespaces
        assert not hasattr(threegpp_ai, "AiStorage"), "AiStorage should not be exported"
        assert not hasattr(threegpp_ai.operations, "AiStorage"), "AiStorage should not be in operations"

    def test_no_embeddings_manager_import(self) -> None:
        """EmbeddingsManager deprecated class should not be imported."""
        # Verify EmbeddingsManager not in module namespaces
        assert not hasattr(threegpp_ai, "EmbeddingsManager"), "EmbeddingsManager should not be exported"

    def test_no_lancedb_import(self) -> None:
        """Lancedb deprecated module should not be imported."""
        # Verify lancedb not in loaded modules
        assert "lancedb" not in sys.modules, "lancedb module should not be loaded"
        assert "tdoc_ai.storage.lancedb" not in sys.modules, "tdoc_ai.storage.lancedb should not be loaded"

    def test_no_sentence_transformers_import(self) -> None:
        """sentence_transformers deprecated module should not be imported."""
        # Verify sentence_transformers not in loaded modules
        assert "sentence_transformers" not in sys.modules, "sentence_transformers should not be loaded"

    def test_no_legacy_pipeline_import(self) -> None:
        """Legacy pipeline module should not be imported."""
        # Verify legacy pipeline not in loaded modules
        assert "tdoc_ai.operations.pipeline" not in sys.modules, "Legacy pipeline should not be loaded"


class TestDatetimeScoping:
    """Verify datetime variables are properly scoped."""

    def test_datetime_module_level_import(self) -> None:
        """Datetime should be imported at module level in cli.py."""
        # Verify datetime is available in cli module
        assert hasattr(cli, "datetime"), "datetime should be imported in cli module"
        assert hasattr(cli, "UTC"), "UTC should be imported in cli module"

    def test_datetime_combine_usage(self) -> None:
        """datetime.combine should be used with proper scoping."""
        # Test that datetime.combine works correctly with parse_partial_date
        start_date_str = "2025-01"
        parsed_date = parse_partial_date(start_date_str)
        combined = datetime.combine(parsed_date, datetime.min.time(), tzinfo=UTC)
@@ -76,7 +41,6 @@ class TestDatetimeScoping:

    def test_datetime_combine_end_date(self) -> None:
        """datetime.combine should work for end dates with is_end flag."""
        # Test end date parsing
        end_date_str = "2025-03"
        parsed_date = parse_partial_date(end_date_str, is_end=True)
        combined = datetime.combine(parsed_date, datetime.max.time(), tzinfo=UTC)
@@ -85,58 +49,3 @@ class TestDatetimeScoping:
        assert combined.tzinfo == UTC
        assert combined.year == 2025
        assert combined.month == 3


class TestGraphErrorHandling:
    """Test graph error handling readiness.

    Note: Currently there is NO LightRAG integration or graph building code
    in the 3gpp-ai package. These tests document the expected behavior when
    graph operations are added in a future phase.
    """

    def test_graph_operations_not_yet_implemented(self) -> None:
        """Document that graph operations are not yet implemented.

        This test verifies the current state: no LightRAG or graph building
        code exists in the codebase. When LightRAG integration is added,
        this test should be updated to verify actual graph error handling.
        """
        # Verify no LightRAG classes are exported
        assert not hasattr(threegpp_ai, "LightRAGConfig"), "LightRAGConfig not yet implemented"
        assert not hasattr(threegpp_ai, "TDocRAG"), "TDocRAG not yet implemented"
        assert not hasattr(threegpp_ai, "TDocProcessor"), "TDocProcessor not yet implemented"

        # Verify no graph-related functions in workspaces
        assert not hasattr(workspaces_ops, "build_graph"), "build_graph not yet implemented"
        assert not hasattr(workspaces_ops, "insert_to_graph"), "insert_to_graph not yet implemented"

    def test_workspace_process_continues_without_graph(self) -> None:
        """Workspace processing should work without graph operations.

        Currently workspace processing only does extraction, not graph building.
        This test documents that extraction works independently.
        """
        # Verify core workspace operations exist and work without graph
        assert checkout_tdoc_to_workspace is not None
        assert checkout_spec_to_workspace is not None
        assert ensure_ai_subfolder is not None

    @pytest.mark.skip(reason="LightRAG integration not yet implemented")
    def test_graph_error_caught_and_logged(self) -> None:
        """When LightRAG is integrated, graph errors should be caught.

        Future test: Verify that when rag.insert() raises ValueError or
        RuntimeError, the error is caught, logged with document ID, and
        processing continues for other documents.
        """
        pass

    @pytest.mark.skip(reason="LightRAG integration not yet implemented")
    def test_graph_error_doesnt_crash_workspace_process(self) -> None:
        """When LightRAG is integrated, graph errors should not crash processing.

        Future test: Verify that graph building failures for individual
        documents don't stop the entire workspace process.
        """
        pass