Commit f0e5367f authored by Jan Reimes
Browse files

refactor(tests): streamline imports and update cache directory references

* Consolidate import statements in test files for clarity.
* Update references from `ai_store_dir` to `ai_cache_dir` in tests.
* Ensure consistent usage of AiStorage across test cases.
parent 0d4fc48c
Loading
Loading
Loading
Loading
+3 −7
Original line number Diff line number Diff line
@@ -6,11 +6,13 @@ import hashlib
import sys
import zipfile
from pathlib import Path
from unittest.mock import MagicMock
from unittest.mock import MagicMock, patch

import pytest
from typer.testing import CliRunner

from tdoc_crawler.ai.container import AiServiceContainer
from tdoc_crawler.ai.storage import AiStorage
from tdoc_crawler.http_client import download_to_file

# Mock kreuzberg which might not be installed in all environments.
@@ -132,8 +134,6 @@ def reset_ai_service_container() -> None:
    This fixture ensures each test starts with a fresh AiServiceContainer
    singleton, preventing state leakage between tests that use storage.
    """
    from tdoc_crawler.ai.container import AiServiceContainer

    AiServiceContainer.reset_instance()
    yield
    # Clean up after test
@@ -154,10 +154,6 @@ def ai_storage(test_cache_dir: Path) -> AiStorage:
    Returns:
        AiStorage instance with temporary storage
    """
    from unittest.mock import patch

    from tdoc_crawler.ai.container import AiServiceContainer

    lancedb_dir = test_cache_dir / "ai" / "lancedb"
    lancedb_dir.mkdir(parents=True, exist_ok=True)

+2 −2
Original line number Diff line number Diff line
@@ -62,8 +62,8 @@ def test_default_store_path_resolves_under_cache_dir(tmp_path: Path) -> None:
    config = AiConfig(cache_manager_name="test-ai-config")

    # Path should be under .ai directory with embedding model subdirectory
    assert ".ai" in str(config.ai_store_dir)
    assert "sentence-transformers" in str(config.ai_store_dir) or "all-MiniLM-L6-v2" in str(config.ai_store_dir)
    assert ".ai" in str(config.ai_cache_dir)
    assert "sentence-transformers" in str(config.ai_cache_dir) or "all-MiniLM-L6-v2" in str(config.ai_cache_dir)


def test_no_hardcoded_models_in_ai_package() -> None:
+5 −12
Original line number Diff line number Diff line
@@ -7,6 +7,11 @@ from unittest.mock import MagicMock

from tdoc_crawler.ai.models import GraphEdge, GraphEdgeType, GraphNode, GraphNodeType
from tdoc_crawler.ai.operations import graph
from tdoc_crawler.ai.operations.graph import (
    extract_change_requests,
    extract_company_entities,
    extract_work_items,
)


class TestGraph:
@@ -169,8 +174,6 @@ class TestEntityExtractors:

    def test_extract_company_entities(self) -> None:
        """Test company name extraction."""
        from tdoc_crawler.ai.operations.graph import extract_company_entities

        text = "This document was submitted by Huawei and Nokia for discussion at 3GPP."
        companies = extract_company_entities(text)

@@ -180,8 +183,6 @@ class TestEntityExtractors:

    def test_extract_work_items(self) -> None:
        """Test work item extraction."""
        from tdoc_crawler.ai.operations.graph import extract_work_items

        text = "This relates to WI-12345 and Work Item 67890 for 5G enhancement."
        wis = extract_work_items(text)

@@ -190,8 +191,6 @@ class TestEntityExtractors:

    def test_extract_change_requests(self) -> None:
        """Test change request extraction."""
        from tdoc_crawler.ai.operations.graph import extract_change_requests

        text = "This CR-001234 and Change Request 5678 propose modifications to the spec."
        crs = extract_change_requests(text)

@@ -200,12 +199,6 @@ class TestEntityExtractors:

    def test_extract_all_entity_types(self) -> None:
        """Test extraction of all entity types together."""
        from tdoc_crawler.ai.operations.graph import (
            extract_change_requests,
            extract_company_entities,
            extract_work_items,
        )

        text = """
        Samsung proposes WI-99999 to address CR-11111.
        This work item relates to change request CP-230001.
+3 −2
Original line number Diff line number Diff line
@@ -12,6 +12,7 @@ from tdoc_crawler.ai import process_all as process_all_api
from tdoc_crawler.ai.models import PipelineStage, ProcessingStatus, WorkspaceMember
from tdoc_crawler.ai.operations import pipeline
from tdoc_crawler.ai.operations.pipeline import process_all, run_pipeline
from tdoc_crawler.ai.storage import AiStorage
from tdoc_crawler.config import CacheManager
from tdoc_crawler.utils.misc import utc_now

@@ -150,7 +151,7 @@ class TestProcessTdocApi:
        mock_storage.get_status.side_effect = lambda doc_id, workspace=None: completed if doc_id == "S4-251003" else None

        # Mock the container to return our mock storage
        def mock_get_instance():
        def mock_get_instance() -> MagicMock:
            container = MagicMock()
            container.get_ai_storage.return_value = mock_storage
            container.get_embeddings_manager.return_value = MagicMock()
@@ -163,7 +164,7 @@ class TestProcessTdocApi:
        (tmp_path / "S4-260001").mkdir()

        # Mock run_pipeline to avoid actual processing
        def mock_run_pipeline(doc_id, folder_path, storage, **kwargs):
        def mock_run_pipeline(doc_id: str, folder_path: Path, storage: AiStorage, **kwargs) -> ProcessingStatus:
            return completed if doc_id == "S4-251003" else pending

        monkeypatch.setattr("tdoc_crawler.ai.operations.pipeline.run_pipeline", mock_run_pipeline)
+1 −0
Original line number Diff line number Diff line
@@ -4,6 +4,7 @@ from __future__ import annotations

from tdoc_crawler.ai.models import DocumentChunk, PipelineStage, ProcessingStatus, SourceKind
from tdoc_crawler.ai.operations import workspaces
from tdoc_crawler.ai.storage import AiStorage


def test_workspace_member_isolation_across_workspaces() -> None: