Commit 285f20ae authored by Jan Reimes's avatar Jan Reimes
Browse files

fix(lint): resolve ANN001 and E402 in rag.py and test_integration.py

parent 9b1cf380
Loading
Loading
Loading
Loading
+25 −13
Original line number | Diff line number | Diff line
"""Integration tests for LightRAG pipeline."""

import shutil
from collections.abc import Generator
from pathlib import Path

import pytest
from threegpp_ai.lightrag import (
    QueryMode,
    RAGMetadata,
    TDocRAG,
    enrich_text,
@@ -12,7 +14,7 @@ from threegpp_ai.lightrag import (


@pytest.fixture
def test_workspace():
def test_workspace() -> Generator[Path]:
    """Create a test workspace directory."""
    workspace_dir = Path("./test_lightrag_workspace")
    if workspace_dir.exists():
@@ -28,7 +30,7 @@ class TestTDocRAG:
    """Test TDocRAG integration."""

    @pytest.mark.asyncio
    async def test_rag_insert_and_query(self, test_workspace) -> None:
    async def test_rag_insert_and_query(self, test_workspace: Path) -> None:
        """Test inserting and querying documents."""
        async with TDocRAG() as rag:
            # Insert test document
@@ -48,7 +50,7 @@ class TestTDocRAG:
            assert result.startswith("mocked:")

    @pytest.mark.asyncio
    async def test_rag_with_metadata_enrichment(self, test_workspace) -> None:
    async def test_rag_with_metadata_enrichment(self, test_workspace: Path) -> None:
        """Test inserting with metadata enrichment."""
        metadata = RAGMetadata(
            tdoc_id="S4-250001",
@@ -68,7 +70,7 @@ class TestTDocRAG:
            assert "Document: S4-250001" in enriched_text

    @pytest.mark.asyncio
    async def test_rag_workspace_isolation(self, test_workspace) -> None:
    async def test_rag_workspace_isolation(self, test_workspace: Path) -> None:
        """Test that workspaces are isolated."""
        # Insert into workspace1
        async with TDocRAG() as rag1:
@@ -92,16 +94,21 @@ class TestTDocRAG:
    def _mock_runtime(self, monkeypatch: pytest.MonkeyPatch) -> None:
        """Mock heavy LightRAG runtime calls so tests stay deterministic."""

        async def _start(self, workspace: str | None = None) -> None:
        async def _start(self: TDocRAG, workspace: str | None = None) -> None:
            self._started_workspace = workspace or "default"

        async def _stop(self) -> None:
        async def _stop(self: TDocRAG) -> None:
            return None

        async def _insert(self, text: str, **kwargs: dict) -> None:
        async def _insert(self: TDocRAG, text: str, **kwargs: dict) -> None:
            self._last_insert = {"text": text, "kwargs": kwargs}

        async def _query(self, query: str, mode=None, **kwargs: dict) -> str:
        async def _query(
            self: TDocRAG,
            query: str,
            mode: QueryMode | str | None = None,
            **kwargs: dict,
        ) -> str:
            return f"mocked:{query}"

        monkeypatch.setattr(TDocRAG, "start", _start)
@@ -114,7 +121,7 @@ class TestMetadataEnrichmentIntegration:
    """Test metadata enrichment in real pipeline."""

    @pytest.mark.asyncio
    async def test_full_pipeline_with_metadata(self, test_workspace) -> None:
    async def test_full_pipeline_with_metadata(self, test_workspace: Path) -> None:
        """Test full pipeline: metadata -> enrich -> insert -> query."""
        # Create metadata
        metadata = RAGMetadata(
@@ -147,16 +154,21 @@ class TestMetadataEnrichmentIntegration:
    def _mock_runtime(self, monkeypatch: pytest.MonkeyPatch) -> None:
        """Mock LightRAG runtime calls to avoid external dependencies."""

        async def _start(self, workspace: str | None = None) -> None:
        async def _start(self: TDocRAG, workspace: str | None = None) -> None:
            self._started_workspace = workspace or "default"

        async def _stop(self) -> None:
        async def _stop(self: TDocRAG) -> None:
            return None

        async def _insert(self, text: str, **kwargs: dict) -> None:
        async def _insert(self: TDocRAG, text: str, **kwargs: dict) -> None:
            self._last_insert = {"text": text, "kwargs": kwargs}

        async def _query(self, query: str, mode=None, **kwargs: dict) -> str:
        async def _query(
            self: TDocRAG,
            query: str,
            mode: QueryMode | str | None = None,
            **kwargs: dict,
        ) -> str:
            return f"mocked:{query}"

        monkeypatch.setattr(TDocRAG, "start", _start)
+17 −17
Original line number Diff line number Diff line
@@ -27,20 +27,26 @@ from lightrag.llm.openai import openai_complete, openai_embed
from lightrag.llm.zhipu import zhipu_complete, zhipu_embedding
from lightrag.utils import EmbeddingFunc

from tdoc_crawler.config import resolve_cache_manager

from .config import LightRAGConfig, QueryMode, StorageBackend
from .pg0_manager import Pg0Manager
from .shared_storage import WorkspaceIndex

# Patch zhipu_complete_if_cache
original_zhipu_complete_if_cache = zhipu_module.zhipu_complete_if_cache


@wraps(original_zhipu_complete_if_cache)
async def patched_zhipu_complete_if_cache(
    prompt,
    model="glm-4-flashx",
    api_key=None,
    system_prompt=None,
    history_messages=None,
    enable_cot=False,
    **kwargs,
):
    prompt: str,
    model: str = "glm-4-flashx",
    api_key: str | None = None,
    system_prompt: str | None = None,
    history_messages: list | None = None,
    enable_cot: bool = False,
    **kwargs: Any,
) -> Any:
    # Remove unsupported kwargs including base_url
    if history_messages is None:
        history_messages = []
@@ -64,9 +70,9 @@ original_zhipu_embedding = zhipu_module.zhipu_embedding
async def patched_zhipu_embedding(
    texts: list[str],
    model: str = "embedding-3",
    api_key: str = None,
    **kwargs,
):
    api_key: str | None = None,
    **kwargs: Any,
) -> Any:
    # Remove unsupported kwargs including base_url
    kwargs = {k: v for k, v in kwargs.items() if k not in ["base_url"]}
    return await original_zhipu_embedding(
@@ -83,12 +89,6 @@ zhipu_module.zhipu_embedding = patched_zhipu_embedding
zhipu_complete_if_cache = patched_zhipu_complete_if_cache
zhipu_embedding = patched_zhipu_embedding

from tdoc_crawler.config import resolve_cache_manager

from .config import LightRAGConfig, QueryMode, StorageBackend
from .pg0_manager import Pg0Manager
from .shared_storage import WorkspaceIndex

STORAGES["SharedNanoVectorDBStorage"] = "threegpp_ai.lightrag.shared_storage"
logger = logging.getLogger(__name__)