Commit bc6c8589 authored by Jan Reimes
Browse files

test(CONFIG-01): add alignment test for .env.example embedding model

- Test verifies comment matches value to prevent user confusion
- Documents expected config structure for embedding model

[no ci]
parent b2e76a30
Loading
Loading
Loading
Loading
+67 −0
Original line number Diff line number Diff line
"""Tests to verify configuration defaults alignment.

These tests ensure that .env.example documentation is consistent
and matches what the code actually uses.
"""

from __future__ import annotations

import re
from pathlib import Path


def test_embedding_model_env_comment_matches_value():
    """Verify .env.example TDC_AI_EMBEDDING_MODEL comment and value are consistent.

    The comment recommends a model, but if the actual value differs,
    users get confusing instructions about which model to use.
    """
    env_example = Path(".env.example").read_text()

    # Find the embedding model line and its preceding comment
    pattern = r"# (Recommended:[^\n]*\n[^\n]*\n)?"  # Optional comment block
    embed_section = re.search(
        r"# Embedding model[^\n]*\n"
        r"(?:# [^\n]*\n)?"  # Optional recommended line
        r"TDC_AI_EMBEDDING_MODEL=(\S+)",
        env_example,
        re.MULTILINE,
    )
    assert embed_section, "TDC_AI_EMBEDDING_MODEL not found in .env.example"

    value = embed_section.group(1)

    # If there's a recommended line, extract it and check consistency
    recommended_match = re.search(r"# Recommended: ([^\n]+)", embed_section.group(0))
    if recommended_match:
        recommended = recommended_match.group(1)
        # Extract model name from "provider/model:tag" format
        recommended_model = re.search(r"(\S+/\S+:?\S*)", recommended)
        if recommended_model:
            assert value == recommended_model.group(1), (
                f".env.example embedding model comment ({recommended_model.group(1)}) "
                f"differs from value ({value}). Update comment to match value."
            )


def test_embedding_model_not_hardcoded_in_code():
    """Check the configuration structure the embedding model setting relies on.

    The embedding model is meant to be configurable through the
    TDC_AI_EMBEDDING_MODEL environment variable rather than hardcoded.
    For now this test only confirms that AiConfig can be instantiated
    without arguments and exposes an ``llm_model`` attribute.
    """
    # Function-scope import: the project package is only loaded when the
    # test actually runs.
    from threegpp_ai.config import AiConfig

    settings = AiConfig()
    has_llm_model = hasattr(settings, "llm_model")
    assert has_llm_model, "AiConfig should have llm_model field"

    # NOTE: an embedding_model field may not exist on AiConfig yet, so it is
    # deliberately not asserted here; this test documents the expected
    # configuration structure.