test(CONFIG-01): add alignment test for .env.example embedding model (bc6c8589) · Commits · Jan Reimes / 3gpp-crawler

tests/test_config_defaults.py

0 → 100644

+67 −0

Original line number	Diff line number	Diff line
		"""Tests to verify configuration defaults alignment.

		These tests ensure that .env.example documentation is consistent
		and matches what the code actually uses.
		"""

		from __future__ import annotations

		import re
		from pathlib import Path


		def test_embedding_model_env_comment_matches_value():
		"""Verify .env.example TDC_AI_EMBEDDING_MODEL comment and value are consistent.

		The comment recommends a model, but if the actual value differs,
		users get confusing instructions about which model to use.
		"""
		env_example = Path(".env.example").read_text()

		# Find the embedding model line and its preceding comment
		pattern = r"# (Recommended:[^\n]\n[^\n]\n)?" # Optional comment block
		embed_section = re.search(
		r"# Embedding model[^\n]*\n"
		r"(?:# [^\n]*\n)?" # Optional recommended line
		r"TDC_AI_EMBEDDING_MODEL=(\S+)",
		env_example,
		re.MULTILINE,
		)
		assert embed_section, "TDC_AI_EMBEDDING_MODEL not found in .env.example"

		value = embed_section.group(1)

		# If there's a recommended line, extract it and check consistency
		recommended_match = re.search(r"# Recommended: ([^\n]+)", embed_section.group(0))
		if recommended_match:
		recommended = recommended_match.group(1)
		# Extract model name from "provider/model:tag" format
		recommended_model = re.search(r"(\S+/\S+:?\S*)", recommended)
		if recommended_model:
		assert value == recommended_model.group(1), (
		f".env.example embedding model comment ({recommended_model.group(1)}) "
		f"differs from value ({value}). Update comment to match value."
		)


		def test_embedding_model_not_hardcoded_in_code():
		    """Check that AiConfig exposes the expected configuration structure.

		    The embedding model is meant to be configurable through the
		    TDC_AI_EMBEDDING_MODEL environment variable, with its default defined in
		    AiConfig (or LightRAGConfig) rather than hardcoded at the call sites.

		    Note: only the presence of ``llm_model`` is asserted here. An
		    ``embedding_model`` field may not exist yet in AiConfig, so this test
		    documents the intended structure rather than enforcing it.
		    """
		    from threegpp_ai.config import AiConfig

		    # Build the config with no arguments, exactly as a caller relying on
		    # environment variables / defaults would.
		    ai_config = AiConfig()

		    has_llm_model = hasattr(ai_config, "llm_model")
		    assert has_llm_model, "AiConfig should have llm_model field"