Commit 25cbc653 authored by Jan Reimes's avatar Jan Reimes
Browse files

feat(config): introduce ConfigEnvVar for environment variable mapping

* Added ConfigEnvVar enum for environment variable to config field path mappings.
* Updated CLI argument definitions to use ConfigEnvVar for environment variables.
* Removed compat.py as it is no longer needed.
* Imported ConfigEnvVar in relevant modules for consistency.
parent 267b7d85
Loading
Loading
Loading
Loading
+14 −8
Original line number Diff line number Diff line
@@ -6,6 +6,7 @@ from pathlib import Path
from typing import Annotated

import typer
from tdoc_crawler.config import ConfigEnvVar
from tdoc_crawler.models.base import OutputFormat

from threegpp_ai.lightrag.config import QueryMode
@@ -13,11 +14,11 @@ from threegpp_ai.lightrag.config import QueryMode
# Common
OutputFormatOption = Annotated[
    str,
    typer.Option("--output", "-o", help="Output format (table, json, ison, toon, yaml)", envvar="TDC_AI_OUTPUT_FORMAT"),
    typer.Option("--output", "-o", help="Output format (table, json, ison, toon, yaml)", envvar=ConfigEnvVar.TDC_AI_OUTPUT_FORMAT.name),
]
CacheDirOption = Annotated[
    Path | None,
    typer.Option("--cache-dir", "-c", help="Cache directory", envvar="TDC_CACHE_DIR"),
    typer.Option("--cache-dir", "-c", help="Cache directory", envvar=ConfigEnvVar.TDC_CACHE_DIR.name),
]

# Summarize
@@ -49,7 +50,12 @@ WorkspaceKindOption = Annotated[str, typer.Option("--kind", help="Source kind (t
WorkspaceCheckoutOption = Annotated[bool, typer.Option("--checkout/--no-checkout", help="Checkout/download documents if not present")]
ConvertPdfOption = Annotated[
    bool,
    typer.Option("--convert-pdf/--no-convert-pdf", "-cp", help="Convert office documents to PDF during add-members", envvar="TDC_AI_CONVERT_PDF"),
    typer.Option(
        "--convert-pdf/--no-convert-pdf",
        "-cp",
        help="Convert office documents to PDF during add-members",
        envvar=ConfigEnvVar.TDC_AI_CONVERT_PDF.name,
    ),
]
ConvertMdOption = Annotated[
    bool,
@@ -57,7 +63,7 @@ ConvertMdOption = Annotated[
        "--convert-md/--no-convert-md",
        "-cm",
        help="Extract markdown from PDFs (implies --convert-pdf). Saves tables, figures, equations, metadata to .ai folder",
        envvar="TDC_AI_CONVERT_MD",
        envvar=ConfigEnvVar.TDC_AI_CONVERT_MD.name,
    ),
]
WorkspaceEmbedOption = Annotated[
@@ -82,7 +88,7 @@ WorkspaceProcessVlmOption = Annotated[
    typer.Option(
        "--vlm/--no-vlm",
        help="Enable VLM picture description and formula enrichment",
        envvar="TDC_AI_VLM",
        envvar=ConfigEnvVar.TDC_AI_VLM.name,
    ),
]
WorkspacePreserveArtifactsOption = Annotated[
@@ -99,7 +105,7 @@ AcceleratorDeviceOption = Annotated[
    typer.Option(
        "--device",
        help="Compute device for document extraction: auto, cpu, cuda, mps, xpu, or cuda:N",
        envvar="TDC_AI_DEVICE",
        envvar=ConfigEnvVar.TDC_AI_DEVICE.name,
    ),
]
AcceleratorThreadsOption = Annotated[
@@ -107,7 +113,7 @@ AcceleratorThreadsOption = Annotated[
    typer.Option(
        "--threads",
        help="Number of threads for CPU-bound extraction operations",
        envvar="TDC_AI_NUM_THREADS",
        envvar=ConfigEnvVar.TDC_AI_NUM_THREADS.name,
    ),
]
AcceleratorBatchSizeOption = Annotated[
@@ -115,7 +121,7 @@ AcceleratorBatchSizeOption = Annotated[
    typer.Option(
        "--batch-size",
        help="Batch size for OCR, layout, and table structure. Higher values benefit GPU",
        envvar="TDC_AI_BATCH_SIZE",
        envvar=ConfigEnvVar.TDC_AI_BATCH_SIZE.name,
    ),
]

+35 −28
Original line number Diff line number Diff line
@@ -7,6 +7,8 @@ from typing import Annotated, Literal

import typer

from tdoc_crawler.config import ConfigEnvVar

# Arguments
TDocIdsArgument = Annotated[list[str] | None, typer.Argument(help="TDoc identifiers to query")]
TDocIdArgument = Annotated[str, typer.Argument(help="TDoc identifier to download and open")]
@@ -16,26 +18,27 @@ SpecArgument = Annotated[list[str] | None, typer.Argument(help="Spec number(s) t
# Options - TDocs/Meetings
WorkingGroupOption = Annotated[
    list[str] | None,
    typer.Option("--working-group", "-w", help="Filter by working group (e.g., 'R'/'RAN', 'S'/'SA', 'C'/'CT')", envvar="TDC_WORKING_GROUP"),
    typer.Option("--working-group", "-w", help="Filter by working group (e.g., 'R'/'RAN', 'S'/'SA', 'C'/'CT')", envvar=ConfigEnvVar.TDC_WORKING_GROUP.name),
]
SubgroupOption = Annotated[
    list[str] | None,
    typer.Option("--sub-group", "-s", help="Filter by sub-working group (e.g., 'R2/RAN2, SA4, CT1, CP, ...')", envvar="TDC_SUB_GROUP"),
    typer.Option("--sub-group", "-s", help="Filter by sub-working group (e.g., 'R2/RAN2, SA4, CT1, CP, ...')", envvar=ConfigEnvVar.TDC_SUB_GROUP.name),
]
LimitMeetingsOption = Annotated[int | None, typer.Option("--limit-meetings", help="Limit meetings overall", envvar="TDC_LIMIT_MEETINGS")]
LimitMeetingsOption = Annotated[int | None, typer.Option("--limit-meetings", help="Limit meetings overall", envvar=ConfigEnvVar.TDC_LIMIT_MEETINGS.name)]
LimitMeetingsPerSubWgOption = Annotated[
    int | None, typer.Option("--limit-meetings-per-subwg", help="Limit meetings per sub-working group", envvar="TDC_LIMIT_MEETINGS_PER_SUBWG")
    int | None,
    typer.Option("--limit-meetings-per-subwg", help="Limit meetings per sub-working group", envvar=ConfigEnvVar.TDC_LIMIT_MEETINGS_PER_SUBWG.name),
]
LimitSubWgsOption = Annotated[int | None, typer.Option("--limit-subwgs", help="Limit number of sub-working groups")]
LimitOption = Annotated[int | None, typer.Option("--limit", "-l", help="Maximum number of rows")]
OrderOption = Annotated[str, typer.Option("--order", help="Sort order (asc|desc)")]
StartDateOption = Annotated[
    str | None,
    typer.Option("--start-date", help="Filter from ISO timestamp (YYYY-MM-DD or YYYY-MM-DDTHH:MM:SS)", envvar="TDC_START_DATE"),
    typer.Option("--start-date", help="Filter from ISO timestamp (YYYY-MM-DD or YYYY-MM-DDTHH:MM:SS)", envvar=ConfigEnvVar.TDC_START_DATE.name),
]
EndDateOption = Annotated[
    str | None,
    typer.Option("--end-date", help="Filter until ISO timestamp (YYYY-MM-DD or YYYY-MM-DDTHH:MM:SS)", envvar="TDC_END_DATE"),
    typer.Option("--end-date", help="Filter until ISO timestamp (YYYY-MM-DD or YYYY-MM-DDTHH:MM:SS)", envvar=ConfigEnvVar.TDC_END_DATE.name),
]
NoFetchOption = Annotated[
    bool,
@@ -48,15 +51,15 @@ IncrementalOption = Annotated[
        help=("When true, skip items already present in the database (incremental mode). Use --full to re-scan everything."),
    ),
]
LimitTDocsOption = Annotated[int | None, typer.Option("--limit-tdocs", help="Limit number of TDocs", envvar="TDC_LIMIT_TDOCS")]
LimitTDocsOption = Annotated[int | None, typer.Option("--limit-tdocs", help="Limit number of TDocs", envvar=ConfigEnvVar.TDC_LIMIT_TDOCS.name)]
ClearTDocsOption = Annotated[bool, typer.Option("--clear-tdocs", help="Clear all TDocs before crawling")]
_ = Annotated[
    int | None,
    typer.Option("--overall-timeout", help="Maximum total crawl duration in seconds (None = unlimited)", envvar="TDC_OVERALL_TIMEOUT"),
    typer.Option("--overall-timeout", help="Maximum total crawl duration in seconds (None = unlimited)", envvar=ConfigEnvVar.TDC_OVERALL_TIMEOUT.name),
]
OutputFormatOption = Annotated[str, typer.Option("--output", "-o", help="Output format (table, json, ison, toon, yaml)", envvar="TDC_OUTPUT")]
EolUsernameOption = Annotated[str | None, typer.Option("--eol-username", help="ETSI Online account username", envvar="TDC_EOL_USERNAME")]
EolPasswordOption = Annotated[str | None, typer.Option("--eol-password", help="ETSI Online account password", envvar="TDC_EOL_PASSWORD")]
OutputFormatOption = Annotated[str, typer.Option("--output", "-o", help="Output format (table, json, ison, toon, yaml)", envvar=ConfigEnvVar.TDC_OUTPUT.name)]
EolUsernameOption = Annotated[str | None, typer.Option("--eol-username", help="ETSI Online account username", envvar=ConfigEnvVar.TDC_EOL_USERNAME.name)]
EolPasswordOption = Annotated[str | None, typer.Option("--eol-password", help="ETSI Online account password", envvar=ConfigEnvVar.TDC_EOL_PASSWORD.name)]
PromptCredentialsOption = Annotated[
    bool | None,
    typer.Option("--prompt-credentials/--no-prompt-credentials", help="Prompt for credentials when missing"),
@@ -69,27 +72,29 @@ FullMetadataOption = Annotated[bool, typer.Option("--full-metadata", help="Fetch
# Glob pattern filters for TDocs
SourcePatternOption = Annotated[
    list[str] | None,
    typer.Option("--source", help="Glob pattern for source field (e.g., '*huawei*'). Multiple values are OR'd.", envvar="TDC_SOURCE_PATTERN"),
    typer.Option("--source", help="Glob pattern for source field (e.g., '*huawei*'). Multiple values are OR'd.", envvar=ConfigEnvVar.TDC_SOURCE_PATTERN.name),
]
SourcePatternExcludeOption = Annotated[
    list[str] | None,
    typer.Option("--source-ex", help="Glob pattern to exclude source field. Multiple values are OR'd.", envvar="TDC_SOURCE_PATTERN_EXCLUDE"),
    typer.Option("--source-ex", help="Glob pattern to exclude source field. Multiple values are OR'd.", envvar=ConfigEnvVar.TDC_SOURCE_PATTERN_EXCLUDE.name),
]
TitlePatternOption = Annotated[
    list[str] | None,
    typer.Option("--title", help="Glob pattern for title field (e.g., '*AI*'). Multiple values are OR'd.", envvar="TDC_TITLE_PATTERN"),
    typer.Option("--title", help="Glob pattern for title field (e.g., '*AI*'). Multiple values are OR'd.", envvar=ConfigEnvVar.TDC_TITLE_PATTERN.name),
]
TitlePatternExcludeOption = Annotated[
    list[str] | None,
    typer.Option("--title-ex", help="Glob pattern to exclude title field. Multiple values are OR'd.", envvar="TDC_TITLE_PATTERN_EXCLUDE"),
    typer.Option("--title-ex", help="Glob pattern to exclude title field. Multiple values are OR'd.", envvar=ConfigEnvVar.TDC_TITLE_PATTERN_EXCLUDE.name),
]
AgendaPatternOption = Annotated[
    list[str] | None,
    typer.Option("--agenda", help="Glob pattern for agenda_item_text field. Multiple values are OR'd.", envvar="TDC_AGENDA_PATTERN"),
    typer.Option("--agenda", help="Glob pattern for agenda_item_text field. Multiple values are OR'd.", envvar=ConfigEnvVar.TDC_AGENDA_PATTERN.name),
]
AgendaPatternExcludeOption = Annotated[
    list[str] | None,
    typer.Option("--agenda-ex", help="Glob pattern to exclude agenda_item_text field. Multiple values are OR'd.", envvar="TDC_AGENDA_PATTERN_EXCLUDE"),
    typer.Option(
        "--agenda-ex", help="Glob pattern to exclude agenda_item_text field. Multiple values are OR'd.", envvar=ConfigEnvVar.TDC_AGENDA_PATTERN_EXCLUDE.name
    ),
]


@@ -114,22 +119,24 @@ ReleaseOption = Annotated[
DocOnlyOption = Annotated[bool, typer.Option("--doc-only/--no-doc-only", help="Attempt document-only download")]

# Options - General/Common
CacheDirOption = Annotated[Path | None, typer.Option("--cache-dir", "-c", help="Cache directory", envvar="TDC_CACHE_DIR")]
CacheDirOption = Annotated[Path | None, typer.Option("--cache-dir", "-c", help="Cache directory", envvar=ConfigEnvVar.TDC_CACHE_DIR.name)]
ClearDbOption = Annotated[bool, typer.Option("--clear-db", help="Clear all meetings and TDocs before crawling")]
CheckoutOption = Annotated[
    bool, typer.Option("--checkout/--no-checkout", help="Download and extract metadata results to checkout folder", envvar="TDC_CHECKOUT")
    bool,
    typer.Option("--checkout/--no-checkout", help="Download and extract metadata results to checkout folder", envvar=ConfigEnvVar.TDC_CHECKOUT.name),
]
CheckoutDirOption = Annotated[Path | None, typer.Option("--checkout-dir", help="Directory for checkout files", envvar="TDC_CHECKOUT_DIR")]
WorkersOption = Annotated[int, typer.Option("--workers", help="Number of parallel subinterpreter workers", envvar="TDC_WORKERS")]
MaxRetriesOption = Annotated[int, typer.Option("--max-retries", help="HTTP retry attempts", envvar="TDC_MAX_RETRIES")]
TimeoutOption = Annotated[int, typer.Option("--timeout", help="HTTP timeout seconds", envvar="TDC_TIMEOUT")]
CheckoutDirOption = Annotated[Path | None, typer.Option("--checkout-dir", help="Directory for checkout files", envvar=ConfigEnvVar.TDC_CHECKOUT_DIR.name)]
WorkersOption = Annotated[int, typer.Option("--workers", help="Number of parallel subinterpreter workers", envvar=ConfigEnvVar.TDC_WORKERS.name)]
MaxRetriesOption = Annotated[int, typer.Option("--max-retries", help="HTTP retry attempts", envvar=ConfigEnvVar.TDC_MAX_RETRIES.name)]
TimeoutOption = Annotated[int, typer.Option("--timeout", help="HTTP timeout seconds", envvar=ConfigEnvVar.TDC_TIMEOUT.name)]
VerbosityOption = Annotated[
    str,
    typer.Option("--verbosity", "-v", help="Logging verbosity level (DEBUG, INFO, WARNING, ERROR, CRITICAL)", envvar="TDC_VERBOSITY"),
    typer.Option("--verbosity", "-v", help="Logging verbosity level (DEBUG, INFO, WARNING, ERROR, CRITICAL)", envvar=ConfigEnvVar.TDC_VERBOSITY.name),
]

UseWhatTheSpecOption = Annotated[
    bool, typer.Option("--use-whatthespec/--no-use-whatthespec", help="Use WhatTheSpec API for fetching", envvar="TDC_USE_WHATTHESPEC")
    bool,
    typer.Option("--use-whatthespec/--no-use-whatthespec", help="Use WhatTheSpec API for fetching", envvar=ConfigEnvVar.TDC_USE_WHATTHESPEC.name),
]


@@ -140,7 +147,7 @@ HttpCacheOption = Annotated[
    typer.Option(
        "--http-cache/--no-http-cache",
        help="Enable/disable HTTP caching. If not specified, uses HTTP_CACHE_ENABLED env var or defaults to enabled.",
        envvar="HTTP_CACHE_ENABLED",
        envvar=ConfigEnvVar.HTTP_CACHE_ENABLED.name,
    ),
]

@@ -181,7 +188,7 @@ EmbeddingBackendOption = Annotated[
        "--accelerate",
        "-a",
        help="Embedding backend (torch, onnx, openvino)",
        envvar="TDC_AI_EMBEDDING_BACKEND",
        envvar=ConfigEnvVar.TDC_AI_EMBEDDING_BACKEND.name,
    ),
]
_ = Annotated[str | None, typer.Option("--checkout-path", help="Path to checkout document")]
@@ -205,7 +212,7 @@ WorkspaceActivateOption = Annotated[
        True,
        "--activate/--no-activate",
        help="Activate workspace after creation",
        envvar="TDC_AI_WORKSPACE_ACTIVATE",
        envvar=ConfigEnvVar.TDC_AI_WORKSPACE_ACTIVATE.name,
    ),
]

+3 −1
Original line number Diff line number Diff line
@@ -7,9 +7,10 @@ from pathlib import Path
from typing import Self

# Import settings and sources modules
from tdoc_crawler.config.compat import (
from tdoc_crawler.config.env_vars import (
    DEPRECATED_ENV_VARS,
    ENV_VAR_MAPPINGS,
    ConfigEnvVar,
    log_deprecation_warning,
)
from tdoc_crawler.config.settings import (
@@ -47,6 +48,7 @@ __all__ = [
    "DEPRECATED_ENV_VARS",
    "ENV_VAR_MAPPINGS",
    "CacheManager",
    "ConfigEnvVar",
    "ConfigLoadError",
    "CrawlConfig",
    "CredentialsConfig",

src/tdoc_crawler/config/compat.py

deleted100644 → 0
+0 −95
Original line number Diff line number Diff line
"""Backward compatibility layer for environment variable mapping.

This module provides mappings between legacy environment variable names and
their corresponding config field paths, enabling seamless migration from
env-var-based configuration to the new config-file-based approach.

All existing TDC_*, HTTP_CACHE_*, and LIGHTRAG_* environment variables
remain functional via pydantic's AliasChoices mechanism.
"""

from __future__ import annotations

import logging

logger = logging.getLogger(__name__)

# Lookup table from environment variable name (key) to the dotted config
# field path it populates (value).
# Used for documentation and validation purposes.
ENV_VAR_MAPPINGS: dict[str, str] = {
    # Path/Cache (TDC_*)
    "TDC_CACHE_DIR": "path.cache_dir",
    "TDC_AI_STORE_PATH": "path.ai_cache_dir",
    # Credentials (TDC_*)
    "TDC_EOL_USERNAME": "credentials.username",
    "TDC_EOL_PASSWORD": "credentials.password",
    "TDC_EOL_PROMPT": "credentials.prompt",
    # HTTP/SQLite (overrides)
    "TDC_VERIFY_SSL": "http.verify_ssl",
    "TDC_TIMEOUT": "http.timeout",
    "TDC_MAX_RETRIES": "http.max_retries",
    # HTTP Cache (HTTP_CACHE_*)
    "HTTP_CACHE_TTL": "http.cache_ttl",
    "HTTP_CACHE_ENABLED": "http.cache_enabled",
    "HTTP_CACHE_REFRESH_ON_ACCESS": "http.cache_refresh_on_access",
    # Crawl filters (TDC_ prefixed)
    "TDC_WORKING_GROUP": "crawl.working_group",
    "TDC_SUB_GROUP": "crawl.sub_group",
    "TDC_START_DATE": "crawl.date_start",
    "TDC_END_DATE": "crawl.date_end",
    "TDC_SOURCE_LIKE": "crawl.source_like",
    "TDC_AGENDA_LIKE": "crawl.agenda_like",
    "TDC_TITLE_LIKE": "crawl.title_like",
    "TDC_LIMIT_TDOCS": "crawl.limit",
    "TDC_WORKERS": "crawl.workers",
    # AI/LightRAG (TDC_AI_* and LIGHTRAG_*)
    "TDC_AI_LLM_MODEL": "ai.llm_model",
    "TDC_AI_LLM_API_BASE": "ai.llm_api_base",
    "TDC_AI_LLM_API_KEY": "ai.llm_api_key",
    "TDC_AI_EMBEDDING_MODEL": "ai.embedding_model",
    "TDC_AI_EMBEDDING_API_BASE": "ai.embedding_api_base",
    "TDC_AI_EMBEDDING_API_KEY": "ai.embedding_api_key",
    "TDC_AI_MAX_CHUNK_SIZE": "ai.max_chunk_size",
    "TDC_AI_CHUNK_OVERLAP": "ai.chunk_overlap",
    "TDC_AI_CONVERT_PDF": "ai.convert_pdf",
    "TDC_AI_CONVERT_MD": "ai.convert_md",
    "TDC_AI_VLM": "ai.vlm",
    "TDC_AI_ABSTRACT_MIN_WORDS": "ai.abstract_min_words",
    "TDC_AI_ABSTRACT_MAX_WORDS": "ai.abstract_max_words",
    "TDC_AI_PARALLELISM": "ai.parallelism",
    "TDC_GRAPH_QUERY_LEVEL": "ai.graph_query_level",
    # The next two names intentionally share one config path: both the
    # TDC_-prefixed and the bare LIGHTRAG_ spelling appear as separate keys.
    "TDC_LIGHTRAG_SHARED_STORAGE": "ai.lightrag.shared_storage",
    "LIGHTRAG_SHARED_STORAGE": "ai.lightrag.shared_storage",
    "LIGHTRAG_DB_BACKEND": "ai.lightrag.db_backend",
}

# Deprecated environment variables that will produce warnings.
# Format: "OLD_VAR": "Use NEW_VAR instead"
# Empty at present, so log_deprecation_warning() always takes its debug
# branch until an entry is added here.
DEPRECATED_ENV_VARS: dict[str, str] = {}


def log_deprecation_warning(env_var_name: str) -> None:
    """Report an environment variable through the module logger.

    A name listed in ``DEPRECATED_ENV_VARS`` is logged at WARNING level
    together with its replacement hint. Any other name is logged at DEBUG
    level as having no documented mapping.

    Args:
        env_var_name: Name of the environment variable to report.
    """
    # Guard clause: names without a deprecation entry only get a debug note.
    if env_var_name not in DEPRECATED_ENV_VARS:
        logger.debug(
            "Environment variable '%s' is set but has no documented mapping",
            env_var_name,
        )
        return

    hint = DEPRECATED_ENV_VARS[env_var_name]
    logger.warning(
        "Environment variable '%s' is deprecated. %s",
        env_var_name,
        hint,
    )


__all__ = [
    "DEPRECATED_ENV_VARS",
    "ENV_VAR_MAPPINGS",
    "log_deprecation_warning",
]
+138 −0
Original line number Diff line number Diff line
"""
Environment variable name constants mapped to their corresponding config field paths.

All TDC_*, HTTP_CACHE_*, and LIGHTRAG_* environment variables remain functional
via pydantic's AliasChoices mechanism.

This module provides a StrEnum `ConfigEnvVar` where:
- Enum member names are the environment variable names (e.g., TDC_WORKING_GROUP)
- Enum member values are the TOML config field paths (e.g., "crawl.working_group")

Use these constants in CLI argument definitions to avoid hardcoding strings:
    # In args.py:
    from tdoc_crawler.config import ConfigEnvVar

    WorkingGroupOption = Annotated[
        list[str] | None,
        typer.Option("--working-group", envvar=ConfigEnvVar.TDC_WORKING_GROUP.name),
    ]
"""

from __future__ import annotations

import logging
from enum import StrEnum

logger = logging.getLogger(__name__)


class ConfigEnvVar(StrEnum):
    """Environment variable to TOML config field path mappings.

    Enum member names are env var names, values are config paths.
    Use member values (str) for pydantic validation_alias and
    member names for typer Option envvar parameter.
    """

    # Path/Cache (TDC_*)
    TDC_CACHE_DIR = "path.cache_dir"
    TDC_AI_STORE_PATH = "path.ai_cache_dir"
    # Credentials (TDC_*)
    TDC_EOL_USERNAME = "credentials.username"
    TDC_EOL_PASSWORD = "credentials.password"  # noqa: S105
    TDC_EOL_PROMPT = "credentials.prompt"
    # HTTP/SQLite (overrides)
    TDC_VERIFY_SSL = "http.verify_ssl"
    TDC_TIMEOUT = "http.timeout"
    TDC_MAX_RETRIES = "http.max_retries"
    # HTTP Cache (HTTP_CACHE_*)
    HTTP_CACHE_TTL = "http.cache_ttl"
    HTTP_CACHE_ENABLED = "http.cache_enabled"
    HTTP_CACHE_REFRESH_ON_ACCESS = "http.cache_refresh_on_access"
    # Crawl filters (TDC_ prefixed)
    TDC_WORKING_GROUP = "crawl.working_group"
    TDC_SUB_GROUP = "crawl.sub_group"
    TDC_START_DATE = "crawl.date_start"
    TDC_END_DATE = "crawl.date_end"
    TDC_SOURCE_LIKE = "crawl.source_like"
    TDC_AGENDA_LIKE = "crawl.agenda_like"
    TDC_TITLE_LIKE = "crawl.title_like"
    TDC_LIMIT_TDOCS = "crawl.limit"
    TDC_WORKERS = "crawl.workers"
    # AI/LightRAG (TDC_AI_* and LIGHTRAG_*)
    TDC_AI_LLM_MODEL = "ai.llm_model"
    TDC_AI_LLM_API_BASE = "ai.llm_api_base"
    TDC_AI_LLM_API_KEY = "ai.llm_api_key"
    TDC_AI_EMBEDDING_MODEL = "ai.embedding_model"
    TDC_AI_EMBEDDING_API_BASE = "ai.embedding_api_base"
    TDC_AI_EMBEDDING_API_KEY = "ai.embedding_api_key"
    TDC_AI_MAX_CHUNK_SIZE = "ai.max_chunk_size"
    TDC_AI_CHUNK_OVERLAP = "ai.chunk_overlap"
    TDC_AI_CONVERT_PDF = "ai.convert_pdf"
    TDC_AI_CONVERT_MD = "ai.convert_md"
    TDC_AI_VLM = "ai.vlm"
    TDC_AI_ABSTRACT_MIN_WORDS = "ai.abstract_min_words"
    TDC_AI_ABSTRACT_MAX_WORDS = "ai.abstract_max_words"
    TDC_AI_PARALLELISM = "ai.parallelism"
    TDC_GRAPH_QUERY_LEVEL = "ai.graph_query_level"
    TDC_LIGHTRAG_SHARED_STORAGE = "ai.lightrag.shared_storage"
    LIGHTRAG_SHARED_STORAGE = "ai.lightrag.shared_storage"
    LIGHTRAG_DB_BACKEND = "ai.lightrag.db_backend"
    # AI-specific (not in settings.py but used in CLI args)
    TDC_AI_OUTPUT_FORMAT = "ai.output_format"
    TDC_LIMIT_MEETINGS = "crawl.limit_meetings"
    TDC_LIMIT_MEETINGS_PER_SUBWG = "crawl.limit_meetings_per_subwg"
    TDC_OVERALL_TIMEOUT = "crawl.overall_timeout"
    TDC_OUTPUT = "output_format"
    TDC_SOURCE_PATTERN = "crawl.source_pattern"
    TDC_SOURCE_PATTERN_EXCLUDE = "crawl.source_pattern_exclude"
    TDC_TITLE_PATTERN = "crawl.title_pattern"
    TDC_TITLE_PATTERN_EXCLUDE = "crawl.title_pattern_exclude"
    TDC_AGENDA_PATTERN = "crawl.agenda_pattern"
    TDC_AGENDA_PATTERN_EXCLUDE = "crawl.agenda_pattern_exclude"
    TDC_CHECKOUT = "crawl.checkout"
    TDC_CHECKOUT_DIR = "path.checkout_dir"
    TDC_VERBOSITY = "verbosity"
    TDC_USE_WHATTHESPEC = "http.use_whatthespec"
    TDC_AI_EMBEDDING_BACKEND = "ai.embedding_backend"
    TDC_AI_WORKSPACE_ACTIVATE = "ai.workspace_activate"
    TDC_AI_DEVICE = "ai.device"
    TDC_AI_NUM_THREADS = "ai.num_threads"
    TDC_AI_BATCH_SIZE = "ai.batch_size"


# Derived dict for backward compatibility (tools/docs that need dict form)
ENV_VAR_MAPPINGS: dict[str, str] = {e.name: e.value for e in ConfigEnvVar}


# Deprecated environment variables that will produce warnings.
# Format: "OLD_VAR": "Use NEW_VAR instead"
DEPRECATED_ENV_VARS: dict[str, str] = {}


def log_deprecation_warning(env_var_name: str) -> None:
    """Report an environment variable through the module logger.

    A name listed in ``DEPRECATED_ENV_VARS`` is logged at WARNING level
    together with its replacement hint. Any other name is logged at DEBUG
    level as having no documented mapping.

    Args:
        env_var_name: Name of the environment variable to report.
    """
    # Guard clause: names without a deprecation entry only get a debug note.
    if env_var_name not in DEPRECATED_ENV_VARS:
        logger.debug(
            "Environment variable '%s' is set but has no documented mapping",
            env_var_name,
        )
        return

    hint = DEPRECATED_ENV_VARS[env_var_name]
    logger.warning(
        "Environment variable '%s' is deprecated. %s",
        env_var_name,
        hint,
    )


__all__ = [
    "DEPRECATED_ENV_VARS",
    "ENV_VAR_MAPPINGS",
    "ConfigEnvVar",
    "log_deprecation_warning",
]