# 3GPP AI Configuration
# Generated by: 3gpp-ai config init
#
# Configuration precedence (highest to lowest):
#   1. CLI arguments
#   2. This config file
#   3. Environment variables (TDC_*, LIGHTRAG_*)
#   4. Hard-coded defaults
#
# Sensitive values use ${ENV_VAR} syntax for security.
# Environment variables are interpolated at runtime.
#
# NOTE: TOML has no null value. Optional settings that were previously
# written as `key = null` (a syntax error) are left commented out; the
# application falls back to its hard-coded defaults when a key is absent.

[path]
# Root cache directory for storing downloaded files and metadata.
# Literal (single-quoted) string: backslashes in Windows paths must not
# be interpreted as escape sequences.
# Default: C:\Users\Jan.Reimes\.3gpp-crawler
cache_dir = 'C:\Users\Jan.Reimes\.3gpp-crawler'

# SQLite database filename for storing crawl metadata
# Default: 3gpp_crawler.db
db_filename = "3gpp_crawler.db"

# Subdirectory name for checked-out documents
# Default: checkout
checkout_dirname = "checkout"

# Subdirectory name for AI-related cache (embeddings, graphs)
# Default: lightrag
ai_cache_dirname = "lightrag"

[http]
# Time-to-live for HTTP cache entries in seconds
# Default: 7200
cache_ttl = 7200

# Enable HTTP response caching
# Default: true
cache_enabled = true

# Refresh cache TTL on each access
# Default: true
cache_refresh_on_access = true

# Verify SSL certificates for HTTPS requests
# Default: true
verify_ssl = true

# Maximum number of retry attempts for failed requests
# Default: 3
max_retries = 3

# HTTP request timeout in seconds
# Default: 30
timeout = 60

[credentials]
# Username for ETSI Online (EOL) portal authentication
username = "reimes"

# Password for ETSI Online (EOL) portal authentication.
# SECURITY: a plaintext password was previously committed here. Per the
# header of this file, sensitive values must use ${ENV_VAR} interpolation.
# Set TDC_CREDENTIALS_PASSWORD in the environment; rotate the old password,
# as it must be considered compromised.
password = "${TDC_CREDENTIALS_PASSWORD}"

# Custom prompt message for interactive credential entry.
# NOTE(review): "false" is a string here, not a boolean — it looks like it
# was meant to disable prompting; confirm the expected type with the app.
prompt = "false"

[crawl]
# Filter by working group (e.g., S4, RAN1, CT3)
# working_group = "S4"

# Filter by sub-working group
# sub_group = ""

# Start date filter (YYYY-MM-DD, YYYY-MM, or YYYY format)
# date_start = ""

# End date filter (YYYY-MM-DD, YYYY-MM, or YYYY format)
# date_end = ""

# SQL LIKE pattern to match document source
# source_like = ""

# SQL LIKE pattern to match agenda item
# agenda_like = ""

# SQL LIKE pattern to match document title
# title_like = ""

# Maximum number of documents to crawl
# Default: 1000
limit = 1000

# Number of concurrent workers for crawling
# Default: 4
workers = 4

[llm]
# LLM model name in <provider>/<model> format
# Default: openrouter/openrouter/free
model = "openai/glm-4.7"

# LLM API base URL
# Default: http://localhost:11434
api_base = "https://api.z.ai/api/coding/paas/v4"

# API key for LLM provider
# Environment: TDC_AI_LLM_API_KEY
api_key = "${TDC_AI_LLM_API_KEY}"

[embedding]
# Embedding model name in <provider>/<model> format
# Default: ollama/qwen3-embedding:0.6b
model = "ollama/embeddinggemma:latest"

# Embedding API base URL
# Default: http://localhost:11434
api_base = "http://localhost:11434"

# API key for embedding provider (unset: local Ollama needs no key)
# api_key = "${TDC_AI_EMBEDDING_API_KEY}"

[database]
# Storage backend to use (file or pg0)
# Default: file
backend = "file"

# pg0 instance name
# Default: 3gpp-crawler
pg0_instance_name = "3gpp-crawler"

# pg0 PostgreSQL port
# Default: 15432
pg0_port = 15432

# pg0 database name
# Default: tdoc
pg0_database = "tdoc"

[extraction]
# Enable extraction and indexing of table elements
# Default: true
tables = true

# Enable extraction and indexing of figure elements
# Default: true
figures = true

# Enable extraction and indexing of equation elements
# Default: true
equations = true

# Enable figure description generation with vision-capable models
# Default: true
figure_description = true

[workspace]
# Default workspace name
# Default: default
default_name = "default"

# Default query mode (naive, local, global, hybrid, mix, bypass)
# Default: hybrid
default_query_mode = "hybrid"

# Enable shared embedding storage across workspaces (deduplication)
# Default: false
shared_storage = false
# (Removed: an accidental second copy of the entire configuration, pasted
# together with diff-viewer chrome. TOML forbids defining the same
# [table] header more than once, so the duplicate made the file unparseable.)