Loading .env.example +0 −3 Original line number Diff line number Diff line Loading @@ -53,9 +53,6 @@ TDC_MAX_RETRIES=3 # Time-to-live for cached HTTP responses in seconds (default: 7200 = 2 hours) # HTTP_CACHE_TTL=7200 # Refresh TTL when a cached response is accessed (default: true) # HTTP_CACHE_REFRESH_ON_ACCESS=true # ============================================================================ # CRAWL FILTER CONFIGURATION # ============================================================================ Loading demo.bat +2 −2 Original line number Diff line number Diff line Loading @@ -35,5 +35,5 @@ tdoc-crawler query --agenda "*atias*" --start-date 2018 :: overview 3gpp-crawler workspace members :: convert tdocs/specs to PDF/Markdown/artefacts for AI processing 3gpp-crawler workspace process :: convert tdocs/specs to PDF/artefacts for AI processing (portable fallback profile) 3gpp-crawler workspace process --profile pdf-only docs/development.md +5 −3 Original line number Diff line number Diff line Loading @@ -77,7 +77,7 @@ uv run ty check **Two complementary systems:** 1. **`ThreeGPPConfig`** (pydantic-settings, alias `TDocCrawlerConfig`) — Type-safe configuration from files/env vars 2. **`CacheManager`** (runtime paths) — File system path resolution 1. **`CacheManager`** (runtime paths) — File system path resolution ### ThreeGPPConfig (Settings) Loading @@ -100,15 +100,17 @@ config.crawl.workers # Concurrent crawl workers ``` **Config file discovery order** (later overrides earlier): 1. Global: `~/.config/3gpp-crawler/config.toml` 2. Project: `3gpp-crawler.toml`, `.3gpp-crawler.toml`, `.3gpp-crawler/config.toml` 3. Config dir: `.config/.3gpp-crawler/conf.d/*.toml` (alphabetical) 1. Project: `3gpp-crawler.toml`, `.3gpp-crawler.toml`, `.3gpp-crawler/config.toml` 1. Config dir: `.config/.3gpp-crawler/conf.d/*.toml` (alphabetical) **Precedence:** CLI args > Config file > Environment variables > Defaults **Supported formats:** TOML (primary), YAML, JSON **Environment variable prefixes:** - `TDC_*` — Path settings - `TDC_EOL_*` — Portal credentials - `TDC_CRAWL_*` — Crawl filters Loading ruff.toml +0 −1 Original line number Diff line number Diff line Loading @@ -68,7 +68,6 @@ max-locals = 20 "tests/**/*.py" = ["S101", "S106", "PLR6301", "S603", "PLW1510"] # load_dotenv() must run before all other imports to populate env vars before pydantic-settings reads them "src/tdoc_crawler/cli/tdoc_app.py" = ["E402"] "packages/3gpp-ai/threegpp_ai/cli.py" = ["E402"] [lint.pydocstyle] convention = "google" Loading src/tdoc_crawler/config/AGENTS.md +3 −2 Original line number Diff line number Diff line Loading @@ -19,9 +19,10 @@ config.crawl.workers # Concurrent workers ``` **Config discovery** (later overrides earlier): 1. Global: `~/.config/3gpp-crawler/config.toml` 2. Project: `3gpp-crawler.toml`, `.3gpp-crawler.toml`, `.3gpp-crawler/config.toml` 3. Config dir: `.config/.3gpp-crawler/conf.d/*.toml` 1. Project: `3gpp-crawler.toml`, `.3gpp-crawler.toml`, `.3gpp-crawler/config.toml` 1. Config dir: `.config/.3gpp-crawler/conf.d/*.toml` **Precedence:** CLI args > Config file > Env vars > Defaults Loading Loading
.env.example +0 −3 Original line number Diff line number Diff line Loading @@ -53,9 +53,6 @@ TDC_MAX_RETRIES=3 # Time-to-live for cached HTTP responses in seconds (default: 7200 = 2 hours) # HTTP_CACHE_TTL=7200 # Refresh TTL when a cached response is accessed (default: true) # HTTP_CACHE_REFRESH_ON_ACCESS=true # ============================================================================ # CRAWL FILTER CONFIGURATION # ============================================================================ Loading
demo.bat +2 −2 Original line number Diff line number Diff line Loading @@ -35,5 +35,5 @@ tdoc-crawler query --agenda "*atias*" --start-date 2018 :: overview 3gpp-crawler workspace members :: convert tdocs/specs to PDF/Markdown/artefacts for AI processing 3gpp-crawler workspace process :: convert tdocs/specs to PDF/artefacts for AI processing (portable fallback profile) 3gpp-crawler workspace process --profile pdf-only
docs/development.md +5 −3 Original line number Diff line number Diff line Loading @@ -77,7 +77,7 @@ uv run ty check **Two complementary systems:** 1. **`ThreeGPPConfig`** (pydantic-settings, alias `TDocCrawlerConfig`) — Type-safe configuration from files/env vars 2. **`CacheManager`** (runtime paths) — File system path resolution 1. **`CacheManager`** (runtime paths) — File system path resolution ### ThreeGPPConfig (Settings) Loading @@ -100,15 +100,17 @@ config.crawl.workers # Concurrent crawl workers ``` **Config file discovery order** (later overrides earlier): 1. Global: `~/.config/3gpp-crawler/config.toml` 2. Project: `3gpp-crawler.toml`, `.3gpp-crawler.toml`, `.3gpp-crawler/config.toml` 3. Config dir: `.config/.3gpp-crawler/conf.d/*.toml` (alphabetical) 1. Project: `3gpp-crawler.toml`, `.3gpp-crawler.toml`, `.3gpp-crawler/config.toml` 1. Config dir: `.config/.3gpp-crawler/conf.d/*.toml` (alphabetical) **Precedence:** CLI args > Config file > Environment variables > Defaults **Supported formats:** TOML (primary), YAML, JSON **Environment variable prefixes:** - `TDC_*` — Path settings - `TDC_EOL_*` — Portal credentials - `TDC_CRAWL_*` — Crawl filters Loading
ruff.toml +0 −1 Original line number Diff line number Diff line Loading @@ -68,7 +68,6 @@ max-locals = 20 "tests/**/*.py" = ["S101", "S106", "PLR6301", "S603", "PLW1510"] # load_dotenv() must run before all other imports to populate env vars before pydantic-settings reads them "src/tdoc_crawler/cli/tdoc_app.py" = ["E402"] "packages/3gpp-ai/threegpp_ai/cli.py" = ["E402"] [lint.pydocstyle] convention = "google" Loading
src/tdoc_crawler/config/AGENTS.md +3 −2 Original line number Diff line number Diff line Loading @@ -19,9 +19,10 @@ config.crawl.workers # Concurrent workers ``` **Config discovery** (later overrides earlier): 1. Global: `~/.config/3gpp-crawler/config.toml` 2. Project: `3gpp-crawler.toml`, `.3gpp-crawler.toml`, `.3gpp-crawler/config.toml` 3. Config dir: `.config/.3gpp-crawler/conf.d/*.toml` 1. Project: `3gpp-crawler.toml`, `.3gpp-crawler.toml`, `.3gpp-crawler/config.toml` 1. Config dir: `.config/.3gpp-crawler/conf.d/*.toml` **Precedence:** CLI args > Config file > Env vars > Defaults Loading