Commit 95573882 authored by Jan Reimes
Browse files

feat(env): update environment variable names and add new configurations

- Rename environment variables for ETSI Online credentials to TDC_ prefix.
- Introduce new environment variables for cache directory, checkout directory,
  worker count, HTTP timeout, maximum retries, and limits for TDocs and meetings.
- Update CLI argument definitions to use new environment variable names.
- Enhance tests to verify environment variable support in CLI options.
parent 6747fbe4
Loading
Loading
Loading
Loading
+60 −7
Original line number Diff line number Diff line
@@ -5,14 +5,67 @@
# Sign up for an EOL account at: https://portal.etsi.org/

# Your ETSI Online username
EOL_USERNAME=your_username_here
TDC_EOL_USERNAME=your_username_here

# Your ETSI Online password
EOL_PASSWORD=your_password_here
TDC_EOL_PASSWORD=your_password_here

# Whether to prompt for credentials when missing (default: false unless EOL_PROMPT=true)
# Whether to prompt for credentials when missing (default: false unless TDC_EOL_PROMPT=true)
# Set to "true", "1", or "yes" to enable interactive prompting
EOL_PROMPT=false
TDC_EOL_PROMPT=false

# Cache and Directory Configuration

# Cache directory for storing downloaded metadata and files (default: ~/.tdoc-crawler)
TDC_CACHE_DIR=/path/to/cache/dir

# Checkout directory for downloaded TDocs (default: ./checkout)
TDC_CHECKOUT_DIR=/path/to/checkout/dir

# Crawler Configuration

# Number of parallel subinterpreter workers (default: 4)
TDC_WORKERS=4

# HTTP timeout in seconds (default: 60)
TDC_TIMEOUT=60

# Maximum HTTP retry attempts (default: 3)
TDC_MAX_RETRIES=3

# Maximum total crawl duration in seconds (default: None = unlimited)
TDC_OVERALL_TIMEOUT=

# Filtering and Limits

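# Filter by working group (comma-separated list)
# NOTE: Typer/Click splits list-type environment variables on whitespace by default;
# verify that comma-separated values are parsed into separate entries as intended.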
TDC_WORKING_GROUP=SA2,RAN1

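# Filter by sub-working group (comma-separated list)
# NOTE: Typer/Click splits list-type environment variables on whitespace by default;
# verify that comma-separated values are parsed into separate entries as intended.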
TDC_SUB_GROUP=RAN1,RAN2

# Limit number of TDocs to crawl (default: None = no limit)
TDC_LIMIT_TDOCS=100

# Limit total meetings to crawl (default: None = no limit)
TDC_LIMIT_MEETINGS=10

# Query date range - start date (ISO 8601 timestamp, e.g., 2024-01-01T00:00:00Z)
TDC_START_DATE=2024-01-01T00:00:00Z

# Query date range - end date (ISO 8601 timestamp, e.g., 2024-12-31T23:59:59Z)
TDC_END_DATE=2024-12-31T23:59:59Z

# Output Configuration

# Output format for query results (e.g., table, csv, json)
TDC_OUTPUT=table

# Logging

# Enable verbose logging (default: false)
# Set to "true", "1", or "yes" to enable
TDC_VERBOSE=false

# HTTP Cache Configuration
# Controls caching behavior for all HTTP requests
@@ -20,9 +73,9 @@ EOL_PROMPT=false
# Time-to-live for cached HTTP responses in seconds (default: 7200 = 2 hours)
HTTP_CACHE_TTL=7200

# Whether to refresh the TTL when a cached response is accessed (default: true)
# Whether to refresh the TTL when a cached response is accessed (default: true)
# Set to "true", "1", "yes", or "on" to enable; anything else disables it
HTTP_CACHE_REFRESH_ON_ACCESS=true

# Note: Never commit the actual .env file to version control!
# Copy this file to .env and replace the placeholders with your actual credentials.
# Note: Never commit the actual .env file to version control!
# Copy this file to .env and replace the placeholders with your actual credentials and preferences.
+16 −16
Original line number Diff line number Diff line
@@ -7,42 +7,42 @@ from typing import Annotated

import typer

CacheDirOption = Annotated[Path, typer.Option("--cache-dir", "-c", help="Cache directory")]
WorkingGroupOption = Annotated[list[str] | None, typer.Option("--working-group", "-w", help="Filter by working group")]
SubgroupOption = Annotated[list[str] | None, typer.Option("--sub-group", "-s", help="Filter by sub-working group")]
CacheDirOption = Annotated[Path, typer.Option("--cache-dir", "-c", help="Cache directory", envvar="TDC_CACHE_DIR")]
WorkingGroupOption = Annotated[list[str] | None, typer.Option("--working-group", "-w", help="Filter by working group", envvar="TDC_WORKING_GROUP")]
SubgroupOption = Annotated[list[str] | None, typer.Option("--sub-group", "-s", help="Filter by sub-working group", envvar="TDC_SUB_GROUP")]
IncrementalOption = Annotated[bool, typer.Option("--incremental/--full", help="Toggle incremental mode")]
ClearTDocsOption = Annotated[bool, typer.Option("--clear-tdocs", help="Clear all TDocs before crawling")]
ClearSpecsOption = Annotated[bool, typer.Option("--clear-specs", help="Clear all specs before crawling")]
ClearDbOption = Annotated[bool, typer.Option("--clear-db", help="Clear all meetings and TDocs before crawling")]
CheckoutOption = Annotated[bool, typer.Option("--checkout", help="Download and extract metadata results to checkout folder")]
LimitTDocsOption = Annotated[int | None, typer.Option("--limit-tdocs", help="Limit number of TDocs")]
LimitMeetingsOption = Annotated[int | None, typer.Option("--limit-meetings", help="Limit meetings overall")]
LimitTDocsOption = Annotated[int | None, typer.Option("--limit-tdocs", help="Limit number of TDocs", envvar="TDC_LIMIT_TDOCS")]
LimitMeetingsOption = Annotated[int | None, typer.Option("--limit-meetings", help="Limit meetings overall", envvar="TDC_LIMIT_MEETINGS")]
LimitMeetingsPerWgOption = Annotated[int | None, typer.Option("--limit-meetings-per-wg", help="Limit meetings per working group")]
LimitWgsOption = Annotated[int | None, typer.Option("--limit-wgs", help="Limit number of working groups")]
WorkersOption = Annotated[int, typer.Option("--workers", help="Number of parallel subinterpreter workers")]
WorkersOption = Annotated[int, typer.Option("--workers", help="Number of parallel subinterpreter workers", envvar="TDC_WORKERS")]
OverallTimeoutOption = Annotated[
    int | None,
    typer.Option("--overall-timeout", help="Maximum total crawl duration in seconds (None = unlimited)"),
    typer.Option("--overall-timeout", help="Maximum total crawl duration in seconds (None = unlimited)", envvar="TDC_OVERALL_TIMEOUT"),
]
MaxRetriesOption = Annotated[int, typer.Option("--max-retries", help="HTTP retry attempts")]
TimeoutOption = Annotated[int, typer.Option("--timeout", help="HTTP timeout seconds")]
VerboseOption = Annotated[bool, typer.Option("--verbose", "-v", help="Enable verbose logging")]
MaxRetriesOption = Annotated[int, typer.Option("--max-retries", help="HTTP retry attempts", envvar="TDC_MAX_RETRIES")]
TimeoutOption = Annotated[int, typer.Option("--timeout", help="HTTP timeout seconds", envvar="TDC_TIMEOUT")]
VerboseOption = Annotated[bool, typer.Option("--verbose", "-v", help="Enable verbose logging", envvar="TDC_VERBOSE")]

TDocIdsArgument = Annotated[list[str] | None, typer.Argument(help="TDoc identifiers to query")]
OutputFormatOption = Annotated[str, typer.Option("--output", "-o", help="Output format")]
OutputFormatOption = Annotated[str, typer.Option("--output", "-o", help="Output format", envvar="TDC_OUTPUT")]

FullMetadataOption = Annotated[bool, typer.Option("--full-metadata", help="Fetch full metadata instead of URL only")]
UseWhatTheSpecOption = Annotated[bool, typer.Option("--use-whatthespec", help="Use WhatTheSpec API for fetching")]
LimitOption = Annotated[int | None, typer.Option("--limit", "-l", help="Maximum number of rows")]
OrderOption = Annotated[str, typer.Option("--order", help="Sort order (asc|desc)")]
StartDateOption = Annotated[str | None, typer.Option("--start-date", help="Filter from ISO timestamp")]
EndDateOption = Annotated[str | None, typer.Option("--end-date", help="Filter until ISO timestamp")]
StartDateOption = Annotated[str | None, typer.Option("--start-date", help="Filter from ISO timestamp", envvar="TDC_START_DATE")]
EndDateOption = Annotated[str | None, typer.Option("--end-date", help="Filter until ISO timestamp", envvar="TDC_END_DATE")]
NoFetchOption = Annotated[
    bool,
    typer.Option("--no-fetch", help="Disable automatic fetching of missing TDocs from portal"),
]
EolUsernameOption = Annotated[str | None, typer.Option("--eol-username", help="ETSI Online account username")]
EolPasswordOption = Annotated[str | None, typer.Option("--eol-password", help="ETSI Online account password")]
EolUsernameOption = Annotated[str | None, typer.Option("--eol-username", help="ETSI Online account username", envvar="TDC_EOL_USERNAME")]
EolPasswordOption = Annotated[str | None, typer.Option("--eol-password", help="ETSI Online account password", envvar="TDC_EOL_PASSWORD")]
PromptCredentialsOption = Annotated[
    bool | None,
    typer.Option("--prompt-credentials/--no-prompt-credentials", help="Prompt for credentials when missing"),
@@ -61,4 +61,4 @@ SpecArgument = Annotated[list[str] | None, typer.Argument(help="Spec number(s) t
SpecFileOption = Annotated[Path | None, typer.Option("--spec-file", help="File with spec numbers")]
ReleaseOption = Annotated[str, typer.Option("--release", help="Spec release selector")]
DocOnlyOption = Annotated[bool, typer.Option("--doc-only/--no-doc-only", help="Attempt document-only download")]
CheckoutDirOption = Annotated[Path | None, typer.Option("--checkout-dir", help="Spec checkout base directory")]
CheckoutDirOption = Annotated[Path | None, typer.Option("--checkout-dir", help="Spec checkout base directory", envvar="TDC_CHECKOUT_DIR")]
+8 −8
Original line number Diff line number Diff line
@@ -19,11 +19,11 @@ def set_credentials(username: str | None, password: str | None, prompt: bool | N
        prompt: Whether to prompt for credentials when missing (optional)
    """
    if username is not None:
        os.environ["EOL_USERNAME"] = username
        os.environ["TDC_EOL_USERNAME"] = username
    if password is not None:
        os.environ["EOL_PASSWORD"] = password
        os.environ["TDC_EOL_PASSWORD"] = password
    if prompt is not None:
        os.environ["EOL_PROMPT"] = "true" if prompt else "false"
        os.environ["TDC_EOL_PROMPT"] = "true" if prompt else "false"


def resolve_credentials(
@@ -35,8 +35,8 @@ def resolve_credentials(

    Resolution order:
    1. CLI parameters (username, password)
    2. Environment variables (EOL_USERNAME, EOL_PASSWORD)
    3. Interactive prompt (if EOL_PROMPT=true or prompt=True, and stdin is a TTY)
    2. Environment variables (TDC_EOL_USERNAME, TDC_EOL_PASSWORD)
    3. Interactive prompt (if TDC_EOL_PROMPT=true or prompt=True, and stdin is a TTY)

    Args:
        username: CLI-provided username
@@ -46,13 +46,13 @@ def resolve_credentials(
    Returns:
        PortalCredentials instance if resolved, None otherwise
    """
    resolved_username = username or os.getenv("EOL_USERNAME")
    resolved_password = password or os.getenv("EOL_PASSWORD")
    resolved_username = username or os.getenv("TDC_EOL_USERNAME")
    resolved_password = password or os.getenv("TDC_EOL_PASSWORD")

    if resolved_username and resolved_password:
        return PortalCredentials(username=resolved_username, password=resolved_password)

    should_prompt = prompt if prompt is not None else os.getenv("EOL_PROMPT", "").lower() in ("true", "1", "yes")
    should_prompt = prompt if prompt is not None else os.getenv("TDC_EOL_PROMPT", "").lower() in ("true", "1", "yes")
    if should_prompt and not sys.stdin.isatty():
        should_prompt = False

+1 −1
Original line number Diff line number Diff line
@@ -115,7 +115,7 @@ def fetch_missing_tdocs_batch(

    if not credentials:
        logger.info("Portal credentials not available, skipping portal authentication fetch")
        errors.append("Portal credentials required for targeted fetch. Set EOL_USERNAME and EOL_PASSWORD.")
        errors.append("Portal credentials required for targeted fetch. Set TDC_EOL_USERNAME and TDC_EOL_PASSWORD.")
        return TDocCrawlResult(processed=len(missing_ids), inserted=0, updated=0, errors=errors)

    inserted_count = 0
+283 −112

File changed.

Preview size limit exceeded, changes collapsed.

Loading