Commit 0f155976 authored by Jan Reimes
Browse files

feat(cli): introduce shared argument and option definitions for Typer

- Added a new `args.py` file to define common Typer arguments and options.
- Refactored CLI commands in `app.py` to utilize these shared definitions.
- Improved code readability and maintainability by centralizing argument definitions.
parent 6bc8a94e
Loading
Loading
Loading
Loading
+83 −53
Original line number Diff line number Diff line
@@ -8,7 +8,6 @@ import sys
import zipfile
from datetime import datetime
from pathlib import Path
from typing import Annotated

import typer
import yaml
@@ -21,6 +20,37 @@ from tdoc_crawler.crawlers import MeetingCrawler, TDocCrawler
from tdoc_crawler.database import TDocDatabase
from tdoc_crawler.models import MeetingCrawlConfig, MeetingQueryConfig, OutputFormat, QueryConfig, SortOrder, TDocCrawlConfig

from .args import (
    CacheDirOption,
    CheckoutTDocIdsArgument,
    ClearDbOption,
    ClearTDocsOption,
    EndDateOption,
    EolPasswordOption,
    EolUsernameOption,
    ForceOption,
    IncludeWithoutFilesOption,
    IncrementalOption,
    LimitMeetingsOption,
    LimitMeetingsPerWgOption,
    LimitOption,
    LimitTDocsOption,
    LimitWgsOption,
    MaxRetriesOption,
    NoFetchOption,
    OrderOption,
    OutputFormatOption,
    OverallTimeoutOption,
    PromptCredentialsOption,
    StartDateOption,
    SubgroupOption,
    TDocIdArgument,
    TDocIdsArgument,
    TimeoutOption,
    VerboseOption,
    WorkersOption,
    WorkingGroupOption,
)
from .console import get_console
from .fetching import maybe_fetch_missing_tdocs
from .helpers import build_limits, database_path, launch_file, parse_subgroups, parse_working_groups, prepare_tdoc_file, resolve_credentials
@@ -41,20 +71,20 @@ HELP_PANEL_QUERY = "Query Commands"

@app.command("crawl-tdocs", rich_help_panel=HELP_PANEL_CRAWLING)
def crawl_tdocs(
    cache_dir: Annotated[Path, typer.Option("--cache-dir", "-c", help="Cache directory")] = DEFAULT_CACHE_DIR,
    working_group: Annotated[list[str] | None, typer.Option("--working-group", "-w", help="Working groups to crawl")] = None,
    subgroup: Annotated[list[str] | None, typer.Option("--sub-group", "-s", help="Filter by sub-working group")] = None,
    incremental: Annotated[bool, typer.Option("--incremental/--full", help="Toggle incremental mode")] = True,
    clear_tdocs: Annotated[bool, typer.Option("--clear-tdocs", help="Clear all TDocs before crawling")] = False,
    limit_tdocs: Annotated[int | None, typer.Option("--limit-tdocs", help="Limit number of TDocs")] = None,
    limit_meetings: Annotated[int | None, typer.Option("--limit-meetings", help="Limit meetings considered")] = None,
    limit_meetings_per_wg: Annotated[int | None, typer.Option("--limit-meetings-per-wg", help="Limit meetings per working group")] = None,
    limit_wgs: Annotated[int | None, typer.Option("--limit-wgs", help="Limit number of working groups")] = None,
    workers: Annotated[int, typer.Option("--workers", help="Number of parallel subinterpreter workers")] = 4,
    overall_timeout: Annotated[int | None, typer.Option("--overall-timeout", help="Maximum total crawl duration in seconds (None = unlimited)")] = None,
    max_retries: Annotated[int, typer.Option("--max-retries", help="HTTP connection retry attempts")] = 3,
    timeout: Annotated[int, typer.Option("--timeout", help="HTTP request timeout seconds")] = 30,
    verbose: Annotated[bool, typer.Option("--verbose", "-v", help="Enable verbose logging")] = False,
    cache_dir: CacheDirOption = DEFAULT_CACHE_DIR,
    working_group: WorkingGroupOption = None,
    subgroup: SubgroupOption = None,
    incremental: IncrementalOption = True,
    clear_tdocs: ClearTDocsOption = False,
    limit_tdocs: LimitTDocsOption = None,
    limit_meetings: LimitMeetingsOption = None,
    limit_meetings_per_wg: LimitMeetingsPerWgOption = None,
    limit_wgs: LimitWgsOption = None,
    workers: WorkersOption = 4,
    overall_timeout: OverallTimeoutOption = None,
    max_retries: MaxRetriesOption = 3,
    timeout: TimeoutOption = 30,
    verbose: VerboseOption = False,
) -> None:
    """Crawl TDocs from 3GPP FTP directories."""
    subgroups = parse_subgroups(subgroup)
@@ -147,20 +177,20 @@ def crawl_tdocs(

@app.command("crawl-meetings", rich_help_panel=HELP_PANEL_CRAWLING)
def crawl_meetings(
    cache_dir: Annotated[Path, typer.Option("--cache-dir", "-c", help="Cache directory")] = DEFAULT_CACHE_DIR,
    working_group: Annotated[list[str] | None, typer.Option("--working-group", "-w", help="Working groups to crawl")] = None,
    subgroup: Annotated[list[str] | None, typer.Option("--sub-group", "-s", help="Filter by sub-working group")] = None,
    incremental: Annotated[bool, typer.Option("--incremental/--full", help="Toggle incremental mode")] = True,
    clear_db: Annotated[bool, typer.Option("--clear-db", help="Clear all meetings and TDocs before crawling")] = False,
    limit_meetings: Annotated[int | None, typer.Option("--limit-meetings", help="Limit meetings overall")] = None,
    limit_meetings_per_wg: Annotated[int | None, typer.Option("--limit-meetings-per-wg", help="Limit meetings per working group")] = None,
    limit_wgs: Annotated[int | None, typer.Option("--limit-wgs", help="Limit number of working groups")] = None,
    max_retries: Annotated[int, typer.Option("--max-retries", help="HTTP retry attempts")] = 3,
    timeout: Annotated[int, typer.Option("--timeout", help="HTTP timeout seconds")] = 30,
    verbose: Annotated[bool, typer.Option("--verbose", "-v", help="Enable verbose logging")] = False,
    eol_username: Annotated[str | None, typer.Option("--eol-username", help="ETSI Online account username")] = None,
    eol_password: Annotated[str | None, typer.Option("--eol-password", help="ETSI Online account password")] = None,
    prompt_credentials: Annotated[bool, typer.Option("--prompt-credentials/--no-prompt-credentials", help="Prompt for credentials when missing")] = True,
    cache_dir: CacheDirOption = DEFAULT_CACHE_DIR,
    working_group: WorkingGroupOption = None,
    subgroup: SubgroupOption = None,
    incremental: IncrementalOption = True,
    clear_db: ClearDbOption = False,
    limit_meetings: LimitMeetingsOption = None,
    limit_meetings_per_wg: LimitMeetingsPerWgOption = None,
    limit_wgs: LimitWgsOption = None,
    max_retries: MaxRetriesOption = 3,
    timeout: TimeoutOption = 30,
    verbose: VerboseOption = False,
    eol_username: EolUsernameOption = None,
    eol_password: EolPasswordOption = None,
    prompt_credentials: PromptCredentialsOption = True,
) -> None:
    """Crawl meeting metadata from 3GPP portal."""
    subgroups = parse_subgroups(subgroup)
@@ -238,17 +268,17 @@ def crawl_meetings(

@app.command("query-tdocs", rich_help_panel=HELP_PANEL_QUERY)
def query_tdocs(
    tdoc_ids: Annotated[list[str] | None, typer.Argument(help="TDoc identifiers to query")] = None,
    cache_dir: Annotated[Path, typer.Option("--cache-dir", "-c", help="Cache directory")] = DEFAULT_CACHE_DIR,
    working_group: Annotated[list[str] | None, typer.Option("--working-group", "-w", help="Filter by working group")] = None,
    output_format: Annotated[str, typer.Option("--output", "-o", help="Output format")] = OutputFormat.TABLE.value,
    limit: Annotated[int | None, typer.Option("--limit", "-l", help="Maximum number of rows")] = None,
    order: Annotated[str, typer.Option("--order", help="Sort order (asc|desc)")] = SortOrder.DESC.value,
    start_date: Annotated[str | None, typer.Option("--start-date", help="Filter from ISO timestamp")] = None,
    end_date: Annotated[str | None, typer.Option("--end-date", help="Filter until ISO timestamp")] = None,
    no_fetch: Annotated[bool, typer.Option("--no-fetch", help="Disable automatic fetching of missing TDocs from portal")] = False,
    eol_username: Annotated[str | None, typer.Option("--eol-username", help="ETSI Online Account username")] = None,
    eol_password: Annotated[str | None, typer.Option("--eol-password", help="ETSI Online Account password")] = None,
    tdoc_ids: TDocIdsArgument = None,
    cache_dir: CacheDirOption = DEFAULT_CACHE_DIR,
    working_group: WorkingGroupOption = None,
    output_format: OutputFormatOption = OutputFormat.TABLE.value,
    limit: LimitOption = None,
    order: OrderOption = SortOrder.DESC.value,
    start_date: StartDateOption = None,
    end_date: EndDateOption = None,
    no_fetch: NoFetchOption = False,
    eol_username: EolUsernameOption = None,
    eol_password: EolPasswordOption = None,
) -> None:
    """Query TDoc metadata from database."""
    working_groups = parse_working_groups(working_group)
@@ -306,13 +336,13 @@ def query_tdocs(

@app.command("query-meetings", rich_help_panel=HELP_PANEL_QUERY)
def query_meetings(
    cache_dir: Annotated[Path, typer.Option("--cache-dir", "-c", help="Cache directory")] = DEFAULT_CACHE_DIR,
    working_group: Annotated[list[str] | None, typer.Option("--working-group", "-w", help="Filter by working group")] = None,
    subgroup: Annotated[list[str] | None, typer.Option("--sub-group", "-s", help="Filter by sub-working group")] = None,
    output_format: Annotated[str, typer.Option("--output", "-o", help="Output format")] = OutputFormat.TABLE.value,
    limit: Annotated[int | None, typer.Option("--limit", "-l", help="Maximum number of rows")] = None,
    order: Annotated[str, typer.Option("--order", help="Sort order (asc|desc)")] = SortOrder.DESC.value,
    include_without_files: Annotated[bool, typer.Option("--include-without-files", help="Include meetings without files URLs")] = False,
    cache_dir: CacheDirOption = DEFAULT_CACHE_DIR,
    working_group: WorkingGroupOption = None,
    subgroup: SubgroupOption = None,
    output_format: OutputFormatOption = OutputFormat.TABLE.value,
    limit: LimitOption = None,
    order: OrderOption = SortOrder.DESC.value,
    include_without_files: IncludeWithoutFilesOption = False,
) -> None:
    """Query meeting metadata from database."""
    working_groups = parse_working_groups(working_group)
@@ -355,8 +385,8 @@ def query_meetings(

@app.command("open")
def open_tdoc(
    tdoc_id: Annotated[str, typer.Argument(help="TDoc identifier to download and open")],
    cache_dir: Annotated[Path, typer.Option("--cache-dir", "-c", help="Cache directory")] = DEFAULT_CACHE_DIR,
    tdoc_id: TDocIdArgument,
    cache_dir: CacheDirOption = DEFAULT_CACHE_DIR,
) -> None:
    """Download, extract, and open a TDoc file."""
    normalized_id = tdoc_id.strip().upper()
@@ -386,9 +416,9 @@ def open_tdoc(

@app.command()
def checkout(
    tdoc_id: Annotated[list[str], typer.Argument(help="TDoc identifier(s) to checkout")],
    cache_dir: Annotated[Path, typer.Option("--cache-dir", "-c", help="Cache directory")] = DEFAULT_CACHE_DIR,
    force: Annotated[bool, typer.Option("--force", "-f", help="Re-download even if already checked out")] = False,
    tdoc_id: CheckoutTDocIdsArgument,
    cache_dir: CacheDirOption = DEFAULT_CACHE_DIR,
    force: ForceOption = False,
) -> None:
    """Download and extract TDoc(s) to checkout folder."""
    normalized_ids = [tid.strip().upper() for tid in tdoc_id]
@@ -441,7 +471,7 @@ def checkout(

@app.command()
def stats(
    cache_dir: Annotated[Path, typer.Option("--cache-dir", "-c", help="Cache directory")] = DEFAULT_CACHE_DIR,
    cache_dir: CacheDirOption = DEFAULT_CACHE_DIR,
) -> None:
    """Display database statistics."""
    db_path = database_path(cache_dir)
+52 −0
Original line number Diff line number Diff line
"""Shared Typer argument and option definitions."""

from __future__ import annotations

from pathlib import Path
from typing import Annotated

import typer

# Options used by the crawl commands (several are also reused by the query,
# open, checkout and stats commands). Each alias pairs the parameter's Python
# type with its Typer option declaration; the default value is NOT part of the
# alias and must be supplied in the command signature that uses it.
CacheDirOption = Annotated[Path, typer.Option("--cache-dir", "-c", help="Cache directory")]
# NOTE(review): this alias is used by both crawl and query commands, so the
# "Filter by ..." help text is also what the crawl commands display — confirm
# the wording is acceptable there.
WorkingGroupOption = Annotated[list[str] | None, typer.Option("--working-group", "-w", help="Filter by working group")]
SubgroupOption = Annotated[list[str] | None, typer.Option("--sub-group", "-s", help="Filter by sub-working group")]
# "--incremental/--full" declares an on/off flag pair: the first name sets the
# value to True, the second sets it to False.
IncrementalOption = Annotated[bool, typer.Option("--incremental/--full", help="Toggle incremental mode")]
ClearTDocsOption = Annotated[bool, typer.Option("--clear-tdocs", help="Clear all TDocs before crawling")]
ClearDbOption = Annotated[bool, typer.Option("--clear-db", help="Clear all meetings and TDocs before crawling")]
# Limit options are typed "int | None" so a command can default them to None.
LimitTDocsOption = Annotated[int | None, typer.Option("--limit-tdocs", help="Limit number of TDocs")]
LimitMeetingsOption = Annotated[int | None, typer.Option("--limit-meetings", help="Limit meetings overall")]
LimitMeetingsPerWgOption = Annotated[int | None, typer.Option("--limit-meetings-per-wg", help="Limit meetings per working group")]
LimitWgsOption = Annotated[int | None, typer.Option("--limit-wgs", help="Limit number of working groups")]
# NOTE(review): no min/max bounds are declared on the integer options below,
# so 0 or negative values are accepted by the CLI parser — verify that the
# consumers validate them.
WorkersOption = Annotated[int, typer.Option("--workers", help="Number of parallel subinterpreter workers")]
OverallTimeoutOption = Annotated[
    int | None,
    typer.Option("--overall-timeout", help="Maximum total crawl duration in seconds (None = unlimited)"),
]
MaxRetriesOption = Annotated[int, typer.Option("--max-retries", help="HTTP retry attempts")]
TimeoutOption = Annotated[int, typer.Option("--timeout", help="HTTP timeout seconds")]
VerboseOption = Annotated[bool, typer.Option("--verbose", "-v", help="Enable verbose logging")]

# Arguments and options for the query commands, plus the ETSI Online
# credential options. As with every alias in this module, only the type and
# the Typer declaration live here; defaults belong to the command signature.

# Positional argument typed "list[str] | None" so a command can default it to
# None (i.e. no identifiers given).
TDocIdsArgument = Annotated[list[str] | None, typer.Argument(help="TDoc identifiers to query")]
# Output format and sort order are plain strings at the CLI boundary, not enum
# types. NOTE(review): presumably converted/validated against the project's
# enums inside the commands — confirm.
OutputFormatOption = Annotated[str, typer.Option("--output", "-o", help="Output format")]
LimitOption = Annotated[int | None, typer.Option("--limit", "-l", help="Maximum number of rows")]
OrderOption = Annotated[str, typer.Option("--order", help="Sort order (asc|desc)")]
# Date filters are received as raw strings; the help text asks for ISO
# timestamps, but no parsing happens at this level.
StartDateOption = Annotated[str | None, typer.Option("--start-date", help="Filter from ISO timestamp")]
EndDateOption = Annotated[str | None, typer.Option("--end-date", help="Filter until ISO timestamp")]
NoFetchOption = Annotated[
    bool,
    typer.Option("--no-fetch", help="Disable automatic fetching of missing TDocs from portal"),
]
# Credential options. NOTE(review): the password is taken as an ordinary
# command-line option value — consider whether an environment variable or
# prompt-only input is preferable.
EolUsernameOption = Annotated[str | None, typer.Option("--eol-username", help="ETSI Online account username")]
EolPasswordOption = Annotated[str | None, typer.Option("--eol-password", help="ETSI Online account password")]
# On/off flag pair: "--prompt-credentials" sets True, "--no-prompt-credentials"
# sets False.
PromptCredentialsOption = Annotated[
    bool,
    typer.Option("--prompt-credentials/--no-prompt-credentials", help="Prompt for credentials when missing"),
]
IncludeWithoutFilesOption = Annotated[
    bool,
    typer.Option("--include-without-files", help="Include meetings without files URLs"),
]

# Arguments and options for the commands that download TDoc files.
# Neither argument type admits None, unlike TDocIdsArgument-style optional
# lists: a single identifier string, and a list of identifier strings.
TDocIdArgument = Annotated[str, typer.Argument(help="TDoc identifier to download and open")]
CheckoutTDocIdsArgument = Annotated[list[str], typer.Argument(help="TDoc identifier(s) to checkout")]
ForceOption = Annotated[bool, typer.Option("--force", "-f", help="Re-download even if already checked out")]
+2 −3
Original line number Diff line number Diff line
@@ -301,9 +301,8 @@ def parse_tdoc_portal_page(html: str, tdoc_id: str, url: str | None = None) -> T
        logger.warning(error_msg)
        raise PortalParsingError(error_msg)

    # Generate URL if not provided
    if url is None:
        url = f"https://www.3gpp.org/ftp/tsg_ran/.../{tdoc_id.upper()}.zip"
    # URL is extracted from the status field download link during parsing
    # If no download link was found, url will be None

    # Parse agenda_item_nbr as Decimal
    try: