Commit b479349d authored by Jan Reimes's avatar Jan Reimes
Browse files

chore(refactor): resolve PLC0415, standardize logging, and modernize HTTP/cache APIs

- Remove temporary local imports causing PLC0415 (database/tdocs.py, tdocs operations)
- Replace module-level stdlib loggers with project get_logger
- Update HTTP cached-session API and callers (use HttpCacheConfig / cache_manager_name)
- Refactor PortalClient: session handling, credential resolution, and remove legacy helpers
- Rename QueryConfig -> TDocQueryConfig and simplify TDocCrawlConfig; update CLI and tests accordingly
- Misc CLI improvements (verbosity handling, manager.db_file usage, checkout arg rename)
parent 4e21e15e
Loading
Loading
Loading
Loading
+4 −1
Original line number Diff line number Diff line
@@ -15,5 +15,8 @@
    "chat.tools.terminal.autoApprove": {
        ".specify/scripts/bash/": true,
        ".specify/scripts/powershell/": true
    }
    },
    "ruff.path": [
        "c:\\users\\jan.reimes\\appdata\\local\\mise\\shims\\ruff.exe"
    ]
}
+1 −0
Original line number Diff line number Diff line
@@ -50,6 +50,7 @@ dev = [
    "undersort>=0.1.5",
    "specify-cli",
    "pydeps>=3.0.2",
    "ruff>=0.15.0",
]

[build-system]
+58 −23
Original line number Diff line number Diff line
@@ -3,12 +3,14 @@ import tempfile
import time
from pathlib import Path

import typer
from typer.testing import CliRunner

from tdoc_crawler.cli import app
from tdoc_crawler.cli.console import get_console
from tdoc_crawler.logging import get_console, get_logger

this_dir = Path(__file__).parent
logger = get_logger(__name__)

# Example data
TDOC1 = "S4-260001"  # docx
@@ -24,49 +26,82 @@ WORKING_GROUP2 = "SA#4"
DATE1 = "2024-01-01"
DATE2 = "2024-02-01"


def main() -> None:
SPEC1 = "26.130"
SPEC2 = "26.131"
SPEC3 = "26.132"
SPECS = [SPEC1, SPEC2, SPEC3]

runner = CliRunner()
console = get_console()
    tmp_dir_args = {"suffix": "tdoc", "dir": this_dir, "delete": True}  # Set to False to inspect cache contents after run
cache_dir = this_dir / "cache"  # Default cache dir if not using temp dir
    shutil.rmtree(cache_dir, ignore_errors=True)  # Clean up any existing cache dir before run
common_args = ["--cache-dir", cache_dir, "-v", "debug"]  #


def clean_cache() -> None:
    """Delete the shared test cache directory, tolerating a missing tree."""
    # ignore_errors=True makes this a no-op when the directory does not exist.
    shutil.rmtree(cache_dir, ignore_errors=True)


def run_command(command: str, args: list[str]) -> None:
    """Invoke a CLI sub-command via the shared CliRunner and echo its output.

    Args:
        command: Name of the Typer sub-command to run (e.g. "checkout").
        args: Additional CLI arguments appended after the command name.
    """
    # Lazy %-style args: the message is only formatted if INFO is enabled.
    logger.info("Running command: %s with args: %s", command, args)
    res = runner.invoke(app, [command, *args])
    typer.echo(res.output)

    # with tempfile.TemporaryDirectory(**tmp_dir_args) as cache_dir:
    common_args = ["--cache-dir", cache_dir, "-v", "debug"]

    # 1. Simply open documents (no metadata crawling)
def demo_tdocs() -> None:
    """Exercise the TDoc-related CLI commands (checkout, open) against the shared cache."""
    clean_cache()

    # 1. Test checkout command (no metadata crawling if not explicitly requested)
    run_command("checkout", TDOCS + common_args)

    # 2. Simply open documents (no metadata crawling)
    for tdoc in TDOCS:
        # NOTE(review): the console.print/runner.invoke pair below duplicates the
        # run_command("open", ...) call — this looks like a merge/diff artifact;
        # confirm whether "open" should really run twice per TDoc.
        console.print(f"Testing with {tdoc}...")
        res = runner.invoke(app, ["open", tdoc] + common_args)
        console.print(res.output)
        run_command("open", [tdoc] + common_args)

    # wait until documents are opened before testing checkout, to avoid potential race conditions
    time.sleep(5)
    # 3. Crawl meetings (disabled): run_command("crawl-meetings", TDOCS + common_args)

    # Disabled variants kept for reference — run against a temporary cache dir:
    # with tempfile.TemporaryDirectory(**tmp_dir_args) as cache_dir:
    #     common_args = ["--cache-dir", cache_dir, "-v", "debug"]
    # 4. Crawl TDocs:  run_command("crawl-tdocs", TDOCS + common_args)
    # 5. Query TDocs:  run_command("query-tdocs", ["--tdoc-ids"] + TDOCS + common_args)
    # 6. Crawl Specs
    # 7. Query Specs

    # 8. Crawl spec metadata

    # 9. Query spec metadata


def demo_specs() -> None:
    """Exercise the spec-related CLI commands; also replays some TDoc commands.

    NOTE(review): the TDoc "checkout"/"crawl-meetings" invocations below appear
    to be leftover from demo_tdocs (diff artifact) — confirm they belong here.
    """
    # checkout specs
    res = runner.invoke(app, ["checkout-spec"] + SPECS + common_args)
    # logger.info(res.output)

    # Simply open specs (no metadata crawling)
    for spec in SPECS:
        logger.info(f"Testing with spec {spec}...")
        res = runner.invoke(app, ["open-spec", spec] + common_args)
        typer.echo(res.output)

        # NOTE(review): only the first spec is ever opened — confirm the break is intended.
        break

    # 2. Test checkout command (no metadata crawling if not explicitly requested)
    res = runner.invoke(app, ["checkout"] + TDOCS + common_args)
    console.print(res.output)
    # logger.info(res.output)

    # 3. Crawl Meetings
    res = runner.invoke(app, ["crawl-meetings"] + TDOCS + common_args)
    console.print(res.output)
    # logger.info(res.output)

    # 4. Crawl TDocs
    # 5. Query TDocs

    # 6. Simply open spec documents (no metadata crawling)

    # 7. Checkout spec documents (no metadata crawling)
def main() -> None:
    """Entry point: run the TDoc demo scenario.

    The spec demo is currently disabled; enable ``demo_specs()`` manually when
    spec crawling should be exercised as well. Earlier variants ran against a
    temporary cache directory via tempfile.TemporaryDirectory instead of the
    persistent module-level ``cache_dir``.
    """
    demo_tdocs()
    # demo_specs()

# Allow running this demo script directly (e.g. `python <script>.py`).
if __name__ == "__main__":
    main()
+37 −47
Original line number Diff line number Diff line
@@ -14,7 +14,6 @@ from rich.progress import BarColumn, MofNCompleteColumn, Progress, SpinnerColumn
from rich.table import Table

from tdoc_crawler.cli.args import (
    DEFAULT_VERBOSITY,
    CacheDirOption,
    CheckoutDirOption,
    CheckoutOption,
@@ -56,7 +55,6 @@ from tdoc_crawler.cli.args import (
    WorkersOption,
    WorkingGroupOption,
)
from tdoc_crawler.cli.console import get_console
from tdoc_crawler.cli.printing import (
    meeting_to_dict,
    print_checkout_results,
@@ -74,7 +72,8 @@ from tdoc_crawler.credentials import set_credentials
from tdoc_crawler.database import MeetingDatabase, TDocDatabase
from tdoc_crawler.database.specs import SpecDatabase
from tdoc_crawler.http_client import create_cached_session
from tdoc_crawler.logging import set_verbosity
from tdoc_crawler.logging import DEFAULT_LEVEL as DEFAULT_VERBOSITY
from tdoc_crawler.logging import get_console, set_verbosity
from tdoc_crawler.meetings.models import MeetingCrawlConfig, MeetingQueryConfig
from tdoc_crawler.meetings.operations.crawl import MeetingCrawler
from tdoc_crawler.models.base import OutputFormat, SortOrder
@@ -86,7 +85,7 @@ from tdoc_crawler.specs.operations.checkout import (
    checkout_specs,
    clear_checkout_specs,
)
from tdoc_crawler.tdocs.models import QueryConfig, TDocCrawlConfig
from tdoc_crawler.tdocs.models import TDocCrawlConfig, TDocQueryConfig
from tdoc_crawler.tdocs.operations import TDocCrawler
from tdoc_crawler.tdocs.operations.checkout import (
    checkout_meeting_tdocs,
@@ -115,6 +114,7 @@ HELP_PANEL_QUERY = "Query Commands"
# - ...
# - tdoc_crawler/cli/app.py (with remaining commands like open and checkout)


@app.command("crawl-tdocs", rich_help_panel=HELP_PANEL_CRAWLING)
def crawl_tdocs(
    working_group: WorkingGroupOption = None,
@@ -132,7 +132,7 @@ def crawl_tdocs(
    max_retries: MaxRetriesOption = 3,
    overall_timeout: OverallTimeoutOption = None,
    cache_dir: CacheDirOption = None,
    verbosity: VerbosityOption = DEFAULT_VERBOSITY,
    verbosity: VerbosityOption = str(DEFAULT_VERBOSITY),
) -> None:
    """Crawl TDocs from 3GPP FTP directories."""
    """No credentials needed, crawl-tdocs always resolves meetings first -> parse Excel files that includes metadata"""
@@ -145,7 +145,6 @@ def crawl_tdocs(

    limits = CrawlLimits.build(limit_tdocs, limit_meetings, limit_meetings_per_wg, limit_wgs)
    config = TDocCrawlConfig(
        cache_dir=manager.root,
        working_groups=working_groups,
        subgroups=subgroups,
        meeting_ids=None,
@@ -155,7 +154,6 @@ def crawl_tdocs(
        force_revalidate=False,
        workers=workers,
        overall_timeout=overall_timeout,
        max_retries=max_retries,
        timeout=timeout,
        limits=limits,
        target_ids=None,
@@ -233,7 +231,7 @@ def crawl_tdocs(

        if checkout:
            checkout_limit = limit_tdocs if limit_tdocs and limit_tdocs > 0 else None
            query_config = QueryConfig(
            query_config = TDocQueryConfig(
                cache_dir=manager.root,
                working_groups=working_groups,
                limit=checkout_limit,
@@ -242,7 +240,7 @@ def crawl_tdocs(
            results = database.query_tdocs(query_config)

            # Use a shared session for checkout downloads
            with create_cached_session(manager.http_cache_file) as session:
            with create_cached_session() as session:
                checkout_result = checkout_tdocs(results, checkout_dir, force=False, session=session)

            console.print(f"\n[cyan]Checked out {checkout_result.success_count} TDoc(s)[/cyan]")
@@ -281,7 +279,7 @@ def crawl_meetings(
    eol_password: EolPasswordOption = None,
    prompt_credentials: PromptCredentialsOption = None,
    cache_dir: CacheDirOption = None,
    verbosity: VerbosityOption = DEFAULT_VERBOSITY,
    verbosity: VerbosityOption = str(DEFAULT_VERBOSITY),
) -> None:
    """Crawl meeting metadata from 3GPP portal."""
    # Set logging verbosity early to ensure all log messages respect the configured level
@@ -295,7 +293,6 @@ def crawl_meetings(
    limits = CrawlLimits.build(None, limit_meetings, limit_meetings_per_wg, limit_wgs)

    config = MeetingCrawlConfig(
        cache_dir=manager.root,
        working_groups=working_groups,
        subgroups=subgroups,
        incremental=incremental,
@@ -382,7 +379,6 @@ def crawl_meetings(

    if checkout:
        query_config = MeetingQueryConfig(
            cache_dir=manager.root,
            working_groups=working_groups,
            subgroups=subgroups,
            limit=limit_meetings if limit_meetings and limit_meetings > 0 else None,
@@ -392,7 +388,7 @@ def crawl_meetings(
        with MeetingDatabase(db_file) as database:
            meetings = database.query_meetings(query_config)

        with create_cached_session(manager.http_cache_file) as session:
        with create_cached_session() as session:
            checkout_meeting_tdocs(meetings, manager.checkout_dir, manager.http_cache_file, session=session)


@@ -413,7 +409,7 @@ def query_tdocs(
    eol_password: EolPasswordOption = None,
    prompt_credentials: PromptCredentialsOption = None,
    cache_dir: CacheDirOption = None,
    verbosity: VerbosityOption = DEFAULT_VERBOSITY,
    verbosity: VerbosityOption = str(DEFAULT_VERBOSITY),
) -> None:
    """Query TDoc metadata from database."""
    set_verbosity(verbosity)
@@ -438,7 +434,7 @@ def query_tdocs(
        console.print("[red]Invalid order value; use asc or desc")
        raise typer.Exit(code=2) from exc

    config = QueryConfig(
    config = TDocQueryConfig(
        cache_dir=manager.root,
        output_format=output_format,
        tdoc_ids=tdoc_ids,
@@ -469,7 +465,7 @@ def query_tdocs(
        results = database.query_tdocs(config)
        if not no_fetch:
            # Use cached session for missing TDoc fetching
            with create_cached_session(manager.http_cache_file) as session:
            with create_cached_session() as session:
                result = fetch_missing_tdocs(
                    database,
                    config,
@@ -488,7 +484,7 @@ def query_tdocs(
        return

    if checkout:
        with create_cached_session(manager.http_cache_file) as session:
        with create_cached_session() as session:
            checkout_tdocs(results, manager.checkout_dir, force=False, session=session)

    if config.output_format is OutputFormat.JSON:
@@ -511,7 +507,7 @@ def query_meetings(
    clear_tdocs: ClearTDocsOption = False,
    clear_specs: ClearSpecsOption = False,
    cache_dir: CacheDirOption = None,
    verbosity: VerbosityOption = DEFAULT_VERBOSITY,
    verbosity: VerbosityOption = str(DEFAULT_VERBOSITY),
) -> None:
    """Query meeting metadata from database."""
    set_verbosity(verbosity)
@@ -525,7 +521,6 @@ def query_meetings(
        raise typer.Exit(code=2) from exc

    config = MeetingQueryConfig(
        cache_dir=manager.root,
        working_groups=working_groups,
        subgroups=subgroups,
        limit=limit,
@@ -557,7 +552,7 @@ def query_meetings(
        return

    if checkout:
        with create_cached_session(manager.http_cache_file) as session:
        with create_cached_session() as session:
            checkout_meeting_tdocs(meetings, manager.checkout_dir, manager.http_cache_file, session=session)

    try:
@@ -585,7 +580,7 @@ def query_specs(
    clear_specs: ClearSpecsOption = False,
    spec_file: SpecFileOption = None,
    cache_dir: CacheDirOption = None,
    verbosity: VerbosityOption = DEFAULT_VERBOSITY,
    verbosity: VerbosityOption = str(DEFAULT_VERBOSITY),
) -> None:
    """Query spec metadata from database."""
    set_verbosity(verbosity)
@@ -652,7 +647,7 @@ def open_tdoc(
    eol_password: EolPasswordOption = None,
    prompt_credentials: PromptCredentialsOption = None,
    cache_dir: CacheDirOption = None,
    verbosity: VerbosityOption = DEFAULT_VERBOSITY,
    verbosity: VerbosityOption = str(DEFAULT_VERBOSITY),
) -> None:
    """Download, extract, and open a TDoc file."""
    set_verbosity(verbosity)
@@ -660,14 +655,12 @@ def open_tdoc(

    manager = CacheManager(cache_dir).register()
    normalized_id = tdoc_id.strip().upper()
    config = QueryConfig(
        cache_dir=manager.root,
    config = TDocQueryConfig(
        tdoc_ids=[normalized_id],
    )

    db_file = manager.db_file
    with create_cached_session(manager.http_cache_file) as session:
        with TDocDatabase(db_file) as database:
    with create_cached_session() as session:
        with TDocDatabase(manager.db_file) as database:
            results = database.query_tdocs(config)

            result = fetch_missing_tdocs(
@@ -699,7 +692,7 @@ def open_tdoc(

@app.command("checkout", rich_help_panel=HELP_PANEL_MAIN)
def checkout(
    tdoc_id: CheckoutTDocIdsArgument,
    tdoc_ids: CheckoutTDocIdsArgument,
    force: ForceOption = False,
    full_metadata: FullMetadataOption = False,
    use_whatthespec: UseWhatTheSpecOption = False,
@@ -707,22 +700,20 @@ def checkout(
    eol_password: EolPasswordOption = None,
    prompt_credentials: PromptCredentialsOption = None,
    cache_dir: CacheDirOption = None,
    verbosity: VerbosityOption = DEFAULT_VERBOSITY,
    verbosity: VerbosityOption = str(DEFAULT_VERBOSITY),
) -> None:
    """Download and extract TDoc(s) to checkout folder."""
    set_verbosity(verbosity)
    set_credentials(eol_username, eol_password, prompt=prompt_credentials)

    manager = CacheManager(cache_dir).register()
    normalized_ids = [tid.strip().upper() for tid in tdoc_id]
    config = QueryConfig(
        cache_dir=manager.root,
        tdoc_ids=normalized_ids,
    normalized_ids = [tid.strip().upper() for tid in tdoc_ids]
    config = TDocQueryConfig(
        target_ids=normalized_ids,
    )

    db_file = manager.db_file
    with create_cached_session(manager.http_cache_file) as session:
        with TDocDatabase(db_file) as database:
    with create_cached_session() as session:
        with TDocDatabase(manager.db_file) as database:
            results = database.query_tdocs(config)

            result = fetch_missing_tdocs(
@@ -771,13 +762,13 @@ def checkout(
@app.command("stats", rich_help_panel=HELP_PANEL_MAIN)
def stats(
    cache_dir: CacheDirOption = None,
    verbosity: VerbosityOption = DEFAULT_VERBOSITY,
    verbosity: VerbosityOption = str(DEFAULT_VERBOSITY),
) -> None:
    """Display database statistics."""
    set_verbosity(verbosity)
    manager = CacheManager(cache_dir).register()
    db_file = manager.db_file
    if not db_file.exists():

    if not (db_file := manager.db_file).exists():
        console.print(f"[red]Database not found: {db_file}[/red]")
        raise typer.Exit(code=1)

@@ -812,13 +803,13 @@ def crawl_specs(
    clear_specs: ClearSpecsOption = False,
    spec_file: SpecFileOption = None,
    cache_dir: CacheDirOption = None,
    verbosity: VerbosityOption = DEFAULT_VERBOSITY,
    verbosity: VerbosityOption = str(DEFAULT_VERBOSITY),
) -> None:
    """Crawl spec metadata from configured sources."""
    set_verbosity(verbosity)
    manager = CacheManager(cache_dir).register()
    if spec_numbers is None:
        spec_numbers = []
    spec_numbers = spec_numbers or []

    specs = collect_spec_numbers(spec_numbers, spec_file)
    try:
        output = OutputFormat(output_format.lower())
@@ -828,8 +819,7 @@ def crawl_specs(

    sources = build_default_spec_sources()

    db_file = manager.db_file
    with SpecDatabase(db_file) as database:
    with SpecDatabase(manager.db_file) as database:
        checkout_dir = manager.checkout_dir
        if clear_tdocs:
            deleted_count = database.clear_tdocs()
@@ -852,7 +842,7 @@ def crawl_specs(
        return

    if checkout:
        with SpecDatabase(db_file) as database:
        with SpecDatabase(manager.db_file) as database:
            checkout_specs(
                [result.spec_number for result in results],
                manager.checkout_dir,
@@ -876,7 +866,7 @@ def checkout_spec(
    spec_file: SpecFileOption = None,
    cache_dir: CacheDirOption = None,
    checkout_dir: CheckoutDirOption = None,
    verbosity: VerbosityOption = DEFAULT_VERBOSITY,
    verbosity: VerbosityOption = str(DEFAULT_VERBOSITY),
) -> None:
    """Download and extract spec documents."""
    set_verbosity(verbosity)
@@ -907,7 +897,7 @@ def open_spec(
    release: ReleaseOption = "latest",
    doc_only: DocOnlyOption = False,
    cache_dir: CacheDirOption = None,
    verbosity: VerbosityOption = DEFAULT_VERBOSITY,
    verbosity: VerbosityOption = str(DEFAULT_VERBOSITY),
) -> None:
    """Download and open a spec document."""
    set_verbosity(verbosity)
+7 −6
Original line number Diff line number Diff line
@@ -15,25 +15,26 @@ console = get_console()
logger = get_logger(__name__)


def launch_file(path: Path) -> None:
def launch_file(filename: Path) -> None:
    """Launch file in system's default application."""
    if not path.exists():
        logger.error(f"File not found: {path}")
    if not filename.exists():
        logger.error(f"File not found: {filename}")
        raise typer.Exit(code=1)

    try:
        if sys.platform.startswith("win"):
            os.startfile(path)  # noqa: S606
            os.startfile(filename)  # noqa: S606
        elif sys.platform == "darwin":
            open_cmd = Path("/usr/bin/open")
            if open_cmd.exists():
                subprocess.run([str(open_cmd), str(path)], check=False)  # noqa: S603
                subprocess.run([str(open_cmd), str(filename)], check=False)  # noqa: S603
            else:
                logger.warning("/usr/bin/open not available")
        else:
            # Linux and other Unix-like systems
            xdg_cmd = Path("/usr/bin/xdg-open")
            if xdg_cmd.exists():
                subprocess.run([str(xdg_cmd), str(path)], check=False)  # noqa: S603
                subprocess.run([str(xdg_cmd), str(filename)], check=False)  # noqa: S603
            else:
                logger.warning("xdg-open command not available")
    except OSError as exc:
Loading