Commit b539daa5 authored by Jan Reimes
Browse files

refactor(cli): simplify handle_clear_options usage in crawl and query commands

- Updated handle_clear_options to accept db_file and database_cls directly.
- Removed redundant database instance creation in crawl and query functions.
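- Improved clarity and maintainability: handle_clear_options now opens the database itself, and each command calls it before opening its own database connection.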
parent f4328534
Loading
Loading
Loading
Loading
+26 −19
Original line number Diff line number Diff line
@@ -3,10 +3,10 @@
from __future__ import annotations

from pathlib import Path
from typing import Any

from rich.progress import BarColumn, MofNCompleteColumn, Progress, SpinnerColumn, TaskID, TextColumn

from tdoc_crawler.database.base import DocDatabase
from tdoc_crawler.logging import get_console
from tdoc_crawler.specs.operations.checkout import clear_checkout_specs
from tdoc_crawler.tdocs.operations.checkout import clear_checkout_tdocs
@@ -15,8 +15,9 @@ console = get_console()


def handle_clear_options(
    database: Any,
    db_file: Path,
    checkout_dir: Path,
    database_cls: type[DocDatabase],
    clear_tdocs: bool = False,
    clear_specs: bool = False,
    clear_db: bool = False,
@@ -24,20 +25,26 @@ def handle_clear_options(
    """Handle clear options for TDoc and spec data.

    Args:
        database: Database instance (TDocDatabase or MeetingDatabase)
        db_file: Database file path
        checkout_dir: Path to checkout directory
        database_cls: Database class to open for clear operations
        clear_tdocs: Whether to clear TDocs
        clear_specs: Whether to clear specs
        clear_db: Whether to clear all data (only for MeetingDatabase)
    """
    if not (clear_db or clear_tdocs or clear_specs):
        return

    if clear_db:
        tdocs_count, meetings_count = database.clear_all_data()
        console.print(f"[yellow]Cleared {tdocs_count} TDocs and {meetings_count} meetings from database[/yellow]")
        if db_file.exists():
            db_file.unlink()
            console.print(f"[yellow]Deleted database file: {db_file}[/yellow]")
        removed = clear_checkout_tdocs(checkout_dir)
        if removed:
            console.print(f"[yellow]Cleared {removed} checkout entries for TDocs[/yellow]")
        return

    with database_cls(db_file) as database:
        if clear_tdocs:
            deleted_count = database.clear_tdocs()
            console.print(f"[yellow]Cleared {deleted_count} TDocs from database[/yellow]")
+24 −9
Original line number Diff line number Diff line
@@ -124,10 +124,15 @@ def crawl_tdocs(
        scope_parts.append(f"working groups: {', '.join(wg.value for wg in working_groups)}")
    console.print(f"[cyan]Crawling TDocs ({', '.join(scope_parts)})[/cyan]")

    with TDocDatabase(db_file) as database:
        checkout_dir = manager.checkout_dir
        handle_clear_options(database, checkout_dir, clear_tdocs=clear_tdocs, clear_specs=clear_specs)
    handle_clear_options(
        db_file,
        manager.checkout_dir,
        TDocDatabase,
        clear_tdocs=clear_tdocs,
        clear_specs=clear_specs,
    )

    with TDocDatabase(db_file) as database:
        crawler = TDocCrawler(database)
        crawl_id = database.log_crawl_start("tdoc", [wg.value for wg in config.working_groups], config.incremental)

@@ -234,9 +239,14 @@ def crawl_meetings(
        scope_parts.append(f"working groups: {', '.join(wg.value for wg in working_groups)}")
    console.print(f"[cyan]Crawling meetings ({', '.join(scope_parts)})[/cyan]")

    with MeetingDatabase(db_file) as database:
        checkout_dir = manager.checkout_dir
        handle_clear_options(database, checkout_dir, clear_tdocs=clear_tdocs, clear_specs=clear_specs, clear_db=clear_db)
    handle_clear_options(
        db_file,
        manager.checkout_dir,
        MeetingDatabase,
        clear_tdocs=clear_tdocs,
        clear_specs=clear_specs,
        clear_db=clear_db,
    )

    with MeetingDatabase(db_file) as database:
        crawl_id = database.log_crawl_start("meeting", [wg.value for wg in config.working_groups], config.incremental)
@@ -306,10 +316,15 @@ def crawl_specs(

    sources = build_default_spec_sources()

    with SpecDatabase(manager.db_file) as database:
        checkout_dir = manager.checkout_dir
        handle_clear_options(database, checkout_dir, clear_tdocs=clear_tdocs, clear_specs=clear_specs)
    handle_clear_options(
        manager.db_file,
        manager.checkout_dir,
        SpecDatabase,
        clear_tdocs=clear_tdocs,
        clear_specs=clear_specs,
    )

    with SpecDatabase(manager.db_file) as database:
        results = database.crawl_specs(specs, release, sources)

    if not results:
+21 −9
Original line number Diff line number Diff line
@@ -114,10 +114,14 @@ def query_tdocs(
    )

    db_file = manager.db_file
    handle_clear_options(
        db_file,
        manager.checkout_dir,
        TDocDatabase,
        clear_tdocs=clear_tdocs,
        clear_specs=clear_specs,
    )
    with TDocDatabase(db_file) as database:
        checkout_dir = manager.checkout_dir
        handle_clear_options(database, checkout_dir, clear_tdocs=clear_tdocs, clear_specs=clear_specs)

        results = database.query_tdocs(config)
        if not no_fetch:
            with create_cached_session() as session:
@@ -183,10 +187,14 @@ def query_meetings(
    )

    db_file = manager.db_file
    handle_clear_options(
        db_file,
        manager.checkout_dir,
        MeetingDatabase,
        clear_tdocs=clear_tdocs,
        clear_specs=clear_specs,
    )
    with MeetingDatabase(db_file) as database:
        checkout_dir = manager.checkout_dir
        handle_clear_options(database, checkout_dir, clear_tdocs=clear_tdocs, clear_specs=clear_specs)

        meetings = database.query_meetings(config)

    if not meetings:
@@ -244,10 +252,14 @@ def query_specs(
        raise typer.Exit(code=2) from exc

    db_file = manager.db_file
    handle_clear_options(
        db_file,
        manager.checkout_dir,
        SpecDatabase,
        clear_tdocs=clear_tdocs,
        clear_specs=clear_specs,
    )
    with SpecDatabase(db_file) as database:
        checkout_dir = manager.checkout_dir
        handle_clear_options(database, checkout_dir, clear_tdocs=clear_tdocs, clear_specs=clear_specs)

        results = database.query_specs(filters)

    if not results: