Commit b676d158 authored by Jan Reimes's avatar Jan Reimes
Browse files

fix(async): fix unawaited coroutines and update callers to new DB API

Systematic sweep fixing async-mismatch bugs introduced during the oxyde
migration. Calling an async DB function without awaiting it silently
returns a coroutine object, so crawls yield zero results and writes are
dropped instead of raising errors.

- cli/crawl.py: await get_existing_meeting_ids (caused 0-result crawl)
- cli/{app,query,tdoc_app}.py: update to manager-based query API
- meetings/operations/crawl.py: await all async DB calls
- tdocs/operations/{crawl,fetch}.py: await DB calls; fix async context
- specs/{downloads,operations/checkout}.py: await DB calls
- clients/portal.py, http_client/session.py: minor API alignment
- meetings/models.py: update field references for new schema
- packages/3gpp-ai: fix unawaited coroutines in cli.py, convert.py,
  workspaces.py
- demo.bat: uncomment and reorder commands now that crawl/AI work
parent f4458448
Loading
Loading
Loading
Loading
+4 −4
Original line number Diff line number Diff line
@@ -4,19 +4,19 @@ call .venv\scripts\activate.bat

SET TDC_AI_CONVERT_MD=1

:: tdoc-crawler crawl-meetings -s S4
:: tdoc-crawler crawl-tdocs --start-date 2016
:: tdoc-crawler query-tdocs --agenda "*atias*" --start-date 2018
tdoc-crawler crawl-meetings -s S4
tdoc-crawler crawl-tdocs --start-date 2016
tdoc-crawler query-tdocs --agenda "*atias*" --start-date 2018

3gpp-ai workspace deactivate
3gpp-ai workspace delete atias --delete-artifacts
3gpp-ai workspace create atias
:: 3gpp-ai workspace activate atias
3gpp-ai workspace add-members --kind tdocs --agenda "*atias*" --start-date 2017
3gpp-ai workspace add-members 26131 26132 26260 26261 21905 --kind specs --release 19
3gpp-ai workspace add-members 26260 --kind specs --release 18.1.0
3gpp-ai workspace add-members 26260 26261 --kind specs --release 18.0.0
3gpp-ai workspace add-members 26260 --kind specs --release 17
3gpp-ai workspace add-members --kind tdocs --agenda "*atias*" --start-date 2017
3gpp-ai workspace list-members
:: 3gpp-ai workspace process
:: 3gpp-ai query "Please summarize the evolution of test methods in all ATIAS work items between the releases, in particular focusing on IVAS-capable devices"
+28 −24
Original line number Diff line number Diff line
@@ -326,7 +326,7 @@ async def _process_single_item(
        try:
            if source_kind == SourceKind.TDOC:
                # TDoc extraction - uses TDoc ID to fetch files
                asyncio.run(convert_document_to_markdown(document_id=item, output_path=None, force=False))
                convert_document_to_markdown(document_id=item, output_path=None, force=False)
            else:
                # Generic extraction (specs, other) - uses file path directly
                doc_file = _resolve_process_file(Path(source_path))
@@ -849,8 +849,10 @@ def workspace_add_members(
            f"[cyan]Processing {len(resolved_items)} item(s)...",
            total=len(resolved_items),
        )
        async def _process_items() -> None:
            nonlocal converted_count, md_extracted_count
            for item in resolved_items:
            member, skip_reason, was_converted, was_md_extracted = _process_single_item(
                member, skip_reason, was_converted, was_md_extracted = await _process_single_item(
                    item=item,
                    workspace=workspace_name,
                    source_kind=source_kind,
@@ -874,6 +876,8 @@ def workspace_add_members(
                    else:
                        progress.update(task, advance=1, description=f"[cyan]{item}")

        asyncio.run(_process_items())

    if skipped:
        console.print("\n[yellow]Skipped invalid items:[/yellow]")
        for item_id, reason in skipped:
+13 −5
Original line number Diff line number Diff line
@@ -9,6 +9,7 @@ conversion is delegated to the unified extraction module.

from __future__ import annotations

import asyncio
from pathlib import Path

from tdoc_crawler.config import resolve_cache_manager
@@ -41,12 +42,19 @@ def _get_meeting_info(meeting_id: int) -> str | None:
        Meeting short name if found, None otherwise.
    """
    try:
        asyncio.get_running_loop()
        return None
    except RuntimeError:
        pass

    async def _resolve_meeting_short_name() -> str | None:
        manager = resolve_cache_manager()
        with MeetingDatabase(manager.db_file) as db:
            meetings = db._table_rows("meetings")
            for meeting in meetings:
                if meeting.meeting_id == meeting_id:
                    return meeting.short_name
        async with MeetingDatabase(manager.db_file) as db:
            meeting = await db._get_meeting(meeting_id)
            return meeting.short_name if meeting is not None else None

    try:
        return asyncio.run(_resolve_meeting_short_name())
    except Exception as exc:
        logger.debug("Failed to get meeting info for %s: %s", meeting_id, exc)
    return None
+5 −8
Original line number Diff line number Diff line
@@ -13,7 +13,7 @@ from tdoc_crawler.config import resolve_cache_manager
from tdoc_crawler.database import TDocDatabase
from tdoc_crawler.database.specs import SpecDatabase
from tdoc_crawler.logging import get_logger
from tdoc_crawler.specs.operations.checkout import checkout_specs
from tdoc_crawler.specs.operations.checkout import checkout_specs_async
from tdoc_crawler.tdocs.models import TDocMetadata
from tdoc_crawler.tdocs.operations.checkout import checkout_tdoc
from tdoc_crawler.tdocs.sources.base import TDocSourceConfig
@@ -560,16 +560,13 @@ async def checkout_spec_to_workspace(
    normalized = normalize_spec_number(spec_number)
    undotted = normalized.replace(".", "")  # e.g., "26.260" -> "26260"
    requested_release = release.strip()

    # First check if already checked out with the SAME release version
    specs_dir = checkout_base / "Specs"
    if specs_dir.exists():
        # Resolve release to actual version(s) to match against 3GPP version codes (e.g., "h00", "i10")
        # We'll reuse the DB connection for checkout if needed
    cache_manager = resolve_cache_manager("default")
    db_path = db_file if db_file is not None else cache_manager.db_file
    resolved_release, version_codes = await resolve_spec_release_from_db(spec_number, requested_release, db_file=db_file)

    # First check if already checked out with the SAME release version
    specs_dir = checkout_base / "Specs"
    if specs_dir.exists():
        # Search for spec with matching version code
        for spec_dir in specs_dir.rglob("*"):
            if not spec_dir.is_dir():
@@ -587,7 +584,7 @@ async def checkout_spec_to_workspace(
    # Need to checkout the spec - reuse DB connection from resolve_spec_release_from_db
    try:
        async with SpecDatabase(db_path) as db:
            checkout_paths = checkout_specs(
            checkout_paths = await checkout_specs_async(
                spec_numbers=[spec_number],
                checkout_dir=checkout_base,
                database=db,
+2 −2
Original line number Diff line number Diff line
@@ -90,7 +90,7 @@ def open_tdoc(
        with create_cached_session() as session:
            async with TDocDatabase(manager.db_file) as database:
                results = await database.query_tdocs(config)
                result = fetch_missing_tdocs(
                result = await fetch_missing_tdocs(
                    database,
                    config,
                    results,
@@ -143,7 +143,7 @@ def checkout(
        with create_cached_session() as session:
            async with TDocDatabase(manager.db_file) as database:
                results = await database.query_tdocs(config)
                result = fetch_missing_tdocs(
                result = await fetch_missing_tdocs(
                    database,
                    config,
                    results,
Loading