Commit b676d158 authored by Jan Reimes's avatar Jan Reimes
Browse files

fix(async): fix unawaited coroutines and update callers to new DB API

Systematic sweep fixing async-mismatch bugs introduced during the oxyde
migration. Calling an async DB function without awaiting it silently
returns a coroutine object, so crawls yield zero results and writes are
dropped instead of raising errors.

- cli/crawl.py: await get_existing_meeting_ids (caused 0-result crawl)
- cli/{app,query,tdoc_app}.py: update to manager-based query API
- meetings/operations/crawl.py: await all async DB calls
- tdocs/operations/{crawl,fetch}.py: await DB calls; fix async context
- specs/{downloads,operations/checkout}.py: await DB calls
- clients/portal.py, http_client/session.py: minor API alignment
- meetings/models.py: update field references for new schema
- packages/3gpp-ai: fix unawaited coroutines in cli.py, convert.py,
  workspaces.py
- demo.bat: uncomment and reorder commands now that crawl/AI work
parent f4458448
Loading
Loading
Loading
Loading
+4 −4
Original line number Diff line number Diff line
@@ -4,19 +4,19 @@ call .venv\scripts\activate.bat

SET TDC_AI_CONVERT_MD=1

:: tdoc-crawler crawl-meetings -s S4
:: tdoc-crawler crawl-tdocs --start-date 2016
:: tdoc-crawler query-tdocs --agenda "*atias*" --start-date 2018
tdoc-crawler crawl-meetings -s S4
tdoc-crawler crawl-tdocs --start-date 2016
tdoc-crawler query-tdocs --agenda "*atias*" --start-date 2018

3gpp-ai workspace deactivate
3gpp-ai workspace delete atias --delete-artifacts
3gpp-ai workspace create atias
:: 3gpp-ai workspace activate atias
3gpp-ai workspace add-members --kind tdocs --agenda "*atias*" --start-date 2017
3gpp-ai workspace add-members 26131 26132 26260 26261 21905 --kind specs --release 19
3gpp-ai workspace add-members 26260 --kind specs --release 18.1.0
3gpp-ai workspace add-members 26260 26261 --kind specs --release 18.0.0
3gpp-ai workspace add-members 26260 --kind specs --release 17
3gpp-ai workspace add-members --kind tdocs --agenda "*atias*" --start-date 2017
3gpp-ai workspace list-members
:: 3gpp-ai workspace process
:: 3gpp-ai query "Please summarize the evolution of test methods in all ATIAS work items between the releases, in particular focusing on IVAS-capable devices"
+28 −24
Original line number Diff line number Diff line
@@ -326,7 +326,7 @@ async def _process_single_item(
        try:
            if source_kind == SourceKind.TDOC:
                # TDoc extraction - uses TDoc ID to fetch files
                asyncio.run(convert_document_to_markdown(document_id=item, output_path=None, force=False))
                convert_document_to_markdown(document_id=item, output_path=None, force=False)
            else:
                # Generic extraction (specs, other) - uses file path directly
                doc_file = _resolve_process_file(Path(source_path))
@@ -849,8 +849,10 @@ def workspace_add_members(
            f"[cyan]Processing {len(resolved_items)} item(s)...",
            total=len(resolved_items),
        )
        async def _process_items() -> None:
            nonlocal converted_count, md_extracted_count
            for item in resolved_items:
            member, skip_reason, was_converted, was_md_extracted = _process_single_item(
                member, skip_reason, was_converted, was_md_extracted = await _process_single_item(
                    item=item,
                    workspace=workspace_name,
                    source_kind=source_kind,
@@ -874,6 +876,8 @@ def workspace_add_members(
                    else:
                        progress.update(task, advance=1, description=f"[cyan]{item}")

        asyncio.run(_process_items())

    if skipped:
        console.print("\n[yellow]Skipped invalid items:[/yellow]")
        for item_id, reason in skipped:
+13 −5
Original line number Diff line number Diff line
@@ -9,6 +9,7 @@ conversion is delegated to the unified extraction module.

from __future__ import annotations

import asyncio
from pathlib import Path

from tdoc_crawler.config import resolve_cache_manager
@@ -41,12 +42,19 @@ def _get_meeting_info(meeting_id: int) -> str | None:
        Meeting short name if found, None otherwise.
    """
    try:
        asyncio.get_running_loop()
        return None
    except RuntimeError:
        pass

    async def _resolve_meeting_short_name() -> str | None:
        manager = resolve_cache_manager()
        with MeetingDatabase(manager.db_file) as db:
            meetings = db._table_rows("meetings")
            for meeting in meetings:
                if meeting.meeting_id == meeting_id:
                    return meeting.short_name
        async with MeetingDatabase(manager.db_file) as db:
            meeting = await db._get_meeting(meeting_id)
            return meeting.short_name if meeting is not None else None

    try:
        return asyncio.run(_resolve_meeting_short_name())
    except Exception as exc:
        logger.debug("Failed to get meeting info for %s: %s", meeting_id, exc)
    return None
+5 −8
Original line number Diff line number Diff line
@@ -13,7 +13,7 @@ from tdoc_crawler.config import resolve_cache_manager
from tdoc_crawler.database import TDocDatabase
from tdoc_crawler.database.specs import SpecDatabase
from tdoc_crawler.logging import get_logger
from tdoc_crawler.specs.operations.checkout import checkout_specs
from tdoc_crawler.specs.operations.checkout import checkout_specs_async
from tdoc_crawler.tdocs.models import TDocMetadata
from tdoc_crawler.tdocs.operations.checkout import checkout_tdoc
from tdoc_crawler.tdocs.sources.base import TDocSourceConfig
@@ -560,16 +560,13 @@ async def checkout_spec_to_workspace(
    normalized = normalize_spec_number(spec_number)
    undotted = normalized.replace(".", "")  # e.g., "26.260" -> "26260"
    requested_release = release.strip()

    # First check if already checked out with the SAME release version
    specs_dir = checkout_base / "Specs"
    if specs_dir.exists():
        # Resolve release to actual version(s) to match against 3GPP version codes (e.g., "h00", "i10")
        # We'll reuse the DB connection for checkout if needed
    cache_manager = resolve_cache_manager("default")
    db_path = db_file if db_file is not None else cache_manager.db_file
    resolved_release, version_codes = await resolve_spec_release_from_db(spec_number, requested_release, db_file=db_file)

    # First check if already checked out with the SAME release version
    specs_dir = checkout_base / "Specs"
    if specs_dir.exists():
        # Search for spec with matching version code
        for spec_dir in specs_dir.rglob("*"):
            if not spec_dir.is_dir():
@@ -587,7 +584,7 @@ async def checkout_spec_to_workspace(
    # Need to checkout the spec - reuse DB connection from resolve_spec_release_from_db
    try:
        async with SpecDatabase(db_path) as db:
            checkout_paths = checkout_specs(
            checkout_paths = await checkout_specs_async(
                spec_numbers=[spec_number],
                checkout_dir=checkout_base,
                database=db,
+2 −2
Original line number Diff line number Diff line
@@ -90,7 +90,7 @@ def open_tdoc(
        with create_cached_session() as session:
            async with TDocDatabase(manager.db_file) as database:
                results = await database.query_tdocs(config)
                result = fetch_missing_tdocs(
                result = await fetch_missing_tdocs(
                    database,
                    config,
                    results,
@@ -143,7 +143,7 @@ def checkout(
        with create_cached_session() as session:
            async with TDocDatabase(manager.db_file) as database:
                results = await database.query_tdocs(config)
                result = fetch_missing_tdocs(
                result = await fetch_missing_tdocs(
                    database,
                    config,
                    results,
Loading