Commit 318e8c4a authored by Jan Reimes's avatar Jan Reimes
Browse files

feat(workspace): enhance workspace member handling and spec resolution

* Add duplicate member warning in workspace add command.
* Update spec release resolution to support auto-crawling from 3GPP.
* Refactor existing member ID checks to use set for efficiency.
* Improve error handling in JSON reading for workspace processes.
parent d76918a8
Loading
Loading
Loading
Loading
+3 −0
Original line number Diff line number Diff line
@@ -32,6 +32,9 @@ tdoc-crawler query --agenda "*atias*" --start-date 2018
3gpp-crawler workspace add 26260 26261 --kind spec --release 18.0
3gpp-crawler workspace add 26260 --kind spec --release 17

:: duplicates should give a warning
3gpp-crawler workspace add 26260 --kind spec --release 18.1

:: overview
3gpp-crawler workspace members

+6 −1
Original line number Diff line number Diff line
@@ -78,7 +78,9 @@ def _resolve_spec_release_for_add(
    if source_kind != SourceKind.SPEC:
        return release
    try:
        resolved, _ = asyncio.run(resolve_spec_release_from_db(item, release))
        resolved, _ = asyncio.run(
            resolve_spec_release_from_db(item, release, auto_crawl=auto_crawl),
        )
        console.print(f"[dim]  Resolved {item} release '{release}' -> {resolved}[/dim]")
        return resolved
    except ValueError as exc:
@@ -197,6 +199,9 @@ def workspace_add(

    added = add_workspace_members(normalized, members)
    mode = "from query" if has_filters else "directly"
    duplicates = len(members) - added
    if duplicates > 0:
        console.print(f"[yellow]Skipped {duplicates} duplicate(s) already in workspace '{normalized}'.[/yellow]")
    console.print(f"[green]Added {added} item(s) to workspace '{normalized}' ({mode}).[/green]")


+1 −1
Original line number Diff line number Diff line
@@ -105,7 +105,7 @@ def _read_page_count(json_dir: Path) -> int:
    try:
        data = json.loads(json_files[0].read_text(encoding="utf-8"))
        return int(data.get("number of pages", 0))
    except (json.JSONDecodeError, OSError, ValueError):
    except (json.JSONDecodeError, OSError, ValueError):
        return 0


+5 −5
Original line number Diff line number Diff line
@@ -44,20 +44,20 @@ def add_workspace_members(
    added_count = 0
    skipped_count = 0
    for member in members:
        existing_ids = [_normalize_member_id(str(m.get("source_item_id", ""))) for m in metadata.members]
        existing_ids = {_normalize_member_id(str(m.get("source_item_id", ""))) for m in metadata.members}
        new_id = _normalize_member_id(member.source_item_id)

        if new_id in existing_ids:
            skipped_count += 1
            _logger.warning("Skipping duplicate member '%s' — already in workspace '%s'", member.source_item_id, normalized_workspace)
            continue

        metadata.add_member(member)
        added_count += 1

    if added_count > 0:
        registry.save()

    if skipped_count > 0:
        pass

    return added_count


+52 −8
Original line number Diff line number Diff line
@@ -18,6 +18,8 @@ from tdoc_crawler.specs.operations.checkout import (
    build_default_spec_sources,
    checkout_specs_async,
)
from tdoc_crawler.specs.sources.base import FunctionSpecSource
from tdoc_crawler.specs.sources.threegpp import fetch_threegpp_metadata
from tdoc_crawler.tdocs.models import TDocMetadata
from tdoc_crawler.tdocs.operations.checkout import checkout_tdoc
from tdoc_crawler.tdocs.sources.whatthespec import resolve_via_whatthespec
@@ -61,6 +63,8 @@ async def resolve_spec_release_from_db(
    spec_number: str,
    requested_release: str,
    db_file: Path | None = None,
    *,
    auto_crawl: bool = False,
) -> tuple[str, list[str]]:
    """Resolve a short release string to a full version from the spec database.

@@ -73,6 +77,7 @@ async def resolve_spec_release_from_db(
        spec_number: Spec number (e.g., "26.260").
        requested_release: Release version (e.g., "16", "17.0.0", or "latest").
        db_file: Optional path to database file. If None, uses PathConfig default.
        auto_crawl: If True, crawl spec metadata from 3GPP when DB has no version info.

    Returns:
        Tuple of (resolved_version, version_codes) where:
@@ -111,31 +116,45 @@ async def resolve_spec_release_from_db(
                    spec_number,
                    exc,
                )
        # If we can't find a version, return "latest" as fallback
        if auto_crawl and resolved.lower() in ("latest", requested_release.lower()):
            resolved, version_codes = await _auto_crawl_and_resolve(
                normalized_spec,
                requested_release,
                db_path,
                db_file,
            )
        return resolved, version_codes

    # Normal release resolution (not "latest")
    db_path = db_file if db_file is not None else PathConfig().db_file
    if db_path is None or not db_path.exists():
        if auto_crawl:
            return await _auto_crawl_and_resolve(
                normalized_spec,
                requested_release,
                db_path,
                db_file,
            )
        return resolved, version_codes

    try:
        async with SpecDatabase(db_path) as db:
            versions = await db.get_spec_versions(normalized_spec)
            if versions:
                # Extract the major release number from requested version
                # e.g., "19" -> 19, "19.1" -> 19, "19.0.0" -> 19
                requested_normalized = normalize_release_version(requested_release)
                requested_major = requested_normalized.split(".")[0] if "." in requested_normalized else requested_normalized

                # Find matching versions whose first component matches the requested release
                requested_major = requested_normalized.split(".")[0] if "." in requested_normalized else requested_release
                matching = [v for v in versions if v.version and v.version.split(".")[0] == requested_major]

                if matching:
                    # Use the first matching version as the resolved version
                    resolved = matching[0].version
                    # Extract version codes (suffix after "-" in original version)
                    version_codes.extend(v.version.split("-", 1)[1] for v in matching if "-" in v.version)
            elif auto_crawl:
                return await _auto_crawl_and_resolve(
                    normalized_spec,
                    requested_release,
                    db_path,
                    db_file,
                )
    except (OSError, ValueError) as exc:
        _logger.debug(
            "Could not resolve release '%s' for spec %s: %s",
@@ -147,6 +166,31 @@ async def resolve_spec_release_from_db(
    return resolved, version_codes


async def _auto_crawl_and_resolve(
    normalized_spec: str,
    requested_release: str,
    db_path: Path | None,
    db_file: Path | None,
) -> tuple[str, list[str]]:
    """Fetch spec metadata from 3GPP into the database, then retry resolution.

    Crawls version info for ``normalized_spec`` from the 3GPP source, stores it
    via ``SpecDatabase``, and re-runs ``resolve_spec_release_from_db`` with
    ``auto_crawl=False`` so the retry cannot recurse back here.

    Args:
        normalized_spec: Normalized spec number (e.g., "26.260").
        requested_release: Release string the caller asked for.
        db_path: Already-resolved database path, if the caller computed one.
        db_file: Original caller-supplied database override, forwarded to the
            retry so it resolves against the same database.

    Returns:
        Tuple of (resolved_version, version_codes). On any failure — or when no
        database path can be determined — falls back to
        ``(requested_release, [])``.
    """
    try:
        # Prefer the caller-resolved path; fall back to the configured default.
        target_db = PathConfig().db_file if db_path is None else db_path
        if target_db is None:
            return requested_release, []

        crawl_sources = [FunctionSpecSource("3gpp", fetch_threegpp_metadata)]
        async with SpecDatabase(target_db) as db:
            await db.crawl_specs([normalized_spec], requested_release, crawl_sources)

        # Retry resolution exactly once; auto_crawl=False breaks the recursion.
        return await resolve_spec_release_from_db(
            normalized_spec,
            requested_release,
            db_file=db_file,
            auto_crawl=False,
        )
    except Exception as exc:  # best-effort: any crawl/resolve failure degrades to fallback
        _logger.debug("Auto-crawl failed for spec %s: %s", normalized_spec, exc)
        return requested_release, []


async def checkout_tdoc_to_workspace(
    tdoc_id: str,
    checkout_base: Path,