Commit f121cfa0 authored by Jan Reimes's avatar Jan Reimes
Browse files

feat(workspace): enhance spec addition with range syntax support

* Update workspace command to accept ranges for spec numbers.
* Modify argument help to include range syntax examples.
* Implement range expansion logic in workspace member addition.
* Add tests for new range syntax functionality in spec normalization.
parent 5c2ed818
Loading
Loading
Loading
Loading
+22 −1
Original line number Diff line number Diff line
@@ -44,9 +44,29 @@ Add documents to a workspace. Accepts TDoc IDs and spec numbers.

# Add multiple items
3gpp-crawler workspace add 26260 26261 --kind spec --release 18.0

# Add a range of specs (colon or dash separator)
3gpp-crawler workspace add 26.260-26.266 --kind spec
3gpp-crawler workspace add 26250:26258 --kind spec

# Short-format right side (inherits series from left)
3gpp-crawler workspace add 26.131-132 --kind spec
3gpp-crawler workspace add 26.260-265 --kind spec

# Offset syntax
3gpp-crawler workspace add 26.260+6 --kind spec
```

Spec members added without `--release` resolve to the latest available version from the database. If the database has no version information, the spec is auto-crawled from 3GPP.
Spec members added without `--release` resolve to the latest available version from the database. If the database has no version information, the spec is auto-crawled from 3GPP. Specs that cannot be found are skipped with an error message.

Range syntax (`--kind spec` only):

| Syntax | Example | Result |
|--------|---------|--------|
| `X.YYY-X.ZZZ` | `26.260-26.266` | Full range, both dotted |
| `XXXXX-XXXXX` | `26250-26258` | Full range, both undotted |
| `X.YYY-ZZZ` | `26.131-132` | Short right side, inherits series from left |
| `X.YYY+N` | `26.260+6` | Offset (26.260 through 26.266) |

### `workspace members`

@@ -160,6 +180,7 @@ Members added with `--release 18.0` use the explicit release. Members added with
# 2. Add documents
3gpp-crawler workspace add S4-250638
3gpp-crawler workspace add 26260 --kind spec --release 18.0
3gpp-crawler workspace add 26.131-132 --kind spec   # range: 26.131, 26.132

# 3. Extract
3gpp-crawler workspace process --profile markdown-only
+1 −1
Original line number Diff line number Diff line
@@ -15,7 +15,7 @@ TDocIdsArgument = Annotated[list[str] | None, typer.Argument(help="TDoc identifi
TDocIdArgument = Annotated[str, typer.Argument(help="TDoc identifier to download and open")]
CheckoutTDocIdsArgument = Annotated[list[str], typer.Argument(help="TDoc identifier(s) to checkout")]
SpecArgument = Annotated[list[str] | None, typer.Argument(help="Spec number(s) to query (dotted or undotted)")]
WorkspaceItemsArgument = Annotated[list[str] | None, typer.Argument(help="Items to add (TDoc IDs, spec numbers, etc.)")]
WorkspaceItemsArgument = Annotated[list[str] | None, typer.Argument(help="Items to add: TDoc IDs, spec numbers, or ranges like 26.260-26.266 / 26250:258")]

# Options - TDocs/Meetings
WorkingGroupOption = Annotated[
+17 −1
Original line number Diff line number Diff line
@@ -33,6 +33,7 @@ from tdoc_crawler.models.base import OutputFormat, SortOrder
from tdoc_crawler.models.workspaces import SourceKind
from tdoc_crawler.tdocs.models import TDocQueryConfig
from tdoc_crawler.utils.date_parser import parse_partial_date
from tdoc_crawler.utils.normalization import expand_spec_ranges_batch
from tdoc_crawler.workspaces import (
    add_workspace_members,
    get_active_workspace,
@@ -70,10 +71,11 @@ def _resolve_spec_release_for_add(
    release: str,
    source_kind: SourceKind,
    auto_crawl: bool,
) -> str:
) -> str | None:
    """Resolve a spec release selector to a concrete version for workspace add.

    For non-spec kinds or when resolution fails, returns *release* unchanged.
    Returns None when the spec does not exist in the database.
    """
    if source_kind != SourceKind.SPEC:
        return release
@@ -81,6 +83,11 @@ def _resolve_spec_release_for_add(
        resolved, _ = asyncio.run(
            resolve_spec_release_from_db(item, release, auto_crawl=auto_crawl),
        )
        # If auto-crawl was tried and resolved is still "latest", the spec
        # doesn't exist anywhere.
        if resolved.lower() == "latest" and release.lower() == "latest":
            console.print(f"[red]  Spec {item} not found, skipping.[/red]")
            return None
        console.print(f"[dim]  Resolved {item} release '{release}' -> {resolved}[/dim]")
        return resolved
    except ValueError as exc:
@@ -178,8 +185,17 @@ def workspace_add(
            )

    elif items:
        if source_kind == SourceKind.SPEC:
            original_count = len(list(items))
            expanded = expand_spec_ranges_batch(list(items))
            skipped = original_count - len(expanded)
            if skipped > 0:
                console.print(f"[yellow]Skipped {skipped} invalid spec input(s).[/yellow]")
            items = expanded
        for item in items:
            resolved_release = _resolve_spec_release_for_add(item, release, source_kind, auto_crawl_specs)
            if resolved_release is None:
                continue
            members.append(
                make_workspace_member(
                    source_item_id=item,
+28 −8
Original line number Diff line number Diff line
@@ -116,15 +116,35 @@ def _expand_offset_range(cleaned: str) -> Generator[str]:


def _expand_dash_range(left: str, right: str, original: str) -> Generator[str]:
    """Expand dash/colon range syntax like '26.260-26.266'."""
    """Expand dash/colon range syntax like '26.260-26.266' or '26.131-132'.

    The right side can be:
    - A full spec number: ``26.260-26.266``
    - A short suffix replacing the trailing digits: ``26.131-132``
    - An undotted full number: ``26250-26258``
    """
    series1, num1, format1, digits1 = _parse_spec_number(left)

    # Try right as a full spec number first; fall back to short suffix
    try:
        series2, num2, format2, digits2 = _parse_spec_number(right)
        if series1 != series2:
            # Series mismatch — maybe right is a short numeric suffix?
            msg = f"Series numbers don't match: {series1} vs {series2}"
        raise NormalizationError(msg)
            raise NormalizationError(msg)  # noqa: TRY301
        if format1 != format2 and (digits1 == 1 or digits2 == 1):
        msg = "Both range endpoints must use the same format"
        raise NormalizationError(msg)
            # Format mismatch due to 1-digit increment — treat as short suffix
            raise NormalizationError  # noqa: TRY301
    except NormalizationError:
        # Right side is a short numeric suffix (e.g. "132" in "26.131-132")
        right_stripped = right.strip()
        if not right_stripped.isdigit():
            raise
        # Reuse the series and format from the left side
        series2 = series1
        num2 = right_stripped.zfill(3)
        format2 = format1
        digits2 = len(right_stripped)
    start_num = int(num1)
    end_num = int(num2)
    _validate_range(start_num, end_num, original)
+15 −2
Original line number Diff line number Diff line
@@ -227,9 +227,10 @@ def test_expand_all_range_variants() -> None:
    result7 = list(expand_spec_ranges_batch(["TS 26.260:TR 26.266"]))
    assert result7 == ["26.260", "26.261", "26.262", "26.263", "26.264", "26.265", "26.266"]

    # Test that 26.260-266 (ambiguous) is silently skipped
    # 26.260-266: right side "266" parses as spec 26.006, same series,
    # so it expands via short-suffix fallback to 260..266
    result = list(expand_spec_ranges_batch(["26.260-266"]))
    assert result == []
    assert result == ["26.260", "26.261", "26.262", "26.263", "26.264", "26.265", "26.266"]

    # Test dotted with one-digit suffix is allowed and normalized
    result8 = list(expand_spec_ranges_batch(["26.2-26.4"]))
@@ -266,6 +267,18 @@ def test_expand_edge_cases() -> None:
    result = list(expand_spec_ranges_batch(["23.498-23.502"]))
    assert result == ["23.498", "23.499", "23.500", "23.501", "23.502"]

    # Short-format suffix: dotted left, numeric-only right
    result = list(expand_spec_ranges_batch(["26.131-132"]))
    assert result == ["26.131", "26.132"]

    # Short-format suffix: undotted left, short right (colon)
    result = list(expand_spec_ranges_batch(["26250:258"]))
    assert result == ["26.250", "26.251", "26.252", "26.253", "26.254", "26.255", "26.256", "26.257", "26.258"]

    # Full dotted left, short right (dash)
    result = list(expand_spec_ranges_batch(["26.260-265"]))
    assert result == ["26.260", "26.261", "26.262", "26.263", "26.264", "26.265"]


def test_expand_invalid_ranges() -> None:
    """Test invalid range syntax - invalid specs are silently skipped in batch mode."""