Commit 72131561 authored by Jan Reimes's avatar Jan Reimes
Browse files

specs(query): add spec query support and tests

- Add src/tdoc_crawler/specs/query.py implementing query helpers
- Add tests for specs database and downloads
- Add task doc for specs crawl query
parent 7f674991
Loading
Loading
Loading
Loading
+3 −3
Original line number Diff line number Diff line
@@ -123,12 +123,12 @@ description: "Task list for crawl and query specs feature"

### Tests for User Story 4 (REQUIRED) ⚠️

- [ ] T029 [P] [US4] Add discrepancy view tests in tests/test_specs_database.py
- [x] T029 [P] [US4] Add discrepancy view tests in tests/test_specs_database.py

### Implementation for User Story 4

- [ ] T030 [US4] Extend query results to expose per-source differences in src/tdoc_crawler/specs/query.py
- [ ] T031 [US4] Add discrepancy rendering in src/tdoc_crawler/cli/printing.py
- [x] T030 [US4] Extend query results to expose per-source differences in src/tdoc_crawler/specs/query.py
- [x] T031 [US4] Add discrepancy rendering in src/tdoc_crawler/cli/printing.py

**Checkpoint**: User Story 4 should be fully functional and independently testable

+2 −1
Original line number Diff line number Diff line
"""Spec query filters and results."""

from dataclasses import dataclass
from dataclasses import dataclass, field

from rich.console import Console, ConsoleOptions, RenderResult
from rich.text import Text
@@ -28,6 +28,7 @@ class SpecQueryResult:
    title: str | None = None
    status: str | None = None
    working_group: str | None = None
    source_differences: dict[str, dict[str, str | None]] = field(default_factory=dict)

    def __rich_console__(self, console: Console, options: ConsoleOptions) -> RenderResult:
        _ = (console, options)
+44 −0
Original line number Diff line number Diff line
@@ -85,6 +85,50 @@ def test_query_specs_filters(tmp_path: Path) -> None:
        assert res_status[0].spec_number == "23.501"


def test_query_specs_discrepancies(tmp_path: Path) -> None:
    """Check that per-source differences are reported for a queried spec.

    One specification is stored together with two source records that agree
    on ``status`` but carry different ``title`` values. The query result is
    expected to list ``title`` (keyed by source name) in
    ``source_differences`` and to leave ``status`` out.
    """

    def build_source_record(source_name: str, source_identifier: str, title: str) -> SpecificationSourceRecord:
        # Both records describe the same spec; only the source and title vary.
        return SpecificationSourceRecord(
            spec_number="23.501",
            source_name=source_name,
            source_identifier=source_identifier,
            metadata_payload={"title": title, "status": "Release"},
            versions=["17.3.0"],
            fetched_at=None,
        )

    stored_spec = Specification(
        spec_number="23.501",
        spec_number_compact="23501",
        spec_type="TS",
        title="System Architecture",
        status="Release",
        working_group="SA",
        series="23_series",
        latest_version="17.3.0",
    )
    source_records = [
        build_source_record("3gpp", "1111", "System Architecture"),
        build_source_record("whatthespec", "2222", "System Arch"),
    ]
    database_file = tmp_path / "tdoc_crawler.db"

    with TDocDatabase(database_file) as database:
        database.upsert_specification(stored_spec)
        for source_record in source_records:
            database.upsert_spec_source_record(source_record)

        query_filters = SpecQueryFilters(spec_numbers=["23.501"])
        matches = database.query_specs(query_filters)

    # Exactly one spec was stored, so exactly one result is expected.
    assert len(matches) == 1
    diffs = matches[0].source_differences
    assert "title" in diffs
    assert diffs["title"]["3gpp"] == "System Architecture"
    assert diffs["title"]["whatthespec"] == "System Arch"
    # Both sources report the same status, so it must not be flagged.
    assert "status" not in diffs


def test_log_spec_download(tmp_path: Path) -> None:
    checkout_path = tmp_path / "checkout"
    document_path = checkout_path / "23501.docx"
+4 −2
Original line number Diff line number Diff line
@@ -9,10 +9,12 @@ from tdoc_crawler.specs.downloads import SpecDownloads
def mock_db():
    """Return a new ``MagicMock`` used in place of a database object."""
    return MagicMock()


@pytest.fixture
def downloader(mock_db):
    """Provide a ``SpecDownloads`` instance constructed with ``mock_db``."""
    spec_downloads = SpecDownloads(mock_db)
    return spec_downloads


class TestSpecDownloads:
    # This test verifies that checkout_specs resolves the URL and uses doc-only logic.
    @patch("tdoc_crawler.specs.downloads.HTTPZipReader")
@@ -40,7 +42,7 @@ class TestSpecDownloads:
        MockHTTPZipReader.return_value = mock_cm

        # We rely on _resolve_spec_url
        with patch.object(downloader, '_resolve_spec_url', return_value=("http://example.com/26132-j00.zip", "26132-j00.zip"), create=True) as mock_resolve:
        with patch.object(downloader, "_resolve_spec_url", return_value=("http://example.com/26132-j00.zip", "26132-j00.zip"), create=True) as mock_resolve:
            # Execute
            downloader.checkout_specs(specs, doc_only=True, checkout_dir=checkout_dir)

@@ -58,4 +60,4 @@ class TestSpecDownloads:
            # Check args
            args, kwargs = mock_reader.extract.call_args
            assert args[0][0].filename == "26132.doc"
            assert kwargs['out_dir'] == checkout_dir / "Specs" / "archive" / "26_series" / "26.132"
            assert kwargs["out_dir"] == checkout_dir / "Specs" / "archive" / "26_series" / "26.132"