Commit 6aaa3b45 authored by Jan Reimes
Browse files

test: migrate test suite to async patterns for oxyde DB backend

All tests that exercise the database layer are converted to use
async def + pytest-asyncio strict mode, matching the async nature
of the oxyde ORM manager API.

- conftest.py: async fixtures; populated_db uses async DB internals
- test_database.py: full async rewrite (13 tests)
- test_targeted_fetch.py: full async rewrite (17 tests)
- test_whatthespec.py: async DB seeding in test_meeting_id_lazy_resolution
- test_cli.py: async migration; stats test uses asyncio.run bridge
- test_crawler.py: async migration for all DB-touching tests
- test_specs_database.py: async migration for spec DB tests
parent b676d158
Loading
Loading
Loading
Loading
+16 −6
Original line number Diff line number Diff line
@@ -2,6 +2,7 @@

from __future__ import annotations

import asyncio
from datetime import UTC, date, datetime
from pathlib import Path

@@ -178,14 +179,18 @@ def populated_db(
    Returns:
        Populated TDocDatabase instance
    """
    with TDocDatabase(test_db_path) as db:
        insert_sample_meetings(db, sample_meetings)

    async def _populate() -> None:
        async with TDocDatabase(test_db_path) as db:
            await insert_sample_meetings_async(db, sample_meetings)
            for tdoc in sample_tdocs:
            _ = db.upsert_tdoc(tdoc)
                _ = await db.upsert_tdoc(tdoc)

    asyncio.run(_populate())
    return TDocDatabase(test_db_path)


def insert_sample_meetings(database: MeetingDatabase, meetings: list[dict[str, object]]) -> None:
async def insert_sample_meetings_async(database: MeetingDatabase, meetings: list[dict[str, object]]) -> None:
    """Insert sample meetings into database using the proper database API.

    This function uses db.upsert_meeting() instead of raw SQL to ensure
@@ -236,7 +241,12 @@ def insert_sample_meetings(database: MeetingDatabase, meetings: list[dict[str, o
        )

        # Use the proper database API instead of raw SQL
        _ = database.upsert_meeting(meeting_metadata)
        _ = await database.upsert_meeting(meeting_metadata)


def insert_sample_meetings(database: MeetingDatabase, meetings: list[dict[str, object]]) -> None:
    """Seed sample meetings from synchronous code.

    Blocking counterpart of ``insert_sample_meetings_async``: the coroutine is
    created and then driven to completion with ``asyncio.run``.

    Args:
        database: Database handle forwarded unchanged to the async helper.
        meetings: Sample meeting records forwarded unchanged to the async helper.

    Note:
        ``asyncio.run`` raises ``RuntimeError`` when an event loop is already
        running in the current thread, so async tests should await
        ``insert_sample_meetings_async`` directly instead of calling this wrapper.
    """
    seeding = insert_sample_meetings_async(database, meetings)
    asyncio.run(seeding)


if __name__ == "__main__":
+51 −45
Original line number Diff line number Diff line
@@ -2,8 +2,9 @@

from __future__ import annotations

import asyncio
from pathlib import Path
from unittest.mock import MagicMock, patch
from unittest.mock import AsyncMock, MagicMock, patch

import pytest
from packaging.version import Version
@@ -17,7 +18,7 @@ from tdoc_crawler.tdocs.models import TDocMetadata
from tdoc_crawler.tdocs.operations.crawl import TDocCrawlResult
from tdoc_crawler.tdocs.operations.fetch import FetchMissingResult

from .conftest import insert_sample_meetings
from .conftest import insert_sample_meetings_async

runner = CliRunner()

@@ -41,12 +42,12 @@ class TestCrawlCommand:

        mock_crawler = MagicMock()
        mock_crawler_class.return_value = mock_crawler
        mock_crawler.crawl.return_value = MagicMock(
        mock_crawler.crawl = AsyncMock(return_value=MagicMock(
            processed=10,
            inserted=10,
            updated=0,
            errors=[],
        )
        ))

        result = runner.invoke(
            app,
@@ -75,12 +76,12 @@ class TestCrawlCommand:

        mock_crawler = MagicMock()
        mock_crawler_class.return_value = mock_crawler
        mock_crawler.crawl.return_value = MagicMock(
        mock_crawler.crawl = AsyncMock(return_value=MagicMock(
            processed=5,
            inserted=5,
            updated=0,
            errors=[],
        )
        ))

        result = runner.invoke(
            app,
@@ -95,7 +96,7 @@ class TestCrawlMeetingsCommand:
    """Tests for crawl-meetings command."""

    @patch("tdoc_crawler.cli.crawl.MeetingCrawler")
    @patch("tdoc_crawler.cli.crawl.SpecDatabase")
    @patch("tdoc_crawler.cli.crawl.MeetingDatabase")
    def test_crawl_meetings_basic(
        self,
        mock_db_class: MagicMock,
@@ -104,17 +105,18 @@ class TestCrawlMeetingsCommand:
    ) -> None:
        """Test basic crawl-meetings command execution."""
        mock_db = MagicMock()
        mock_db_class.return_value.__enter__.return_value = mock_db
        mock_db.log_crawl_start.return_value = 1
        mock_db_class.return_value.__aenter__.return_value = mock_db
        mock_db.log_crawl_start = AsyncMock(return_value=1)
        mock_db.log_crawl_end = AsyncMock(return_value=None)

        mock_crawler = MagicMock()
        mock_crawler_class.return_value = mock_crawler
        mock_crawler.crawl.return_value = MagicMock(
        mock_crawler.crawl = AsyncMock(return_value=MagicMock(
            processed=20,
            inserted=20,
            updated=0,
            errors=[],
        )
        ))

        result = runner.invoke(
            app,
@@ -144,8 +146,8 @@ class TestQueryCommand:
    ) -> None:
        """Test query command with no results."""
        mock_db = MagicMock(spec=TDocDatabase)
        mock_db_class.return_value.__enter__.return_value = mock_db
        mock_db.query_tdocs.return_value = []
        mock_db_class.return_value.__aenter__.return_value = mock_db
        mock_db.query_tdocs = AsyncMock(return_value=[])

        result = runner.invoke(
            app,
@@ -164,8 +166,8 @@ class TestQueryCommand:
    ) -> None:
        """Test query with results in table format."""
        mock_db = MagicMock(spec=TDocDatabase)
        mock_db_class.return_value.__enter__.return_value = mock_db
        mock_db.query_tdocs.return_value = sample_tdocs
        mock_db_class.return_value.__aenter__.return_value = mock_db
        mock_db.query_tdocs = AsyncMock(return_value=sample_tdocs)

        result = runner.invoke(
            app,
@@ -184,8 +186,8 @@ class TestQueryCommand:
    ) -> None:
        """Test query with JSON output format."""
        mock_db = MagicMock(spec=TDocDatabase)
        mock_db_class.return_value.__enter__.return_value = mock_db
        mock_db.query_tdocs.return_value = sample_tdocs[:1]
        mock_db_class.return_value.__aenter__.return_value = mock_db
        mock_db.query_tdocs = AsyncMock(return_value=sample_tdocs[:1])

        result = runner.invoke(
            app,
@@ -205,8 +207,8 @@ class TestQueryCommand:
    ) -> None:
        """Test query for specific TDoc IDs."""
        mock_db = MagicMock(spec=TDocDatabase)
        mock_db_class.return_value.__enter__.return_value = mock_db
        mock_db.query_tdocs.return_value = sample_tdocs[:1]
        mock_db_class.return_value.__aenter__.return_value = mock_db
        mock_db.query_tdocs = AsyncMock(return_value=sample_tdocs[:1])

        result = runner.invoke(
            app,
@@ -219,7 +221,7 @@ class TestQueryCommand:
class TestQueryMeetingsCommand:
    """Tests for query-meetings command."""

    @patch("tdoc_crawler.cli.query.SpecDatabase")
    @patch("tdoc_crawler.cli.query.MeetingDatabase")
    def test_query_meetings_no_results(
        self,
        mock_db_class: MagicMock,
@@ -227,8 +229,8 @@ class TestQueryMeetingsCommand:
    ) -> None:
        """Test query-meetings with no results."""
        mock_db = MagicMock()
        mock_db_class.return_value.__enter__.return_value = mock_db
        mock_db.query_meetings.return_value = []
        mock_db_class.return_value.__aenter__.return_value = mock_db
        mock_db.query_meetings = AsyncMock(return_value=[])

        result = runner.invoke(
            app,
@@ -246,8 +248,8 @@ class TestQueryMeetingsCommand:
    ) -> None:
        """Test query-meetings with subgroup filter."""
        mock_db = MagicMock()
        mock_db_class.return_value.__enter__.return_value = mock_db
        mock_db.query_meetings.return_value = []
        mock_db_class.return_value.__aenter__.return_value = mock_db
        mock_db.query_meetings = AsyncMock(return_value=[])

        result = runner.invoke(
            app,
@@ -255,7 +257,7 @@ class TestQueryMeetingsCommand:
        )

        assert result.exit_code == 0
        # Verify that query_meetings was called with normalized subgroups (SA4 -> S4)
        # Verify that query_meetings was called with normalized subgroup code.
        call_args = mock_db.query_meetings.call_args[0][0]
        assert call_args.subgroups == ["S4"]

@@ -267,8 +269,8 @@ class TestQueryMeetingsCommand:
    ) -> None:
        """Test query-meetings with subgroup alias (S4 -> SA4)."""
        mock_db = MagicMock()
        mock_db_class.return_value.__enter__.return_value = mock_db
        mock_db.query_meetings.return_value = []
        mock_db_class.return_value.__aenter__.return_value = mock_db
        mock_db.query_meetings = AsyncMock(return_value=[])

        result = runner.invoke(
            app,
@@ -276,7 +278,7 @@ class TestQueryMeetingsCommand:
        )

        assert result.exit_code == 0
        # Verify that S4 alias was normalized
        # Verify that S4 alias was normalized to subgroup code.
        call_args = mock_db.query_meetings.call_args[0][0]
        assert "S4" in call_args.subgroups

@@ -288,8 +290,8 @@ class TestQueryMeetingsCommand:
    ) -> None:
        """Test query-meetings with plenary subgroup alias (RP -> RAN Plenary)."""
        mock_db = MagicMock()
        mock_db_class.return_value.__enter__.return_value = mock_db
        mock_db.query_meetings.return_value = []
        mock_db_class.return_value.__aenter__.return_value = mock_db
        mock_db.query_meetings = AsyncMock(return_value=[])

        result = runner.invoke(
            app,
@@ -297,7 +299,7 @@ class TestQueryMeetingsCommand:
        )

        assert result.exit_code == 0
        # Verify that RP alias was normalized
        # Verify that RP alias was normalized to subgroup code.
        call_args = mock_db.query_meetings.call_args[0][0]
        assert "RP" in call_args.subgroups

@@ -309,8 +311,8 @@ class TestQueryMeetingsCommand:
    ) -> None:
        """Test query-meetings with working group alias (SP -> SA)."""
        mock_db = MagicMock()
        mock_db_class.return_value.__enter__.return_value = mock_db
        mock_db.query_meetings.return_value = []
        mock_db_class.return_value.__aenter__.return_value = mock_db
        mock_db.query_meetings = AsyncMock(return_value=[])

        result = runner.invoke(
            app,
@@ -330,8 +332,8 @@ class TestQueryMeetingsCommand:
    ) -> None:
        """Test query-meetings with combined working group and subgroup filters."""
        mock_db = MagicMock()
        mock_db_class.return_value.__enter__.return_value = mock_db
        mock_db.query_meetings.return_value = []
        mock_db_class.return_value.__aenter__.return_value = mock_db
        mock_db.query_meetings = AsyncMock(return_value=[])

        result = runner.invoke(
            app,
@@ -355,10 +357,14 @@ class TestStatsCommand:
        sample_meetings: list[dict[str, object]],
    ) -> None:
        """Test stats command execution."""
        with TDocDatabase(test_db_path) as db:
            insert_sample_meetings(db, sample_meetings)

        async def _prepare_db() -> None:
            async with TDocDatabase(test_db_path) as db:
                await insert_sample_meetings_async(db, sample_meetings)
                for tdoc in sample_tdocs:
                _ = db.upsert_tdoc(tdoc)
                    _ = await db.upsert_tdoc(tdoc)

        asyncio.run(_prepare_db())

        result = runner.invoke(
            app,
@@ -726,12 +732,12 @@ class TestEnvironmentVariables:

        mock_crawler = MagicMock()
        mock_crawler_class.return_value = mock_crawler
        mock_crawler.crawl.return_value = MagicMock(
        mock_crawler.crawl = AsyncMock(return_value=MagicMock(
            processed=0,
            inserted=0,
            updated=0,
            errors=[],
        )
        ))

        # Set environment variable via monkeypatch
        monkeypatch.setenv("TDC_CACHE_DIR", str(test_cache_dir))
@@ -757,12 +763,12 @@ class TestEnvironmentVariables:

        mock_crawler = MagicMock()
        mock_crawler_class.return_value = mock_crawler
        mock_crawler.crawl.return_value = MagicMock(
        mock_crawler.crawl = AsyncMock(return_value=MagicMock(
            processed=0,
            inserted=0,
            updated=0,
            errors=[],
        )
        ))

        # Set environment variable via monkeypatch
        monkeypatch.setenv("TDC_WORKERS", "8")
@@ -784,8 +790,8 @@ class TestEnvironmentVariables:
    ) -> None:
        """Test that TDC_WORKING_GROUP environment variable filters by working group."""
        mock_db = MagicMock()
        mock_db_class.return_value.__enter__.return_value = mock_db
        mock_db.query_meetings.return_value = []
        mock_db_class.return_value.__aenter__.return_value = mock_db
        mock_db.query_meetings = AsyncMock(return_value=[])

        # Set environment variable via monkeypatch
        monkeypatch.setenv("TDC_WORKING_GROUP", "SA")
+21 −16
Original line number Diff line number Diff line
@@ -7,6 +7,7 @@ from datetime import date
from pathlib import Path
from unittest.mock import MagicMock, patch

import pytest
from bs4 import BeautifulSoup
from packaging.version import Version

@@ -23,9 +24,10 @@ from tdoc_crawler.tdocs.operations import TDocCrawler
class TestTDocCrawler:
    """Tests for TDocCrawler class."""

    def test_crawler_initialization(self, test_db_path: Path) -> None:
    @pytest.mark.asyncio
    async def test_crawler_initialization(self, test_db_path: Path) -> None:
        """Ensure crawler stores the provided database instance."""
        with TDocDatabase(test_db_path) as database:
        async with TDocDatabase(test_db_path) as database:
            crawler = TDocCrawler(database)
            assert crawler.database is database

@@ -45,7 +47,8 @@ class TestTDocCrawler:
        assert not TDOC_PATTERN.search("data.csv")

    @patch("tdoc_crawler.tdocs.operations.crawl.fetch_meeting_document_list_subinterpreter")
    def test_crawl_connection_failure(
    @pytest.mark.asyncio
    async def test_crawl_connection_failure(
        self,
        mock_fetch: MagicMock,
        test_db_path: Path,
@@ -53,9 +56,9 @@ class TestTDocCrawler:
        """Handle document list fetch failures gracefully."""
        mock_fetch.side_effect = OSError("Connection refused")

        with TDocDatabase(test_db_path) as database:
        async with TDocDatabase(test_db_path) as database:
            meeting = _create_test_meeting()
            _ = database.upsert_meeting(meeting)
            _ = await database.upsert_meeting(meeting)

            crawler = TDocCrawler(database)
            config = TDocCrawlConfig(
@@ -72,7 +75,7 @@ class TestTDocCrawler:
                max_retries=3,
                target_ids=None,
            )
            result = crawler.crawl(config)
            result = await crawler.crawl(config)

        assert result.processed == 0
        assert result.inserted == 0
@@ -80,7 +83,8 @@ class TestTDocCrawler:
        assert result.errors

    @patch("tdoc_crawler.tdocs.operations.crawl.fetch_meeting_document_list_subinterpreter")
    def test_crawl_collects_tdocs(
    @pytest.mark.asyncio
    async def test_crawl_collects_tdocs(
        self,
        mock_fetch: MagicMock,
        test_db_path: Path,
@@ -106,9 +110,9 @@ class TestTDocCrawler:
        )
        mock_fetch.return_value = json.dumps([tdoc.model_dump_json()])

        with TDocDatabase(test_db_path) as database:
        async with TDocDatabase(test_db_path) as database:
            meeting = _create_test_meeting()
            _ = database.upsert_meeting(meeting)
            _ = await database.upsert_meeting(meeting)

            crawler = TDocCrawler(database)
            config = TDocCrawlConfig(
@@ -125,19 +129,20 @@ class TestTDocCrawler:
                max_retries=3,
                target_ids=None,
            )
            result = crawler.crawl(config)
            result = await crawler.crawl(config)
            assert result.processed == 1
            assert result.inserted == 1
            assert result.updated == 0
            assert not result.errors

            stored = database.query_tdocs(TDocQueryConfig())
            stored = await database.query_tdocs(TDocQueryConfig())
            assert len(stored) == 1
            assert stored[0].tdoc_id == "R1-2301234"
            assert stored[0].file_size == 2048

    @patch("tdoc_crawler.tdocs.operations.crawl.fetch_meeting_document_list_subinterpreter")
    def test_crawl_targets_specific_ids(
    @pytest.mark.asyncio
    async def test_crawl_targets_specific_ids(
        self,
        mock_fetch: MagicMock,
        test_db_path: Path,
@@ -186,9 +191,9 @@ class TestTDocCrawler:
            ]
        )

        with TDocDatabase(test_db_path) as database:
        async with TDocDatabase(test_db_path) as database:
            meeting = _create_test_meeting()
            _ = database.upsert_meeting(meeting)
            _ = await database.upsert_meeting(meeting)

            crawler = TDocCrawler(database)
            config = TDocCrawlConfig(
@@ -205,12 +210,12 @@ class TestTDocCrawler:
                max_retries=3,
                target_ids=["R1-2301234"],
            )
            result = crawler.crawl(config)
            result = await crawler.crawl(config)

            assert result.processed == 1
            assert result.inserted == 1
            assert result.updated == 0
            stored = database.query_tdocs(TDocQueryConfig())
            stored = await database.query_tdocs(TDocQueryConfig())
            assert {record.tdoc_id for record in stored} == {"R1-2301234"}


+99 −64

File changed.

Preview size limit exceeded, changes collapsed.

+33 −25
Original line number Diff line number Diff line
@@ -2,11 +2,15 @@

from pathlib import Path

import pytest

from tdoc_crawler.database.oxyde_models import Specification
from tdoc_crawler.database.specs import SpecDatabase
from tdoc_crawler.specs.models import Specification, SpecificationSourceRecord, SpecificationVersion, SpecQueryFilters
from tdoc_crawler.specs.models import SpecificationSourceRecord, SpecificationVersion, SpecQueryFilters


def test_upsert_specification_roundtrip(tmp_path: Path) -> None:
@pytest.mark.asyncio
async def test_upsert_specification_roundtrip(tmp_path: Path) -> None:
    spec = Specification(
        spec_number="26.132",
        spec_number_compact="26132",
@@ -18,13 +22,14 @@ def test_upsert_specification_roundtrip(tmp_path: Path) -> None:
        latest_version="19.0.0",
    )
    db_path = tmp_path / "tdoc_crawler.db"
    with SpecDatabase(db_path) as database:
        created, changed = database.upsert_specification(spec)
    async with SpecDatabase(db_path) as database:
        created, changed = await database.upsert_specification(spec)
        assert created is True
        assert changed is False


def test_query_specs_returns_results(tmp_path: Path) -> None:
@pytest.mark.asyncio
async def test_query_specs_returns_results(tmp_path: Path) -> None:
    spec = Specification(
        spec_number="23.501",
        spec_number_compact="23501",
@@ -36,14 +41,15 @@ def test_query_specs_returns_results(tmp_path: Path) -> None:
        latest_version="17.3.0",
    )
    db_path = tmp_path / "tdoc_crawler.db"
    with SpecDatabase(db_path) as database:
        database.upsert_specification(spec)
        results = database.query_specs(SpecQueryFilters(spec_numbers=["23.501"]))
    async with SpecDatabase(db_path) as database:
        await database.upsert_specification(spec)
        results = await database.query_specs(SpecQueryFilters(spec_numbers=["23.501"]))
    assert len(results) == 1
    assert results[0].spec_number == "23.501"


def test_query_specs_filters(tmp_path: Path) -> None:
@pytest.mark.asyncio
async def test_query_specs_filters(tmp_path: Path) -> None:
    s1 = Specification(
        spec_number="26.132",
        spec_number_compact="26132",
@@ -64,27 +70,28 @@ def test_query_specs_filters(tmp_path: Path) -> None:
    )
    db_path = tmp_path / "tdoc_crawler.db"

    with SpecDatabase(db_path) as database:
        database.upsert_specification(s1)
        database.upsert_specification(s2)
    async with SpecDatabase(db_path) as database:
        await database.upsert_specification(s1)
        await database.upsert_specification(s2)

        # Title filter
        res_title = database.query_specs(SpecQueryFilters(title="audio"))
        res_title = await database.query_specs(SpecQueryFilters(title="audio"))
        assert len(res_title) == 1
        assert res_title[0].spec_number == "26.132"

        # WG filter
        res_wg = database.query_specs(SpecQueryFilters(working_group="SA2"))
        res_wg = await database.query_specs(SpecQueryFilters(working_group="SA2"))
        assert len(res_wg) == 1
        assert res_wg[0].spec_number == "23.501"

        # Status filter
        res_status = database.query_specs(SpecQueryFilters(status="Frozen"))
        res_status = await database.query_specs(SpecQueryFilters(status="Frozen"))
        assert len(res_status) == 1
        assert res_status[0].spec_number == "23.501"


def test_query_specs_discrepancies(tmp_path: Path) -> None:
@pytest.mark.asyncio
async def test_query_specs_discrepancies(tmp_path: Path) -> None:
    spec = Specification(
        spec_number="23.501",
        spec_number_compact="23501",
@@ -113,12 +120,12 @@ def test_query_specs_discrepancies(tmp_path: Path) -> None:
    )
    db_path = tmp_path / "tdoc_crawler.db"

    with SpecDatabase(db_path) as database:
        database.upsert_specification(spec)
        database.upsert_spec_source_record(record_a)
        database.upsert_spec_source_record(record_b)
    async with SpecDatabase(db_path) as database:
        await database.upsert_specification(spec)
        await database.upsert_spec_source_record(record_a)
        await database.upsert_spec_source_record(record_b)

        results = database.query_specs(SpecQueryFilters(spec_numbers=["23.501"]))
        results = await database.query_specs(SpecQueryFilters(spec_numbers=["23.501"]))

    assert len(results) == 1
    differences = results[0].source_differences
@@ -128,7 +135,8 @@ def test_query_specs_discrepancies(tmp_path: Path) -> None:
    assert "status" not in differences


def test_upsert_spec_source_and_version(tmp_path: Path) -> None:
@pytest.mark.asyncio
async def test_upsert_spec_source_and_version(tmp_path: Path) -> None:
    source_record = SpecificationSourceRecord(
        spec_number="26.132",
        source_name="3gpp",
@@ -144,6 +152,6 @@ def test_upsert_spec_source_and_version(tmp_path: Path) -> None:
        source_name="3gpp",
    )
    db_path = tmp_path / "tdoc_crawler.db"
    with SpecDatabase(db_path) as database:
        database.upsert_spec_source_record(source_record)
        database.upsert_spec_version(version)
    async with SpecDatabase(db_path) as database:
        await database.upsert_spec_source_record(source_record)
        await database.upsert_spec_version(version)
Loading