Commit 4fe04531 authored by Jan Reimes
Browse files

Refactor: Import domain models directly

Import domain model classes (TDocMetadata, MeetingMetadata, etc.) directly from their canonical modules instead of going through the re-exports in the tdoc_crawler.models package.

This eliminates circular import issues caused by deprecated re-export modules (models/tdocs.py, models/meetings.py).

Updated files:
- database/base.py
- database/__init__.py
- database/meetings.py
- database/tdocs.py
- database/specs.py
parent b5aa97bf
Loading
Loading
Loading
Loading
+0 −19
Original line number Diff line number Diff line
@@ -19,23 +19,4 @@ __all__ = [
    "MeetingDatabase",
    "SpecDatabase",
    "TDocDatabase",
    "resolve_meeting_id",
]


def resolve_meeting_id(database: MeetingDatabase, meeting_name: str) -> int | None:
    """Look up the numeric meeting ID for a meeting name.

    Module-level convenience wrapper: the lookup itself, including any
    tolerance for variations in how the meeting name is written, is
    performed entirely by ``database.resolve_meeting_id``.

    Args:
        database: Meeting database used to perform the lookup
        meeting_name: Meeting identifier (e.g., "SA4#133-e" or "S4-133-e")

    Returns:
        The value produced by ``database.resolve_meeting_id``; per the
        annotation, the meeting ID if found, None otherwise
    """
    meeting_id = database.resolve_meeting_id(meeting_name)
    return meeting_id
+52 −7
Original line number Diff line number Diff line
@@ -10,19 +10,17 @@ from typing import Self
from pydantic_sqlite import DataBase

from tdoc_crawler.database.errors import DatabaseError
from tdoc_crawler.models import (
    WORKING_GROUP_RECORDS,
    CrawlLogEntry,
    MeetingMetadata,
    TDocMetadata,
)
from tdoc_crawler.models.specs import (
from tdoc_crawler.meetings.models import MeetingMetadata
from tdoc_crawler.models.crawl_log import CrawlLogEntry
from tdoc_crawler.specs.models import (
    Specification,
    SpecificationDownload,
    SpecificationSourceRecord,
    SpecificationVersion,
)
from tdoc_crawler.models.subworking_groups import SUBWORKING_GROUP_RECORDS
from tdoc_crawler.models.working_groups import WORKING_GROUP_RECORDS
from tdoc_crawler.tdocs.models import TDocMetadata

_logger = logging.getLogger(__name__)

@@ -105,6 +103,53 @@ class DocDatabase:
        """
        return self._clear_tables(["spec_downloads", "spec_versions", "spec_source_records", "specs"])

    def log_crawl_start(self, crawl_type: str, filters: list[str] | None, incremental: bool) -> int:
        """Log the start of a crawl operation.

        Builds a ``CrawlLogEntry`` with zeroed counters and no end time and
        adds it to the ``crawl_log`` table.

        Args:
            crawl_type: Type of crawl (e.g., 'tdoc', 'meeting', 'spec')
            filters: Filter strings applied to the crawl; ``None`` is stored
                as an empty JSON list
            incremental: Whether this is an incremental crawl

        Returns:
            The ``id`` attribute of the entry that was added
        """
        # CrawlLogEntry is taken from this module's top-level imports. A
        # function-local import through the ``tdoc_crawler.models`` package is
        # unnecessary and would go back through the re-export layer that is
        # associated with circular imports.
        entry = CrawlLogEntry(
            crawl_type=crawl_type,
            # Filters are persisted as a JSON-encoded string.
            filters=json.dumps(filters or []),
            incremental=incremental,
            # NOTE(review): naive local timestamp; confirm whether UTC is expected.
            start_time=datetime.now(),
            end_time=None,
            items_added=0,
            items_updated=0,
            errors_count=0,
        )
        # NOTE(review): pydantic_sqlite's DataBase.add presumably expects ``pk``
        # to be the name of the primary-key field; confirm that ``True`` is
        # accepted and that ``entry.id`` is populated before it is returned.
        self.connection.add("crawl_log", entry, pk=True)
        return entry.id

    def log_crawl_end(
        self,
        crawl_id: int,
        items_added: int,
        items_updated: int,
        errors_count: int,
    ) -> None:
        """Record the completion of a crawl in its ``crawl_log`` row.

        Sets ``end_time`` to the current time and stores the final counters
        on the row whose ``id`` equals ``crawl_id``, then commits.

        Args:
            crawl_id: ID of the crawl log entry to update
            items_added: Number of new items added
            items_updated: Number of existing items updated
            errors_count: Number of errors encountered
        """
        # The update runs directly on the connection's underlying ``_db``
        # handle using a parameterized statement.
        raw_db = self.connection._db
        finished_at = datetime.now()
        update_sql = "UPDATE crawl_log SET end_time = ?, items_added = ?, items_updated = ?, errors_count = ? WHERE id = ?"
        bound_values = (finished_at, items_added, items_updated, errors_count, crawl_id)
        raw_db.execute(update_sql, bound_values)
        raw_db.commit()

    def _ensure_reference_data(self) -> None:
        """Populate reference tables for working and subworking groups."""
        database = self.connection
+5 −9
Original line number Diff line number Diff line
@@ -6,15 +6,11 @@ from collections.abc import Callable, Iterable
from datetime import datetime

from tdoc_crawler.database.base import DocDatabase
from tdoc_crawler.models import (
    CODE_INDEX,
    WORKING_GROUP_RECORDS,
    CrawlLogEntry,
    MeetingMetadata,
    MeetingQueryConfig,
    SortOrder,
    WorkingGroup,
)
from tdoc_crawler.meetings.models import MeetingMetadata, MeetingQueryConfig
from tdoc_crawler.models.crawl_log import CrawlLogEntry
from tdoc_crawler.models.subworking_groups import CODE_INDEX
from tdoc_crawler.models.working_groups import WORKING_GROUP_RECORDS, WorkingGroup
from tdoc_crawler.models.base import SortOrder
from tdoc_crawler.utils.misc import utc_now
from tdoc_crawler.utils.normalization import normalize_portal_meeting_name

+3 −3
Original line number Diff line number Diff line
@@ -10,14 +10,14 @@ from dataclasses import dataclass
from datetime import datetime

from tdoc_crawler.database.base import DocDatabase
from tdoc_crawler.models import SpecQueryFilters, SpecQueryResult
from tdoc_crawler.models.specs import (
from tdoc_crawler.specs.models import (
    Specification,
    SpecificationDownload,
    SpecificationSourceRecord,
    SpecificationVersion,
    SpecQueryFilters,
    SpecQueryResult,
)
from tdoc_crawler.specs.sources.base import SpecSource
from tdoc_crawler.utils.normalization import normalize_spec_number

_logger = logging.getLogger(__name__)
+5 −4
Original line number Diff line number Diff line
@@ -5,18 +5,19 @@ from collections.abc import Callable, Iterable
from datetime import UTC, datetime
from decimal import Decimal

from tdoc_crawler.database.meetings import MeetingDatabase
from tdoc_crawler.models import QueryConfig, TDocMetadata, WorkingGroup
from tdoc_crawler.database.base import DocDatabase
from tdoc_crawler.models import WorkingGroup
from tdoc_crawler.tdocs.models import QueryConfig, TDocMetadata
from tdoc_crawler.utils.misc import utc_now

_logger = logging.getLogger(__name__)


class TDocDatabase(MeetingDatabase):
class TDocDatabase(DocDatabase):
    """Unified database operations for TDocs and Meetings.

    This class provides a unified interface for both TDoc and Meeting operations
    by inheriting from MeetingDatabase. This maintains backward compatibility
    by inheriting from DocDatabase. This maintains backward compatibility
    with code that expects a single database interface.
    """