Commit a110a426 authored by Jan Reimes
Browse files

refactor(database): replace resolve_meeting_id function with method on TDocDatabase

- Removed standalone resolve_meeting_id function from the database module.
- Updated references to use the new method on TDocDatabase.
- Improved code clarity by encapsulating meeting ID resolution within the database class.
parent 76b2aa01
Loading
Loading
Loading
Loading
+0 −58
Original line number Diff line number Diff line
@@ -171,64 +171,6 @@ def infer_working_groups_from_ids(ids: Iterable[str]) -> list[WorkingGroup]:
    return resolved or [WorkingGroup.RAN, WorkingGroup.SA, WorkingGroup.CT]


def resolve_meeting_id(database: TDocDatabase, meeting_name: str) -> int | None:
    """Resolve a meeting name to its meeting_id using the local database.

    Matching is case-insensitive and compares against ``meeting.short_name``
    of every meeting returned by ``database.query_meetings``. Two passes are
    made, each trying the original name first and then its normalized form:

    1. Exact match.
    2. Prefix/suffix match in either direction, for variations like
       "SA4-e" vs "3GPPSA4-e".

    An exact match anywhere in the result set always wins over a
    prefix/suffix match, so "SA4#13" cannot be shadowed by an earlier
    "SA4#133" row.

    Args:
        database: Database connection
        meeting_name: Meeting identifier (e.g., "SA4#133-e" or "S4-133-e")

    Returns:
        Meeting ID if found, None otherwise (including for an empty or
        whitespace-only ``meeting_name``).
    """
    # An empty string is a prefix of every string, so without this guard an
    # empty name would "resolve" to whichever meeting happens to come first.
    if not meeting_name or not meeting_name.strip():
        return None

    # Candidate spellings, lower-cased once; the normalized form is only
    # added when it is non-empty and actually different.
    candidates = [meeting_name.lower()]
    normalized = normalize_portal_meeting_name(meeting_name)
    if normalized and normalized.strip():
        normalized_lower = normalized.lower()
        if normalized_lower not in candidates:
            candidates.append(normalized_lower)

    # Query all meetings from database
    config = MeetingQueryConfig(
        cache_dir=database.db_file.parent,
        working_groups=None,
        subgroups=None,
        limit=None,
        order=SortOrder.DESC,
        include_without_files=True,
    )
    # Meetings without a short name can never match; drop them up front.
    named_meetings = [
        (meeting.short_name.lower(), meeting.meeting_id)
        for meeting in database.query_meetings(config)
        if meeting.short_name
    ]

    # Pass 1: exact matches take priority over any fuzzy match.
    for candidate in candidates:
        for short_name, meeting_id in named_meetings:
            if candidate == short_name:
                return meeting_id

    # Pass 2: candidate is prefix/suffix of the stored name or vice versa.
    for candidate in candidates:
        for short_name, meeting_id in named_meetings:
            if (
                short_name.startswith(candidate)
                or short_name.endswith(candidate)
                or candidate.startswith(short_name)
                or candidate.endswith(short_name)
            ):
                return meeting_id

    return None


def launch_file(path: Path) -> None:
    """Launch file in system's default application."""
    if not path.exists():
+10 −7
Original line number Diff line number Diff line
@@ -5,10 +5,12 @@ from __future__ import annotations
import logging
from decimal import Decimal, InvalidOperation
from pathlib import Path
from typing import Any

import requests

from tdoc_crawler.database import TDocDatabase, resolve_meeting_id
from tdoc_crawler.config import resolve_cache_manager
from tdoc_crawler.database import TDocDatabase
from tdoc_crawler.http_client import create_cached_session
from tdoc_crawler.models.base import HttpCacheConfig
from tdoc_crawler.models.tdocs import TDocMetadata
@@ -22,7 +24,7 @@ class WhatTheSpecResolutionError(Exception):
    """Raised when WhatTheSpec resolution fails."""


def _parse_agenda_item_nbr(value: object) -> Decimal:
def _parse_agenda_item_nbr(value: Any) -> Decimal:
    """Parse agenda item number as Decimal with fallback to zero."""
    if value is None:
        return Decimal(0)
@@ -34,12 +36,12 @@ def _parse_agenda_item_nbr(value: object) -> Decimal:


def _resolve_meeting_id(db_file: Path, meeting_name: str | None) -> int:
    """Resolve meeting name to meeting_id using local database."""
    """Helper to resolve meeting name to meeting_id using local database."""
    if not meeting_name:
        return 0
    try:
        with TDocDatabase(db_file) as database:
            resolved = resolve_meeting_id(database, meeting_name)
            resolved = database.resolve_meeting_id(meeting_name)
    except Exception as exc:
        logger.warning(f"Failed to resolve meeting '{meeting_name}': {exc}")
        return 0
@@ -48,29 +50,30 @@ def _resolve_meeting_id(db_file: Path, meeting_name: str | None) -> int:

def resolve_via_whatthespec(
    tdoc_id: str,
    cache_dir: Path,
    db_file: Path,
    http_cache: HttpCacheConfig,
    timeout: int = 30,
    session: requests.Session | None = None,
    cache_manager_name: str | None = None,
) -> TDocMetadata | None:
    """Resolve TDoc metadata from whatthespec.net API.

    Args:
        tdoc_id: TDoc identifier (e.g., "S4-260001").
        cache_dir: Path to HTTP cache SQLite database.
        db_file: Path to local TDoc database.
        http_cache: HTTP cache configuration for hishel.
        timeout: Request timeout in seconds.
        session: Optional requests.Session to reuse.
        cache_manager_name: Optional name of the cache manager to use for HTTP caching.

    Returns:
        TDocMetadata if found, otherwise None.
    """
    manager = resolve_cache_manager(cache_manager_name)
    temp_session: requests.Session | None = None
    if session is None:
        temp_session = create_cached_session(
            cache_path=cache_path,
            cache_dir=manager.http_cache_dir,
            ttl=http_cache.ttl,
            refresh_ttl_on_access=http_cache.refresh_ttl_on_access,
            max_retries=3,
+6 −63
Original line number Diff line number Diff line
@@ -4,71 +4,14 @@ from __future__ import annotations

import logging

from tdoc_crawler.database.connection import TDocDatabase
from tdoc_crawler.database.connection import SpecDatabase, TDocDatabase
from tdoc_crawler.database.errors import DatabaseError
from tdoc_crawler.models import MeetingMetadata, MeetingQueryConfig, SortOrder
from tdoc_crawler.specs.normalization import normalize_portal_meeting_name

logger = logging.getLogger(__name__)


def resolve_meeting_id(database: TDocDatabase, meeting_name: str) -> int | None:
    """Resolve a meeting name to its meeting_id using the local database.

    Matching is case-insensitive and compares against ``meeting.short_name``
    of every meeting returned by ``database.query_meetings``. Two passes are
    made, each trying the original name first and then its normalized form:

    1. Exact match.
    2. Prefix/suffix match in either direction, for variations like
       "SA4-e" vs "3GPPSA4-e".

    An exact match anywhere in the result set always wins over a
    prefix/suffix match, so "SA4#13" cannot be shadowed by an earlier
    "SA4#133" row.

    Args:
        database: Database connection
        meeting_name: Meeting identifier (e.g., "SA4#133-e" or "S4-133-e")

    Returns:
        Meeting ID if found, None otherwise (including for an empty or
        whitespace-only ``meeting_name``).
    """
    # An empty string is a prefix of every string, so without this guard an
    # empty name would "resolve" to whichever meeting happens to come first.
    if not meeting_name or not meeting_name.strip():
        return None

    # Candidate spellings, lower-cased once; the normalized form is only
    # added when it is non-empty and actually different.
    candidates = [meeting_name.lower()]
    normalized = normalize_portal_meeting_name(meeting_name)
    if normalized and normalized.strip():
        normalized_lower = normalized.lower()
        if normalized_lower not in candidates:
            candidates.append(normalized_lower)

    # Query all meetings from database
    config = MeetingQueryConfig(
        cache_dir=database.db_file.parent,
        working_groups=None,
        subgroups=None,
        limit=None,
        order=SortOrder.DESC,
        include_without_files=True,
    )
    # Meetings without a short name can never match; drop them up front.
    named_meetings = [
        (meeting.short_name.lower(), meeting.meeting_id)
        for meeting in database.query_meetings(config)
        if meeting.short_name
    ]

    # Pass 1: exact matches take priority over any fuzzy match.
    for candidate in candidates:
        for short_name, meeting_id in named_meetings:
            if candidate == short_name:
                return meeting_id

    # Pass 2: candidate is prefix/suffix of the stored name or vice versa.
    for candidate in candidates:
        for short_name, meeting_id in named_meetings:
            if (
                short_name.startswith(candidate)
                or short_name.endswith(candidate)
                or candidate.startswith(short_name)
                or candidate.endswith(short_name)
            ):
                return meeting_id

    return None


# Names exported as this module's public API.
# NOTE(review): __all__ is assigned twice in a row, so the second list
# overrides the first. In this diff view these look like the before/after
# versions of the export list — confirm only one is meant to remain.
__all__ = ["DatabaseError", "MeetingMetadata", "TDocDatabase", "resolve_meeting_id"]
__all__ = [
    "DatabaseError",
    "SpecDatabase",
    "TDocDatabase",
]
+2 −2
Original line number Diff line number Diff line
@@ -158,8 +158,8 @@ def fetch_missing_tdocs_batch(
            metadata = client.fetch_tdoc_metadata(tdoc_id)

            if metadata.meeting_name:
                meeting_id = resolve_meeting_id(database, metadata.meeting_name)
                if meeting_id:
                meeting_id = database.resolve_meeting_id(metadata.meeting_name)
                if meeting_id is not None:
                    metadata.meeting_id = meeting_id
                else:
                    logger.warning(f"Could not resolve meeting '{metadata.meeting_name}' to meeting_id for {tdoc_id}")