Commit 4483b101 authored by Jan Reimes
Browse files

refactor(models,parsers): remove unused methods and protocols

Remove WorkingGroup.literal, unused parse() methods from parsers,
and unused get_session() methods from HTTP client protocols.
parent cbb8b0d0
Loading
Loading
Loading
Loading
+0 −23
Original line number Diff line number Diff line
@@ -2,21 +2,8 @@

from __future__ import annotations

from typing import Protocol, runtime_checkable

import requests

from tdoc_crawler.http_client.session import create_cached_session


@runtime_checkable
class HttpClientProvider(Protocol):
    """Structural interface for objects that hand out HTTP sessions.

    The protocol is runtime-checkable, so ``isinstance`` can be used to
    test whether an object exposes a ``get_session`` method.
    """

    def get_session(self) -> requests.Session:
        """Provide a configured ``requests.Session`` for HTTP operations."""


class DefaultHttpClientProvider:
    """Default HttpClientProvider backed by the cached session factory.
@@ -34,16 +21,6 @@ class DefaultHttpClientProvider:
        self._cache_manager_name = cache_manager_name
        self._session: requests.Session | None = None

    def get_session(self) -> requests.Session:
        """Return this provider's session, creating it on first use.

        The session is built by ``create_cached_session`` using the stored
        cache manager name, kept on the instance, and handed back unchanged
        on every later call.

        Returns:
            The session held by this provider.
        """
        session = self._session
        if session is None:
            # First request: build the session and remember it for reuse.
            session = create_cached_session(cache_manager_name=self._cache_manager_name)
            self._session = session
        return session

    def close(self) -> None:
        """Close and release the cached session."""
        if self._session is not None:
+0 −10
Original line number Diff line number Diff line
@@ -14,11 +14,6 @@ class WorkingGroup(StrEnum):
    SA = "SA"
    CT = "CT"

    @property
    def literal(self) -> str:
        """Return the upper-cased first character of this member's value."""
        initial = self.value[0]
        return initial.upper()

    @property
    def tbid(self) -> int:
        """Return the technical body ID for this working group."""
@@ -47,11 +42,6 @@ class WorkingGroupRecord(BaseModel):
    code: str = Field(..., description="Canonical short code (e.g., 'RAN')")
    name: str = Field(..., description="Display name for the working group")

    @property
    def ftp_identifier(self) -> str:
        """Return the FTP root path segment: ``/tsg_`` plus the lower-cased code."""
        return "/tsg_" + self.code.lower()


# One record per WorkingGroup member; ``code`` and ``name`` are both taken
# from the member's value. The enum class is iterated directly, which yields
# its members without building an intermediate list.
WORKING_GROUP_RECORDS: tuple[WorkingGroupRecord, ...] = tuple(
    WorkingGroupRecord(tbid=wg.tbid, code=wg.value, name=wg.value) for wg in WorkingGroup
)

+0 −20
Original line number Diff line number Diff line
@@ -52,26 +52,6 @@ class MeetingParser:
            self._session = create_cached_session()
        return self._session

    @staticmethod
    def parse(
        html: str,
        working_group: WorkingGroup,
        subgroup: str | None,
        get_subtb: Callable[[str], int | None] | None = None,
    ) -> list[MeetingMetadata]:
        """Turn the HTML of a meeting page into MeetingMetadata objects.

        This is a thin wrapper that forwards all arguments, in order, to
        ``parse_meeting_page``.

        Args:
            html: HTML content of the meeting page.
            working_group: Working group the meetings belong to.
            subgroup: Subgroup name, or None when there is none.
            get_subtb: Optional callback mapping a subgroup code to its subtb.

        Returns:
            The list of parsed meeting metadata objects.
        """
        return parse_meeting_page(
            html,
            working_group,
            subgroup,
            get_subtb,
        )


def parse_meeting_page(
    html: str,
+0 −18
Original line number Diff line number Diff line
@@ -47,24 +47,6 @@ class PortalParser:
            self._session = create_cached_session()
        return self._session

    @staticmethod
    def parse(html: str, tdoc_id: str, url: str | None = None) -> TDocMetadata:
        """Extract TDoc metadata from the HTML of a portal page.

        This is a thin wrapper that forwards all arguments, in order, to
        ``parse_tdoc_portal_page``.

        Args:
            html: HTML content of the TDoc portal page.
            tdoc_id: Identifier of the TDoc (used for logging).
            url: URL of the TDoc, when known.

        Returns:
            A TDocMetadata instance holding the portal metadata.

        Raises:
            PortalParsingError: If the TDoc or its metadata table cannot be
                found, or mandatory fields are missing.
        """
        return parse_tdoc_portal_page(
            html,
            tdoc_id,
            url,
        )


class PortalParsingError(Exception):
    """Raised when portal page parsing fails."""
+0 −21
Original line number Diff line number Diff line
@@ -7,27 +7,6 @@ from typing import Any, Protocol, runtime_checkable
import requests


@runtime_checkable
class ParserProtocol(Protocol):
    """Structural interface for HTML/data parser implementations.

    A parser takes raw content (typically an HTML string) and produces
    structured data objects. The protocol is runtime-checkable, so
    ``isinstance`` can be used to test for a ``parse`` method.
    """

    def parse(self, content: str, **kwargs: Any) -> Any:
        """Convert raw content into structured data.

        Args:
            content: Raw content to parse (e.g., an HTML string).
            **kwargs: Keyword arguments specific to the parser.

        Returns:
            The parsed structured data; its type depends on the implementation.
        """


@runtime_checkable
class ClientProtocol(Protocol):
    """Protocol for HTTP client abstractions used by parsers and sources.