Commit 48a71f76 authored by Jan Reimes
Browse files

refactor(parsers,workers): consolidate HTML parsing and parallel worker logic

parent 92081207
Loading
Loading
Loading
Loading
+23 −0
Original line number Diff line number Diff line
"""HTTP and API clients."""

from __future__ import annotations

from tdoc_crawler.clients.portal import (
    PortalAuthenticationError,
    PortalClient,
    PortalSession,
    create_portal_client,
    extract_tdoc_url_from_portal,
)

# Re-export PortalParsingError from parsers for backward compatibility
from tdoc_crawler.parsers.portal import PortalParsingError

# Names exported by a star-import of this package.
# PortalParsingError is imported from tdoc_crawler.parsers.portal and listed
# here only so existing imports through this package keep working; every
# other name comes from tdoc_crawler.clients.portal.
__all__ = [
    "PortalAuthenticationError",
    "PortalClient",
    "PortalParsingError",
    "PortalSession",
    "create_portal_client",
    "extract_tdoc_url_from_portal",
]
+563 −0

File added.

Preview size limit exceeded, changes collapsed.

+10 −0
Original line number Diff line number Diff line
"""Data parsers for HTML, Excel, and other formats."""

from __future__ import annotations

from tdoc_crawler.parsers.portal import PortalParsingError, parse_tdoc_portal_page

# Names exported by a star-import of this package; both are imported from
# tdoc_crawler.parsers.portal above.
__all__ = [
    "PortalParsingError",
    "parse_tdoc_portal_page",
]
+164 −0

File added.

Preview size limit exceeded, changes collapsed.

+211 −0

File added.

Preview size limit exceeded, changes collapsed.

Loading