Commit 5a504eea authored by Jan Reimes
Browse files

refactor: remove model re-exports and update domain package imports

- Remove re-exports of domain models from tdoc_crawler.models package
- Consumers must import directly from domain packages (tdocs.models, meetings.models, specs.models)
- Clarify module responsibilities and reduce implicit dependencies
- Update import paths for SpecDatabase location change
parent 7daefd6f
Loading
Loading
Loading
Loading
+1 −9
Original line number Diff line number Diff line
@@ -2,12 +2,4 @@

from __future__ import annotations

from tdoc_crawler.meetings.operations import MeetingCrawler, MeetingCrawlResult
from tdoc_crawler.meetings.utils import normalize_subgroup_alias, normalize_working_group_alias

__all__ = [
    "MeetingCrawlResult",
    "MeetingCrawler",
    "normalize_subgroup_alias",
    "normalize_working_group_alias",
]
__all__ = []
+18 −37
Original line number Diff line number Diff line
"""Core data models and configuration primitives used across the CLI."""
"""Core data models and configuration primitives used across the CLI.

NOTE: Domain models (TDocMetadata, MeetingMetadata, etc.) should be imported
directly from their source modules:
- tdoc_crawler.tdocs.models (TDocMetadata, TDocCrawlConfig, QueryConfig)
- tdoc_crawler.meetings.models (MeetingMetadata, MeetingCrawlConfig, MeetingQueryConfig)
"""

from __future__ import annotations

# Re-export all public symbols
from .base import (
    BaseConfigModel,  # noqa: F401
    BaseConfigModel,
    HttpCacheConfig,
    OutputFormat,
    PortalCredentials,
    SortOrder,
)
from .crawl_limits import CrawlLimits  # noqa: F401
from .crawl_log import CrawlLogEntry  # noqa: F401
from .meetings import (
    MeetingCrawlConfig,
    MeetingMetadata,  # noqa: F401
    MeetingQueryConfig,
)
from .specs import (
    Specification,  # noqa: F401
    SpecificationDownload,
    SpecificationSourceRecord,
    SpecificationVersion,
    SpecQueryFilters,
    SpecQueryResult,
)
from .crawl_limits import CrawlLimits
from .crawl_log import CrawlLogEntry

# Note: Specification models have been moved to tdoc_crawler.specs.models.
# Import them from there directly to avoid circular dependencies.
from .subworking_groups import (
    CODE_INDEX,
    SUBTB_INDEX,  # noqa: F401
    SUBTB_INDEX,
    SUBWORKING_GROUP_RECORDS,
    SubWorkingGroupRecord,
)
from .tdocs import (
    CrawlConfig,
    QueryConfig,
    TDocCrawlConfig,  # noqa: F401
    TDocMetadata,
)
from .working_groups import (
    WORKING_GROUP_RECORDS,
    WorkingGroup,  # noqa: F401
    WorkingGroup,
    WorkingGroupRecord,
)

__all__ = [
    "CODE_INDEX",
    "SUBTB_INDEX",
    "SUBWORKING_GROUP_RECORDS",
    "WORKING_GROUP_RECORDS",
    "BaseConfigModel",
    "CrawlConfig",
    "CrawlLimits",
    "CrawlLogEntry",
    "HttpCacheConfig",
    "MeetingCrawlConfig",
    "MeetingMetadata",
    "MeetingQueryConfig",
    "OutputFormat",
    "PortalCredentials",
    "QueryConfig",
    "SortOrder",
    "SpecQueryFilters",
    "SpecQueryResult",
@@ -67,8 +47,9 @@ __all__ = [
    "SpecificationSourceRecord",
    "SpecificationVersion",
    "SubWorkingGroupRecord",
    "TDocCrawlConfig",
    "TDocMetadata",
    "SUBTB_INDEX",
    "SUBWORKING_GROUP_RECORDS",
    "WorkingGroup",
    "WorkingGroupRecord",
    "WORKING_GROUP_RECORDS",
]
+1 −2
Original line number Diff line number Diff line
@@ -10,7 +10,6 @@ from zipinspect import HTTPZipReader
from tdoc_crawler.config import resolve_cache_manager
from tdoc_crawler.constants.urls import SPEC_URL_TEMPLATE
from tdoc_crawler.http_client import download_to_path
from tdoc_crawler.specs.database import SpecDatabase
from tdoc_crawler.specs.sources.base import SpecSource
from tdoc_crawler.utils.normalization import normalize_spec_number

@@ -20,7 +19,7 @@ _logger = logging.getLogger(__name__)
class SpecDownloads:
    """Download and extraction utilities for specs."""

    def __init__(self, database: SpecDatabase, cache_manager_name: str | None = None) -> None:
    def __init__(self, database, cache_manager_name: str | None = None) -> None:
        self._database = database
        self._cache_manager = resolve_cache_manager(cache_manager_name)

+1 −1
Original line number Diff line number Diff line
@@ -10,7 +10,7 @@ import shutil
from pathlib import Path
from typing import cast

from tdoc_crawler.specs.database import SpecDatabase
from tdoc_crawler.database.specs import SpecDatabase
from tdoc_crawler.specs.downloads import SpecDownloads
from tdoc_crawler.specs.sources.base import FunctionSpecSource, SpecSource
from tdoc_crawler.specs.sources.threegpp import fetch_threegpp_metadata
+4 −12
Original line number Diff line number Diff line
@@ -2,16 +2,8 @@

from __future__ import annotations

from tdoc_crawler.tdocs.operations import (
    HybridCrawlResult,
    HybridTDocCrawler,
    TDocCrawler,
    TDocCrawlResult,
)
# Note: Operations are available via explicit submodule imports.
# Importing them here would create circular dependencies.
# Use: from tdoc_crawler.tdocs.operations import TDocCrawler

__all__ = [
    "HybridCrawlResult",
    "HybridTDocCrawler",
    "TDocCrawlResult",
    "TDocCrawler",
]
__all__ = []