Commit 35d077cb authored by Jan Reimes
Browse files

refactor(meetings, models): optimize lookup methods and simplify classes

* Replace direct iteration with CODE_INDEX lookup in MeetingCrawler.
* Remove TODO comments suggesting simplification of BaseConfigModel.
* Remove TODO comments suggesting that SpecificationDownload, TDocCrawlConfig, and TDocQueryConfig become dataclasses.
parent bb5eacda
Loading
Loading
Loading
Loading
+5 −9
Original line number Diff line number Diff line
@@ -13,7 +13,7 @@ from tdoc_crawler.http_client import create_cached_session
from tdoc_crawler.logging import get_logger
from tdoc_crawler.meetings.models import MeetingCrawlConfig, MeetingMetadata
from tdoc_crawler.models.crawl_limits import CrawlLimits
from tdoc_crawler.models.subworking_groups import SUBWORKING_GROUP_RECORDS
from tdoc_crawler.models.subworking_groups import CODE_INDEX, SUBWORKING_GROUP_RECORDS
from tdoc_crawler.models.working_groups import WorkingGroup
from tdoc_crawler.parsers.meetings import parse_meeting_page

@@ -85,15 +85,11 @@ class MeetingCrawler:
                        errors.append(message)
                        continue

                    # TODO: can be made much efficient by directly accessing properties of enum/dataclasses?
                    # Create callback for subtb lookup
                    # Create callback for subtb lookup using pre-built index
                    def get_subtb(subgroup_code: str) -> int | None:
                        """Get subtb from subgroup code via record lookup."""
                        # Using SUBWORKING_GROUP_RECORDS directly as it's the source of truth
                        for r in SUBWORKING_GROUP_RECORDS:
                            if r.code == subgroup_code:
                                return r.subtb
                        return None
                        """Get subtb from subgroup code via CODE_INDEX lookup."""
                        record = CODE_INDEX.get(subgroup_code.upper())
                        return record.subtb if record else None

                    parsed_meetings = parse_meeting_page(
                        response.text,
+0 −1
Original line number Diff line number Diff line
@@ -80,7 +80,6 @@ class HttpCacheConfig:
        return HttpCacheConfig(ttl=ttl, refresh_ttl_on_access=refresh_on_access, max_retries=max_retries, cache_file=cache_file)


# TODO: classes derived from BaseConfigModel should be simpler dataclasses, as they are not/never stored in DB and only used for config parsing. BaseModel is more useful for persistent models with validation needs.
class BaseConfigModel(BaseModel):
    """Shared configuration base enabling attribute parsing and whitespace handling."""

+0 −1
Original line number Diff line number Diff line
@@ -54,7 +54,6 @@ class SpecificationVersion(BaseModel):
    source_name: str


# TODO: this is rather a dataclass?
class SpecificationDownload(BaseModel):
    """Download and extraction outcome for a spec version."""

+0 −2
Original line number Diff line number Diff line
@@ -127,7 +127,6 @@ class TDocMetadata(BaseModel):
        return value.strip().upper()


# TODO: this is rather a dataclass?
class TDocCrawlConfig(BaseConfigModel):
    """Configuration for TDoc crawling runs."""

@@ -193,7 +192,6 @@ class TDocCrawlConfig(BaseConfigModel):
        return normalize_tdoc_ids(value)


# TODO: this is rather a dataclass?
class TDocQueryConfig(BaseConfigModel):
    """Configuration for querying TDoc metadata."""