Commit fd78d859 authored by Jan Reimes's avatar Jan Reimes
Browse files

test(models): add comprehensive tests for WorkingGroup and SubWorkingGroup...

test(models): add comprehensive tests for WorkingGroup and SubWorkingGroup enums and normalizer functions

- Add tests for WorkingGroup.literal, WorkingGroup.tbid, WorkingGroup.from_literal
- Add tests for SubWorkingGroup.subtb, SubWorkingGroup.from_wg_and_nbr
- Add tests for normalize_working_group_alias with variants (-, #, space)
- Add tests for normalize_subgroup_alias with all docstring examples
parent d2966ed4
Loading
Loading
Loading
Loading
+53 −57
Original line number Diff line number Diff line
@@ -2,82 +2,78 @@

from __future__ import annotations

from tdoc_crawler.constants.registry import MEETING_CODE_REGISTRY
import re

from tdoc_crawler.models.subworking_groups import SubWorkingGroup
from tdoc_crawler.models.working_groups import WorkingGroup

def normalize_working_group_alias(alias: str) -> str:
    """Normalize working group aliases to canonical names.

    Supports: RP→RAN, SP→SA, CP→CT, and their reverse mappings.
def normalize_working_group_alias(alias: str) -> WorkingGroup:
    """Normalize working group aliases to canonical working group enums.

    Supports: RAN, SA, CT and their common name variants.

    Args:
        alias: Working group alias or name

    Returns:
        Normalized working group name (RAN, SA, or CT)
        Canonical working group enum value (WorkingGroup.RAN, WorkingGroup.SA, WorkingGroup.CT)

    Raises:
        ValueError: When the alias cannot be resolved
    """
    alias_upper = alias.strip().upper()
    # Plenary aliases
    if alias_upper in ("RP", "RAN PLENARY"):
        return "RAN"
    if alias_upper in ("SP", "SA PLENARY"):
        return "SA"
    if alias_upper in ("CP", "CT PLENARY"):
        return "CT"
    # Standard working groups
    if alias_upper in ("RAN", "SA", "CT"):
        return alias_upper
    # If not recognized, return as-is (will be validated later)
    return alias_upper


def normalize_subgroup_alias(alias: str) -> list[str]:
    """Normalize subgroup aliases to canonical names.

    Returns a list of possible matching subgroup names.
    cleaned = re.sub(r"[#\-_\s]", "", alias_upper)

    # Easy check via list(WorkingGroup): match against name or value
    for working_group in WorkingGroup:
        if cleaned.startswith(working_group.value) or cleaned.startswith(working_group.name):
            return working_group

    # If not found: match first letter
    if cleaned:
        for working_group in WorkingGroup:
            if working_group.value[0] == cleaned[0]:
                return working_group

    # If not found: any match of name/value in cleaned
    for working_group in WorkingGroup:
        if working_group.value in cleaned or working_group.name in cleaned:
            return working_group

    raise ValueError(f"Unknown working group: {alias}")


def normalize_subgroup_alias(alias: str) -> SubWorkingGroup:
    """Normalize subgroup aliases to canonical subgroup enums.

    Returns the single matching subgroup enum value (e.g., SubWorkingGroup.S4).
    Supports:
    - Short codes: S4, R1, C3
    - Full names: SA4, RAN1, CT3
    - Plenary codes: RP, SP, CP
    - Full plenary names: RAN PLENARY, SA PLENARY, CT PLENARY
    - Separators: SA-4, SA#4, SA 4
    - Plenary: RP, SP, CP, RAN PLENARY, etc.

    Args:
        alias: Subgroup alias or code

    Returns:
        List of possible matching canonical subgroup names
        The matching canonical subgroup enum value
    """
    alias_upper = alias.strip().upper()
    matches: list[str] = []

    # Try to normalize long form to short form (SA4 → S4, RAN1 → R1, CT3 → C3)
    normalized_alias = alias_upper
    if alias_upper.startswith("SA") and len(alias_upper) > 2:
        # SA4 → S4, SA1 → S1, etc.
        normalized_alias = "S" + alias_upper[2:]
    elif alias_upper.startswith("RAN") and len(alias_upper) > 3:
        # RAN1 → R1, RAN2 → R2, etc.
        normalized_alias = "R" + alias_upper[3:]
    elif alias_upper.startswith("CT") and len(alias_upper) > 2:
        # CT1 → C1, CT2 → C2, etc.
        normalized_alias = "C" + alias_upper[2:]

    # Check all registries for matches
    for _working_group, codes in MEETING_CODE_REGISTRY.items():
        for code, subgroup in codes:
            # Check if normalized alias matches the code
            if code.upper() == normalized_alias or code.upper() == alias_upper:
                if subgroup:
                    matches.append(subgroup)
            # Check if subgroup name matches
            elif subgroup and subgroup.upper() == alias_upper:
                matches.append(subgroup)

    # If no matches found, return the original alias (might be exact name)
    if not matches:
        matches.append(alias_upper)

    return matches
    input_str = alias.strip().upper()
    cleaned = re.sub(r"[#\-_\s]", "", input_str)

    # determine WorkingGroup first
    wg = normalize_working_group_alias(cleaned)

    # check if cleaned contains a _single_ number or the letter "P"
    match = re.search(r"(P|\d+)", cleaned)
    if not match:
        raise ValueError(f"Cannot parse/normalize subgroup format: {alias}")
    elif len(match.groups()) != 1:
        raise ValueError(f"Multiple/ambiguous matches found when parsing subgroup: {alias}")

    return SubWorkingGroup.from_wg_and_nbr(wg, match.group(1))  # e.g., S4, R1, C3


__all__ = [
+84 −27
Original line number Diff line number Diff line
@@ -2,11 +2,82 @@

from __future__ import annotations

from enum import StrEnum
from functools import cache

from pydantic import BaseModel, Field

from tdoc_crawler.models.working_groups import WorkingGroup


class SubWorkingGroup(StrEnum):
    """Enumeration of canonical subgroup codes."""

    # SA working groups
    SP = "SA Plenary"
    S1 = "SA1"
    S2 = "SA2"
    S3 = "SA3"
    S4 = "SA4"
    S5 = "SA5"
    S6 = "SA6"

    # CT working groups
    CP = "CT Plenary"
    C1 = "CT1"
    C2 = "CT2"
    C3 = "CT3"
    C4 = "CT4"
    C5 = "CT5"
    C6 = "CT6"

    # RAN working groups
    RP = "RAN Plenary"
    R1 = "RAN1"
    R2 = "RAN2"
    R3 = "RAN3"
    R4 = "RAN4"
    R5 = "RAN5"
    R6 = "RAN6"

    @property
    def subtb(self) -> int:
        """Return the sub-technical body ID for this subgroup."""
        mapping: dict[SubWorkingGroup, int] = {
            SubWorkingGroup.SP: 375,
            SubWorkingGroup.S1: 384,
            SubWorkingGroup.S2: 385,
            SubWorkingGroup.S3: 386,
            SubWorkingGroup.S4: 387,
            SubWorkingGroup.S5: 388,
            SubWorkingGroup.S6: 825,
            SubWorkingGroup.CP: 649,
            SubWorkingGroup.C1: 651,
            SubWorkingGroup.C2: 652,
            SubWorkingGroup.C3: 653,
            SubWorkingGroup.C4: 654,
            SubWorkingGroup.C5: 655,
            SubWorkingGroup.C6: 656,
            SubWorkingGroup.RP: 373,
            SubWorkingGroup.R1: 379,
            SubWorkingGroup.R2: 380,
            SubWorkingGroup.R3: 381,
            SubWorkingGroup.R4: 382,
            SubWorkingGroup.R5: 657,
            SubWorkingGroup.R6: 843,
        }
        return mapping[self]

    @classmethod
    def from_wg_and_nbr(cls, wg: WorkingGroup, nbr: str | int) -> SubWorkingGroup:
        """Construct a SubWorkingGroup from a parent working group and subgroup number (or "P")."""
        code = f"{wg.value[0]}{str(nbr).upper()[0]}"
        try:
            return cls[code]
        except KeyError:
            raise ValueError(f"Unknown subgroup code: {code}") from None


class SubWorkingGroupRecord(BaseModel):
    """Persistent representation of a subworking group."""

@@ -23,34 +94,19 @@ class SubWorkingGroupRecord(BaseModel):
                return working_group
        raise ValueError

@cache
def _generate_swb_group_records() -> tuple[SubWorkingGroupRecord, ...]:
    """Build one SubWorkingGroupRecord per SubWorkingGroup member, in enum definition order.

    The parent working group is resolved from the first character of the
    member name (R, S or C) via ``WorkingGroup.from_literal``. The result is
    cached, so repeated calls return the same tuple.
    """
    return tuple(
        SubWorkingGroupRecord(
            subtb=member.subtb,
            tbid=WorkingGroup.from_literal(member.name[0]).tbid,
            code=member.name,
            name=member.value,
        )
        for member in SubWorkingGroup
    )

SUBWORKING_GROUP_RECORDS: tuple[SubWorkingGroupRecord, ...] = (
    # SA working groups
    SubWorkingGroupRecord(subtb=375, tbid=WorkingGroup.SA.tbid, code="SP", name="SA Plenary"),
    SubWorkingGroupRecord(subtb=384, tbid=WorkingGroup.SA.tbid, code="S1", name="SA1"),
    SubWorkingGroupRecord(subtb=385, tbid=WorkingGroup.SA.tbid, code="S2", name="SA2"),
    SubWorkingGroupRecord(subtb=386, tbid=WorkingGroup.SA.tbid, code="S3", name="SA3"),
    SubWorkingGroupRecord(subtb=387, tbid=WorkingGroup.SA.tbid, code="S4", name="SA4"),
    SubWorkingGroupRecord(subtb=388, tbid=WorkingGroup.SA.tbid, code="S5", name="SA5"),
    SubWorkingGroupRecord(subtb=825, tbid=WorkingGroup.SA.tbid, code="S6", name="SA6"),
    # CT working groups
    SubWorkingGroupRecord(subtb=649, tbid=WorkingGroup.CT.tbid, code="CP", name="CT Plenary"),
    SubWorkingGroupRecord(subtb=651, tbid=WorkingGroup.CT.tbid, code="C1", name="CT1"),
    SubWorkingGroupRecord(subtb=652, tbid=WorkingGroup.CT.tbid, code="C2", name="CT2"),
    SubWorkingGroupRecord(subtb=653, tbid=WorkingGroup.CT.tbid, code="C3", name="CT3"),
    SubWorkingGroupRecord(subtb=654, tbid=WorkingGroup.CT.tbid, code="C4", name="CT4"),
    SubWorkingGroupRecord(subtb=655, tbid=WorkingGroup.CT.tbid, code="C5", name="CT5"),
    SubWorkingGroupRecord(subtb=656, tbid=WorkingGroup.CT.tbid, code="C6", name="CT6"),
    # RAN working groups
    SubWorkingGroupRecord(subtb=373, tbid=WorkingGroup.RAN.tbid, code="RP", name="RAN Plenary"),
    SubWorkingGroupRecord(subtb=379, tbid=WorkingGroup.RAN.tbid, code="R1", name="RAN1"),
    SubWorkingGroupRecord(subtb=380, tbid=WorkingGroup.RAN.tbid, code="R2", name="RAN2"),
    SubWorkingGroupRecord(subtb=381, tbid=WorkingGroup.RAN.tbid, code="R3", name="RAN3"),
    SubWorkingGroupRecord(subtb=382, tbid=WorkingGroup.RAN.tbid, code="R4", name="RAN4"),
    SubWorkingGroupRecord(subtb=657, tbid=WorkingGroup.RAN.tbid, code="R5", name="RAN5"),
    SubWorkingGroupRecord(subtb=843, tbid=WorkingGroup.RAN.tbid, code="R6", name="RAN6"),
)

# Records are generated once from the SubWorkingGroup enum; both lookup tables below are derived from this tuple.
SUBWORKING_GROUP_RECORDS: tuple[SubWorkingGroupRecord, ...] = _generate_swb_group_records()

# Lookup by sub-technical body ID.
SUBTB_INDEX: dict[int, SubWorkingGroupRecord] = {rec.subtb: rec for rec in SUBWORKING_GROUP_RECORDS}
# Lookup by upper-cased short code.
CODE_INDEX: dict[str, SubWorkingGroupRecord] = {rec.code.upper(): rec for rec in SUBWORKING_GROUP_RECORDS}
@@ -60,5 +116,6 @@ __all__ = [
    "CODE_INDEX",
    "SUBTB_INDEX",
    "SUBWORKING_GROUP_RECORDS",
    "SubWorkingGroup",
    "SubWorkingGroupRecord",
]
+18 −19
Original line number Diff line number Diff line
@@ -14,6 +14,11 @@ class WorkingGroup(StrEnum):
    SA = "SA"
    CT = "CT"

    @property
    def literal(self) -> str:
        """Return the upper-cased first character of this working group's value."""
        initial = self.value[0]
        return initial.upper()

    @property
    def tbid(self) -> int:
        """Return the technical body ID for this working group."""
@@ -24,20 +29,15 @@ class WorkingGroup(StrEnum):
        }
        return mapping[self]

    @property
    def ftp_root(self) -> str:
        """Return the FTP root path segment for the working group."""
        return f"/tsg_{self.value.lower()}"
    @classmethod
    def from_literal(cls, literal: str) -> WorkingGroup:
        """Parse a working group from first character of a user-provided literal string."""
        lit = literal.strip().upper()[0]
        for wg in list(WorkingGroup):
            if wg.name.startswith(lit):
                return wg

    @property
    def portal_meetings_code(self) -> str:
        """Return the meetings code (two characters) used for the dynareport endpoint."""
        mapping: dict[WorkingGroup, str] = {
            WorkingGroup.RAN: "R",
            WorkingGroup.SA: "S",
            WorkingGroup.CT: "C",
        }
        return mapping[self]
        raise ValueError(f"Unknown working group literal: {literal}")


class WorkingGroupRecord(BaseModel):
@@ -46,14 +46,13 @@ class WorkingGroupRecord(BaseModel):
    tbid: int = Field(..., description="Technical body identifier")
    code: str = Field(..., description="Canonical short code (e.g., 'RAN')")
    name: str = Field(..., description="Display name for the working group")
    ftp_identifier: str = Field(..., description="FTP root path segment")

    @property
    def ftp_identifier(self) -> str:
        """Return the FTP root path segment, built from the lower-cased record code."""
        segment = self.code.lower()
        return f"/tsg_{segment}"

WORKING_GROUP_RECORDS: tuple[WorkingGroupRecord, ...] = (
    WorkingGroupRecord(tbid=WorkingGroup.RAN.tbid, code="RAN", name="RAN", ftp_identifier=WorkingGroup.RAN.ftp_root),
    WorkingGroupRecord(tbid=WorkingGroup.SA.tbid, code="SA", name="SA", ftp_identifier=WorkingGroup.SA.ftp_root),
    WorkingGroupRecord(tbid=WorkingGroup.CT.tbid, code="CT", name="CT", ftp_identifier=WorkingGroup.CT.ftp_root),
)

# One record per WorkingGroup member, in enum definition order. An Enum class
# is directly iterable, so no intermediate list is built.
WORKING_GROUP_RECORDS: tuple[WorkingGroupRecord, ...] = tuple(
    WorkingGroupRecord(tbid=wg.tbid, code=wg.value, name=wg.value) for wg in WorkingGroup
)

__all__ = ["WORKING_GROUP_RECORDS", "WorkingGroup", "WorkingGroupRecord"]
+306 −21

File changed.

Preview size limit exceeded, changes collapsed.