Commit d6500226 authored by Jan Reimes
Browse files

♻️ refactor(meetings): replace WorkingGroup with wg_id for meeting filtering

parent e2c39549
Loading
Loading
Loading
Loading
+7 −6
Original line number Diff line number Diff line
@@ -191,16 +191,17 @@ class MeetingCrawler:
        """Limit number of meetings per sub-working group."""
        if limit is None or limit == 0:
            return meetings
        order: dict[WorkingGroup, list[int]] = defaultdict(list)
        order: dict[int, list[int]] = defaultdict(list)
        for meeting in meetings:
            sequence = order[meeting.working_group]
            wg_id = meeting.subtb or meeting.tbid
            sequence = order[wg_id]
            if meeting.meeting_id not in sequence:
                sequence.append(meeting.meeting_id)
        allowed_ids: dict[WorkingGroup, set[int]] = {}
        for working_group, sequence in order.items():
        allowed_ids: dict[int, set[int]] = {}
        for wg_id, sequence in order.items():
            selected = sequence[:limit] if limit > 0 else sequence[limit:]
            allowed_ids[working_group] = set(selected)
        return [meeting for meeting in meetings if meeting.meeting_id in allowed_ids.get(meeting.working_group, {meeting.meeting_id})]
            allowed_ids[wg_id] = set(selected)
        return [meeting for meeting in meetings if meeting.meeting_id in allowed_ids.get(meeting.subtb or meeting.tbid, {meeting.meeting_id})]

    @staticmethod
    def _limit_meetings(
+8 −9
Original line number Diff line number Diff line
@@ -17,7 +17,6 @@ from tdoc_crawler.logging import get_logger
from tdoc_crawler.meetings.models import MeetingMetadata, MeetingQueryConfig
from tdoc_crawler.models.base import SortOrder
from tdoc_crawler.models.crawl_limits import CrawlLimits
from tdoc_crawler.models.working_groups import WorkingGroup
from tdoc_crawler.tdocs.models import TDocCrawlConfig, TDocMetadata
from tdoc_crawler.utils.normalization import normalize_tdoc_id
from tdoc_crawler.workers.tdoc_worker import fetch_meeting_document_list_subinterpreter
@@ -306,14 +305,14 @@ class TDocCrawler:
            return meetings

        max_per_subwg = abs(per_subwg_limit)
        per_subwg_counts: dict[WorkingGroup, int] = {}
        per_subwg_counts: dict[int, int] = {}
        filtered: list[MeetingMetadata] = []
        for meeting in meetings:
            working_group = meeting.working_group
            count = per_subwg_counts.get(working_group, 0)
            wg_id = meeting.subtb or meeting.tbid
            count = per_subwg_counts.get(wg_id, 0)
            if count >= max_per_subwg:
                continue
            per_subwg_counts[working_group] = count + 1
            per_subwg_counts[wg_id] = count + 1
            filtered.append(meeting)
        return filtered

@@ -324,16 +323,16 @@ class TDocCrawler:
            return meetings

        max_groups = abs(limit_subwgs)
        seen_groups: set[WorkingGroup] = set()
        seen_groups: set[int] = set()
        filtered: list[MeetingMetadata] = []
        for meeting in meetings:
            working_group = meeting.working_group
            if working_group in seen_groups:
            wg_id = meeting.subtb or meeting.tbid
            if wg_id in seen_groups:
                filtered.append(meeting)
                continue
            if len(seen_groups) >= max_groups:
                continue
            seen_groups.add(working_group)
            seen_groups.add(wg_id)
            filtered.append(meeting)
        return filtered