Commit 90c9b0ef authored by Jan Reimes
Browse files

feat(database): enhance query capabilities for meetings and specs

* Push filters to SQL WHERE for meetings and specs using Oxyde ORM.
* Improve performance by filtering in SQL first; only subgroup filters still run in memory, and only on the rows already matched.
* Load source records only for matching specs to optimize queries.
* Replace per-record existence lookups with a single ORM `tdoc_id__in` query when checking which TDocs already exist.
parent 29bd9329
Loading
Loading
Loading
Loading
+33 −7
Original line number Diff line number Diff line
@@ -135,14 +135,36 @@ class MeetingDatabase(DocDatabase):
    async def query_meetings(self, config: MeetingQueryConfig) -> list[MeetingMetadata]:
        """Query meetings with filtering and sorting.

        Pushes working_group, start_date, end_date, and files_url filters
        to SQL WHERE via Oxyde ORM. Subgroup filters remain in-memory due
        to cross-table lookup requirements.

        Args:
            config: Query configuration with filters and options

        Returns:
            List of matching meeting metadata
        """
        meetings = await self._table_rows(MeetingMetadata)
        meetings = self._apply_meeting_filters(meetings, config)
        qs = MeetingMetadata.objects

        if config.working_groups:
            allowed_tbids = [wg.tbid for wg in config.working_groups]
            qs = qs.filter(tbid__in=allowed_tbids)

        if config.start_date is not None:
            qs = qs.filter(start_date__gte=config.start_date)

        if config.end_date is not None:
            qs = qs.filter(end_date__lte=config.end_date)

        if not config.include_without_files:
            qs = qs.exclude(files_url="")

        meetings = await qs.all()

        if config.subgroups:
            allowed_subgroups = {value.strip().upper() for value in config.subgroups}
            meetings = _filter_by_subgroup(meetings, allowed_subgroups)

        descending = config.order.value.lower() == "desc"
        meetings.sort(
@@ -167,12 +189,13 @@ class MeetingDatabase(DocDatabase):
        Returns:
            Set of meeting IDs
        """
        meetings = await self._table_rows(MeetingMetadata)
        if not working_groups:
            meetings = await self._table_rows(MeetingMetadata)
            return {meeting.meeting_id for meeting in meetings}

        allowed = {wg.tbid for wg in working_groups}
        return {meeting.meeting_id for meeting in meetings if meeting.tbid in allowed}
        allowed_tbids = [wg.tbid for wg in working_groups]
        meetings = await MeetingMetadata.objects.filter(tbid__in=allowed_tbids).all()
        return {meeting.meeting_id for meeting in meetings}

    async def resolve_meeting_id(self, meeting_name: str) -> int | None:
        """Resolve meeting name to meeting_id from database.
@@ -232,12 +255,14 @@ class MeetingDatabase(DocDatabase):
    async def get_statistics(self) -> dict[str, object]:
        """Get database statistics.

        Uses SQL aggregation for counts. ``by_working_group`` still needs
        tdoc + meeting data joined in-memory due to cross-table grouping.

        Returns:
            Dictionary with various statistics
        """
        tdocs = await self._table_rows(TDocMetadata)
        meetings = await self._meeting_map()
        crawl_entries = await self._table_rows(CrawlLogEntry)

        by_working_group: dict[str, int] = defaultdict(int)
        tbid_to_code = {working_group.tbid: working_group.value for working_group in WorkingGroup}
@@ -251,6 +276,7 @@ class MeetingDatabase(DocDatabase):
                continue
            by_working_group[code] += 1

        crawl_entries = await CrawlLogEntry.objects.order_by("-start_time").limit(10).all()
        recent_crawls = [
            {
                "crawl_type": entry.crawl_type,
@@ -262,7 +288,7 @@ class MeetingDatabase(DocDatabase):
                "errors_count": entry.errors_count,
                "status": entry.status,
            }
            for entry in sorted(crawl_entries, key=lambda entry: entry.start_time, reverse=True)[:10]
            for entry in crawl_entries
        ]

        return {
+34 −5
Original line number Diff line number Diff line
@@ -340,15 +340,44 @@ class SpecDatabase(DocDatabase):
            raise DatabaseError(msg, detail=str(exc)) from exc

    async def query_specs(self, filters: SpecQueryFilters) -> list[SpecQueryResult]:
        """Query stored spec metadata."""
        specs = await self._spec_table_rows()
        source_records = await self._table_rows(SpecificationSourceRecord)
        """Query stored spec metadata.

        Pushes spec_number, title, working_group, and status filters to
        SQL WHERE via Oxyde ORM. Source records are loaded only for matching
        specs.

        Args:
            filters: Filter criteria

        Returns:
            List of spec query results
        """
        qs = Specification.objects

        if filters.spec_numbers:
            allowed = [normalize_spec_number(value) for value in filters.spec_numbers]
            qs = qs.filter(spec_number__in=allowed)

        if filters.title:
            qs = qs.filter(title__icontains=filters.title.strip())

        if filters.working_group:
            qs = qs.filter(working_group__iexact=filters.working_group.strip())

        if filters.status:
            qs = qs.filter(status__iexact=filters.status.strip())

        specs = await qs.all()

        if not specs:
            return []

        spec_numbers = [spec.spec_number for spec in specs]
        source_records = await SpecificationSourceRecord.objects.filter(spec_number__in=spec_numbers).all()
        records_by_spec: dict[str, list[SpecificationSourceRecord]] = defaultdict(list)
        for record in source_records:
            records_by_spec[record.spec_number].append(record)

        specs = _apply_spec_filters(specs, filters)

        return [
            SpecQueryResult(
                spec_number=spec.spec_number,
+6 −8
Original line number Diff line number Diff line
@@ -106,11 +106,8 @@ class TDocDatabase(MeetingDatabase):
                record = await self._resolve_tbid(record)
            prepared.append(record)

        existing_ids = set()
        for record in prepared:
            existing = await self._get_tdoc(record.tdoc_id)
            if existing is not None:
                existing_ids.add(record.tdoc_id)
        all_ids = [record.tdoc_id for record in prepared]
        existing_ids = {record.tdoc_id for record in await TDocMetadata.objects.filter(tdoc_id__in=all_ids).all()}

        now = utc_now()
        to_create: list[TDocMetadata] = []
@@ -173,15 +170,16 @@ class TDocDatabase(MeetingDatabase):

        need_meeting_map = config.meeting_start_date is not None or config.meeting_end_date is not None

        meeting_map = {meeting.meeting_id: meeting for meeting in await self._table_rows(MeetingMetadata)} if need_meeting_map else {}

        if need_meeting_map:
            meeting_map = {meeting.meeting_id: meeting for meeting in await self._table_rows(MeetingMetadata)}
            records = self._filter_by_meeting_dates(records, meeting_map, config.meeting_start_date, config.meeting_end_date)

        records = self._filter_by_retrieved_dates(records, config.start_date, config.end_date)

        records = self._apply_pattern_filters(records, config)

        # Build meeting map for sort (need start_date from meetings)
        if not meeting_map:
            meeting_map = {meeting.meeting_id: meeting for meeting in await self._table_rows(MeetingMetadata)}
        descending = config.order.value.lower() == "desc"
        fallback = date.max if descending else date.min