Commit f84479e3 authored by Jan Reimes
Browse files

feat(cli): enhance TDoc output with meeting details and agenda formatting

- Add functions to extract short agenda item text and format meeting date ranges.
- Update `tdoc_to_dict` to include meeting location and dates.
- Modify `print_tdoc_table` to display meeting information and short agenda text.
- Adjust query handling to build a meeting map for enriched output in JSON/YAML formats.
- Update Excel row conversion to handle new agenda item description column.
parent 7e797748
Loading
Loading
Loading
Loading
+142 −8
Original line number Diff line number Diff line
@@ -2,6 +2,8 @@

from __future__ import annotations

import re
from datetime import date
from typing import Any

from rich.table import Table
@@ -17,9 +19,117 @@ from tdoc_crawler.tdocs.models import TDocMetadata
console = get_console()


def tdoc_to_dict(result: TDocMetadata) -> dict[str, Any]:
    """Convert TDocMetadata to dictionary for JSON/YAML output.

    Args:
        result: TDoc metadata to convert

    Returns:
        Whatever ``result.model_dump(mode="json")`` produces
    """
    # NOTE(review): a second ``tdoc_to_dict`` taking an extra ``meeting_map``
    # parameter appears further down in this listing; this one-argument form
    # looks like the pre-change side of the diff -- confirm before relying on it.
    return result.model_dump(mode="json")
def extract_agenda_item_short(text: str | None) -> str | None:
    """Return the part of an agenda item text that precedes its first "(".

    The text is shortened only when the first parenthesis character in it is
    an opening one and at least one character comes before it. In every other
    case (no parentheses, text starting with "(", or a ")" appearing before
    the first "(") the text is returned unchanged.

    Args:
        text: The full agenda item text

    Returns:
        None for None or empty input; otherwise the whitespace-stripped prefix
        before the first "(", or the original text if it is not shortened
    """
    if not text:
        return None

    paren_at = text.find("(")
    head = text[:paren_at]

    # Nothing to cut: no "(" at all, "(" is the very first character,
    # or a stray ")" shows up in front of the first "(".
    if paren_at < 1 or ")" in head:
        return text

    return head.strip()


def format_meeting_date_range(start_date: date | None, end_date: date | None) -> str | None:
    """Format a meeting date range as a compact string.

    Output shapes:
        - same month and year:         "01-05 Feb 2026"
        - different months, same year: "30 Jan - 02 Feb 2024"
        - different years:             "30 Dec 2024 - 03 Jan 2025"

    Args:
        start_date: Meeting start date
        end_date: Meeting end date

    Returns:
        Formatted date range string, or None if either date is missing
    """
    if not start_date or not end_date:
        return None

    # Month abbreviations are spelled out here so the output does not depend
    # on the process locale (which strftime("%b") would).
    months = (
        "Jan",
        "Feb",
        "Mar",
        "Apr",
        "May",
        "Jun",
        "Jul",
        "Aug",
        "Sep",
        "Oct",
        "Nov",
        "Dec",
    )

    start_month = months[start_date.month - 1]
    end_month = months[end_date.month - 1]

    # A range crossing a year boundary needs both years; printing only the
    # start year would mislabel the end date (and a bare month comparison
    # would wrongly treat e.g. Dec 2024 / Dec 2025 as "same month").
    if start_date.year != end_date.year:
        return (
            f"{start_date.day:02d} {start_month} {start_date.year}"
            f" - {end_date.day:02d} {end_month} {end_date.year}"
        )

    # Same month: "01-05 Feb 2026"
    if start_date.month == end_date.month:
        return f"{start_date.day:02d}-{end_date.day:02d} {start_month} {start_date.year}"

    # Different months: "30 Jan - 02 Feb 2024"
    return f"{start_date.day:02d} {start_month} - {end_date.day:02d} {end_month} {start_date.year}"


def format_meeting_location_and_dates(meeting: MeetingMetadata | None) -> str:
    """Build the "location, dates" label shown for a TDoc's meeting.

    Args:
        meeting: Meeting metadata, or None if not available

    Returns:
        "n/a" when no meeting is given; otherwise the meeting location
        (or "n/a" if it is empty), followed by ", <date range>" when
        format_meeting_date_range yields a non-empty string
    """
    if meeting is None:
        return "n/a"

    # Location always comes first; the date range is appended only if present.
    pieces = [meeting.location or "n/a"]
    dates = format_meeting_date_range(meeting.start_date, meeting.end_date)
    if dates:
        pieces.append(dates)

    return ", ".join(pieces)


def tdoc_to_dict(
    result: TDocMetadata,
    meeting_map: dict[int, MeetingMetadata] | None = None,
) -> dict[str, Any]:
    """Serialize a TDoc to a plain dictionary for JSON/YAML output.

    The dictionary starts from ``result.model_dump(mode="json")`` and then
    gets three extra keys: ``agenda_item``, ``agenda_item_text_short`` and
    ``meeting``.

    Args:
        result: TDoc metadata to convert
        meeting_map: Optional map of meeting_id to MeetingMetadata for enriched output

    Returns:
        Dictionary representation of the TDoc
    """
    payload = result.model_dump(mode="json")

    # Agenda item number as a string (None stays None) so it is a plain value in the output.
    agenda_nbr = result.agenda_item_nbr
    payload["agenda_item"] = None if agenda_nbr is None else str(agenda_nbr)

    # Agenda item text cut at its first parenthesis.
    payload["agenda_item_text_short"] = extract_agenda_item_short(result.agenda_item_text)

    # Meeting label; stays "n/a" when there is no map, no meeting id, or no matching entry.
    can_lookup = meeting_map and result.meeting_id
    found = meeting_map.get(result.meeting_id) if can_lookup else None
    payload["meeting"] = format_meeting_location_and_dates(found)

    return payload


def meeting_to_dict(meeting: MeetingMetadata) -> dict[str, Any]:
@@ -27,22 +137,46 @@ def meeting_to_dict(meeting: MeetingMetadata) -> dict[str, Any]:
    return meeting.model_dump(mode="json")


def print_tdoc_table(results: list[TDocMetadata]) -> None:
    """Print TDoc results as formatted table."""
    # NOTE(review): this header has no body of its own in this listing and is
    # immediately followed by a two-parameter ``print_tdoc_table``; it looks
    # like the pre-change side of the diff -- confirm.
def print_tdoc_table(
    results: list[TDocMetadata],
    meeting_map: dict[int, MeetingMetadata] | None = None,
) -> None:
    """Print TDoc results as formatted table.

    The table title reports the total number of results, but only the first
    100 are added as rows.

    NOTE(review): the portion visible here only builds the table; the call
    that actually prints it is not shown in this listing -- confirm it follows.

    Args:
        results: List of TDoc metadata to display
        meeting_map: Optional map of meeting_id to MeetingMetadata for meeting info
    """
    table = Table(title=f"TDoc results ({len(results)} rows)")
    # NOTE(review): ten columns are declared below but each add_row call passes
    # nine cells, and "Source(s)"/"Source" look like duplicates. This listing
    # appears to interleave old and new diff lines -- verify the real column
    # set against the repository before changing anything here.
    table.add_column("TDoc", style="cyan")
    table.add_column("Title", style="yellow")
    table.add_column("Source(s)", style="magenta")
    table.add_column("Type", style="green")
    table.add_column("Source", style="magenta")
    table.add_column("Meeting", style="green")
    table.add_column("Agenda", justify="right", style="cyan")
    table.add_column("Agenda Short", style="dim")
    table.add_column("Status", style="magenta")
    table.add_column("Size (KB)", justify="right", style="blue")

    # Cap the rendered rows at 100 regardless of how many results were passed in.
    for result in results[:100]:
        # Whole kilobytes via floor division; "?" when file_size is missing or zero.
        size_kb = f"{result.file_size // 1024}" if result.file_size else "?"

        # Get meeting info ("n/a" when there is no map, no meeting id, or no match)
        meeting = None
        if meeting_map and result.meeting_id:
            meeting = meeting_map.get(result.meeting_id)
        meeting_info = format_meeting_location_and_dates(meeting)

        # Short agenda item text
        agenda_short = extract_agenda_item_short(result.agenda_item_text) or "-"

        table.add_row(
            result.tdoc_id,
            result.title or "-",
            result.source or "-",
            result.tdoc_type or "-",
            meeting_info,
            # Truthiness check: any falsy agenda number (including zero) is shown as "-".
            str(result.agenda_item_nbr) if result.agenda_item_nbr else "-",
            agenda_short,
            result.status or "-",
            size_kb,
        )

+7 −3
Original line number Diff line number Diff line
@@ -146,12 +146,16 @@ def query_tdocs(
        with create_cached_session() as session:
            checkout_tdocs(results, manager.checkout_dir, force=False, session=session)

    # Build meeting map for enriched output
    with MeetingDatabase(db_file) as meeting_db:
        meeting_map = meeting_db._meeting_map()

    if config.output_format is OutputFormat.JSON:
        console.print(json.dumps([tdoc_to_dict(result) for result in results], indent=2))
        console.print(json.dumps([tdoc_to_dict(result, meeting_map) for result in results], indent=2))
    elif config.output_format is OutputFormat.YAML:
        console.print(yaml.dump([tdoc_to_dict(result) for result in results], sort_keys=False))
        console.print(yaml.dump([tdoc_to_dict(result, meeting_map) for result in results], sort_keys=False))
    else:
        print_tdoc_table(results)
        print_tdoc_table(results, meeting_map)


def query_meetings(
+21 −12
Original line number Diff line number Diff line
@@ -158,10 +158,11 @@ def convert_excel_row_to_tdoc_metadata(
    for_purpose = _get_column_value(row, ["For", "Purpose", "For Purpose"])
    source = _get_column_value(row, ["Source", "Organization", "Company"])
    contact = _get_column_value(row, ["Contact", "Contact Person", "Author"])
    agenda_item = _get_column_value(row, ["Agenda Item", "Agenda", "Agenda Ref"])
    status = _get_column_value(row, ["Status", "Document Status"])
    is_revision_of = _get_column_value(row, ["Revision of", "Is Revision of", "Based on"])
    date_created = _get_column_value(row, ["Date", "Created", "Date Created", "Submission Date"])
    agenda_item = _get_column_value(row, ["Agenda item"])
    agenda_item_desc = _get_column_value(row, ["Agenda item description"])
    status = _get_column_value(row, ["TDoc Status"])
    is_revision_of = _get_column_value(row, ["Is revision of"])
    date_created = _get_column_value(row, ["Date", "Created", "Date Created", "Submission Date", "Reservation date"])

    # Generate URL (this will be validated/updated later by the directory crawler)
    url = f"https://www.3gpp.org/ftp/tsg_ran/.../{tdoc_id.upper()}.zip"
@@ -169,8 +170,11 @@ def convert_excel_row_to_tdoc_metadata(
    now = datetime.now(UTC)

    try:
        # Parse agenda item number
        # Parse agenda item number and text
        agenda_nbr, agenda_text = _parse_agenda_item(agenda_item)
        # If we have a separate description column, use it instead
        if agenda_item_desc and agenda_text == "Unknown":
            agenda_text = agenda_item_desc

        # Parse date
        parsed_date = _parse_date(date_created)
@@ -239,7 +243,7 @@ def _is_valid_tdoc_id(tdoc_id: str) -> bool:


def _get_column_value(row: pd.Series, possible_names: list[str]) -> str | None:
    """Get value from row using possible column names.
    """Get value from row using possible column names (case-insensitive).

    Args:
        row: pandas Series representing one Excel row
@@ -248,8 +252,13 @@ def _get_column_value(row: pd.Series, possible_names: list[str]) -> str | None:
    Returns:
        Column value or None if not found
    """
    # Build lowercase lookup for case-insensitive matching
    row_lower = {k.lower(): k for k in row.index}

    for col_name in possible_names:
        value = row.get(col_name)
        actual_col = row_lower.get(col_name.lower())
        if actual_col is not None:
            value = row[actual_col]
            if value and isinstance(value, str):
                return value.strip()
            elif value is not None: