Commit 6d7af78a authored by Jan Reimes
Browse files

fix(lint): apply ruff auto-fixes for imports and DRY violations

- embeddings.py: Move sentence_transformers import to top level (was lazy import inside method)
- extract.py: Use normalize_tdoc_id from tdocs/utils instead of duplicate _normalize_document_id
- workspace_registry.py: Use normalize_workspace_name from workspace_names module
- database/meetings.py: Import normalize_portal_meeting_name from meetings/utils (correct location)
- meetings/utils.py: Add normalize_portal_meeting_name function (moved from utils/normalization.py)
parent 2f6e950c
Loading
Loading
Loading
Loading
+1 −2
Original line number Diff line number Diff line
@@ -7,6 +7,7 @@ from collections.abc import Sequence
from pathlib import Path
from typing import Any, cast

import sentence_transformers
from sentence_transformers import SentenceTransformer

from tdoc_ai.config import AiConfig, Backend
@@ -204,8 +205,6 @@ class EmbeddingsManager:
    def _get_sentence_transformers_version(self) -> str:
        """Get the installed sentence-transformers version."""
        try:
            import sentence_transformers  # noqa: PLC0415

            return sentence_transformers.__version__
        except ImportError:
            return "unknown"
+2 −5
Original line number Diff line number Diff line
@@ -14,19 +14,16 @@ from kreuzberg import ExtractionConfig, KeywordAlgorithm, KeywordConfig, Languag
from tdoc_ai.models import ExtractionError, ProcessingStatus
from tdoc_ai.operations.workspace_names import normalize_workspace_name
from tdoc_ai.storage import AiStorage
from tdoc_crawler.tdocs.utils import normalize_tdoc_id
from tdoc_crawler.utils.misc import utc_now

logger = logging.getLogger(__name__)


def _normalize_document_id(document_id: str) -> str:
    return document_id.strip().upper()


def _artifact_path(docx_path: Path, document_id: str) -> Path:
    """Return the Markdown artifact path for a document, creating its folder.

    The artifact lives in a ``.ai`` directory located next to ``docx_path``
    (the directory is created if it does not exist) and is named after the
    normalized ``document_id`` with a ``.md`` suffix.

    Args:
        docx_path: Path of the source document; only its parent directory is used.
        document_id: Identifier used to build the artifact file name.

    Returns:
        Path of the ``.md`` artifact inside the ``.ai`` directory.
    """
    artifact_dir = docx_path.parent / ".ai"
    # Side effect: ensure the artifact directory exists before returning a path into it.
    artifact_dir.mkdir(parents=True, exist_ok=True)
    return artifact_dir / f"{_normalize_document_id(document_id)}.md"
    # NOTE(review): the return below is unreachable as written. It looks like the
    # intended replacement for the return above (old and new lines both present);
    # confirm which one should remain.
    return artifact_dir / f"{normalize_tdoc_id(document_id)}.md"


def _write_markdown_artifact(docx_path: Path, document_id: str, markdown: str) -> Path:
+4 −4
Original line number Diff line number Diff line
@@ -191,7 +191,7 @@ class WorkspaceRegistry:
        Raises:
            ValueError: If workspace already exists.
        """
        normalized_name = name.strip().lower()
        normalized_name = normalize_workspace_name(name)
        if not normalized_name:
            raise ValueError("Workspace name cannot be empty")

@@ -216,7 +216,7 @@ class WorkspaceRegistry:
        Returns:
            True if deleted, False if not found or if attempting to delete default.
        """
        normalized_name = name.strip().lower()
        normalized_name = normalize_workspace_name(name)
        if normalized_name == DEFAULT_WORKSPACE:
            logger.warning("Cannot delete the default workspace")
            return False
@@ -243,7 +243,7 @@ class WorkspaceRegistry:
        Returns:
            WorkspaceMetadata if found, None otherwise.
        """
        normalized_name = name.strip().lower() if name else DEFAULT_WORKSPACE
        normalized_name = normalize_workspace_name(name)
        return self.workspaces.get(normalized_name)

    def list_workspaces(self) -> list[WorkspaceDisplayInfo]:
@@ -279,7 +279,7 @@ class WorkspaceRegistry:
        Raises:
            ValueError: If workspace doesn't exist.
        """
        normalized_name = name.strip().lower()
        normalized_name = normalize_workspace_name(name)
        if normalized_name not in self.workspaces:
            raise ValueError(f"Workspace '{normalized_name}' does not exist")

+1 −1
Original line number Diff line number Diff line
@@ -7,11 +7,11 @@ from datetime import datetime
from tdoc_crawler.database.base import DocDatabase
from tdoc_crawler.logging import get_logger
from tdoc_crawler.meetings.models import MeetingMetadata, MeetingQueryConfig
from tdoc_crawler.meetings.utils import normalize_portal_meeting_name
from tdoc_crawler.models.base import SortOrder
from tdoc_crawler.models.subworking_groups import SUBTB_INDEX
from tdoc_crawler.models.working_groups import WORKING_GROUP_RECORDS, WorkingGroup
from tdoc_crawler.utils.misc import utc_now
from tdoc_crawler.utils.normalization import normalize_portal_meeting_name

_logger = get_logger(__name__)

+36 −0
Original line number Diff line number Diff line
@@ -8,6 +8,41 @@ from tdoc_crawler.models.subworking_groups import SubWorkingGroup
from tdoc_crawler.models.working_groups import WorkingGroup


def normalize_portal_meeting_name(portal_meeting: str | None) -> str:
    """Normalize portal meeting name to database format.

    The portal uses format like "SA4#133-e" while the database uses "S4-133-e".
    This function converts portal format to database format.

    Args:
        portal_meeting: Meeting name from portal (e.g., "SA4#133-e")

    Returns:
        Normalized meeting name (e.g., "S4-133-e")
    """
    if not portal_meeting:
        return ""

    # Replace "SA4#" with "S4-", "RAN1#" with "R1-", etc.
    normalized = portal_meeting.replace("#", "-")

    # Handle full working group names (SA, RAN, CT)
    for full_name, short_prefix in [("SA", "S"), ("RAN", "R"), ("CT", "C")]:
        # Match patterns like "SA4-" and replace with "S4-"
        if normalized.startswith(f"{full_name}"):
            # Extract subgroup number if present
            for i, char in enumerate(normalized[len(full_name) :]):
                if not char.isdigit():
                    subgroup_num = normalized[len(full_name) : len(full_name) + i] if i > 0 else ""
                    rest = normalized[len(full_name) + i :]
                    if subgroup_num:
                        normalized = f"{short_prefix}{subgroup_num}{rest}"
                    break
            break

    return normalized


def normalize_working_group_alias(alias: str) -> WorkingGroup:
    """Normalize working group aliases to canonical working group enums.

@@ -77,6 +112,7 @@ def normalize_subgroup_alias(alias: str) -> SubWorkingGroup:


# Names exported by ``from <this module> import *``; kept in alphabetical order.
__all__ = [
    "normalize_portal_meeting_name",
    "normalize_subgroup_alias",
    "normalize_working_group_alias",
]