fix(lint): apply ruff auto-fixes for imports and DRY violations (6d7af78a) · Commits · Jan Reimes / 3gpp-crawler

src/tdoc-ai/tdoc_ai/operations/embeddings.py

+1 −2

Original line number	Diff line number	Diff line
		@@ -7,6 +7,7 @@ from collections.abc import Sequence
		from pathlib import Path
		from typing import Any, cast

		import sentence_transformers
		from sentence_transformers import SentenceTransformer

		from tdoc_ai.config import AiConfig, Backend
		@@ -204,8 +205,6 @@ class EmbeddingsManager:
		def _get_sentence_transformers_version(self) -> str:
		"""Get the installed sentence-transformers version."""
		try:
		import sentence_transformers # noqa: PLC0415

		return sentence_transformers.__version__
		except ImportError:
		return "unknown"

src/tdoc-ai/tdoc_ai/operations/extract.py

+2 −5

Original line number	Diff line number	Diff line
		@@ -14,19 +14,16 @@ from kreuzberg import ExtractionConfig, KeywordAlgorithm, KeywordConfig, Languag
		from tdoc_ai.models import ExtractionError, ProcessingStatus
		from tdoc_ai.operations.workspace_names import normalize_workspace_name
		from tdoc_ai.storage import AiStorage
		from tdoc_crawler.tdocs.utils import normalize_tdoc_id
		from tdoc_crawler.utils.misc import utc_now

		logger = logging.getLogger(__name__)


		def _normalize_document_id(document_id: str) -> str:
		return document_id.strip().upper()


		def _artifact_path(docx_path: Path, document_id: str) -> Path:
		artifact_dir = docx_path.parent / ".ai"
		artifact_dir.mkdir(parents=True, exist_ok=True)
		return artifact_dir / f"{_normalize_document_id(document_id)}.md"
		return artifact_dir / f"{normalize_tdoc_id(document_id)}.md"


		def _write_markdown_artifact(docx_path: Path, document_id: str, markdown: str) -> Path:

src/tdoc-ai/tdoc_ai/operations/workspace_registry.py

+4 −4

Original line number	Diff line number	Diff line
		@@ -191,7 +191,7 @@ class WorkspaceRegistry:
		Raises:
		ValueError: If workspace already exists.
		"""
		normalized_name = name.strip().lower()
		normalized_name = normalize_workspace_name(name)
		if not normalized_name:
		raise ValueError("Workspace name cannot be empty")

		@@ -216,7 +216,7 @@ class WorkspaceRegistry:
		Returns:
		True if deleted, False if not found or if attempting to delete default.
		"""
		normalized_name = name.strip().lower()
		normalized_name = normalize_workspace_name(name)
		if normalized_name == DEFAULT_WORKSPACE:
		logger.warning("Cannot delete the default workspace")
		return False
		@@ -243,7 +243,7 @@ class WorkspaceRegistry:
		Returns:
		WorkspaceMetadata if found, None otherwise.
		"""
		normalized_name = name.strip().lower() if name else DEFAULT_WORKSPACE
		normalized_name = normalize_workspace_name(name)
		return self.workspaces.get(normalized_name)

		def list_workspaces(self) -> list[WorkspaceDisplayInfo]:
		@@ -279,7 +279,7 @@ class WorkspaceRegistry:
		Raises:
		ValueError: If workspace doesn't exist.
		"""
		normalized_name = name.strip().lower()
		normalized_name = normalize_workspace_name(name)
		if normalized_name not in self.workspaces:
		raise ValueError(f"Workspace '{normalized_name}' does not exist")

src/tdoc_crawler/database/meetings.py

+1 −1

Original line number	Diff line number	Diff line
		@@ -7,11 +7,11 @@ from datetime import datetime
		from tdoc_crawler.database.base import DocDatabase
		from tdoc_crawler.logging import get_logger
		from tdoc_crawler.meetings.models import MeetingMetadata, MeetingQueryConfig
		from tdoc_crawler.meetings.utils import normalize_portal_meeting_name
		from tdoc_crawler.models.base import SortOrder
		from tdoc_crawler.models.subworking_groups import SUBTB_INDEX
		from tdoc_crawler.models.working_groups import WORKING_GROUP_RECORDS, WorkingGroup
		from tdoc_crawler.utils.misc import utc_now
		from tdoc_crawler.utils.normalization import normalize_portal_meeting_name

		_logger = get_logger(__name__)

src/tdoc_crawler/meetings/utils.py

+36 −0

Original line number	Diff line number	Diff line
		@@ -8,6 +8,41 @@ from tdoc_crawler.models.subworking_groups import SubWorkingGroup
		from tdoc_crawler.models.working_groups import WorkingGroup


		def normalize_portal_meeting_name(portal_meeting: str \| None) -> str:
		"""Normalize portal meeting name to database format.

		The portal uses format like "SA4#133-e" while the database uses "S4-133-e".
		This function converts portal format to database format.

		Args:
		portal_meeting: Meeting name from portal (e.g., "SA4#133-e")

		Returns:
		Normalized meeting name (e.g., "S4-133-e")
		"""
		if not portal_meeting:
		return ""

		# Replace "SA4#" with "S4-", "RAN1#" with "R1-", etc.
		normalized = portal_meeting.replace("#", "-")

		# Handle full working group names (SA, RAN, CT)
		for full_name, short_prefix in [("SA", "S"), ("RAN", "R"), ("CT", "C")]:
		# Match patterns like "SA4-" and replace with "S4-"
		if normalized.startswith(f"{full_name}"):
		# Extract subgroup number if present
		for i, char in enumerate(normalized[len(full_name) :]):
		if not char.isdigit():
		subgroup_num = normalized[len(full_name) : len(full_name) + i] if i > 0 else ""
		rest = normalized[len(full_name) + i :]
		if subgroup_num:
		normalized = f"{short_prefix}{subgroup_num}{rest}"
		break
		break

		return normalized


		def normalize_working_group_alias(alias: str) -> WorkingGroup:
		"""Normalize working group aliases to canonical working group enums.

		@@ -77,6 +112,7 @@ def normalize_subgroup_alias(alias: str) -> SubWorkingGroup:


		__all__ = [
		"normalize_portal_meeting_name",
		"normalize_subgroup_alias",
		"normalize_working_group_alias",
		]