fix(lint): resolve ruff issues in AI modules (cf67e609) · Commits · Jan Reimes / 3gpp-crawler

src/tdoc-ai/tdoc_ai/models.py

+38 −21

Original line number	Diff line number	Diff line
		@@ -9,14 +9,11 @@ from typing import Any
		from pydantic import BaseModel, Field, field_validator, model_validator

		from tdoc_ai.config import AiConfig
		from tdoc_ai.operations.workspace_names import normalize_workspace_name
		from tdoc_crawler.tdocs.utils import normalize_tdoc_id
		from tdoc_crawler.utils.misc import utc_now


		# TODO: the same function is re-defined in operations/extract.py, src/tdoc_crawler/tdocs/models.py and src/tdoc_crawler/tdocs/utils.py - it should exist in only a single location!
		def _normalize_document_id(value: str) -> str:
		return value.strip().upper()


		class PipelineStage(StrEnum):
		"""Stages of the AI processing pipeline."""

		@@ -124,11 +121,10 @@ class Workspace(BaseModel):
		@field_validator("workspace_name")
		@classmethod
		def _normalize_workspace_name(cls, value: str) -> str:
		normalized = value.strip().lower()
		if not normalized:
		if not value.strip():
		msg = "workspace_name must not be empty"
		raise ValueError(msg)
		return normalized
		return normalize_workspace_name(value)


		class WorkspaceMember(BaseModel):
		@@ -145,16 +141,15 @@ class WorkspaceMember(BaseModel):
		@field_validator("workspace_name")
		@classmethod
		def _normalize_workspace_name(cls, value: str) -> str:
		normalized = value.strip().lower()
		if not normalized:
		if not value.strip():
		msg = "workspace_name must not be empty"
		raise ValueError(msg)
		return normalized
		return normalize_workspace_name(value)

		@field_validator("source_item_id")
		@classmethod
		def _normalize_source_item_id(cls, value: str) -> str:
		normalized = value.strip().upper()
		normalized = normalize_tdoc_id(value)
		if not normalized:
		msg = "source_item_id must not be empty"
		raise ValueError(msg)
		@@ -173,11 +168,10 @@ class ArtifactScope(BaseModel):
		@field_validator("workspace_name")
		@classmethod
		def _normalize_workspace_name(cls, value: str) -> str:
		normalized = value.strip().lower()
		if not normalized:
		if not value.strip():
		msg = "workspace_name must not be empty"
		raise ValueError(msg)
		return normalized
		return normalize_workspace_name(value)


		class ProcessingStatus(BaseModel):
		@@ -200,7 +194,11 @@ class ProcessingStatus(BaseModel):
		@field_validator("document_id")
		@classmethod
		def _normalize_document_id(cls, value: str) -> str:
		return _normalize_document_id(value)
		normalized = normalize_tdoc_id(value)
		if not normalized:
		msg = "document_id must not be empty"
		raise ValueError(msg)
		return normalized


		class DocumentClassification(BaseModel):
		@@ -217,7 +215,11 @@ class DocumentClassification(BaseModel):
		@field_validator("document_id")
		@classmethod
		def _normalize_document_id(cls, value: str) -> str:
		return _normalize_document_id(value)
		normalized = normalize_tdoc_id(value)
		if not normalized:
		msg = "document_id must not be empty"
		raise ValueError(msg)
		return normalized

		@field_validator("confidence")
		@classmethod
		@@ -261,7 +263,11 @@ class DocumentChunk(BaseModel):
		@field_validator("document_id")
		@classmethod
		def _normalize_document_id(cls, value: str) -> str:
		return _normalize_document_id(value)
		normalized = normalize_tdoc_id(value)
		if not normalized:
		msg = "document_id must not be empty"
		raise ValueError(msg)
		return normalized


		class QueryResult(BaseModel):
		@@ -275,7 +281,11 @@ class QueryResult(BaseModel):
		@field_validator("document_id")
		@classmethod
		def _normalize_document_id(cls, value: str) -> str:
		return _normalize_document_id(value)
		normalized = normalize_tdoc_id(value)
		if not normalized:
		msg = "document_id must not be empty"
		raise ValueError(msg)
		return normalized


		class DocumentSummary(BaseModel):
		@@ -287,14 +297,21 @@ class DocumentSummary(BaseModel):
		action_items: list[str] = Field(default_factory=list, description="Action items")
		decisions: list[str] = Field(default_factory=list, description="Decisions recorded")
		affected_specs: list[str] = Field(default_factory=list, description="Affected specification IDs")
		llm_model: str = Field(default_factory=lambda: AiConfig().llm_model, description="Model used for generation")
		llm_model: str = Field(
		default_factory=lambda: AiConfig().llm_model,
		description="Model used for generation",
		)
		prompt_version: str = Field("v1", description="Prompt template version")
		generated_at: datetime = Field(default_factory=utc_now, description="Generation timestamp")

		@field_validator("document_id")
		@classmethod
		def _normalize_document_id(cls, value: str) -> str:
		return _normalize_document_id(value)
		normalized = normalize_tdoc_id(value)
		if not normalized:
		msg = "document_id must not be empty"
		raise ValueError(msg)
		return normalized


		class GraphNode(BaseModel):

src/tdoc-ai/tdoc_ai/operations/embeddings.py

+1 −1

Original line number	Diff line number	Diff line
		@@ -204,7 +204,7 @@ class EmbeddingsManager:
		def _get_sentence_transformers_version(self) -> str:
		"""Get the installed sentence-transformers version."""
		try:
		import sentence_transformers
		import sentence_transformers # noqa: PLC0415

		return sentence_transformers.__version__
		except ImportError:

src/tdoc-ai/tdoc_ai/operations/graph.py

+2 −1

Original line number	Diff line number	Diff line
		@@ -6,6 +6,7 @@ import logging
		import re
		from datetime import datetime
		from pathlib import Path
		from typing import Any

		from tdoc_ai.config import AiConfig, GraphQueryLevel
		from tdoc_ai.models import GraphEdge, GraphEdgeType, GraphNode, GraphNodeType, GraphQueryResult
		@@ -730,7 +731,7 @@ def _synthesize_with_llm(
		Returns:
		LLM-generated answer.
		"""
		from tdoc_ai.operations.summarize import _get_llm_client
		from tdoc_ai.operations.summarize import _get_llm_client # noqa: PLC0415

		# Build embedding context
		embedding_context_parts = []

src/tdoc-ai/tdoc_ai/operations/pipeline.py

+1 −0

Original line number	Diff line number	Diff line
		@@ -293,6 +293,7 @@ def process_tdoc(
		checkout_path: Path to the TDoc checkout folder.
		force_rerun: If True, skip resume logic and run all stages.
		workspace: Optional workspace scope (defaults to "default").
		config: Optional AI configuration (defaults to environment config).

		Returns:
		ProcessingStatus with final pipeline state.

src/tdoc-ai/tdoc_ai/storage.py

+1 −1

Original line number	Diff line number	Diff line
		@@ -489,7 +489,7 @@ def _chunk_schema(embedding_dimension: int) -> pa.Schema:
		"""Create schema for document chunks with fixed-size vector.

		Args:
		pa.field("vector", pa.list_(pa.float32(), embedding_dimension)),
		embedding_dimension: Dimension of the embedding vectors.

		Returns:
		PyArrow schema with FixedSizeList vector field.