Commit cf67e609 authored by Jan Reimes
Browse files

fix(lint): resolve ruff issues in AI modules

- graph.py: Add missing 'from typing import Any' import (F821)
- graph.py: Add noqa: PLC0415 for intentional lazy import of _get_llm_client
- models.py: Remove duplicate _normalize_document_id(), use shared normalize_tdoc_id from tdocs/utils
- models.py: Use normalize_workspace_name from operations/workspace_names
- storage.py: Fix _chunk_schema() docstring to document embedding_dimension parameter (D417)
- pipeline.py: Add missing 'config' parameter documentation (D417)
- embeddings.py: Add noqa: PLC0415 for intentional lazy import of sentence_transformers
parent 22a06e36
Loading
Loading
Loading
Loading
+38 −21
Original line number Diff line number Diff line
@@ -9,14 +9,11 @@ from typing import Any
from pydantic import BaseModel, Field, field_validator, model_validator

from tdoc_ai.config import AiConfig
from tdoc_ai.operations.workspace_names import normalize_workspace_name
from tdoc_crawler.tdocs.utils import normalize_tdoc_id
from tdoc_crawler.utils.misc import utc_now


# TODO: the same functions is re-defined in operations/extract.py, src/tdoc_crawler/tdocs/models.py and src/tdoc_crawler/tdocs/utils.py - should exist only at a single location!
def _normalize_document_id(value: str) -> str:
    return value.strip().upper()


class PipelineStage(StrEnum):
    """Stages of the AI processing pipeline."""

@@ -124,11 +121,10 @@ class Workspace(BaseModel):
    @field_validator("workspace_name")
    @classmethod
    def _normalize_workspace_name(cls, value: str) -> str:
        normalized = value.strip().lower()
        if not normalized:
        if not value.strip():
            msg = "workspace_name must not be empty"
            raise ValueError(msg)
        return normalized
        return normalize_workspace_name(value)


class WorkspaceMember(BaseModel):
@@ -145,16 +141,15 @@ class WorkspaceMember(BaseModel):
    @field_validator("workspace_name")
    @classmethod
    def _normalize_workspace_name(cls, value: str) -> str:
        normalized = value.strip().lower()
        if not normalized:
        if not value.strip():
            msg = "workspace_name must not be empty"
            raise ValueError(msg)
        return normalized
        return normalize_workspace_name(value)

    @field_validator("source_item_id")
    @classmethod
    def _normalize_source_item_id(cls, value: str) -> str:
        normalized = value.strip().upper()
        normalized = normalize_tdoc_id(value)
        if not normalized:
            msg = "source_item_id must not be empty"
            raise ValueError(msg)
@@ -173,11 +168,10 @@ class ArtifactScope(BaseModel):
    @field_validator("workspace_name")
    @classmethod
    def _normalize_workspace_name(cls, value: str) -> str:
        normalized = value.strip().lower()
        if not normalized:
        if not value.strip():
            msg = "workspace_name must not be empty"
            raise ValueError(msg)
        return normalized
        return normalize_workspace_name(value)


class ProcessingStatus(BaseModel):
@@ -200,7 +194,11 @@ class ProcessingStatus(BaseModel):
    @field_validator("document_id")
    @classmethod
    def _normalize_document_id(cls, value: str) -> str:
        return _normalize_document_id(value)
        normalized = normalize_tdoc_id(value)
        if not normalized:
            msg = "document_id must not be empty"
            raise ValueError(msg)
        return normalized


class DocumentClassification(BaseModel):
@@ -217,7 +215,11 @@ class DocumentClassification(BaseModel):
    @field_validator("document_id")
    @classmethod
    def _normalize_document_id(cls, value: str) -> str:
        return _normalize_document_id(value)
        normalized = normalize_tdoc_id(value)
        if not normalized:
            msg = "document_id must not be empty"
            raise ValueError(msg)
        return normalized

    @field_validator("confidence")
    @classmethod
@@ -261,7 +263,11 @@ class DocumentChunk(BaseModel):
    @field_validator("document_id")
    @classmethod
    def _normalize_document_id(cls, value: str) -> str:
        return _normalize_document_id(value)
        normalized = normalize_tdoc_id(value)
        if not normalized:
            msg = "document_id must not be empty"
            raise ValueError(msg)
        return normalized


class QueryResult(BaseModel):
@@ -275,7 +281,11 @@ class QueryResult(BaseModel):
    @field_validator("document_id")
    @classmethod
    def _normalize_document_id(cls, value: str) -> str:
        return _normalize_document_id(value)
        normalized = normalize_tdoc_id(value)
        if not normalized:
            msg = "document_id must not be empty"
            raise ValueError(msg)
        return normalized


class DocumentSummary(BaseModel):
@@ -287,14 +297,21 @@ class DocumentSummary(BaseModel):
    action_items: list[str] = Field(default_factory=list, description="Action items")
    decisions: list[str] = Field(default_factory=list, description="Decisions recorded")
    affected_specs: list[str] = Field(default_factory=list, description="Affected specification IDs")
    llm_model: str = Field(default_factory=lambda: AiConfig().llm_model, description="Model used for generation")
    llm_model: str = Field(
        default_factory=lambda: AiConfig().llm_model,
        description="Model used for generation",
    )
    prompt_version: str = Field("v1", description="Prompt template version")
    generated_at: datetime = Field(default_factory=utc_now, description="Generation timestamp")

    @field_validator("document_id")
    @classmethod
    def _normalize_document_id(cls, value: str) -> str:
        return _normalize_document_id(value)
        normalized = normalize_tdoc_id(value)
        if not normalized:
            msg = "document_id must not be empty"
            raise ValueError(msg)
        return normalized


class GraphNode(BaseModel):
+1 −1
Original line number Diff line number Diff line
@@ -204,7 +204,7 @@ class EmbeddingsManager:
    def _get_sentence_transformers_version(self) -> str:
        """Get the installed sentence-transformers version."""
        try:
            import sentence_transformers
            import sentence_transformers  # noqa: PLC0415

            return sentence_transformers.__version__
        except ImportError:
+2 −1
Original line number Diff line number Diff line
@@ -6,6 +6,7 @@ import logging
import re
from datetime import datetime
from pathlib import Path
from typing import Any

from tdoc_ai.config import AiConfig, GraphQueryLevel
from tdoc_ai.models import GraphEdge, GraphEdgeType, GraphNode, GraphNodeType, GraphQueryResult
@@ -730,7 +731,7 @@ def _synthesize_with_llm(
    Returns:
        LLM-generated answer.
    """
    from tdoc_ai.operations.summarize import _get_llm_client
    from tdoc_ai.operations.summarize import _get_llm_client  # noqa: PLC0415

    # Build embedding context
    embedding_context_parts = []
+1 −0
Original line number Diff line number Diff line
@@ -293,6 +293,7 @@ def process_tdoc(
        checkout_path: Path to the TDoc checkout folder.
        force_rerun: If True, skip resume logic and run all stages.
        workspace: Optional workspace scope (defaults to "default").
        config: Optional AI configuration (defaults to environment config).

    Returns:
        ProcessingStatus with final pipeline state.
+1 −1
Original line number Diff line number Diff line
@@ -489,7 +489,7 @@ def _chunk_schema(embedding_dimension: int) -> pa.Schema:
    """Create schema for document chunks with fixed-size vector.

    Args:
        pa.field("vector", pa.list_(pa.float32(), embedding_dimension)),
        embedding_dimension: Dimension of the embedding vectors.

    Returns:
        PyArrow schema with FixedSizeList vector field.