Loading src/tdoc_crawler/tdocs/models.py +9 −0 Original line number Diff line number Diff line Loading @@ -298,6 +298,15 @@ class TDocQueryConfig(BaseConfigModel): limit: int | None = Field(None, ge=1, description="Maximum results") order: SortOrder = Field(SortOrder.DESC, description="Sort order applied to date_retrieved") # TODO: normalization of tdoc name(s) is most likely done already several times in the project? Consider re-using existing utils function? @field_validator("tdoc_ids", mode="before") @classmethod def _normalize_tdoc_ids(cls, value: Iterable[str] | None) -> list[str] | None: """Normalize TDoc IDs to uppercase.""" if value is None: return None return [str(item).upper() for item in value] @classmethod def _normalize_working_groups(cls, value: Iterable[str | WorkingGroup] | None) -> list[WorkingGroup] | None: """Ensure the working group list is comprised of enum members.""" Loading tests/ai/conftest.py +1 −2 Original line number Diff line number Diff line Loading @@ -12,6 +12,7 @@ import pytest from typer.testing import CliRunner from tdoc_crawler.ai.container import AiServiceContainer from tdoc_crawler.ai.operations import workspaces as workspace_ops from tdoc_crawler.ai.storage import AiStorage from tdoc_crawler.http_client import download_to_file Loading Loading @@ -177,8 +178,6 @@ def test_workspace(ai_storage: AiStorage) -> str: Returns: Workspace name ("default") """ from tdoc_crawler.ai.operations import workspaces as workspace_ops workspace_name = "default" workspace_ops.create_workspace(workspace_name, auto_build=False) return workspace_name tests/ai/test_ai_pipeline.py +0 −3 Original line number Diff line number Diff line Loading @@ -140,9 +140,6 @@ class TestProcessTdocApi: """new_only mode should return only non-completed items.""" # This test verifies the filtering logic in process_all # by mocking the storage.get_status call from tdoc_crawler.ai.models import PipelineStage, ProcessingStatus from tdoc_crawler.ai.storage import AiStorage completed = ProcessingStatus(document_id="S4-251003", current_stage=PipelineStage.COMPLETED) pending = ProcessingStatus(document_id="S4-260001", current_stage=PipelineStage.PENDING) Loading tests/ai/test_ai_summarization.py +1 −2 Original line number Diff line number Diff line Loading @@ -7,6 +7,7 @@ from unittest.mock import patch import pytest from tdoc_crawler.ai.container import AiServiceContainer from tdoc_crawler.ai.models import DocumentSummary, LlmConfigError from tdoc_crawler.ai.operations import summarize from tdoc_crawler.ai.operations.summarize import _count_words, _should_skip_summary Loading Loading @@ -81,8 +82,6 @@ class TestSummarization: def test_missing_llm_model_config_raises_error(self, monkeypatch: pytest.MonkeyPatch) -> None: """Missing/invalid LLM config should raise LlmConfigError.""" from tdoc_crawler.ai.container import AiServiceContainer monkeypatch.setattr( AiServiceContainer, "get_instance", Loading Loading
src/tdoc_crawler/tdocs/models.py +9 −0 Original line number Diff line number Diff line Loading @@ -298,6 +298,15 @@ class TDocQueryConfig(BaseConfigModel): limit: int | None = Field(None, ge=1, description="Maximum results") order: SortOrder = Field(SortOrder.DESC, description="Sort order applied to date_retrieved") # TODO: normalization of tdoc name(s) is most likely done already several times in the project? Consider re-using existing utils function? @field_validator("tdoc_ids", mode="before") @classmethod def _normalize_tdoc_ids(cls, value: Iterable[str] | None) -> list[str] | None: """Normalize TDoc IDs to uppercase.""" if value is None: return None return [str(item).upper() for item in value] @classmethod def _normalize_working_groups(cls, value: Iterable[str | WorkingGroup] | None) -> list[WorkingGroup] | None: """Ensure the working group list is comprised of enum members.""" Loading
tests/ai/conftest.py +1 −2 Original line number Diff line number Diff line Loading @@ -12,6 +12,7 @@ import pytest from typer.testing import CliRunner from tdoc_crawler.ai.container import AiServiceContainer from tdoc_crawler.ai.operations import workspaces as workspace_ops from tdoc_crawler.ai.storage import AiStorage from tdoc_crawler.http_client import download_to_file Loading Loading @@ -177,8 +178,6 @@ def test_workspace(ai_storage: AiStorage) -> str: Returns: Workspace name ("default") """ from tdoc_crawler.ai.operations import workspaces as workspace_ops workspace_name = "default" workspace_ops.create_workspace(workspace_name, auto_build=False) return workspace_name
tests/ai/test_ai_pipeline.py +0 −3 Original line number Diff line number Diff line Loading @@ -140,9 +140,6 @@ class TestProcessTdocApi: """new_only mode should return only non-completed items.""" # This test verifies the filtering logic in process_all # by mocking the storage.get_status call from tdoc_crawler.ai.models import PipelineStage, ProcessingStatus from tdoc_crawler.ai.storage import AiStorage completed = ProcessingStatus(document_id="S4-251003", current_stage=PipelineStage.COMPLETED) pending = ProcessingStatus(document_id="S4-260001", current_stage=PipelineStage.PENDING) Loading
tests/ai/test_ai_summarization.py +1 −2 Original line number Diff line number Diff line Loading @@ -7,6 +7,7 @@ from unittest.mock import patch import pytest from tdoc_crawler.ai.container import AiServiceContainer from tdoc_crawler.ai.models import DocumentSummary, LlmConfigError from tdoc_crawler.ai.operations import summarize from tdoc_crawler.ai.operations.summarize import _count_words, _should_skip_summary Loading Loading @@ -81,8 +82,6 @@ class TestSummarization: def test_missing_llm_model_config_raises_error(self, monkeypatch: pytest.MonkeyPatch) -> None: """Missing/invalid LLM config should raise LlmConfigError.""" from tdoc_crawler.ai.container import AiServiceContainer monkeypatch.setattr( AiServiceContainer, "get_instance", Loading