Commit 26a0ecd3 authored by Jan Reimes
Browse files

style: Fix linter issues (PLC0415, ANN202, E501, F811, PLR6301, SIM102)

- cli.py: Move convert_document_to_markdown import to top level
- migration.py: Break long TODO comment line
- rag.py: Remove duplicate zhipu_embedding definition
- shared_storage.py: Add @staticmethod decorator to _compute_text_hash
- workspaces.py: Combine nested if statements, fix indentation
- test_extraction_elements.py: Move import to top level
- test_operations_metrics.py: Add return type annotations to mock functions

All ruff checks now pass.
parent 08089377
Loading
Loading
Loading
Loading
+2 −0
Original line number Diff line number Diff line
@@ -2,6 +2,8 @@
cls
call .venv\scripts\activate.bat

SET TDC_AI_CONVERT_MD=1

:: tdoc-crawler crawl-meetings -s S4
:: tdoc-crawler crawl-tdocs --start-date 2016
:: tdoc-crawler query-tdocs --agenda "*atias*" --start-date 2018
+1 −2
Original line number Diff line number Diff line
@@ -7,6 +7,7 @@ from types import SimpleNamespace

import pytest

from threegpp_ai.lightrag import processor as processor_module
from threegpp_ai.lightrag.processor import DocumentProcessor, ProcessingResultStatus
from threegpp_ai.operations.extraction_result import (
    ExtractedTableElement,
@@ -82,8 +83,6 @@ async def test_processor_process_file_reports_structured_counts(monkeypatch: pyt

    # Mock extract_document_structured in the processor module where it's imported
    # This prevents actual kreuzberg extraction
    from threegpp_ai.lightrag import processor as processor_module

    monkeypatch.setattr(
        processor_module,
        "extract_document_structured",
+2 −2
Original line number Diff line number Diff line
@@ -57,7 +57,7 @@ def test_convert_tdoc_to_markdown_records_conversion_metric(monkeypatch: pytest.
    )

    # Mock extract_document_structured_from_tdoc to bypass actual extraction but still record metrics
    def mock_extract(*args, **kwargs):
    def mock_extract(*args, **kwargs) -> None:
        # Manually record the metric since we're bypassing timed_operation
        with timed_operation(tracker, "S4-260001", MetricType.CONVERSION):
            pass
@@ -92,7 +92,7 @@ def test_convert_tdoc_to_markdown_writes_table_sidecar(monkeypatch: pytest.Monke

    # Mock extract_document_structured_from_tdoc to return result with tables
    # Record the metric since we're bypassing the timed_operation wrapper
    def mock_extract(*args, **kwargs):
    def mock_extract(*args, **kwargs) -> None:
        with timed_operation(get_metrics_tracker(), "S4-260001", MetricType.CONVERSION):
            pass
        return SimpleNamespace(
+1 −3
Original line number Diff line number Diff line
@@ -83,7 +83,7 @@ from threegpp_ai.lightrag.config import LightRAGConfig
from threegpp_ai.lightrag.metadata import RAGMetadata
from threegpp_ai.lightrag.processor import DocumentProcessor
from threegpp_ai.lightrag.rag import PROVIDER_ALIASES, PROVIDERS
from threegpp_ai.operations.conversion import OFFICE_FORMATS, convert_to_pdf
from threegpp_ai.operations.conversion import OFFICE_FORMATS, convert_document_to_markdown, convert_to_pdf
from threegpp_ai.operations.workspace_registry import WorkspaceRegistry

# Load environment variables from .env file
@@ -333,8 +333,6 @@ def _process_single_item(
        try:
            # Extract markdown using unified pipeline - this will save to .ai folder
            # For TDocs: uses TDoc ID; for specs: uses spec number
            from threegpp_ai.operations.convert import convert_document_to_markdown

            convert_document_to_markdown(document_id=item, output_path=None, force=False)
            was_md_extracted = True
        except Exception as e:
+3 −1
Original line number Diff line number Diff line
@@ -8,7 +8,9 @@ Usage:
    >>> await migrate_to_shared_storage(working_dir, embedding_model)
"""

# TODO: Is this module needed at all? No need for legacy migration if we just switch to shared storage for all new workspaces. Maybe just keep the consolidation function for users who want to merge existing workspaces into shared storage?!
# TODO: Is this module needed at all? No need for legacy migration if we just switch
# to shared storage for all new workspaces. Maybe just keep the consolidation function
# for users who want to merge existing workspaces into shared storage?

from __future__ import annotations

Loading