Commit d878a8ad authored by Jan Reimes

feat(metrics): add performance metrics tracking for AI operations

* Implement MetricsTracker to aggregate and report performance metrics.
* Introduce timed_operation context manager for timing operations.
* Add DocumentMetric and TimedOperationResult for structured metric data.
* Update convert and summarize operations to record metrics during execution.
parent ce195997
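
For orientation, a minimal usage sketch of the metrics API as exercised by the new tests further down in this commit; the import path and names are taken from those tests, so treat anything beyond them as an assumption.

```python
# Sketch only: names mirror the new tests, not authoritative documentation.
from threegpp_ai.operations.metrics import (
    MetricsTracker,
    MetricType,
    timed_operation,
)

tracker = MetricsTracker()

# The context manager times the block, marks success/failure on exit,
# and appends a metric record to the tracker.
with timed_operation(tracker, "S4-260001", MetricType.CONVERSION) as result:
    pass  # actual conversion work goes here

assert result.success and result.duration_seconds >= 0
assert len(tracker.by_type(MetricType.CONVERSION)) == 1
```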
+4 −4
@@ -118,10 +118,10 @@ dim = _get_embedding_dimension(model_name, provider)
### Why This Matters

1. **Maintainability**: Change once, update everywhere automatically
-2. **Consistency**: No drift between different parts of the code
-3. **Testability**: Easy to swap values in tests
-4. **Security**: Secrets live in environment variables, not code
-5. **DRY**: Eliminates duplicated logic and magic strings/numbers
+1. **Consistency**: No drift between different parts of the code
+1. **Testability**: Easy to swap values in tests
+1. **Security**: Secrets live in environment variables, not code
+1. **DRY**: Eliminates duplicated logic and magic strings/numbers

## Storage Layer

+15 −10
@@ -36,10 +36,11 @@ LLM Summarization
Fetches TDoc files from checkout directory or downloads from 3GPP FTP.

**Pipeline:**
+
1. Resolve TDoc ID to metadata via WhatTheSpec
-2. Calculate checkout path
-3. Download via `checkout_tdoc()` if not in checkout
-4. Find available file types (PDF, DOCX, DOC)
+1. Calculate checkout path
+1. Download via `checkout_tdoc()` if not in checkout
+1. Find available file types (PDF, DOCX, DOC)

**Returns:** `TDocFiles` dataclass with paths to available documents
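
A short usage sketch of this entrypoint, based on what the new tests monkeypatch: the module path and the `force_download` flag come from those tests, while the field comments are assumptions.

```python
# Sketch based on the test doubles, not the shipped docstring.
from threegpp_ai.operations.convert import fetch_tdoc_files

files = fetch_tdoc_files("S4-250001", force_download=False)
print(files.primary_path)  # best available file, presumably PDF > DOCX > DOC
print(files.checkout_dir)  # checkout directory the TDoc was resolved into
```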

@@ -50,12 +51,14 @@ Fetches TDoc files from checkout directory or downloads from 3GPP FTP.
Converts TDoc to markdown using full pipeline.

**Pipeline:**
+
1. Fetch TDoc files via `fetch_tdoc_files()`
-2. Convert to PDF if needed (via convert-lo / LibreOffice)
-3. Extract text using kreuzberg
-4. Cache markdown to `.ai/<id>.md`
+1. Convert to PDF if needed (via convert-lo / LibreOffice)
+1. Extract text using kreuzberg
+1. Cache markdown to `.ai/<id>.md`

**Caching:**
+
- Checks for existing `.md` file in `.ai` subdirectory
- Only re-converts if `force=True` or cache miss
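
The caching rule can be pictured with a small sketch; the helper names here are hypothetical, only the `.ai/<id>.md` path and the force/cache-miss rule come from the docs above.

```python
# Hypothetical helpers illustrating the cache check described above.
from pathlib import Path

def cached_markdown_path(checkout_dir: Path, tdoc_id: str) -> Path:
    """Markdown cache lives at <checkout>/.ai/<id>.md per the docs above."""
    return checkout_dir / ".ai" / f"{tdoc_id}.md"

def needs_conversion(checkout_dir: Path, tdoc_id: str, force: bool) -> bool:
    """Re-convert only on force=True or cache miss."""
    return force or not cached_markdown_path(checkout_dir, tdoc_id).exists()
```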

@@ -66,11 +69,12 @@ Converts TDoc to markdown using full pipeline.
Generates LLM-powered summary of TDoc content.

**Pipeline:**
+
1. Get markdown via `convert_tdoc_to_markdown()`
-2. Truncate to `SUMMARY_INPUT_LIMIT` (8000 chars)
-3. Generate summary via LiteLLM
-4. Extract keywords via LiteLLM
-5. Return `SummarizeResult`
+1. Truncate to `SUMMARY_INPUT_LIMIT` (8000 chars)
+1. Generate summary via LiteLLM
+1. Extract keywords via LiteLLM
+1. Return `SummarizeResult`
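
A rough illustration of the truncate/summarize/keywords steps above: the 8000-char limit and the two LLM calls come from the docs and the test stub, while the prompts and the helper name are invented.

```python
# Illustrative only; the real prompts and return type are not shown in this diff.
import json

SUMMARY_INPUT_LIMIT = 8000  # chars, per the docs above

def sketch_summarize(markdown: str, client) -> tuple[str, list[str]]:
    text = markdown[:SUMMARY_INPUT_LIMIT]                         # truncate input
    summary = client.complete(f"Summarize:\n{text}")              # first LLM call
    keywords = json.loads(client.complete(f"Keywords:\n{text}"))  # second call, JSON list expected
    return summary, keywords
```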

## File Type Priority

@@ -92,6 +96,7 @@ Converted markdown files are cached in the TDoc checkout directory:
```

To force re-conversion:
+
```bash
3gpp-ai convert S4-250001 --force
3gpp-ai summarize S4-250001 --force
+75 −0
"""Unit tests for document chunking strategies."""

from __future__ import annotations

from threegpp_ai.operations.chunking import (
    ChunkingConfig,
    ChunkingStrategy,
    chunk_document,
    chunk_semantic,
    chunk_with_overlap,
)


def _longest_token_overlap(a: list[int], b: list[int], max_overlap: int) -> int:
    """Return the longest suffix/prefix token overlap length."""
    upper = min(len(a), len(b), max_overlap)
    for size in range(upper, 0, -1):
        if a[-size:] == b[:size]:
            return size
    return 0


def test_chunk_semantic_large_section_no_unit_mixing() -> None:
    """A large section split should not depend on token->char index conversion."""
    content = "# Title\n\n## Section\n\n" + ("alpha beta gamma delta epsilon zeta eta theta iota kappa lambda mu\n" * 250)
    config = ChunkingConfig(
        strategy=ChunkingStrategy.SEMANTIC,
        max_tokens=120,
        respect_sections=False,
    )

    chunks = chunk_semantic(content, config)

    assert len(chunks) > 1
    assert all(config.count_tokens(chunk) <= config.max_tokens for chunk in chunks)
    assert any("Section" in chunk for chunk in chunks)


def test_chunk_document_semantic_strategy_splits_on_sections() -> None:
    """Semantic strategy should keep section boundaries while splitting."""
    content = "# Intro\n\n" + ("intro text " * 120) + "\n\n## Body\n\n" + ("body text " * 120) + "\n\n## End\n\n" + ("end text " * 120)
    config = ChunkingConfig(
        strategy=ChunkingStrategy.SEMANTIC,
        max_tokens=100,
        respect_sections=True,
    )

    chunks = chunk_document(content, config)

    assert len(chunks) >= 2
    assert chunks[0].startswith("# Intro")
    assert any("## Body" in chunk for chunk in chunks)


def test_chunk_with_overlap_preserves_context_between_chunks() -> None:
    """Consecutive overlap chunks should share a token overlap region."""
    content = " ".join(f"token{i:04d}" for i in range(500))
    config = ChunkingConfig(
        strategy=ChunkingStrategy.OVERLAP,
        max_tokens=80,
        overlap_tokens=20,
    )

    chunks = chunk_with_overlap(content, config)

    assert len(chunks) > 1

    encoder = config.get_encoder()
    overlaps = []
    for first, second in zip(chunks, chunks[1:], strict=False):
        a_tokens = encoder.encode(first)
        b_tokens = encoder.encode(second)
        overlaps.append(_longest_token_overlap(a_tokens, b_tokens, config.overlap_tokens))

    assert all(overlap >= 5 for overlap in overlaps)
+87 −0
"""Tests for metrics wiring in AI operations entrypoints."""

from __future__ import annotations

from pathlib import Path
from types import SimpleNamespace

import pytest
from threegpp_ai.operations import convert as convert_ops
from threegpp_ai.operations import summarize as summarize_ops
from threegpp_ai.operations.metrics import (
    MetricsTracker,
    MetricType,
    TimedOperationResult,
    get_metrics_tracker,
    timed_operation,
)


class _DummyClient:
    """Simple LLM client stub returning deterministic outputs."""

    def __init__(self) -> None:
        self._calls = 0

    def complete(self, prompt: str, **_: object) -> str:
        self._calls += 1
        if self._calls == 1:
            return "This is a concise summary text for tests."
        return '["kw1", "kw2"]'


def test_timed_operation_returns_mutable_result() -> None:
    """timed_operation should yield a result object with final status and timing."""
    tracker = MetricsTracker()

    with timed_operation(tracker, "S4-260001", MetricType.CONVERSION) as result:
        assert isinstance(result, TimedOperationResult)

    assert result.success is True
    assert result.duration_seconds >= 0
    assert len(tracker.metrics) == 1
    assert tracker.metrics[0].metric_type == MetricType.CONVERSION


def test_convert_tdoc_to_markdown_records_conversion_metric(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
    """convert_tdoc_to_markdown should record conversion metrics."""
    tracker = get_metrics_tracker()
    tracker.clear()

    pdf_path = tmp_path / "doc.pdf"
    pdf_path.write_bytes(b"%PDF-1.7")

    monkeypatch.setattr(
        convert_ops,
        "fetch_tdoc_files",
        lambda _document_id, force_download=False: SimpleNamespace(primary_path=pdf_path, checkout_dir=tmp_path),
    )
    monkeypatch.setattr(convert_ops, "extract_file_sync", lambda _path: SimpleNamespace(content="# markdown"))

    output = convert_ops.convert_tdoc_to_markdown("S4-260001", force=True)

    assert output == "# markdown"
    conversion_metrics = tracker.by_type(MetricType.CONVERSION)
    assert len(conversion_metrics) == 1
    assert conversion_metrics[0].success is True


def test_summarize_tdoc_records_summarization_metric(monkeypatch: pytest.MonkeyPatch) -> None:
    """summarize_tdoc should record summarization metrics at entrypoint level."""
    tracker = get_metrics_tracker()
    tracker.clear()

    monkeypatch.setattr(summarize_ops, "convert_tdoc_to_markdown", lambda *_args, **_kwargs: "source content")
    monkeypatch.setattr(summarize_ops, "_get_llm_client", _DummyClient)
    monkeypatch.setattr(
        summarize_ops.AiConfig,
        "from_env",
        staticmethod(lambda: SimpleNamespace(llm_model="test-model")),
    )

    result = summarize_ops.summarize_tdoc("S4-260001")

    assert result.word_count > 0
    summary_metrics = tracker.by_type(MetricType.SUMMARIZATION)
    assert len(summary_metrics) == 1
    assert summary_metrics[0].success is True
+3 −5
@@ -15,7 +15,6 @@ import logging
from pathlib import Path
from typing import Any

-from .config import LightRAGConfig
from .shared_storage import SharedNanoVectorDBStorage, WorkspaceIndex

logger = logging.getLogger(__name__)
@@ -59,8 +58,7 @@ async def migrate_to_shared_storage(
    # Find all workspace directories (exclude _shared)
    workspaces = []
    for item in embedding_dir.iterdir():
-        if item.is_dir() and not item.name.startswith("_"):
-            if workspace is None or item.name == workspace:
-                workspaces.append(item)
+        if item.is_dir() and not item.name.startswith("_") and (workspace is None or item.name == workspace):
+            workspaces.append(item)

    logger.info("Found %d workspace(s) to migrate", len(workspaces))
@@ -82,7 +80,7 @@ async def migrate_to_shared_storage(
            continue

        try:
            with open(vdb_file, "r", encoding="utf-8") as f:
            with open(vdb_file, encoding="utf-8") as f:
                ws_data = json.load(f)
        except Exception as e:
            logger.error("Failed to load %s: %s", vdb_file, e)