Commit e1f50943 authored by Jan Reimes
Browse files

refactor: resolve codebase concerns

- Fix SpecSource Protocol to use proper '...' syntax instead of NotImplementedError
- Consolidate normalization functions to tdoc_crawler.utils.normalization
  - Add normalize_tdoc_id(), normalize_tdoc_ids(), normalize_portal_meeting_name()
  - Re-export from tdocs/utils.py and meetings/utils.py for backward compatibility
- Improve workspace error handling
  - Add WorkspaceNotFoundError exception
  - The list-members and process commands now report a missing workspace explicitly instead of treating it as an empty member list
  - Export WorkspaceNotFoundError from threegpp_ai package
- Update migration module documentation
  - Remove misleading TODO comment
  - Document module's role in shared storage consolidation
parent 25ce4f5a
Loading
Loading
Loading
Loading
+2 −1
Original line number Diff line number Diff line
@@ -27,7 +27,7 @@ from threegpp_ai.lightrag import (
    create_metadata_from_dict,
    enrich_text,
)
from threegpp_ai.models import SourceKind, SummarizeResult
from threegpp_ai.models import SourceKind, SummarizeResult, WorkspaceNotFoundError
from threegpp_ai.operations.convert import convert_tdoc as convert_document
from threegpp_ai.operations.convert import convert_tdoc_to_markdown
from threegpp_ai.operations.summarize import summarize_tdoc as summarize_document
@@ -81,6 +81,7 @@ __all__ = [
    "SummarizeResult",
    "TDocProcessor",
    "TDocRAG",
    "WorkspaceNotFoundError",
    "WorkspaceRegistry",
    "add_workspace_members",
    "checkout_spec_to_workspace",
+14 −2
Original line number Diff line number Diff line
@@ -83,6 +83,7 @@ from threegpp_ai.lightrag.config import LightRAGConfig
from threegpp_ai.lightrag.metadata import RAGMetadata
from threegpp_ai.lightrag.processor import DocumentProcessor
from threegpp_ai.lightrag.rag import PROVIDER_ALIASES, PROVIDERS
from threegpp_ai.models import WorkspaceNotFoundError
from threegpp_ai.operations.classify import pick_main_document
from threegpp_ai.operations.conversion import OFFICE_FORMATS, convert_to_pdf
from threegpp_ai.operations.convert import convert_document_to_markdown
@@ -802,7 +803,12 @@ def workspace_list_members(
    json_output: JsonOutputOption = False,
) -> None:
    workspace_name = _resolve_workspace_name(workspace)

    try:
        members = list_workspace_members(workspace_name, include_inactive=include_inactive)
    except WorkspaceNotFoundError:
        console.print(f"[red]Workspace '{workspace_name}' not found[/red]")
        raise typer.Exit(1)

    if json_output:
        typer.echo(
@@ -839,7 +845,13 @@ def workspace_process(
    json_output: JsonOutputOption = False,
) -> None:
    workspace_name = _resolve_workspace_name(workspace)

    try:
        members = list_workspace_members(workspace_name, include_inactive=False)
    except WorkspaceNotFoundError:
        console.print(f"[red]Workspace '{workspace_name}' not found[/red]")
        raise typer.Exit(1)

    if limit is not None and limit > 0:
        members = members[:limit]

+3 −3
Original line number Diff line number Diff line
@@ -8,9 +8,9 @@ Usage:
    >>> await migrate_to_shared_storage(working_dir, embedding_model)
"""

# TODO: Is this module needed at all? No need for legacy migration if we just switch
# to shared storage for all new workspaces. Maybe just keep the consolidation function
# for users who want to merge existing workspaces into shared storage?
# This module provides essential functionality for migrating existing workspace
# embeddings to shared storage format, eliminating duplicates and improving efficiency.
# Used by the `tdoc-crawler ai rag migrate` command.

from __future__ import annotations

+4 −0
Original line number Diff line number Diff line
@@ -75,6 +75,10 @@ class EmbeddingDimensionError(AiError):
    """Embedding model dimension mismatch with stored vectors."""


class WorkspaceNotFoundError(AiError):
    """Raised when a requested workspace does not exist in the registry.

    Subclasses ``AiError``, so handlers that catch the base error also catch
    this one. Raising a dedicated exception lets callers tell a missing
    workspace apart from a workspace that exists but has no members.
    """


class Workspace(BaseModel):
    """Logical workspace boundary for AI processing."""

+5 −1
Original line number Diff line number Diff line
@@ -22,6 +22,7 @@ from tdoc_crawler.tdocs.sources.whatthespec import resolve_via_whatthespec
from tdoc_crawler.tdocs.utils import normalize_tdoc_id
from tdoc_crawler.utils.normalization import normalize_spec_number, resolve_release_to_full_version

from threegpp_ai.models import WorkspaceNotFoundError
from threegpp_ai.operations.workspace_names import DEFAULT_WORKSPACE, is_default_workspace, normalize_workspace_name
from threegpp_ai.operations.workspace_registry import (
    WorkspaceMember,
@@ -250,13 +251,16 @@ def list_workspace_members(

    Returns:
        List of WorkspaceMember.

    Raises:
        WorkspaceNotFoundError: If workspace does not exist.
    """
    normalized_workspace = normalize_workspace_name(workspace)
    registry = _get_registry()

    metadata = registry.get_workspace(normalized_workspace)
    if metadata is None:
        return []
        raise WorkspaceNotFoundError(f"Workspace '{normalized_workspace}' not found")

    return metadata.list_members(include_inactive=include_inactive)

Loading