Commit 73964f39 authored by Jan Reimes
Browse files

refactor(logging): replace logging with custom logger across modules

* Updated logging implementation to use `get_logger` from `tdoc_crawler.logging` instead of the standard library's `logging.getLogger`.
* Refactored migration.py, pg0_manager.py, processor.py, rag.py, shared_storage.py, seeder.py, classify.py, extraction.py, figure_descriptor.py, llm_client.py, summarize.py, workspace_registry.py, and workspaces.py.
* Improved consistency in logging practices across the codebase.
parent 7ee219bd
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -15,15 +15,15 @@ Usage:
from __future__ import annotations

import json
import logging
from pathlib import Path
from typing import Any

from lightrag.utils import compute_mdhash_id
from tdoc_crawler.logging import get_logger

from .shared_storage import SharedNanoVectorDBStorage, WorkspaceIndex

logger = logging.getLogger(__name__)
logger = get_logger(__name__)


async def migrate_to_shared_storage(
+3 −2
Original line number Diff line number Diff line
@@ -4,16 +4,17 @@ This module manages pg0 lifecycle for PostgreSQL-backed LightRAG storage.
pg0 is a zero-config embedded PostgreSQL with pgvector extension.
"""

import logging
from collections.abc import Generator
from contextlib import contextmanager

from tdoc_crawler.logging import get_logger

try:
    from pg0 import Pg0 as _Pg0
except ImportError:
    _Pg0 = None

logger = logging.getLogger(__name__)
logger = get_logger(__name__)


class Pg0Manager:
+3 −2
Original line number Diff line number Diff line
@@ -6,12 +6,13 @@ for text extraction and artifact handling.

from __future__ import annotations

import logging
from dataclasses import dataclass
from enum import StrEnum
from pathlib import Path
from typing import Any

from tdoc_crawler.logging import get_logger

from threegpp_ai.models import ConversionError, ExtractionError
from threegpp_ai.operations.conversion import OFFICE_FORMATS
from threegpp_ai.operations.extraction import extract_document_structured
@@ -20,7 +21,7 @@ from .config import LightRAGConfig
from .metadata import RAGMetadata, enrich_text
from .rag import TDocRAG

logger = logging.getLogger(__name__)
logger = get_logger(__name__)

# All supported formats (re-exported from conversion module for convenience)
SUPPORTED_FORMATS = {".pdf", ".txt", ".md"} | OFFICE_FORMATS
+2 −2
Original line number Diff line number Diff line
@@ -8,7 +8,6 @@ This module provides a thin wrapper around LightRAG with:

from __future__ import annotations

import logging
from collections.abc import Callable
from dataclasses import dataclass
from functools import wraps
@@ -27,6 +26,7 @@ from lightrag.llm.openai import openai_complete, openai_embed
from lightrag.llm.zhipu import zhipu_complete, zhipu_embedding
from lightrag.utils import EmbeddingFunc
from tdoc_crawler.config import resolve_cache_manager
from tdoc_crawler.logging import get_logger

from .config import LightRAGConfig, QueryMode, StorageBackend
from .pg0_manager import Pg0Manager
@@ -87,7 +87,7 @@ zhipu_module.zhipu_complete_if_cache = patched_zhipu_complete_if_cache
zhipu_module.zhipu_embedding = patched_zhipu_embedding

STORAGES["SharedNanoVectorDBStorage"] = "threegpp_ai.lightrag.shared_storage"
logger = logging.getLogger(__name__)
logger = get_logger(__name__)


@dataclass(frozen=True)
+2 −2
Original line number Diff line number Diff line
@@ -6,17 +6,17 @@ into the LightRAG knowledge graph after document indexing.

from __future__ import annotations

import logging
from dataclasses import dataclass
from enum import StrEnum
from typing import Any

from tdoc_crawler.logging import get_logger
from tdoc_crawler.tdocs.utils import normalize_tdoc_id

from .metadata import normalize_release_label
from .rag import TDocRAG

logger = logging.getLogger(__name__)
logger = get_logger(__name__)


class EntityType(StrEnum):
Loading