Commit 2908a858 authored by Jan Reimes's avatar Jan Reimes
Browse files

chore: format code, fix imports and add env examples

Formatting and import fixes:
- Fix ruff E402 import order in session.py
- Add explicit type hints to test functions
- Remove trailing whitespace in classify.py
- Add opendataloader PDF settings to .env.example
- Add java to mise config for opendataloader
- Update demo.bat with extraction profile
parent d0c1c75d
Loading
Loading
Loading
Loading
+2 −0
Original line number | Diff line number | Diff line
@@ -16,6 +16,7 @@ ripgrep = "latest"

node = "latest"
bun = "latest"
java = "temurin"

[env]
GITLAB_HOST = "forge.3gpp.org"
@@ -115,6 +116,7 @@ run = [

    # 3GPP skills (TODO: fix - requires well-known endpoint)
    "bun x skills add https://forge.3gpp.org/rep/reimes/awesome-3gpp-skills/-/tree/main/skills -a universal -y",
    "bun x skills add https://github.com/lugasia/3gpp-skill -a universal -y",

    # skill for teddi-mcp/-cli
    "bun x skills add https://forge.3gpp.org/rep/reimes/teddi-mcp/-/tree/main/skills -a universal -y",
+19 −0
Original line number | Diff line number | Diff line
@@ -139,6 +139,25 @@ TDC_AI_PARALLELISM=4
# Set to "true", "1", or "yes" to enable
# TDC_AI_VLM=false

# ============================================================================
# OPENDATALOADER PDF SETTINGS
# ============================================================================
# OpenDataLoader is used for PDF extraction (replaces previous docling-based pipeline)
# Requires Java 11+ installed on system PATH
# See: https://github.com/opendataloader-project/opendataloader-pdf

# Enable hybrid AI mode for complex PDF pages (default: off)
# Options: off, docling-fast, docling-full
# Requires: pip install "opendataloader-pdf[hybrid]" and opendataloader-pdf-hybrid server running
# TDC_AI_HYBRID_MODE=off

# URL for hybrid AI server when enabled (default: http://localhost:5002)
# TDC_AI_HYBRID_URL=http://localhost:5002

# ============================================================================
# GRAPH QUERY CONFIGURATION
# ============================================================================

# Graph query level: simple|medium|advanced (default: simple)
# simple: Return count and list without synthesis
# medium: Parse query keywords, filter nodes, generate simple text summary
+6 −5
Original line number | Diff line number | Diff line
@@ -2,11 +2,12 @@
cls
call .venv\scripts\activate.bat

SET TDC_AI_CONVERT_MD=1
SET TDC_AI_VLM=1
:: SET TDC_AI_CONVERT_MD=1
:: SET TDC_AI_VLM=1
SET TDC_AI_EXTRACTION_PROFILE=optimum

tdoc-crawler crawl-meetings -s S4
tdoc-crawler crawl --start-date 2016
:: tdoc-crawler crawl-meetings -s S4
:: tdoc-crawler crawl --start-date 2016
    tdoc-crawler query --agenda "*atias*" --start-date 2018

3gpp-ai workspace deactivate
+2 −2
Original line number | Diff line number | Diff line
@@ -233,7 +233,7 @@ def classify_document_files(
        try:
            folder_contents = list(folder_path.iterdir())
            contents_summary = [f.name for f in folder_contents]
        except (OSError, PermissionError):
        except OSError, PermissionError:
            contents_summary = ["<cannot read folder>"]

        logger.warning(
+2 −4
Original line number | Diff line number | Diff line
@@ -7,9 +7,9 @@ from pathlib import Path
from typing import cast

import niquests as requests
from hishel import SyncBaseStorage, SyncSqliteStorage
from hishel._core._headers import Headers
from hishel._core.models import Request, Response
from hishel import SyncBaseStorage, SyncSqliteStorage
from hishel.requests import CacheAdapter, extract_metadata_from_headers, snake_to_header
from niquests.adapters import HTTPAdapter
from urllib3.response import HTTPResponse
@@ -258,9 +258,7 @@ def create_cached_session(
            )
            session.mount("http://", adapter)
            session.mount("https://", adapter)
            logger.debug(
                f"Configured connection pool without caching: max_connections={pool_config.max_connections}, max_per_host={pool_config.max_per_host}"
            )
            logger.debug(f"Configured connection pool without caching: max_connections={pool_config.max_connections}, max_per_host={pool_config.max_per_host}")

        logger.debug("Creating plain HTTP session (caching disabled)")
        return session
Loading