Commit 7f1e381e authored by Jan Reimes
Browse files

Remove unused dependency injection infrastructure

Removed all DI-related code that was built but never integrated:

- Removed src/tdoc_crawler/di/ directory (ServiceContainer)
- Removed tests/di/ test directory
- Removed DiModeOption from CLI args
- Removed DI imports and logic from app.py
- Deleted src/tdoc_crawler/cli/di.py module
- Removed docs/di.md documentation
- Removed DI fixtures from conftest.py
- Removed tests/test_cli_di.py

The --di-mode flag created a ServiceContainer and registered it, but no command
ever resolved services from it, so the entire infrastructure was dead code.
parent 759592b7
Loading
Loading
Loading
Loading

docs/di.md

deleted 100644 → 0
+0 −87
Original line number Diff line number Diff line
# Dependency Injection

This document describes the Dependency Injection (DI) patterns used in tdoc-crawler.

## Overview

The project uses a lightweight DI container (`ServiceContainer`) for centralized service lifecycle management. It is optional: existing code continues to work without changes.

## ServiceContainer

The `ServiceContainer` provides centralized access to core services:

```python
from tdoc_crawler.di import ServiceContainer

# Create container (the cache directory argument is optional)
container = ServiceContainer()

# Use services
session = container.get_session()
with container.get_tdoc_db() as db:
    # Business logic
    pass

# Cleanup
container.close()

# Or use context manager
with ServiceContainer() as container:
    # Use services
    pass
```

## CLI Integration

The CLI supports DI mode via the `--di-mode` flag:

```bash
# Use DI container
tdoc-crawler --di-mode query "search term"

# Default: backward compatible mode
tdoc-crawler query "search term"
```

## Services Provided

- `cache_manager` - Cache directory management
- `get_tdoc_db()` - TDoc database access
- `get_spec_db()` - Spec database access
- `get_meeting_db()` - Meeting database access
- `get_ai_db()` - LanceDB AI storage
- `get_session()` - HTTP session with caching

## Protocols

The following protocols define interfaces for DI:

- `DatabaseFactory` - Database connection management
- `HttpClientProvider` - HTTP client abstraction
- `ParserProtocol` - Parser interface
- `ClientProtocol` - Client interface

See `src/tdoc_crawler/database/protocols.py` and `src/tdoc_crawler/http_client/protocols.py` for details.

## Configuration

Use `ConfigService` for unified configuration:

```python
from tdoc_crawler.config.service import ConfigService

config = ConfigService()
ai_config = config.get_ai_config()
```

## Testing

Use the `service_container` fixture in tests:

```python
def test_something(service_container):
    db = service_container.get_tdoc_db()
    # Test logic
```

See `tests/conftest.py` for available fixtures.
+2 −10
Original line number Diff line number Diff line
@@ -9,13 +9,11 @@ import typer
from dotenv import load_dotenv
from rich.table import Table

from tdoc_crawler.cli import di as cli_di
from tdoc_crawler.cli._shared import console, create_progress_bar
from tdoc_crawler.cli.ai import ai_app
from tdoc_crawler.cli.args import (
    CacheDirOption,
    CheckoutTDocIdsArgument,
    DiModeOption,
    EolPasswordOption,
    EolUsernameOption,
    ForceOption,
@@ -32,7 +30,6 @@ from tdoc_crawler.cli.utils import launch_file
from tdoc_crawler.config import CacheManager
from tdoc_crawler.credentials import set_credentials
from tdoc_crawler.database import MeetingDatabase, TDocDatabase
from tdoc_crawler.di.container import ServiceContainer
from tdoc_crawler.http_client import create_cached_session
from tdoc_crawler.logging import DEFAULT_LEVEL as DEFAULT_VERBOSITY
from tdoc_crawler.logging import set_verbosity
@@ -52,16 +49,11 @@ HELP_PANEL_QUERY = "Query Commands"
@app.callback()
def _app_callback(
    ctx: typer.Context,
    di_mode: DiModeOption = False,
    cache_dir: CacheDirOption = None,
) -> None:
    """Global CLI options."""
    if di_mode:
        container = ServiceContainer(cache_dir=cache_dir)
        cli_di.set_container(container)
        ctx.call_on_close(container.close)
    else:
        cli_di.set_container(None)
    pass  # No global options currently


# Register crawl commands
app.command("crawl-tdocs", rich_help_panel=HELP_PANEL_CRAWLING)(crawl_tdocs)
+0 −9
Original line number Diff line number Diff line
@@ -150,12 +150,3 @@ NoProgressOption = Annotated[
    bool,
    typer.Option("--no-progress", help="Disable progress bar (useful for scripts and CI)"),
]

# Boolean on/off flag pair (--di-mode / --no-di-mode) for the dependency injection
# container; the value can also be supplied through the TDC_DI_MODE environment variable.
DiModeOption = Annotated[
    bool,
    typer.Option(
        "--di-mode/--no-di-mode",
        help="Enable dependency injection container for service resolution",
        envvar="TDC_DI_MODE",
    ),
]

src/tdoc_crawler/cli/di.py

deleted 100644 → 0
+0 −71
Original line number Diff line number Diff line
"""CLI dependency injection integration.

Provides container state management and service resolution helpers for CLI commands.
Commands can use get_container() to obtain services from the DI container when
``--di-mode`` is enabled, or create services directly for backward compatibility.
"""

from __future__ import annotations

from pathlib import Path

from tdoc_crawler.di.container import ServiceContainer

# Module-level container state, set by the app callback when --di-mode is active.
# None means no container is active. Within this module it is written by
# set_container() and read by get_container(), is_di_mode() and resolve_cache_dir().
_active_container: ServiceContainer | None = None


def set_container(container: ServiceContainer | None) -> None:
    """Install ``container`` as the active container of this CLI session.

    The app callback invokes this when ``--di-mode`` is enabled.

    Args:
        container: Container to make active; pass None to clear the current one.
    """
    # Rebind the module-level slot instead of creating a function-local name.
    global _active_container  # noqa: PLW0603
    _active_container = container


def get_container() -> ServiceContainer | None:
    """Look up the container currently registered for this CLI session.

    Returns:
        The active ServiceContainer when DI mode is enabled, otherwise None.
    """
    current = _active_container
    return current


def is_di_mode() -> bool:
    """Report whether a container is currently active (DI mode on).

    Returns:
        False when no container is registered, True otherwise.
    """
    current = _active_container
    return current is not None


def resolve_cache_dir(cache_dir: Path | None = None) -> Path | None:
    """Pick the effective cache directory, giving the active container priority.

    If a container is active and it holds a cache manager, that manager's
    ``root`` is returned so callers need not re-read ``--cache-dir``. In every
    other case the ``cache_dir`` argument is handed back unchanged.

    Args:
        cache_dir: Explicit cache directory passed via CLI option.

    Returns:
        The resolved cache directory path, or None to use the default.
    """
    container = _active_container
    if container is None:
        # DI mode is off: honour whatever the caller supplied.
        return cache_dir

    # NOTE(review): reads the container's private ``_cache_manager`` attribute.
    manager = container._cache_manager
    if manager is None:
        return cache_dir
    return manager.root


# Public API of this module: every helper defined above is exported.
__all__ = [
    "get_container",
    "is_di_mode",
    "resolve_cache_dir",
    "set_container",
]

src/tdoc_crawler/di/__init__.py

deleted 100644 → 0
+0 −10
Original line number Diff line number Diff line
# Dependency Injection Package

"""Dependency injection infrastructure for tdoc-crawler.

This package provides the ServiceContainer for centralized service lifecycle management.
"""

from tdoc_crawler.di.container import ServiceContainer

__all__ = ["ServiceContainer"]
Loading