Commit 3eaeedb6 authored by Jan Reimes
Browse files

refactor(core): replace Any with proper types in tdoc-crawler

- Use Executor/Future instead of Any in crawl.py
- Fix CacheManager import path in tdoc_app.py
- Narrow return type in export.py from Any to object
- Add missing Any import in formatting.py
- Rename the tested attribute from file_path to config_file in test_sources.py
parent d69f8b57
Loading
Loading
Loading
Loading
+1 −1
Original line number Diff line number Diff line
@@ -15,7 +15,7 @@ from __future__ import annotations

import json
from dataclasses import dataclass
from typing import Literal
from typing import Any, Literal

import pandas as pd
import yaml
+3 −2
Original line number Diff line number Diff line
@@ -26,12 +26,13 @@ from tdoc_crawler.cli.args import (
    UseWhatTheSpecOption,
    VerbosityOption,
)
from tdoc_crawler.cli.config import CacheManager, load_cli_config
from tdoc_crawler.cli.config import load_cli_config
from tdoc_crawler.cli.config_app import config_app
from tdoc_crawler.cli.constants import HELP_PANEL_CRAWLING, HELP_PANEL_MAIN, HELP_PANEL_QUERY
from tdoc_crawler.cli.crawl import crawl_meetings, crawl_tdocs
from tdoc_crawler.cli.query import query_meetings, query_tdocs
from tdoc_crawler.cli.utils import launch_file
from tdoc_crawler.config import CacheManager
from tdoc_crawler.credentials import set_credentials
from tdoc_crawler.database import MeetingDatabase, TDocDatabase
from tdoc_crawler.http_client import create_cached_session
@@ -81,7 +82,7 @@ def _app_callback(
        config.path.cache_dir = cache_dir

    # Register CacheManager for centralized path management
    manager = CacheManager(config.path.cache_dir).register()
    CacheManager(config.path.cache_dir).register()
    ctx.obj = config


+1 −1
Original line number Diff line number Diff line
@@ -14,7 +14,7 @@ from tdoc_crawler.config.settings import ThreeGPPConfig
FormatType = Literal["toml", "yaml", "json"]


def _default_value_for_field(info: FieldInfo) -> Any:
def _default_value_for_field(info: FieldInfo) -> object:
    """Extract the default value from a pydantic field info."""
    default = info.default
    if callable(default):
+3 −4
Original line number Diff line number Diff line
@@ -5,10 +5,9 @@ from __future__ import annotations
import json
import time
from collections.abc import Callable
from concurrent.futures import Future, as_completed
from concurrent.futures import Executor, Future, as_completed
from dataclasses import dataclass
from datetime import date
from typing import Any

from pool_executors import create_executor

@@ -238,14 +237,14 @@ class TDocCrawler:
        return {normalize_tdoc_id(item) for item in targets}

    @staticmethod
    def _create_executor_for_workers(workers: int) -> tuple[Any, str]:
    def _create_executor_for_workers(workers: int) -> tuple[Executor, str]:
        """Create configured executor and return it with a label for logging."""
        if workers == 1:
            return create_executor("serial"), "serial"
        return create_executor("multiprocessing", max_workers=workers), "multiprocessing"

    @staticmethod
    def _submit_meeting_futures(executor: Any, meetings: list[MeetingMetadata], config: TDocCrawlConfig) -> dict[Future[str], MeetingMetadata]:
    def _submit_meeting_futures(executor: Executor, meetings: list[MeetingMetadata], config: TDocCrawlConfig) -> dict[Future[str], MeetingMetadata]:
        """Submit worker tasks for each meeting and map futures back to meeting metadata."""
        futures: dict[Future[str], MeetingMetadata] = {}
        for meeting in meetings:
+3 −3
Original line number Diff line number Diff line
@@ -286,10 +286,10 @@ class TestConfigLoadError:
        assert str(config_file) in str(error)
        assert "Parse error" in str(error)

    def test_error_has_file_path_attribute(self, tmp_path: Path) -> None:
        """Error has file_path attribute with the problematic file."""
    def test_error_has_config_file_attribute(self, tmp_path: Path) -> None:
        """Error has config_file attribute with the problematic file."""
        config_file = tmp_path / "config.toml"
        error = ConfigLoadError(config_file, "Test reason")

        assert error.file_path == config_file
        assert error.config_file == config_file
        assert error.reason == "Test reason"