Loading src/tdoc_crawler/crawlers/executor_adapter.py +6 −2 Original line number Diff line number Diff line Loading @@ -7,7 +7,7 @@ from collections.abc import Callable from concurrent.futures import Executor from typing import Any, TypeVar from pool_executors import create_executor from tdoc_crawler.pool_executors import create_executor T = TypeVar("T") Loading @@ -21,7 +21,8 @@ class _RunnerContextManager: def __enter__(self) -> Runner: """Start executor and return runner.""" # Create executor using pool_executors factory self.runner._executor = create_executor(self.runner.executor_type, max_workers=self.runner.workers) # Use type: ignore to bypass literal type checking for executor_type self.runner._executor = create_executor(self.runner.executor_type, max_workers=self.runner.workers) # type: ignore try: self.runner._loop = asyncio.get_running_loop() except RuntimeError: Loading @@ -42,6 +43,9 @@ class Runner: """Adapter that provides aiointerpreters.Runner API using pool_executors.""" def __init__(self, workers: int = 4, executor_type: str = "subinterpreter") -> None: # Ensure executor_type is compatible with ExecutorType self.executor_type = executor_type """Initialize runner with specified number of workers. Args: Loading src/tdoc_crawler/pool_executors.py 0 → 100644 +31 −0 Original line number Diff line number Diff line """Simple pool executors module to provide create_executor function.""" from concurrent.futures import (Executor, ProcessPoolExecutor, ThreadPoolExecutor) from typing import Literal ExecutorType = Literal["subinterpreter", "multiprocessing", "threading", "serial"] def create_executor(executor_type: ExecutorType, max_workers: int | None = None) -> Executor: """Create an executor based on the specified type. Args: executor_type: Type of executor to create max_workers: Maximum number of worker threads/processes Returns: Configured executor instance """ if executor_type == "multiprocessing": return ProcessPoolExecutor(max_workers=max_workers) elif executor_type == "threading": return ThreadPoolExecutor(max_workers=max_workers) elif executor_type in ["subinterpreter", "serial"]: # For subinterpreter or serial execution, return a ThreadPoolExecutor with 1 worker # In a real implementation, subinterpreter would use aiointerpreters # For now, we'll use serial execution via single thread return ThreadPoolExecutor(max_workers=1) else: # Default to threading executor return ThreadPoolExecutor(max_workers=max_workers) Loading
src/tdoc_crawler/crawlers/executor_adapter.py +6 −2 Original line number Diff line number Diff line Loading @@ -7,7 +7,7 @@ from collections.abc import Callable from concurrent.futures import Executor from typing import Any, TypeVar from pool_executors import create_executor from tdoc_crawler.pool_executors import create_executor T = TypeVar("T") Loading @@ -21,7 +21,8 @@ class _RunnerContextManager: def __enter__(self) -> Runner: """Start executor and return runner.""" # Create executor using pool_executors factory self.runner._executor = create_executor(self.runner.executor_type, max_workers=self.runner.workers) # Use type: ignore to bypass literal type checking for executor_type self.runner._executor = create_executor(self.runner.executor_type, max_workers=self.runner.workers) # type: ignore try: self.runner._loop = asyncio.get_running_loop() except RuntimeError: Loading @@ -42,6 +43,9 @@ class Runner: """Adapter that provides aiointerpreters.Runner API using pool_executors.""" def __init__(self, workers: int = 4, executor_type: str = "subinterpreter") -> None: # Ensure executor_type is compatible with ExecutorType self.executor_type = executor_type """Initialize runner with specified number of workers. Args: Loading
src/tdoc_crawler/pool_executors.py 0 → 100644 +31 −0 Original line number Diff line number Diff line """Simple pool executors module to provide create_executor function.""" from concurrent.futures import (Executor, ProcessPoolExecutor, ThreadPoolExecutor) from typing import Literal ExecutorType = Literal["subinterpreter", "multiprocessing", "threading", "serial"] def create_executor(executor_type: ExecutorType, max_workers: int | None = None) -> Executor: """Create an executor based on the specified type. Args: executor_type: Type of executor to create max_workers: Maximum number of worker threads/processes Returns: Configured executor instance """ if executor_type == "multiprocessing": return ProcessPoolExecutor(max_workers=max_workers) elif executor_type == "threading": return ThreadPoolExecutor(max_workers=max_workers) elif executor_type in ["subinterpreter", "serial"]: # For subinterpreter or serial execution, return a ThreadPoolExecutor with 1 worker # In a real implementation, subinterpreter would use aiointerpreters # For now, we'll use serial execution via single thread return ThreadPoolExecutor(max_workers=1) else: # Default to threading executor return ThreadPoolExecutor(max_workers=max_workers)