Loading src/pool_executors/pool_executors/factory.py +4 −2 Original line number Diff line number Diff line Loading @@ -3,7 +3,8 @@ from __future__ import annotations import logging from concurrent.futures import Executor, ProcessPoolExecutor, ThreadPoolExecutor from concurrent.futures import (Executor, ProcessPoolExecutor, ThreadPoolExecutor) from typing import Any from pool_executors.serial import SerialPoolExecutor Loading @@ -11,7 +12,8 @@ from pool_executors.types import ExecutorType # Import InterpreterPoolExecutor for Python 3.14+, handle gracefully for older versions try: from concurrent.futures import InterpreterPoolExecutor # type: ignore[attr-defined] # noqa: PLC0415 from concurrent.futures import \ InterpreterPoolExecutor # type: ignore[attr-defined] # noqa: PLC0415 HAS_INTERPRETER_POOL_EXECUTOR = True except (ImportError, AttributeError): Loading src/tdoc_crawler/cli/app.py +10 −4 Original line number Diff line number Diff line Loading @@ -13,16 +13,22 @@ import typer import yaml from dotenv import load_dotenv from rich.console import Console from rich.progress import BarColumn, MofNCompleteColumn, Progress, SpinnerColumn, TextColumn from rich.progress import (BarColumn, MofNCompleteColumn, Progress, SpinnerColumn, TextColumn) from rich.table import Table from tdoc_crawler.crawlers import MeetingCrawler, TDocCrawler from tdoc_crawler.database import TDocDatabase from tdoc_crawler.models import MeetingCrawlConfig, MeetingQueryConfig, OutputFormat, QueryConfig, SortOrder, TDocCrawlConfig from tdoc_crawler.models import (MeetingCrawlConfig, MeetingQueryConfig, OutputFormat, QueryConfig, SortOrder, TDocCrawlConfig) from .fetching import maybe_fetch_missing_tdocs from .helpers import build_limits, database_path, launch_file, parse_subgroups, parse_working_groups, prepare_tdoc_file, resolve_credentials from .printing import meeting_to_dict, print_meeting_table, print_tdoc_table, tdoc_to_dict from .helpers import (build_limits, database_path, launch_file, parse_subgroups, parse_working_groups, prepare_tdoc_file, resolve_credentials) from .printing import (meeting_to_dict, print_meeting_table, print_tdoc_table, tdoc_to_dict) load_dotenv() Loading src/tdoc_crawler/crawlers/hybrid.py +5 −2 Original line number Diff line number Diff line Loading @@ -9,9 +9,12 @@ from collections.abc import Callable from dataclasses import dataclass from tdoc_crawler.crawlers.executor_adapter import Runner from tdoc_crawler.crawlers.parallel import fetch_meeting_document_list_subinterpreter, fetch_meeting_tdocs from tdoc_crawler.crawlers.parallel import ( fetch_meeting_document_list_subinterpreter, fetch_meeting_tdocs) from tdoc_crawler.database import TDocDatabase from tdoc_crawler.models import CrawlLimits, MeetingMetadata, MeetingQueryConfig, SortOrder, TDocCrawlConfig, TDocMetadata, WorkingGroup from tdoc_crawler.models import (CrawlLimits, MeetingMetadata, MeetingQueryConfig, SortOrder, TDocCrawlConfig, TDocMetadata, WorkingGroup) logger = logging.getLogger(__name__) Loading src/tdoc_crawler/crawlers/meetings.py +6 −2 Original line number Diff line number Diff line Loading @@ -12,10 +12,14 @@ from urllib.parse import urljoin from bs4 import BeautifulSoup, Tag from tdoc_crawler.crawlers.constants import DATE_PATTERN, MEETING_CODE_REGISTRY, MEETINGS_BASE_URL, PORTAL_BASE_URL from tdoc_crawler.crawlers.constants import (DATE_PATTERN, MEETING_CODE_REGISTRY, MEETINGS_BASE_URL, PORTAL_BASE_URL) from tdoc_crawler.database import TDocDatabase from tdoc_crawler.http_client import create_cached_session from tdoc_crawler.models import CrawlLimits, MeetingCrawlConfig, MeetingMetadata, WorkingGroup from tdoc_crawler.models import (CrawlLimits, MeetingCrawlConfig, MeetingMetadata, WorkingGroup) logger = logging.getLogger(__name__) Loading src/tdoc_crawler/crawlers/portal.py +2 −1 Original line number Diff line number Diff line Loading @@ -11,7 +11,8 @@ from bs4 import BeautifulSoup from requests.adapters import HTTPAdapter from urllib3.util import Retry from tdoc_crawler.crawlers.constants import LOGIN_URL, PORTAL_BASE_URL, TDOC_VIEW_URL from tdoc_crawler.crawlers.constants import (LOGIN_URL, PORTAL_BASE_URL, TDOC_VIEW_URL) from tdoc_crawler.models.base import PortalCredentials from tdoc_crawler.models.tdocs import TDocMetadata Loading Loading
src/pool_executors/pool_executors/factory.py +4 −2 Original line number Diff line number Diff line Loading @@ -3,7 +3,8 @@ from __future__ import annotations import logging from concurrent.futures import Executor, ProcessPoolExecutor, ThreadPoolExecutor from concurrent.futures import (Executor, ProcessPoolExecutor, ThreadPoolExecutor) from typing import Any from pool_executors.serial import SerialPoolExecutor Loading @@ -11,7 +12,8 @@ from pool_executors.types import ExecutorType # Import InterpreterPoolExecutor for Python 3.14+, handle gracefully for older versions try: from concurrent.futures import InterpreterPoolExecutor # type: ignore[attr-defined] # noqa: PLC0415 from concurrent.futures import \ InterpreterPoolExecutor # type: ignore[attr-defined] # noqa: PLC0415 HAS_INTERPRETER_POOL_EXECUTOR = True except (ImportError, AttributeError): Loading
src/tdoc_crawler/cli/app.py +10 −4 Original line number Diff line number Diff line Loading @@ -13,16 +13,22 @@ import typer import yaml from dotenv import load_dotenv from rich.console import Console from rich.progress import BarColumn, MofNCompleteColumn, Progress, SpinnerColumn, TextColumn from rich.progress import (BarColumn, MofNCompleteColumn, Progress, SpinnerColumn, TextColumn) from rich.table import Table from tdoc_crawler.crawlers import MeetingCrawler, TDocCrawler from tdoc_crawler.database import TDocDatabase from tdoc_crawler.models import MeetingCrawlConfig, MeetingQueryConfig, OutputFormat, QueryConfig, SortOrder, TDocCrawlConfig from tdoc_crawler.models import (MeetingCrawlConfig, MeetingQueryConfig, OutputFormat, QueryConfig, SortOrder, TDocCrawlConfig) from .fetching import maybe_fetch_missing_tdocs from .helpers import build_limits, database_path, launch_file, parse_subgroups, parse_working_groups, prepare_tdoc_file, resolve_credentials from .printing import meeting_to_dict, print_meeting_table, print_tdoc_table, tdoc_to_dict from .helpers import (build_limits, database_path, launch_file, parse_subgroups, parse_working_groups, prepare_tdoc_file, resolve_credentials) from .printing import (meeting_to_dict, print_meeting_table, print_tdoc_table, tdoc_to_dict) load_dotenv() Loading
src/tdoc_crawler/crawlers/hybrid.py +5 −2 Original line number Diff line number Diff line Loading @@ -9,9 +9,12 @@ from collections.abc import Callable from dataclasses import dataclass from tdoc_crawler.crawlers.executor_adapter import Runner from tdoc_crawler.crawlers.parallel import fetch_meeting_document_list_subinterpreter, fetch_meeting_tdocs from tdoc_crawler.crawlers.parallel import ( fetch_meeting_document_list_subinterpreter, fetch_meeting_tdocs) from tdoc_crawler.database import TDocDatabase from tdoc_crawler.models import CrawlLimits, MeetingMetadata, MeetingQueryConfig, SortOrder, TDocCrawlConfig, TDocMetadata, WorkingGroup from tdoc_crawler.models import (CrawlLimits, MeetingMetadata, MeetingQueryConfig, SortOrder, TDocCrawlConfig, TDocMetadata, WorkingGroup) logger = logging.getLogger(__name__) Loading
src/tdoc_crawler/crawlers/meetings.py +6 −2 Original line number Diff line number Diff line Loading @@ -12,10 +12,14 @@ from urllib.parse import urljoin from bs4 import BeautifulSoup, Tag from tdoc_crawler.crawlers.constants import DATE_PATTERN, MEETING_CODE_REGISTRY, MEETINGS_BASE_URL, PORTAL_BASE_URL from tdoc_crawler.crawlers.constants import (DATE_PATTERN, MEETING_CODE_REGISTRY, MEETINGS_BASE_URL, PORTAL_BASE_URL) from tdoc_crawler.database import TDocDatabase from tdoc_crawler.http_client import create_cached_session from tdoc_crawler.models import CrawlLimits, MeetingCrawlConfig, MeetingMetadata, WorkingGroup from tdoc_crawler.models import (CrawlLimits, MeetingCrawlConfig, MeetingMetadata, WorkingGroup) logger = logging.getLogger(__name__) Loading
src/tdoc_crawler/crawlers/portal.py +2 −1 Original line number Diff line number Diff line Loading @@ -11,7 +11,8 @@ from bs4 import BeautifulSoup from requests.adapters import HTTPAdapter from urllib3.util import Retry from tdoc_crawler.crawlers.constants import LOGIN_URL, PORTAL_BASE_URL, TDOC_VIEW_URL from tdoc_crawler.crawlers.constants import (LOGIN_URL, PORTAL_BASE_URL, TDOC_VIEW_URL) from tdoc_crawler.models.base import PortalCredentials from tdoc_crawler.models.tdocs import TDocMetadata Loading