Commit de99d510 authored by Jan Reimes
Browse files

fix(lint/tests): apply lint-driven changes and test fixes

parent b81fac96
Loading
Loading
Loading
Loading
+4 −0
Original line number Diff line number Diff line
@@ -64,6 +64,10 @@ tdoc-crawler = "tdoc_crawler.cli:app"
[tool.pytest.ini_options]
testpaths = ["tests"]
pythonpath = ["src"]
# Suppress Pydantic deprecation warning from pydantic-sqlite library (external dependency)
filterwarnings = [
    "ignore:.*Accessing the 'model_fields' attribute on the instance is deprecated.*:DeprecationWarning",
]


[tool.coverage.report]
+4 −10
Original line number Diff line number Diff line
@@ -13,22 +13,16 @@ import typer
import yaml
from dotenv import load_dotenv
from rich.console import Console
from rich.progress import (BarColumn, MofNCompleteColumn, Progress,
                           SpinnerColumn, TextColumn)
from rich.progress import BarColumn, MofNCompleteColumn, Progress, SpinnerColumn, TextColumn
from rich.table import Table

from tdoc_crawler.crawlers import MeetingCrawler, TDocCrawler
from tdoc_crawler.database import TDocDatabase
from tdoc_crawler.models import (MeetingCrawlConfig, MeetingQueryConfig,
                                 OutputFormat, QueryConfig, SortOrder,
                                 TDocCrawlConfig)
from tdoc_crawler.models import MeetingCrawlConfig, MeetingQueryConfig, OutputFormat, QueryConfig, SortOrder, TDocCrawlConfig

from .fetching import maybe_fetch_missing_tdocs
from .helpers import (build_limits, database_path, launch_file,
                      parse_subgroups, parse_working_groups, prepare_tdoc_file,
                      resolve_credentials)
from .printing import (meeting_to_dict, print_meeting_table, print_tdoc_table,
                       tdoc_to_dict)
from .helpers import build_limits, database_path, launch_file, parse_subgroups, parse_working_groups, prepare_tdoc_file, resolve_credentials
from .printing import meeting_to_dict, print_meeting_table, print_tdoc_table, tdoc_to_dict

load_dotenv()

+4 −13
Original line number Diff line number Diff line
@@ -18,12 +18,9 @@ from urllib.request import urlopen
import typer
from rich.console import Console

from tdoc_crawler.crawlers import (normalize_subgroup_alias,
                                   normalize_working_group_alias)
from tdoc_crawler.crawlers import normalize_subgroup_alias, normalize_working_group_alias
from tdoc_crawler.database import TDocDatabase
from tdoc_crawler.models import (CrawlLimits, HttpCacheConfig,
                                 MeetingQueryConfig, PortalCredentials,
                                 SortOrder, TDocMetadata, WorkingGroup)
from tdoc_crawler.models import CrawlLimits, HttpCacheConfig, MeetingQueryConfig, PortalCredentials, SortOrder, TDocMetadata, WorkingGroup

console = Console()
_logger = logging.getLogger(__name__)
@@ -344,19 +341,13 @@ def resolve_http_cache_config(cache_ttl: int | None = None, cache_refresh_on_acc
        ttl = cache_ttl
    else:
        env_ttl = os.getenv("HTTP_CACHE_TTL")
        if env_ttl:
            ttl = int(env_ttl)
        else:
            ttl = 7200  # default TTL of 2 hours
        ttl = int(env_ttl) if env_ttl else 7200

    # Handle refresh on access - check CLI param, then env var, then default
    if cache_refresh_on_access is not None:
        refresh_on_access = cache_refresh_on_access
    else:
        env_refresh = os.getenv("HTTP_CACHE_REFRESH_ON_ACCESS", "").lower()
        if env_refresh:
            refresh_on_access = env_refresh in ("true", "1", "yes", "on", "t", "y")
        else:
            refresh_on_access = True  # default to True
        refresh_on_access = env_refresh in ("true", "1", "yes", "on", "t", "y") if env_refresh else True

    return HttpCacheConfig(ttl=ttl, refresh_ttl_on_access=refresh_on_access)
+2 −5
Original line number Diff line number Diff line
@@ -9,12 +9,9 @@ from collections.abc import Callable
from dataclasses import dataclass

from tdoc_crawler.crawlers.executor_adapter import Runner
from tdoc_crawler.crawlers.parallel import (
    fetch_meeting_document_list_subinterpreter, fetch_meeting_tdocs)
from tdoc_crawler.crawlers.parallel import fetch_meeting_document_list_subinterpreter, fetch_meeting_tdocs
from tdoc_crawler.database import TDocDatabase
from tdoc_crawler.models import (CrawlLimits, MeetingMetadata,
                                 MeetingQueryConfig, SortOrder,
                                 TDocCrawlConfig, TDocMetadata, WorkingGroup)
from tdoc_crawler.models import CrawlLimits, MeetingMetadata, MeetingQueryConfig, SortOrder, TDocCrawlConfig, TDocMetadata, WorkingGroup

logger = logging.getLogger(__name__)

+2 −6
Original line number Diff line number Diff line
@@ -12,14 +12,10 @@ from urllib.parse import urljoin

from bs4 import BeautifulSoup, Tag

from tdoc_crawler.crawlers.constants import (DATE_PATTERN,
                                             MEETING_CODE_REGISTRY,
                                             MEETINGS_BASE_URL,
                                             PORTAL_BASE_URL)
from tdoc_crawler.crawlers.constants import DATE_PATTERN, MEETING_CODE_REGISTRY, MEETINGS_BASE_URL, PORTAL_BASE_URL
from tdoc_crawler.database import TDocDatabase
from tdoc_crawler.http_client import create_cached_session
from tdoc_crawler.models import (CrawlLimits, MeetingCrawlConfig,
                                 MeetingMetadata, WorkingGroup)
from tdoc_crawler.models import CrawlLimits, MeetingCrawlConfig, MeetingMetadata, WorkingGroup

logger = logging.getLogger(__name__)

Loading