Commit ec926f56 authored by Jan Reimes
Browse files

style: fix PLC0415 imports and editorial linter issues

Move all inline imports to module level (PLC0415 non-negotiable):
- session.py: HttpVersion import to top with try/except guard
- test_workspaces.py: _resolve_spec_source_id, _effective_source_id,
  WorkspaceMember, SourceKind, patch to module-level imports

Consolidate duplicate import lines (printing.py, crawl.py), reformat
sum() call in checkout.py, remove blank line in utils.py, fix markdown
list numbering in AGENTS.md and workspace.md docs.
parent eade2262
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -145,8 +145,8 @@ Members added with `--release 18.0` use the explicit release. Members added with
`workspace process` is idempotent by default (`--skip-existing`):

1. **Fast-path skip**: Before entering `convert_for_wiki`, `_should_skip_member()` checks if output artifacts already exist on disk.
2. **Per-profile guard**: Inside `convert_for_wiki`, each profile checks for existing output (`.md` for markdown-only, `.md` + `.json` for Docling) and returns early.
3. **Override**: `--force` re-extracts regardless of existing output.
1. **Per-profile guard**: Inside `convert_for_wiki`, each profile checks for existing output (`.md` for markdown-only, `.md` + `.json` for Docling) and returns early.
1. **Override**: `--force` re-extracts regardless of existing output.

---

+6 −3
Original line number Diff line number Diff line
@@ -41,11 +41,13 @@ wiki/atias/sources/26131/
```

When processing a spec workspace member:

1. If the member ID already has `-REL` (e.g. `26260-REL18.0.0`), use it as-is.
2. If the member ID is bare (e.g. `26131`), resolve the release from the database and construct the full ID.
3. If the release cannot be resolved, fall back to the bare ID.
1. If the member ID is bare (e.g. `26131`), resolve the release from the database and construct the full ID.
1. If the release cannot be resolved, fall back to the bare ID.

This applies to **all** code paths that create or reference spec source directories:

- `_effective_source_id()` / `_resolve_spec_source_id()`
- `_process_member()` — `wiki_source_dir` construction
- `_should_skip_member()` — existing output detection
@@ -60,7 +62,8 @@ Two-layer protection:

1. **`_should_skip_member()`** (fast path): Checks if output artifacts exist before entering `convert_for_wiki()`. Controlled by `--skip-existing` (default: True).

2. **`convert_for_wiki()`** (hard guarantee): Each profile branch checks for existing output and returns early when `force=False`:
1. **`convert_for_wiki()`** (hard guarantee): Each profile branch checks for existing output and returns early when `force=False`:

   - `PDF_ONLY`: `ensure_pdf()` skips if PDF exists
   - `MARKDOWN_ONLY`: skips if `.md` exists
   - `DEFAULT`/`ADVANCED`: skips if both `.md` and `.json` exist
+1 −2
Original line number Diff line number Diff line
@@ -7,10 +7,9 @@ from datetime import date
from typing import Any

from tdoc_crawler.cli.formatting import TableColumnSpec, print_structured_output
from tdoc_crawler.database.oxyde_models import TDocMetadata
from tdoc_crawler.database.oxyde_models import MeetingMetadata, TDocMetadata
from tdoc_crawler.database.specs import SpecCrawlResult
from tdoc_crawler.logging import get_console
from tdoc_crawler.database.oxyde_models import MeetingMetadata
from tdoc_crawler.models.base import OutputFormat
from tdoc_crawler.models.subworking_groups import SUBTB_INDEX
from tdoc_crawler.models.working_groups import WorkingGroup
+10 −10
Original line number Diff line number Diff line
@@ -198,16 +198,6 @@ class TDocDatabase(MeetingDatabase):

        return records

    @staticmethod
    def _meeting_start_date(record: TDocMetadata, meeting_map: dict[int, MeetingMetadata], fallback: date) -> date:
        """Return the meeting start_date for sorting, or fallback if unavailable.

        Args:
            record: TDoc record whose ``meeting_id`` is looked up.
            meeting_map: Meetings keyed by meeting ID.
            fallback: Date returned when no start date can be resolved.

        Returns:
            The ``start_date`` of the meeting stored under ``record.meeting_id``,
            or ``fallback`` when the record has no meeting ID, the ID is not in
            ``meeting_map``, or that meeting's ``start_date`` is ``None``.
        """
        # Any falsy meeting_id (e.g. None or 0) is treated as "no meeting linked".
        if not record.meeting_id:
            return fallback
        meeting = meeting_map.get(record.meeting_id)
        if meeting is None or meeting.start_date is None:
            return fallback
        return meeting.start_date

    async def get_existing_tdoc_ids(self, working_groups: Iterable[WorkingGroup] | None = None) -> set[str]:
        """Get set of existing TDoc IDs, optionally filtered by working group.

@@ -251,6 +241,16 @@ class TDocDatabase(MeetingDatabase):
            return record
        return self._clone_tdoc(record, {"tbid": meeting.tbid})

    @staticmethod
    def _meeting_start_date(record: TDocMetadata, meeting_map: dict[int, MeetingMetadata], fallback: date) -> date:
        """Return the meeting start_date for sorting, or fallback if unavailable.

        Args:
            record: TDoc record whose ``meeting_id`` is looked up.
            meeting_map: Meetings keyed by meeting ID.
            fallback: Date returned when no start date can be resolved.

        Returns:
            The ``start_date`` of the meeting stored under ``record.meeting_id``,
            or ``fallback`` when the record has no meeting ID, the ID is not in
            ``meeting_map``, or that meeting's ``start_date`` is ``None``.
        """
        # Any falsy meeting_id (e.g. None or 0) is treated as "no meeting linked".
        if not record.meeting_id:
            return fallback
        meeting = meeting_map.get(record.meeting_id)
        if meeting is None or meeting.start_date is None:
            return fallback
        return meeting.start_date

    @staticmethod
    def _filter_by_retrieved_dates(
        records: list[TDocMetadata],
+6 −5
Original line number Diff line number Diff line
@@ -18,6 +18,11 @@ from niquests.adapters import HTTPAdapter
from urllib3.response import HTTPResponse
from urllib3.util.retry import Retry

try:
    from urllib3.connection import HttpVersion
except ImportError:
    HttpVersion = None  # type: ignore[assignment,misc]

from tdoc_crawler.config.settings import HttpConfig, PathConfig
from tdoc_crawler.constants.urls import BROWSER_HEADERS
from tdoc_crawler.logging import get_logger
@@ -95,16 +100,12 @@ class _NiquetsCacheAdapter(CacheAdapter):

    def init_poolmanager(self, connections: int, maxsize: int, block: bool = False, **pool_kwargs: object) -> None:
        """Disable HTTP/3 to prevent MustDowngradeError from 3GPP servers."""
        try:
            from urllib3.connection import HttpVersion

        if HttpVersion is not None:
            disabled: set[HttpVersion] = pool_kwargs.get("disabled_svn", set())  # type: ignore[assignment]
            if not isinstance(disabled, set):
                disabled = set()
            disabled.add(HttpVersion.h3)
            pool_kwargs["disabled_svn"] = disabled  # type: ignore[assignment]
        except ImportError:
            pass
        super().init_poolmanager(connections, maxsize, block, **pool_kwargs)

    def send(
Loading