style: fix linter issues and formatting across src and tests (f85d38c7) · Commits · Jan Reimes / 3gpp-crawler

ruff.toml

+2 −2

Original line number	Diff line number	Diff line
		@@ -64,8 +64,8 @@ ignore = [
		max-locals = 20

		[lint.per-file-ignores]
		"tests/*.py" = ["S101", "PLR6301"]
		"tests/*/.py" = ["S101", "PLR6301"]
		"tests/*.py" = ["S101", "S106", "PLR6301"]
		"tests/*/.py" = ["S101", "S106", "PLR6301"]

		[lint.pydocstyle]
		convention = "google"

src/tdoc_crawler/clients/portal.py

+7 −3

Original line number	Diff line number	Diff line
		@@ -33,6 +33,10 @@ class PortalAuthenticationError(Exception):
		"""Raised when portal authentication fails."""


		class PortalCredentialsError(ValueError):
		"""Raised when portal credentials are missing or invalid."""


		class PortalClient:
		"""Unified 3GPP portal client with authentication and TDoc fetching.

		@@ -103,13 +107,13 @@ class PortalClient:

		Raises:
		PortalAuthenticationError: If authentication fails
		ValueError: If no credentials were provided
		PortalCredentialsError: If no credentials were provided
		"""
		if self._authenticated:
		return

		if self.credentials is None:
		raise ValueError("Portal credentials required for targeted fetch. Set TDC_EOL_USERNAME and TDC_EOL_PASSWORD.")
		raise PortalCredentialsError("Portal credentials required for targeted fetch. Set TDC_EOL_USERNAME and TDC_EOL_PASSWORD.")

		logger.info("Authenticating with 3GPP portal...")

		@@ -230,7 +234,7 @@ class PortalClient:
		Raises:
		PortalAuthenticationError: If authentication fails
		PortalParsingError: If page parsing fails or TDoc not found
		ValueError: If no credentials were provided
		PortalCredentialsError: If no credentials were provided
		"""
		# Extract URL if not provided
		if url is None:

src/tdoc_crawler/utils/normalization.py

+17 −13

Original line number	Diff line number	Diff line
		@@ -10,6 +10,10 @@ _RANGE_SPLIT_PATTERN = re.compile(r"\s([-:])\s")
		_RELEASE_PREFIX_PATTERN = re.compile(r"^(?:v|rel|rel[-])", re.IGNORECASE)


		class NormalizationError(ValueError):
		"""Raised when spec identifier normalization or expansion fails."""


		def normalize_tdoc_ids(ids: Iterable[str]) -> list[str]:
		"""Uppercase and strip whitespace from TDoc identifiers."""
		return [str(value).strip().upper() for value in ids]
		@@ -65,13 +69,13 @@ def _parse_spec_number(value: str) -> tuple[str, str, str, int]:
		"""Parse a spec number and return (series, increment, format_kind, increment_digits)."""
		body = _strip_prefixes(value)
		if not body:
		raise ValueError("Spec number is required")
		raise NormalizationError("Spec number is required")

		if "." in body:
		body_no_space = re.sub(r"\s+", "", body)
		match = _DOTTED_BODY_PATTERN.match(body_no_space)
		if not match:
		raise ValueError(f"Unsupported spec number format: {value}")
		raise NormalizationError(f"Unsupported spec number format: {value}")
		series = match.group("series")
		increment_raw = match.group("increment")
		increment = increment_raw.zfill(3)
		@@ -80,7 +84,7 @@ def _parse_spec_number(value: str) -> tuple[str, str, str, int]:
		digits_only = re.sub(r"\s+", "", body)
		match = _UNDOTTED_BODY_PATTERN.match(digits_only)
		if not match:
		raise ValueError(f"Unsupported spec number format: {value}")
		raise NormalizationError(f"Unsupported spec number format: {value}")
		series = match.group("series")
		increment_raw = match.group("increment")
		increment = increment_raw.zfill(3)
		@@ -89,9 +93,9 @@ def _parse_spec_number(value: str) -> tuple[str, str, str, int]:

		def _validate_range(start_num: int, end_num: int, spec_input: str) -> None:
		if start_num > end_num:
		raise ValueError(f"Invalid range: start {start_num} > end {end_num}")
		raise NormalizationError(f"Invalid range: start {start_num} > end {end_num}")
		if end_num - start_num > 1329:
		raise ValueError(f"Range too large: {end_num - start_num + 1} specs")
		raise NormalizationError(f"Range too large: {end_num - start_num + 1} specs")


		def normalize_spec_number(value: str) -> str:
		@@ -126,20 +130,20 @@ def expand_spec_ranges(spec_input: str) -> Generator[str]:
		Individual normalized spec numbers (e.g., "26.260", "26.261", ...)

		Raises:
		ValueError: If the range syntax is invalid or series numbers don't match.
		NormalizationError: If the range syntax is invalid or series numbers don't match.
		"""
		cleaned = spec_input.strip()
		if not cleaned:
		raise ValueError("Spec number is required")
		raise NormalizationError("Spec number is required")

		if "+" in cleaned:
		offset_match = _OFFSET_PATTERN.match(cleaned)
		if not offset_match:
		raise ValueError("Invalid offset format")
		raise NormalizationError("Invalid offset format")
		left = offset_match.group("left")
		offset = int(offset_match.group("offset"))
		if offset < 0:
		raise ValueError("Offset must be non-negative")
		raise NormalizationError("Offset must be non-negative")
		series, start_str, _, _ = _parse_spec_number(left)
		start_num = int(start_str)
		end_num = start_num + offset
		@@ -152,13 +156,13 @@ def expand_spec_ranges(spec_input: str) -> Generator[str]:
		if len(split) == 3:
		left, _sep, right = split
		if not right.strip():
		raise ValueError("Missing end value in range")
		raise NormalizationError("Missing end value in range")
		series1, num1, format1, digits1 = _parse_spec_number(left)
		series2, num2, format2, digits2 = _parse_spec_number(right)
		if series1 != series2:
		raise ValueError(f"Series numbers don't match: {series1} vs {series2}")
		raise NormalizationError(f"Series numbers don't match: {series1} vs {series2}")
		if format1 != format2 and (digits1 == 1 or digits2 == 1):
		raise ValueError("Both range endpoints must use the same format")
		raise NormalizationError("Both range endpoints must use the same format")
		start_num = int(num1)
		end_num = int(num2)
		_validate_range(start_num, end_num, spec_input)
		@@ -183,7 +187,7 @@ def expand_spec_ranges_batch(spec_inputs: list[str]) -> list[str]:
		for spec_input in spec_inputs:
		try:
		expanded.extend(expand_spec_ranges(spec_input))
		except ValueError:
		except NormalizationError:
		# Skip invalid spec inputs silently
		continue
		return expanded

tests/test_cli.py

+10 −9

Original line number	Diff line number	Diff line
		@@ -6,6 +6,7 @@ from decimal import Decimal
		from pathlib import Path
		from unittest.mock import MagicMock, patch

		import pytest
		from typer.testing import CliRunner

		from tdoc_crawler.cli import app
		@@ -714,7 +715,7 @@ class TestEnvironmentVariables:
		mock_db_class: MagicMock,
		mock_crawler_class: MagicMock,
		test_cache_dir: Path,
		monkeypatch,
		monkeypatch: pytest.MonkeyPatch,
		) -> None:
		"""Test that TDC_CACHE_DIR environment variable sets cache directory."""
		mock_db = MagicMock(spec=TDocDatabase)
		@@ -745,7 +746,7 @@ class TestEnvironmentVariables:
		self,
		mock_db_class: MagicMock,
		mock_crawler_class: MagicMock,
		monkeypatch,
		monkeypatch: pytest.MonkeyPatch,
		) -> None:
		"""Test that TDC_WORKERS environment variable sets worker count."""
		mock_db = MagicMock(spec=TDocDatabase)
		@@ -778,7 +779,7 @@ class TestEnvironmentVariables:
		def test_env_var_working_group(
		self,
		mock_db_class: MagicMock,
		monkeypatch,
		monkeypatch: pytest.MonkeyPatch,
		) -> None:
		"""Test that TDC_WORKING_GROUP environment variable filters by working group."""
		mock_db = MagicMock()
		@@ -800,7 +801,7 @@ class TestEnvironmentVariables:
		def test_env_var_output_format(
		self,
		mock_db_class: MagicMock,
		monkeypatch,
		monkeypatch: pytest.MonkeyPatch,
		) -> None:
		"""Test that TDC_OUTPUT environment variable sets output format."""
		mock_db = MagicMock(spec=TDocDatabase)
		@@ -816,7 +817,7 @@ class TestEnvironmentVariables:

		def test_env_var_cli_override(
		self,
		monkeypatch,
		monkeypatch: pytest.MonkeyPatch,
		) -> None:
		"""Test that CLI arguments override environment variables."""
		# Note: TDC_VERBOSE has been removed - verbose mode is now controlled by --verbose flag only
		@@ -831,7 +832,7 @@ class TestEnvironmentVariables:
		def test_env_var_credentials(
		self,
		mock_resolve: MagicMock,
		monkeypatch,
		monkeypatch: pytest.MonkeyPatch,
		test_cache_dir: Path,
		) -> None:
		"""Test that TDC_EOL_USERNAME and TDC_EOL_PASSWORD set credentials."""
		@@ -856,7 +857,7 @@ class TestEnvironmentVariables:
		def test_env_var_prompt_credentials(
		self,
		mock_resolve: MagicMock,
		monkeypatch,
		monkeypatch: pytest.MonkeyPatch,
		test_cache_dir: Path,
		) -> None:
		"""Test that TDC_EOL_PROMPT controls credential prompting."""
		@@ -877,7 +878,7 @@ class TestEnvironmentVariables:
		def test_env_var_limit(
		self,
		mock_db_class: MagicMock,
		monkeypatch,
		monkeypatch: pytest.MonkeyPatch,
		) -> None:
		"""Test that TDC_LIMIT environment variables set limits."""
		mock_db = MagicMock(spec=TDocDatabase)
		@@ -897,7 +898,7 @@ class TestEnvironmentVariables:
		def test_env_var_multiple_credentials(
		self,
		mock_resolve: MagicMock,
		monkeypatch,
		monkeypatch: pytest.MonkeyPatch,
		test_cache_dir: Path,
		) -> None:
		"""Test multiple credential environment variables work together."""

tests/test_crawler.py

+1 −2

Original line number	Diff line number	Diff line
		@@ -14,6 +14,7 @@ from tdoc_crawler.constants.patterns import EXCLUDED_DIRS, TDOC_PATTERN
		from tdoc_crawler.database import TDocDatabase
		from tdoc_crawler.meetings.models import MeetingMetadata
		from tdoc_crawler.models import WorkingGroup
		from tdoc_crawler.models.subworking_groups import CODE_INDEX
		from tdoc_crawler.parsers.meetings import parse_meeting_row, parse_single_date
		from tdoc_crawler.tdocs.models import TDocCrawlConfig, TDocMetadata, TDocQueryConfig
		from tdoc_crawler.tdocs.operations import TDocCrawler
		@@ -266,8 +267,6 @@ class TestMeetingCrawler:
		assert row is not None
		cells = row.find_all("td")

		from tdoc_crawler.models.subworking_groups import CODE_INDEX

		# Create get_subtb callback for subtb lookup
		def get_subtb(subgroup_code: str) -> int | None:
		record = CODE_INDEX.get(subgroup_code.upper())