Loading src/tdoc_crawler/specs/sources/threegpp.py +5 −3 Original line number Diff line number Diff line """3GPP portal metadata fetcher for specs.""" import logging from pathlib import Path from urllib.parse import parse_qs, urlparse import requests from tdoc_crawler.http_client import create_cached_session from tdoc_crawler.specs.normalization import normalize_spec_number _logger = logging.getLogger(__name__) Loading Loading @@ -37,7 +37,9 @@ def fetch_threegpp_metadata(spec_number: str) -> dict[str, object]: "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8", } response = requests.get(url, timeout=30, allow_redirects=True, headers=headers) cache_dir = Path.home() / ".cache" / "tdoc-crawler" session = create_cached_session(cache_dir) response = session.get(url, timeout=30, allow_redirects=True, headers=headers) response.raise_for_status() parsed = urlparse(response.url) Loading Loading
src/tdoc_crawler/specs/sources/threegpp.py +5 −3 Original line number Diff line number Diff line """3GPP portal metadata fetcher for specs.""" import logging from pathlib import Path from urllib.parse import parse_qs, urlparse import requests from tdoc_crawler.http_client import create_cached_session from tdoc_crawler.specs.normalization import normalize_spec_number _logger = logging.getLogger(__name__) Loading Loading @@ -37,7 +37,9 @@ def fetch_threegpp_metadata(spec_number: str) -> dict[str, object]: "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36", "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8", } response = requests.get(url, timeout=30, allow_redirects=True, headers=headers) cache_dir = Path.home() / ".cache" / "tdoc-crawler" session = create_cached_session(cache_dir) response = session.get(url, timeout=30, allow_redirects=True, headers=headers) response.raise_for_status() parsed = urlparse(response.url) Loading