Commit 6b4c5ba0 authored by Jan Reimes
Browse files

refactor(specs): use cached session in threegpp.py

parent b6987093
Loading
Loading
Loading
Loading
+5 −3
Original line number Diff line number Diff line
"""3GPP portal metadata fetcher for specs."""

import logging
from pathlib import Path
from urllib.parse import parse_qs, urlparse

import requests

from tdoc_crawler.http_client import create_cached_session
from tdoc_crawler.specs.normalization import normalize_spec_number

_logger = logging.getLogger(__name__)
@@ -37,7 +37,9 @@ def fetch_threegpp_metadata(spec_number: str) -> dict[str, object]:
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8",
    }
    response = requests.get(url, timeout=30, allow_redirects=True, headers=headers)
    cache_dir = Path.home() / ".cache" / "tdoc-crawler"
    session = create_cached_session(cache_dir)
    response = session.get(url, timeout=30, allow_redirects=True, headers=headers)
    response.raise_for_status()

    parsed = urlparse(response.url)