Commit 515c9a1f authored by Jan Reimes's avatar Jan Reimes
Browse files

🧪 test: add fixtures for testing document conversion

parent e40024b6
Loading
Loading
Loading
Loading
+85 −0
Original line number Diff line number Diff line
@@ -2,10 +2,13 @@

from __future__ import annotations

import zipfile
from dataclasses import dataclass
from pathlib import Path

import httpx
import pytest
from pydantic_settings import BaseSettings, SettingsConfigDict

from pdf_remote_converter.config import ProviderSettings
from pdf_remote_converter.providers.models import ConversionResult
@@ -74,3 +77,85 @@ def provider_settings(cache: pytest.Cache) -> ProviderSettings:
        cache_dir=cache_dir,
        default_provider="cloudconvert",
    )


def _download_zip(zip_url: str, zip_path: Path) -> None:
    """Stream *zip_url* to *zip_path*, publishing the file only once complete.

    The body is written chunk by chunk to a sibling ``.part`` file and renamed
    into place afterwards, so an interrupted download never leaves a truncated
    archive at *zip_path* for later runs to pick up.

    Raises:
        httpx.HTTPStatusError: If the server answers with an error status.
    """
    partial_path = zip_path.with_name(zip_path.name + ".part")
    try:
        with httpx.stream("GET", zip_url, timeout=60.0) as response:
            response.raise_for_status()
            with partial_path.open("wb") as fh:
                for chunk in response.iter_bytes():
                    fh.write(chunk)
        partial_path.replace(zip_path)
    finally:
        # No-op after a successful rename; removes the leftover on failure.
        partial_path.unlink(missing_ok=True)


@pytest.fixture
def example_docs(cache: pytest.Cache) -> list[Path]:
    """Download and extract example DOCX files for tests.

    Each archive listed in ``zip_urls`` is expected to contain a ``.docx`` file
    with the same stem as the archive. Archives are stored in the ``downloads``
    cache directory and extracted into the ``examples`` cache directory; both
    steps are skipped when their result is already present.

    Returns:
        Paths to the extracted ``.docx`` files, one per URL, in URL order.

    Raises:
        httpx.HTTPStatusError: If a download returns an error status.
        FileNotFoundError: If an archive does not contain the expected
            ``.docx`` file.
    """
    zip_urls = [
        # "https://www.3gpp.org/ftp/Specs/archive/26_series/26.253/26253-j10.zip",
        "https://www.3gpp.org/ftp/Specs/archive/26_series/26.131/26131-j00.zip",
    ]

    examples_dir = Path(str(cache.mkdir("examples")))
    downloads_dir = Path(str(cache.mkdir("downloads")))
    doc_paths: list[Path] = []

    for zip_url in zip_urls:
        # Derive the archive and document names from the last URL segment.
        zip_name = Path(zip_url).name
        zip_path = downloads_dir / zip_name
        doc_path = examples_dir / Path(zip_name).with_suffix(".docx").name

        if not doc_path.exists():
            # Reuse a previously downloaded archive when there is one.
            if not zip_path.exists():
                _download_zip(zip_url, zip_path)

            with zipfile.ZipFile(zip_path, "r") as zf:
                zf.extractall(examples_dir)

            # Fail here with a clear message instead of handing tests a path
            # that does not exist.
            if not doc_path.exists():
                raise FileNotFoundError(
                    f"Expected {doc_path.name} in {zip_path} but it was not extracted"
                )

        doc_paths.append(doc_path)

    return doc_paths


class EnvSettings(BaseSettings):
    """Optional provider credentials sourced from a ``.env`` file.

    Every field defaults to ``None``, so a credential that is not configured
    is reported as absent.
    """

    # Read ".env" as UTF-8; entries that match no field below are ignored.
    model_config = SettingsConfigDict(
        env_file=".env",
        env_file_encoding="utf-8",
        extra="ignore",
    )

    # CloudConvert
    cloudconvert_api_key: str | None = None

    # Adobe
    adobe_client_id: str | None = None
    adobe_client_secret: str | None = None

    # Zamzar
    zamzar_api_key: str | None = None


@pytest.fixture
def env_settings() -> EnvSettings | None:
    """Provide ``EnvSettings`` when a ``.env`` file is present, else ``None``.

    The ``.env`` path is relative, so it is looked up from the current working
    directory.
    """
    dotenv_present = Path(".env").exists()
    return EnvSettings() if dotenv_present else None


@pytest.fixture
def require_env_settings(env_settings: EnvSettings | None) -> EnvSettings:
    """Provide the loaded env settings, skipping the test when there are none."""
    if env_settings is not None:
        return env_settings
    # pytest.skip raises, so nothing is returned on this path.
    pytest.skip(".env file not found")


@pytest.fixture
def converted_dir(cache: pytest.Cache) -> Path:
    """Provide the ``converted`` cache directory used for converted PDFs."""
    cached_location = cache.mkdir("converted")
    return Path(str(cached_location))