Commit ed4c763d authored by Jan Reimes's avatar Jan Reimes
Browse files

refactor(convert-lo): rename path parameters for clarity

- input_path to input_file, output_path to output_file in ConversionResult
- soffice_path to soffice_file in Converter constructor
- file_path to source_file in conversion.py helper functions
- Update tests and documentation to match renamed API
parent 3eaeedb6
Loading
Loading
Loading
Loading
+46 −46
Original line number Diff line number Diff line
@@ -55,36 +55,36 @@ class ConverterConfig:
        )


def is_office_format(file_path: Path) -> bool:
def is_office_format(source_file: Path) -> bool:
    """Check if a file is an Office document that needs PDF conversion.

    Args:
        file_path: Path to the file to check.
        source_file: Path to the file to check.

    Returns:
        True if the file is an Office document format.
    """
    return file_path.suffix.lower() in OFFICE_FORMATS
    return source_file.suffix.lower() in OFFICE_FORMATS


def get_cached_pdf_path(file_path: Path) -> Path | None:
def get_cached_pdf_path(source_file: Path) -> Path | None:
    """Get the path to a cached PDF conversion if it exists.

    The cached PDF is stored in a `.ai` subdirectory next to the original file.

    Args:
        file_path: Path to the original Office document.
        source_file: Path to the original Office document.

    Returns:
        Path to cached PDF if it exists, None otherwise.
    """
    ai_dir = file_path.parent / ".ai"
    cached_pdf = ai_dir / f"{file_path.stem}.pdf"
    ai_dir = source_file.parent / ".ai"
    cached_pdf = ai_dir / f"{source_file.stem}.pdf"
    return cached_pdf if cached_pdf.exists() else None


def convert_to_pdf(
    file_path: Path,
    source_file: Path,
    output_dir: Path | None = None,
    *,
    force: bool = False,
@@ -98,7 +98,7 @@ def convert_to_pdf(
    - Fallback from local to remote on failure (when using AUTO backend)

    Args:
        file_path: Path to the Office document (DOCX, DOC, PPT, etc.)
        source_file: Path to the Office document (DOCX, DOC, PPT, etc.)
        output_dir: Optional output directory for the PDF. If None, uses
            the `.ai` subdirectory next to the source file.
        force: If True, re-convert even if a cached PDF exists.
@@ -112,42 +112,42 @@ def convert_to_pdf(
        ConversionError: If conversion fails with all available backends.
        FileNotFoundError: If the input file does not exist.
    """
    if not file_path.exists():
        raise FileNotFoundError(f"Input file not found: {file_path}")
    if not source_file.exists():
        raise FileNotFoundError(f"Input file not found: {source_file}")

    if not is_office_format(file_path):
        raise ConversionError(f"Unsupported file format: {file_path.suffix}")
    if not is_office_format(source_file):
        raise ConversionError(f"Unsupported file format: {source_file.suffix}")

    config = config or ConverterConfig.from_env()
    output_dir = output_dir or file_path.parent / ".ai"
    output_path = output_dir / f"{file_path.stem}.pdf"
    output_dir = output_dir or source_file.parent / ".ai"
    output_file = output_dir / f"{source_file.stem}.pdf"

    # Check for cached conversion
    if not force and output_path.exists():
        logger.debug("Using cached PDF: %s", output_path)
        return output_path
    if not force and output_file.exists():
        logger.debug("Using cached PDF: %s", output_file)
        return output_file

    # Ensure output directory exists
    output_dir.mkdir(parents=True, exist_ok=True)

    # Select backend and convert
    if config.backend == ConverterBackend.REMOTE:
        return convert_via_remote(file_path, output_dir, config)
        return convert_via_remote(source_file, output_dir, config)

    if config.backend == ConverterBackend.LIBREOFFICE:
        return convert_via_libreoffice(file_path, output_dir)
        return convert_via_libreoffice(source_file, output_dir)

    # AUTO: Try LibreOffice first, fallback to remote
    try:
        return convert_via_libreoffice(file_path, output_dir)
        return convert_via_libreoffice(source_file, output_dir)
    except Exception as e:
        logger.warning("LibreOffice conversion failed for %s: %s", file_path.name, e)
        logger.info("Falling back to remote converter for %s", file_path.name)
        return convert_via_remote(file_path, output_dir, config)
        logger.warning("LibreOffice conversion failed for %s: %s", source_file.name, e)
        logger.info("Falling back to remote converter for %s", source_file.name)
        return convert_via_remote(source_file, output_dir, config)


def convert_via_libreoffice(
    file_path: Path,
    source_file: Path,
    output_dir: Path,
) -> Path:
    """Convert an Office document to PDF using local LibreOffice.
@@ -156,7 +156,7 @@ def convert_via_libreoffice(
    Requires LibreOffice to be installed and available on the system PATH.

    Args:
        file_path: Path to the Office document.
        source_file: Path to the Office document.
        output_dir: Output directory for the PDF.

    Returns:
@@ -165,32 +165,32 @@ def convert_via_libreoffice(
    Raises:
        ConversionError: If LibreOffice conversion fails.
    """
    output_path = output_dir / f"{file_path.stem}.pdf"
    output_file = output_dir / f"{source_file.stem}.pdf"

    try:
        converter = Converter()
        with tempfile.TemporaryDirectory() as tmpdir:
            result = converter.convert(file_path, LibreOfficeFormat.PDF, Path(tmpdir))
            result = converter.convert(source_file, LibreOfficeFormat.PDF, Path(tmpdir))

            if result is None or result.output_path is None:
                raise ConversionError(f"LibreOffice returned empty result for {file_path.name}")
            if result is None or result.output_file is None:
                raise ConversionError(f"LibreOffice returned empty result for {source_file.name}")

            # Copy the converted PDF to the output directory
            output_path.write_bytes(result.output_path.read_bytes())
            output_file.write_bytes(result.output_file.read_bytes())

        logger.info("Converted %s to PDF via LibreOffice: %s", file_path.name, output_path)
        return output_path
        logger.info("Converted %s to PDF via LibreOffice: %s", source_file.name, output_file)
        return output_file

    except ConversionError:
        raise
    except Exception as e:
        msg = f"LibreOffice conversion failed for {file_path.name}: {e}"
        msg = f"LibreOffice conversion failed for {source_file.name}: {e}"
        logger.error(msg)
        raise ConversionError(msg) from e


def convert_via_remote(
    file_path: Path,
    source_file: Path,
    output_dir: Path,
    config: ConverterConfig | None = None,
) -> Path:
@@ -200,7 +200,7 @@ def convert_via_remote(
    when local LibreOffice is not available or fails.

    Args:
        file_path: Path to the Office document.
        source_file: Path to the Office document.
        output_dir: Output directory for the PDF.
        config: Optional converter configuration with API credentials.
            If None, reads from environment variables.
@@ -212,7 +212,7 @@ def convert_via_remote(
        ConversionError: If remote conversion fails.
    """
    config = config or ConverterConfig.from_env()
    output_path = output_dir / f"{file_path.stem}.pdf"
    output_file = output_dir / f"{source_file.stem}.pdf"

    # Prepare API request
    api_key = config.api_key or os.getenv("PDF_REMOTE_API_KEY")
@@ -223,36 +223,36 @@ def convert_via_remote(
        headers["Authorization"] = f"Bearer {api_key}"

    url = f"{api_base.rstrip('/')}/convert"
    logger.debug("Converting %s via remote API: %s", file_path.name, url)
    logger.debug("Converting %s via remote API: %s", source_file.name, url)

    try:
        output_dir.mkdir(parents=True, exist_ok=True)

        with file_path.open("rb") as source_handle:
            files = {"file": (file_path.name, source_handle, "application/octet-stream")}
        with source_file.open("rb") as source_handle:
            files = {"file": (source_file.name, source_handle, "application/octet-stream")}
            with requests.post(url, files=files, headers=headers, stream=True, timeout=120) as response:
                response.raise_for_status()

                # Stream response to output file
                with output_path.open("wb") as output_handle:
                with output_file.open("wb") as output_handle:
                    for chunk in response.iter_content(chunk_size=8192):
                        if chunk:
                            output_handle.write(chunk)

        logger.info("Converted %s to PDF via remote API: %s", file_path.name, output_path)
        return output_path
        logger.info("Converted %s to PDF via remote API: %s", source_file.name, output_file)
        return output_file

    except requests.exceptions.HTTPError as e:
        status = e.response.status_code if e.response is not None else "unknown"
        msg = f"Remote conversion failed for {file_path.name}: HTTP {status}"
        msg = f"Remote conversion failed for {source_file.name}: HTTP {status}"
        logger.error(msg)
        raise ConversionError(msg) from e
    except requests.exceptions.RequestException as e:
        msg = f"Remote conversion failed for {file_path.name}: {e}"
        msg = f"Remote conversion failed for {source_file.name}: {e}"
        logger.error(msg)
        raise ConversionError(msg) from e
    except OSError as e:
        msg = f"Remote conversion failed for {file_path.name}: {e}"
        msg = f"Remote conversion failed for {source_file.name}: {e}"
        logger.error(msg)
        raise ConversionError(msg) from e

+1 −1
Original line number Diff line number Diff line
@@ -21,7 +21,7 @@ No server or daemon required.

| Parameter | Default | Description |
|-----------|---------|-------------|
| `soffice_path` | `None` | Path to soffice (auto-detected) |
| `soffice_file` | `None` | Path to soffice (auto-detected) |
| `timeout` | `300` | Seconds per file conversion |

### Environment Variables
+2 −2
Original line number Diff line number Diff line
@@ -31,7 +31,7 @@ result = converter.convert(
    output_format=LibreOfficeFormat.PDF,
    output_dir=Path("output"),
)
print(f"Converted: {result.output_path}")
print(f"Converted: {result.output_file}")
```

### Batch Conversion
@@ -75,7 +75,7 @@ LibreOfficeFormat.HTML

```python
Converter(
    soffice_path=None,  # Auto-detect LibreOffice
    soffice_file=None,  # Auto-detect LibreOffice
    timeout=300,         # Timeout per file in seconds
)
```
+9 −9
Original line number Diff line number Diff line
@@ -20,8 +20,8 @@ logger = logging.getLogger(__name__)
class ConversionResult:
    """Result of a LibreOffice conversion."""

    input_path: Path
    output_path: Path
    input_file: Path
    output_file: Path
    output_format: LibreOfficeFormat


@@ -40,16 +40,16 @@ class Converter:

    def __init__(
        self,
        soffice_path: Path | None = None,
        soffice_file: Path | None = None,
        timeout: int = 300,
    ) -> None:
        """Initialize the converter.

        Args:
            soffice_path: Path to soffice executable. If None, auto-detects.
            soffice_file: Path to soffice executable. If None, auto-detects.
            timeout: Timeout in seconds per file conversion (default: 300).
        """
        self._soffice_path = soffice_path or find_soffice()
        self._soffice_file = soffice_file or find_soffice()
        self._timeout = timeout

    def convert(
@@ -103,10 +103,10 @@ class Converter:
            msg = f"LibreOffice conversion failed for {input_file}: {exc}"
            raise ConversionError(msg) from exc

        output_path = output_dir / f"{input_file.stem}.{output_format.value}"
        output_file = output_dir / f"{input_file.stem}.{output_format.value}"
        return ConversionResult(
            input_path=input_file,
            output_path=output_path,
            input_file=input_file,
            output_file=output_file,
            output_format=output_format,
        )

@@ -152,7 +152,7 @@ class Converter:
            raise ConversionError(msg)

        cmd: list[str] = [
            str(self._soffice_path),
            str(self._soffice_file),
            "--headless",
            "--invisible",
            "--nologo",
+16 −16
Original line number Diff line number Diff line
@@ -13,21 +13,21 @@ from convert_lo.formats import LibreOfficeFormat

def test_converter_init_with_custom_path() -> None:
    """Use provided soffice path without discovery."""
    soffice_path = Path("C:/custom/soffice.exe")
    soffice_file = Path("C:/custom/soffice.exe")

    converter = Converter(soffice_path=soffice_path)
    converter = Converter(soffice_file=soffice_file)

    assert converter._soffice_path == soffice_path
    assert converter._soffice_file == soffice_file


def test_converter_init_with_auto_discovery(monkeypatch: pytest.MonkeyPatch) -> None:
    """Resolve soffice path via find_soffice when not provided."""
    expected_path = Path("C:/Program Files/LibreOffice/program/soffice.exe")
    monkeypatch.setattr("convert_lo.converter.find_soffice", lambda: expected_path)
    expected_file = Path("C:/Program Files/LibreOffice/program/soffice.exe")
    monkeypatch.setattr("convert_lo.converter.find_soffice", lambda: expected_file)

    converter = Converter()

    assert converter._soffice_path == expected_path
    assert converter._soffice_file == expected_file


def test_convert_accepts_string_format(
@@ -42,7 +42,7 @@ def test_convert_accepts_string_format(
    mock_result.stderr = ""

    with patch("convert_lo.converter.subprocess.run", return_value=mock_result):
        converter = Converter(soffice_path=Path("C:/soffice.exe"))
        converter = Converter(soffice_file=Path("C:/soffice.exe"))

        # String "pdf" should be accepted and converted to LibreOfficeFormat.PDF
        result = converter.convert(example_docx_path, "pdf", output_dir)
@@ -56,7 +56,7 @@ def test_convert_rejects_invalid_format(
    output_dir: Path,
) -> None:
    """Reject invalid format values."""
    converter = Converter(soffice_path=Path("C:/soffice.exe"))
    converter = Converter(soffice_file=Path("C:/soffice.exe"))

    with pytest.raises(UnsupportedConversionError):
        converter.convert(example_docx_path, "invalid_format", output_dir)
@@ -67,7 +67,7 @@ def test_convert_raises_on_unsupported_conversion(
    output_dir: Path,
) -> None:
    """Raise when conversion pair is unsupported."""
    converter = Converter(soffice_path=Path("C:/soffice.exe"))
    converter = Converter(soffice_file=Path("C:/soffice.exe"))

    input_file = output_dir / "input.pdf"
    input_file.write_text("dummy", encoding="utf-8")
@@ -89,7 +89,7 @@ def test_convert_batch_multiple_files(
    mock_result.stderr = ""

    with patch("convert_lo.converter.subprocess.run", return_value=mock_result):
        converter = Converter(soffice_path=Path("C:/soffice.exe"))
        converter = Converter(soffice_file=Path("C:/soffice.exe"))

        results = converter.convert_batch(
            [example_doc_path, example_docx_path],
@@ -107,7 +107,7 @@ def test_convert_batch_empty_list(
    output_dir: Path,
) -> None:
    """Return empty results for empty input list."""
    converter = Converter(soffice_path=Path("C:/soffice.exe"))
    converter = Converter(soffice_file=Path("C:/soffice.exe"))

    results = converter.convert_batch([], LibreOfficeFormat.PDF, output_dir)

@@ -119,7 +119,7 @@ def test_convert_raises_on_missing_input(
    output_dir: Path,
) -> None:
    """Raise ConversionError when input file does not exist."""
    converter = Converter(soffice_path=Path("C:/soffice.exe"))
    converter = Converter(soffice_file=Path("C:/soffice.exe"))
    missing_path = output_dir / "missing.docx"

    with pytest.raises(ConversionError):
@@ -138,7 +138,7 @@ def test_convert_raises_on_subprocess_failure(
    mock_result.stderr = "Conversion failed"

    with patch("convert_lo.converter.subprocess.run", return_value=mock_result):
        converter = Converter(soffice_path=Path("C:/soffice.exe"))
        converter = Converter(soffice_file=Path("C:/soffice.exe"))

        with pytest.raises(ConversionError):
            converter.convert(example_docx_path, LibreOfficeFormat.PDF, output_dir)
@@ -155,17 +155,17 @@ def test_convert_uses_correct_command(
    mock_result.stdout = ""
    mock_result.stderr = ""

    soffice_path = Path("C:/soffice.exe")
    soffice_file = Path("C:/soffice.exe")

    with patch("convert_lo.converter.subprocess.run", return_value=mock_result) as mock_run:
        converter = Converter(soffice_path=soffice_path)
        converter = Converter(soffice_file=soffice_file)
        converter.convert(example_docx_path, LibreOfficeFormat.PDF, output_dir)

        # Verify the command was constructed correctly
        call_args = mock_run.call_args
        cmd = call_args[0][0]  # First positional argument is the command list

        assert str(soffice_path) in cmd
        assert str(soffice_file) in cmd
        assert "--headless" in cmd
        assert "--convert-to" in cmd
        assert "pdf" in cmd
Loading