refactor(adobe, base, cloudconvert, zamzar): improve conversion methods and validations (a00078aa) · Commits · Jan Reimes / pdf-remote-converter

src/pdf_remote_converter/providers/adobe.py

+1 −4

Original line number	Diff line number	Diff line
		@@ -55,12 +55,9 @@ class AdobeProvider(AbstractProvider):
		"""Return True when the provider is available and configured."""
		return bool(self.client_id and self.client_secret) and self.quota_remaining > 0

		def convert(self, input_path: Path, output_path: Path) -> ConversionResult:
		def _perform_convert(self, input_path: Path, output_path: Path) -> ConversionResult:
		"""Convert the input Office document to PDF using Adobe PDF Services."""
		input_format = input_path.suffix.lstrip(".").lower()
		if input_format not in self.supported_formats:
		raise InvalidFormatError(f"Format '{input_format}' is not supported by Adobe.")

		try:
		credentials = ServicePrincipalCredentials(client_id=self.client_id, client_secret=self.client_secret)
		pdf_services = PDFServices(credentials=credentials)

src/pdf_remote_converter/providers/base.py

+41 −3

Original line number	Diff line number	Diff line
		@@ -2,10 +2,10 @@

		from __future__ import annotations

		from dataclasses import dataclass
		from pathlib import Path
		from typing import Protocol

		from pdf_remote_converter.exceptions import FileTooLargeError, InvalidFormatError
		from pdf_remote_converter.providers.models import ConversionResult


		@@ -28,6 +28,10 @@ class ProviderBackend(Protocol):
		def quota_remaining(self) -> int:
		"""Return the remaining quota for the provider."""

		@property
		def max_file_size(self) -> int \| None:
		"""Return the maximum file size in bytes, or None if no limit."""

		def convert(self, input_path: Path, output_path: Path) -> ConversionResult:
		"""Convert an input file to PDF.

		Args:
		input_path: Path of the file to convert.
		output_path: Path for the resulting PDF (presumably where the
		provider writes it; confirm against the implementations).

		Returns:
		A ConversionResult describing the conversion.
		"""

		@@ -35,7 +39,6 @@ class ProviderBackend(Protocol):
		"""Return whether the provider is available and configured."""


		@dataclass
		class AbstractProvider:
		"""Base class with shared provider behavior."""

		@@ -44,6 +47,24 @@ class AbstractProvider:
		monthly_quota: int
		quota_remaining: int

		def __init__(
		self,
		name: str,
		supported_formats: set[str],
		monthly_quota: int,
		quota_remaining: int,
		) -> None:
		"""Initialize the provider with shared configuration."""
		self.name = name
		self.supported_formats = supported_formats
		self.monthly_quota = monthly_quota
		self.quota_remaining = quota_remaining

		@property
		def max_file_size(self) -> int \| None:
		"""Return the maximum file size in bytes, or None if no limit."""
		return None

		def is_healthy(self) -> bool:
		"""Return True when the provider is available for use."""
		return self.quota_remaining > 0
		@@ -51,6 +72,23 @@ class AbstractProvider:
		def convert(self, input_path: Path, output_path: Path) -> ConversionResult:
		"""Convert the input file to PDF.

		Subclasses must implement the conversion logic.
		Performs format and file size validation before delegating to
		the provider-specific implementation via _perform_convert().
		"""
		input_format = input_path.suffix.lstrip(".").lower()
		if input_format not in self.supported_formats:
		raise InvalidFormatError(f"Format '{input_format}' is not supported by {self.name}.")

		if self.max_file_size is not None:
		file_size = input_path.stat().st_size
		if file_size > self.max_file_size:
		raise FileTooLargeError(f"{self.name} supports files up to {self.max_file_size} bytes.")

		return self._perform_convert(input_path, output_path)

		def _perform_convert(self, input_path: Path, output_path: Path) -> ConversionResult:
		"""Perform the actual conversion.

		Subclasses must implement this method to provide provider-specific logic.
		"""
		raise NotImplementedError

src/pdf_remote_converter/providers/cloudconvert.py

+1 −6

Original line number	Diff line number	Diff line
		@@ -12,7 +12,6 @@ from hishel.httpx import SyncCacheClient
		from pdf_remote_converter.exceptions import (
		AuthenticationError,
		ConversionError,
		InvalidFormatError,
		ProviderUnavailableError,
		QuotaExceededError,
		RateLimitError,
		@@ -40,12 +39,8 @@ class CloudConvertProvider(AbstractProvider):
		"""Return True when the provider is available and configured."""
		return bool(self.api_key) and self.quota_remaining > 0

		def convert(self, input_path: Path, output_path: Path) -> ConversionResult:
		def _perform_convert(self, input_path: Path, output_path: Path) -> ConversionResult:
		"""Convert the input Office document to PDF using CloudConvert."""
		input_format = input_path.suffix.lstrip(".").lower()
		if input_format not in self.supported_formats:
		raise InvalidFormatError(f"Format '{input_format}' is not supported by CloudConvert.")

		job_payload = {
		"tasks": {
		"import-my-file": {"operation": "import/upload"},

src/pdf_remote_converter/providers/zamzar.py

+21 −34

Original line number	Diff line number	Diff line
		@@ -13,8 +13,6 @@ from hishel.httpx import SyncCacheClient
		from pdf_remote_converter.exceptions import (
		AuthenticationError,
		ConversionError,
		FileTooLargeError,
		InvalidFormatError,
		ProviderUnavailableError,
		QuotaExceededError,
		RateLimitError,
		@@ -28,6 +26,7 @@ class ZamzarProvider(AbstractProvider):
		"""Zamzar API implementation."""

		BASE_URL = "https://api.zamzar.com/v1"
		MAX_FILE_SIZE = 1 * 1024 * 1024 # 1MB free tier limit

		def __init__(self, api_key: str, http_client: SyncCacheClient | None = None) -> None:
		"""Initialize Zamzar provider with API key and HTTP client."""
		@@ -36,44 +35,32 @@ class ZamzarProvider(AbstractProvider):
		self.supported_formats = {"doc", "docx", "xls", "xlsx", "ppt", "pptx"}
		self.monthly_quota = 100
		self.quota_remaining = self.monthly_quota
		self.zamzar_max_file_size = 1 * 1024 * 1024
		self.name = "zamzar"

		@property
		def max_file_size(self) -> int \| None:
		"""Return the maximum file size in bytes (1MB for Zamzar free tier)."""
		return self.MAX_FILE_SIZE

		def is_healthy(self) -> bool:
		"""Return True when the provider is available and configured."""
		return bool(self.api_key) and self.quota_remaining > 0

		def convert(self, input_path: Path, output_path: Path) -> ConversionResult:
		def _perform_convert(self, input_path: Path, output_path: Path) -> ConversionResult:
		"""Convert the input Office document to PDF using Zamzar."""
		input_format = input_path.suffix.lstrip(".").lower()
		if input_format not in self.supported_formats:
		raise InvalidFormatError(f"Format '{input_format}' is not supported by Zamzar.")

		file_size = input_path.stat().st_size
		if file_size > self.zamzar_max_file_size:
		raise FileTooLargeError("Zamzar free tier supports files up to 1MB.")

		upload_response = self._request_json(
		"POST",
		f"{self.BASE_URL}/files",
		files={"file": (input_path.name, input_path.read_bytes())},
		)
		upload_data = self._extract_data(upload_response)
		source_file_id = upload_data.get("id")
		if not source_file_id:
		raise ConversionError("Zamzar upload did not return a file id.")

		conversion_response = self._request_json(
		# Create conversion job using /jobs endpoint
		job_response = self._request_json(
		"POST",
		f"{self.BASE_URL}/conversions",
		json={"source_file_id": source_file_id, "target_format": "pdf"},
		f"{self.BASE_URL}/jobs",
		files={"source_file": (input_path.name, input_path.read_bytes())},
		data={"target_format": "pdf"},
		)
		conversion_data = self._extract_data(conversion_response)
		conversion_id = conversion_data.get("id")
		if not conversion_id:
		raise ConversionError("Zamzar conversion did not return a conversion id.")
		job_data = self._extract_data(job_response)
		job_id = job_data.get("id")
		if not job_id:
		raise ConversionError("Zamzar job creation did not return a job id.")

		target_file_id = self._poll_conversion(conversion_id)
		target_file_id = self._poll_job(job_id)
		download_response = self._request_raw(
		"GET",
		f"{self.BASE_URL}/files/{target_file_id}/content",
		@@ -83,7 +70,7 @@ class ZamzarProvider(AbstractProvider):
		return ConversionResult(
		output_path=output_path,
		provider=self.name,
		from_cache=bool(upload_response.extensions.get("from_cache")),
		from_cache=bool(job_response.extensions.get("from_cache")),
		credits_used=1,
		)

		@@ -125,9 +112,9 @@ class ZamzarProvider(AbstractProvider):
		raise QuotaExceededError(message)
		raise ConversionError(message)

		def _poll_conversion(self, conversion_id: str) -> str:
		"""Poll the conversion status until completion and return target file id."""
		poll_url = f"{self.BASE_URL}/conversions/{conversion_id}"
		def _poll_job(self, job_id: int) -> str:
		"""Poll the job status until completion and return target file id."""
		poll_url = f"{self.BASE_URL}/jobs/{job_id}"
		for _ in range(60):
		response = self._request_json("GET", poll_url)
		data = self._extract_data(response)