Commit 3d8251c4 authored by Jan Reimes
Browse files

feat(cli): add --extract-media option for image extraction

parent 7cc3d55c
Loading
Loading
Loading
Loading
+7 −0
Original line number Diff line number Diff line
@@ -224,6 +224,13 @@ DocxDirectOption = Annotated[
        help="Feed .docx/.doc directly to backend, skip LibreOffice PDF conversion",
    ),
]
# Reusable Typer annotation for the boolean ``--extract-media`` CLI flag.
# Only the explicit ``--extract-media`` name is declared here; the default
# value is supplied by the command parameter annotated with this alias.
# NOTE(review): the help text promises a ./media folder next to the markdown;
# the extraction itself is not implemented by this alias - confirm the
# backend's actual output location matches this wording.
ExtractMediaOption = Annotated[
    bool,
    typer.Option(
        "--extract-media",
        help="Extract embedded images to a ./media folder next to the markdown",
    ),
]
WorkspaceNameOption = Annotated[
    str | None,
    typer.Option("-w", "--workspace", help="Workspace name (default: active workspace)"),
+5 −0
Original line number Diff line number Diff line
@@ -13,6 +13,7 @@ from tdoc_crawler.cli._shared import console, create_progress_bar
from tdoc_crawler.cli.args import (
    DeviceOption,
    DocxDirectOption,
    ExtractMediaOption,
    FiguresModeOption,
    MdYamlFrontmatterOption,
    ProcessLimitOption,
@@ -131,6 +132,7 @@ def _process_member(
    md_yaml_frontmatter: bool,
    docling_config: DoclingConfig,
    docx_direct: bool = False,
    extract_media: bool = False,
) -> tuple[str, bool, bool, int]:
    """Process a single workspace member.

@@ -150,6 +152,7 @@ def _process_member(
            force=force,
            docling_config=docling_config,
            docx_direct=docx_direct,
            extract_media=extract_media,
        )
        if result_path:
            suffix = result_path.suffix.lstrip(".")
@@ -177,6 +180,7 @@ def workspace_process(
    tables: TablesModeOption = "embed",
    device: DeviceOption = "auto",
    docx_direct: DocxDirectOption = False,
    extract_media: ExtractMediaOption = False,
    md_yaml_frontmatter: MdYamlFrontmatterOption = True,
    verbosity: VerbosityOption = str(DEFAULT_VERBOSITY),
) -> None:
@@ -258,6 +262,7 @@ def workspace_process(
                md_yaml_frontmatter,
                docling_config,
                docx_direct,
                extract_media,
            )
            if succeeded:
                processed += 1