Loading src/tdoc_crawler/cli/args.py +7 −0 Original line number Diff line number Diff line Loading @@ -224,6 +224,13 @@ DocxDirectOption = Annotated[ help="Feed .docx/.doc directly to backend, skip LibreOffice PDF conversion", ), ] ExtractMediaOption = Annotated[ bool, typer.Option( "--extract-media", help="Extract embedded images to a ./media folder next to the markdown", ), ] WorkspaceNameOption = Annotated[ str | None, typer.Option("-w", "--workspace", help="Workspace name (default: active workspace)"), Loading src/tdoc_crawler/cli/workspace/process.py +5 −0 Original line number Diff line number Diff line Loading @@ -13,6 +13,7 @@ from tdoc_crawler.cli._shared import console, create_progress_bar from tdoc_crawler.cli.args import ( DeviceOption, DocxDirectOption, ExtractMediaOption, FiguresModeOption, MdYamlFrontmatterOption, ProcessLimitOption, Loading Loading @@ -131,6 +132,7 @@ def _process_member( md_yaml_frontmatter: bool, docling_config: DoclingConfig, docx_direct: bool = False, extract_media: bool = False, ) -> tuple[str, bool, bool, int]: """Process a single workspace member. Loading @@ -150,6 +152,7 @@ def _process_member( force=force, docling_config=docling_config, docx_direct=docx_direct, extract_media=extract_media, ) if result_path: suffix = result_path.suffix.lstrip(".") Loading Loading @@ -177,6 +180,7 @@ def workspace_process( tables: TablesModeOption = "embed", device: DeviceOption = "auto", docx_direct: DocxDirectOption = False, extract_media: ExtractMediaOption = False, md_yaml_frontmatter: MdYamlFrontmatterOption = True, verbosity: VerbosityOption = str(DEFAULT_VERBOSITY), ) -> None: Loading Loading @@ -258,6 +262,7 @@ def workspace_process( md_yaml_frontmatter, docling_config, docx_direct, extract_media, ) if succeeded: processed += 1 Loading Loading
src/tdoc_crawler/cli/args.py +7 −0 Original line number Diff line number Diff line Loading @@ -224,6 +224,13 @@ DocxDirectOption = Annotated[ help="Feed .docx/.doc directly to backend, skip LibreOffice PDF conversion", ), ] ExtractMediaOption = Annotated[ bool, typer.Option( "--extract-media", help="Extract embedded images to a ./media folder next to the markdown", ), ] WorkspaceNameOption = Annotated[ str | None, typer.Option("-w", "--workspace", help="Workspace name (default: active workspace)"), Loading
src/tdoc_crawler/cli/workspace/process.py +5 −0 Original line number Diff line number Diff line Loading @@ -13,6 +13,7 @@ from tdoc_crawler.cli._shared import console, create_progress_bar from tdoc_crawler.cli.args import ( DeviceOption, DocxDirectOption, ExtractMediaOption, FiguresModeOption, MdYamlFrontmatterOption, ProcessLimitOption, Loading Loading @@ -131,6 +132,7 @@ def _process_member( md_yaml_frontmatter: bool, docling_config: DoclingConfig, docx_direct: bool = False, extract_media: bool = False, ) -> tuple[str, bool, bool, int]: """Process a single workspace member. Loading @@ -150,6 +152,7 @@ def _process_member( force=force, docling_config=docling_config, docx_direct=docx_direct, extract_media=extract_media, ) if result_path: suffix = result_path.suffix.lstrip(".") Loading Loading @@ -177,6 +180,7 @@ def workspace_process( tables: TablesModeOption = "embed", device: DeviceOption = "auto", docx_direct: DocxDirectOption = False, extract_media: ExtractMediaOption = False, md_yaml_frontmatter: MdYamlFrontmatterOption = True, verbosity: VerbosityOption = str(DEFAULT_VERBOSITY), ) -> None: Loading Loading @@ -258,6 +262,7 @@ def workspace_process( md_yaml_frontmatter, docling_config, docx_direct, extract_media, ) if succeeded: processed += 1 Loading