diff pdfimages.xml @ 0:fe8e52e52961 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/pdfimages/ commit 6f5af0db756df0cf9cc62dc1d5866a9f10c4346c
author iuc
date Thu, 31 Jul 2025 17:35:49 +0000
parents
children 1ba934f8c35b
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pdfimages.xml	Thu Jul 31 17:35:49 2025 +0000
@@ -0,0 +1,128 @@
+<tool name="pdfimages" id="pdfimages" version="@TOOL_VERSION@+@VERSION_SUFFIX@" profile="24.2" license="GPL-2.0-only">
+    <description>Extract images from a PDF file</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <expand macro="creators"/>
+    <command detect_errors="exit_code"><![CDATA[
+        pdfimages
+        #if str($f) != ""
+            -f $f
+        #end if
+        #if str($l) != ""
+            -l $l
+        #end if
+        $p 
+        #if $output_format != 'ppm'
+            $output_format
+        #end if
+        '$input_file' $output_prefix
+    ]]></command>
+    <inputs>
+        <param name="input_file" type="data" format="pdf" label="Input PDF file"/>
+        <param name="output_format" type="select" label="Select desired image format">
+            <option value="ppm" selected="true">ppm</option>
+            <option value="-png">png</option>
+            <option value="-tiff">tiff</option>
+        </param>
+        <param argument="-f" type="integer" min="1" label="First page" optional="true"/>
+        <param argument="-l" type="integer" min="1" label="Last page" optional="true"/>
+        <param argument="-p" type="boolean" truevalue="-p" falsevalue="" label="Include page numbers in output file names"/>
+        <param name="output_prefix" type="text" value="image" label="Output name prefix">
+            <validator type="regex" message="Use only letters (A–Z, a–z), numbers, underscores (_), or dashes (-).">^[A-Za-z0-9_-]+$</validator>
+        </param>
+    </inputs>
+    <outputs>
+        <collection name="ppm_output_collection" type="list" format="ppm" label="${tool.name} on ${on_string}: Extracted images in ppm format">
+            <filter>output_format == "ppm"</filter>
+            <discover_datasets pattern="__name_and_ext__" recurse="true" format="ppm"/>
+        </collection>
+        <collection name="png_output_collection" type="list" format="png" label="${tool.name} on ${on_string}: Extracted images in png">
+            <filter>output_format == "-png"</filter>
+            <discover_datasets pattern="__name_and_ext__" recurse="true" format="png"/>
+        </collection>
+        <collection name="tiff_output_collection" type="list" format="tiff" label="${tool.name} on ${on_string}: Extracted images in tiff format">
+            <filter>output_format == "-tiff"</filter>
+            <discover_datasets pattern="__name_and_ext__" recurse="true" format="tiff"/>
+        </collection>
+    </outputs>
+    <tests>
+    <test expect_num_outputs="1">
+        <param name="input_file" value="test.pdf"/>
+        <param name="output_format" value="ppm"/>
+        <param name="p" value="false"/>
+        <param name="output_prefix" value="image"/>
+        <output_collection name="ppm_output_collection" type="list">
+            <element name="image-000" ftype="ppm">
+                <assert_contents>
+                    <has_size value="3145745"/>
+                </assert_contents>
+            </element>
+            <element name="image-001" ftype="ppm">
+                <assert_contents>
+                    <has_size value="3145745"/>
+                </assert_contents>
+            </element>
+            <element name="image-002" ftype="ppm">
+                <assert_contents>
+                    <has_size value="3145745"/>
+                </assert_contents>
+            </element>
+        </output_collection>
+    </test>
+    <test expect_num_outputs="1">
+        <param name="input_file" value="test.pdf"/>
+        <param name="output_format" value="-tiff"/>
+        <param name="p" value="true"/>
+        <param name="output_prefix" value="image"/>
+        <output_collection name="tiff_output_collection" type="list">
+            <element name="image-001-000" ftype="tif">
+                <assert_contents>
+                    <has_size value="3149004"/>
+                </assert_contents>
+            </element>
+            <element name="image-002-001" ftype="tif">
+                <assert_contents>
+                    <has_size value="3149004"/>
+                </assert_contents>
+            </element>
+            <element name="image-003-002" ftype="tif">
+                <assert_contents>
+                    <has_size value="1049542"/>
+                </assert_contents>
+            </element>
+        </output_collection>
+    </test>
+    <test expect_num_outputs="1">
+        <param name="input_file" value="test.pdf"/>
+        <param name="output_format" value="-png"/>
+        <param name="f" value="1"/>
+        <param name="l" value="2"/>
+        <param name="p" value="true"/>
+        <param name="output_prefix" value="output_prefix-0"/>
+        <output_collection name="png_output_collection" type="list">
+            <element name="output_prefix-0-001-000" ftype="png" file="output_prefix-0-001-000.png"/>
+            <element name="output_prefix-0-002-001" ftype="png" file="output_prefix-0-002-001.png"/>
+        </output_collection>
+    </test>
+    </tests>
+    <help><![CDATA[
+**What it does**
+
+`pdfimages` extracts images from a PDF file. This tool is useful for retrieving high-quality embedded images from PDFs for further analysis or reuse.
+
+**Inputs**
+
+- **Input PDF file**: The source PDF from which images will be extracted.
+- **Output format**: Choose the desired output format (`ppm`, `png`, or `tiff`).
+- **First page / Last page** (optional): Limit extraction to a specific page range.
+- **Include page numbers**: Adds page numbers to the output filenames for better traceability.
+- **Output name prefix**: Sets a custom prefix for all extracted image files.
+
+**Outputs**
+
+A collection of images in the selected format, named according to the specified prefix and page/image order.
+]]></help>
+    <expand macro="citations"/>
+</tool>
\ No newline at end of file