diff fileidentification.xml @ 0:ff7cec6bc518 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/fileidentification commit bb234555cae70fdd9be475415855a7a03c4139df
author iuc
date Thu, 30 Oct 2025 16:52:27 +0000
parents
children 639d81cf8ac8
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fileidentification.xml	Thu Oct 30 16:52:27 2025 +0000
@@ -0,0 +1,56 @@
+<tool id="fileidentification" name="File Format Identification" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="24.1">
+    <description>Check whether multimedia files are corrupt or duplicated</description>
+    <macros><import>macros.xml</import></macros>
+    <expand macro="requirements"/>
+    <command detect_errors="exit_code"><![CDATA[
+unzip '$input' -d input_dir &&
+/app/.venv/bin/python /app/identify.py ./input_dir &&
+/app/.venv/bin/python /app/identify.py ./input_dir --inspect > results.txt
+    ]]></command> <!-- Extracts the input ZIP into input_dir, then runs identify.py on that directory twice: a plain pass, then a pass with the inspect flag whose stdout is redirected to results.txt. The steps are chained, so a failing step stops the rest and the job is marked failed on a non-zero exit code. NOTE(review): the first pass presumably prepares data that the inspect pass relies on; confirm against the upstream README. -->
+    <inputs> <!-- One dataset parameter: the ZIP archive that the command unpacks. -->
+        <param name="input" type="data" format="zip" label="ZIP containing the files to be analysed"/>
+    </inputs>
+    <outputs> <!-- The report is collected from results.txt, which the command writes in the job working directory. -->
+        <data name="out" format="txt" label="${tool.name} on ${on_string}" from_work_dir="results.txt"/>
+    </outputs>
+    <tests> <!-- Runs the tool on test-data.zip and compares the single output with the stored expected report. -->
+        <test expect_num_outputs="1">
+            <param name="input" value="test-data.zip"/>
+            <output name="out" file="output_results_linux_gh_action.txt" ftype="txt"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+Do you have a huge number of multimedia files and you don't know if they are corrupt,
+or if they have the correct extension?
+This tool:
+
+- gives you an overview of what file types there are
+- checks if the extensions of the files match their content
+- checks if there are any duplicates
+- checks if the content of the files is intact
+
+| Input: A ZIP containing the files to be analysed (may be a nested folder structure)
+| Output: A report in plain text form
+
+One possible use case is digital preservation workflows,
+where you want to make sure that you only preserve high-quality files for the future.
+
+Supported file types: A wide range of image formats (pixel and vector), video, audio, PDF, and MS Office documents.
+
+Note: The original fileidentification tool is more feature-rich.
+In particular, it offers bulk conversion of files, which is currently not supported on Galaxy.
+
+Find more information in `the GitHub repo <https://github.com/dasch-swiss/fileidentification>`_.
+    ]]></help>
+    <citations> <!-- The author is an institution: the extra pair of braces keeps the name as one unit, because BibTeX otherwise splits author lists at the word "and". -->
+        <citation type="bibtex">
+@misc{githubfileidentification,
+  author = {{Swiss National Data and Service Center for the Humanities}},
+  year = {2025},
+  title = {Fileidentification - A CLI to Identify Multimedia File Formats and Bulk Convert Files},
+  publisher = {GitHub},
+  journal = {GitHub repository},
+  url = {https://github.com/dasch-swiss/fileidentification},
+}</citation>
+    </citations>
+</tool>