Mercurial > repos > iuc > fileidentification
diff fileidentification.xml @ 0:ff7cec6bc518 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/fileidentification commit bb234555cae70fdd9be475415855a7a03c4139df
| author | iuc |
|---|---|
| date | Thu, 30 Oct 2025 16:52:27 +0000 |
| parents | |
| children | 639d81cf8ac8 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/fileidentification.xml Thu Oct 30 16:52:27 2025 +0000
@@ -0,0 +1,62 @@
+<tool id="fileidentification" name="File Format Identification" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="24.1">
+    <description>Check multimedia files if they are corrupt or duplicated</description>
+    <macros><import>macros.xml</import></macros>
+    <expand macro="requirements"/>
+    <!--
+        The command unpacks the uploaded ZIP into ./input_dir, runs identify.py on that directory,
+        then runs it again with the inspect flag and redirects stdout to results.txt, which the
+        "out" dataset picks up via from_work_dir.
+        NOTE(review): the absolute /app paths presumably exist only inside the container provided
+        by the "requirements" macro (macros.xml); confirm before changing them.
+    -->
+    <command detect_errors="exit_code"><![CDATA[
+unzip '$input' -d input_dir &&
+/app/.venv/bin/python /app/identify.py ./input_dir &&
+/app/.venv/bin/python /app/identify.py ./input_dir --inspect > results.txt
+    ]]></command>
+    <inputs>
+        <param type="data" name="input" format="zip" label="ZIP containing the files to be analysed"/>
+    </inputs>
+    <outputs>
+        <data name="out" format="txt" from_work_dir="results.txt" label="${tool.name} on ${on_string}"/>
+    </outputs>
+    <tests>
+        <test expect_num_outputs="1">
+            <param name="input" value="test-data.zip"/>
+            <output name="out" ftype="txt" file="output_results_linux_gh_action.txt"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+Do you have a huge number of multimedia files and you don't know if they are corrupt,
+or if they have the correct extension?
+This tool:
+
+- gives you an overview of what file types there are
+- checks if the extension of the files matches their content
+- checks if there are any duplicates
+- checks if the content of the files is intact
+
+| Input: A ZIP containing the files to be analysed (may be a nested folder structure)
+| Output: A report in plain text form
+
+A possible use case is a digital preservation workflow,
+where you want to make sure that you only preserve high-quality files for the future.
+
+Supported file types: A wide range of image formats (pixel and vector), video, audio, PDF, MS Office.
+
+Note: The original fileidentification tool is more feature-rich.
+In particular, it offers bulk conversion of files, which is currently not supported on Galaxy.
+
+Find more information in `the GitHub repo <https://github.com/dasch-swiss/fileidentification>`_.
+    ]]></help>
+    <citations>
+        <citation type="bibtex">
+@misc{githubfileidentification,
+  author = {Swiss National Data and Service Center for the Humanities},
+  year = {2025},
+  title = {Fileidentification - A CLI to Identify Multimedia File Formats and Bulk Convert Files},
+  publisher = {GitHub},
+  journal = {GitHub repository},
+  url = {https://github.com/dasch-swiss/fileidentification},
+}</citation>
+    </citations>
+</tool>
