Mercurial > repos > bgruening > markitdown
view markitdown.xml @ 1:f6fa7e70120f draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/markitdown commit 1df47411ce8651c1d4f68cd032b2afe7d5a721de
| author | bgruening |
|---|---|
| date | Mon, 13 Oct 2025 13:22:13 +0000 |
| parents | 5ad32046903b |
| children | 4926706c13db |
line wrap: on
line source
<!--
    Galaxy tool wrapper around the `markitdown` command line program.
    Takes one input dataset plus optional extension / MIME type / charset
    hints and writes a single Markdown dataset.
-->
<tool id="markitdown" name="Markitdown" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
    <description>Convert documents to Markdown</description>
    <macros>
        <!-- @TOOL_VERSION@ is also used below as the version of the markitdown package requirement. -->
        <token name="@TOOL_VERSION@">0.1.3</token>
        <token name="@VERSION_SUFFIX@">0</token>
        <token name="@PROFILE@">23.0</token>
    </macros>
    <requirements>
        <requirement type="package" version="3.12">python</requirement>
        <requirement type="package" version="@TOOL_VERSION@">markitdown</requirement>
    </requirements>
    <command detect_errors="exit_code"><![CDATA[
        ## Map the Galaxy datatype extension of the input to the extension
        ## hint handed to markitdown via -x (e.g. 'markdown' -> 'md',
        ## 'tabular' -> 'csv'). Unknown datatypes map to an empty string.
        #set ext_map = {
            'pdf': 'pdf',
            'docx': 'docx',
            'pptx': 'pptx',
            'xlsx': 'xlsx',
            'html': 'html',
            'txt': 'txt',
            'ipynb': 'ipynb',
            'markdown': 'md',
            'zip': 'zip',
            'tabular': 'csv',
            'csv': 'csv'
        }
        #set file_ext = ext_map.get($input.ext, '')

        ## A user supplied extension override wins over the datatype based guess.
        #set final_ext = $ext_hint if $ext_hint else $file_ext

        markitdown
            '$input'
            ## Only pass -x when there is a value; a bare -x would swallow
            ## the next option as its argument.
            #if $final_ext
                -x '$final_ext'
            #end if
            #if $mime_type
                -m '$mime_type'
            #end if
            #if $charset
                -c '$charset'
            #end if
            $keep_data_uris
            -o '$output'
    ]]></command>
    <inputs>
        <param name="input" type="data" format="pdf,docx,pptx,xlsx,html,txt,ipynb,markdown,zip,tabular" label="Input file"/>
        <!-- The three text parameters below are optional; each is only added to the command line when non-empty. -->
        <param name="ext_hint" type="text" optional="true" label="Extension override"/>
        <param name="mime_type" type="text" optional="true" label="MIME type hint"/>
        <param name="charset" type="text" optional="true" label="Character set (e.g. UTF-8)"/>
        <!-- Expands to the keep-data-uris long option when checked, to nothing otherwise. -->
        <param name="keep_data_uris" type="boolean" truevalue="--keep-data-uris" falsevalue="" label="Keep embedded data URIs"/>
    </inputs>
    <outputs>
        <data name="output" format="markdown" label="Converted Markdown output"/>
    </outputs>
    <tests>
        <!-- PDF input, extension derived from the datatype -->
        <test>
            <param name="input" value="EAR.pdf" ftype="pdf"/>
            <output name="output">
                <assert_contents>
                    <has_text text="Tags: ERGA-BGE"/>
                    <has_text text="Lineage: mammalia_odb10"/>
                </assert_contents>
            </output>
        </test>
        <!-- DOCX input -->
        <test>
            <param name="input" value="example.docx" ftype="docx"/>
            <output name="output">
                <assert_contents>
                    <has_text text="# Lorem ipsum dolor sit amet, consectetur adipiscing elit."/>
                </assert_contents>
            </output>
        </test>
        <!-- Disabled test kept from the original file (ODT input with an explicit extension override). -->
        <!--test>
            <param name="input" value="example.odt"/>
            <param name="ext_hint" value="odt"/>
            <output name="output">
                <assert_contents>
                    <has_text text="This is a Word document"/>
                </assert_contents>
            </output>
        </test-->
        <!-- HTML input with the keep-data-uris flag enabled -->
        <test>
            <param name="input" value="report_4.html" ftype="html"/>
            <param name="keep_data_uris" value="true"/>
            <output name="output">
                <assert_contents>
                    <has_text text="is the contig length such that using longer or equal length contigs produces"/>
                </assert_contents>
            </output>
        </test>
        <!-- Plain text input with an explicit extension override -->
        <test>
            <param name="input" value="example.txt" ftype="txt"/>
            <param name="ext_hint" value="txt"/>
            <output name="output">
                <assert_contents>
                    <has_text text="This is a plain text file"/>
                </assert_contents>
            </output>
        </test>
        <!-- Jupyter notebook input; the double quotes in the expected text must be escaped inside the attribute. -->
        <test>
            <param name="input" value="example.ipynb" ftype="ipynb"/>
            <output name="output">
                <assert_contents>
                    <has_text text="print(&quot;Hello, world!&quot;)"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <!-- Help body is kept at column 0: leading indentation is significant in Markdown. -->
    <help format="markdown"><![CDATA[
**Markitdown** converts rich document formats (PDF, DOCX, HTML, etc.) to Markdown.

---

### Supported Formats:

- PDF, DOCX, PPTX, XLSX
- HTML, TXT, Markdown
- Jupyter Notebooks (IPYNB)
- ZIP containing supported formats
- Tabular (CSV)

---

### Options:

- **Extension override** (`-x`): hint for file type if not obvious
- **MIME type** (`-m`): manual MIME hint
- **Charset** (`-c`): text encoding hint
- **Keep data URIs**: retain base64-encoded images

Project: https://github.com/microsoft/markitdown
    ]]></help>
    <citations>
        <citation type="bibtex">
            @misc{markitdown2024,
                author = {Microsoft},
                title = {markitdown: Convert documents to markdown},
                year = {2024},
                howpublished = {\url{https://github.com/microsoft/markitdown}}
            }
        </citation>
    </citations>
</tool>
