Mercurial > repos > iuc > ont_fast5_api_fast5_subset
changeset 1:3bdd8fa109b1 draft
"planemo upload for repository https://github.com/nanoporetech/ont_fast5_api/ commit a5d038871ff0e1b133039b59183dee795449b383"
author | iuc |
---|---|
date | Fri, 12 Jun 2020 15:08:23 -0400 |
parents | eb1f4aa80c72 |
children | 33a0ff1bb7df |
files | fast5_subset.xml macros.xml test-data/batch.fast5 test-data/multi.fast5.tar test-data/single.fast5.tar |
diffstat | 5 files changed, 131 insertions(+), 49 deletions(-) [+] |
line wrap: on
line diff
--- a/fast5_subset.xml Mon Jun 08 15:57:10 2020 -0400 +++ b/fast5_subset.xml Fri Jun 12 15:08:23 2020 -0400 @@ -1,56 +1,86 @@ <?xml version="1.0"?> -<tool id="ont_fast5_api_fast5_subset" name="Fast5 subset" version="@TOOL_VERSION@+galaxy0" profile="18.01"> - <description>of multi read file</description> +<tool id="ont_fast5_api_fast5_subset" name="@TOOL_NAME@ Subset" version="@TOOL_VERSION@+galaxy1" profile="18.01"> + <description>of multi read file(s)</description> <macros> <import>macros.xml</import> </macros> <expand macro="requirements"/> - <version_command></version_command> + <!-- no specific version command for subcommand fast5_subset available --> + <version_command><![CDATA[compress_fast5 -v]]></version_command> <command detect_errors="exit_code"><![CDATA[ ## initialize -mkdir data && -#for $num, $current in enumerate($input): - ln -s '$current' './data/batch${num}.fast5' && -#end for +mkdir -p './data' && +tar -xf '$input' -C './data' && ## run fast5_subset ## required ---input ./data --s ./results +--input './data' +@SAVEPATH@ --read_id_list '$read_id_list' ## optional ---batch_size $batch_size --t \${GALAXY_SLOTS:-4} +@COMPRESSION@ +@BATCHSIZE@ +@THREADS@ + +## create tarball +@TARBALL@ ]]></command> <inputs> - <param argument="--input" type="data" format="fast5" multiple="true" - label="Select multi read input file(s)"/> - <param argument="--read_id_list" type="data" format="tabular" - label="Select file with read_ids" help="Either containing 1 read_id per line or a tabular file with a column named read_id."/> - <param argument="--batch_size" type="integer" value="4000" min="1" - label="Set batch size" help="Number of single reads to include in each multi read file"/> + <expand macro="input" argument="--input"/> + <param argument="--read_id_list" type="data" format="tabular" label="Select file with read IDs" help="Either containing 1 read_id per line or a tabular file with a column named read_id."/> + <expand macro="batch_size"/> + <expand 
macro="compression"> + <option value="none" selected="true">None</option> + <option value="vbz">VBZ</option> + </expand> </inputs> <outputs> - <data name="out_results" format="fast5"> - <discover_datasets pattern="(?P<designation>.+)\.fast5" format="fast5" directory="results" assign_primary_output="true" visible="true"/> - </data> + <expand macro="output"/> </outputs> <tests> + <!-- #1 default --> <test expect_num_outputs="1"> - <param name="input" value="batch.fast5"/> + <param name="input" value="multi.fast5.tar"/> + <param name="read_id_list" value="list.txt"/> + <output name="out_results"> + <assert_contents> + <has_size value="30720"/> + </assert_contents> + </output> + </test> + <!-- #2 --> + <test expect_num_outputs="1"> + <param name="input" value="multi.fast5.tar"/> <param name="read_id_list" value="list.txt"/> <param name="batch_size" value="2"/> + <param name="compression" value="gzip"/> + <output name="out_results"> + <assert_contents> + <has_size value="51200"/> + </assert_contents> + </output> + </test> + <!-- #3 --> + <test expect_num_outputs="1"> + <param name="input" value="multi.fast5.tar"/> + <param name="read_id_list" value="list.txt"/> + <param name="compression" value="vbz"/> <output name="out_results"> <assert_contents> - <has_size value="23304"/> + <has_size value="40960"/> </assert_contents> - <!-- batch0 is represented by out_results --> - <discovered_dataset designation="batch1" ftype="fast5"> - <assert_contents> - <has_size value="17328"/> - </assert_contents> - </discovered_dataset> + </output> + </test> + <!-- #4 --> + <test expect_num_outputs="1"> + <param name="input" value="multi.fast5.tar"/> + <param name="read_id_list" value="list.txt"/> + <param name="compression" value="vbz_legacy_v0"/> + <output name="out_results"> + <assert_contents> + <has_size value="40960"/> + </assert_contents> </output> </test> </tests> @@ -61,15 +91,15 @@ @WID@ -fast5_subset extracts reads from multi_read_fast5_file(s) based on a list of read IDs. 
+*fast5_subset* extracts reads from multi read FAST5 file(s) based on a list of read IDs. **Input** -A multi read file in FAST5 format and a list of read IDs that should be extracted. +Multi read file(s) in FAST5 format, that are stored in a flat TAR, and a list of read IDs that should be extracted. **Output** -A multi read file in FAST5 format containing a subset of the input file. +Multi read file(s) in FAST5 format containing a subset of the input file(s). The results are stored in a flat TAR. .. class:: infomark @@ -78,4 +108,4 @@ @REFERENCES@ ]]></help> <expand macro="citations"/> -</tool> +</tool> \ No newline at end of file
--- a/macros.xml Mon Jun 08 15:57:10 2020 -0400 +++ b/macros.xml Fri Jun 12 15:08:23 2020 -0400 @@ -1,31 +1,83 @@ <?xml version="1.0"?> <macros> <token name="@TOOL_VERSION@">3.1.3</token> + <token name="@TOOL_NAME@">ont_fast5_api:</token> <xml name="requirements"> <requirements> <requirement type="package" version="@TOOL_VERSION@">ont-fast5-api</requirement> + <requirement type="package" version="1.10.5">hdf5</requirement> </requirements> </xml> - <xml name="citations"> - <citations> - <citation type="bibtex">@online{ont_fast5_api, - author = {Oxford Nanopore Technologies }, - title = {ont_fast5_api}, - year = 2020, - url = {https://github.com/nanoporetech/ont_fast5_api}, - urldate = {2020-06-01} - }</citation> - </citations> + + <!-- command --> + <token name="@BATCHSIZE@"><![CDATA[ +--batch_size $batch_size + ]]></token> + <token name="@COMPRESSION@"><![CDATA[ +#if $compression != 'none' + --compression '$compression' +#end if + ]]></token> + <token name="@INITIALIZE@"><![CDATA[ +mkdir -p './data' && +tar -xf '$input_path' -C './data' && + ]]></token> + <token name="@INPUTPATH@"><![CDATA[ +--input_path './data' + ]]></token> + <token name="@SAVEPATH@"><![CDATA[ +--save_path './results' + ]]></token> + <token name="@TARBALL@"><![CDATA[ +&& find './results' -type f -name '*.fast5' | tar --transform 's/.*\///g' -cvf './results.fast5.tar' --files-from=/dev/stdin + ]]></token> + <token name="@THREADS@"><![CDATA[ +--threads \${GALAXY_SLOTS:-4} + ]]></token> + + <!-- input --> + <xml name="input" token_argument="--input_path" token_label="multi"> + <param argument="@ARGUMENT@" type="data" format="fast5.tar" label="Select @LABEL@ read input file"/> </xml> - <token name="@WID@"><![CDATA[ -ont_fast5_api is a simple interface to HDF5 files of the Oxford Nanopore FAST5 file format. 
+ <xml name="batch_size"> + <param argument="--batch_size" type="integer" value="4000" min="1" label="Set batch size" help="Number of single reads to include in each multi read file"/> + </xml> + <xml name="compression"> + <param argument="compression" type="select" label="Select output compression type"> + <yield/> + <option value="vbz_legacy_v0">VBZ legacy v0</option> + <option value="gzip">GZIP</option> + </param> + </xml> -- Concrete implementation of the FAST5 file schema using the generic h5py library -- Plain-english-named methods to interact with and reflect the FAST5 file schema -- Tools to convert between multi_read and single_read formats -- Tools to compress/decompress raw data in files + <!-- output --> + <xml name="output"> + <data name="out_results" format="fast5.tar" from_work_dir="results.fast5.tar" label="${tool.name} on ${on_string}: Results"/> + </xml> + + <!-- help --> + <token name="@WID@"><![CDATA[ +*ont_fast5_api* is a simple interface to HDF5 files of the Oxford Nanopore FAST5 file format. + +- concrete implementation of the FAST5 file schema using the generic h5py library +- plain-english-named methods to interact with and reflect the FAST5 file schema +- tools to convert between multi_read and single_read formats +- tools to compress/decompress raw data in files ]]></token> <token name="@REFERENCES@"><![CDATA[ More information is available on `github <https://github.com/nanoporetech/ont_fast5_api>`_. ]]></token> -</macros> \ No newline at end of file + + <xml name="citations"> + <citations> + <citation type="bibtex"> + @online{ont_fast5_api, + author = {Oxford Nanopore Technologies }, + title = {ont_fast5_api}, + year = 2020, + url = {https://github.com/nanoporetech/ont_fast5_api}, + urldate = {2020-06-01} + }</citation> + </citations> + </xml> +</macros>