<tool id="sampler" name="Read sampling" version="1.0.0">
    <description>Tool for random sampling of subsets of reads from a larger dataset</description>

    <!-- seqkit performs the actual sampling. -->
    <requirements>
        <requirement type="package">seqkit</requirement>
    </requirements>

    <!-- Any non-zero exit code from the command is reported as a fatal job error. -->
    <stdio>
        <exit_code range="1:" level="fatal" description="Error" />
    </stdio>

    <!--
        Command template (Cheetah).
        Paired mode:  the input is split into two files with deinterlacer.py,
                      each file is sampled with the same number and seed, and
                      the two samples are merged with fasta_interlacer.py.
        Single mode:  the input is sampled directly into the output dataset.
        Dataset paths and the tool directory are single-quoted so that paths
        containing spaces or shell metacharacters do not break the command line.
        Afile, Bfile, Asample, Bsample and tmpfile are relative names, so they
        are created in whatever directory the command is run from.
    -->
    <command>
<![CDATA[
#if str($paired)=="true"
    '${__tool_directory__}/deinterlacer.py' '$input' Afile Bfile
    &&
    seqkit sample -2 --number $number --rand-seed $seed -o Asample -w 0 Afile < /dev/null
    &&
    seqkit sample -2 --number $number --rand-seed $seed -o Bsample -w 0 Bfile < /dev/null
    &&
    '${__tool_directory__}/fasta_interlacer.py' -a Asample -b Bsample -p '$output' -x tmpfile
#else
    seqkit sample -2 --number $number --rand-seed $seed -o '$output' -w 0 '$input'
#end if
]]>
    </command>

    <inputs>
        <!-- FASTA dataset to sample from. -->
        <param format="fasta" type="data" name="input" label="Read file (FASTA)" />
        <!-- Passed to seqkit as the number option; in paired mode it is applied to each mate file. -->
        <param name="number" type="integer" size="7" value="500000" min="1" label="Number of reads or read pairs"/>
        <!-- Passed to seqkit as the rand-seed option; the same seed is used for both mate files in paired mode. -->
        <param name="seed" type="integer" size="10" value="10" min="0" label="Random number generator seed" />
        <!-- Selects the paired branch of the command template when set to "true". -->
        <param name="paired" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Paired-end reads" help="If paired-end reads are sampled, left and right-hand reads must be interlaced and all pairs must be complete."/>
    </inputs>

    <outputs>
        <data format="fasta" name="output" label="Random selection from dataset ${input.hid}, sample size ${number}" />
    </outputs>

    <help>
**What it does**

This tool is intended to create a sample of sequences by taking a 'random' sample from a larger data set.
Using the same seed parameter makes sampling reproducible.
    </help>
</tool>