Mercurial > repos > jgarbe > redup
diff redup.xml @ 0:df1e7c7dd9cb draft default tip
Initial uploaded of files
author | jgarbe |
---|---|
date | Wed, 27 Nov 2013 14:39:56 -0500 |
parents | |
children |
line wrap: on
line diff
<tool id="redup" name="Redup" version="1.0">
  <!--
    Galaxy wrapper around redup.pl. Takes two paired-end FASTQ datasets and
    writes the unique reads of each to its own output dataset. When a value is
    given for opt_n, the script's standard output is redirected into a third
    dataset ("duplicates").
  -->
  <description>Remove exact duplicate reads from paired-end fastq files</description>
  <command interpreter="perl">
    ## Need to handle file names, probably should fix redup.pl
    redup.pl
    ## Compare the rendered value with the empty string instead of testing its
    ## truthiness: 0 passes the in_range validator but is false in a bare "#if",
    ## which would drop the option while the duplicates output is still created.
    #if str($opt_n) != '':
      -n $opt_n
    #end if
    ## Paths are single-quoted so the shell treats each one as a single argument.
    '$fastq1_in' '$fastq2_in' '$unique1_out' '$unique2_out'
    ## Same condition as above: stdout is captured only when opt_n was supplied.
    #if str($opt_n) != '':
      > '$duplicates'
    #end if
  </command>
  <inputs>
    <param name="fastq1_in" type="data" format="fastq" label="Fastq Input 1"/>
    <param name="fastq2_in" type="data" format="fastq" label="Fastq Input 2"/>
    <!-- Optional: leaving it empty omits "-n" and suppresses the duplicates output. -->
    <param name="opt_n" type="integer" value="20" optional="true"
           label="Number of most duplicated sequences printed out. (default 20)">
      <validator type="in_range" message="Value can not be negative" min="0"/>
    </param>
  </inputs>
  <!-- Any non-zero exit code from the script marks the job as failed. -->
  <stdio>
    <exit_code range="1:" level="fatal"/>
  </stdio>
  <outputs>
    <!-- Each unique-reads output inherits the datatype of its matching input. -->
    <data format_source="fastq1_in" name="unique1_out"
          label="${tool.name} on ${on_string}: fastq1.unique"/>
    <data format_source="fastq2_in" name="unique2_out"
          label="${tool.name} on ${on_string}: fastq2.unique"/>
    <!--
      Created only when opt_n carries a value, mirroring the condition used in
      the command template. Both None and the empty string are rejected.
      NOTE(review): which of the two an empty optional integer yields here is
      assumed to vary between Galaxy releases; confirm on the target version.
    -->
    <data format="fasta" name="duplicates"
          label="${tool.name} on ${on_string}: top duplicates">
      <filter>opt_n is not None and str(opt_n) != ''</filter>
    </data>
  </outputs>
  <tests>
    <!-- Runs with opt_n set, so all three outputs are expected. -->
    <test>
      <param name="fastq1_in" ftype="fastq" value="input1.fastq"/>
      <param name="fastq2_in" ftype="fastq" value="input2.fastq"/>
      <param name="opt_n" value="20"/>
      <output name="unique1_out" file="output1.fastq"/>
      <output name="unique2_out" file="output2.fastq"/>
      <output name="duplicates" file="duplicates.fasta"/>
    </test>
  </tests>

  <help>
This script removes duplicate paired-end reads from the input files sample1_R1.fastq and sample1_R2.fastq and prints out unique reads to the files sample1_R1.fastq.unique and sample1_R2.fastq.unique. Reads must have the exact same sequence to be called duplicates, quality scores are ignored.
The top N (default 20) most duplicated sequences are printed out in fasta format, making it convenient for using BLAST to identify them.
  </help>
</tool>