comparison stacks_kmerfilter.xml @ 0:b2e3553e1be2 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/stacks2 commit b395fa36fa826e26085820ba3a9faacaeddcb460
author iuc
date Mon, 01 Jul 2019 10:58:28 -0400
parents
children 38c9f9a680f0
comparison
equal deleted inserted replaced
-1:000000000000 0:b2e3553e1be2
1 <tool id="stacks2_kmerfilter" name="Stacks2: kmer filter" profile="@PROFILE@" version="@STACKS_VERSION@+galaxy@WRAPPER_VERSION@">
2 <description>Identify PCR clones</description>
3 <macros>
4 <import>macros.xml</import>
5 </macros>
6 <expand macro="requirements"/>
7 <expand macro="version_cmd"/>
8 <command detect_errors="aggressive"><![CDATA[
9 @FASTQ_INPUT_FUNCTIONS@
10 ## working directories: kmer_filter writes its results to stacks_outputs (-o); stacks_inputs is presumably filled by the link command
11 mkdir stacks_inputs stacks_outputs &&
12 ## fastq_input_batch yields the link command, the forward/reverse read paths and the value passed to -i
13 #set ($link_command, $fwd_path, $rev_path, $inputype) = $fastq_input_batch($input_type.fqinputs, $input_type.input_type_select)
14 $link_command
15 ## assemble the kmer_filter call; the boolean params expand to their flag or to an empty string
16 kmer_filter
17 #if $input_type.input_type_select == 'single':
18 -f '$fwd_path'
19 #else
20 -1 '$fwd_path'
21 -2 '$rev_path'
22 #end if
23 ## TODO $options_kmer_char.read_k_freq
24 -i $inputype
25 -o stacks_outputs
26 $capture
27 -y fastq
28 $options_filtering.rare
29 $options_filtering.abundant
30 --k_len $options_filtering.k_len
31 --max_k_freq $options_advanced_filtering.max_k_freq
32 #if str($options_advanced_filtering.min_lim)!="":
33 --min_lim $options_advanced_filtering.min_lim
34 #end if
35 #if str($options_advanced_filtering.max_lim)!="":
36 --max_lim $options_advanced_filtering.max_lim
37 #end if
38 #if str($options_normalization.normalize)!="":
39 --normalize $options_normalization.normalize
40 #end if
41 #if $options_kmer_char.write_k_freq
42 --write_k_freq '$kfreq'
43 #end if
44 $options_kmer_char.k_dist
45 #if $options_kmer_char.k_dist
46 | sed 's/KmerFrequency/# KmerFrequency/' > '$kfreqdist'
47 #end if
48 @TEE_APPEND_LOG@
49 @CAT_LOG_TO_STDERR@
50
51 ## move outputs such that Galaxy can find them
52 ## if filtering is on then ...filt...fq is created
53 ## if normalization is on then ...norm...fq is created
54 ## if both are active then both files are created, but only norm is needed
55 #if str($options_filtering.rare)!="" or str($options_filtering.abundant)!="" or str($options_normalization.normalize)!="":
56 #if str($options_normalization.normalize)!="":
57 #set infix="norm"
58 #else
59 #set infix="fil"
60 #end if
61 #if $capture:
62 #if $input_type.input_type_select == "single"
63 && mv stacks_outputs/*.discards.fastq '$discarded'
64 #else
65 && mv stacks_outputs/*.1.discards.fastq '$discarded_pair.forward'
66 && mv stacks_outputs/*.2.discards.fastq '$discarded_pair.reverse'
67 #end if
68 #end if
69 #if $input_type.input_type_select == "single"
70 && mv stacks_outputs/*.${infix}.fastq '$clean'
71 #else
72 && mv stacks_outputs/*.1.${infix}.fastq '$clean_pair.forward'
73 && mv stacks_outputs/*.2.${infix}.fastq '$clean_pair.reverse'
74 #end if
75 #end if
76
77 ]]></command>
78 <inputs> <!-- section and param names below are referenced from the command template -->
79 <expand macro="fastq_input_bc"/>
80 <param name="capture" type="boolean" checked="false" truevalue="-D" falsevalue="" argument="-D" label="Capture discarded reads to a file" />
81 <section name="options_filtering" title="Filtering options" expanded="False">
82 <param argument="--rare" type="boolean" checked="false" truevalue="--rare" falsevalue="" label="Turn on filtering based on rare k-mers" />
83 <param argument="--abundant" type="boolean" checked="false" truevalue="--abundant" falsevalue="" label="Turn on filtering based on abundant k-mers" />
84 <param argument="--k_len" type="integer" value="15" label="K-mer size" />
85 </section>
86 <section name="options_advanced_filtering" title="Advanced filtering options" expanded="False">
87 <param argument="--max_k_freq" type="integer" value="20000" label="Number of times a kmer must occur to be considered abundant" />
88 <param argument="--min_lim" type="integer" value="" optional="true" label="Number of rare kmers occurring in a row required to discard a read" help="(default: 80% of the k-mer length)." />
89 <param argument="--max_lim" type="integer" value="" optional="true" label="Number of abundant kmers required to discard a read" help="(default: 80% of the k-mers in a read)" />
90 </section>
91 <section name="options_normalization" title="Normalization options" expanded="False">
92 <param argument="--normalize" type="integer" value="" optional="true" label="Normalize read depth according to k-mer coverage" />
93 </section>
94 <section name="options_kmer_char" title="Characterizing K-mers options" expanded="False">
95 <param argument="--write_k_freq" type="boolean" checked="false" truevalue="--write_k_freq" falsevalue="" label="Write kmers along with their frequency of occurrence and exit" />
96 <param argument="--k_dist" type="boolean" checked="false" truevalue="--k_dist" falsevalue="" label="Print k-mer frequency distribution and exit" />
97 </section>
98 <!--<section name="options_advanced_input" title="Advanced input options" expanded="False">
99 <param argument="\-\-read_k_freq" type="boolean" checked="false" truevalue="\-\-read_k_freq" falsevalue="" label="Read a set of kmers along with their frequencies of occurrence instead of reading raw input files" />
100 </section>-->
101 <expand macro="in_log"/>
102 </inputs>
103 <outputs> <!-- each output below exists only when its filter expression is true -->
104 <expand macro="out_log"/>
105 <data name="clean" format="fastqsanger" label="${tool.name} on ${on_string}"> <!-- filtered/normalized reads for single-end input; the command moves the fil/norm file here -->
106 <filter>input_type['input_type_select'] == 'single' and not options_kmer_char['k_dist']</filter>
107 </data>
108 <collection name="clean_pair" type="paired" label="${tool.name} on ${on_string}"> <!-- paired-end counterpart of "clean" -->
109 <filter>input_type['input_type_select'] == 'paired' and not options_kmer_char['k_dist']</filter>
110 </collection>
111 <data name="discarded" format="fastqsanger" label="${tool.name} on ${on_string}: discarded reads"> <!-- single-end discards, only when "capture" (-D) is enabled -->
112 <filter>capture and input_type['input_type_select'] == 'single' and not options_kmer_char['k_dist']</filter>
113 </data>
114 <collection name="discarded_pair" type="paired" label="${tool.name} on ${on_string}: discarded reads"> <!-- paired-end counterpart of "discarded" -->
115 <filter>capture and input_type['input_type_select'] == 'paired' and not options_kmer_char['k_dist']</filter>
116 </collection>
117 <data format="tabular" name="kfreq" label="${tool.name} on ${on_string} kmer frequencies"> <!-- only when write_k_freq is enabled -->
118 <filter>options_kmer_char['write_k_freq']</filter>
119 </data>
120 <data format="tabular" name="kfreqdist" label="${tool.name} on ${on_string} kmer frequency distribution"> <!-- only when k_dist is enabled; receives the sed-processed stdout of the command -->
121 <filter>options_kmer_char['k_dist']</filter>
122 </data>
123 </outputs>
124 <tests>
125 <!-- default output for filtering: single-end input with rare and abundant filtering, k_len 16, log output enabled -->
126 <test>
127 <conditional name="input_type">
128 <param name="input_type_select" value="single" />
129 <param name="fqinputs" ftype="fastqsanger.gz" value="clonefilter/R1_0001.1.fq.gz" />
130 </conditional>
131 <param name="add_log" value="yes" />
132 <output name="output_log" ftype="txt" file="kmerfilter/kmerfilter.log" lines_diff="8"/>
133 <param name="rare" value="--rare"/>
134 <param name="abundant" value="--abundant" />
135 <param name="k_len" value="16" />
136 <assert_command>
137 <has_text text="--rare" />
138 <has_text text="--abundant" />
139 <has_text text="--k_len 16" />
140 </assert_command>
141 <output name="clean" compare="sim_size" file="clonefilter/Removed1_0001.1.1.fq.single.gz"/>
142 </test>
143 <test> <!-- paired-end input with normalization (depth 1) and capture of discarded reads -->
144 <conditional name="input_type">
145 <param name="input_type_select" value="paired" />
146 <param name="fqinputs">
147 <collection type="paired">
148 <element name="forward" value="clonefilter/R1_0001.1.fq.gz" />
149 <element name="reverse" value="clonefilter/R2_0001.2.fq.gz" />
150 </collection>
151 </param>
152 </conditional>
153 <param name="capture" value="-D" />
154 <param name="normalize" value="1" />
155 <assert_command>
156 <has_text text="--normalize 1" />
157 </assert_command>
158 <output_collection name="clean_pair" type="paired">
159 <element name="forward" compare="sim_size" file="clonefilter/Removed1_0001.1.1.fq.gz" />
160 <element name="reverse" compare="sim_size" file="clonefilter/Removed2_0001.2.2.fq.gz" />
161 </output_collection>
162 <output_collection name="discarded_pair" type="paired"> <!-- NOTE(review): same reference files as clean_pair and only sizes are compared (sim_size); confirm this is intended -->
163 <element name="forward" compare="sim_size" file="clonefilter/Removed1_0001.1.1.fq.gz" />
164 <element name="reverse" compare="sim_size" file="clonefilter/Removed2_0001.2.2.fq.gz" />
165 </output_collection>
166 </test>
167 <!-- kfreq output: single-end input with write_k_freq enabled -->
168 <test>
169 <conditional name="input_type">
170 <param name="input_type_select" value="single" />
171 <param name="fqinputs" ftype="fastqsanger.gz" value="clonefilter/R1_0001.1.fq.gz" />
172 </conditional>
173 <section name="options_kmer_char">
174 <param name="write_k_freq" value="--write_k_freq" />
175 </section>
176 <output name="kfreq" file="kmerfilter/kfreq.tsv"/>
177 </test>
178 <!-- kfreqdist output: single-end input with k_dist enabled -->
179 <test>
180 <conditional name="input_type">
181 <param name="input_type_select" value="single" />
182 <param name="fqinputs" ftype="fastqsanger.gz" value="clonefilter/R1_0001.1.fq.gz" />
183 </conditional>
184 <section name="options_kmer_char">
185 <param name="k_dist" value="--k_dist" />
186 </section>
187 <output name="kfreqdist" file="kmerfilter/kfreqdist.tsv"/>
188 </test>
189 </tests>
190 <help>
191 <![CDATA[
192 .. class:: infomark
193
194 Allows paired or single-end reads to be filtered according to the number of rare or abundant kmers they contain. Useful for both RAD datasets as well as randomly sheared genomic or transcriptomic data.
195
196 @STACKS_INFOS@
197 ]]>
198 </help>
199 <expand macro="citation" />
200 </tool>