comparison bamqc.xml @ 2:934cd08c77af draft

Uploaded
author joachim-jacob
date Tue, 12 Feb 2013 04:48:36 -0500
parents
children 9537dd9dd18b
comparison
equal deleted inserted replaced
1:59c0a4f9c9ff 2:934cd08c77af
1
2 <tool id="qualimap_bamqc" name="Analyse SAM/BAM with bamqc" version="0.0.1">
3 <!-- Additional info: wrapper compatible with versions ..... -->
4 <description>
5 to assess mapping quality metrics.
6 </description>
7
8 <version_command>
9 qualimap --version
10 </version_command>
11 <!-- Declares the qualimap package as the tool's only requirement; version_command above queries it with "qualimap -‌-version". -->
12 <requirements>
13 <requirement type="package">qualimap</requirement>
14 </requirements>
15 <!-- Runs bamqc_wrapper.pl through the perl interpreter; its single argument, $configfile, refers to the configfile element named "configfile" in the configfiles section. -->
16 <command interpreter="perl">
17 ## it is recommended that you write a wrapper for your tool
18 ## and pass all parameters to that tool, which parses them.
19 bamqc_wrapper.pl $configfile
20 </command>
21
22 <inputs> <!-- User-facing parameters; each name below is read by the configfile template (bam, c, hm, nr, customgtf.gff/os/p). -->
23 <param format="sam,bam" name="bam" type="data" label="Alignments in the BAM or SAM format" help="The set of aligned reads." />
24 <param type="boolean" name="c" checked="true" truevalue="-c" falsevalue="" label="paint chromosome limits inside charts" />
25 <conditional name="customgtf"> <!-- Region-of-interest options: gff, os and p exist only in the "yes" branch. -->
26 <param name="upload" type="select" label="BETA! Analyze the alignment data for the regions of interest you provide">
27 <option value="yes">Yes</option>
28 <option value="no" selected="true">No</option>
29 </param>
30 <when value="yes">
31 <param name="gff" type="data" format="bed,gtf,gff3" label="Choose your feature annotation file" help="Provide your BED, GTF or GFF file"/>
32 <param name="os" type="boolean" checked="false" truevalue="-os" falsevalue="" label="compute also regions outside stats" help="If checked, the information about the reads that are mapped outside of the regions of interest will be also computed and shown in a separate section" />
33 <param type="select" name="p" label="The sequencing protocol strand specificity" help="Can be non-strand-specific, forward-stranded or reverse-stranded. This information is required to calculate the number of correct strand reads.">
34 <option value="NON-STRAND-SPECIFIC">Non-strand-specific</option>
35 <option value="STRAND-SPECIFIC-FORWARD">Strand-specific forward</option>
36 <option value="STRAND-SPECIFIC-REVERSE">Strand-specific reverse</option>
37 </param>
38 </when>
39 <when value="no"/>
40 </conditional>
41 <param name="hm" type="integer" min="1" size="3" value="3" label="minimum size for a homopolymer to be considered in indel analysis" help="Only homopolymers of this size or larger will be considered when estimating homopolymer indels count"/> <!-- integer rather than free text, so non-numeric or non-positive values are rejected in the form -->
42 <param name="nr" type="integer" min="1" size="6" value="1000" label="number of reads in the chunk" help="In order to reduce the load of I/O, reads are analyzed in chunks. Each chunk contains the selected number of reads which will be loaded into memory and analyzed by a single thread. Smaller numbers may result in lower performance, but also the memory consumption will be reduced. The default value is 1000 reads"/> <!-- integer for the same reason as hm; the default stays 1000 -->
43 </inputs>
44
45 <outputs>
46 <data name="bamqc_result" format="html" label="${tool.name} on ${on_string}">
47 <!-- Disabled alternative: the same html dataset, additionally declared with from_work_dir="bamqc_output/qualimapReport.html". -->
48 </data>
49 </outputs>
50
51 <configfiles>
52 <!-- Template that collects all parameter settings as one "key==value" line per setting; it is referenced from the command section as $configfile. The gff, os and p keys are written only when customgtf.upload equals "yes". -->
53 <configfile name="configfile">
54 ## first we pass some galaxy environment variables
55 galtemp==${__new_file_path__}
56
57 bamqc_result==$bamqc_result
58 outputdir==$bamqc_result.files_path
59 bam==$bam
60 c==$c
61 hm==$hm
62 nr==$nr
63 #if $customgtf.upload=="yes"
64 gff==$customgtf.gff
65 os==$customgtf.os
66 p==$customgtf.p
67 #end if
68 </configfile>
69 </configfiles>
70
71 <tests>
72 <!-- NOTE(review): every test below sets TopHat-style parameters (input1, genomeSource, sPaired, ...) and checks outputs (junctions, accepted_hits, insertions, deletions) that this tool's inputs and outputs sections do not declare (they declare bam, c, customgtf, hm, nr and bamqc_result). They look like leftovers from a TopHat wrapper template; confirm and replace with bamqc tests. First test: base-space single-end reads with pre-built index and preset parameters -->
73 <test>
74 <!-- TopHat commands:
75 tophat -o tmp_dir -p 1 tophat_in1 test-data/tophat_in2.fastqsanger
76 Rename the files in tmp_dir appropriately
77 -->
78 <param name="input1" ftype="fastqsanger" value="tophat_in2.fastqsanger" />
79 <param name="genomeSource" value="indexed" />
80 <param name="index" value="tophat_test" />
81 <param name="sPaired" value="single" />
82 <param name="sSettingsType" value="preSet" />
83 <output name="junctions" file="tophat_out1j.bed" />
84 <output name="accepted_hits" file="tophat_out1h.bam" compare="sim_size" />
85 </test>
86 <!-- Test using base-space test data: paired-end reads, index from history. -->
87 <test>
88 <!-- TopHat commands:
89 bowtie-build -f test-data/tophat_in1.fasta tophat_in1
90 tophat -o tmp_dir -p 1 -r 20 tophat_in1 test-data/tophat_in2.fastqsanger test-data/tophat_in3.fastqsanger
91 Rename the files in tmp_dir appropriately
92 -->
93 <param name="input1" ftype="fastqsanger" value="tophat_in2.fastqsanger" />
94 <param name="genomeSource" value="history" />
95 <param name="ownFile" ftype="fasta" value="tophat_in1.fasta" />
96 <param name="sPaired" value="paired" />
97 <param name="input2" ftype="fastqsanger" value="tophat_in3.fastqsanger" />
98 <param name="mate_inner_distance" value="20" />
99 <param name="pSettingsType" value="preSet" />
100 <output name="junctions" file="tophat_out2j.bed" />
101 <output name="accepted_hits" file="tophat_out2h.bam" compare="sim_size" />
102 </test>
103 <!-- Test base-space single-end reads with user-supplied reference fasta and full parameters -->
104 <test>
105 <!-- Tophat commands:
106 bowtie-build -f test-data/tophat_in1.fasta tophat_in1
107 tophat -o tmp_dir -p 1 -a 8 -m 0 -i 70 -I 500000 -F 0.15 -g 40 +coverage-search +min-coverage-intron 50 +max-coverage-intro 20000 +segment-mismatches 2 +segment-length 25 +closure-search +min-closure-exon 50 +min-closure-intron 50 +max-closure-intro 5000 +microexon-search tophat_in1 test-data/tophat_in2.fastqsanger
108 Replace the + with double-dash
109 Rename the files in tmp_dir appropriately
110 -->
111 <param name="input1" ftype="fastqsanger" value="tophat_in2.fastqsanger"/>
112 <param name="genomeSource" value="history"/>
113 <param name="ownFile" value="tophat_in1.fasta"/>
114 <param name="sPaired" value="single"/>
115 <param name="sSettingsType" value="full"/>
116 <param name="library_type" value="FR Unstranded"/>
117 <param name="anchor_length" value="8"/>
118 <param name="splice_mismatches" value="0"/>
119 <param name="min_intron_length" value="70"/>
120 <param name="max_intron_length" value="500000"/>
121 <param name="max_multihits" value="40"/>
122 <param name="min_segment_intron" value="50" />
123 <param name="max_segment_intron" value="500000" />
124 <param name="seg_mismatches" value="2"/>
125 <param name="seg_length" value="25"/>
126 <param name="allow_indel_search" value="Yes"/>
127 <param name="max_insertion_length" value="3"/>
128 <param name="max_deletion_length" value="3"/>
129 <param name="use_junctions" value="Yes" />
130 <param name="use_annotations" value="No" />
131 <param name="use_juncs" value="No" />
132 <param name="no_novel_juncs" value="No" />
133 <param name="use_search" value="Yes" />
134 <param name="min_closure_exon" value="50" />
135 <param name="min_closure_intron" value="50" />
136 <param name="max_closure_intron" value="5000" />
137 <param name="use_search" value="Yes" />
138 <param name="min_coverage_intron" value="50" />
139 <param name="max_coverage_intron" value="20000" />
140 <param name="microexon_search" value="Yes" />
141 <output name="insertions" file="tophat_out3i.bed" />
142 <output name="deletions" file="tophat_out3d.bed" />
143 <output name="junctions" file="tophat_out3j.bed" />
144 <output name="accepted_hits" file="tophat_out3h.bam" compare="sim_size" />
145 </test>
146 <!-- Test base-space paired-end reads with user-supplied reference fasta and full parameters -->
147 <test>
148 <!-- TopHat commands:
149 tophat -o tmp_dir -r 20 -p 1 -a 8 -m 0 -i 70 -I 500000 -F 0.15 -g 40 +coverage-search +min-coverage-intron 50 +max-coverage-intro 20000 +segment-mismatches 2 +segment-length 25 +microexon-search tophat_in1 test-data/tophat_in2.fastqsanger test-data/tophat_in3.fastqsanger
150 Replace the + with double-dash
151 Rename the files in tmp_dir appropriately
152 -->
153 <param name="input1" ftype="fastqsanger" value="tophat_in2.fastqsanger"/>
154 <param name="genomeSource" value="indexed"/>
155 <param name="index" value="tophat_test"/>
156 <param name="sPaired" value="paired"/>
157 <param name="input2" ftype="fastqsanger" value="tophat_in3.fastqsanger"/>
158 <param name="mate_inner_distance" value="20"/>
159 <param name="pSettingsType" value="full"/>
160 <param name="library_type" value="FR Unstranded"/>
161 <param name="mate_std_dev" value="20"/>
162 <param name="anchor_length" value="8"/>
163 <param name="splice_mismatches" value="0"/>
164 <param name="min_intron_length" value="70"/>
165 <param name="max_intron_length" value="500000"/>
166 <param name="max_multihits" value="40"/>
167 <param name="min_segment_intron" value="50" />
168 <param name="max_segment_intron" value="500000" />
169 <param name="seg_mismatches" value="2"/>
170 <param name="seg_length" value="25"/>
171 <param name="allow_indel_search" value="No"/>
172 <param name="use_junctions" value="Yes" />
173 <param name="use_annotations" value="No" />
174 <param name="use_juncs" value="No" />
175 <param name="no_novel_juncs" value="No" />
176 <param name="use_search" value="No" />
177 <param name="microexon_search" value="Yes" />
178 <output name="junctions" file="tophat_out4j.bed" />
179 <output name="accepted_hits" file="tophat_out4h.bam" compare="sim_size" />
180 </test>
181 </tests>
182
183 <help>
184 **Tool Overview**
185
186 Tool_ allows a simple but thorough check of the quality of mapping.
187
188 .. _Tool: http://qualimap.bioinfo.cipf.es//
189
190 ------
191
192 **Know what you are doing**
193
194 .. class:: warningmark
195
196 Know what you are doing by reading the `documentation`__ and experimenting.
197
198 .. __: http://tophat.cbcb.umd.edu/manual.html
199
200 ------
201
202 **Input formats**
203
204 Tool accepts alignment files in SAM or BAM format.
205
206 ------
207
208 **Outputs**
209
210 Tool produces two output files:
211
212 - junctions -- A UCSC BED_ track of junctions reported by TopHat. Each junction consists of two connected BED blocks, where each block is as long as the maximal overhang of any read spanning the junction. The score is the number of alignments spanning the junction.
213 - accepted_hits -- A list of read alignments in BAM_ format.
214
215 .. _BED: http://genome.ucsc.edu/FAQ/FAQformat.html#format1
216 .. _BAM: http://samtools.sourceforge.net/
217
218 Two other possible outputs, depending on the options you choose, are insertions and deletions, both of which are in BED format.
219
220 -------
221
222 **Tool settings**
223
224 All of the options have a default value. You can change any of them. Some of the options in Tophat have been implemented here.
225
226 ------
227
228 **Tool parameter list**
229
230 This is a list of implemented Tophat options::
231
232 -r This is the expected (mean) inner distance between mate pairs. For, example, for paired end runs with fragments
233 selected at 300bp, where each end is 50bp, you should set -r to be 200. There is no default, and this parameter
234 is required for paired end runs.
235
236 </help>
237 </tool>
238