diff bamqc.xml @ 2:934cd08c77af draft

Uploaded
author joachim-jacob
date Tue, 12 Feb 2013 04:48:36 -0500
parents
children 9537dd9dd18b
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/bamqc.xml	Tue Feb 12 04:48:36 2013 -0500
@@ -0,0 +1,238 @@
+
+<tool id="qualimap_bamqc" name="Analyse SAM/BAM with bamqc" version="0.0.1">
+    <!-- Additional info: wrapper compatible with versions ..... -->
+    <description>
+		to assess mapping quality metrics.
+    </description>
+    
+    <!-- Command whose output identifies the installed Qualimap version; -->
+    <!-- written on a single line so the command text carries no indentation. -->
+    <version_command>qualimap --version</version_command>
+    
+    <requirements>
+        <requirement type="package">qualimap</requirement> <!-- the external Qualimap package this tool depends on -->
+    </requirements>
+    
+    <command interpreter="perl">
+        ## All parameter values are written to the config file declared in the configfiles section;
+        ## only its path is handed to the Perl wrapper. The path is quoted so it stays a single shell argument.
+        bamqc_wrapper.pl '$configfile'
+    </command>
+   
+    <inputs>
+        <param format="sam,bam" name="bam" type="data" label="Alignments in the BAM or SAM format" help="The set of aligned reads." />
+        <param type="boolean" name="c" checked="true" truevalue="-c" falsevalue="" label="paint chromosome limits inside charts" /> <!-- rendered as the flag -c, or as an empty string when unchecked -->
+        <conditional name="customgtf"> <!-- gff, os and p exist only in the "yes" branch -->
+            <param name="upload" type="select" label="BETA! Analyze the alignment data for the regions of interest you provide">
+                <option value="yes">Yes</option>
+                <option value="no" selected="true">No</option>
+            </param>
+            <when value="yes">
+                <param name="gff" type="data" format="bed,gtf,gff3" label="Choose your feature annotation file" help="Provide your BED, GTF or GFF file"/>
+                <param name="os" type="boolean" checked="false" truevalue="-os" falsevalue="" label="compute also regions outside stats" help="If checked, the information about the reads that are mapped outside of the regions of interest will be also computed and shown in a separate section" />
+                <param type="select" name="p" label="The sequencing protocol strand specificity" help="Can be non-strand-specific, forward-stranded or reverse-stranded. This information is required to calculate the number of correct strand reads.">
+                    <option value="NON-STRAND-SPECIFIC">Non-strand-specific</option>
+                    <option value="STRAND-SPECIFIC-FORWARD">Strand-specific forward</option>
+                    <option value="STRAND-SPECIFIC-REVERSE">Strand-specific reverse</option>
+                </param>
+            </when>
+            <when value="no"/>
+        </conditional>
+        <param name="hm" type="integer" size="3" value="3" label="minimum size for a homopolymer to be considered in indel analysis" help="Only homopolymers of this size or larger will be considered when estimating homopolymer indels count"/> <!-- integer type: non-numeric input is rejected in the form instead of reaching the wrapper -->
+        <param name="nr" type="integer" size="6" value="1000" label="number of reads in the chunk" help="In order to reduce the load of I/O, reads are analyzed in chunks. Each chunk contains the selected number of reads which will be loaded into memory and analyzed by a single thread. Smaller numbers may result in lower performance, but also the memory consumption will be reduced. The default value is 1000 reads"/> <!-- integer type for the same reason as hm -->
+    </inputs>
+
+    <outputs>
+        <!-- Single HTML report dataset; its path and its extra-files directory are handed to the wrapper through the config file. -->
+        <!-- Alternative kept for reference: add from_work_dir="bamqc_output/qualimapReport.html" to the data element below. -->
+        <data format="html" name="bamqc_result" label="${tool.name} on ${on_string}" />
+    </outputs>
+
+    <configfiles>
+      <!-- Rendered into a file of key==value lines; the command section passes that file's path to bamqc_wrapper.pl. The gff, os and p entries are written only when the customgtf conditional is set to yes. -->
+      <configfile name="configfile">
+	## first we pass some galaxy environment variables
+	galtemp==${__new_file_path__}
+
+	bamqc_result==$bamqc_result
+	outputdir==$bamqc_result.files_path
+	bam==$bam
+	c==$c
+	hm==$hm
+	nr==$nr
+	#if $customgtf.upload=="yes"
+	 gff==$customgtf.gff
+	 os==$customgtf.os
+	 p==$customgtf.p
+	#end if
+      </configfile>
+    </configfiles> 
+
+    <tests>
+        <!-- NOTE(review): every test in this section sets parameters (input1, genomeSource, sPaired, ...) and checks outputs (junctions, accepted_hits, ...) that this tool does not declare; they look carried over from a TopHat wrapper and need replacing with bamqc tests. Test base-space single-end reads with pre-built index and preset parameters -->
+        <test>
+            <!-- TopHat commands:
+            tophat -o tmp_dir -p 1 tophat_in1 test-data/tophat_in2.fastqsanger
+            Rename the files in tmp_dir appropriately
+            -->
+            <param name="input1" ftype="fastqsanger" value="tophat_in2.fastqsanger" />
+            <param name="genomeSource" value="indexed" />
+            <param name="index" value="tophat_test" />
+            <param name="sPaired" value="single" />
+            <param name="sSettingsType" value="preSet" />
+            <output name="junctions" file="tophat_out1j.bed" />
+            <output name="accepted_hits" file="tophat_out1h.bam" compare="sim_size" />
+        </test>
+        <!-- Test using base-space test data: paired-end reads, index from history. -->
+        <test>
+            <!-- TopHat commands:
+            bowtie-build -f test-data/tophat_in1.fasta tophat_in1
+            tophat -o tmp_dir -p 1 -r 20 tophat_in1 test-data/tophat_in2.fastqsanger test-data/tophat_in3.fastqsanger
+            Rename the files in tmp_dir appropriately
+            -->
+            <param name="input1" ftype="fastqsanger" value="tophat_in2.fastqsanger" />
+            <param name="genomeSource" value="history" />
+            <param name="ownFile" ftype="fasta" value="tophat_in1.fasta" />
+            <param name="sPaired" value="paired" />
+            <param name="input2" ftype="fastqsanger" value="tophat_in3.fastqsanger" />
+            <param name="mate_inner_distance" value="20" />
+            <param name="pSettingsType" value="preSet" />
+            <output name="junctions" file="tophat_out2j.bed" />
+            <output name="accepted_hits" file="tophat_out2h.bam" compare="sim_size" />
+        </test>
+        <!-- Test base-space single-end reads with user-supplied reference fasta and full parameters -->
+        <test>
+            <!-- Tophat commands:
+            bowtie-build -f test-data/tophat_in1.fasta tophat_in1
+            tophat -o tmp_dir -p 1 -a 8 -m 0 -i 70 -I 500000 -F 0.15 -g 40 +coverage-search +min-coverage-intron 50 +max-coverage-intro 20000 +segment-mismatches 2 +segment-length 25 +closure-search +min-closure-exon 50 +min-closure-intron 50 +max-closure-intro 5000 +microexon-search tophat_in1 test-data/tophat_in2.fastqsanger
+            Replace the + with double-dash
+            Rename the files in tmp_dir appropriately
+            -->
+            <param name="input1" ftype="fastqsanger" value="tophat_in2.fastqsanger"/>
+            <param name="genomeSource" value="history"/>
+            <param name="ownFile" value="tophat_in1.fasta"/>
+            <param name="sPaired" value="single"/>
+            <param name="sSettingsType" value="full"/>
+            <param name="library_type" value="FR Unstranded"/>
+            <param name="anchor_length" value="8"/>
+            <param name="splice_mismatches" value="0"/>
+            <param name="min_intron_length" value="70"/>
+            <param name="max_intron_length" value="500000"/>
+            <param name="max_multihits" value="40"/>
+            <param name="min_segment_intron" value="50" />
+            <param name="max_segment_intron" value="500000" />
+            <param name="seg_mismatches" value="2"/>
+            <param name="seg_length" value="25"/>
+            <param name="allow_indel_search" value="Yes"/>
+            <param name="max_insertion_length" value="3"/>
+            <param name="max_deletion_length" value="3"/>
+            <param name="use_junctions" value="Yes" />
+            <param name="use_annotations" value="No" />
+            <param name="use_juncs" value="No" />
+            <param name="no_novel_juncs" value="No" />
+            <param name="use_search" value="Yes" />
+            <param name="min_closure_exon" value="50" />
+            <param name="min_closure_intron" value="50" />
+            <param name="max_closure_intron" value="5000" />
+            <param name="use_search" value="Yes" /> <!-- NOTE(review): use_search is already set a few lines above in this test -->
+            <param name="min_coverage_intron" value="50" />
+            <param name="max_coverage_intron" value="20000" />
+            <param name="microexon_search" value="Yes" />
+            <output name="insertions" file="tophat_out3i.bed" />
+            <output name="deletions" file="tophat_out3d.bed" />
+            <output name="junctions" file="tophat_out3j.bed" />
+            <output name="accepted_hits" file="tophat_out3h.bam" compare="sim_size" />
+        </test>
+        <!-- Test base-space paired-end reads with user-supplied reference fasta and full parameters -->
+        <test>
+            <!-- TopHat commands:
+            tophat -o tmp_dir -r 20 -p 1 -a 8 -m 0 -i 70 -I 500000 -F 0.15 -g 40 +coverage-search +min-coverage-intron 50 +max-coverage-intro 20000 +segment-mismatches 2 +segment-length 25 +microexon-search tophat_in1 test-data/tophat_in2.fastqsanger test-data/tophat_in3.fastqsanger
+            Replace the + with double-dash
+            Rename the files in tmp_dir appropriately
+            -->
+            <param name="input1" ftype="fastqsanger" value="tophat_in2.fastqsanger"/>
+            <param name="genomeSource" value="indexed"/>
+            <param name="index" value="tophat_test"/>
+            <param name="sPaired" value="paired"/>
+            <param name="input2" ftype="fastqsanger" value="tophat_in3.fastqsanger"/>
+            <param name="mate_inner_distance" value="20"/>
+            <param name="pSettingsType" value="full"/>
+            <param name="library_type" value="FR Unstranded"/>
+            <param name="mate_std_dev" value="20"/>
+            <param name="anchor_length" value="8"/>
+            <param name="splice_mismatches" value="0"/>
+            <param name="min_intron_length" value="70"/>
+            <param name="max_intron_length" value="500000"/>
+            <param name="max_multihits" value="40"/>
+            <param name="min_segment_intron" value="50" />
+            <param name="max_segment_intron" value="500000" />
+            <param name="seg_mismatches" value="2"/>
+            <param name="seg_length" value="25"/>
+            <param name="allow_indel_search" value="No"/>
+            <param name="use_junctions" value="Yes" />
+            <param name="use_annotations" value="No" />
+            <param name="use_juncs" value="No" />
+            <param name="no_novel_juncs" value="No" />
+            <param name="use_search" value="No" />
+            <param name="microexon_search" value="Yes" />
+            <output name="junctions" file="tophat_out4j.bed" />
+            <output name="accepted_hits" file="tophat_out4h.bam" compare="sim_size" />
+        </test>
+    </tests>
+
+    <help>
+**Tool Overview**
+
+Tool_ allows for a simple but thorough check of the mapping quality.
+
+.. _Tool: http://qualimap.bioinfo.cipf.es//
+        
+------
+
+**Know what you are doing**
+
+.. class:: warningmark
+
+Know what you are doing by reading the `documentation`__ and experimenting. 
+
+.. __: http://tophat.cbcb.umd.edu/manual.html
+
+------
+
+**Input formats**
+
+Tool accepts alignment files in SAM or BAM format.
+
+------
+
+**Outputs**
+
+Tool produces two output files:
+
+- junctions -- A UCSC BED_ track of junctions reported by TopHat. Each junction consists of two connected BED blocks, where each block is as long as the maximal overhang of any read spanning the junction. The score is the number of alignments spanning the junction.
+- accepted_hits -- A list of read alignments in BAM_ format.
+
+.. _BED: http://genome.ucsc.edu/FAQ/FAQformat.html#format1
+.. _BAM: http://samtools.sourceforge.net/
+
+Two other possible outputs, depending on the options you choose, are insertions and deletions, both of which are in BED format.
+
+-------
+
+**Tool settings**
+
+All of the options have a default value. You can change any of them. Some of the options of Qualimap bamqc have been implemented here.
+
+------
+
+**Tool parameter list**
+
+This is a list of implemented Tophat options::
+
+  -r                                This is the expected (mean) inner distance between mate pairs. For, example, for paired end runs with fragments 
+                                    selected at 300bp, where each end is 50bp, you should set -r to be 200. There is no default, and this parameter 
+                                    is required for paired end runs.
+
+    </help>
+</tool>
+