Mercurial > repos > joachim-jacob > qualimap_suite


<tool id="qualimap_bamqc" name="Analyse SAM/BAM with bamqc" version="0.0.1">
    <!-- Additional info: wrapper compatible with versions ..... -->
    <description>
		to assess mapping quality metrics.
    </description>

    <!-- Command used to record the version of the underlying qualimap program. -->
    <version_command>qualimap --version</version_command>

    <!-- Declares the qualimap package as a dependency of this tool. -->
    <requirements>
        <requirement type="package">qualimap</requirement>
    </requirements>

    <!-- Runs the Perl wrapper script bamqc_wrapper.pl and hands it the path of the
         generated config file (see the configfiles section), which holds all
         parameter settings. -->
    <command interpreter="perl">
        ## it is recommended that you write a wrapper for your tool
        ## and pass all parameters to that tool, which parses them.
        bamqc_wrapper.pl $configfile
    </command>

    <inputs>
        <!-- Alignment file to analyse; written to the config file under the key "bam". -->
        <param format="sam,bam" name="bam" type="data" label="Alignments in the BAM or SAM format" help="The set of aligned reads." />
        <param type="boolean" name="c" checked="true" truevalue="-c" falsevalue="" label="paint chromosome limits inside charts" />
        <!-- Optional analysis restricted to regions of interest. The nested parameters
             (gff, os, p) only exist when "upload" is set to "yes". -->
        <conditional name="customgtf">
            <param name="upload" type="select" label="BETA! Analyze the alignment data for the regions of interest you provide">
                <option value="yes">Yes</option>
                <option value="no" selected="true">No</option>
            </param>
            <when value="yes">
                <param name="gff" type="data" format="bed,gtf,gff3" label="Choose your feature annotation file" help="Provide your BED, GTF or GFF file"/>
                <param name="os" type="boolean" checked="false" truevalue="-os" falsevalue="" label="compute also regions outside stats" help="If checked, the information about the reads that are mapped outside of the regions of interest will be also computed and shown in a separate section" />
                <param type="select" name="p" label="The sequencing protocol strand specificity" help="Can be non-strand-specific, forward-stranded or reverse-stranded. This information is required to calculate the number of correct strand reads.">
                    <option value="NON-STRAND-SPECIFIC">Non-strand-specific</option>
                    <option value="STRAND-SPECIFIC-FORWARD">Strand-specific forward</option>
                    <option value="STRAND-SPECIFIC-REVERSE">Strand-specific reverse</option>
                </param>
            </when>
            <when value="no"/>
        </conditional>
        <!-- Numeric settings are declared as integers (minimum 1) so that non-numeric or
             non-positive values are rejected by Galaxy instead of reaching the wrapper. -->
        <param name="hm" type="integer" min="1" size="3" value="3" label="minimum size for a homopolymer to be considered in indel analysis" help="Only homopolymers of this size or larger will be considered when estimating homopolymer indels count"/>
        <param name="nr" type="integer" min="1" size="6" value="1000" label="number of reads in the chunk" help="In order to reduce the load of I/O, reads are analyzed in chunks. Each chunk contains the selected number of reads which will be loaded into memory and analyzed by a single thread. Smaller numbers may result in lower performance, but also the memory consumption will be reduced. The default value is 1000 reads"/>
    </inputs>

    <outputs>
        <!-- Single HTML report. The path of this dataset and of its files_path directory
             are handed to the wrapper through the config file. An alternative that was
             considered is collecting the report with
             from_work_dir="bamqc_output/qualimapReport.html". -->
        <data name="bamqc_result" format="html" label="${tool.name} on ${on_string}"/>
    </outputs>

    <configfiles>
      <!-- This config file collects all parameter settings, one "key==value" pair per line.
           Its path is passed to bamqc_wrapper.pl as $configfile in the command section.
           The gff, os and p entries are only written when customgtf.upload is "yes". -->
      <configfile name="configfile">
	## first we pass some galaxy environment variables
	galtemp==${__new_file_path__}

	bamqc_result==$bamqc_result
	outputdir==$bamqc_result.files_path
	bam==$bam
	c==$c
	hm==$hm
	nr==$nr
	#if $customgtf.upload=="yes"
	 gff==$customgtf.gff
	 os==$customgtf.os
	 p==$customgtf.p
	#end if
      </configfile>
    </configfiles>

    <!-- NOTE(review): the tests below appear to be carried over from a TopHat wrapper.
         They set parameters (input1, genomeSource, sPaired, ...) and expect outputs
         (junctions, accepted_hits, insertions, deletions) that this tool does not
         declare: its inputs are bam, c, customgtf (upload, gff, os, p), hm and nr, and
         its only output is bamqc_result.
         TODO: replace them with bamqc tests once suitable test data is available. -->
    <tests>
        <!-- Test base-space single-end reads with pre-built index and preset parameters -->
        <test>
            <!-- TopHat commands:
            tophat -o tmp_dir -p 1 tophat_in1 test-data/tophat_in2.fastqsanger
            Rename the files in tmp_dir appropriately
            -->
            <param name="input1" ftype="fastqsanger" value="tophat_in2.fastqsanger" />
            <param name="genomeSource" value="indexed" />
            <param name="index" value="tophat_test" />
            <param name="sPaired" value="single" />
            <param name="sSettingsType" value="preSet" />
            <output name="junctions" file="tophat_out1j.bed" />
            <output name="accepted_hits" file="tophat_out1h.bam" compare="sim_size" />
        </test>
        <!-- Test using base-space test data: paired-end reads, index from history. -->
        <test>
            <!-- TopHat commands:
            bowtie-build -f test-data/tophat_in1.fasta tophat_in1
            tophat -o tmp_dir -p 1 -r 20 tophat_in1 test-data/tophat_in2.fastqsanger test-data/tophat_in3.fastqsanger
            Rename the files in tmp_dir appropriately
            -->
            <param name="input1" ftype="fastqsanger" value="tophat_in2.fastqsanger" />
            <param name="genomeSource" value="history" />
            <param name="ownFile" ftype="fasta" value="tophat_in1.fasta" />
            <param name="sPaired" value="paired" />
            <param name="input2" ftype="fastqsanger" value="tophat_in3.fastqsanger" />
            <param name="mate_inner_distance" value="20" />
            <param name="pSettingsType" value="preSet" />
            <output name="junctions" file="tophat_out2j.bed" />
            <output name="accepted_hits" file="tophat_out2h.bam" compare="sim_size" />
        </test>
        <!-- Test base-space single-end reads with user-supplied reference fasta and full parameters -->
        <test>
            <!-- Tophat commands:
            bowtie-build -f test-data/tophat_in1.fasta tophat_in1
            tophat -o tmp_dir -p 1 -a 8 -m 0 -i 70 -I 500000 -F 0.15 -g 40 +coverage-search +min-coverage-intron 50 +max-coverage-intro 20000 +segment-mismatches 2 +segment-length 25 +closure-search +min-closure-exon 50 +min-closure-intron 50 +max-closure-intro 5000 +microexon-search tophat_in1 test-data/tophat_in2.fastqsanger
            Replace the + with double-dash
            Rename the files in tmp_dir appropriately
            -->
            <param name="input1" ftype="fastqsanger" value="tophat_in2.fastqsanger"/>
            <param name="genomeSource" value="history"/>
            <param name="ownFile" value="tophat_in1.fasta"/>
            <param name="sPaired" value="single"/>
            <param name="sSettingsType" value="full"/>
            <param name="library_type" value="FR Unstranded"/>
            <param name="anchor_length" value="8"/>
            <param name="splice_mismatches" value="0"/>
            <param name="min_intron_length" value="70"/>
            <param name="max_intron_length" value="500000"/>
            <param name="max_multihits" value="40"/>
            <param name="min_segment_intron" value="50" />
            <param name="max_segment_intron" value="500000" />
            <param name="seg_mismatches" value="2"/>
            <param name="seg_length" value="25"/>
            <param name="allow_indel_search" value="Yes"/>
            <param name="max_insertion_length" value="3"/>
            <param name="max_deletion_length" value="3"/>
            <param name="use_junctions" value="Yes" />
            <param name="use_annotations" value="No" />
            <param name="use_juncs" value="No" />
            <param name="no_novel_juncs" value="No" />
            <param name="use_search" value="Yes" />
            <param name="min_closure_exon" value="50" />
            <param name="min_closure_intron" value="50" />
            <param name="max_closure_intron" value="5000" />
            <param name="use_search" value="Yes" />
            <param name="min_coverage_intron" value="50" />
            <param name="max_coverage_intron" value="20000" />
            <param name="microexon_search" value="Yes" />
            <output name="insertions" file="tophat_out3i.bed" />
            <output name="deletions" file="tophat_out3d.bed" />
            <output name="junctions" file="tophat_out3j.bed" />
            <output name="accepted_hits" file="tophat_out3h.bam" compare="sim_size" />
        </test>
        <!-- Test base-space paired-end reads with user-supplied reference fasta and full parameters -->
        <test>
            <!-- TopHat commands:
            tophat -o tmp_dir -r 20 -p 1 -a 8 -m 0 -i 70 -I 500000 -F 0.15 -g 40 +coverage-search +min-coverage-intron 50 +max-coverage-intro 20000 +segment-mismatches 2 +segment-length 25 +microexon-search tophat_in1 test-data/tophat_in2.fastqsanger test-data/tophat_in3.fastqsanger
            Replace the + with double-dash
            Rename the files in tmp_dir appropriately
            -->
            <param name="input1" ftype="fastqsanger" value="tophat_in2.fastqsanger"/>
            <param name="genomeSource" value="indexed"/>
            <param name="index" value="tophat_test"/>
            <param name="sPaired" value="paired"/>
            <param name="input2" ftype="fastqsanger" value="tophat_in3.fastqsanger"/>
            <param name="mate_inner_distance" value="20"/>
            <param name="pSettingsType" value="full"/>
            <param name="library_type" value="FR Unstranded"/>
            <param name="mate_std_dev" value="20"/>
            <param name="anchor_length" value="8"/>
            <param name="splice_mismatches" value="0"/>
            <param name="min_intron_length" value="70"/>
            <param name="max_intron_length" value="500000"/>
            <param name="max_multihits" value="40"/>
            <param name="min_segment_intron" value="50" />
            <param name="max_segment_intron" value="500000" />
            <param name="seg_mismatches" value="2"/>
            <param name="seg_length" value="25"/>
            <param name="allow_indel_search" value="No"/>
            <param name="use_junctions" value="Yes" />
            <param name="use_annotations" value="No" />
            <param name="use_juncs" value="No" />
            <param name="no_novel_juncs" value="No" />
            <param name="use_search" value="No" />
            <param name="microexon_search" value="Yes" />
            <output name="junctions" file="tophat_out4j.bed" />
            <output name="accepted_hits" file="tophat_out4h.bam" compare="sim_size" />
        </test>
    </tests>

    <help>
**Tool Overview**

Qualimap_ bamqc allows for a simple but thorough check of the quality of a mapping, based on the aligned reads in a SAM or BAM file.

.. _Qualimap: http://qualimap.bioinfo.cipf.es/

------

**Know what you are doing**

.. class:: warningmark

Know what you are doing by reading the `documentation`__ and experimenting.

.. __: http://qualimap.bioinfo.cipf.es/

------

**Input formats**

This tool accepts alignments in SAM or BAM format. Optionally, a feature annotation file in BED, GTF or GFF3 format can be provided to analyze the alignment data for your regions of interest.

------

**Outputs**

This tool produces one output:

- an HTML report containing the mapping quality metrics.

-------

**Tool settings**

All of the options other than the input alignments have a default value. You can change any of them. Some of the options of Qualimap bamqc have been implemented here.

------

**Tool parameter list**

This is a list of implemented Qualimap bamqc options::

  -c                                Paint chromosome limits inside charts.

  -gff                              Feature annotation file (BED, GTF or GFF3) describing the regions of interest.
                                    Only used when the regions of interest analysis is switched on.

  -os                               Also compute the statistics for the reads that are mapped outside of the regions
                                    of interest; they are shown in a separate section. Only used together with -gff.

  -p                                The sequencing protocol strand specificity: NON-STRAND-SPECIFIC,
                                    STRAND-SPECIFIC-FORWARD or STRAND-SPECIFIC-REVERSE. This information is required
                                    to calculate the number of correct strand reads. Only used together with -gff.

  -hm                               Minimum size for a homopolymer to be considered in indel analysis. Only
                                    homopolymers of this size or larger are counted. The default value is 3.

  -nr                               Number of reads in the chunk. Reads are analyzed in chunks to reduce the load of
                                    I/O; smaller numbers may result in lower performance, but also reduce the memory
                                    consumption. The default value is 1000 reads.

    </help>
</tool>
author	joachim-jacob
date	Tue, 12 Feb 2013 04:48:36 -0500
parents
children	9537dd9dd18b