view sam_pileup.xml @ 3:890d97772e2a draft

Uploaded
author devteam
date Thu, 09 Jan 2014 14:28:39 -0500
parents 3ff8935743a9
children a3b4ad6858ff
line wrap: on
line source

<tool id="sam_pileup" name="Generate pileup" version="1.1.2">
  <description>from BAM dataset</description>
  <!-- External dependency of this tool: the samtools package, pinned to version 0.1.16.
       NOTE(review): the help text documents the "samtools pileup" subcommand; presumably the
       pin exists because that subcommand is unavailable in newer releases. Confirm before
       changing the version. -->
  <requirements>
    <requirement type="package" version="0.1.16">samtools</requirement>
  </requirements>
  <!-- Cheetah template for the sam_pileup.py command line.
       * Every substituted value is single-quoted so that a path containing whitespace or
         shell metacharacters reaches the script as one argument.
       * input1 is declared inside the refOrHistory conditional, so it is addressed through
         that conditional instead of through a bare top-level name.
       * The reference is passed either as a history FASTA (ownFile) or as the path column of
         the selected built-in index entry (index).
       * The four consensus options are always passed; when consensus calling is off they
         carry the literal string "None". -->
  <command interpreter="python">
    sam_pileup.py
      --input1='$refOrHistory.input1'
      --output='$output1'
      --ref='$refOrHistory.reference'
      #if $refOrHistory.reference == "history":
        --ownFile='$refOrHistory.ownFile'
      #else:
        --index='${refOrHistory.index.fields.path}'
      #end if
      --bamIndex='${refOrHistory.input1.metadata.bam_index}'
      --lastCol='$lastCol'
      --indels='$indels'
      --mapCap='$mapCap'
      --consensus='$c.consensus'
      #if $c.consensus == "yes":
        --theta='$c.theta'
        --hapNum='$c.hapNum'
        --fraction='$c.fraction'
        --phredProb='$c.phredProb'
      #else:
        --theta="None"
        --hapNum="None"
        --fraction="None"
        --phredProb="None"
      #end if
  </command>
  <inputs>
    <!-- Reference source. Both branches declare the BAM input under the same name (input1);
         only the built-in branch attaches build validators to it. -->
    <conditional name="refOrHistory">
      <param name="reference" type="select" label="Will you select a reference genome from your history or use a built-in index?">
        <option value="indexed">Use a built-in index</option>
        <option value="history">Use one from the history</option>
      </param>
      <!-- Built-in index: the BAM's dbkey must be set and must appear in column 1 of the
           fasta_indexes data table; the index choices are filtered by that same dbkey. -->
      <when value="indexed">
        <param name="input1" type="data" format="bam" label="Select the BAM file to generate the pileup file for">
          <validator type="unspecified_build" />
          <validator type="dataset_metadata_in_data_table" table_name="fasta_indexes" metadata_name="dbkey" metadata_column="1" message="Sequences are not currently available for the specified build." />
        </param>
        <param name="index" type="select" label="Using reference genome">
          <options from_data_table="fasta_indexes">
            <filter type="data_meta" ref="input1" key="dbkey" column="1" />
            <validator type="no_options" message="No reference genome is available for the build associated with the selected input dataset" />
          </options>
        </param>
      </when>
      <!-- Reference from the history: any BAM plus a FASTA dataset chosen by the user. -->
      <when value="history">
        <param name="input1" type="data" format="bam" label="Select the BAM file to generate the pileup file for" />
        <param name="ownFile" type="data" format="fasta" label="Select a reference genome" />
      </when>
    </conditional>
    <!-- Output-shaping options; the first option of each select is the default. -->
    <param name="lastCol" type="select" label="Whether or not to print the mapping quality as the last column" help="Makes the output easier to parse, but is space inefficient">
      <option value="no">Do not print the mapping quality as the last column</option>
      <option value="yes">Print the mapping quality as the last column</option>
    </param>
    <param name="indels" type="select" label="Whether or not to print only output pileup lines containing indels">
      <option value="no">Print all lines</option>
      <option value="yes">Print only lines containing indels</option>
    </param>
    <param name="mapCap" type="integer" value="60" label="Where to cap mapping quality" />
    <!-- MAQ consensus calling; the model parameters are only offered when it is enabled. -->
    <conditional name="c">
      <param name="consensus" type="select" label="Call consensus according to MAQ model?">
        <option selected="true" value="no">No</option>
        <option value="yes">Yes</option>
      </param>
      <when value="no" />
      <when value="yes">
        <param name="theta" type="float" value="0.85" label="Theta parameter (error dependency coefficient) in the MAQ consensus calling model" />
        <!-- min enforces the constraint stated in the help text. -->
        <param name="hapNum" type="integer" value="2" min="2" label="Number of haplotypes in the sample" help="Greater than or equal to 2" />
        <param name="fraction" type="float" value="0.001" label="Expected fraction of differences between a pair of haplotypes" />
        <param name="phredProb" type="integer" value="40" label="Phred probability of an indel in sequencing/prep" />
      </when>
    </conditional>
  </inputs>
  <outputs>
    <!-- The single result dataset: the tabular pileup whose path is handed to sam_pileup.py. -->
    <data name="output1"
          format="tabular"
          label="${tool.name} on ${on_string}: converted pileup"/>
  </outputs>
  <tests>
    <test>
      <!--
      Case: reference FASTA taken from the history, consensus calling disabled.
      Commands used to produce the expected pileup:
        samtools faidx chr_m.fasta
        samtools pileup -M 60 -f chr_m.fasta test-data/sam_pileup_in1.bam > sam_pileup_out1.pileup
      (chr_m.fasta is the prefix of the index)
      -->
      <param name="reference" value="history"/>
      <param name="input1" ftype="bam" value="sam_pileup_in1.bam"/>
      <param name="ownFile" ftype="fasta" dbkey="equCab2" value="chr_m.fasta"/>
      <param name="lastCol" value="no"/>
      <param name="indels" value="no"/>
      <param name="mapCap" value="60"/>
      <param name="consensus" value="no"/>
      <output name="output1" file="sam_pileup_out1.pileup"/>
    </test>
    <test>
      <!--
      Case: built-in index selected, consensus calling enabled with explicit model parameters.
      Command used to produce the expected pileup:
        samtools pileup -M 60 -c -T 0.85 -N 2 -r 0.001 -I 40 -f chr_m.fasta test-data/sam_pileup_in1.bam > sam_pileup_out2.pileup
      (chr_m.fasta is the prefix of the index)
      -->
      <param name="reference" value="indexed"/>
      <param name="input1" ftype="bam" dbkey="equCab2" value="sam_pileup_in1.bam"/>
      <param name="index" value="chr_m"/>
      <param name="lastCol" value="no"/>
      <param name="indels" value="no"/>
      <param name="mapCap" value="60"/>
      <param name="consensus" value="yes"/>
      <param name="theta" value="0.85"/>
      <param name="hapNum" value="2"/>
      <param name="fraction" value="0.001"/>
      <param name="phredProb" value="40"/>
      <output name="output1" file="sam_pileup_out2.pileup"/>
    </test>
  </tests>
  <help>

**What it does**

Uses SAMTools_' pileup command to produce a pileup dataset from a provided BAM dataset. It generates one of two types of pileup datasets depending on the specified options. If the *Call consensus according to MAQ model?* option is set to **No**, the tool produces a simple six-column pileup. If the option is set to **Yes**, a ten-column pileup dataset with consensus is generated. Both types of datasets are briefly summarized below.

.. _SAMTools: http://samtools.sourceforge.net/samtools.shtml

------

**Types of pileup datasets**

The description of the pileup format below is largely based on information that can be found on the SAMTools Pileup_ documentation page. The 6- and 10-column variants are described below.

.. _Pileup: http://samtools.sourceforge.net/pileup.shtml

**Six column pileup**::

    1    2  3  4        5        6
 ---------------------------------
 chrM  412  A  2       .,       II
 chrM  413  G  4     ..t,     IIIH
 chrM  414  C  4     ...a     III2
 chrM  415  C  4     TTTt     III7
   
where::

  Column Definition
 ------- ----------------------------
       1 Chromosome
       2 Position (1-based)
       3 Reference base at that position
       4 Coverage (# reads aligning over that position)
       5 Bases within reads (see Galaxy wiki for more info)
       6 Quality values (phred33 scale, see Galaxy wiki for more info)
       
**Ten column pileup**

The `ten-column` (consensus_) pileup incorporates additional consensus information generated with the *-c* option of the *samtools pileup* command::


    1    2  3  4   5   6   7   8       9       10
 ------------------------------------------------
 chrM  412  A  A  75   0  25  2       .,       II
 chrM  413  G  G  72   0  25  4     ..t,     IIIH
 chrM  414  C  C  75   0  25  4     ...a     III2
 chrM  415  C  T  75  75  25  4     TTTt     III7

where::

  Column Definition
 ------- --------------------------------------------------------
       1 Chromosome
       2 Position (1-based)
       3 Reference base at that position
       4 Consensus bases
       5 Consensus quality
       6 SNP quality
       7 Maximum mapping quality
       8 Coverage (# reads aligning over that position)
       9 Bases within reads (see Galaxy wiki for more info)
      10 Quality values (phred33 scale, see Galaxy wiki for more info)


.. _consensus: http://samtools.sourceforge.net/cns0.shtml

------

**Citation**

For the underlying tool, please cite `Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. &lt;http://www.ncbi.nlm.nih.gov/pubmed/19505943&gt;`_


  </help>
</tool>