view pyCRAC/pyPileup.xml @ 0:19b20927172d draft

Uploaded
author swebb
date Tue, 18 Jun 2013 09:11:00 -0400
parents
children
line wrap: on
line source

<?xml version="1.0" encoding="utf-8"?>
 <tool id ="pyPileup" name="pyPileup">
	<!-- Declares the pyCRAC package as the dependency this tool needs at run time. -->
	<requirements>
		<requirement type="package">pyCRAC</requirement>
	</requirements>
	<!--
	  Cheetah template that builds the pyPileup.py command line from the tool inputs.
	  Flags are only emitted for what the user switched on:
	    * the gene list or the chromosome co-ordinate file, depending on geneOpt.alignGene
	    * the discarded-reads output, for novo/sam input when disc.discard is "discard"
	    * the alignment filters, for novo/sam input when addAlignOpt is in "edit" mode
	    * the standard options, when addOpt is in "edit" mode
	  The max and limit flags are skipped while their value is 0.
	  The select parameters unique, blocks, mutations, iclip and ignore carry the
	  literal flag text as their value, so they are placed on the command line as-is.
	  The discarded-reads flag is emitted from a single place so it is never passed twice.
	-->
	<command interpreter="python">
	/usr/local/bin/pyPileup.py
	-f $ftype.input
	--file_type $ftype.file_type
	#if $geneOpt.alignGene == "gene":
		-g $geneOpt.genes
	#end if#
	#if $geneOpt.alignGene == "chr":
		--chr $geneOpt.chr
	#end if#
	#if ($ftype.file_type == "novo" or $ftype.file_type == "sam") and $ftype.disc.discard == "discard":
		--discarded $discarded
	#end if#
	--gtf=$addGTF.gtf
	--tab=$addTab.tab
	#if ($ftype.file_type == "novo" or $ftype.file_type == "sam") and $ftype.addAlignOpt.alignoptions == "edit":
		--align_quality=$ftype.addAlignOpt.align_quality
		--align_score=$ftype.addAlignOpt.align_score
		--distance=$ftype.addAlignOpt.d
		--length=$ftype.addAlignOpt.length
		#if int($ftype.addAlignOpt.max) > 0:
			--max=$ftype.addAlignOpt.max
		#end if#
		$ftype.addAlignOpt.unique
		$ftype.addAlignOpt.blocks
		$ftype.addAlignOpt.mutations
	#end if#
	#if $addOpt.options == "edit":
		--range=$addOpt.range
		--overlap=$addOpt.overlap
		$addOpt.iclip
		$addOpt.ignore
		-s $addOpt.sequence
		#if int($addOpt.limit) > 0:
			--limit=$addOpt.limit
		#end if#
	#end if#
	-o $output
	</command>
	<!-- Command used to query the installed pyPileup.py version. -->
	<version_command>/usr/local/bin/pyPileup.py --version</version_command>
	<inputs>


	        <!--
	          Chooses what reads are aligned against: a gene list (passed with -g)
	          or a chromosome co-ordinate file (passed with the chr option).
	        -->
	        <conditional name="geneOpt">
	          <param name="alignGene"
	                 type="select"
	                 label="Do you want to align reads to genes or chromosome co-ordinates?">
	            <option value="gene" selected="true">Genes</option>
	            <option value="chr">Chromosome Co-ordinates</option>
	          </param>
	          <when value="gene">
	            <param name="genes"
	                   type="data"
	                   format="txt"
	                   label="Choose a Gene List -g"
	                   help="Single column gene ID file"/>
	          </when>
	          <when value="chr">
	            <param name="chr"
	                   type="data"
	                   format="interval"
	                   label="Choose a Chromosome Coordinate File"
	                   help="Tab delimited text file containing an identifier, chromosome name, start position, end position and strand ('-' or '+')"/>
	          </when>
	        </conditional>
                <!--
                  GTF annotation source: either an entry of the pycrac_gtf data table
                  or a GTF dataset from the history. Both branches expose the value as
                  "gtf", so the command template reads addGTF.gtf in either case.
                  The history dataset format is spelled in lower case ("gtf") to agree
                  with the GTF input declared elsewhere in this tool.
                -->
                <conditional name="addGTF">
                  <param name="gtfFile" type="select" label="Choose GTF File from">
                    <option value="default" selected="true">Defaults</option>
                    <option value="other">History</option>
                  </param>
                  <when value="default">
                    <param name="gtf"
                           type="select"
                           label="GTF File --gtf"
                           help="GTF file containing gene ID co-ordinates">
                      <options from_data_table="pycrac_gtf"/>
                    </param>
                  </when>
                  <when value="other">
                    <param name="gtf"
                           type="data"
                           format="gtf"
                           label="GTF File --gtf"
                           help="GTF file containing gene ID co-ordinates"/>
                  </when>
                </conditional>
               <!--
                 Genomic reference sequence source: either an entry of the pycrac_tab
                 data table or a tabular dataset from the history. Both branches expose
                 the value as "tab".
               -->
               <conditional name="addTab">
                 <param name="tabFile" type="select" label="Choose Genomic Reference Sequence from">
                   <option value="default" selected="true">Defaults</option>
                   <option value="other">History</option>
                 </param>
                 <when value="default">
                   <param name="tab"
                          type="select"
                          label="Genomic Reference Sequence --tab"
                          help="Tab file containing genomic reference sequence">
                     <options from_data_table="pycrac_tab"/>
                   </param>
                 </when>
                 <when value="other">
                   <param name="tab"
                          type="data"
                          format="tabular"
                          label="Genomic Reference Sequence --tab"
                          help="Tab file containing genomic reference sequence"/>
                 </when>
               </conditional>


		<!--
		  Input file and its type.
		  The sam and novo branches each offer, besides the input dataset:
		    * disc: a switch for writing discarded reads to a separate output
		    * addAlignOpt: optional alignment filters, shown in "edit" mode
		  The gtf branch only takes the input dataset.
		  The mutations, unique and blocks selects use the literal flag text as
		  their option value; the empty value means the flag is left out.
		  The distance validator message states the real lower bound (1), and the
		  integer parameters no longer carry a format attribute, which only has
		  meaning on data parameters.
		-->
		<conditional name="ftype">
			<param name="file_type" type="select" label="Input File Type --file_type">
				<option value="novo" selected="true">Novo</option>
				<option value="sam">Sam/BAM</option>
				<option value="gtf">GTF</option>
			</param>
			<when value="sam">
				<param name="input"
				       type="data"
				       format="sam,bam"
				       label="Input File -f"
				       help="Alignment file of type .sam or .bam"/>
				<conditional name="disc">
					<param name="discard" type="select" label="Print discarded reads to a separate file">
						<option value="" selected="true">OFF</option>
						<option value="discard">ON</option>
					</param>
					<when value="discard"/>
					<when value=""/>
				</conditional>
				<conditional name="addAlignOpt">
					<param name="alignoptions" type="select" label="Alignment Options">
						<option value="default" selected="true">Default</option>
						<option value="edit">Edit</option>
					</param>
					<when value="edit">
						<param name="mutations"
						       type="select"
						       label="Filter reads by mutations --mutations"
						       help="cross-linking sites are often highlighted by deletions and/or substitutions in the reads. You can use this option to filter reads based on whether they have mutations or not.">
							<option value="" selected="true">Off</option>
							<option value="--mutations=delsonly">deletions</option>
							<option value="--mutations=subsonly">substitutions</option>
							<option value="--mutations=TC">T->C mutations</option>
							<option value="--mutations=allmuts">all mutations</option>
							<option value="--mutations=nomuts">no mutations</option>
						</param>
						<param name="align_quality" type="integer" label="Align Quality --align_quality " value="0" size="5">
							<validator type="in_range" min="0" message="Please enter a value >= 0"/>
						</param>
						<param name="align_score" type="integer" label="Align Score --align_score " value="0" size="5">
							<validator type="in_range" min="0" message="Please enter a value >= 0"/>
						</param>
						<param name="max"
						       type="integer"
						       label="Mapped reads to read from input file --max"
						       help="Set to 0 to align all reads."
						       value="0"
						       size="10">
							<validator type="in_range" min="0" max="100000000" message="Please enter a value between 1 and 100000000 or 0 to align all reads"/>
						</param>
						<param name="d"
						       type="integer"
						       label="Distance --distance "
						       value="1000"
						       size="6"
						       help="Set the maximum number of bp allowed between two non-overlapping paired reads">
							<validator type="in_range" min="1" message="Please enter a value >= 1"/>
						</param>
						<param name="length"
						       type="integer"
						       label="Set the maximum length of reads --length"
						       value="1000"
						       size="7"
						       help="Set the read length threshold between 15 and 1000">
							<validator type="in_range" min="15" max="1000" message="Please enter a value between 15 and 1000"/>
						</param>
						<param name="unique" type="select" label="Remove reads with multiple alignment locations --unique">
							<option value="" selected="true">OFF</option>
							<option value="--unique">ON</option>
						</param>
						<param name="blocks" type="select" label="Only count reads with same start and end coords once --blocks">
							<option value="" selected="true">OFF</option>
							<option value="--blocks">ON</option>
						</param>
					</when>
					<when value="default"/>
				</conditional>
			</when>
			<when value="novo">
				<param name="input"
				       type="data"
				       format="tabular"
				       label="Input File -f"
				       help="Alignment file of type .novo"/>
				<conditional name="disc">
					<param name="discard" type="select" label="Print discarded reads to a separate file">
						<option value="" selected="true">OFF</option>
						<option value="discard">ON</option>
					</param>
					<when value="discard"/>
					<when value=""/>
				</conditional>
				<conditional name="addAlignOpt">
					<param name="alignoptions" type="select" label="Alignment Options">
						<option value="default" selected="true">Default</option>
						<option value="edit">Edit</option>
					</param>
					<when value="edit">
						<param name="mutations"
						       type="select"
						       label="Filter reads by mutations --mutations"
						       help="cross-linking sites are often highlighted by deletions and/or substitutions in the reads. You can use this option to filter reads based on whether they have mutations or not.">
							<option value="" selected="true">Off</option>
							<option value="--mutations=delsonly">deletions</option>
							<option value="--mutations=subsonly">substitutions</option>
							<option value="--mutations=TC">T->C mutations</option>
							<option value="--mutations=allmuts">all mutations</option>
							<option value="--mutations=nomuts">no mutations</option>
						</param>
						<param name="align_quality" type="integer" label="Align Quality --align_quality " value="0" size="5">
							<validator type="in_range" min="0" message="Please enter a value >= 0"/>
						</param>
						<param name="align_score" type="integer" label="Align Score --align_score " value="0" size="5">
							<validator type="in_range" min="0" message="Please enter a value >= 0"/>
						</param>
						<param name="max"
						       type="integer"
						       label="Mapped reads to read from input file --max"
						       help="Set to 0 to align all reads."
						       value="0"
						       size="10">
							<validator type="in_range" min="0" max="100000000" message="Please enter a value between 1 and 100000000 or 0 to align all reads"/>
						</param>
						<param name="d"
						       type="integer"
						       label="Distance --distance "
						       value="1000"
						       size="6"
						       help="Set the maximum number of bp allowed between two non-overlapping paired reads">
							<validator type="in_range" min="1" message="Please enter a value >= 1"/>
						</param>
						<param name="length"
						       type="integer"
						       label="Set the maximum length of reads --length"
						       value="1000"
						       size="7"
						       help="Set the read length threshold between 15 and 1000">
							<validator type="in_range" min="15" max="1000" message="Please enter a value between 15 and 1000"/>
						</param>
						<param name="unique" type="select" label="Remove reads with multiple alignment locations --unique">
							<option value="" selected="true">OFF</option>
							<option value="--unique">ON</option>
						</param>
						<param name="blocks" type="select" label="Only count reads with same start and end coords once --blocks">
							<option value="" selected="true">OFF</option>
							<option value="--blocks">ON</option>
						</param>
					</when>
					<when value="default"/>
				</conditional>
			</when>
			<when value="gtf">
				<param name="input"
				       type="data"
				       format="gtf"
				       label="Input File -f"
				       help="File of type .gtf"/>
			</when>
		</conditional>
		      
		      <!--
		        Standard pyPileup options, shown only in "edit" mode.
		        The ignore and iclip selects use the literal flag text as their option
		        value; the empty value means the flag is left out.
		        A limit of 0 means unlimited: the command template skips the limit flag
		        in that case. The validator accepts 0 and every positive integer, and
		        its message says so.
		      -->
		      <conditional name="addOpt">
		        <param name="options" type="select" label="Standard Options">
		          <option value="default" selected="true">Default</option>
		          <option value="edit">Edit</option>
		        </param>
		        <when value="edit">
		          <param name="range"
		                 type="integer"
		                 label="Range --range"
		                 value="0"
		                 size="5"
		                 help="Manually set the length of the 5' and 3' UTRs (0 to 50000)">
		            <validator type="in_range" min="0" max="50000" message="Please enter a value between 0 and 50000"/>
		          </param>
		          <param name="ignore" type="select" label="Ignore strand information? --ignorestrand">
		            <option value="" selected="true">No</option>
		            <option value="--ignorestrand">Yes</option>
		          </param>
		          <param name="overlap"
		                 type="integer"
		                 label="Overlap --overlap"
		                 value="1"
		                 size="5"
		                 help="Sets the number of nucleotides a read has to overlap with a gene before it is considered a hit. ">
		            <validator type="in_range" min="1" message="Please enter a positive integer"/>
		          </param>
		          <param name="sequence" type="select" label="Align reads to --sequence">
		            <option value="genomic" selected="true">Genomic Sequence</option>
		            <option value="coding">Coding Sequence</option>
		          </param>
		          <param name="iclip" type="select" label="iCLIP mode --iCLIP">
		            <option value="" selected="true">OFF</option>
		            <option value="--iCLIP">ON</option>
		          </param>
		          <param name="limit"
		                 type="integer"
		                 label="Limit number of reads to count that map to a particular region --limit"
		                 value="0"
		                 size="15"
		                 help="Set to 0 for unlimited reads">
		            <validator type="in_range" min="0" message="Please enter a value of 1 or greater, or set to 0 for unlimited reads"/>
		          </param>
		        </when>
		        <when value="default"/>
		      </conditional>
                <!--
                  Free-text label used to name the output datasets. No format attribute:
                  format only applies to data parameters.
                -->
                <param name="label" type="text" size="30" value="pyPileup" label="Enter output file label -o"/>
	</inputs>
	<!--
	  Output datasets, both named after the user-supplied label:
	    * output: the pileup table
	    * discarded: the discarded reads, kept only when the input type is novo or
	      sam and the discard switch is set to "discard"
	-->
	<outputs>
		<data name="output" format="tabular" label="${label.value}.pileup"/>
		<data name="discarded" format="txt" label="${label.value}_discarded.txt">
			<filter>(ftype['file_type'] == "novo" or ftype['file_type'] == "sam") and ftype['disc']['discard'] ==  "discard"</filter>
		</data>
	</outputs>
	<help>


.. class:: infomark

**pyPileup**

pyPileup is part of the pyCRAC_ package. It produces pileups containing the number of hits, substitutions and deletions for each nucleotide covered by
reads in specific genes or genomic regions.
   
.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html
        
------

**Parameter list**

File input options::

    -f FILE, --input_file=FILE
                        As input you can use Novoalign native output, SAM,
                        pyMotif or pyReadCounters GTF files. By default it
                        expects data from the standard input. Make sure to
                        specify the file type of the file
                        you want to have analyzed using the --file_type
                        option!
    -o OUTPUT_FILE, --output_file=OUTPUT_FILE
                        Use this flag to override the standard output file
                        names. All pileups will be written to one output file.
    -g FILE, --genes_file=FILE
                        here you need to type in the name of your gene list
                        file (1 column) or the hittable file
    --chr=FILE          
                        if you simply would like to align reads against a
                        genomic sequence you should generate a tab delimited
                        file containing an identifier, chromosome name, start
                        position, end position and strand
    --gtf=annotation_file.gtf
                        type the path to the gtf annotation file that you want
                        to use
    --tab=tab_file.tab  
                        type the path to the tab file that contains the
                        genomic reference sequence
    --file_type=FILE_TYPE
                        use this option to specify the file type (i.e. 'novo',
                        'sam', 'gtf'). This will tell the program which
                        parsers to use for processing the files. Default =
                        'novo'

pyPileup specific options::

    --limit=500         
                        with this option you can select how many reads mapped
                        to a particular gene/ORF/region you want to count.
                        Default = All
    --iCLIP             
                        This turns on the iCLIP mode and the pileups will
                        report cross-linking site frequencies in iCLIP data in
                        reference sequences

Common options::

    -v, --verbose       
                        prints all the status messages to a file rather than
                        the standard output
    --ignorestrand      
                        this flag tells the program to ignore strand
                        information and all overlapping reads will be considered
                        sense reads. Useful for analysing ChIP or RIP data
    --zip=FILE          
                        use this option to compress all the output files in a
                        single zip file
    --overlap=1         
                        sets the number of nucleotides a read has to overlap
                        with a gene before it is considered a hit. Default =
                        1 nucleotide
    -s genomic, --sequence=genomic
                        with this option you can select whether you want the
                        reads aligned to the genomic or the coding sequence.
                        Default = genomic
    -r 100, --range=100
                        allows you to set the length of the UTR regions. If
                        you set '-r 50' or '--range=50', then the program will
                        set a fixed length (50 bp) regardless of whether the
                        GTF file has genes with annotated UTRs.

Options for novo, SAM and BAM files::

    --align_quality=100, --mapping_quality=100
                        with these options you can set the alignment quality
                        (Novoalign) or mapping quality (SAM) threshold. Reads
                        with qualities lower than the threshold will be
                        ignored. Default = 0
    --align_score=100   
                        with this option you can set the alignment score
                        threshold. Reads with alignment scores lower than the
                        threshold will be ignored. Default = 0
    -l 100, --length=100
                        to set read length threshold. Default = 1000
    -m 100000, --max=100000
                        maximum number of mapped reads that will be analyzed.
                        Default = All
    --unique            
                        with this option reads with multiple alignment
                        locations will be removed. Default = Off
    --blocks            
                        with this option reads with the same start and end
                        coordinates on a chromosome will only be counted once.
                        Default = Off
    --discarded=FILE    
                        prints the lines from the alignments file that were
                        discarded by the parsers. This file contains reads
                        that were unmapped (NM), of poor quality (i.e. QC) or
                        paired reads that were mapped to different chromosomal
                        locations or were too far apart on the same
                        chromosome. Useful for debugging purposes
    -d 1000, --distance=1000
                        this option allows you to set the maximum number of
                        base-pairs allowed between two non-overlapping paired
                        reads. Default = 1000
    --mutations=delsonly
                        Use this option to only track mutations that are of
                        interest. For CRAC data this is usually deletions
                        (--mutations=delsonly). For PAR-CLIP data this is
                        usually T-C mutations (--mutations=TC). Other options
                        are: do not report any mutations: --mutations=nomuts.
                        Only report specific base mutations, for example only
                        in T's, C's and G's :--mutations=[TCG]. The brackets
                        are essential. Other nucleotide combinations are also
                        possible

	</help>
</tool>