diff pyCRAC/pyPileup.xml @ 0:19b20927172d draft

Uploaded
author swebb
date Tue, 18 Jun 2013 09:11:00 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyPileup.xml	Tue Jun 18 09:11:00 2013 -0400
@@ -0,0 +1,384 @@
+<?xml version="1.0" encoding="utf-8"?>
+ <tool id ="pyPileup" name="pyPileup">
+	<!-- Tool dependency: pyPileup is distributed as part of the pyCRAC package. -->
+	<requirements>
+		<requirement type="package">pyCRAC</requirement>
+	</requirements>
+	<!--
+	  Cheetah template that assembles the pyPileup.py command line.
+	  * Region selection, annotation (GTF) and reference (tab) arguments are
+	    always taken from the geneOpt, addGTF and addTab conditionals.
+	  * The discarded-reads output is requested once, only for novo or sam
+	    input with the disc selector set to "discard".
+	  * Alignment options are emitted only for novo or sam input when the
+	    addAlignOpt selector is "edit"; the unique, blocks and mutations
+	    params carry the literal flag (or an empty string) as their value.
+	  * Standard options are emitted only when the addOpt selector is "edit".
+	  * max and limit are passed only when greater than 0 (0 means no limit).
+	-->
+	<command interpreter="python">
+	/usr/local/bin/pyPileup.py
+	-f $ftype.input
+	--file_type $ftype.file_type
+	#if $geneOpt.alignGene == "gene":
+		-g $geneOpt.genes
+	#end if#
+	#if $geneOpt.alignGene == "chr":
+		--chr $geneOpt.chr
+	#end if#
+	#if ($ftype.file_type == "novo" or $ftype.file_type == "sam") and $ftype.disc.discard == "discard":
+		--discarded $discarded
+	#end if#
+	--gtf=$addGTF.gtf
+	--tab=$addTab.tab
+	#if ($ftype.file_type == "novo" or $ftype.file_type == "sam") and $ftype.addAlignOpt.alignoptions == "edit":
+		--align_quality=$ftype.addAlignOpt.align_quality
+		--align_score=$ftype.addAlignOpt.align_score
+		--distance=$ftype.addAlignOpt.d
+		--length=$ftype.addAlignOpt.length
+		#if int($ftype.addAlignOpt.max) > 0:
+			--max=$ftype.addAlignOpt.max
+		#end if#
+		$ftype.addAlignOpt.unique
+		$ftype.addAlignOpt.blocks
+		$ftype.addAlignOpt.mutations
+	#end if#
+	#if $addOpt.options == "edit":
+		--range=$addOpt.range
+		--overlap=$addOpt.overlap
+		$addOpt.iclip
+		$addOpt.ignore
+		-s $addOpt.sequence
+		#if int($addOpt.limit) > 0:
+			--limit=$addOpt.limit
+		#end if#
+	#end if#
+	-o $output
+	</command>
+	<version_command>/usr/local/bin/pyPileup.py --version</version_command>
+	<inputs>
+
+
+	        <!-- Region selection: the chosen case supplies either a gene list (-g) or a chromosome coordinate file (chr option). -->
+	        <conditional name="geneOpt">
+	                <param name="alignGene" type="select" label="Do you want to align reads to genes or chromosome co-ordinates?">
+	                        <option value="gene" selected="true">Genes</option>
+	                        <option value="chr">Chromosome Co-ordinates</option>
+	                </param>
+	                <when value="gene">
+	                        <param name="genes" type="data" format="txt" label="Choose a Gene List -g" help="Single column gene ID file"/>
+	                </when>
+	                <when value="chr">
+	                        <param name="chr" type="data" format="interval" label="Choose a Chromosome Coordinate File" help="Tab delimited text file containing an identifier, chromosome name, start position, end position and strand ('-' or '+')"/>
+	                </when>
+	        </conditional>
+                <!--
+                  GTF annotation source: either an entry from the pycrac_gtf data
+                  table ("default") or a dataset from the history ("other"). Both
+                  cases expose the value under the same param name, gtf, so the
+                  command template can reference $addGTF.gtf unconditionally.
+                -->
+                <conditional name="addGTF">
+                        <param name="gtfFile" type="select" label="Choose GTF File from">
+                                <option value="default" selected="true">Defaults</option>
+                                <option value="other">History</option>
+                        </param>
+                        <when value="default">
+                                <param name="gtf" type="select" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates">
+                                        <options from_data_table="pycrac_gtf"/>
+                                </param>
+                        </when>
+                        <when value="other">
+                                <!-- Datatype extension written in lowercase, matching the gtf input param of the ftype conditional. -->
+                                <param format="gtf" name="gtf" type="data" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates"/>
+                        </when>
+                </conditional>
+               <!--
+                 Genomic reference sequence source: an entry from the pycrac_tab
+                 data table ("default") or a tabular dataset from the history
+                 ("other"). Both cases name the param tab.
+               -->
+               <conditional name="addTab">
+                 <param name="tabFile" type="select" label="Choose Genomic Reference Sequence from">
+                   <option value="default" selected="true">Defaults</option>
+                   <option value="other">History</option>
+                 </param>
+                 <when value="default">
+                   <param name="tab" type="select" label="Genomic Reference Sequence --tab" help="Tab file containing genomic reference sequence">
+                     <options from_data_table="pycrac_tab"/>
+                   </param>
+                 </when>
+                 <when value="other">
+                   <param name="tab" type="data" format="tabular" label="Genomic Reference Sequence --tab" help="Tab file containing genomic reference sequence"/>
+                 </when>
+               </conditional>
+
+
+		<!--
+		  Input file selection. The sam and novo cases expose the same nested
+		  conditionals: disc (write discarded reads to a second output) and
+		  addAlignOpt (alignment filters, shown when set to "edit"). The gtf
+		  case only takes the input dataset. Select params whose option values
+		  are literal command-line flags are inserted verbatim by the command
+		  template.
+		-->
+		<conditional name="ftype">
+			<param name="file_type" type="select" label="Input File Type --file_type">
+				<option value="novo" selected="true">Novo</option>
+				<option value="sam">Sam/BAM</option>
+				<option value="gtf">GTF</option>
+			</param>
+			<when value="sam">
+				<param format="sam,bam" name="input" type="data" label="Input File -f" help="Alignment file of type .sam or .bam" />
+				<conditional name="disc">
+					<param name="discard" type="select" label="Print discarded reads to a separate file">
+						<option value="" selected="true">OFF</option>
+						<option value="discard">ON</option>
+					</param>
+					<when value="discard">
+					</when>
+					<when value="">
+					</when>
+				</conditional>
+				<conditional name="addAlignOpt">
+					<param name="alignoptions" type="select" label="Alignment Options">
+						<option value="default" selected="true">Default</option>
+						<option value="edit">Edit</option>
+					</param>
+					<when value="edit">
+						<param name="mutations" type="select" label="Filter reads by mutations --mutations" help="cross-linking sites are often highlighted by deletions and/or substitutions in the reads. You can use this option to filter reads based on whether they have mutations or not.">
+							<option value="" selected="true">Off</option>
+							<option value="--mutations=delsonly">deletions</option>
+							<option value="--mutations=subsonly">substitutions</option>
+							<option value="--mutations=TC">T->C mutations</option>
+							<option value="--mutations=allmuts">all mutations</option>
+							<option value="--mutations=nomuts">no mutations</option>
+						</param>
+						<param format="integer" name="align_quality" type="integer" label="Align Quality --align_quality " value="0" size="5" >
+							<validator type="in_range" min="0" message="Please enter a value >= 0"/>
+						</param>
+						<param format="integer" name="align_score" type="integer" label="Align Score --align_score " value="0" size="5" >
+							<validator type="in_range" min="0" message="Please enter a value >= 0"/>
+						</param>
+						<!-- 0 means "no limit"; the command template only passes the max option when the value is above 0. -->
+						<param format="integer" name="max" type="integer" label="Mapped reads to read from input file --max" help="Set to 0 to align all reads." value="0" size="10" >
+							<validator type="in_range" min="0" max="100000000" message="Please enter a value between 1 and 100000000 or 0 to align all reads"/>
+						</param>
+						<!-- The validator rejects 0, so the message states the real lower bound of 1. -->
+						<param format="integer" name="d" type="integer" label="Distance --distance " value="1000" size="6" help="Set the maximum number of bp allowed between two non-overlapping paired reads">
+							<validator type="in_range" min="1" message="Please enter a value >= 1"/>
+						</param>
+						<param format="integer" name="length" type="integer" label="Set the maximum length of reads --length" value="1000" size="7" help="Set the read length threshold between 15 and 1000">
+							<validator type="in_range" min="15" max="1000" message="Please enter a value between 15 and 1000"/>
+						</param>
+						<param name="unique" type="select" label="Remove reads with multiple alignment locations --unique">
+							<option value="" selected="true">OFF</option>
+							<option value="--unique">ON</option>
+						</param>
+						<param name="blocks" type="select" label="Only count reads with same start and end coords once --blocks">
+							<option value="" selected="true">OFF</option>
+							<option value="--blocks">ON</option>
+						</param>
+					</when>
+					<when value="default">
+					</when>
+				</conditional>
+			</when>
+			<when value="novo">
+				<param format="tabular" name="input" type="data" label="Input File -f" help="Alignment file of type .novo" />
+				<conditional name="disc">
+					<param name="discard" type="select" label="Print discarded reads to a separate file">
+						<option value="" selected="true">OFF</option>
+						<option value="discard">ON</option>
+					</param>
+					<when value="discard">
+					</when>
+					<when value="">
+					</when>
+				</conditional>
+				<conditional name="addAlignOpt">
+					<param name="alignoptions" type="select" label="Alignment Options">
+						<option value="default" selected="true">Default</option>
+						<option value="edit">Edit</option>
+					</param>
+					<when value="edit">
+						<param name="mutations" type="select" label="Filter reads by mutations --mutations" help="cross-linking sites are often highlighted by deletions and/or substitutions in the reads. You can use this option to filter reads based on whether they have mutations or not.">
+							<option value="" selected="true">Off</option>
+							<option value="--mutations=delsonly">deletions</option>
+							<option value="--mutations=subsonly">substitutions</option>
+							<option value="--mutations=TC">T->C mutations</option>
+							<option value="--mutations=allmuts">all mutations</option>
+							<option value="--mutations=nomuts">no mutations</option>
+						</param>
+						<param format="integer" name="align_quality" type="integer" label="Align Quality --align_quality " value="0" size="5" >
+							<validator type="in_range" min="0" message="Please enter a value >= 0"/>
+						</param>
+						<param format="integer" name="align_score" type="integer" label="Align Score --align_score " value="0" size="5" >
+							<validator type="in_range" min="0" message="Please enter a value >= 0"/>
+						</param>
+						<!-- 0 means "no limit"; the command template only passes the max option when the value is above 0. -->
+						<param format="integer" name="max" type="integer" label="Mapped reads to read from input file --max" help="Set to 0 to align all reads." value="0" size="10" >
+							<validator type="in_range" min="0" max="100000000" message="Please enter a value between 1 and 100000000 or 0 to align all reads"/>
+						</param>
+						<!-- The validator rejects 0, so the message states the real lower bound of 1. -->
+						<param format="integer" name="d" type="integer" label="Distance --distance " value="1000" size="6" help="Set the maximum number of bp allowed between two non-overlapping paired reads">
+							<validator type="in_range" min="1" message="Please enter a value >= 1"/>
+						</param>
+						<param format="integer" name="length" type="integer" label="Set the maximum length of reads --length" value="1000" size="7" help="Set the read length threshold between 15 and 1000">
+							<validator type="in_range" min="15" max="1000" message="Please enter a value between 15 and 1000"/>
+						</param>
+						<param name="unique" type="select" label="Remove reads with multiple alignment locations --unique">
+							<option value="" selected="true">OFF</option>
+							<option value="--unique">ON</option>
+						</param>
+						<param name="blocks" type="select" label="Only count reads with same start and end coords once --blocks">
+							<option value="" selected="true">OFF</option>
+							<option value="--blocks">ON</option>
+						</param>
+					</when>
+					<when value="default">
+					</when>
+				</conditional>
+			</when>
+			<when value="gtf">
+				<param format="gtf" name="input" type="data" label="Input File -f" help="File of type .gtf" />
+			</when>
+		</conditional>
+		      
+		      <!--
+		        Standard options, shown when the selector is "edit". The ignore
+		        and iclip selects carry the literal command-line flag (or an
+		        empty string) as their value. A limit of 0 means unlimited; the
+		        command template only passes the limit option when it is above 0.
+		      -->
+		      <conditional name="addOpt">
+		        <param name="options" type="select" label="Standard Options">
+		          <option value="default" selected="true">Default</option>
+		          <option value="edit">Edit</option>
+		        </param>
+		        <when value="edit">
+		          <param format="integer" name="range" type="integer" label="Range --range" value="0" size="5" help="Manually set the length of the 5' and 3' UTRs 0>50000">
+		            <validator type="in_range" min="0" max="50000" message="Please enter a value between 0 and 50000"/>
+		          </param>
+		          <param name="ignore" type="select" label="Ignore strand information? --ignorestrand">
+		            <option value="" selected="true">No</option>
+		            <option value="--ignorestrand">Yes</option>
+		          </param>
+		          <param format="integer" name="overlap" type="integer" label="Overlap --overlap" value="1" size="5" help="Sets the number of nucleotides a read has to overlap with a gene before it is considered a hit. ">
+		            <validator type="in_range" min="1" message="Please enter a positive integer"/>
+		          </param>
+		          <param name="sequence" type="select" label="Align reads to --sequence">
+		            <option value="genomic" selected="true">Genomic Sequence</option>
+		            <option value="coding">Coding Sequence</option>
+		          </param>
+		          <param name="iclip" type="select" label="iCLIP mode --iCLIP">
+		            <option value="" selected="true">OFF</option>
+		            <option value="--iCLIP">ON</option>
+		          </param>
+		          <!-- The validator accepts every value from 0 upwards, so the message must not exclude 1. -->
+		          <param format="integer" name="limit" type="integer" label="Limit number of reads to count that map to a particular region --limit" value="0" size="15" help="Set to 0 for unlimited reads" >
+		            <validator type="in_range" min="0" message="Please enter a positive integer or set to 0 for unlimited reads"/>
+		          </param>
+		        </when>
+		        <when value="default">
+		        </when>
+		      </conditional>
+                <!--
+                  Free-text label interpolated into the label attributes of both
+                  output datasets as ${label.value}.
+                  NOTE(review): the label is not referenced in the command
+                  template; the -o argument receives $output. Confirm that the
+                  "-o" hint in the form label is intended.
+                -->
+                <param name="label" type="text" format="txt" size="30" value="pyPileup" label="Enter output file label -o" />
+	</inputs>
+	<!-- Output datasets; their display labels are built from the user-supplied label param. -->
+	<outputs>
+		<!-- Pileup table; it has no filter, so it is declared for every run. -->
+		<data name="output" format="tabular" label="${label.value}.pileup"/>
+		<!-- Discarded reads; only declared for novo or sam input with the discard selector set to "discard". -->
+		<data name="discarded" format="txt" label="${label.value}_discarded.txt">
+			<filter>(ftype['file_type'] == "novo" or ftype['file_type'] == "sam") and ftype['disc']['discard'] ==  "discard"</filter>
+		</data>
+	</outputs>
+	<help>
+
+
+.. class:: infomark
+
+**pyPileup**
+
+pyPileup is part of the pyCRAC_ package. It produces pileups containing the number of hits, substitutions and deletions for each nucleotide covered by
+reads in specific genes or genomic regions.
+   
+.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html
+        
+------
+
+**Parameter list**
+
+File input options::
+
+    -f FILE, --input_file=FILE
+                        As input you can use Novoalign native output, SAM,
+                        or pyMotif or pyReadCounters GTF files.
+                        By default it expects data from the standard
+                        input. Make sure to specify the file type of the file
+                        you want to have analyzed using the --file_type
+                        option!
+    -o OUTPUT_FILE, --output_file=OUTPUT_FILE
+                        Use this flag to override the standard output file
+                        names. All pileups will be written to one output file.
+    -g FILE, --genes_file=FILE
+                        here you need to type in the name of your gene list
+                        file (1 column) or the hittable file
+    --chr=FILE          
+                        if you simply would like to align reads against a
+                        genomic sequence you should generate a tab delimited
+                        file containing an identifier, chromosome name, start
+                        position, end position and strand
+    --gtf=annotation_file.gtf
+                        type the path to the gtf annotation file that you want
+                        to use
+    --tab=tab_file.tab  
+                        type the path to the tab file that contains the
+                        genomic reference sequence
+    --file_type=FILE_TYPE
+                        use this option to specify the file type (i.e. 'novo',
+                        'sam', 'gtf'). This will tell the program which
+                        parsers to use for processing the files. Default =
+                        'novo'
+
+pyPileup specific options::
+
+    --limit=500         
+                        with this option you can select how many reads mapped
+                        to a particular gene/ORF/region you want to count.
+                        Default = All
+    --iCLIP             
+                        This turns on the iCLIP mode and the pileups will
+                        report cross-linking site frequencies in iCLIP data in
+                        reference sequences
+
+Common options::
+
+    -v, --verbose       
+                        prints all the status messages to a file rather than
+                        the standard output
+    --ignorestrand      
+                        this flag tells the program to ignore strand
+                        information and all overlapping reads will be considered
+                        sense reads. Useful for analysing ChIP or RIP data
+    --zip=FILE          
+                        use this option to compress all the output files in a
+                        single zip file
+    --overlap=1         
+                        sets the number of nucleotides a read has to overlap
+                        with a gene before it is considered a hit. Default =
+                        1 nucleotide
+    -s genomic, --sequence=genomic
+                        with this option you can select whether you want the
+                        reads aligned to the genomic or the coding sequence.
+                        Default = genomic
+    -r 100, --range=100
+                        allows you to set the length of the UTR regions. If
+                        you set '-r 50' or '--range=50', then the program will
+                        set a fixed length (50 bp) regardless of whether the
+                        GTF file has genes with annotated UTRs.
+
+Options for novo, SAM and BAM files::
+
+    --align_quality=100, --mapping_quality=100
+                        with these options you can set the alignment quality
+                        (Novoalign) or mapping quality (SAM) threshold. Reads
+                        with qualities lower than the threshold will be
+                        ignored. Default = 0
+    --align_score=100   
+                        with this option you can set the alignment score
+                        threshold. Reads with alignment scores lower than the
+                        threshold will be ignored. Default = 0
+    -l 100, --length=100
+                        to set read length threshold. Default = 1000
+    -m 100000, --max=100000
+                        maximum number of mapped reads that will be analyzed.
+                        Default = All
+    --unique            
+                        with this option reads with multiple alignment
+                        locations will be removed. Default = Off
+    --blocks            
+                        with this option reads with the same start and end
+                        coordinates on a chromosome will only be counted once.
+                        Default = Off
+    --discarded=FILE    
+                        prints the lines from the alignments file that were
+                        discarded by the parsers. This file contains reads
+                        that were unmapped (NM), of poor quality (i.e. QC) or
+                        paired reads that were mapped to different chromosomal
+                        locations or were too far apart on the same
+                        chromosome. Useful for debugging purposes
+    -d 1000, --distance=1000
+                        this option allows you to set the maximum number of
+                        base-pairs allowed between two non-overlapping paired
+                        reads. Default = 1000
+    --mutations=delsonly
+                        Use this option to only track mutations that are of
+                        interest. For CRAC data this is usually deletions
+                        (--mutations=delsonly). For PAR-CLIP data this is
+                        usually T-C mutations (--mutations=TC). Other options
+                        are: do not report any mutations: --mutations=nomuts.
+                        Only report specific base mutations, for example only
+                        in T's, C's and G's :--mutations=[TCG]. The brackets
+                        are essential. Other nucleotide combinations are also
+                        possible
+
+	</help>
+</tool>