diff pyCRAC/pyReadAligner.xml @ 0:19b20927172d draft

Uploaded
author swebb
date Tue, 18 Jun 2013 09:11:00 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyReadAligner.xml	Tue Jun 18 09:11:00 2013 -0400
@@ -0,0 +1,368 @@
+ <tool id ="pyReadAligner" name="pyReadAligner">
+	<requirements> <!-- declares the pyCRAC package this wrapper depends on -->
+		<requirement type="package">pyCRAC</requirement>
+	</requirements>
+	<command interpreter="python">
+		/usr/local/bin/pyReadAligner.py
+		-f $ftype.input
+		--file_type $ftype.file_type
+		#if $geneOpt.alignGene == "gene":
+			-g $geneOpt.genes
+		#end if#
+		#if $geneOpt.alignGene == "chr":
+			--chr $geneOpt.chr
+		#end if#
+		#if ($ftype.file_type == "novo" or $ftype.file_type == "sam") and $ftype.disc.discard == "discard":
+			--discarded $discarded
+		#end if#
+		--gtf=$addGTF.gtf
+		--tab=$addTab.tab
+		#if ($ftype.file_type == "novo" or $ftype.file_type == "sam") and $ftype.addAlignOpt.alignoptions == "edit":
+			--align_quality=$ftype.addAlignOpt.align_quality
+			--align_score=$ftype.addAlignOpt.align_score
+			--distance=$ftype.addAlignOpt.d
+			--length=$ftype.addAlignOpt.length
+			#if int($ftype.addAlignOpt.max) > 0:
+				--max=$ftype.addAlignOpt.max
+			#end if#
+			$ftype.addAlignOpt.unique
+			$ftype.addAlignOpt.blocks
+			$ftype.addAlignOpt.mutations
+		#end if#
+		#if $addOpt.options == "edit":
+			--range=$addOpt.range
+			--overlap=$addOpt.overlap
+			$addOpt.ignore
+			-s $addOpt.sequence
+			#if int($addOpt.limit) > 0:
+				--limit=$addOpt.limit
+			#end if#
+		#end if#
+		-o $output
+	</command> <!-- template above: alignment options are emitted only for sam/novo input in edit mode; max and limit are omitted when 0 -->
+	<version_command>/usr/local/bin/pyReadAligner.py --version</version_command>
+	<inputs>
+
+
+			<conditional name="geneOpt"> <!-- region source: a gene list (passed with -g) or a chromosome coordinate file (passed with the chr option) -->
+				<param name="alignGene" type="select" label="Do you want to align reads to genes or chromosome co-ordinates?">
+					<option value="gene" selected="true">Genes</option>
+					<option value="chr">Chromosome Co-ordinates</option>
+				</param>
+				<when value="chr">
+					<param format="interval" name="chr" type="data" label="Choose a Chromosome Coordinate File"
+						help="Tab delimited text file containing an identifier, chromosome name, start position, end position and strand ('-' or '+')"/>
+				</when>
+				<when value="gene">
+					<param format="txt" name="genes" type="data" label="Choose a Gene List -g" help="Single column gene ID file"/>
+				</when>
+			</conditional>
+				<conditional name="addGTF"> <!-- GTF annotation source: an entry of the pycrac_gtf data table or a history dataset -->
+					<param name="gtfFile" type="select" label="Choose GTF File from">
+						<option value="default" selected="true">Defaults</option>
+						<option value="other">History</option>
+					</param>
+					<when value="default">
+						<param name="gtf" type="select" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates">
+							<options from_data_table="pycrac_gtf"/>
+						</param>
+					</when>
+					<when value="other">
+						<param format="gtf" name="gtf" type="data" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates"/> <!-- lower-case extension, same spelling as the gtf input-file branch of this tool -->
+					</when>
+				</conditional>
+			   <conditional name="addTab"> <!-- genomic reference source: an entry of the pycrac_tab data table or a history dataset -->
+			   	<param type="select" name="tabFile" label="Choose Genomic Reference Sequence from">
+			   		<option selected="true" value="default">Defaults</option>
+			   		<option value="other">History</option>
+			   	</param>
+			   	<when value="default">
+			   		<param type="select" name="tab" label="Genomic Reference Sequence --tab" help="Tab file containing genomic reference sequence">
+			   			<options from_data_table="pycrac_tab"/>
+			   		</param>
+			   	</when>
+			   	<when value="other">
+			   		<param type="data" name="tab" format="tabular" label="Genomic Reference Sequence --tab" help="Tab file containing genomic reference sequence"/>
+			   	</when>
+			   </conditional>
+
+
+		<conditional name="ftype">
+			<param type="select" name="file_type" label="Input File Type --file_type"> <!-- chosen value is passed to the file_type option and selects the matching when-branch -->
+				<option value="sam">Sam/BAM</option>
+				<option value="novo">Novo</option>
+				<option value="gtf">GTF</option>
+			</param>
+			<when value="sam"> <!-- inputs shown when the alignment file is SAM or BAM -->
+				<param format="sam,bam" name="input" type="data" label="Input File -f" help="Alignment file of type .sam or .bam"/>
+				<conditional name="disc"> <!-- ON enables the discarded-reads output; the when-branches add no further inputs -->
+					<param name="discard" type="select" label="Print discarded reads to a separate file">
+						<option value="" selected="true">OFF</option>
+						<option value="discard">ON</option>
+					</param>
+					<when value="discard">
+					</when>
+					<when value="">
+					</when>
+				</conditional>
+				<conditional name="addAlignOpt"> <!-- Edit exposes the alignment filters below; Default sends none of them -->
+					<param name="alignoptions" type="select" label="Alignment Options">
+						<option value="default" selected="true">Default</option>
+						<option value="edit">Edit</option>
+					</param>
+					<when value="edit">
+						<param name="mutations" type="select" label="Filter reads by mutations --mutations" help="cross-linking sites are often highlighted by deletions and/or substitutions in the reads. You can use this option to filter reads based on whether they have mutations or not.">
+							<option value="" selected="true">Off</option>
+							<option value="--mutations=delsonly">deletions</option>
+							<option value="--mutations=subsonly">substitutions</option>
+							<option value="--mutations=TC">T->C mutations</option>
+							<option value="--mutations=allmuts">all mutations</option>
+							<option value="--mutations=nomuts">no mutations</option>
+						</param> <!-- each option value is the literal flag text placed on the command line -->
+						<param format="integer" name="align_quality" type="integer" label="Align Quality --align_quality " value="0" size="5">
+							<validator type="in_range" min="0" message="Please enter a value >= 0"/>
+						</param>
+						<param format="integer" name="align_score" type="integer" label="Align Score --align_score " value="0" size="5">
+							<validator type="in_range" min="0" message="Please enter a value >= 0"/>
+						</param>
+						<param format="integer" name="max" type="integer" label="Mapped reads to read from input file --max" help="Set to 0 to align all reads." value="0" size="10">
+							<validator type="in_range" min="0" max="100000000" message="Please enter a value between 1 and 100000000 or 0 to align all reads"/>
+						</param>
+						<param format="integer" name="d" type="integer" label="Distance --distance " value="1000" size="6" help="Set the maximum number of bp allowed between two non-overlapping paired reads">
+							<validator type="in_range" min="1" message="Please enter a value >= 1"/> <!-- message states the bound actually enforced by min -->
+						</param>
+						<param format="integer" name="length" type="integer" label="Set the maximum length of reads --length" value="1000" size="7" help="Set the read length threshold between 15 and 1000">
+							<validator type="in_range" min="15" max="1000" message="Please enter a value between 15 and 1000"/>
+						</param>
+						<param name="unique" type="select" label="Remove reads with multiple alignment locations --unique">
+							<option value="" selected="true">OFF</option>
+							<option value="--unique">ON</option>
+						</param>
+						<param name="blocks" type="select" label="Only count reads with same start and end coords once --blocks">
+							<option value="" selected="true">OFF</option>
+							<option value="--blocks">ON</option>
+						</param>
+					</when>
+					<when value="default">
+					</when>
+				</conditional>
+			</when>
+			<when value="novo"> <!-- inputs shown when the alignment file is Novoalign native output -->
+				<param format="tabular" name="input" type="data" label="Input File -f" help="Alignment file of type .novo"/>
+				<conditional name="disc"> <!-- ON enables the discarded-reads output; the when-branches add no further inputs -->
+					<param name="discard" type="select" label="Print discarded reads to a separate file">
+						<option value="" selected="true">OFF</option>
+						<option value="discard">ON</option>
+					</param>
+					<when value="discard">
+					</when>
+					<when value="">
+					</when>
+				</conditional>
+				<conditional name="addAlignOpt"> <!-- Edit exposes the alignment filters below; Default sends none of them -->
+					<param name="alignoptions" type="select" label="Alignment Options">
+						<option value="default" selected="true">Default</option>
+						<option value="edit">Edit</option>
+					</param>
+					<when value="edit">
+						<param name="mutations" type="select" label="Filter reads by mutations --mutations"
+							help="cross-linking sites are often highlighted by deletions and/or substitutions in the reads. You can use this option to filter reads based on whether they have mutations or not.">
+							<option value="" selected="true">Off</option>
+							<option value="--mutations=delsonly">deletions</option>
+							<option value="--mutations=subsonly">substitutions</option>
+							<option value="--mutations=TC">T->C mutations</option>
+							<option value="--mutations=allmuts">all mutations</option>
+							<option value="--mutations=nomuts">no mutations</option>
+						</param> <!-- each option value is the literal flag text placed on the command line -->
+						<param format="integer" name="align_quality" type="integer" label="Align Quality --align_quality " value="0" size="5">
+							<validator type="in_range" min="0" message="Please enter a value >= 0"/>
+						</param>
+						<param format="integer" name="align_score" type="integer" label="Align Score --align_score " value="0" size="5">
+							<validator type="in_range" min="0" message="Please enter a value >= 0"/>
+						</param>
+						<param format="integer" name="max" type="integer" label="Mapped reads to read from input file --max" help="Set to 0 to align all reads." value="0" size="10">
+							<validator type="in_range" min="0" max="100000000" message="Please enter a value between 1 and 100000000 or 0 to align all reads"/>
+						</param>
+						<param format="integer" name="d" type="integer" label="Distance --distance " value="1000" size="6" help="Set the maximum number of bp allowed between two non-overlapping paired reads">
+							<validator type="in_range" min="1" message="Please enter a value >= 1"/> <!-- message states the bound actually enforced by min -->
+						</param>
+						<param format="integer" name="length" type="integer" label="Set the maximum length of reads --length" value="1000" size="7" help="Set the read length threshold between 15 and 1000">
+							<validator type="in_range" min="15" max="1000" message="Please enter a value between 15 and 1000"/>
+						</param>
+						<param name="unique" type="select" label="Remove reads with multiple alignment locations --unique">
+							<option value="" selected="true">OFF</option>
+							<option value="--unique">ON</option>
+						</param>
+						<param name="blocks" type="select" label="Only count reads with same start and end coords once --blocks">
+							<option value="" selected="true">OFF</option>
+							<option value="--blocks">ON</option>
+						</param>
+					</when>
+					<when value="default">
+					</when>
+				</conditional>
+			</when>
+			<when value="gtf"> <!-- GTF input: only the file itself, no discard or alignment sub-options -->
+				<param type="data" name="input" format="gtf" label="Input File -f" help="File of type .gtf"/>
+			</when>
+			  </conditional>
+			  
+			  <conditional name="addOpt"> <!-- Edit exposes the general options below; Default sends none of them -->
+			  	<param name="options" type="select" label="Standard Options">
+			  		<option value="default" selected="true">Default</option>
+			  		<option value="edit">Edit</option>
+			  	</param>
+			  	<when value="edit">
+			  		<param format="integer" name="range" type="integer" label="Range --range" value="0" size="5" help="Manually set the length of the 5' and 3' UTRs (0 to 50000)">
+			  			<validator type="in_range" min="0" max="50000" message="Please enter a value between 0 and 50000"/>
+			  		</param>
+			  		<param name="ignore" type="select" label="Ignore strand information? --ignorestrand">
+			  			<option value="" selected="true">No</option>
+			  			<option value="--ignorestrand">Yes</option>
+			  		</param>
+			  		<param format="integer" name="overlap" type="integer" label="Overlap --overlap" value="1" size="5" help="Sets the number of nucleotides a read has to overlap with a gene before it is considered a hit. ">
+			  			<validator type="in_range" min="1" message="Please enter a positive integer"/>
+			  		</param>
+			  		<param name="sequence" type="select" label="Align reads to --sequence">
+			  			<option value="genomic" selected="true">Genomic Sequence</option>
+			  			<option value="coding">Coding Sequence</option>
+			  		</param>
+			  		<param format="integer" name="limit" type="integer" label="Limit number of reads to count that map to a particular region --limit" value="0" size="15" help="Set to 0 for unlimited reads">
+			  			<validator type="in_range" min="0" message="Please enter a value of 1 or more, or set to 0 for unlimited reads"/> <!-- 1 is accepted, so the message no longer says "greater than 1" -->
+			  		</param>
+			  	</when>
+			  	<when value="default">
+			  	</when>
+			  </conditional>
+				<param name="label" type="text" format="txt" size="30" value="pyReadAligner" label="Enter output file label -o" /> <!-- used as ${label.value} in the output dataset labels; the command line passes $output to -o, not this text -->
+	</inputs>
+	<outputs> <!-- the FASTA alignment is always produced; the discarded-reads file only when the filter below is true -->
+		<data name="output" format="fasta" label="${label.value}.aligned.fasta"/>
+		<data name="discarded" format="txt" label="${label.value}_discarded.txt">
+			<filter>(ftype['file_type'] == "novo" or ftype['file_type'] == "sam") and ftype['disc']['discard'] ==  "discard"</filter>
+		</data>
+	</outputs>
+	<help>
+
+
+.. class:: infomark
+
+**pyReadAligner**
+
+pyReadAligner is part of the pyCRAC_ package. It generates multiple sequence alignments for reads mapped to individual genes or genomic regions. 
+Produces a fasta output file.
+
+   
+.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html
+		
+------
+
+**Parameter list**
+
+File input options::
+
+	-f FILE, --input_file=FILE
+						As input files you can use Novoalign native output or
+						SAM files. By default it expects data
+						from the standard input. Make sure to specify the file
+						type of the file you want to have analyzed using the
+						--file_type option!
+	-o OUTPUT_FILE, --output_file=OUTPUT_FILE
+						Use this flag to override the standard output file
+						names. All alignments will be written to one output
+						file.
+	-g FILE, --genes_file=FILE
+						here you need to type in the name of your gene list
+						file (1 column) or the hittable file
+	--chr=FILE			
+                                                if you simply would like to align reads against a
+						genomic sequence you should generate a tab delimited
+						file containing an identifier, chromosome name, start
+						position, end position and strand
+	--gtf=annotation_file.gtf
+						type the path to the gtf annotation file that you want
+						to use
+	--tab=tab_file.tab	
+                                                type the path to the tab file that contains the
+						genomic reference sequence
+	--file_type=FILE_TYPE
+						use this option to specify the file type (i.e. 'novo',
+						'sam', 'gtf'). This will tell the program which
+						parsers to use for processing the files. Default =
+						'novo'
+
+pyReadAligner specific options::
+
+	--limit=500			
+                                                with this option you can select how many reads mapped
+						to a particular gene/ORF/region you want to count.
+						Default = All
+
+Common options::
+
+	--ignorestrand		
+                                                this flag tells the program to ignore strand
+						information and all overlapping reads will be considered
+						sense reads. Useful for analysing ChIP or RIP data
+	--overlap=1			
+                                                sets the number of nucleotides a read has to overlap
+						with a gene before it is considered a hit. Default =
+						1 nucleotide
+	-s genomic, --sequence=genomic
+						with this option you can select whether you want the
+						reads aligned to the genomic or the coding sequence.
+						Default = genomic
+	-r 100, --range=100
+						allows you to set the length of the UTR regions. If
+						you set '-r 50' or '--range=50', then the program will
+						set a fixed length (50 bp) regardless of whether the
+						GTF file has genes with annotated UTRs.
+
+Options for novo, SAM and BAM files::
+
+	--align_quality=100, --mapping_quality=100
+						with these options you can set the alignment quality
+						(Novoalign) or mapping quality (SAM) threshold. Reads
+						with qualities lower than the threshold will be
+						ignored. Default = 0
+	--align_score=100	
+                                                with this option you can set the alignment score
+						threshold. Reads with alignment scores lower than the
+						threshold will be ignored. Default = 0
+	-l 100, --length=100
+						to set read length threshold. Default = 1000
+	-m 100000, --max=100000
+						maximum number of mapped reads that will be analyzed.
+						Default = All
+	--unique			      
+                                                with this option reads with multiple alignment
+						locations will be removed. Default = Off
+	--blocks			
+                                                with this option reads with the same start and end
+						coordinates on a chromosome will only be counted once.
+						Default = Off
+	--discarded=FILE	                
+                                                prints the lines from the alignments file that were
+						discarded by the parsers. This file contains reads
+						that were unmapped (NM), of poor quality (i.e. QC) or
+						paired reads that were mapped to different chromosomal
+						locations or were too far apart on the same
+						chromosome. Useful for debugging purposes
+	-d 1000, --distance=1000
+						this option allows you to set the maximum number of
+						base-pairs allowed between two non-overlapping paired
+						reads. Default = 1000
+	--mutations=delsonly
+						Use this option to only track mutations that are of
+						interest. For CRAC data this is usually deletions
+						(--mutations=delsonly). For PAR-CLIP data this is
+						usually T-C mutations (--mutations=TC). Other options
+						are: do not report any mutations: --mutations=nomuts.
+						Only report specific base mutations, for example only
+						in T's, C's and G's :--mutations=[TCG]. The brackets
+						are essential. Other nucleotide combinations are also
+						possible
+
+
+	</help>
+</tool>