diff pyCRAC/pyBinCollector.xml @ 0:19b20927172d draft

Uploaded
author swebb
date Tue, 18 Jun 2013 09:11:00 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyBinCollector.xml	Tue Jun 18 09:11:00 2013 -0400
@@ -0,0 +1,290 @@
+ <tool id ="pyBinCollector" name="pyBinCollector">
+	<requirements>
+		<requirement type="package">pyCRAC</requirement> <!-- package-type dependency named pyCRAC -->
+	</requirements>
+	<!-- Cheetah template: assembles the pyBinCollector.pl call from the input, the GTF/annotation choice, the optional settings and the output datasets. -->
+	<command interpreter="perl">
+		pyBinCollector.pl
+		-f $input
+		--gtf $addGTF.gtf
+		#if $addGTF.gtfFile == "default" and $addGTF.annotate.annotations == "auto":
+			--annotation $addGTF.annotate.scan.annotation
+		#else:
+			--annotation $addGTF.annotate.annotation
+		#end if#
+		#if $addOpt.options == "edit":
+			--options
+			--range $addOpt.range
+			--min_length $addOpt.min_length
+			--max_length $addOpt.max_length
+			--numberofbins $addOpt.numberofbins
+			-s $addOpt.sequence
+			#if $addOpt.limitBins.binselect == "yes":
+				--bins1 $addOpt.limitBins.bs_first --bins2 $addOpt.limitBins.bs_last
+			#end if#
+			$addOpt.ignore
+			$addOpt.oall.outputall
+		#end if#
+		-o "$input.name"
+		#if $addOpt.options == "edit" and $addOpt.oall.outputall == "--outputall":
+			--id $sd.id
+			--sd $sd
+			--ssub $ssub
+			--sdel $sdel
+			--asd $asd
+			--assub $assub
+			--asdel $asdel
+		#else:
+			--out $out
+			--id $out.id
+		#end if#
+	</command>
+	<version_command>/usr/local/bin/pyBinCollector.py --version</version_command>
+	<inputs>
+		<param name="input" type="data" format="gtf" label="Input File -f" help="pyReadCounters or pyMotif gtf output files"/> <!-- handed to the wrapper as -f -->
+
+		<!-- GTF source: an entry of the pycrac_gtf data table ("default") or a GTF dataset from the history ("other"). -->
+		<conditional name="addGTF">
+			<param name="gtfFile" type="select" label="Choose GTF File from">
+				<option value="default" selected="true">Defaults</option>
+				<option value="other">History</option>
+			</param>
+			<when value="default">
+				<param name="gtf" type="select" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates">
+					<options from_data_table="pycrac_gtf"/>
+				</param>
+				<!-- Annotation choice: the fixed value "all", free text, or a value picked from a pyGetGTFSources table. -->
+				<conditional name="annotate">
+					<param name="annotations" type="select" label="Select annotation">
+						<option value="all" selected="true">All</option>
+						<option value="manual">Enter in text box</option>
+						<option value="auto">Scan pyGetGTFSources file</option>
+					</param>
+					<when value="all">
+						<param name="annotation" type="hidden" format="txt" size="10" value="all"/>
+					</when>
+					<when value="manual">
+						<param name="annotation" type="text" format="txt" size="100" value="protein_coding" label="Select which annotation to focus search on --annotation" help="To find a list of available annotations please use pyGetGTFSources tool">
+							<validator type="empty_field" message="Please enter a value"/>
+						</param>
+					</when>
+					<when value="auto">
+						<param format="tabular" name="gtf_annotation" type="data" label="GTF annotation File (pyGetGTFSources output)" help="Tabular file containing unique list of annotations/sources in selected GTF file. Refer to pyGetGTFSources"/>
+						<conditional name="scan">
+							<param name="annotations" type="select" label="Scan this file for annotations" help="Choose the correct GTF file then choose GO">
+								<option value="wait" selected="true">Waiting</option>
+								<option value="scanning">Go</option>
+							</param>
+							<when value="wait"/> <!-- NOTE(review): no "annotation" parameter exists in this state, yet the command template reads scan.annotation whenever "auto" is chosen; confirm intended -->
+							<when value="scanning">
+								<param name="annotation" type="select" multiple="false" label="Select which annotation to focus search on --annotation">
+									<options from_dataset="gtf_annotation">
+										<column name="name" index="0"/>
+										<column name="value" index="0"/>
+									</options>
+								</param>
+							</when>
+						</conditional>
+					</when>
+				</conditional>
+			</when>
+			<!-- History GTF: in "auto" mode the choices are the unique values of column index 1 of the selected file. -->
+			<when value="other">
+				<param format="gtf" name="gtf" type="data" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates"/>
+				<conditional name="annotate">
+					<param name="annotations" type="select" label="Select annotation">
+						<option value="all" selected="true">All</option>
+						<option value="manual">Enter in text box</option>
+						<option value="auto">Scan selected file</option>
+					</param>
+					<when value="all">
+						<param name="annotation" type="hidden" format="txt" size="10" value="all"/>
+					</when>
+					<when value="manual">
+						<param name="annotation" type="text" format="txt" size="100" value="protein_coding" label="Select which annotation to focus search on --annotation" help="To find a list of available annotations please use pyGetGTFSources tool">
+							<validator type="empty_field" message="Please enter a value"/>
+						</param>
+					</when>
+					<when value="auto">
+						<param name="annotation" type="select" multiple="false" label="Select which annotation to focus search on --annotation">
+							<options from_dataset="gtf">
+								<column name="name" index="1"/>
+								<column name="value" index="1"/>
+								<filter type="unique_value" name="unique" column="1"/>
+							</options>
+						</param>
+					</when>
+				</conditional>
+			</when>
+		</conditional>
+
+
+		<!-- Optional settings: the parameters under "edit" exist only when Options is switched from Default to Edit. -->
+		<conditional name="addOpt">
+			<param name="options" type="select" label="Options">
+				<option value="default" selected="true">Default</option>
+				<option value="edit">Edit</option>
+			</param>
+			<when value="edit">
+				<param format="integer" name="range" type="integer" label="Range --range" value="0" size="5" help="Manually set the length of the 5' and 3' UTRs (0 to 50000)">
+					<validator type="in_range" min="0" max="50000" message="Please enter a value between 0 and 50000"/>
+				</param>
+				<param format="integer" name="numberofbins" type="integer" label="Set the number of bins --numberofbins" value="20" size="7" help="Set the number of bins you want to divide the genes into">
+					<validator type="in_range" min="20" max="1000" message="Please enter a value between 20 and 1000"/>
+				</param>
+				<param format="integer" name="min_length" type="integer" label="Set the minimum gene length (nt) --min_length" value="50" size="7" help="To filter the data for gene length (nucleotides)">
+					<validator type="in_range" min="20" message="Please enter a value of 20 or greater"/>
+				</param>
+				<param format="integer" name="max_length" type="integer" label="Set the maximum gene length (nt) --max_length" help="Default = 100000000" value="100000000" size="10">
+					<validator type="in_range" min="50" max="100000000" message="Please enter a value between 50 and 100000000"/>
+				</param>
+				<param name="sequence" type="select" label="What sequences do you want to run pyBinCollector on? --sequence">
+					<option value="genomic" selected="true">Genomic Sequence</option>
+					<option value="coding">Coding Sequence</option>
+					<option value="intron">Introns</option>
+					<option value="exon">Exons</option>
+					<option value="CDS">CDS</option>
+					<option value="5UTR">5UTR</option>
+					<option value="3UTR">3UTR</option>
+				</param>
+				<conditional name="limitBins">
+					<param name="binselect" type="select" label="Select sequences that map to specific bins --binselect">
+						<option value="no" selected="true">No</option>
+						<option value="yes">Yes</option>
+					</param>
+					<when value="yes">
+						<param format="integer" name="bs_first" type="integer" label="Select First Bin" value="1" size="7">
+							<validator type="in_range" min="1" message="Please enter a value greater than 0"/>
+						</param>
+						<param format="integer" name="bs_last" type="integer" label="Select Last Bin" value="2" size="7">
+							<validator type="in_range" min="2" message="Please enter a value greater than 1"/>
+						</param>
+					</when>
+					<when value="no"/>
+				</conditional>
+				<!-- The two selects below carry the literal flag text as their value; "No" is the empty string. -->
+				<param name="ignore" type="select" label="Ignore strand information? --ignorestrand">
+					<option value="" selected="true">No</option>
+					<option value="--ignorestrand">Yes</option>
+				</param>
+				<conditional name="oall">
+					<param name="outputall" type="select" label="Output all genes --outputall" help="output the normalized distribution for each individual gene, rather than making a cumulative coverage plot">
+						<option value="" selected="true">No</option>
+						<option value="--outputall">Yes</option>
+					</param>
+					<when value="--outputall"/>
+					<when value=""/>
+				</conditional>
+			</when>
+			<when value="default"/>
+		</conditional>
+		<param name="label" type="text" value="pyBinCollector" size="30" format="txt" label="Enter output file label -o"/> <!-- used in the output dataset labels; NOTE(review): the command template passes $input.name to -o, not this value; confirm -->
+	</inputs>
+
+	<outputs>
+		<data format="gtf" name="out" label="${label.value}.gtf"> <!-- cumulative result; skipped only when outputall is selected -->
+			<filter>addOpt['options'] == "default" or addOpt['oall']['outputall'] == ""</filter> <!-- addOpt['oall'] exists only under options == "edit", so options is tested first -->
+		</data>
+		<data format="txt" name="sd" label="sense_data_${label.value}.txt"> <!-- this and the five tables below are created only when outputall is selected -->
+			<filter>addOpt['options'] == "edit" and addOpt['oall']['outputall'] == "--outputall"</filter>
+		</data>
+		<data format="txt" name="ssub" label="sense_subs_${label.value}.txt">
+			<filter>addOpt['options'] == "edit" and addOpt['oall']['outputall'] == "--outputall"</filter>
+		</data>
+		<data format="txt" name="sdel" label="sense_dels_${label.value}.txt">
+			<filter>addOpt['options'] == "edit" and addOpt['oall']['outputall'] == "--outputall"</filter>
+		</data>
+		<data format="txt" name="asd" label="anti_sense_data_${label.value}.txt">
+			<filter>addOpt['options'] == "edit" and addOpt['oall']['outputall'] == "--outputall"</filter>
+		</data>
+		<data format="txt" name="assub" label="anti_sense_subs_${label.value}.txt">
+			<filter>addOpt['options'] == "edit" and addOpt['oall']['outputall'] == "--outputall"</filter>
+		</data>
+		<data format="txt" name="asdel" label="anti_sense_dels_${label.value}.txt">
+			<filter>addOpt['options'] == "edit" and addOpt['oall']['outputall'] == "--outputall"</filter>
+		</data>
+	</outputs>
+	<help>
+
+
+.. class:: infomark
+
+**pyBinCollector**
+
+pyBinCollector is part of the pyCRAC_ package. It allows the user to generate genome-wide coverage plots. It normalises gene lengths by dividing genes into a 
+fixed number of bins and then calculates the hit density in each bin. The program also allows the user to input specific bin numbers to extract 
+blocks/clusters present in these bins.
+
+   
+.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html
+        
+------
+
+**Parameter list**
+
+File input options::
+
+    -f FILE, --input_file=FILE
+                        Provide the path and name of the pyReadCounters.py or
+                        pyMotif.py GTF file. By default the program expects
+                        data from the standard input.
+    -o OUTPUT_FILE, --output_file=OUTPUT_FILE
+                        To set an output file name. Do not add a file
+                        extension. By default, if the --outputall flag is not
+                        used, the program writes to the standard output.
+    --gtf=yeast.gtf     
+                        type the path to the gtf annotation file that you want
+                        to use. Default is /usr/local/pyCRAC/db/Saccharomyces_
+                        cerevisiae.EF2.59.1.2.gtf
+
+pyBinCollector.py specific options::
+
+    -a protein_coding, --annotation=protein_coding
+                        select which annotation (i.e. protein_coding, ncRNA,
+                        sRNA, rRNA, tRNA, snoRNA, all) you would like to focus
+                        your search on. Default = all
+    --min_length=20     
+                        to set a minimum length threshold for genes. Genes
+                        shorter than the minimal length will be discarded.
+                        Default = 1
+    --max_length=10000  
+                        to set a maximum length threshold for genes. Genes
+                        larger than the maximum length will be discarded.
+                        Default = 100000000
+    -n 20, --numberofbins=20
+                        select the number of bins you want to generate.
+                        Default=20
+    --binselect=2 4     
+                        allows selection of sequences that were mapped to
+                        specific bins. This option expects two numbers, one
+                        for each bin, separated by a space. For example:
+                        --binselect 20 30.
+    --outputall         
+                        use this flag to output the normalized distribution
+                        for each individual gene, rather than making a
+                        cumulative coverage plot. Useful for making box plots
+                        or for making heat maps.
+
+Common options::
+
+    -r 100, --range=100
+                        allows you to set the length of the UTR regions. If
+                        you set '-r 50' or '--range=50', then the program will
+                        set a fixed length (50 bp) regardless of whether the
+                        GTF file has genes with annotated UTRs.
+    -s intron, --sequence=intron
+                        with this option you can select whether you want to
+                        generate bins from the coding or genomic sequence or
+                        from intron, exon, CDS, or UTR coordinates. Default =
+                        genomic
+    --ignorestrand      
+                        Ignore strand information: all reads overlapping
+                        with genomic features will be considered sense reads.
+                        Useful for analysing ChIP or RIP data
+
+
+
+ 
+
+	</help>
+</tool>