diff pyCRAC/pyMotif.xml @ 0:19b20927172d draft

Uploaded
author swebb
date Tue, 18 Jun 2013 09:11:00 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pyCRAC/pyMotif.xml	Tue Jun 18 09:11:00 2013 -0400
@@ -0,0 +1,222 @@
+ <tool id ="pyMotif" name="pyMotif">
+	<requirements> <!-- declares the pyCRAC package this wrapper depends on -->
+		<requirement type="package">pyCRAC</requirement>
+	</requirements>
+	<command interpreter="perl"> 
+	pyMotif.pl
+	-f $input
+	--gtf=$addGTF.gtf
+
+       	#if $addGTF.gtfFile == "default" and $addGTF.annotate.annotations == "auto":
+	  --annotation $addGTF.annotate.scan.annotation
+	#else:
+	  --annotation $addGTF.annotate.annotation
+	#end if#
+
+	--tab=$addTab.tab
+
+	#if $addOpt.options == "edit":
+	        --options
+	        --k_min $addOpt.kmin
+		--k_max $addOpt.kmax
+		--numberofkmers=$addOpt.numberofkmers
+		--overlap $addOpt.overlap
+		--range $addOpt.range
+	#end if#
+	-o "$input.name"
+	--id $count.id
+	--count $count
+	--random $random
+	--features $features
+	--zscores $zscores
+	</command> <!-- Cheetah template that runs pyMotif.pl under perl. The annotation value is read from the nested "scan" selector only for a built-in GTF in "auto" mode, otherwise from the "annotate" conditional; the k-mer, overlap and range flags are emitted only when "Standard options" is "edit". NOTE(review): $addGTF.annotate.scan.annotation is defined only when the scan selector is "scanning"; confirm how the "wait" state is meant to be handled. -->
+	<version_command>/usr/local/bin/pyMotif.py --version</version_command> <!-- Command whose output reports the tool version. NOTE(review): the path is hard-coded and targets pyMotif.py, whereas the command section launches pyMotif.pl; confirm both are present on the Galaxy host. -->
+	<inputs>
+		<param name="input" type="data" format="gtf" label="Input File --input_file" help="File of type .gtf"/> <!-- GTF dataset handed to the wrapper script as its -f argument -->
+                <conditional name="addTab"> <!-- genomic reference sequence: built-in data table entry or a history dataset -->
+                    <param name="tabFile" type="select" label="Choose Genomic Reference Sequence from">
+                        <option value="default" selected="true">Defaults</option>
+                        <option value="other">History</option>
+                    </param>
+                    <when value="default">
+                        <param name="tab" type="select" label="Genomic Reference Sequence --tab" help="Tab file containing genomic reference sequence">
+                            <options from_data_table="pycrac_tab"/>
+                        </param>
+                    </when>
+                    <when value="other">
+                        <param name="tab" type="data" format="tabular" label="Genomic Reference Sequence --tab" help="Tab file containing genomic reference sequence"/>
+                    </when>
+                </conditional>
+		<conditional name="addGTF"> <!-- GTF annotation source (built-in table or history dataset) plus the annotation filter -->
+			<param name="gtfFile" type="select" label="Choose GTF File from">
+				<option value="default" selected="true">Defaults</option>
+				<option value="other">History</option>
+			</param>
+			<when value="default">
+				<param name="gtf" type="select" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates">
+					<options from_data_table="pycrac_gtf"/>
+				</param>
+				<!-- annotation filter for a built-in GTF: all, typed by hand, or picked from a pyGetGTFSources dataset -->
+				<conditional name="annotate">
+					<param name="annotations" type="select" label="Select annotation">
+						<option value="all" selected="true">All</option>
+						<option value="manual">Enter in text box</option>
+						<option value="auto">Scan pyGetGTFSources file</option>
+					</param>
+					<when value="all">
+						<param name="annotation" type="hidden" value="all" format="txt" size="10"/>
+					</when>
+					<when value="manual">
+						<param name="annotation" type="text" value="protein_coding" format="txt" size="100" label="Select which annotation to focus search on --annotation" help="To find a list of available annotations please use pyGetGTFSources tool">
+							<validator type="empty_field" message="Please enter a value"/>
+						</param>
+					</when>
+					<when value="auto">
+						<param name="gtf_annotation" type="data" format="tabular" label="GTF annotation File (pyGetGTFSources output)" help="Tabular file containing unique list of annotations/sources in selected GTF file. Refer to pyGetGTFSources"/>
+						<conditional name="scan">
+							<param name="annotations" type="select" label="Scan this file for annotations" help="Choose the correct GTF file then choose GO">
+								<option value="wait" selected="true">Waiting</option>
+								<option value="scanning">Go</option>
+							</param>
+							<when value="wait">
+							</when>
+							<when value="scanning">
+								<param name="annotation" type="select" multiple="false" label="Select which annotation to focus search on --annotation">
+									<options from_dataset="gtf_annotation">
+										<column name="name" index="0"/>
+										<column name="value" index="0"/>
+									</options>
+								</param>
+							</when>
+						</conditional>
+					</when>
+				</conditional>
+			</when>
+			<when value="other">
+				<param name="gtf" type="data" format="gtf" label="GTF File --gtf" help="GTF file containing gene ID co-ordinates"/>
+				<conditional name="annotate">
+					<param name="annotations" type="select" label="Select annotation">
+						<option value="all" selected="true">All</option>
+						<option value="manual">Enter in text box</option>
+						<option value="auto">Scan selected file</option>
+					</param>
+					<when value="all">
+						<param name="annotation" type="hidden" value="all" format="txt" size="10"/>
+					</when>
+					<when value="manual">
+						<param name="annotation" type="text" value="protein_coding" format="txt" size="100" label="Select which annotation to focus search on --annotation" help="To find a list of available annotations please use pyGetGTFSources tool">
+							<validator type="empty_field" message="Please enter a value"/>
+						</param>
+					</when>
+					<when value="auto">
+						<param name="annotation" type="select" multiple="false" label="Select which annotation to focus search on --annotation">
+							<options from_dataset="gtf">
+								<column name="name" index="1"/>
+								<column name="value" index="1"/>
+								<filter type="unique_value" name="unique" column="1"/>
+							</options>
+						</param>
+					</when>
+				</conditional>
+			</when>
+		</conditional>
+		<conditional name="addOpt"> <!-- optional tuning parameters; the command emits them only when "edit" is selected -->
+		  <param name="options" type="select"  label="Standard options">
+		    <option value="default" selected="true">Default</option>
+		    <option value="edit">Edit</option>
+		  </param>
+		  <when value="edit"> <!-- each param below maps to the wrapper flag named in its label -->
+		    <param format="integer" name="kmin" type="integer" label="Minimum k-mer Length --k_min " value="4" size="6" help="Set the minimal k-mer length">
+		      <validator type="in_range" min="1" message="Please enter a value >= 1"/>
+		    </param>
+		    <param format="integer" name="kmax" type="integer" label="Maximum k-mer Length --k_max" value="8" size="6" help="Set the maximal k-mer length">
+		      <validator type="in_range" min="1" message="Please enter a value >= 1"/>
+		    </param>
+		    <param format="integer" name="numberofkmers" type="integer" label="Maximum number of k-mers in output file --numberofkmers" value="1000" size="6" help="Set the maximum number of k-mers in output">
+		      <validator type="in_range" min="0" message="Please enter a value >= 0"/>
+		    </param>
+		    <param format="integer" name="range" type="integer" label="Range --range" value="0" size="5" help="Manually set the length of the 5' and 3' UTRs 0>50000">
+		      <validator type="in_range" min="0" max="50000" message="Please enter a value between 0 and 50000"/>
+		    </param>
+		    <param format="integer" name="overlap" type="integer" label="Overlap --overlap" value="1" size="5" help="Sets the number of nucleotides a read has to overlap with a gene before it is considered a hit. ">
+		      <validator type="in_range" min="1" message="Please enter a positive integer"/>
+		    </param>
+		  </when>
+		  <when value="default"> <!-- no extra inputs; the tuning flags are left off the command line -->
+		  </when>
+		</conditional>
+                <param name="label" type="text" format="txt" size="30" value="pyMotif" label="Enter output file label -o" /> <!-- Text used as the prefix of the output dataset labels (${label.value} in the outputs section). NOTE(review): the command passes "$input.name" to -o rather than this value; confirm that is intended. -->
+	</inputs>
+
+	<outputs> <!-- four result datasets; the command hands each one to the wrapper through the flag of the same name -->
+		<data name="zscores" format="tabular" label="${label.value}_k-mer_Z_scores.txt"/>
+		<data name="count" format="tabular" label="${label.value}_data_k-mers_count.txt"/>
+		<data name="features" format="gtf" label="${label.value}_top_k-mers_in_features.gtf"/>
+		<data name="random" format="tabular" label="${label.value}_random_k-mers_count.txt"/>
+	</outputs>
+	<help>
+
+.. class:: infomark
+
+**pyMotif**
+
+pyMotif is part of the pyCRAC_ package. It looks for enriched sequence motifs in high-throughput sequencing data and produces a GTF-type output file
+with coordinates and Z-scores for the enriched motifs. The GTF file can be visualised in genome browsers.
+   
+.. _pyCRAC: http://sandergranneman.bio.ed.ac.uk/Granneman_Lab/pyCRAC_software.html
+        
+------
+
+**Parameter list**
+
+File input options::
+
+    -f intervals.gtf, --input_file=intervals.gtf
+                        Provide the path to an interval gtf file. By default
+                        it expects data from the standard input.
+    -o OUTPUT_FILE, --output_file=OUTPUT_FILE
+                        Use this flag to override the standard file names. Do
+                        NOT add an extension.
+    --gtf=annotation_file.gtf
+                        type the path to the gtf annotation file that you want
+                        to use
+    --tab=tab_file.tab  
+                        type the path to the tab file that contains the
+                        genomic reference sequence
+
+pyMotif specific options::
+
+    --k_min=4           
+                        this option allows you to set the shortest k-mer
+                        length. Default = 4.
+    --k_max=6           
+                        this option allows you to set the longest k-mer
+                        length. Default = 8.
+    -n 100, --numberofkmers=100
+                        choose the maximum number of enriched k-mer sequences
+                        you want to have reported in output files. Default =
+                        1000
+
+pyCRAC common options::
+
+    -a protein_coding, --annotation=protein_coding
+                        select which annotation (i.e. protein_coding, ncRNA,
+                        sRNA, rRNA, snoRNA, snRNA, depending on the source of
+                        your GTF file) you would like to focus your search on.
+                        Default = all annotations
+    -r 100, --range=100
+                        allows you to add regions flanking the genomic
+                        feature. If you set '-r 50' or '--range=50', then the
+                        program will add 50 nucleotides to each feature on
+                        each side regardless of whether the GTF file has genes
+                        with annotated UTRs.
+    --overlap=1         
+                        sets the number of nucleotides a motif has to overlap
+                        with a genomic feature before it is considered a hit.
+                        Default =  1 nucleotide
+
+
+
+
+	</help>
+</tool>