diff protxml_to_gff.xml @ 0:04dc24d06ddb draft default tip

Uploaded
author iracooke
date Sat, 14 Jun 2014 18:18:41 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/protxml_to_gff.xml	Sat Jun 14 18:18:41 2014 -0400
@@ -0,0 +1,81 @@
+<tool id="protxml_to_gff" name="ProtXML to GFF" version="1.0.1">
+	<!-- Package dependencies: protk 1.3 and blast+ 2.2.29 -->
+	<requirements>
+		<requirement type="package" version="1.3">protk</requirement>
+		<requirement type="package" version="2.2.29">blast+</requirement>
+	</requirements>
+	<description>Map peptides from a protXML file to genomic coordinates</description>
+
+	<!-- Runs protxml_to_gff.rb (presumably supplied by the protk requirement; confirm).
+	     The two boolean inputs each expand to their flag when checked, else to an empty string. -->
+	<command>
+		protxml_to_gff.rb -p $protxml_file
+
+		-g $genome_fasta_file
+		-d $protein_fasta_file
+		-o $output
+
+		--threshold $peptide_threshold
+		--prot-threshold $protein_threshold
+
+		$stack_charges
+		$collapse_redundant_proteins
+
+	</command>
+
+
+
+
+	<!-- Exit codes matching range "1:" are reported at level "fatal" -->
+	<stdio>
+		<exit_code range="1:" level="fatal" description="Failure" />
+	</stdio>
+	<inputs>
+		<param name="protxml_file" type="data" format="protxml" label="ProtXML File" help="ProtXML containing combined results from all searches" />
+		<param name="genome_fasta_file" type="data" format="fasta" label="Genome fasta file" help="The genome against which peptides will be mapped" />
+		<param name="protein_fasta_file" type="data" format="fasta" label="Protein fasta file" help="The database used for ms/ms searches (must have genomic coords encoded in the fasta header)" />
+
+		<!-- Probability cut-offs; both are floats limited to the range 0 to 1 -->
+		<param name="peptide_threshold" type="float" value="0.95" min="0" max="1" label="Peptide Probability Threshold" help="Peptide Probability Threshold" />
+		<param name="protein_threshold" type="float" value="0.99" min="0" max="1" label="Protein Probability Threshold" help="Protein Probability Threshold" />
+
+		<!-- Checkbox options; truevalue and falsevalue are the strings substituted for each state -->
+		<param name="stack_charges" type="boolean" label="Stack Charges" help="Different peptide charge states get separate gff entries" truevalue="--stack-charge-states" falsevalue="" />
+		<param name="collapse_redundant_proteins" type="boolean" label="Collapse Redundant Proteins" help="Proteins that cover genomic regions already covered will be skipped" truevalue="--collapse-redundant-proteins" falsevalue="" />
+	</inputs>
+
+	<!-- Single gff3 dataset; the command refers to it as $output -->
+	<outputs>
+		<data name="output" format="gff3" />
+	</outputs>
+
+  <help>
+
+**What it does**
+
+Generates a gff file containing genomic coordinates for peptides present in a protXML file.
+
+In order for this tool to work the inputs must satisfy certain requirements. 
+
+1. The genome fasta should encode the scaffold numbers as in the following examples:
+
+>scaffoldXXX
+
+or 
+
+>scaffold_XXX
+
+where XXX represents the digits encoding the scaffold number. Any number of digits is allowed.
+
+2. The protXML should have been generated by searching a database created using the protk Generate 6 frame translation tool and the extract proteins from gff3 tool.  Both of those tools should be run with the genomic coordinates included in the output file.
+
+
+
+----
+
+**References**
+
+
+  </help>
+
+</tool>