changeset 0:04dc24d06ddb draft default tip

Uploaded
author iracooke
date Sat, 14 Jun 2014 18:18:41 -0400
parents
children
files README README.md protxml_to_gff.xml repository_dependencies.xml tool_dependencies.xml
diffstat 5 files changed, 100 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README	Sat Jun 14 18:18:41 2014 -0400
@@ -0,0 +1,6 @@
+This tool takes a protxml file and a reference genome and produces a gff file with genomic coordinates of all peptides in the protxml file.  The mapping process relies on the presence of genomic coordinates in the protxml file, encoded in the protein names.  To generate such a file you should run all searches against a database that was generated with the companion tool "proteindb_from_gff3".
+
+Requirements:
+This package uses protk, which must be installed separately.
+
+For instructions please see: https://github.com/iracooke/protk/#galaxy-integration
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README.md	Sat Jun 14 18:18:41 2014 -0400
@@ -0,0 +1,1 @@
+# This is my README
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/protxml_to_gff.xml	Sat Jun 14 18:18:41 2014 -0400
@@ -0,0 +1,81 @@
+<tool id="protxml_to_gff" name="ProtXML to GFF" version="1.0.1">
+	<!-- Package requirements declared for this tool, followed by its short description. -->
+	<requirements>
+		<requirement type="package" version="1.3">protk</requirement>
+		<requirement type="package" version="2.2.29">blast+</requirement>
+	</requirements>
+
+	<description>Map peptides from a protXML file to genomic coordinates</description>
+
+	<!-- Command line for protxml_to_gff.rb. Every param declared in the inputs
+	     section is passed through: dataset paths are single-quoted so that
+	     unusual characters in a path cannot split an argument, and each boolean
+	     param expands to its flag when checked and to an empty string otherwise.
+	     The collapse_redundant_proteins param was previously declared but never
+	     referenced here, so ticking it had no effect. -->
+	<command>
+		protxml_to_gff.rb
+		-p '$protxml_file'
+		-g '$genome_fasta_file'
+		-d '$protein_fasta_file'
+		-o '$output'
+		--threshold $peptide_threshold
+		--prot-threshold $protein_threshold
+		$stack_charges
+		$collapse_redundant_proteins
+	</command>
+
+
+
+
+	<!-- Any exit status of 1 or above is reported as a fatal failure. -->
+	<stdio>
+		<exit_code range="1:" level="fatal" description="Failure" />
+	</stdio>
+
+	<inputs>
+		<!-- Input datasets -->
+		<param name="protxml_file" type="data" format="protxml" label="ProtXML File" help="ProtXML containing combined results from all searches" />
+		<param name="genome_fasta_file" type="data" format="fasta" label="Genome fasta file" help="The genome against which peptides will be mapped" />
+		<param name="protein_fasta_file" type="data" format="fasta" label="Protein fasta file" help="The database used for ms/ms searches (must have genomic coords encoded in the fasta header)" />
+
+		<!-- Probability cut-offs, each constrained to the range 0..1 -->
+		<param name="peptide_threshold" type="float" value="0.95" min="0" max="1" label="Peptide Probability Threshold" help="Peptide Probability Threshold" />
+		<param name="protein_threshold" type="float" value="0.99" min="0" max="1" label="Protein Probability Threshold" help="Protein Probability Threshold" />
+
+		<!-- Boolean switches: each yields its flag when checked, an empty string otherwise -->
+		<param name="stack_charges" type="boolean" truevalue="--stack-charge-states" falsevalue="" label="Stack Charges" help="Different peptide charge states get separate gff entries" />
+
+		<param name="collapse_redundant_proteins" type="boolean" truevalue="--collapse-redundant-proteins" falsevalue="" label="Collapse Redundant Proteins" help="Proteins that cover genomic regions already covered will be skipped" />
+
+	</inputs>
+
+	<!-- Single gff3 dataset; its path is substituted for $output in the command. -->
+	<outputs>
+		<data name="output" format="gff3" />
+	</outputs>
+
+
+  <help>
+
+**What it does**
+
+Generates a gff file containing genomic coordinates for peptides present in a protXML file.
+
+In order for this tool to work the inputs must satisfy certain requirements. 
+
+1. The genome fasta should encode the scaffold numbers as in the following example
+
+>scaffoldXXX
+
+or 
+
+>scaffold_XXX
+
+where XXX represents the digits encoding the scaffold number. Any number of digits is allowed.
+
+2. The protXML should have been generated by searching a database generated using the protk Generate 6 frame translation tool and the extract proteins from gff3 tool.  Both of those tools should be run with the genomic coordinates included in the output file.
+
+
+
+----
+
+**References**
+
+
+  </help>
+
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/repository_dependencies.xml	Sat Jun 14 18:18:41 2014 -0400
@@ -0,0 +1,4 @@
+<?xml version="1.0"?>
+<!-- Repository dependency: the proteomics_datatypes repository, pinned to one changeset. -->
+<repositories description="Proteomics datatypes">
+    <repository toolshed="http://toolshed.g2.bx.psu.edu"
+                owner="iracooke"
+                name="proteomics_datatypes"
+                changeset_revision="f66f8ca7b7b9" />
+</repositories>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_dependencies.xml	Sat Jun 14 18:18:41 2014 -0400
@@ -0,0 +1,8 @@
+<?xml version="1.0"?>
+<!-- Package dependency: blast+ 2.2.29, resolved from the iuc package repository at a pinned changeset. -->
+<tool_dependency>
+
+    <package name="blast+" version="2.2.29">
+        <repository toolshed="http://toolshed.g2.bx.psu.edu"
+                    owner="iuc"
+                    name="package_blast_plus_2_2_29"
+                    changeset_revision="a2ec897aac2c" />
+    </package>
+
+</tool_dependency>