Mercurial > repos > iracooke > protxml_to_gff
changeset 0:04dc24d06ddb draft default tip
Uploaded
author | iracooke |
---|---|
date | Sat, 14 Jun 2014 18:18:41 -0400 |
parents | |
children | |
files | README README.md protxml_to_gff.xml repository_dependencies.xml tool_dependencies.xml |
diffstat | 5 files changed, 100 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README Sat Jun 14 18:18:41 2014 -0400 @@ -0,0 +1,6 @@ +This tool takes a protxml file and a reference genome and produces a gff file with genomic coordinates of all peptides in the protxml file. The mapping process relies on the presence of genomic coordinates in the protxml file, encoded in the protein names. To generate such a file you should run all searches against a database that was generated with the companion tool "proteindb_from_gff3". + +Requirements: +This package uses protk which must be installed separately. + +For instructions please see: https://github.com/iracooke/protk/#galaxy-integration
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README.md Sat Jun 14 18:18:41 2014 -0400 @@ -0,0 +1,1 @@ +# This is my README
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/protxml_to_gff.xml Sat Jun 14 18:18:41 2014 -0400 @@ -0,0 +1,81 @@ +<tool id="protxml_to_gff" name="ProtXML to GFF" version="1.0.1"> + <requirements> + <requirement type="package" version="1.3">protk</requirement> + <requirement type="package" version="2.2.29">blast+</requirement> + </requirements> + + <description>Map peptides from a protXML file to genomic coordinates</description> + <!-- The two boolean params (stack_charges, collapse_redundant_proteins) expand to their truevalue flag when checked and to an empty string otherwise. --> + <command> + protxml_to_gff.rb -p $protxml_file + + -g $genome_fasta_file + + -d $protein_fasta_file + + -o $output + + --threshold $peptide_threshold + + --prot-threshold $protein_threshold + + $stack_charges + $collapse_redundant_proteins + </command> + + <!-- Any exit code of 1 or above from the command is treated as a fatal job failure. --> + + + <stdio> + <exit_code range="1:" level="fatal" description="Failure" /> + </stdio> + + <inputs> + <param name="protxml_file" type="data" format="protxml" help="ProtXML containing combined results from all searches" label="ProtXML File" /> + <param name="genome_fasta_file" type="data" format="fasta" help="The genome against which peptides will be mapped" label="Genome fasta file" /> + <param name="protein_fasta_file" type="data" format="fasta" help="The database used for ms/ms searches (must have genomic coords encoded in the fasta header)" label="Protein fasta file" /> + + <param name="peptide_threshold" help="Peptide Probability Threshold" type="float" value="0.95" min="0" max="1" label="Peptide Probability Threshold" /> + <param name="protein_threshold" help="Protein Probability Threshold" type="float" value="0.99" min="0" max="1" label="Protein Probability Threshold" /> + + <param name="stack_charges" type="boolean" label="Stack Charges" help="Different peptide charge states get separate gff entries" truevalue="--stack-charge-states" falsevalue=""/> + + <param name="collapse_redundant_proteins" type="boolean" label="Collapse Redundant Proteins" help="Proteins that cover genomic regions already covered will be skipped" truevalue="--collapse-redundant-proteins" falsevalue=""/> + + </inputs> + + 
<outputs> + <data format="gff3" name="output" /> + </outputs> + + + <help> + +**What it does** + +Generates a gff file containing genomic coordinates for peptides present in a protXML file. + +In order for this tool to work the inputs must satisfy certain requirements. + +1. The genome fasta should encode the scaffold numbers as in the following example + +>scaffoldXXX + +or + +>scaffold_XXX + +where XXX represents digits encoding the scaffold number. Any number of digits is allowed. + +2. The protXML should have been generated by searching a database generated using the protk Generate 6 frame translation tool and the extract proteins from gff3 tool. Both those tools should be run with the genomic coordinates included in the output file. + + + +---- + +**References** + + + </help> + +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/repository_dependencies.xml Sat Jun 14 18:18:41 2014 -0400 @@ -0,0 +1,4 @@ +<?xml version="1.0"?> +<repositories description="Proteomics datatypes"> <!-- Pins the proteomics_datatypes repository (owner iracooke) at changeset f66f8ca7b7b9; presumably this supplies the protxml datatype the tool's input uses (TODO confirm). --> + <repository changeset_revision="f66f8ca7b7b9" name="proteomics_datatypes" owner="iracooke" toolshed="http://toolshed.g2.bx.psu.edu" /> + </repositories>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_dependencies.xml Sat Jun 14 18:18:41 2014 -0400 @@ -0,0 +1,8 @@ +<?xml version="1.0"?> +<tool_dependency> + <!-- Only blast+ 2.2.29 is declared here, resolved from the iuc package_blast_plus_2_2_29 repository. protk is not listed in this file; the README says it must be installed separately. --> + <package name="blast+" version="2.2.29"> + <repository changeset_revision="a2ec897aac2c" name="package_blast_plus_2_2_29" owner="iuc" toolshed="http://toolshed.g2.bx.psu.edu" /> + </package> + +</tool_dependency>