Mercurial > repos > iracooke > protxml_to_gff
diff protxml_to_gff.xml @ 0:04dc24d06ddb draft default tip
Uploaded
author | iracooke |
---|---|
date | Sat, 14 Jun 2014 18:18:41 -0400 |
parents | |
children |
line wrap: on
line diff
<tool id="protxml_to_gff" name="ProtXML to GFF" version="1.0.1">
    <!--
        Galaxy tool wrapper that runs the protxml_to_gff.rb script on a
        protXML file, a genome fasta file and a protein fasta file, and
        collects the result as a single gff3 dataset.
    -->
    <requirements>
        <requirement type="package" version="1.3">protk</requirement>
        <requirement type="package" version="2.2.29">blast+</requirement>
    </requirements>

    <description>Map peptides from a protXML file to genomic coordinates</description>

    <!--
        Command line template. Dataset paths are quoted so that a path
        containing spaces or shell metacharacters is passed as one argument.
        The two boolean params expand to their truevalue flag when checked
        and to an empty string otherwise, so they are left unquoted on
        purpose (a quoted empty string would become an empty argument).
    -->
    <command>
        protxml_to_gff.rb
        -p '$protxml_file'
        -g '$genome_fasta_file'
        -d '$protein_fasta_file'
        -o '$output'
        --threshold $peptide_threshold
        --prot-threshold $protein_threshold
        $stack_charges
        $collapse_redundant_proteins
    </command>

    <!-- Any non-zero exit code from the script marks the job as failed. -->
    <stdio>
        <exit_code range="1:" level="fatal" description="Failure" />
    </stdio>

    <inputs>
        <!-- Input datasets -->
        <param name="protxml_file"
               type="data"
               format="protxml"
               help="ProtXML containing combined results from all searches"
               label="ProtXML File" />
        <param name="genome_fasta_file"
               type="data"
               format="fasta"
               help="The genome against which peptides will be mapped"
               label="Genome fasta file" />
        <param name="protein_fasta_file"
               type="data"
               format="fasta"
               help="The database used for ms/ms searches (must have genomic coords encoded in the fasta header)"
               label="Protein fasta file" />

        <!-- Probability cut-offs; both are restricted to the range 0 to 1 -->
        <param name="peptide_threshold"
               help="Peptide Probability Threshold"
               type="float"
               value="0.95"
               min="0"
               max="1"
               label="Peptide Probability Threshold" />
        <param name="protein_threshold"
               help="Protein Probability Threshold"
               type="float"
               value="0.99"
               min="0"
               max="1"
               label="Protein Probability Threshold" />

        <!-- Optional flags; each one must also be referenced in the command template above -->
        <param name="stack_charges"
               type="boolean"
               label="Stack Charges"
               help="Different peptide charge states get separate gff entries"
               truevalue="--stack-charge-states"
               falsevalue="" />
        <param name="collapse_redundant_proteins"
               type="boolean"
               label="Collapse Redundant Proteins"
               help="Proteins that cover genomic regions already covered will be skipped"
               truevalue="--collapse-redundant-proteins"
               falsevalue="" />
    </inputs>

    <outputs>
        <data format="gff3" name="output" />
    </outputs>

    <!-- The help text below is kept at column 0 because its whitespace is part of the rendered content. -->
    <help>

**What it does**

Generates a gff file containing genomic coordinates for peptides present in a protXML file.

In order for this tool to work the inputs must satisfy certain requirements.

1. The genome fasta should encode the scaffold numbers as in the following example

>scaffoldXXX

or

>scaffold_XXX

where XXX represent digits encoding the scaffold number. Any number of digits are allowed

2. The protXML should have been generated by searching a database generated using the protk Generate 6 frame translation tool and the extract proteins from gff3 tool. Both those tools should be run with the genomics coordinates included in the output file.



----

**References**


    </help>

</tool>