comparison protxml_to_gff.xml @ 0:04dc24d06ddb draft default tip

Uploaded
author iracooke
date Sat, 14 Jun 2014 18:18:41 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:04dc24d06ddb
1 <tool id="protxml_to_gff" name="ProtXML to GFF" version="1.0.1">
2 <requirements> <!-- Packages this wrapper declares as dependencies: protk 1.3 and blast+ 2.2.29 -->
3 <requirement version="1.3" type="package">protk</requirement>
4 <requirement version="2.2.29" type="package">blast+</requirement>
5 </requirements>
6 <!-- One-line summary of what the tool does -->
7 <description>Map peptides from a protXML file to genomic coordinates</description>
8 <!-- Runs protxml_to_gff.rb with every declared input; each boolean param expands to its truevalue flag when checked and to an empty string otherwise. Keep comments outside the command element so they never end up in the command text. -->
9 <command>
10 protxml_to_gff.rb -p $protxml_file
11
12 -g $genome_fasta_file
13
14 -d $protein_fasta_file
15
16 -o $output
17
18 --threshold $peptide_threshold
19
20 --prot-threshold $protein_threshold
21
22 $stack_charges
23 $collapse_redundant_proteins
24 </command>
25
26
27
28 <!-- Any exit code of 1 or greater is reported as a fatal error with the description "Failure" -->
29 <stdio>
30 <exit_code level="fatal" range="1:" description="Failure"/>
31 </stdio>
32
33 <inputs> <!-- Dataset inputs: one protXML file and two fasta files -->
34 <param name="protxml_file" type="data" format="protxml" label="ProtXML File" help="ProtXML containing combined results from all searches"/>
35 <param name="genome_fasta_file" type="data" format="fasta" label="Genome fasta file" help="The genome against which peptides will be mapped"/>
36 <param name="protein_fasta_file" type="data" format="fasta" label="Protein fasta file" help="The database used for ms/ms searches (must have genomic coords encoded in the fasta header)"/>
37 <!-- Probability cut-offs: floats restricted to the range 0 to 1 -->
38 <param name="peptide_threshold" type="float" value="0.95" min="0" max="1" label="Peptide Probability Threshold" help="Peptide Probability Threshold"/>
39 <param name="protein_threshold" type="float" value="0.99" min="0" max="1" label="Protein Probability Threshold" help="Protein Probability Threshold"/>
40 <!-- Boolean switches: each yields its truevalue flag when checked and an empty string when unchecked -->
41 <param name="stack_charges" type="boolean" truevalue="--stack-charge-states" falsevalue="" label="Stack Charges" help="Different peptide charge states get separate gff entries"/>
42
43 <param name="collapse_redundant_proteins" type="boolean" truevalue="--collapse-redundant-proteins" falsevalue="" label="Collapse Redundant Proteins" help="Proteins that cover genomic regions already covered will be skipped"/>
44
45 </inputs>
46 <!-- Single gff3 result dataset; the command passes it to the script as $output via the -o option -->
47 <outputs>
48 <data name="output" format="gff3"/>
49 </outputs>
50
51
52 <help>
53
54 **What it does**
55
56 Generates a gff file containing genomic coordinates for peptides present in a protXML file.
57
58 In order for this tool to work the inputs must satisfy certain requirements.
59
60 1. The genome fasta should encode the scaffold numbers as in one of the following examples:
61
62 >scaffoldXXX
63
64 or
65
66 >scaffold_XXX
67
68 where XXX represents the digits encoding the scaffold number. Any number of digits is allowed.
69
70 2. The protXML should have been generated by searching a database generated using the protk "Generate 6 frame translation" tool and the "extract proteins from gff3" tool. Both of those tools should be run with the genomic coordinates included in the output file.
71
72
73
74 ----
75
76 **References**
77
78
79 </help>
80
81 </tool>