<?xml version="1.0" encoding="UTF-8"?>
<!--
  Galaxy tool wrapper around the protxml_to_gff.rb script.

  The tool takes a protXML file, a genome fasta file and the protein fasta
  database used for the searches, and writes a single gff3 dataset.
-->
<tool id="protxml_to_gff" name="ProtXML to GFF" version="1.0.1">

    <!-- Packages that must be available on the job's PATH. -->
    <requirements>
        <requirement type="package" version="1.3">protk</requirement>
        <requirement type="package" version="2.2.29">blast+</requirement>
    </requirements>

    <description>Map peptides from a protXML file to genomic coordinates</description>

    <!--
      Command template. Dataset paths are single-quoted so that the shell
      treats each one as a single argument. The two boolean params expand to
      their truevalue flag when checked and to an empty string otherwise, so
      they are appended as-is. The collapse_redundant_proteins param was
      previously declared in the inputs but never passed to the script.
    -->
    <command><![CDATA[
        protxml_to_gff.rb
            -p '$protxml_file'
            -g '$genome_fasta_file'
            -d '$protein_fasta_file'
            -o '$output'
            --threshold $peptide_threshold
            --prot-threshold $protein_threshold
            $stack_charges
            $collapse_redundant_proteins
    ]]></command>

    <!-- Any exit code of 1 or above marks the job as failed. -->
    <stdio>
        <exit_code range="1:" level="fatal" description="Failure" />
    </stdio>

    <inputs>
        <!-- Input datasets. -->
        <param name="protxml_file" type="data" format="protxml"
               label="ProtXML File"
               help="ProtXML containing combined results from all searches" />
        <param name="genome_fasta_file" type="data" format="fasta"
               label="Genome fasta file"
               help="The genome against which peptides will be mapped" />
        <param name="protein_fasta_file" type="data" format="fasta"
               label="Protein fasta file"
               help="The database used for ms/ms searches (must have genomic coords encoded in the fasta header)" />

        <!-- Probability cut-offs, each restricted to the range 0 to 1. -->
        <param name="peptide_threshold" type="float" value="0.95" min="0" max="1"
               label="Peptide Probability Threshold"
               help="Peptide Probability Threshold" />
        <param name="protein_threshold" type="float" value="0.99" min="0" max="1"
               label="Protein Probability Threshold"
               help="Protein Probability Threshold" />

        <!-- Optional switches; each contributes its truevalue to the command line when checked. -->
        <param name="stack_charges" type="boolean"
               label="Stack Charges"
               help="Different peptide charge states get separate gff entries"
               truevalue="--stack-charge-states" falsevalue="" />
        <param name="collapse_redundant_proteins" type="boolean"
               label="Collapse Redundant Proteins"
               help="Proteins that cover genomic regions already covered will be skipped"
               truevalue="--collapse-redundant-proteins" falsevalue="" />
    </inputs>

    <outputs>
        <data format="gff3" name="output" />
    </outputs>

    <!-- Help text is kept at column 0 so that no extra indentation is introduced into it. -->
    <help><![CDATA[

**What it does**

Generates a gff file containing genomic coordinates for peptides present in a protXML file.

In order for this tool to work the inputs must satisfy certain requirements.

1. The genome fasta should encode the scaffold numbers as in the following example

>scaffoldXXX

or

>scaffold_XXX

where XXX represent digits encoding the scaffold number. Any number of digits are allowed

2. The protXML should have been generated by searching a database generated using the protk Generate 6 frame translation tool and the extract proteins from gff3 tool. Both those tools should be run with the genomics coordinates included in the output file.



----

**References**


    ]]></help>

</tool>