annotate validate_fasta_database.xml @ 1:3b18022a7613 draft

"planemo upload commit 6b8665071f8d1bc9a26491e8f9a85b708169b500"
author galaxyp
date Thu, 26 Nov 2020 20:25:21 +0000
parents 48c2271171f2
children 9c246c2e24ad
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
1 <tool id="validate_fasta_database" name="Validate FASTA Database" version="0.1.4">
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
2 <requirements>
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
3 <requirement type="package" version="1.0">validate-fasta-database</requirement>
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
4 </requirements>
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
5 <stdio>
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
6 <exit_code range="1" level="fatal" description="Invalid FASTA headers detected, was asked to fail"/>
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
7 </stdio>
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
8 <command detect_errors="exit_code"><![CDATA[
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
9 validate-fasta-database
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
10 '$inFasta'
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
11 '$goodFastaOut'
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
12 '$badFastaOut'
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
13 '$crashIfInvalid'
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
14 '$checkIsProtein'
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
15 '$customLetters'
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
16 '$checkHasAccession'
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
17 '$minimumLength'
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
18 ]]></command>
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
19 <inputs>
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
20 <param type="data" name="inFasta" format="fasta" label="Select input FASTA dataset"/>
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
21 <param type="boolean" name="crashIfInvalid"
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
22 label="Fail job if invalid FASTA headers detected?"
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
23 value="false"/>
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
24 <param type="boolean" name="checkIsProtein"
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
25 label="Ensure that sequence is a valid amino acid sequence?"
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
26 help="Checks that sequence only contains the 20 essential amino
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
27 acids (and optional non-standard AAs), and checks that it is not DNA or RNA"
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
28 value="true"/>
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
29 <param type="text" name="customLetters" value=""
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
30 label="Optional: add one letter codes for any non-standard amino acids that you are using. "
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
31 help="Anything that is not an upper case letter [A-Z] will be ignored."/>
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
32 <param type="boolean" name="checkHasAccession"
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
33 label="Only pass sequences with accession numbers?"
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
34 value="false"/>
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
35 <param type="integer" name="minimumLength"
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
36 label="Minimum length for sequences to pass"
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
37 value="0"/>
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
38 </inputs>
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
39 <outputs>
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
40 <data name="goodFastaOut" format="fasta" label="Validate FASTA ${on_string}: passed"/>
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
41 <data name="badFastaOut" format="fasta" label="Validate FASTA ${on_string}: failed"/>
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
42 </outputs>
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
43 <tests>
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
44 <!-- test general filtering -->
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
45 <test>
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
46 <param name="inFasta" value="fastaFilteringTest_IN.fasta"/>
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
47 <output name="goodFastaOut" file="fastaFilteringTest_OUT1.fasta" />
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
48 <output name="badFastaOut" file="fastaFilteringTest_OUT2.fasta" />
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
49 </test>
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
50
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
51 <!--test filtering out genetic sequences and bad protein sequences-->
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
52 <test>
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
53 <param name="inFasta" value="geneticFiltering.in"/>
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
54 <param name="checkIsProtein" value="true"/>
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
55 <output name="goodFastaOut" file="geneticFilteringGood.out"/>
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
56 <output name="badFastaOut" file="geneticFilteringBad.out"/>
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
57 </test>
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
58
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
59 <test>
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
60 <param name="inFasta" value="length5Filtering.in"/>
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
61 <param name="minimumLength" value="5"/>
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
62 <output name="goodFastaOut" file="length5FilteringGood.out"/>
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
63 <output name="badFastaOut" file="length5FilteringBad.out"/>
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
64 </test>
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
65 </tests>
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
66 <help>
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
67
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
68 <![CDATA[
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
69 **Notes**
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
70
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
71 Takes a FASTA database and validates the headers using the Compomics (developers of SearchGUI and PeptideShaker) schema.
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
72 Custom FASTA databases may be in an invalid format, which causes SearchGUI to crash.
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
73
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
74 **Output**
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
75
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
76 The main output of this tool (the dataset whose name ends in ": passed") is a FASTA database that can be run through SearchGUI without error.
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
77 The failed sequences (the dataset whose name ends in ": failed") may be examined for typos and other errors.
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
78
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
79 In addition, the tool will print the databases assigned by the Compomics utility (e.g., UniProt), for a quick check of the validity of the custom FASTA database.
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
80
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
81 Sequences that may cause the tool to report an exception are those that are not valid examples of the following formats:
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
82 * UniProt
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
83 * SwissProt (starts with ">sw|" or ">SW|")
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
84 * NCBI (starts with ">gi|" or ">GI|")
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
85 * Halobacterium from Max Planck (starts with "OE")
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
86 * H Influenza, from Novartis (starts with ">hflu_")
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
87 * C Trachomatis (starts with ">C.tr\_" or "C\_trachomatis\_")
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
88 * M Tuberculosis (starts with ">M. tub")
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
89 * Saccharomyces Genome Database (contains "SGDID")
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
90 * Genome translation (e.g., ">dm345\_3L-sense [2343534-234353938]")
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
91 * Genome Annotation Framework for Flexible Analysis (GAFFA) (starts with ">GAFFA")
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
92 * UPS (contains "\_HUMAN\_UPS")
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
93
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
94 Many sequences are reported as Generic, which may or may not allow for extraction of the accession number.
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
95 ]]>
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
96 </help>
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
97 <citations>
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
98 <citation type="bibtex">
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
99 @misc{fastaValidationTool,
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
100 author = {The GalaxyP Team},
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
101 date = {22 June 2017},
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
102 title = {FASTA Database Validation Tool}
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
103 }
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
104 </citation>
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
105 </citations>
48c2271171f2 planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
galaxyp
parents:
diff changeset
106 </tool>