Mercurial > repos > galaxyp > validate_fasta_database
comparison validate_fasta_database.xml @ 0:48c2271171f2 draft
planemo upload commit dd9bf5d878d54362e928a834956feedf082960f3
author | galaxyp |
---|---|
date | Thu, 14 Sep 2017 16:15:15 -0400 |
parents | |
children | 9c246c2e24ad |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:48c2271171f2 |
---|---|
<!--
    Galaxy tool wrapper for the `validate-fasta-database` executable.
    Reads one FASTA dataset and writes two FASTA datasets: sequences that
    passed validation and sequences that failed it.
-->
<tool id="validate_fasta_database" name="Validate FASTA Database" version="0.1.4">
    <requirements>
        <requirement type="package" version="1.0">validate-fasta-database</requirement>
    </requirements>
    <!--
        NOTE(review): this tool declares both a <stdio> block and
        detect_errors="exit_code" on <command>. Confirm which of the two
        Galaxy honours when both are present; if detect_errors wins, the
        custom description below is never shown to the user.
    -->
    <stdio>
        <exit_code range="1" level="fatal" description="Invalid FASTA headers detected, was asked to fail"/>
    </stdio>
    <!--
        The executable takes eight positional arguments, in this order:
        input FASTA, passed-sequences output, failed-sequences output,
        crashIfInvalid, checkIsProtein, customLetters, checkHasAccession,
        minimumLength. Every value is single-quoted for the shell.
    -->
    <command detect_errors="exit_code"><![CDATA[
        validate-fasta-database
        '$inFasta'
        '$goodFastaOut'
        '$badFastaOut'
        '$crashIfInvalid'
        '$checkIsProtein'
        '$customLetters'
        '$checkHasAccession'
        '$minimumLength'
    ]]></command>
    <inputs>
        <param type="data" name="inFasta" format="fasta" label="Select input FASTA dataset"/>
        <param type="boolean" name="crashIfInvalid"
               label="Fail job if invalid FASTA headers detected?"
               value="false"/>
        <param type="boolean" name="checkIsProtein"
               label="Ensure that sequence is a valid amino acid sequence?"
               help="Checks that the sequence contains only the 20 standard amino acids (plus any optional non-standard AAs), and checks that it is not DNA or RNA"
               value="true"/>
        <param type="text" name="customLetters" value=""
               label="Optional: add one-letter codes for any non-standard amino acids that you are using."
               help="Anything that is not an upper case letter [A-Z] will be ignored."/>
        <param type="boolean" name="checkHasAccession"
               label="Only pass sequences with accession numbers?"
               value="false"/>
        <!-- min="0": a negative minimum length has no meaning for a sequence. -->
        <param type="integer" name="minimumLength"
               label="Minimum length for sequences to pass"
               value="0"
               min="0"/>
    </inputs>
    <outputs>
        <data name="goodFastaOut" format="fasta" label="Validate FASTA ${on_string}: passed"/>
        <data name="badFastaOut" format="fasta" label="Validate FASTA ${on_string}: failed"/>
    </outputs>
    <tests>
        <!-- test general filtering -->
        <test>
            <param name="inFasta" value="fastaFilteringTest_IN.fasta"/>
            <output name="goodFastaOut" file="fastaFilteringTest_OUT1.fasta"/>
            <output name="badFastaOut" file="fastaFilteringTest_OUT2.fasta"/>
        </test>

        <!-- test filtering out genetic sequences and bad protein sequences -->
        <test>
            <param name="inFasta" value="geneticFiltering.in"/>
            <param name="checkIsProtein" value="true"/>
            <output name="goodFastaOut" file="geneticFilteringGood.out"/>
            <output name="badFastaOut" file="geneticFilteringBad.out"/>
        </test>

        <!-- test filtering out sequences shorter than the minimum length -->
        <test>
            <param name="inFasta" value="length5Filtering.in"/>
            <param name="minimumLength" value="5"/>
            <output name="goodFastaOut" file="length5FilteringGood.out"/>
            <output name="badFastaOut" file="length5FilteringBad.out"/>
        </test>
    </tests>
    <help>

<![CDATA[
**Notes**

Takes a FASTA database and validates the headers using the Compomics (developers of SearchGUI and PeptideShaker) schema.
Custom FASTA databases may be in an invalid format, which causes SearchGUI to crash.

**Output**

The main output of this tool, labelled "Validate FASTA ...: passed", is a FASTA database that can be run through SearchGUI without error.
The failed sequences, labelled "Validate FASTA ...: failed", may be examined for typos and other errors.

In addition, the tool will print the databases assigned by the Compomics utility (i.e., UniProt), for a quick check of the validity of the custom FASTA database.

Sequences that may cause the tool to report an exception are those that are not valid examples of the following formats:

* UniProt
* SwissProt (starts with ">sw|" or ">SW|")
* NCBI (starts with ">gi|" or ">GI|")
* Halobacterium from Max Planck (starts with "OE")
* H Influenza, from Novartis (starts with ">hflu_")
* C Trachomatis (starts with ">C.tr\_" or "C\_trachomatis\_")
* M Tuberculosis (starts with ">M. tub")
* Saccharomyces Genome Database (contains "SGDID")
* Genome translation (ex. ">dm345\_3L-sense [2343534-234353938]")
* Genome Annotation Framework for Flexible Analysis (GAFFA) (starts with ">GAFFA")
* UPS (contains "\_HUMAN\_UPS")

Many sequences are reported as Generic, which may or may not allow for extraction of the accession number.
]]>
    </help>
    <citations>
        <citation type="bibtex">
@misc{fastaValidationTool,
    author = {The GalaxyP Team},
    date = {22 June 2017},
    year = {2017},
    title = {FASTA Database Validation Tool}
}
        </citation>
    </citations>
</tool>