<?xml version="1.0" encoding="UTF-8"?>
<!--
  Galaxy wrapper for QIIME's check_id_map.py.

  Inputs : a tabular metadata mapping file plus a handful of switches.
  Outputs: out_log (the log file left by the script) and
           out_txt (the "_corrected.txt" mapping file left by the script).
-->
<tool id="check_id_map" name="Check ID Map" version="1.6.0">
  <description>
    Checks user's metadata mapping file for required data, valid
    format
  </description>

  <requirements>
    <requirement type="package">qiime</requirement>
  </requirements>

  <!--
    Command flow:
      1. Run check_id_map.py on the mapping file; each optional flag is
         emitted only when its parameter is set / non-empty.
      2. Delete the HTML report and overlib.js, which are not exposed as
         Galaxy outputs ("rm -f" so a missing file is not an error).
      3. Move the log and the corrected mapping file to the Galaxy
         output paths.

    Every substituted value is single-quoted so that shell metacharacters
    in a path or in user-entered text are never interpreted by the shell.

    The result file names are derived from the input file's basename with
    a ".txt" suffix stripped, mirroring what the original command did.
    Backticks are kept (instead of dollar-paren substitution) because the
    command is a Cheetah template and a dollar sign followed by "(" would
    be parsed as a Cheetah placeholder.

    The script call is still separated from the post-processing with ";"
    as in the original: it is not visible from this file whether the
    script exits non-zero for a mapping file that merely contains errors,
    and the log must be collected in that case too. The post-processing
    steps themselves are chained with a logical AND so a failed move is
    reflected in the exit status instead of being masked by a later
    successful one.
  -->
  <command><![CDATA[
    check_id_map.py
      -m '$mapping_fp'
      #if str($char_replace):
        -c '$char_replace'
      #end if
      #if $not_barcoded:
        -b
      #end if
      #if $variable_len_barcodes:
        -B
      #end if
      #if $disable_primer_check:
        -p
      #end if
      #if str($added_demultiplex_field):
        -j '$added_demultiplex_field'
      #end if
    ;
    rm -f "`basename '$mapping_fp' .txt`.html" overlib.js
    && mv "`basename '$mapping_fp' .txt`.log" '$out_log'
    && mv "`basename '$mapping_fp' .txt`_corrected.txt" '$out_txt'
  ]]></command>

  <inputs>
    <!-- Passed to the script as -m. -->
    <param name="mapping_fp"
           type="data"
           format="tabular"
           optional="false"
           label="Metadata mapping file"/>

    <!-- Passed as -c when non-empty. -->
    <param name="char_replace"
           type="text"
           value="_"
           optional="false"
           label="Character used to replace invalid characters found in the mapping file"
           help="Must be a valid character (alphanumeric, period, or underscore)"/>

    <!-- Boolean switches: unchecked by default; each adds a bare flag (-b, -B, -p) when checked. -->
    <param name="not_barcoded"
           type="boolean"
           checked="false"
           label="Set if barcodes are not present. BarcodeSequence header still required"/>

    <param name="variable_len_barcodes"
           type="boolean"
           checked="false"
           label="Set if variable length barcodes are present to suppress warnings about barcodes of unequal length"/>

    <param name="disable_primer_check"
           type="boolean"
           checked="false"
           label="Set to disable checks for primers. LinkerPrimerSequence header still required"/>

    <!-- Passed as -j when non-empty; empty by default, which omits the flag. -->
    <param name="added_demultiplex_field"
           type="text"
           value=""
           optional="false"
           label="Use to add a field to use in the mapping file as additional demultiplexing (can be used with or without barcodes)"
           help="All combinations of barcodes/primers and these fields must be unique. The fields must contain values that can be parsed from the fasta labels such as 'plate=R_2008_12_09'. In this case, 'plate' would be the column header and 'R_2008_12_09' would be the field data (minus quotes) in the mapping file. To use the run prefix from the fasta label, such as '&gt;FLP3FBN01ELBSX', where 'FLP3FBN01' is generated from the run ID, use '-j run_prefix' and set the run prefix to be used as the data under the column header 'run_prefix'"/>
  </inputs>

  <outputs>
    <!-- Receives the ".log" file moved by the command above. -->
    <data name="out_log" format="txt" label="${tool.name} on ${mapping_fp.name}"/>
    <!-- Receives the "_corrected.txt" file moved by the command above. -->
    <data name="out_txt" format="tabular" label="Corrected ${mapping_fp.name}"/>
  </outputs>

  <help><![CDATA[
Specifically, we check that:

1. The BarcodeSequence, LinkerPrimerSequence, and ReversePrimer fields
   have valid IUPAC DNA characters, and BarcodeSequence characters
   are non-degenerate (error)

2. The SampleID, BarcodeSequence, LinkerPrimerSequence, and Description
   headers are present (error)

3. There are not duplicate header fields (error)

4. There are not duplicate barcodes (error)

5. Barcodes are of the same length. Suppressed when
   variable_len_barcodes flag is passed (warning)

6. The headers do not contain invalid characters (alphanumeric and
   underscore only) (warning)

7. The data fields do not contain invalid characters (alphanumeric,
   underscore, space, and +-%./:,; characters) (warning)

8. SampleID fields are MIENS compliant (only alphanumeric
   and . characters). (warning)

9. There are no duplicates when the primer and variable length
   barcodes are appended (error)

10. There are no duplicates when barcodes and added demultiplex
    fields (-j option) are combined (error)

11. Data fields are not found beyond the Description column (warning)

Details about the metadata mapping file format can be found here:
http://www.qiime.org/documentation/file_formats.html#metadata-mapping-files

Errors and warnings are saved to a log file. Errors can be caused
by problems with the headers, invalid characters in barcodes or
primers, or by duplications in SampleIDs or barcodes.

Warnings can arise from invalid characters and variable length
barcodes that are not specified with the --variable_len_barcodes.
Warnings will contain a reference to the cell (row,column) that
the warning arose from.

In addition to the log file, a 'corrected_mapping' file will be
created. Any invalid characters will be replaced with '.'
characters in the SampleID fields (to enforce MIENS compliance)
and text in other data fields will be replaced with the character
specified by the -c parameter, which is an underscore '_' by
default.

If pooled primers are used, separate with a comma. For instance,
a pooled set of three 27f primers (used to increase taxonomic
coverage) could be specified in the LinkerPrimerSequence fields as
such::

    AGGGTTCGATTCTGGCTCAG,AGAGTTTGATCCTGGCTTAG,AGAATTTGATCTTGGTTCAG
  ]]></help>
</tool>