Mercurial > repos > davide-albanese > qiime_1_6_0
diff check_id_map.xml @ 0:0d8e091eb3e1 draft
Uploaded
author | davide-albanese |
---|---|
date | Fri, 08 Mar 2013 10:35:27 -0500 |
parents | |
children | 2280c43de0e2 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/check_id_map.xml Fri Mar 08 10:35:27 2013 -0500
@@ -0,0 +1,124 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Galaxy tool wrapper that runs QIIME's check_id_map.py on a metadata mapping
+  file and exposes the resulting log and corrected mapping file as datasets.
+-->
+<tool id="check_id_map" name="Check ID Map" version="1.6.0">
+  <description>
+    Checks user's metadata mapping file for required data, valid
+    format
+  </description>
+  <requirements>
+    <requirement type="package">qiime</requirement>
+  </requirements>
+  <!--
+    The command runs check_id_map.py and then tidies the working directory.
+    The rm/mv steps assume the script writes NAME.html, overlib.js, NAME.log
+    and NAME_corrected.txt there, where NAME is the input file name with a
+    trailing ".txt" removed (TODO: confirm against the installed QIIME).
+    Boolean parameters expand directly to their flag via truevalue/falsevalue,
+    and path/text substitutions are single-quoted so the shell never splits
+    them.
+  -->
+  <command>check_id_map.py
+
+    -m '$mapping_fp'
+
+    #if str($char_replace):
+      -c '$char_replace'
+    #end if
+
+    $not_barcoded
+    $variable_len_barcodes
+    $disable_primer_check
+
+    #if str($added_demultiplex_field):
+      -j '$added_demultiplex_field'
+    #end if
+    ;
+    rm `basename '$mapping_fp' .txt`'.html'
+    ;
+    rm overlib.js
+    ;
+    mv `basename '$mapping_fp' .txt`'.log' '$out_log'
+    ;
+    mv `basename '$mapping_fp' .txt`'_corrected.txt' '$out_txt'
+  </command>
+  <inputs>
+    <param name="mapping_fp" label="Metadata mapping file" optional="False" type="data" format="tabular"/>
+
+    <param name="char_replace" value="_" label="Character used to replace invalid characters found in the mapping file. Must be a valid character (alphanumeric, period, or underscore)" optional="False" type="text"/>
+
+    <param name="not_barcoded" label="Set if barcodes are not present. BarcodeSequence header still required" checked="false" truevalue="-b" falsevalue="" type="boolean"/>
+
+    <param name="variable_len_barcodes" label="Set if variable length barcodes are present to suppress warnings about barcodes of unequal length" checked="false" truevalue="-B" falsevalue="" type="boolean"/>
+
+    <param name="disable_primer_check" label="Set to disable checks for primers. LinkerPrimerSequence header still required" checked="false" truevalue="-p" falsevalue="" type="boolean"/>
+
+    <param name="added_demultiplex_field" label="Use to add a field to use in the mapping file as additional demultiplexing (can be used with or without barcodes). All combinations of barcodes/primers and these fields must be unique. The fields must contain values that can be parsed from the fasta labels such as 'plate=R_2008_12_09'. In this case, 'plate' would be the column header and 'R_2008_12_09' would be the field data (minus quotes) in the mapping file. To use the run prefix from the fasta label, such as '>FLP3FBN01ELBSX', where 'FLP3FBN01' is generated from the run ID, use '-j run_prefix' and set the run prefix to be used as the data under the column header 'run_prefix'" optional="False" type="text" value=""/>
+  </inputs>
+  <outputs>
+    <data format="txt" name="out_log" label="${tool.name} on ${mapping_fp.name}"/>
+    <data format="tabular" name="out_txt" label="Corrected ${mapping_fp.name}"/>
+  </outputs>
+<help>
+
+Specifically, we check that:
+
+1. The BarcodeSequence, LinkerPrimerSequence, and ReversePrimer fields
+   have valid IUPAC DNA characters, and BarcodeSequence characters
+   are non-degenerate (error)
+
+2. The SampleID, BarcodeSequence, LinkerPrimerSequence, and Description
+   headers are present (error)
+
+3. There are not duplicate header fields (error)
+
+4. There are not duplicate barcodes (error)
+
+5. Barcodes are of the same length. Suppressed when
+   variable_len_barcodes flag is passed (warning)
+
+6. The headers do not contain invalid characters (alphanumeric and
+   underscore only) (warning)
+
+7. The data fields do not contain invalid characters (alphanumeric,
+   underscore, space, and +-%./:,; characters) (warning)
+
+8. SampleID fields are MIENS compliant (only alphanumeric
+   and . characters). (warning)
+
+9. There are no duplicates when the primer and variable length
+   barcodes are appended (error)
+
+10. There are no duplicates when barcodes and added demultiplex
+    fields (-j option) are combined (error)
+
+11. Data fields are not found beyond the Description column (warning)
+
+Details about the metadata mapping file format can be found here:
+http://www.qiime.org/documentation/file_formats.html#metadata-mapping-files
+
+Errors and warnings are saved to a log file. Errors can be caused
+by problems with the headers, invalid characters in barcodes or
+primers, or by duplications in SampleIDs or barcodes.
+
+Warnings can arise from invalid characters and variable length
+barcodes that are not specified with the --variable_len_barcodes.
+Warnings will contain a reference to the cell (row,column) that
+the warning arose from.
+
+In addition to the log file, a 'corrected_mapping' file will be
+created. Any invalid characters will be replaced with '.'
+characters in the SampleID fields (to enforce MIENS compliance)
+and text in other data fields will be replaced with the character
+specified by the -c parameter, which is an underscore '_' by
+default.
+
+If pooled primers are used, separate with a comma. For instance,
+a pooled set of three 27f primers (used to increase taxonomic
+coverage) could be specified in the LinkerPrimerSequence fields as
+such:
+AGGGTTCGATTCTGGCTCAG,AGAGTTTGATCCTGGCTTAG,AGAATTTGATCTTGGTTCAG
+</help>
+</tool>