diff check_id_map.xml @ 0:0d8e091eb3e1 draft

Uploaded
author davide-albanese
date Fri, 08 Mar 2013 10:35:27 -0500
parents
children 2280c43de0e2
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/check_id_map.xml	Fri Mar 08 10:35:27 2013 -0500
@@ -0,0 +1,119 @@
+<?xml version="1.0" ?>
+<tool id="check_id_map" name="Check ID Map" version="1.6.0">
+  <!-- One-line summary of the tool. The text is kept on a single line -->
+  <!-- because line breaks and indentation inside the element would -->
+  <!-- otherwise become part of the description string. -->
+  <description>Checks user's metadata mapping file for required data, valid format</description>
+  <!-- Declares the qiime package as a dependency; presumably this is what -->
+  <!-- makes check_id_map.py available to the command (confirm for your setup). -->
+  <requirements><requirement type="package">qiime</requirement></requirements>
+  <command>
+  ## Substituted values are single-quoted so the shell treats each as one word.
+  ## Backticks are kept for command substitution; Cheetah would parse a dollar-paren.
+  check_id_map.py
+  -m '$mapping_fp'
+  #if str($char_replace):
+  -c '$char_replace'
+  #end if
+  #if $not_barcoded:
+  -b
+  #end if
+  #if $variable_len_barcodes:
+  -B
+  #end if
+  #if $disable_primer_check:
+  -p
+  #end if
+  #if str($added_demultiplex_field):
+  -j '$added_demultiplex_field'
+  #end if
+  ;
+  ## The HTML report and overlib.js are not declared outputs, so they are removed;
+  ## -f keeps rm silent if a file was never written.
+  rm -f "`basename '$mapping_fp' .txt`.html"
+  ;
+  rm -f overlib.js
+  ;
+  ## Move the log and the corrected mapping file onto the declared outputs.
+  mv "`basename '$mapping_fp' .txt`.log" '$out_log'
+  ;
+  mv "`basename '$mapping_fp' .txt`_corrected.txt" '$out_txt'
+  </command>
+  <inputs>
+    <!-- Mapping file to validate; handed to check_id_map.py through -m. -->
+    <param name="mapping_fp" type="data" format="tabular" optional="false" label="Metadata mapping file"/>
+    <!-- Replacement character for -c; the command only adds -c when this is non-empty. -->
+    <param name="char_replace" type="text" value="_" optional="false" label="Character used to replace invalid characters found in the mapping file. Must be a valid character (alphanumeric, period, or underscore)"/>
+    <!-- Flags: each adds its switch (-b, -B, -p) to the command when set. Boolean params -->
+    <!-- declare their default with "checked"; "selected" is not a boolean param attribute. -->
+    <param name="not_barcoded" type="boolean" checked="false" label="Set if barcodes are not present. BarcodeSequence header still required"/>
+    <param name="variable_len_barcodes" type="boolean" checked="false" label="Set if variable length barcodes are present to suppress warnings about barcodes of unequal length"/>
+    <param name="disable_primer_check" type="boolean" checked="false" label="Set to disable checks for primers. LinkerPrimerSequence header still required"/>
+    <!-- Extra demultiplexing column for -j; an empty value leaves -j off the command line. -->
+    <param name="added_demultiplex_field" type="text" value="" optional="false" label="Use to add a field to use in the mapping file as additional demultiplexing (can be used with or without barcodes).  All combinations of barcodes/primers and these fields must be unique. The fields must contain values that can be parsed from the fasta labels such as 'plate=R_2008_12_09'.  In this case, 'plate' would be the column header and 'R_2008_12_09' would be the field data (minus quotes) in the mapping file.  To use the run prefix from the fasta label, such as '>FLP3FBN01ELBSX', where 'FLP3FBN01' is generated from the run ID, use '-j run_prefix' and set the run prefix to be used as the data under the column header 'run_prefix'"/>
+  </inputs>
+  <outputs><!-- Filled by the mv steps at the end of the command: log first, corrected mapping second. -->
+    <data name="out_log" format="txt" label="${tool.name} on ${mapping_fp.name}"/>
+    <data name="out_txt" format="tabular" label="Corrected ${mapping_fp.name}"/>
+  </outputs>
+<help>
+
+This tool validates a metadata mapping file. Specifically, it checks that:
+
+1. The BarcodeSequence, LinkerPrimerSequence, and ReversePrimer fields
+   have valid IUPAC DNA characters, and BarcodeSequence characters
+   are non-degenerate (error)
+
+2. The SampleID, BarcodeSequence, LinkerPrimerSequence, and Description
+   headers are present (error)
+
+3. There are no duplicate header fields (error)
+
+4. There are no duplicate barcodes (error)
+
+5. Barcodes are of the same length.  Suppressed when the
+   variable length barcodes option (-B) is set (warning)
+
+6. The headers contain only valid characters (alphanumeric and
+   underscore) (warning)
+
+7. The data fields contain only valid characters (alphanumeric,
+   underscore, space, and +-%./:,; characters) (warning)
+
+8. SampleID fields are MIENS compliant (only alphanumeric
+   and . characters). (warning)
+
+9. There are no duplicates when the primer and variable length 
+   barcodes are appended (error)
+
+10. There are no duplicates when barcodes and added demultiplex 
+    fields (-j option) are combined (error)
+
+11. Data fields are not found beyond the Description column (warning)
+
+Details about the metadata mapping file format can be found here:
+http://www.qiime.org/documentation/file_formats.html#metadata-mapping-files
+
+Errors and warnings are saved to a log file.  Errors can be caused
+by problems with the headers, invalid characters in barcodes or
+primers, or by duplications in SampleIDs or barcodes.
+
+Warnings can arise from invalid characters and from variable length
+barcodes when the variable length barcodes option (-B) is not set.
+Warnings will contain a reference to the cell (row,column) that
+the warning arose from.
+
+In addition to the log file, a corrected mapping file will be
+created.  In the SampleID fields, invalid characters are replaced
+with '.' characters (to enforce MIENS compliance); in other data
+fields, invalid characters are replaced with the character
+specified by the -c parameter, which is an underscore '_' by
+default.
+
+If pooled primers are used, separate with a comma.  For instance,
+a pooled set of three 27f primers (used to increase taxonomic
+coverage) could be specified in the LinkerPrimerSequence fields as
+such:
+AGGGTTCGATTCTGGCTCAG,AGAGTTTGATCCTGGCTTAG,AGAATTTGATCTTGGTTCAG
+</help>
+</tool>