view qiime/split_libraries.xml @ 0:003162f90751 draft

Uploaded
author azuzolo
date Wed, 06 Jun 2012 16:40:30 -0400
parents
children
line wrap: on
line source

<tool id="split_libraries" name="split_libraries" version="2.0.0">
 <description>Split libraries according to barcodes specified in mapping file</description>
 <requirements>
  <requirement type="binary">split_libraries.py</requirement>
 </requirements>
 <!-- Cheetah template for the job command line: qiime_wrapper.py is invoked with the
      Galaxy output mappings, followed by split_libraries.py and the options assembled
      from the tool parameters. -->
 <command interpreter="python">
  qiime_wrapper.py
  ## NOTE(review): qiime_wrapper.py is deliberately kept as the very first token of this
  ## template, ahead of any comment. Confirm how the target Galaxy release resolves it.
  ## Each galaxy_datasets entry pairs a file-name regex with one of the output datasets.
  --galaxy_datasets='^seqs\.fna$:'$sequences,'histograms\.txt:'$histograms,'split_library_log\.txt:'$log
  --galaxy_outputdir='$log.extra_files_path'
  split_libraries.py
  --dir-prefix='$log.extra_files_path'
  --map='$map'
  ## Gather the fasta file of every repeat entry into one comma-separated list.
  #set fnas = []
  #for i in $inputs:
    #set fnas = $fnas + [$i.fasta.__str__]
  #end for
  --fasta=#echo ','.join($fnas)
  ## qual files are optional per repeat entry; only the supplied ones are passed on.
  #set quals = []
  #for i in $inputs:
    #if $i.qual != None and $i.qual.__str__ != 'None':
      #set quals = $quals + [$i.qual.__str__]
    #end if
  #end for
  #if len($quals) > 0:
    --qual=#echo ','.join($quals)
  #end if
  ## Numeric options are emitted whenever a value is present. An explicit 0 is passed
  ## through as well, so that it is not silently replaced by the script's own default.
  #if str($min_seq_length).strip() not in ('', 'None'):
    --min-seq-length=$min_seq_length
  #end if
  #if str($max_seq_length).strip() not in ('', 'None'):
    --max-seq-length=$max_seq_length
  #end if
  $trim_seq_length
  #if str($min_qual_score).strip() not in ('', 'None'):
    --min-qual-score=$min_qual_score
  #end if
  $keep_primer
  $keep_barcode
  #if str($max_ambig).strip() not in ('', 'None'):
    --max-ambig=$max_ambig
  #end if
  #if str($max_homopolymer).strip() not in ('', 'None'):
    --max-homopolymer=$max_homopolymer
  #end if
  #if str($max_primer_mismatch).strip() not in ('', 'None'):
    --max-primer-mismatch=$max_primer_mismatch
  #end if
  ## Free-text values are single-quoted so the shell neither splits nor globs them.
  --barcode-type='$barcode_type'
  ## A negative value means: do not pass the option. The comparison is done on an
  ## explicitly converted float rather than on the parameter object itself.
  #set bc_errors = str($max_barcode_errors).strip()
  #if $bc_errors not in ('', 'None') and float($bc_errors) >= 0:
    --max-barcode-errors=$bc_errors
  #end if
  #if str($start_numbering_at).strip() not in ('', 'None'):
    --start-numbering-at=$start_numbering_at
  #end if
  $retain_unassigned_reads
  $disable_bc_correction
  #if str($qual_score_window).strip() not in ('', 'None'):
    --qual_score_window=$qual_score_window
  #end if
  $disable_primers
  --reverse_primers=$reverse_primers
  ## The mismatch count is only passed when reverse primer handling is enabled.
  #if str($reverse_primer_mismatches).strip() not in ('', 'None') and str($reverse_primers) != 'disable':
    --reverse_primer_mismatches=$reverse_primer_mismatches
  #end if
  $record_qual_scores
  $discard_bad_windows
  #if str($median_length_filtering).strip() not in ('', 'None'):
    --median_length_filtering='$median_length_filtering'
  #end if
  #if str($added_demultiplex_field).strip() not in ('', 'None'):
    --added_demultiplex_field='$added_demultiplex_field'
  #end if
 </command>
 <!-- Tool form definition. Every parameter name below is referenced by the command template. -->
 <inputs>
  <!-- Mapping file and the sequence / quality file pairs to demultiplex. -->
  <param name="map" type="data" format="tabular" label="map"
   help="name of mapping file. NOTE: Must contain a header line indicating SampleID in the first column and BarcodeSequence in the second, LinkerPrimerSequence in the third. [REQUIRED]"/>
  <repeat name="inputs" title="Input Sequences">
    <param name="fasta" type="data" format="fasta" label="fasta"
     help="names of fasta file [REQUIRED]"/>
    <param name="qual" type="data" format="qual" optional="true" label="qual"
     help="names of qual file [OPTIONAL]"/>
  </repeat>
  <!-- Length and quality filters. Counts and lengths cannot be negative, hence min="0". -->
  <param name="min_seq_length" type="integer" min="0" value="200" label="min-seq-length"
   help="minimum sequence length, in nucleotides [default: 200]"/>
  <param name="max_seq_length" type="integer" min="0" value="1000" label="max-seq-length"
   help="maximum sequence length, in nucleotides [default: 1000]"/>
  <param name="trim_seq_length" type="boolean" truevalue="--trim-seq-length" falsevalue="" checked="false" label="trim-seq-length"
   help="calculate sequence lengths after trimming primers and barcodes [default: False]"/>
  <param name="min_qual_score" type="integer" min="0" value="25" label="min-qual-score"
   help="min average qual score allowed in read [default: 25]"/>
  <param name="keep_primer" type="boolean" truevalue="--keep-primer" falsevalue="" checked="false" label="keep-primer"
   help="do not remove primer from sequences"/>
  <param name="keep_barcode" type="boolean" truevalue="--keep-barcode" falsevalue="" checked="false" label="keep-barcode"
   help="do not remove barcode from sequences"/>
  <param name="max_ambig" type="integer" min="0" value="0" label="max-ambig"
   help="maximum number of ambiguous bases [default: 0]"/>
  <param name="max_homopolymer" type="integer" min="0" value="6" label="max-homopolymer"
   help="maximum length of homopolymer run [default: 6]"/>
  <param name="max_primer_mismatch" type="integer" min="0" value="0" label="max-primer-mismatch"
   help="maximum number of primer mismatches [default: 0]"/>
  <!-- Barcode handling. A negative max_barcode_errors makes the command template omit the option. -->
  <param name="barcode_type" type="text" value="golay_12" label="barcode-type"
   help="barcode type, hamming_8, golay_12, variable_length (will disable any barcode correction if variable_length set), or a number representing the length of the barcode, such as -b 4.  [default: golay_12]"/>
  <param name="max_barcode_errors" type="float" value="1.5" label="max-barcode-errors"
   help="maximum number of errors in barcode [default: 1.5]"/>
  <param name="start_numbering_at" type="integer" min="1" value="1" label="start-numbering-at"
   help="seq id to use for the first sequence [default: 1]"/>
  <param name="retain_unassigned_reads" type="boolean" truevalue="--retain_unassigned_reads" falsevalue="" checked="false" label="retain_unassigned_reads"
   help="Retain sequences which are unassigned in the output sequence file [default: False]"/>
  <param name="disable_bc_correction" type="boolean" truevalue="--disable_bc_correction" falsevalue="" checked="false" label="disable_bc_correction"
   help="Disable attempts to find nearest corrected barcode. Can improve performance. [default: False]"/>
  <!-- Sliding-window quality filtering. -->
  <param name="qual_score_window" type="integer" min="0" value="0" label="qual_score_window"
   help="Enable sliding window test of quality scores.  If the average score of a continuous set of w nucleotides falls below the threshold (see -s for default), the sequence is discarded. A good value would be 50. 0 (zero) means no filtering. Must pass a .qual file (see -q parameter) if this functionality is enabled. [default: 0]"/>
  <param name="discard_bad_windows" type="boolean" truevalue="--discard_bad_windows" falsevalue="" checked="false" label="discard_bad_windows"
   help="If the qual_score_window option (-w) is enabled, this will override the default truncation behavior and discard any sequences where a bad window is found.  [default: False]"/>
  <!-- Primer handling. -->
  <param name="disable_primers" type="boolean" truevalue="--disable_primers" falsevalue="" checked="false" label="disable_primers"
   help="Disable primer usage when demultiplexing.  Should be enabled for unusual circumstances, such as analyzing Sanger sequence data generated with different primers.  [default: False]"/>
  <param name="reverse_primers" type="select" label="reverse_primers"
   help="Enable removal of the reverse primer and any subsequent sequence from the end of each read.  To enable this, there has to be a 'ReversePrimer' column in the mapping file. Primers are required to be in IUPAC format and written in the 5' to 3' direction.  Valid options are 'disable', 'truncate_only', and 'truncate_remove'.  'truncate_only' will remove the primer and subsequent sequence data from the output read and will not alter output of sequences where the primer cannot be found. 'truncate_remove' will flag sequences where the primer cannot be found to not be written and will record the quantity of such failed sequences in the log file. [default: disable]">
    <option value="disable" selected="true">disable</option>
    <option value="truncate_only">truncate_only</option>
    <option value="truncate_remove">truncate_remove</option>
  </param>
  <param name="reverse_primer_mismatches" type="integer" min="0" value="0" label="reverse_primer_mismatches"
   help="Set number of allowed mismatches for reverse primers. [default: 0]"/>
  <!-- Additional outputs and optional free-text settings (left empty to omit the option). -->
  <param name="record_qual_scores" type="boolean" truevalue="--record_qual_scores" falsevalue="" checked="false" label="record_qual_scores"
   help="Enables recording of quality scores for all sequences that are recorded.  If this option is enabled, a file named seqs_filtered.qual will be created in the output directory, and will contain the same sequence IDs in the seqs.fna file and sequence quality scores matching the bases present in the seqs.fna file. [default: False]"/>
  <param name="median_length_filtering" type="text" value="" label="median_length_filtering"
   help="Disables minimum and maximum sequence length filtering, and instead calculates the median sequence length and filters the sequences based upon the number of median absolute deviations specified by this parameter. Any sequences with lengths outside the number of deviations will be removed. [default: None]"/>
  <param name="added_demultiplex_field" type="text" value="" label="added_demultiplex_field"
   help="Use this option to add a field to use in the mapping file as an additional demultiplexing option to the barcode. All combinations of barcodes and the values in these fields must be unique. The fields must contain values that can be parsed from the fasta labels such as 'plate==R_2008_12_09'. In this case, 'plate' would be the column header and 'R_2008_12_09' would be the field data (minus quotes) in the mapping file. To use the run prefix from the fasta label, such as '>FLP3FBN01ELBSX', where 'FLP3FBN01' is generated from the run ID, enter 'run_prefix' in the field and set the run prefix to be used as the data under the column header 'run_prefix'. [default: None]"/>
 </inputs>
 <!-- Result datasets; their names are the variables used in the command template's
      galaxy_datasets mapping, and the log dataset also supplies the extra files directory. -->
 <outputs>
  <data name="log" format="txt" label="${tool.name} on ${on_string}: log"/>
  <data name="histograms" format="txt" label="${tool.name} on ${on_string}: histograms"/>
  <data name="sequences" format="fasta" label="${tool.name} on ${on_string}: fasta"/>
 </outputs>
 <tests>
 </tests>
 <help>For more information, see split_libraries_ in the QIIME documentation.
 
Updated and validated 01/19/12 by Amanda Zuzolo, Microbiome Analysis Center, George Mason University, Fairfax, VA

QIIME integration courtesy of Jim Johnson, Minnesota Supercomputing Institute, University of Minnesota, Minneapolis, MN
 
.. _split_libraries: http://qiime.org/scripts/split_libraries.html
 </help>
</tool>