<!--
view split_libraries.xml @ 0:e5c3175506b7 default tip

Initial tool configs for qiime, most need work.
author Jim Johnson <jj@umn.edu>
date Sun, 17 Jul 2011 10:30:11 -0500
parents
children
line wrap: on
line source
-->

<tool id="split_libraries" name="split_libraries" version="1.2.1">
 <!-- Short summary of what the tool does. -->
 <description>Split libraries according to barcodes specified in mapping file</description>
 <!-- Declares split_libraries.py as a binary this wrapper depends on. -->
 <requirements>
  <requirement type="binary">split_libraries.py</requirement>
 </requirements>
 <!-- Cheetah template that builds the command line. The wrapper script name is
      kept as the very first token of the command text; NOTE(review): Galaxy is
      expected to resolve that token against the tool directory when an
      interpreter is given, so explanatory notes are placed after it as
      Cheetah comments. -->
 <command interpreter="python">
  qiime_wrapper.py
  ## Wrapper arguments: the Galaxy scratch directory, then a comma separated
  ## list pairing a produced file name (pattern) with the dataset path of each
  ## declared output. Presumably qiime_wrapper.py moves the matching files
  ## into those datasets; confirm against the wrapper.
  --galaxy_tmpdir='$__new_file_path__'
  --galaxy_datasets='^seqs.fna$:'$sequences,'histograms.txt:'$histograms,'split_library_log.txt:'$log
  split_libraries.py
  --map=$map
  ## Walk the repeat once: every entry contributes its fasta path, and its
  ## qual path when a qual dataset was supplied. Each list is handed to the
  ## script as one comma separated value.
  #set fnas = []
  #set quals = []
  #for i in $inputs:
    #set fnas = $fnas + [$i.fasta.__str__]
    #if $i.qual != None and $i.qual.__str__ != 'None':
      #set quals = $quals + [$i.qual.__str__]
    #end if
  #end for
  --fasta=#echo ','.join($fnas)
  #if len($quals) > 0:
    --qual=#echo ','.join($quals)
  #end if
  ## Optional numeric fields: a blank field is assumed to render as an empty
  ## string, in which case the flag is omitted so the script falls back to its
  ## own default. Numeric tests convert the rendered text explicitly instead of
  ## comparing the Galaxy parameter object with a number, and a value of 0 is
  ## passed through rather than being silently replaced by the script default.
  #if len($min_seq_length.__str__) > 0 and int($min_seq_length.__str__) >= 0:
    --min-seq-length=$min_seq_length
  #end if
  #if len($max_seq_length.__str__) > 0:
    --max-seq-length=$max_seq_length
  #end if
  $trim_seq_length
  #if len($min_qual_score.__str__) > 0:
    --min-qual-score=$min_qual_score
  #end if
  $keep_primer
  $keep_barcode
  #if len($max_ambig.__str__) > 0:
    --max-ambig=$max_ambig
  #end if
  #if len($max_homopolymer.__str__) > 0:
    --max-homopolymer=$max_homopolymer
  #end if
  #if len($max_primer_mismatch.__str__) > 0:
    --max-primer-mismatch=$max_primer_mismatch
  #end if
  ## Free text from the form: quoted so the shell always sees a single argument.
  --barcode-type='$barcode_type'
  ## --dir-prefix=$dir_prefix
  ## Blank guard added: this field is optional, and an unguarded test emitted
  ## the flag with an empty value. Negative values still suppress the flag.
  #if len($max_barcode_errors.__str__) > 0 and float($max_barcode_errors.__str__) >= 0:
    --max-barcode-errors=$max_barcode_errors
  #end if
  #if len($start_numbering_at.__str__) > 0:
    --start-numbering-at=$start_numbering_at
  #end if
  $remove_unassigned
  $disable_bc_correction
  #if len($qual_score_window.__str__) > 0:
    --qual_score_window=$qual_score_window
  #end if
  $disable_primers
  --reverse_primers=$reverse_primers
  ## NOTE(review): per its help text this switch makes the script write
  ## seqs_filtered.qual, which is not listed in --galaxy_datasets above, so
  ## that file does not appear to be captured as an output; confirm.
  $record_qual_scores
  $discard_bad_windows
 </command>
 <inputs>
  <!-- Form fields for the tool. Each name below is referenced by the Cheetah
       command template; boolean fields render their truevalue (a complete
       command line switch) or an empty string. -->
  <param name="map" type="data" format="tabular" label="map"
   help="name of mapping file. NOTE: Must contain a header line indicating SampleID in the first column and BarcodeSequence in the second, LinkerPrimerSequence in the third. [REQUIRED]"/>
  <!-- One entry per sequence file. The qual dataset is optional: the command
       template already skips entries without one, so the field must not be
       mandatory in the form. -->
  <repeat name="inputs" title="Input Sequences">
    <param name="fasta" type="data" format="fasta" label="fasta"
     help="names of fasta files, comma-delimited [REQUIRED]"/>
    <param name="qual" type="data" format="qual" optional="true" label="qual"
     help="names of qual files, comma-delimited [default: ('NO', 'DEFAULT')]"/>
  </repeat>
  <!-- Optional numeric fields: when left blank the command template omits the
       corresponding flag. -->
  <param name="min_seq_length" type="integer" optional="true" value="200" label="min-seq-length"
   help="minimum sequence length, in nucleotides [default: 200]"/>
  <param name="max_seq_length" type="integer" optional="true" value="1000" label="max-seq-length"
   help="maximum sequence length, in nucleotides [default: 1000]"/>
  <param name="trim_seq_length" type="boolean" truevalue="--trim-seq-length" falsevalue="" checked="false" label="trim-seq-length"
   help="calculate sequence lengths after trimming primers and barcodes [default: False]"/>
  <param name="min_qual_score" type="integer" optional="true" value="25" label="min-qual-score"
   help="min average qual score allowed in read [default: 25]"/>
  <param name="keep_primer" type="boolean" truevalue="--keep-primer" falsevalue="" checked="false" label="keep-primer"
   help="do not remove primer from sequences"/>
  <param name="keep_barcode" type="boolean" truevalue="--keep-barcode" falsevalue="" checked="false" label="keep-barcode"
   help="do not remove barcode from sequences"/>
  <param name="max_ambig" type="integer" optional="true" value="0" label="max-ambig"
   help="maximum number of ambiguous bases [default: 0]"/>
  <param name="max_homopolymer" type="integer" optional="true" value="6" label="max-homopolymer"
   help="maximum length of homopolymer run [default: 6]"/>
  <param name="max_primer_mismatch" type="integer" optional="true" value="0" label="max-primer-mismatch"
   help="maximum number of primer mismatches [default: 0]"/>
  <param name="barcode_type" type="text" value="golay_12" label="barcode-type"
   help="barcode type, hamming_8, golay_12, variable_length (will disable any barcode correction if variable_length set), or a number representing the length of the barcode, such as -b 4.  [default: golay_12]"/>
  <!-- Not needed in galaxy 
  <param name="dir_prefix" type="text" value="." label="dir-prefix"
   help="directory prefix for output files [default: .]"/>
  -->
  <param name="max_barcode_errors" type="float" optional="true" value="1.5" label="max-barcode-errors"
   help="maximum number of errors in barcode [default: 1.5]"/>
  <param name="start_numbering_at" type="integer" optional="true" min="1" value="1" label="start-numbering-at"
   help="seq id to use for the first sequence [default: 1]"/>
  <param name="remove_unassigned" type="boolean" truevalue="--remove_unassigned" falsevalue="" checked="false" label="remove_unassigned"
   help="remove sequences which are Unassigned from             output [default: False]"/>
  <param name="disable_bc_correction" type="boolean" truevalue="--disable_bc_correction" falsevalue="" checked="false" label="disable_bc_correction"
   help="Disable attempts to find nearest corrected barcode.  Can improve performance. [default: False]"/>
  <param name="qual_score_window" type="integer" optional="true" value="0" label="qual_score_window"
   help="Enable sliding window test of quality scores.  If the average score of a continuous set of w nucleotides falls below the threshold (see -s for default), the sequence is discarded. A good value would be 50. 0 (zero) means no filtering. Must pass a .qual file (see -q parameter) if this functionality is enabled. [default: 0]"/>
  <param name="discard_bad_windows" type="boolean" truevalue="--discard_bad_windows" falsevalue="" checked="false" label="discard_bad_windows"
   help="If the qual_score_window option (-w) is enabled, this will override the default truncation behavior and discard any sequences where a bad window is found.  [default: False]"/>
  <param name="disable_primers" type="boolean" truevalue="--disable_primers" falsevalue="" checked="false" label="disable_primers"
   help="Disable primer usage when demultiplexing.  Should be enabled for unusual circumstances, such as analyzing Sanger sequence data generated with different primers.  [default: False]"/>
  <!-- The first option listed is "disable", matching the default stated in the help. -->
  <param name="reverse_primers" type="select" label="reverse_primers"
   help="Enable removal of the reverse primer and any subsequence sequence from the end of each read.  To enable this, there has to be a 'ReversePrimer' column in the mapping file. Primers a required to be in IUPAC format and written in the 5' to  3' direction.  Valid options are 'disable', 'truncate_only', and 'truncate_remove'.  'truncate_only' will remove the primer and subsequence sequence data from the output read and will not alter output of sequences where the primer cannot be found. 'truncate_remove' will flag sequences where the primer cannot be found to not be written and will record the quantity of such failed sequences in the log file. [default: disable]">
    <option value="disable" >disable</option>
    <option value="truncate_only">truncate_only</option>
    <option value="truncate_remove">truncate_remove</option>
  </param>
  <param name="record_qual_scores" type="boolean" truevalue="--record_qual_scores" falsevalue="" checked="false" label="record_qual_scores"
   help="Enables recording of quality scores for all sequences that are recorded.  If this option is enabled, a file named seqs_filtered.qual will be created in the output directory, and will contain the same sequence IDs in the seqs.fna file and sequence quality scores matching the bases present in the seqs.fna file. [default: False]"/>
 </inputs>
 <outputs>
  <!-- Three result datasets: the run log, the histograms file and the
       demultiplexed fasta. The command's galaxy_datasets argument pairs a
       produced file name with each of these names. -->
  <data name="log" format="txt" label="${tool.name} on ${on_string}: log"/>
  <data name="histograms" format="txt" label="${tool.name} on ${on_string}: histograms"/>
  <data name="sequences" format="fasta" label="${tool.name} on ${on_string}: fasta"/>
 </outputs>
 <!-- TODO: no functional tests are defined for this tool yet. -->
 <tests>
 </tests>
 <!-- TODO: the help section is still empty. -->
 <help>
  
 </help>
</tool>