comparison qiime/split_libraries.xml @ 0:003162f90751 draft

Uploaded
author azuzolo
date Wed, 06 Jun 2012 16:40:30 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:003162f90751
1 <tool id="split_libraries" name="split_libraries" version="2.0.0">
<!--
  Galaxy tool definition for split_libraries.py. The requirement below
  declares that script as a binary dependency; the command section of this
  tool runs it through qiime_wrapper.py.
-->
2 <description>Split libraries according to barcodes specified in mapping file</description>
3 <requirements>
4 <requirement type="binary">split_libraries.py</requirement>
5 </requirements>
<!--
  Cheetah command template. It invokes qiime_wrapper.py with the three output
  datasets (each paired with a file-name pattern) and the log dataset's
  extra_files_path as output directory, followed by a split_libraries.py
  command line assembled from the tool parameters. Optional numeric options
  are emitted only when the field is non-empty; boolean parameters expand to
  their flag or to an empty string as declared in the inputs section.
  NOTE(review): how qiime_wrapper.py consumes these arguments is not visible
  in this file; confirm against the wrapper script.
-->
6 <command interpreter="python">
7 qiime_wrapper.py
8 --galaxy_datasets='^seqs\.fna$:'$sequences,'histograms\.txt:'$histograms,'split_library_log\.txt:'$log
9 --galaxy_outputdir='$log.extra_files_path'
10 split_libraries.py
11 --dir-prefix='$log.extra_files_path'
12 --map=$map
## Collect the fasta path of every repeat entry into one comma-separated value.
13 #set fnas = []
14 #for i in $inputs:
15 #set fnas = $fnas + [$i.fasta.__str__]
16 #end for
17 --fasta=#echo ','.join($fnas)
## Same for the qual files, skipping repeat entries whose optional dataset is unset.
18 #set quals = []
19 #for i in $inputs:
20 #if $i.qual != None and $i.qual.__str__ != 'None':
21 #set quals = $quals + [$i.qual.__str__]
22 #end if
23 #end for
24 #if len($quals) > 0:
25 --qual=#echo ','.join($quals)
26 #end if
## Cast to int before comparing: the parameter is a wrapper object, not a number.
## An explicit 0 is passed through so it is not replaced by the documented default of 200.
27 #if len($min_seq_length.__str__) > 0 and int($min_seq_length.__str__) >= 0:
28 --min-seq-length=$min_seq_length
29 #end if
30 #if len($max_seq_length.__str__) > 0:
31 --max-seq-length=$max_seq_length
32 #end if
33 $trim_seq_length
34 #if len($min_qual_score.__str__) > 0:
35 --min-qual-score=$min_qual_score
36 #end if
37 $keep_primer
38 $keep_barcode
39 #if len($max_ambig.__str__) > 0:
40 --max-ambig=$max_ambig
41 #end if
42 #if len($max_homopolymer.__str__) > 0:
43 --max-homopolymer=$max_homopolymer
44 #end if
45 #if len($max_primer_mismatch.__str__) > 0:
46 --max-primer-mismatch=$max_primer_mismatch
47 #end if
## Free-text values are single-quoted so the shell hands them over as one literal argument.
## NOTE(review): assumes Galaxy's text sanitizing removes single quotes from the value; confirm.
48 --barcode-type='$barcode_type'
## Cast to float before comparing; a negative value leaves the option off the command line.
49 #if len($max_barcode_errors.__str__) > 0 and float($max_barcode_errors.__str__) >= 0.:
50 --max-barcode-errors=$max_barcode_errors
51 #end if
52 #if len($start_numbering_at.__str__) > 0:
53 --start-numbering-at=$start_numbering_at
54 #end if
55 $retain_unassigned_reads
56 $disable_bc_correction
57 #if len($qual_score_window.__str__) > 0:
58 --qual_score_window=$qual_score_window
59 #end if
60 $disable_primers
61 --reverse_primers=$reverse_primers
## The mismatch count is only meaningful when reverse primer handling is switched on.
62 #if $reverse_primer_mismatches != None and $reverse_primer_mismatches.__str__ != "" and $reverse_primers.__str__!='disable':
63 --reverse_primer_mismatches=$reverse_primer_mismatches
64 #end if
65 $record_qual_scores
66 $discard_bad_windows
67 #if $median_length_filtering != None and $median_length_filtering.__str__ != "":
68 --median_length_filtering='$median_length_filtering'
69 #end if
70 #if $added_demultiplex_field != None and $added_demultiplex_field.__str__ != "":
71 --added_demultiplex_field='$added_demultiplex_field'
72 #end if
73 </command>
<!--
  Tool form. One mapping file, a repeatable group of fasta datasets each with
  an optional qual dataset, and one parameter per split_libraries.py option
  used by the command template. Boolean parameters carry the literal flag in
  truevalue and an empty falsevalue, so the template can emit them directly.
-->
74 <inputs>
75 <param name="map" type="data" format="tabular" label="map"
76 help="name of mapping file. NOTE: Must contain a header line indicating SampleID in the first column and BarcodeSequence in the second, LinkerPrimerSequence in the third. [REQUIRED]"/>
77 <repeat name="inputs" title="Input Sequences">
78 <param name="fasta" type="data" format="fasta" label="fasta"
79 help="names of fasta file [REQUIRED]"/>
80 <param name="qual" type="data" format="qual" optional="true" label="qual"
81 help="names of qual file [OPTIONAL]"/>
82 </repeat>
83 <param name="min_seq_length" type="integer" value="200" label="min-seq-length"
84 help="minimum sequence length, in nucleotides [default: 200]"/>
85 <param name="max_seq_length" type="integer" value="1000" label="max-seq-length"
86 help="maximum sequence length, in nucleotides [default: 1000]"/>
87 <param name="trim_seq_length" type="boolean" truevalue="--trim-seq-length" falsevalue="" checked="false" label="trim-seq-length"
88 help="calculate sequence lengths after trimming primers and barcodes [default: False]"/>
89 <param name="min_qual_score" type="integer" value="25" label="min-qual-score"
90 help="min average qual score allowed in read [default: 25]"/>
91 <param name="keep_primer" type="boolean" truevalue="--keep-primer" falsevalue="" checked="false" label="keep-primer"
92 help="do not remove primer from sequences"/>
93 <param name="keep_barcode" type="boolean" truevalue="--keep-barcode" falsevalue="" checked="false" label="keep-barcode"
94 help="do not remove barcode from sequences"/>
95 <param name="max_ambig" type="integer" value="0" label="max-ambig"
96 help="maximum number of ambiguous bases [default: 0]"/>
97 <param name="max_homopolymer" type="integer" value="6" label="max-homopolymer"
98 help="maximum length of homopolymer run [default: 6]"/>
99 <param name="max_primer_mismatch" type="integer" value="0" label="max-primer-mismatch"
100 help="maximum number of primer mismatches [default: 0]"/>
101 <param name="barcode_type" type="text" value="golay_12" label="barcode-type"
102 help="barcode type, hamming_8, golay_12, variable_length (will disable any barcode correction if variable_length set), or a number representing the length of the barcode, such as -b 4. [default: golay_12]"/>
103 <param name="max_barcode_errors" type="float" value="1.5" label="max-barcode-errors"
104 help="maximum number of errors in barcode [default: 1.5]"/>
105 <param name="start_numbering_at" type="integer" min="1" value="1" label="start-numbering-at"
106 help="seq id to use for the first sequence [default: 1]"/>
107 <param name="retain_unassigned_reads" type="boolean" truevalue="--retain_unassigned_reads" falsevalue="" checked="false" label="retain_unassigned_reads"
108 help="Retain sequences which are unassigned in the output sequence file [default: False]"/>
109 <param name="disable_bc_correction" type="boolean" truevalue="--disable_bc_correction" falsevalue="" checked="false" label="disable_bc_correction"
110 help="Disable attempts to find nearest corrected barcode. Can improve performance. [default: False]"/>
111 <param name="qual_score_window" type="integer" value="0" label="qual_score_window"
112 help="Enable sliding window test of quality scores. If the average score of a continuous set of w nucleotides falls below the threshold (see -s for default), the sequence is discarded. A good value would be 50. 0 (zero) means no filtering. Must pass a .qual file (see -q parameter) if this functionality is enabled. [default: 0]"/>
113 <param name="discard_bad_windows" type="boolean" truevalue="--discard_bad_windows" falsevalue="" checked="false" label="discard_bad_windows"
114 help="If the qual_score_window option (-w) is enabled, this will override the default truncation behavior and discard any sequences where a bad window is found. [default: False]"/>
115 <param name="disable_primers" type="boolean" truevalue="--disable_primers" falsevalue="" checked="false" label="disable_primers"
116 help="Disable primer usage when demultiplexing. Should be enabled for unusual circumstances, such as analyzing Sanger sequence data generated with different primers. [default: False]"/>
117 <param name="reverse_primers" type="select" label="reverse_primers"
118 help="Enable removal of the reverse primer and any subsequent sequence from the end of each read. To enable this, there has to be a 'ReversePrimer' column in the mapping file. Primers are required to be in IUPAC format and written in the 5' to 3' direction. Valid options are 'disable', 'truncate_only', and 'truncate_remove'. 'truncate_only' will remove the primer and subsequent sequence data from the output read and will not alter output of sequences where the primer cannot be found. 'truncate_remove' will flag sequences where the primer cannot be found to not be written and will record the quantity of such failed sequences in the log file. [default: disable]">
119 <option value="disable" selected="true">disable</option>
120 <option value="truncate_only">truncate_only</option>
121 <option value="truncate_remove">truncate_remove</option>
122 </param>
123 <param name="reverse_primer_mismatches" type="integer" value="0" label="reverse_primer_mismatches"
124 help="Set number of allowed mismatches for reverse primers. [default: 0]"/>
125 <param name="record_qual_scores" type="boolean" truevalue="--record_qual_scores" falsevalue="" checked="false" label="record_qual_scores"
126 help="Enables recording of quality scores for all sequences that are recorded. If this option is enabled, a file named seqs_filtered.qual will be created in the output directory, and will contain the same sequence IDs in the seqs.fna file and sequence quality scores matching the bases present in the seqs.fna file. [default: False]"/>
127 <param name="median_length_filtering" type="text" value="" label="median_length_filtering"
128 help="Disables minimum and maximum sequence length filtering, and instead calculates the median sequence length and filters the sequences based upon the number of median absolute deviations specified by this parameter. Any sequences with lengths outside the number of deviations will be removed. [default: None]"/>
129 <param name="added_demultiplex_field" type="text" value="" label="added_demultiplex_field"
130 help="Use this option to add a field to use in the mapping file as an additional demultiplexing option to the barcode. All combinations of barcodes and the values in these fields must be unique. The fields must contain values that can be parsed from the fasta labels such as 'plate==R_2008_12_09'. In this case, 'plate' would be the column header and 'R_2008_12_09' would be the field data (minus quotes) in the mapping file. To use the run prefix from the fasta label, such as '>FLP3FBN01ELBSX', where 'FLP3FBN01' is generated from the run ID, enter 'run_prefix' in the field and set the run prefix to be used as the data under the column header 'run_prefix'. [default: None]"/>
131 </inputs>
<!--
  Output datasets. The command line pairs each one with a file-name pattern:
  log with split_library_log.txt, histograms with histograms.txt and
  sequences with seqs.fna.
-->
132 <outputs>
133 <data name="log" format="txt" label="${tool.name} on ${on_string}: log"/>
134 <data name="histograms" format="txt" label="${tool.name} on ${on_string}: histograms"/>
135 <data name="sequences" format="fasta" label="${tool.name} on ${on_string}: fasta"/>
136 </outputs>
137 <tests>
<!-- TODO: no functional tests are defined for this tool. -->
138 </tests>
139 <help>For more information, see split_libraries_ in the Qiime documentation.
140
141 Updated and validated 01/19/12 by Amanda Zuzolo, Microbiome Analysis Center, George Mason University, Fairfax, VA
142
143 Qiime integration courtesy Jim Johnson, Minnesota Supercomputing Institute, University of Minnesota, Minneapolis, MN
144
145 .. _split_libraries: http://qiime.org/scripts/split_libraries.html
146 </help>
147 </tool>
148