annotate qiime/split_libraries.xml @ 0:003162f90751 draft

Uploaded
author azuzolo
date Wed, 06 Jun 2012 16:40:30 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
003162f90751 Uploaded
azuzolo
parents:
diff changeset
1 <tool id="split_libraries" name="split_libraries" version="2.0.0">
003162f90751 Uploaded
azuzolo
parents:
diff changeset
2 <description>Split libraries according to barcodes specified in mapping file</description>
003162f90751 Uploaded
azuzolo
parents:
diff changeset
3 <requirements>
003162f90751 Uploaded
azuzolo
parents:
diff changeset
4 <requirement type="binary">split_libraries.py</requirement>
003162f90751 Uploaded
azuzolo
parents:
diff changeset
5 </requirements>
003162f90751 Uploaded
azuzolo
parents:
diff changeset
6 <command interpreter="python">
003162f90751 Uploaded
azuzolo
parents:
diff changeset
7 qiime_wrapper.py
003162f90751 Uploaded
azuzolo
parents:
diff changeset
8 --galaxy_datasets='^seqs\.fna$:'$sequences,'histograms\.txt:'$histograms,'split_library_log\.txt:'$log
003162f90751 Uploaded
azuzolo
parents:
diff changeset
9 --galaxy_outputdir='$log.extra_files_path'
003162f90751 Uploaded
azuzolo
parents:
diff changeset
10 split_libraries.py
003162f90751 Uploaded
azuzolo
parents:
diff changeset
11 --dir-prefix='$log.extra_files_path'
003162f90751 Uploaded
azuzolo
parents:
diff changeset
12 --map=$map
003162f90751 Uploaded
azuzolo
parents:
diff changeset
13 #set fnas = []
003162f90751 Uploaded
azuzolo
parents:
diff changeset
14 #for i in $inputs:
003162f90751 Uploaded
azuzolo
parents:
diff changeset
15 #set fnas = $fnas + [$i.fasta.__str__]
003162f90751 Uploaded
azuzolo
parents:
diff changeset
16 #end for
003162f90751 Uploaded
azuzolo
parents:
diff changeset
17 --fasta=#echo ','.join($fnas)
003162f90751 Uploaded
azuzolo
parents:
diff changeset
18 #set quals = []
003162f90751 Uploaded
azuzolo
parents:
diff changeset
19 #for i in $inputs:
003162f90751 Uploaded
azuzolo
parents:
diff changeset
20 #if $i.qual != None and $i.qual.__str__ != 'None':
003162f90751 Uploaded
azuzolo
parents:
diff changeset
21 #set quals = $quals + [$i.qual.__str__]
003162f90751 Uploaded
azuzolo
parents:
diff changeset
22 #end if
003162f90751 Uploaded
azuzolo
parents:
diff changeset
23 #end for
003162f90751 Uploaded
azuzolo
parents:
diff changeset
24 #if len($quals) > 0:
003162f90751 Uploaded
azuzolo
parents:
diff changeset
25 --qual=#echo ','.join($quals)
003162f90751 Uploaded
azuzolo
parents:
diff changeset
26 #end if
003162f90751 Uploaded
azuzolo
parents:
diff changeset
27 #if len($min_seq_length.__str__) > 0 and $min_seq_length > 0:
003162f90751 Uploaded
azuzolo
parents:
diff changeset
28 --min-seq-length=$min_seq_length
003162f90751 Uploaded
azuzolo
parents:
diff changeset
29 #end if
003162f90751 Uploaded
azuzolo
parents:
diff changeset
30 #if len($max_seq_length.__str__) > 0:
003162f90751 Uploaded
azuzolo
parents:
diff changeset
31 --max-seq-length=$max_seq_length
003162f90751 Uploaded
azuzolo
parents:
diff changeset
32 #end if
003162f90751 Uploaded
azuzolo
parents:
diff changeset
33 $trim_seq_length
003162f90751 Uploaded
azuzolo
parents:
diff changeset
34 #if len($min_qual_score.__str__) > 0:
003162f90751 Uploaded
azuzolo
parents:
diff changeset
35 --min-qual-score=$min_qual_score
003162f90751 Uploaded
azuzolo
parents:
diff changeset
36 #end if
003162f90751 Uploaded
azuzolo
parents:
diff changeset
37 $keep_primer
003162f90751 Uploaded
azuzolo
parents:
diff changeset
38 $keep_barcode
003162f90751 Uploaded
azuzolo
parents:
diff changeset
39 #if len($max_ambig.__str__) > 0:
003162f90751 Uploaded
azuzolo
parents:
diff changeset
40 --max-ambig=$max_ambig
003162f90751 Uploaded
azuzolo
parents:
diff changeset
41 #end if
003162f90751 Uploaded
azuzolo
parents:
diff changeset
42 #if len($max_homopolymer.__str__) > 0:
003162f90751 Uploaded
azuzolo
parents:
diff changeset
43 --max-homopolymer=$max_homopolymer
003162f90751 Uploaded
azuzolo
parents:
diff changeset
44 #end if
003162f90751 Uploaded
azuzolo
parents:
diff changeset
45 #if len($max_primer_mismatch.__str__) > 0:
003162f90751 Uploaded
azuzolo
parents:
diff changeset
46 --max-primer-mismatch=$max_primer_mismatch
003162f90751 Uploaded
azuzolo
parents:
diff changeset
47 #end if
003162f90751 Uploaded
azuzolo
parents:
diff changeset
48 --barcode-type=$barcode_type
003162f90751 Uploaded
azuzolo
parents:
diff changeset
49 #if $max_barcode_errors >= 0.:
003162f90751 Uploaded
azuzolo
parents:
diff changeset
50 --max-barcode-errors=$max_barcode_errors
003162f90751 Uploaded
azuzolo
parents:
diff changeset
51 #end if
003162f90751 Uploaded
azuzolo
parents:
diff changeset
52 #if len($start_numbering_at.__str__) > 0:
003162f90751 Uploaded
azuzolo
parents:
diff changeset
53 --start-numbering-at=$start_numbering_at
003162f90751 Uploaded
azuzolo
parents:
diff changeset
54 #end if
003162f90751 Uploaded
azuzolo
parents:
diff changeset
55 $retain_unassigned_reads
003162f90751 Uploaded
azuzolo
parents:
diff changeset
56 $disable_bc_correction
003162f90751 Uploaded
azuzolo
parents:
diff changeset
57 #if len($qual_score_window.__str__) > 0:
003162f90751 Uploaded
azuzolo
parents:
diff changeset
58 --qual_score_window=$qual_score_window
003162f90751 Uploaded
azuzolo
parents:
diff changeset
59 #end if
003162f90751 Uploaded
azuzolo
parents:
diff changeset
60 $disable_primers
003162f90751 Uploaded
azuzolo
parents:
diff changeset
61 --reverse_primers=$reverse_primers
003162f90751 Uploaded
azuzolo
parents:
diff changeset
62 #if $reverse_primer_mismatches != None and $reverse_primer_mismatches.__str__ != "" and $reverse_primers.__str__!='disable':
003162f90751 Uploaded
azuzolo
parents:
diff changeset
63 --reverse_primer_mismatches=$reverse_primer_mismatches
003162f90751 Uploaded
azuzolo
parents:
diff changeset
64 #end if
003162f90751 Uploaded
azuzolo
parents:
diff changeset
65 $record_qual_scores
003162f90751 Uploaded
azuzolo
parents:
diff changeset
66 $discard_bad_windows
003162f90751 Uploaded
azuzolo
parents:
diff changeset
67 #if $median_length_filtering != None and $median_length_filtering.__str__ != "":
003162f90751 Uploaded
azuzolo
parents:
diff changeset
68 --median_length_filtering=$median_length_filtering
003162f90751 Uploaded
azuzolo
parents:
diff changeset
69 #end if
003162f90751 Uploaded
azuzolo
parents:
diff changeset
70 #if $added_demultiplex_field != None and $added_demultiplex_field.__str__ != "":
003162f90751 Uploaded
azuzolo
parents:
diff changeset
71 --added_demultiplex_field=$added_demultiplex_field
003162f90751 Uploaded
azuzolo
parents:
diff changeset
72 #end if
003162f90751 Uploaded
azuzolo
parents:
diff changeset
73 </command>
003162f90751 Uploaded
azuzolo
parents:
diff changeset
74 <inputs>
003162f90751 Uploaded
azuzolo
parents:
diff changeset
75 <param name="map" type="data" format="tabular" label="map"
003162f90751 Uploaded
azuzolo
parents:
diff changeset
76 help="name of mapping file. NOTE: Must contain a header line indicating SampleID in the first column, BarcodeSequence in the second, and LinkerPrimerSequence in the third. [REQUIRED]"/>
003162f90751 Uploaded
azuzolo
parents:
diff changeset
77 <repeat name="inputs" title="Input Sequences">
003162f90751 Uploaded
azuzolo
parents:
diff changeset
78 <param name="fasta" type="data" format="fasta" label="fasta"
003162f90751 Uploaded
azuzolo
parents:
diff changeset
79 help="name of fasta file [REQUIRED]"/>
003162f90751 Uploaded
azuzolo
parents:
diff changeset
80 <param name="qual" type="data" format="qual" optional="true" label="qual"
003162f90751 Uploaded
azuzolo
parents:
diff changeset
81 help="name of qual file [OPTIONAL]"/>
003162f90751 Uploaded
azuzolo
parents:
diff changeset
82 </repeat>
003162f90751 Uploaded
azuzolo
parents:
diff changeset
83 <param name="min_seq_length" type="integer" value="200" label="min-seq-length"
003162f90751 Uploaded
azuzolo
parents:
diff changeset
84 help="minimum sequence length, in nucleotides [default: 200]"/>
003162f90751 Uploaded
azuzolo
parents:
diff changeset
85 <param name="max_seq_length" type="integer" value="1000" label="max-seq-length"
003162f90751 Uploaded
azuzolo
parents:
diff changeset
86 help="maximum sequence length, in nucleotides [default: 1000]"/>
003162f90751 Uploaded
azuzolo
parents:
diff changeset
87 <param name="trim_seq_length" type="boolean" truevalue="--trim-seq-length" falsevalue="" checked="false" label="trim-seq-length"
003162f90751 Uploaded
azuzolo
parents:
diff changeset
88 help="calculate sequence lengths after trimming primers and barcodes [default: False]"/>
003162f90751 Uploaded
azuzolo
parents:
diff changeset
89 <param name="min_qual_score" type="integer" value="25" label="min-qual-score"
003162f90751 Uploaded
azuzolo
parents:
diff changeset
90 help="min average qual score allowed in read [default: 25]"/>
003162f90751 Uploaded
azuzolo
parents:
diff changeset
91 <param name="keep_primer" type="boolean" truevalue="--keep-primer" falsevalue="" checked="false" label="keep-primer"
003162f90751 Uploaded
azuzolo
parents:
diff changeset
92 help="do not remove primer from sequences"/>
003162f90751 Uploaded
azuzolo
parents:
diff changeset
93 <param name="keep_barcode" type="boolean" truevalue="--keep-barcode" falsevalue="" checked="false" label="keep-barcode"
003162f90751 Uploaded
azuzolo
parents:
diff changeset
94 help="do not remove barcode from sequences"/>
003162f90751 Uploaded
azuzolo
parents:
diff changeset
95 <param name="max_ambig" type="integer" value="0" label="max-ambig"
003162f90751 Uploaded
azuzolo
parents:
diff changeset
96 help="maximum number of ambiguous bases [default: 0]"/>
003162f90751 Uploaded
azuzolo
parents:
diff changeset
97 <param name="max_homopolymer" type="integer" value="6" label="max-homopolymer"
003162f90751 Uploaded
azuzolo
parents:
diff changeset
98 help="maximum length of homopolymer run [default: 6]"/>
003162f90751 Uploaded
azuzolo
parents:
diff changeset
99 <param name="max_primer_mismatch" type="integer" value="0" label="max-primer-mismatch"
003162f90751 Uploaded
azuzolo
parents:
diff changeset
100 help="maximum number of primer mismatches [default: 0]"/>
003162f90751 Uploaded
azuzolo
parents:
diff changeset
101 <param name="barcode_type" type="text" value="golay_12" label="barcode-type"
003162f90751 Uploaded
azuzolo
parents:
diff changeset
102 help="barcode type, hamming_8, golay_12, variable_length (will disable any barcode correction if variable_length set), or a number representing the length of the barcode, such as -b 4. [default: golay_12]"/>
003162f90751 Uploaded
azuzolo
parents:
diff changeset
103 <param name="max_barcode_errors" type="float" value="1.5" label="max-barcode-errors"
003162f90751 Uploaded
azuzolo
parents:
diff changeset
104 help="maximum number of errors in barcode [default: 1.5]"/>
003162f90751 Uploaded
azuzolo
parents:
diff changeset
105 <param name="start_numbering_at" type="integer" min="1" value="1" label="start-numbering-at"
003162f90751 Uploaded
azuzolo
parents:
diff changeset
106 help="seq id to use for the first sequence [default: 1]"/>
003162f90751 Uploaded
azuzolo
parents:
diff changeset
107 <param name="retain_unassigned_reads" type="boolean" truevalue="--retain_unassigned_reads" falsevalue="" checked="false" label="retain_unassigned_reads"
003162f90751 Uploaded
azuzolo
parents:
diff changeset
108 help="Retain sequences which are unassigned in the output sequence file [default: False]"/>
003162f90751 Uploaded
azuzolo
parents:
diff changeset
109 <param name="disable_bc_correction" type="boolean" truevalue="--disable_bc_correction" falsevalue="" checked="false" label="disable_bc_correction"
003162f90751 Uploaded
azuzolo
parents:
diff changeset
110 help="Disable attempts to find nearest corrected barcode. Can improve performance. [default: False]"/>
003162f90751 Uploaded
azuzolo
parents:
diff changeset
111 <param name="qual_score_window" type="integer" value="0" label="qual_score_window"
003162f90751 Uploaded
azuzolo
parents:
diff changeset
112 help="Enable sliding window test of quality scores. If the average score of a continuous set of w nucleotides falls below the threshold (see -s for default), the sequence is discarded. A good value would be 50. 0 (zero) means no filtering. Must pass a .qual file (see -q parameter) if this functionality is enabled. [default: 0]"/>
003162f90751 Uploaded
azuzolo
parents:
diff changeset
113 <param name="discard_bad_windows" type="boolean" truevalue="--discard_bad_windows" falsevalue="" checked="false" label="discard_bad_windows"
003162f90751 Uploaded
azuzolo
parents:
diff changeset
114 help="If the qual_score_window option (-w) is enabled, this will override the default truncation behavior and discard any sequences where a bad window is found. [default: False]"/>
003162f90751 Uploaded
azuzolo
parents:
diff changeset
115 <param name="disable_primers" type="boolean" truevalue="--disable_primers" falsevalue="" checked="false" label="disable_primers"
003162f90751 Uploaded
azuzolo
parents:
diff changeset
116 help="Disable primer usage when demultiplexing. Should be enabled for unusual circumstances, such as analyzing Sanger sequence data generated with different primers. [default: False]"/>
003162f90751 Uploaded
azuzolo
parents:
diff changeset
117 <param name="reverse_primers" type="select" label="reverse_primers"
003162f90751 Uploaded
azuzolo
parents:
diff changeset
118 help="Enable removal of the reverse primer and any subsequent sequence from the end of each read. To enable this, there has to be a 'ReversePrimer' column in the mapping file. Primers are required to be in IUPAC format and written in the 5' to 3' direction. Valid options are 'disable', 'truncate_only', and 'truncate_remove'. 'truncate_only' will remove the primer and subsequent sequence data from the output read and will not alter output of sequences where the primer cannot be found. 'truncate_remove' will flag sequences where the primer cannot be found to not be written and will record the quantity of such failed sequences in the log file. [default: disable]">
003162f90751 Uploaded
azuzolo
parents:
diff changeset
119 <option value="disable" selected="true">disable</option>
003162f90751 Uploaded
azuzolo
parents:
diff changeset
120 <option value="truncate_only">truncate_only</option>
003162f90751 Uploaded
azuzolo
parents:
diff changeset
121 <option value="truncate_remove">truncate_remove</option>
003162f90751 Uploaded
azuzolo
parents:
diff changeset
122 </param>
003162f90751 Uploaded
azuzolo
parents:
diff changeset
123 <param name="reverse_primer_mismatches" type="integer" value="0" label="reverse_primer_mismatches"
003162f90751 Uploaded
azuzolo
parents:
diff changeset
124 help="Set number of allowed mismatches for reverse primers. [default: 0]"/>
003162f90751 Uploaded
azuzolo
parents:
diff changeset
125 <param name="record_qual_scores" type="boolean" truevalue="--record_qual_scores" falsevalue="" checked="false" label="record_qual_scores"
003162f90751 Uploaded
azuzolo
parents:
diff changeset
126 help="Enables recording of quality scores for all sequences that are recorded. If this option is enabled, a file named seqs_filtered.qual will be created in the output directory, and will contain the same sequence IDs as the seqs.fna file and sequence quality scores matching the bases present in the seqs.fna file. [default: False]"/>
003162f90751 Uploaded
azuzolo
parents:
diff changeset
127 <param name="median_length_filtering" type="text" value="" label="median_length_filtering"
003162f90751 Uploaded
azuzolo
parents:
diff changeset
128 help="Disables minimum and maximum sequence length filtering, and instead calculates the median sequence length and filters the sequences based upon the number of median absolute deviations specified by this parameter. Any sequences with lengths outside the number of deviations will be removed. [default: None]"/>
003162f90751 Uploaded
azuzolo
parents:
diff changeset
129 <param name="added_demultiplex_field" type="text" value="" label="added_demultiplex_field"
003162f90751 Uploaded
azuzolo
parents:
diff changeset
130 help="Use this option to add a field to use in the mapping file as an additional demultiplexing option to the barcode. All combinations of barcodes and the values in these fields must be unique. The fields must contain values that can be parsed from the fasta labels such as 'plate==R_2008_12_09'. In this case, 'plate' would be the column header and 'R_2008_12_09' would be the field data (minus quotes) in the mapping file. To use the run prefix from the fasta label, such as '>FLP3FBN01ELBSX', where 'FLP3FBN01' is generated from the run ID, enter 'run_prefix' in the field and set the run prefix to be used as the data under the column header 'run_prefix'. [default: None]"/>
003162f90751 Uploaded
azuzolo
parents:
diff changeset
131 </inputs>
003162f90751 Uploaded
azuzolo
parents:
diff changeset
132 <outputs>
003162f90751 Uploaded
azuzolo
parents:
diff changeset
133 <data format="txt" name="log" label="${tool.name} on ${on_string}: log" />
003162f90751 Uploaded
azuzolo
parents:
diff changeset
134 <data format="txt" name="histograms" label="${tool.name} on ${on_string}: histograms"/>
003162f90751 Uploaded
azuzolo
parents:
diff changeset
135 <data format="fasta" name="sequences" label="${tool.name} on ${on_string}: fasta"/>
003162f90751 Uploaded
azuzolo
parents:
diff changeset
136 </outputs>
003162f90751 Uploaded
azuzolo
parents:
diff changeset
137 <tests>
003162f90751 Uploaded
azuzolo
parents:
diff changeset
138 </tests>
003162f90751 Uploaded
azuzolo
parents:
diff changeset
139 <help>For more information, see split_libraries_ in the Qiime documentation.
003162f90751 Uploaded
azuzolo
parents:
diff changeset
140
003162f90751 Uploaded
azuzolo
parents:
diff changeset
141 Updated and validated 01/19/12 by Amanda Zuzolo, Microbiome Analysis Center, George Mason University, Fairfax, VA
003162f90751 Uploaded
azuzolo
parents:
diff changeset
142
003162f90751 Uploaded
azuzolo
parents:
diff changeset
143 Qiime integration courtesy of Jim Johnson, Minnesota Supercomputing Institute, University of Minnesota, Minneapolis, MN
003162f90751 Uploaded
azuzolo
parents:
diff changeset
144
003162f90751 Uploaded
azuzolo
parents:
diff changeset
145 .. _split_libraries: http://qiime.org/scripts/split_libraries.html
003162f90751 Uploaded
azuzolo
parents:
diff changeset
146 </help>
003162f90751 Uploaded
azuzolo
parents:
diff changeset
147 </tool>
003162f90751 Uploaded
azuzolo
parents:
diff changeset
148