Mercurial > repos > iuc > umi_tools_extract
comparison umi-tools_extract.xml @ 15:27ac32a22ad2 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/umi_tools commit bf6a3aa532e8f9d122da4c1e39f3e256ae587b79"
author | iuc |
---|---|
date | Mon, 13 Sep 2021 14:52:06 +0000 |
parents | 9fa7803d1c51 |
children | 7accf7407811 |
comparison
equal
deleted
inserted
replaced
14:9fa7803d1c51 | 15:27ac32a22ad2 |
---|---|
1 <tool id="umi_tools_extract" name="UMI-tools extract" version="@VERSION@.2"> | 1 <tool id="umi_tools_extract" name="UMI-tools extract" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@"> |
2 <description>Extract UMI from fastq files</description> | 2 <description>Extract UMI from fastq files</description> |
3 <expand macro="bio_tools"/> | |
3 <macros> | 4 <macros> |
4 <import>macros.xml</import> | 5 <import>macros.xml</import> |
5 <macro name="out_conditional"> | |
6 <actions> | |
7 <conditional name="input_type.type"> | |
8 <when value="paired_collection" > | |
9 <action type="format"> | |
10 <option type="from_param" name="input_type.input_readpair" param_attribute="forward.ext" /> | |
11 </action> | |
12 </when> | |
13 <when value="paired" > | |
14 <action type="format"> | |
15 <option type="from_param" name="input_type.input_read1" param_attribute="ext" /> | |
16 </action> | |
17 </when> | |
18 </conditional> | |
19 </actions> | |
20 </macro> | |
21 </macros> | 6 </macros> |
22 <expand macro="requirements" /> | 7 <expand macro="requirements" /> |
23 <command detect_errors="exit_code"><![CDATA[ | 8 <command detect_errors="exit_code"><![CDATA[ |
24 @COMMAND_LINK@ | 9 @COMMAND_LINK@ |
25 | 10 |
26 umi_tools extract | 11 umi_tools extract |
27 --extract-method='$extract_method.value' | 12 |
28 --bc-pattern='$bc_pattern' | 13 @FASTQ_BARCODE_EXTRACTION_OPTIONS@ |
29 | 14 #if $input_type_cond.input_type == 'single': |
30 #if $input_type.type == 'single': | 15 #if $gz: |
31 #if $gz: | 16 --stdin=input_single.gz |
32 --stdin=input_single.gz | 17 --stdout out.gz |
33 --stdout out.gz | 18 #else |
19 --stdin=input_single.txt | |
20 --stdout '$out' | |
21 #end if | |
22 #else: | |
23 #if $gz: | |
24 --stdin=input_read1.gz | |
25 --read2-in=input_read2.gz | |
26 --stdout out1.gz | |
27 --read2-out=out2.gz | |
28 #else: | |
29 --stdin=input_read1.txt | |
30 --read2-in=input_read2.txt | |
31 #if $input_type_cond.input_type == 'paired' | |
32 --stdout '$out' | |
33 --read2-out='$out2' | |
34 #else | 34 #else |
35 --stdin=input_single.txt | 35 --stdout '$out_paired_collection.forward' |
36 --stdout '$out' | 36 --read2-out='$out_paired_collection.reverse' |
37 #end if | 37 #end if |
38 #else: | 38 #end if |
39 #if $gz: | 39 $input_type_cond.reconcile_pairs |
40 --stdin=input_read1.gz | 40 #end if |
41 --read2-in=input_read2.gz | 41 |
42 --stdout out1.gz | 42 #if $whitelist |
43 --read2-out=out2.gz | 43 --whitelist='$whitelist' |
44 #else: | 44 #end if |
45 --stdin=input_read1.txt | 45 #if $blacklist |
46 --read2-in=input_read2.txt | 46 --blacklist='$blacklist' |
47 --stdout '$out1' | 47 #end if |
48 --read2-out='$out2' | 48 $error_correct_cell.value |
49 #end if | 49 |
50 #if $input_type.barcode.barcode_select == "both_reads": | 50 #if $quality.quality_selector =='true': |
51 --split-barcode | 51 #if str($quality.quality_filter_threshold) != '' |
52 --bc-pattern2='$input_type.barcode.bc_pattern2' | |
53 #end if | |
54 #end if | |
55 | |
56 #if $barcodes.use_barcodes.value == 'yes': | |
57 --filter-cell-barcode | |
58 --whitelist='$barcodes.filter_barcode_file' | |
59 '$barcodes.filter_correct.value' | |
60 #end if | |
61 | |
62 #if not $prime3: | |
63 --3prime | |
64 #end if | |
65 #if $quality.quality_selector =='true': | |
66 --quality-filter-threshold '$quality.quality_filter_threshold' | 52 --quality-filter-threshold '$quality.quality_filter_threshold' |
67 --quality-encoding '$quality.quality_encoding' | 53 #end if |
68 #end if | 54 #if str($quality.quality_filter_mask) != '' |
69 #if $print_log == "1": | 55 --quality-filter-mask '$quality.quality_filter_mask' |
70 --log='$out_log' | 56 #end if |
71 #end if | 57 #if $input_type_cond.input_type != 'paired_collection' |
58 #set input=$input_type_cond.input_read1 | |
59 #else | |
60 #set input=$input_type_cond.input_readpair.forward | |
61 #end if | |
62 --quality-encoding | |
63 #if $input.ext.startswith("fastqillumina") | |
64 phred64 | |
65 #else if $input.ext.startswith("fastqsolexa") | |
66 solexa | |
67 #else | |
68 phred33 | |
69 #end if | |
70 #end if | |
71 @LOG@ | |
72 #if $gz: | 72 #if $gz: |
73 #if $input_type.type == 'single': | 73 #if $input_type_cond.input_type == 'single': |
74 && mv out.gz '$out' | 74 && mv out.gz '$out' |
75 #else | 75 #else if $input_type_cond.input_type == 'paired' |
76 && mv out1.gz '$out1' | 76 && mv out1.gz '$out' |
77 && mv out2.gz '$out2' | 77 && mv out2.gz '$out2' |
78 #else | |
79 && mv out1.gz '$out_paired_collection.forward' | |
80 && mv out2.gz '$out_paired_collection.reverse' | |
78 #end if | 81 #end if |
79 #end if | 82 #end if |
80 ]]></command> | 83 ]]></command> |
81 <inputs> | 84 <inputs> |
82 <expand macro="input_types" /> | 85 <expand macro="input_types"> |
83 | 86 <param argument="--reconcile-pairs" type="boolean" truevalue="--reconcile-pairs" falsevalue="" checked="false" label="Allow unpaired reads" help="Allow the presence of reads in read2 input that are not present in read1 input. This allows cell barcode filtering of read1s without considering read2s" /> |
84 <conditional name="barcodes" > | 87 </expand> |
85 <param name="use_barcodes" argument="--filter-cell-barcode" type="select" label="Use Known Barcodes?" > | 88 <expand macro="fastq_barcode_extraction_options_macro"/> |
86 <option value="yes">Yes</option> | 89 |
87 <option value="no" selected="true" >No</option> | 90 <param argument="--whitelist" type="data" optional="true" format="tabular,tsv" label="Allowlist of accepted barcodes" /> |
88 </param> | 91 <param argument="--blacklist" type="data" optional="true" format="tabular,tsv" label="Denylist of rejected barcodes" /> |
89 <when value="no" /> | 92 <param argument="--error-correct-cell" type="boolean" truevalue="--error-correct-cell" falsevalue="" checked="false" label="Apply correction to cell barcodes?" help="This only applies if your barcode file has two columns output from the umi_tools whitelist command" /> |
90 <when value="yes" > | 93 |
91 <param name="filter_barcode_file" type="data" format="tabular,tsv" label="Barcode File" /> | |
92 <param name="filter_correct" argument="--error-correct-cell" type="boolean" truevalue="--error-correct-cell" falsevalue="" checked="false" label="Apply correction to cell barcodes?" help="This only applies if your barcode file has two columns output from the umi_tools whitelist command." /> | |
93 </when> | |
94 </conditional> | |
95 | |
96 <param name="extract_method" type="select" label="Method to extract barcodes" > | |
97 <option value="regex">Regular Expressions</option> | |
98 <option value="string" selected="true">String</option> | |
99 </param> | |
100 | |
101 <param name="bc_pattern" argument="--bc-pattern" type="text" label="Barcode pattern for first read" | |
102 help="Use this option to specify the format of the UMI/barcode. Use Ns to | |
103 represent the random positions and Xs to indicate the bc positions. | |
104 Bases with Ns will be extracted and added to the read name. Remaining | |
105 bases, marked with an X will be reattached to the read."> | |
106 <expand macro="barcode_sanitizer" /> | |
107 </param> | |
108 | |
109 <param name="prime3" argument="--3prime" type="boolean" label="Is the barcode at the 5' end?" | |
110 truevalue="1" falsevalue="0" checked="true" | |
111 help="By default the barcode is assumed to be on the 5' end of the read, but | |
112 use this option to specify that it is on the 3' end instead." /> | |
113 <param name="print_log" argument="-L" type="boolean" label="Output log?" | |
114 truevalue="1" falsevalue="0" checked="true" | |
115 help="Choose if you want to generate a text file containing logging information." /> | |
116 <conditional name="quality"> | 94 <conditional name="quality"> |
117 <param name="quality_selector" type="select" label="Enable quality filter?" > | 95 <param name="quality_selector" type="select" label="Enable quality filter?" > |
118 <option value="false">No</option> | 96 <option value="false">No</option> |
119 <option value="true">Yes</option> | 97 <option value="true">Yes</option> |
120 </param> | 98 </param> |
121 <when value="false"> | 99 <when value="false"> |
122 </when> | 100 </when> |
123 <when value="true"> | 101 <when value="true"> |
124 <param name="quality_filter_threshold" label="Phred score threshold" | 102 <param argument="--quality-filter-threshold" label="Phred score threshold" |
125 type="integer" value="20" argument="--quality-filter-threshold" | 103 type="integer" value="" optional="true" |
126 help="Remove reads where any UMI base quality score falls below this threshold." /> | 104 help="Remove reads where any UMI base quality score falls below this threshold" /> |
127 <param name="quality_encoding" argument="--quality-encoding" type="select" label="Library type" | 105 <param argument="--quality-filter-mask" label="Mask UMI bases below threshold" |
128 help="Quality score encoding. Choose from phred33 [33-77], phred64 [64-106] or solexa [59-106]."> | 106 type="integer" value="" optional="true" |
129 <option value="phred33">phred33 [33-77]</option> | 107 help="If a UMI base has a quality below this threshold, |
130 <option value="phred64">phred64 [64-106]</option> | 108 replace the base with 'N'" /> |
131 <option value="solexa">solexa [59-106]</option> | |
132 </param> | |
133 </when> | 109 </when> |
134 </conditional> | 110 </conditional> |
111 <expand macro="log_input_macro"/> | |
135 </inputs> | 112 </inputs> |
136 <outputs> | 113 <outputs> |
137 <data name="out" format_source="input_single" label="Reads: ${tool.name} on ${on_string}" > | 114 <data name="out" format_source="input_read1" label="${tool.name} on ${on_string}: Reads" > |
138 <filter>input_type['type'] == "single"</filter> | 115 <filter>input_type_cond['input_type'] in ['single', 'paired']</filter> |
139 </data> | 116 </data> |
140 <data name="out1" format_source="input_read1" label="Reads1: ${tool.name} on ${on_string}" > | 117 <data name="out2" format_source="input_read2" label="${tool.name} on ${on_string}: Reads2" > |
141 <filter>input_type['type'] != "single"</filter> | 118 <filter>input_type_cond['input_type'] == 'paired'</filter> |
142 <expand macro="out_conditional" /> | |
143 </data> | 119 </data> |
144 <data name="out2" format_source="input_read2" label="Reads2: ${tool.name} on ${on_string}" > | 120 <collection name="out_paired_collection" type="paired" label="${tool.name} on ${on_string}: Reads"> |
145 <filter>input_type['type'] != "single"</filter> | 121 <data name="forward" format_source="input_readpair" /> |
146 <expand macro="out_conditional" /> | 122 <data name="reverse" format_source="input_readpair" /> |
147 </data> | 123 <filter>input_type_cond['input_type'] == 'paired_collection'</filter> |
148 | 124 </collection> |
149 <data name="out_log" format="txt"> | 125 <expand macro="log_output_macro"/> |
150 <filter>print_log == True</filter> | |
151 </data> | |
152 </outputs> | 126 </outputs> |
153 <tests> | 127 <tests> |
154 <test expect_num_outputs="2"> | 128 <test expect_num_outputs="2"> |
155 <param name="type" value="single" /> | 129 <conditional name="input_type_cond"> |
156 <param name="input_single" value="t_R1.fastq" ftype="fastq" /> | 130 <param name="input_type" value="single" /> |
157 <param name="bc_pattern" value="XXXNNN" /> | 131 <param name="input_read1" value="t_R1.fastq" ftype="fastqsanger" /> |
158 <param name="prime3" value="0" /> | 132 <param name="bc_pattern" value="XXXNNN" /> |
133 </conditional> | |
134 <conditional name="extract_method_cond"> | |
135 <param name="prime3" value="true" /> | |
136 </conditional> | |
159 <param name="quality_selector" value="true" /> | 137 <param name="quality_selector" value="true" /> |
160 <param name="quality_filter_threshold" value="10" /> | 138 <param name="quality_filter_threshold" value="10" /> |
161 <param name="quality_encoding" value="phred33" /> | 139 <param name="quality_encoding" value="phred33" /> |
162 <output name="out" file="out_SE.fastq" ftype="fastq" /> | 140 <param name="log" value="true"/> |
141 <output name="out" file="out_SE.fastq" ftype="fastqsanger" /> | |
163 <output name="out_log" > | 142 <output name="out_log" > |
164 <assert_contents> | 143 <assert_contents> |
165 <has_text text="Input Reads: 100" /> | 144 <has_text text="Input Reads: 100" /> |
166 <has_text text="umi quality: 28" /> | 145 <has_text text="umi quality: 28" /> |
167 <has_text text="Reads output: 72" /> | 146 <has_text text="Reads output: 72" /> |
168 </assert_contents> | 147 </assert_contents> |
169 </output> | 148 </output> |
170 </test> | 149 </test> |
171 <test expect_num_outputs="3"> | 150 <test expect_num_outputs="3"> |
172 <param name="type" value="paired" /> | 151 <conditional name="input_type_cond"> |
173 <param name="input_read1" value="t_R1.fastq.gz" ftype="fastq.gz" /> | 152 <param name="input_type" value="paired" /> |
174 <param name="input_read2" value="t_R2.fastq.gz" ftype="fastq.gz" /> | 153 <param name="input_read1" value="t_R1.fastq.gz" ftype="fastqsanger.gz" /> |
175 <param name="bc_pattern" value="NNNXXX" /> | 154 <param name="input_read2" value="t_R2.fastq.gz" ftype="fastqsanger.gz" /> |
176 <output name="out1" file="out_R1.fastq.gz" decompress="true" lines_diff="2" ftype="fastq.gz" /> | 155 <param name="bc_pattern" value="NNNXXX" /> |
177 <output name="out2" file="out_R2.fastq.gz" decompress="true" lines_diff="2" ftype="fastq.gz" /> | 156 </conditional> |
157 <param name="log" value="true"/> | |
158 <output name="out" file="out_R1.fastq.gz" decompress="true" lines_diff="2" ftype="fastqsanger.gz" /> | |
159 <output name="out2" file="out_R2.fastq.gz" decompress="true" lines_diff="2" ftype="fastqsanger.gz" /> | |
178 <output name="out_log" > | 160 <output name="out_log" > |
179 <assert_contents> | 161 <assert_contents> |
180 <has_text text="Input Reads: 100" /> | 162 <has_text text="Input Reads: 100" /> |
181 <has_text text="Reads output: 100" /> | 163 <has_text text="Reads output: 100" /> |
182 </assert_contents> | 164 </assert_contents> |
183 </output> | 165 </output> |
184 </test> | 166 </test> |
185 <test expect_num_outputs="3"> | 167 <test expect_num_outputs="4"> |
186 <param name="type" value="paired_collection" /> <!-- same as before, but uncompressed --> | 168 <conditional name="input_type_cond"> |
187 <param name="paired_type" value="no" /> | 169 <param name="input_type" value="paired_collection" /> <!-- same as before, but uncompressed --> |
188 <param name="input_readpair" > | 170 <param name="paired_type" value="no" /> |
189 <collection type="paired" > | 171 <param name="input_readpair"> |
190 <element name="forward" ftype="fastq" value="t_R1.fastq" /> | 172 <collection type="paired" > |
191 <element name="reverse" ftype="fastq" value="t_R2.fastq" /> | 173 <element name="forward" ftype="fastqsanger" value="t_R1.fastq" /> |
192 </collection> | 174 <element name="reverse" ftype="fastqsanger" value="t_R2.fastq" /> |
193 </param> | 175 </collection> |
194 <param name="bc_pattern" value="NNNXXX" /> | 176 </param> |
195 <output name="out1" file="out_R1.fastq" ftype="fastq" /> | 177 <param name="bc_pattern" value="NNNXXX" /> |
196 <output name="out2" file="out_R2.fastq" ftype="fastq" /> | 178 </conditional> |
179 <param name="log" value="true"/> | |
180 <output_collection name="out_paired_collection" type="paired"> | |
181 <element name="forward" file="out_R1.fastq" ftype="fastqsanger" /> | |
182 <element name="reverse" file="out_R2.fastq" ftype="fastqsanger" /> | |
183 </output_collection> | |
197 <output name="out_log" > | 184 <output name="out_log" > |
198 <assert_contents> | 185 <assert_contents> |
199 <has_text text="Input Reads: 100" /> | 186 <has_text text="Input Reads: 100" /> |
200 <has_text text="Reads output: 100" /> | 187 <has_text text="Reads output: 100" /> |
201 </assert_contents> | 188 </assert_contents> |
202 </output> | 189 </output> |
203 </test> | 190 </test> |
204 <test expect_num_outputs="3"> | 191 <test expect_num_outputs="3"> |
205 <param name="type" value="paired" /> | 192 <conditional name="input_type_cond"> |
206 <param name="input_read1" value="scrb_seq_fastq.1.gz" ftype="fastq.gz" /> | 193 <param name="input_type" value="paired" /> |
207 <param name="input_read2" value="scrb_seq_fastq.2.gz" ftype="fastq.gz" /> | 194 <param name="input_read1" value="scrb_seq_fastq.1.gz" ftype="fastqsanger.gz" /> |
195 <param name="input_read2" value="scrb_seq_fastq.2.gz" ftype="fastqsanger.gz" /> | |
196 <param name="bc_pattern" value="CCCCCCNNNNNNNNNN" /> | |
197 </conditional> | |
208 <param name="extract_method" value="string" /> | 198 <param name="extract_method" value="string" /> |
209 <param name="bc_pattern" value="CCCCCCNNNNNNNNNN" /> | 199 <param name="whitelist" value="scrb_seq_barcodes" /> |
210 <param name="use_barcodes" value="yes" /> | 200 <param name="log" value="true"/> |
211 <param name="filter_barcode_file" value="scrb_seq_barcodes" /> | 201 <output name="out2" file="scrb_extract.fastq.gz" decompress="true" ftype="fastqsanger.gz" /> |
212 <output name="out2" file="scrb_extract.fastq.gz" decompress="true" ftype="fastq.gz" /> | |
213 </test> | 202 </test> |
214 <test expect_num_outputs="3"><!-- same as above but with regex barcode--> | 203 <test expect_num_outputs="3"><!-- same as above but with regex barcode--> |
215 <param name="type" value="paired" /> | 204 <conditional name="input_type_cond"> |
216 <param name="input_read1" value="scrb_seq_fastq.1.gz" ftype="fastq.gz" /> | 205 <param name="input_type" value="paired" /> |
217 <param name="input_read2" value="scrb_seq_fastq.2.gz" ftype="fastq.gz" /> | 206 <param name="input_read1" value="scrb_seq_fastq.1.gz" ftype="fastqsanger.gz" /> |
207 <param name="input_read2" value="scrb_seq_fastq.2.gz" ftype="fastqsanger.gz" /> | |
208 <param name="bc_pattern" value="^(?P<cell_1>.{6})(?P<umi_1>.{10})" /> | |
209 </conditional> | |
218 <param name="extract_method" value="regex" /> | 210 <param name="extract_method" value="regex" /> |
219 <param name="bc_pattern" value="^(?P<cell_1>.{6})(?P<umi_1>.{10})" /> | 211 <param name="whitelist" value="scrb_seq_barcodes" /> |
220 <param name="use_barcodes" value="yes" /> | 212 <param name="log" value="true"/> |
221 <param name="filter_barcode_file" value="scrb_seq_barcodes" /> | 213 <output name="out2" file="scrb_extract.fastq.gz" decompress="true" ftype="fastqsanger.gz" /> |
222 <output name="out2" file="scrb_extract.fastq.gz" decompress="true" ftype="fastq.gz" /> | |
223 </test> | 214 </test> |
224 <test expect_num_outputs="2"><!-- CelSeq2 example --> | 215 <test expect_num_outputs="2"><!-- CelSeq2 example --> |
225 <param name="type" value="paired" /> | 216 <conditional name="input_type_cond"> |
226 <param name="input_read1" value="read_R1.200.gz" ftype="fastq.gz" /> | 217 <param name="input_type" value="paired" /> |
227 <param name="input_read2" value="read_R2.200.gz" ftype="fastq.gz" /> | 218 <param name="input_read1" value="read_R1.200.gz" ftype="fastqsanger.gz" /> |
219 <param name="input_read2" value="read_R2.200.gz" ftype="fastqsanger.gz" /> | |
220 <param name="bc_pattern" value="NNNNNNCCCCCC" /> | |
221 </conditional> | |
228 <param name="extract_method" value="string" /> | 222 <param name="extract_method" value="string" /> |
229 <param name="bc_pattern" value="NNNNNNCCCCCC" /> | 223 <output name="out" file="read_R1.200_extracted.fastq.gz" ftype="fastqsanger.gz" decompress="true" lines_diff="1" /> |
230 <output name="out1" file="read_R1.200_extracted.fastq.gz" ftype="fastq.gz" decompress="true" lines_diff="1" /> | 224 <output name="out2" file="read_R2.200_extracted.fastq.gz" ftype="fastqsanger.gz" decompress="true" lines_diff="1" /> |
231 <output name="out2" file="read_R2.200_extracted.fastq.gz" ftype="fastq.gz" decompress="true" lines_diff="1" /> | |
232 <param name="print_log" value="false"/> | |
233 </test> | 225 </test> |
234 </tests> | 226 </tests> |
235 <help><![CDATA[ | 227 <help><![CDATA[ |
236 | 228 |
237 | 229 extract - Extract UMI from fastq |
238 UMI-tools extract.py - Extract UMI from fastq | 230 ================================ |
239 ============================================= | |
240 | |
241 Purpose | |
242 ------- | |
243 | 231 |
244 Extract UMI barcode from a read and add it to the read name, leaving | 232 Extract UMI barcode from a read and add it to the read name, leaving |
245 any sample barcode in place. Can deal with paired end reads and UMIs | 233 any sample barcode in place |
246 split across the paired ends | 234 |
247 | 235 Can deal with paired end reads and UMIs |
248 Options | 236 split across the paired ends. Can also optionally extract cell |
249 ------- | 237 barcodes and append these to the read name also. See the section below |
250 | 238 for an explanation for how to encode the barcode pattern(s) to |
251 --split-barcode | 239 specify the position of the UMI +/- cell barcode. |
252 By default the UMI is assumed to be on the first read. Use this | 240 |
253 option if the UMI is contained on both reads and specify the | 241 |
254 pattern of the barcode/UMI on the second read using the option | 242 Filtering and correcting cell barcodes |
255 ``--bc-pattern2`` | 243 -------------------------------------- |
256 | 244 |
257 --bc-pattern | 245 ``umi_tools extract`` can optionally filter cell barcodes against a user-supplied |
258 Use this option to specify the format of the UMI/barcode. Use Ns to | 246 whitelist (``--whitelist``). If a whitelist is not available for your data, |
259 represent the random positions and Xs to indicate the bc positions. | 247 e.g |
260 Bases with Ns will be extracted and added to the read name. Remaining | 248 if you have performed droplet-based scRNA-Seq, you can use the |
261 bases, marked with an X will be reattached to the read. | 249 whitelist tool. |
262 | 250 |
263 E.g. If the pattern is NNXXNN, | 251 Cell barcodes which do not match the whitelist (user-generated or |
264 Then the read: | 252 automatically generated) can also be optionally corrected using the |
265 | 253 ``--error-correct-cell`` option. |
266 @HISEQ:87:00000000 read1 | 254 |
267 AAGGTTGCTGATTGGATGGGCTAG | 255 The whitelist should be in the following format (tab-separated):: |
268 DA1AEBFGGCG01DFH00B1FF0B | 256 |
269 + | 257 AAAAAA AGAAAA |
270 | 258 AAAATC |
271 will become: | 259 AAACAT |
272 @HISEQ:87:00000000_AATT read1 | 260 AAACTA AAACTN,GAACTA |
273 GGGCTGATTGGATGGGCTAG | 261 AAATAC |
274 1AFGGCG01DFH00B1FF0B | 262 AAATCA GAATCA |
275 + | 263 AAATGT AAAGGT,CAATGT |
276 | 264 |
277 --bc-pattern2 | 265 Where column 1 is the whitelisted cell barcodes and column 2 is |
278 Use this option to specify the format of the UMI/barcode for | 266 the list (comma-separated) of other cell barcodes which should be |
279 the second read pair if required. If --bc-pattern2 is not | 267 corrected to the barcode in column 1. If the ``--error-correct-cell`` |
280 supplied, this defaults to the same pattern as --bc-pattern | 268 option is not used, this column will be ignored. Any additional columns |
281 | 269 in the whitelist input, such as the counts columns from the output of |
282 --3prime | 270 umi_tools whitelist, will be ignored. |
283 By default the barcode is assumed to be on the 5' end of the read, but | 271 |
284 use this option to specify that it is on the 3' end instead | 272 @FASTQ_BARCODE_EXTRACTION_HELP@ |
285 | |
286 -L | |
287 Specify a log file to retain logging information and final statistics | |
288 | |
289 --split-barcode | |
290 barcode is split across read pair | |
291 | |
292 --quality-filter-threshold=QUALITY_FILTER_THRESHOLD | |
293 Remove reads where any UMI base quality score falls | |
294 below this threshold | |
295 --quality-encoding=QUALITY_ENCODING | |
296 Quality score encoding. Choose from phred33[33-77] | |
297 phred64 [64-106] or solexa [59-106] | |
298 | |
299 Usage: | |
300 ------ | |
301 | |
302 For single ended reads: | |
303 umi_tools extract --bc-pattern=[PATTERN] -L extract.log [OPTIONS] | |
304 | |
305 reads from stdin and outputs to stdout. | |
306 | |
307 For paired end reads: | |
308 umi_tools extract --bc-pattern=[PATTERN] --read2-in=[FASTQIN] --read2-out=[FASTQOUT] -L extract.log [OPTIONS] | |
309 | |
310 reads end one from stdin and end two from FASTQIN and outputs end one to stdout | |
311 and end two to FASTQOUT. | |
312 | 273 |
313 ]]></help> | 274 ]]></help> |
314 <expand macro="citations" /> | 275 <expand macro="citations" /> |
315 </tool> | 276 </tool> |