Mercurial > repos > artbio > guppy_basecaller
comparison guppy_basecaller.xml @ 0:fb42dde97559 draft
"planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/guppy commit ebd2091cbe5b34821c7c1192949dbec5f4d2eb03-dirty"
author | artbio |
---|---|
date | Wed, 18 Nov 2020 23:26:35 +0000 |
parents | |
children | 93b6cbff5ea4 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:fb42dde97559 |
---|---|
1 <tool id="guppy-basecaller" name="Guppy basecaller wrapper" version="0.1.4" python_template_version="3.5"> | |
2 <description>A simple wrapper for guppy basecaller that depends on configuration files</description> | |
3 <!-- No packages are declared here, so Galaxy resolves no dependencies for this tool; the command section calls guppy_basecaller by bare name and therefore expects it on the PATH of the job environment. --> | |
4 <requirements/> | |
5 <command detect_errors="exit_code"><![CDATA[ | |
6 ## Link each collection element into the job directory as <identifier>.fast5, unpack the model archive, then basecall the directory; every interpolated path is single-quoted so names with spaces or shell metacharacters stay one argument. | |
7 #for $file in $infiles: | |
8 ln -s '$file' '${file.element_identifier}.fast5' && | |
9 #end for | |
10 tar xf '$config' && | |
11 guppy_basecaller -i . | |
12 --save_path out | |
13 --data_path . | |
14 --config *.cfg | |
15 --num_callers 4 | |
16 --records_per_fastq 0 | |
17 --cpu_threads_per_caller \${GALAXY_SLOTS:-2} | |
18 --disable_pings | |
19 --qscore_filtering | |
20 --calib_detect | |
21 ]]></command> | |
22 <inputs> <!-- infiles: collection of fast5 signal files, each linked as <identifier>.fast5 by the command; config: tar archive that the command unpacks and that must provide the .cfg file passed to guppy. --> | |
23 <param type="data_collection" name="infiles" multiple="true" format="h5" label="Fast5 input (datatype h5)" /> | |
24 <param type="data" name="config" format="tar" label="Guppy basecall configuration model" /> | |
25 </inputs> | |
26 <outputs> <!-- Collects every .fastq found under out/PASS (the command saves to "out" with qscore filtering enabled); the designation group captures the file name without its .fastq suffix. NOTE(review): guppy may write a lowercase "pass" directory; confirm the directory name on a case-sensitive filesystem. --> | |
27 <data name="guppy_result" format="fastq"> | |
28 <discover_datasets directory="out/PASS" ext="fastq" pattern="(?P&lt;designation&gt;.+)\.fastq" visible="true"/> | |
29 </data> | |
30 </outputs> | |
31 <help><![CDATA[ | |
32 A wrapper for guppy basecaller. This expects two types of inputs: a collection of fast5 files, | |
33 and a configuration in the form of a tar file. | |
34 | |
35 You can find configurations at https://github.com/nanoporetech/rerio, | |
36 and in particular the directory https://github.com/nanoporetech/rerio/tree/master/basecall_models. | |
37 | |
38 Each file there contains a URL you can download to use, for example | |
39 https://github.com/nanoporetech/rerio/blob/master/basecall_models/res_rna2_r941_min_flipflop_v001 | |
40 points to 'https://nanoporetech.box.com/shared/static/84e1jeudx8lr8ay7e9u1ebnvx3bk2kjf.tgz' | |
41 | |
42 When uploading these .tgz files, take care to set the format to 'tar' (Galaxy may not autodetect it). | |
43 | |
44 The results should be fastq files. | |
45 | |
46 ------ | |
47 | |
48 guppy_basecaller --help | |
49 : Guppy Basecalling Software, (C) Oxford Nanopore Technologies, Limited. Version 3.6.1+249406c, client-server API version 1.1.0 | |
50 | |
51 **Usage**:: | |
52 | |
53 With config file:: | |
54 | |
55 guppy_basecaller -i <input path> -s <save path> -c <config file> [options] | |
56 | |
57 With flowcell and kit name:: | |
58 | |
59 guppy_basecaller -i <input path> -s <save path> --flowcell <flowcell name> | |
60 --kit <kit name> | |
61 | |
62 List supported flowcells and kits:: | |
63 | |
64 guppy_basecaller --print_workflows | |
65 | |
66 Use server for basecalling:: | |
67 | |
68 guppy_basecaller -i <input path> -s <save path> -c <config file> | |
69 --port <server address> [options] | |
70 | |
71 | |
72 **Command line parameters**:: | |
73 | |
74 --trim_threshold arg Threshold above which data will be trimmed | |
75 (in standard deviations of current level | |
76 distribution). | |
77 --trim_min_events arg Adapter trimmer minimum stride intervals | |
78 after stall that must be seen. | |
79 --max_search_len arg Maximum number of samples to search through | |
80 for the stall | |
81 --override_scaling Manually provide scaling parameters rather | |
82 than estimating them from each read. | |
83 --scaling_med arg Median current value to use for manual | |
84 scaling. | |
85 --scaling_mad arg Median absolute deviation to use for manual | |
86 scaling. | |
87 --trim_strategy arg Trimming strategy to apply: 'dna' or 'rna' | |
88 (or 'none' to disable trimming) | |
89 --dmean_win_size arg Window size for coarse stall event | |
90 detection | |
91 --dmean_threshold arg Threshold for coarse stall event detection | |
92 --jump_threshold arg Threshold level for rna stall detection | |
93 --pt_scaling Enable polyT/adapter max detection for read | |
94 scaling. | |
95 --pt_median_offset arg Set polyT median offset for setting read | |
96 scaling median (default 2.5) | |
97 --adapter_pt_range_scale arg Set polyT/adapter range scale for setting | |
98 read scaling median absolute deviation | |
99 (default 5.2) | |
100 --pt_required_adapter_drop arg Set minimum required current drop from | |
101 adapter max to polyT detection. (default | |
102 30.0) | |
103 --pt_minimum_read_start_index arg Set minimum index for read start sample | |
104 required to attempt polyT scaling. (default | |
105 30) | |
106 --as_model_file arg Path to JSON model file for adapter | |
107 scaling. | |
108 --as_gpu_runners_per_device arg Number of runners per GPU device for | |
109 adapter scaling. | |
110 --as_cpu_threads_per_scaler arg Number of CPU worker threads per adapter | |
111 scaler | |
112 --as_reads_per_runner arg Maximum reads per runner for adapter | |
113 scaling. | |
114 --as_num_scalers arg Number of parallel scalers for adapter | |
115 scaling. | |
116 -m [ --model_file ] arg Path to JSON model file. | |
117 -k [ --kernel_path ] arg Path to GPU kernel files location (only | |
118 needed if builtin_scripts is false). | |
119 -x [ --device ] arg Specify basecalling device: 'auto', or | |
120 'cuda:<device_id>'. | |
121 --builtin_scripts arg Whether to use GPU kernels that were | |
122 included at compile-time. | |
123 --chunk_size arg Stride intervals per chunk. | |
124 --chunks_per_runner arg Maximum chunks per runner. | |
125 --chunks_per_caller arg Soft limit on number of chunks in each | |
126 caller's queue. New reads will not be | |
127 queued while this is exceeded. | |
128 --high_priority_threshold arg Number of high priority chunks to process | |
129 for each medium priority chunk. | |
130 --medium_priority_threshold arg Number of medium priority chunks to process | |
131 for each low priority chunk. | |
132 --overlap arg Overlap between chunks (in stride | |
133 intervals). | |
134 --gpu_runners_per_device arg Number of runners per GPU device. | |
135 --cpu_threads_per_caller arg Number of CPU worker threads per | |
136 basecaller. | |
137 --num_callers arg Number of parallel basecallers to create. | |
138 --post_out Return full posterior matrix in output | |
139 fast5 file and/or called read message from | |
140 server. | |
141 --stay_penalty arg Scaling factor to apply to stay probability | |
142 calculation during transducer decode. | |
143 --qscore_offset arg Qscore calibration offset. | |
144 --qscore_scale arg Qscore calibration scale factor. | |
145 --temp_weight arg Temperature adjustment for weight matrix in | |
146 softmax layer of RNN. | |
147 --temp_bias arg Temperature adjustment for bias vector in | |
148 softmax layer of RNN. | |
149 --qscore_filtering Enable filtering of reads into PASS/FAIL | |
150 folders based on min qscore. | |
151 --min_qscore arg Minimum acceptable qscore for a read to be | |
152 filtered into the PASS folder | |
153 --reverse_sequence arg Reverse the called sequence (for RNA | |
154 sequencing). | |
155 --u_substitution arg Substitute 'U' for 'T' in the called | |
156 sequence (for RNA sequencing). | |
157 --log_speed_frequency arg How often to print out basecalling speed. | |
158 --barcode_kits arg Space separated list of barcoding kit(s) or | |
159 expansion kit(s) to detect against. Must be | |
160 in double quotes. | |
161 --trim_barcodes Trim the barcodes from the output sequences | |
162 in the FastQ files. | |
163 --num_extra_bases_trim arg How vigorous to be in trimming the barcode. | |
164 Default is 0 i.e. the length of the | |
165 detected barcode. A positive integer means | |
166 extra bases will be trimmed, a negative | |
167 number is how many fewer bases (less | |
168 vigorous) will be trimmed. | |
169 --arrangements_files arg Files containing arrangements. | |
170 --score_matrix_filename arg File containing mismatch score matrix. | |
171 --start_gap1 arg Gap penalty for aligning before the | |
172 reference. | |
173 --end_gap1 arg Gap penalty for aligning after the | |
174 reference. | |
175 --open_gap1 arg Penalty for opening a new gap in the | |
176 reference. | |
177 --extend_gap1 arg Penalty for extending a gap in the | |
178 reference. | |
179 --start_gap2 arg Gap penalty for aligning before the query. | |
180 --end_gap2 arg Gap penalty for aligning after the query. | |
181 --open_gap2 arg Penalty for opening a new gap in the query. | |
182 --extend_gap2 arg Penalty for extending a gap in the query. | |
183 --min_score arg Minimum score to consider a valid | |
184 alignment. | |
185 --min_score_rear_override arg Minimum score to consider a valid alignment | |
186 for the rear barcode only (and min_score | |
187 will then be used for the front only when | |
188 this is set). | |
189 --front_window_size arg Window size for the beginning barcode. | |
190 --rear_window_size arg Window size for the ending barcode. | |
191 --require_barcodes_both_ends Reads will only be classified if there is a | |
192 barcode above the min_score at both ends of | |
193 the read. | |
194 --allow_inferior_barcodes Reads will still be classified even if both | |
195 the barcodes at the front and rear (if | |
196 applicable) were not the best scoring | |
197 barcodes above the min_score. | |
198 --detect_mid_strand_barcodes Search for barcodes through the entire | |
199 length of the read. | |
200 --min_score_mid_barcodes arg Minimum score for a barcode to be detected | |
201 in the middle of a read. | |
202 --num_barcoding_buffers arg Number of GPU memory buffers to allocate to | |
203 perform barcoding into. Controls level of | |
204 parallelism on GPU for barcoding. | |
205 --num_barcode_threads arg Number of worker threads to use for | |
206 barcoding. | |
207 --calib_detect Enable calibration strand detection and | |
208 filtering. | |
209 --calib_reference arg Reference FASTA file containing calibration | |
210 strand. | |
211 --calib_min_sequence_length arg Minimum sequence length for reads to be | |
212 considered candidate calibration strands. | |
213 --calib_max_sequence_length arg Maximum sequence length for reads to be | |
214 considered candidate calibration strands. | |
215 --calib_min_coverage arg Minimum reference coverage to pass | |
216 calibration strand detection. | |
217 --print_workflows Output available workflows. | |
218 --flowcell arg Flowcell to find a configuration for | |
219 --kit arg Kit to find a configuration for | |
220 -z [ --quiet ] Quiet mode. Nothing will be output to | |
221 STDOUT if this option is set. | |
222 --trace_categories_logs arg Enable trace logs - list of strings with | |
223 the desired names. | |
224 --verbose_logs Enable verbose logs. | |
225 --disable_pings Disable the transmission of telemetry | |
226 pings. | |
227 --ping_url arg URL to send pings to | |
228 --ping_segment_duration arg Duration in minutes of each ping segment. | |
229 -q [ --records_per_fastq ] arg Maximum number of records per fastq file, 0 | |
230 means use a single file (per worker, per | |
231 run id). | |
232 --read_batch_size arg Maximum batch size, in reads, for grouping | |
233 input files. | |
234 --compress_fastq Compress fastq output files with gzip. | |
235 -i [ --input_path ] arg Path to input fast5 files. | |
236 --input_file_list arg Optional file containing list of input | |
237 fast5 files to process from the input_path. | |
238 -s [ --save_path ] arg Path to save fastq files. | |
239 -l [ --read_id_list ] arg File containing list of read ids to filter | |
240 to | |
241 -r [ --recursive ] Search for input files recursively. | |
242 --fast5_out Choice of whether to do fast5 output. | |
243 --resume Resume a previous basecall run using the | |
244 same output folder. | |
245 --progress_stats_frequency arg Frequency in seconds in which to report | |
246 progress statistics, if supplied will | |
247 replace the default progress display. | |
248 --max_block_size arg Maximum block size (in events) of basecall | |
249 messages to server. | |
250 -p [ --port ] arg Port for basecalling service. | |
251 --barcoding_config_file arg Configuration file to use for barcoding. | |
252 --num_barcode_threads arg Number of worker threads to use for | |
253 barcoding. | |
254 --disable_events Disable the transmission of event tables | |
255 when receiving reads back from the basecall | |
256 server. | |
257 --client_id arg Optional unique identifier (non-negative | |
258 integer) for this instance of the Guppy | |
259 Client Basecaller, if supplied will form | |
260 part of the output filenames. | |
261 --nested_output_folder If flagged output fastq files will be | |
262 written to a nested folder structure, based | |
263 on: protocol_group/sample/protocol/qscore_p | |
264 ass_fail/barcode_arrangement/ | |
265 -h [ --help ] produce help message | |
266 -v [ --version ] print version number | |
267 -c [ --config ] arg Config file to use | |
268 -d [ --data_path ] arg Path to use for loading any data files the | |
269 application requires. | |
270 | |
271 | |
272 ------ | |
273 ]]></help> | |
274 </tool> |