Mercurial > repos > dfornika > data_manager_build_kraken2_database
comparison data_manager/kraken2_build_database.py @ 18:f005b6efd096 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_kraken2_database/ commit a6877055190331683367394d1d1ca6ff47cf4fa7-dirty
author | dfornika |
---|---|
date | Fri, 24 May 2019 13:39:08 -0400 |
parents | 4c9f9d6098eb |
children | ffeb852407d6 |
comparison
equal
deleted
inserted
replaced
17:4c9f9d6098eb | 18:f005b6efd096 |
---|---|
48 | 48 |
49 def __str__(self): | 49 def __str__(self): |
50 return self.value | 50 return self.value |
51 | 51 |
52 | 52 |
53 def kraken2_build_standard(data_manager_dict, kraken2_args, target_directory, data_table_name=DATA_TABLE_NAME): | 53 def kraken2_build_standard(kraken2_args, target_directory, data_table_name=DATA_TABLE_NAME): |
54 now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ") | 54 now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ") |
55 | 55 |
56 database_value = "_".join([ | 56 database_value = "_".join([ |
57 now, | 57 now, |
58 "standard", | 58 "standard", |
90 ] | 90 ] |
91 | 91 |
92 subprocess.check_call(['kraken2-build'] + args, cwd=target_directory) | 92 subprocess.check_call(['kraken2-build'] + args, cwd=target_directory) |
93 | 93 |
94 data_table_entry = { | 94 data_table_entry = { |
95 "value": database_value, | 95 'data_tables': { |
96 "name": database_name, | 96 data_table_name: [ |
97 "path": database_path, | 97 { |
98 } | 98 "value": database_value, |
99 | 99 "name": database_name, |
100 _add_data_table_entry(data_manager_dict, data_table_entry) | 100 "path": database_path, |
101 | 101 } |
102 | 102 ] |
103 def kraken2_build_minikraken(data_manager_dict, minikraken2_version, target_directory, data_table_name=DATA_TABLE_NAME): | 103 } |
104 } | |
105 | |
106 return data_table_entry | |
107 | |
108 | |
109 def kraken2_build_minikraken(minikraken2_version, target_directory, data_table_name=DATA_TABLE_NAME): | |
104 | 110 |
105 now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ") | 111 now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ") |
106 | 112 |
107 database_value = "_".join([ | 113 database_value = "_".join([ |
108 now, | 114 now, |
115 "Minikraken2", | 121 "Minikraken2", |
116 minikraken2_version, | 122 minikraken2_version, |
117 "(Created:", | 123 "(Created:", |
118 now + ")" | 124 now + ")" |
119 ]) | 125 ]) |
126 | |
127 database_path = database_value | |
120 | 128 |
121 # download the minikraken2 data | 129 # download the minikraken2 data |
122 src = urlopen( | 130 src = urlopen( |
123 'ftp://ftp.ccb.jhu.edu/pub/data/kraken2_dbs/minikraken2_%s_8GB_201904_UPDATE.tgz' | 131 'ftp://ftp.ccb.jhu.edu/pub/data/kraken2_dbs/minikraken2_%s_8GB_201904_UPDATE.tgz' |
124 % minikraken2_version | 132 % minikraken2_version |
125 ) | 133 ) |
126 with open('tmp_data.tar.gz', 'wb') as dst: | 134 with open('tmp_data.tar.gz', 'wb') as dst: |
127 shutil.copyfileobj(src, dst) | 135 shutil.copyfileobj(src, dst) |
128 # unpack the downloaded archive to the target directory | 136 # unpack the downloaded archive to the target directory |
129 with tarfile.open('tmp_data.tar.gz', 'r:gz') as fh: | 137 with tarfile.open('tmp_data.tar.gz', 'r:gz') as fh: |
130 fh.extractall(target_directory) | 138 for member in fh.getmembers(): |
139 if member.isreg(): | |
140 member.name = os.path.basename(member.name) | |
141 fh.extract(member, os.path.join(target_directory, database_path)) | |
131 | 142 |
132 data_table_entry = { | 143 data_table_entry = { |
133 "value": database_value, | 144 'data_tables': { |
134 "name": database_name, | 145 data_table_name: [ |
135 "path": database_value, | 146 { |
136 } | 147 "value": database_value, |
137 | 148 "name": database_name, |
138 _add_data_table_entry(data_manager_dict, data_table_entry) | 149 "path": database_path, |
139 | 150 } |
140 | 151 ] |
141 def kraken2_build_special(data_manager_dict, kraken2_args, target_directory, data_table_name=DATA_TABLE_NAME): | 152 } |
153 } | |
154 | |
155 return data_table_entry | |
156 | |
157 | |
158 def kraken2_build_special(kraken2_args, target_directory, data_table_name=DATA_TABLE_NAME): | |
142 | 159 |
143 now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ") | 160 now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ") |
144 | 161 |
145 special_database_names = { | 162 special_database_names = { |
146 "rdp": "RDP", | 163 "rdp": "RDP", |
185 ] | 202 ] |
186 | 203 |
187 subprocess.check_call(['kraken2-build'] + args, cwd=target_directory) | 204 subprocess.check_call(['kraken2-build'] + args, cwd=target_directory) |
188 | 205 |
189 data_table_entry = { | 206 data_table_entry = { |
190 "value": database_value, | 207 'data_tables': { |
191 "name": database_name, | 208 data_table_name: [ |
192 "path": database_path, | 209 { |
193 } | 210 "value": database_value, |
194 | 211 "name": database_name, |
195 _add_data_table_entry(data_manager_dict, data_table_entry) | 212 "path": database_path, |
196 | 213 } |
197 | 214 ] |
198 def kraken2_build_custom(data_manager_dict, kraken2_args, custom_database_name, target_directory, data_table_name=DATA_TABLE_NAME): | 215 } |
216 } | |
217 | |
218 return data_table_entry | |
219 | |
220 | |
221 def kraken2_build_custom(kraken2_args, custom_database_name, target_directory, data_table_name=DATA_TABLE_NAME): | |
199 | 222 |
200 args = [ | 223 args = [ |
201 '--threads', str(kraken2_args["threads"]), | 224 '--threads', str(kraken2_args["threads"]), |
202 '--download-taxonomy', | 225 '--download-taxonomy', |
203 '--db', custom_database_name | 226 '--db', custom_database_name, |
204 ] | 227 ] |
228 | |
229 if kraken2_args['skip_maps']: | |
230 args.append('--skip-maps') | |
205 | 231 |
206 subprocess.check_call(['kraken2-build'] + args, cwd=target_directory) | 232 subprocess.check_call(['kraken2-build'] + args, cwd=target_directory) |
207 | 233 |
208 args = [ | 234 args = [ |
209 '--threads', str(kraken2_args["threads"]), | 235 '--threads', str(kraken2_args["threads"]), |
228 '--threads', str(kraken2_args["threads"]), | 254 '--threads', str(kraken2_args["threads"]), |
229 '--clean', | 255 '--clean', |
230 '--db', custom_database_name | 256 '--db', custom_database_name |
231 ] | 257 ] |
232 | 258 |
233 subprocess.check_call(['kraken2-build'] + args, target_directory) | 259 subprocess.check_call(['kraken2-build'] + args, cwd=target_directory) |
234 | 260 |
235 data_table_entry = { | 261 data_table_entry = { |
236 "value": custom_database_name, | 262 'data_tables': { |
237 "name": custom_database_name, | 263 data_table_name: [ |
238 "path": custom_database_name | 264 { |
239 } | 265 "value": custom_database_name, |
240 | 266 "name": custom_database_name, |
241 _add_data_table_entry(data_manager_dict, data_table_name, data_table_entry) | 267 "path": custom_database_name |
242 | 268 } |
243 | 269 ] |
244 def _add_data_table_entry(data_manager_dict, data_table_entry, data_table_name=DATA_TABLE_NAME): | 270 } |
245 data_manager_dict['data_tables'] = data_manager_dict.get( 'data_tables', {} ) | 271 } |
246 data_manager_dict['data_tables'][data_table_name] = data_manager_dict['data_tables'].get( data_table_name, [] ) | 272 |
247 data_manager_dict['data_tables'][data_table_name].append( data_table_entry ) | 273 return data_table_entry |
248 return data_manager_dict | |
249 | 274 |
250 | 275 |
251 def main(): | 276 def main(): |
252 parser = argparse.ArgumentParser() | 277 parser = argparse.ArgumentParser() |
253 parser.add_argument('data_manager_json') | 278 parser.add_argument('data_manager_json') |
257 parser.add_argument('--threads', dest='threads', default=1, help='threads') | 282 parser.add_argument('--threads', dest='threads', default=1, help='threads') |
258 parser.add_argument('--database-type', dest='database_type', type=KrakenDatabaseTypes, choices=list(KrakenDatabaseTypes), required=True, help='type of kraken database to build') | 283 parser.add_argument('--database-type', dest='database_type', type=KrakenDatabaseTypes, choices=list(KrakenDatabaseTypes), required=True, help='type of kraken database to build') |
259 parser.add_argument('--minikraken2-version', dest='minikraken2_version', type=Minikraken2Versions, choices=list(Minikraken2Versions), help='MiniKraken2 version (only applies to --database-type minikraken)') | 284 parser.add_argument('--minikraken2-version', dest='minikraken2_version', type=Minikraken2Versions, choices=list(Minikraken2Versions), help='MiniKraken2 version (only applies to --database-type minikraken)') |
260 parser.add_argument('--special-database-type', dest='special_database_type', type=SpecialDatabaseTypes, choices=list(SpecialDatabaseTypes), help='type of special database to build (only applies to --database-type special)') | 285 parser.add_argument('--special-database-type', dest='special_database_type', type=SpecialDatabaseTypes, choices=list(SpecialDatabaseTypes), help='type of special database to build (only applies to --database-type special)') |
261 parser.add_argument('--custom-fasta', dest='custom_fasta', help='fasta file for custom database (only applies to --database-type custom)') | 286 parser.add_argument('--custom-fasta', dest='custom_fasta', help='fasta file for custom database (only applies to --database-type custom)') |
262 parser.add_argument( '--custom-database-name', dest='custom_database_name', help='Name for custom database (only applies to --database-type custom)' ) | 287 parser.add_argument('--custom-database-name', dest='custom_database_name', help='Name for custom database (only applies to --database-type custom)') |
288 parser.add_argument('--skip-maps', dest='skip_maps', action='store_true', help='') | |
263 args = parser.parse_args() | 289 args = parser.parse_args() |
264 | 290 |
265 data_manager_input = json.loads(open(args.data_manager_json).read()) | 291 data_manager_input = json.loads(open(args.data_manager_json).read()) |
266 | 292 |
267 target_directory = data_manager_input['output_data'][0]['extra_files_path'] | 293 target_directory = data_manager_input['output_data'][0]['extra_files_path'] |
274 else: | 300 else: |
275 raise | 301 raise |
276 | 302 |
277 data_manager_output = {} | 303 data_manager_output = {} |
278 | 304 |
279 print(args.database_type) | |
280 if str(args.database_type) == 'standard': | 305 if str(args.database_type) == 'standard': |
281 kraken2_args = { | 306 kraken2_args = { |
282 "kmer_len": args.kmer_len, | 307 "kmer_len": args.kmer_len, |
283 "minimizer_len": args.minimizer_len, | 308 "minimizer_len": args.minimizer_len, |
284 "minimizer_spaces": args.minimizer_spaces, | 309 "minimizer_spaces": args.minimizer_spaces, |
285 "threads": args.threads, | 310 "threads": args.threads, |
286 } | 311 } |
287 kraken2_build_standard( | 312 data_manager_output = kraken2_build_standard( |
288 data_manager_output, | |
289 kraken2_args, | 313 kraken2_args, |
290 target_directory, | 314 target_directory, |
291 ) | 315 ) |
292 elif str(args.database_type) == 'minikraken': | 316 elif str(args.database_type) == 'minikraken': |
293 kraken2_build_minikraken( | 317 data_manager_output = kraken2_build_minikraken( |
294 data_manager_output, | |
295 str(args.minikraken2_version), | 318 str(args.minikraken2_version), |
296 target_directory | 319 target_directory |
297 ) | 320 ) |
298 elif str(args.database_type) == 'special': | 321 elif str(args.database_type) == 'special': |
299 kraken2_args = { | 322 kraken2_args = { |
301 "kmer_len": args.kmer_len, | 324 "kmer_len": args.kmer_len, |
302 "minimizer_len": args.minimizer_len, | 325 "minimizer_len": args.minimizer_len, |
303 "minimizer_spaces": args.minimizer_spaces, | 326 "minimizer_spaces": args.minimizer_spaces, |
304 "threads": args.threads, | 327 "threads": args.threads, |
305 } | 328 } |
306 kraken2_build_special( | 329 data_manager_output = kraken2_build_special( |
307 data_manager_output, | |
308 kraken2_args, | 330 kraken2_args, |
309 target_directory, | 331 target_directory, |
310 ) | 332 ) |
311 elif str(args.database_type) == 'custom': | 333 elif str(args.database_type) == 'custom': |
312 kraken2_args = { | 334 kraken2_args = { |
313 "custom_fasta": args.custom_fasta, | 335 "custom_fasta": args.custom_fasta, |
336 "skip_maps": args.skip_maps, | |
314 "kmer_len": args.kmer_len, | 337 "kmer_len": args.kmer_len, |
315 "minimizer_len": args.minimizer_len, | 338 "minimizer_len": args.minimizer_len, |
316 "minimizer_spaces": args.minimizer_spaces, | 339 "minimizer_spaces": args.minimizer_spaces, |
317 "threads": args.threads, | 340 "threads": args.threads, |
318 } | 341 } |
319 kraken2_build_custom( | 342 data_manager_output = kraken2_build_custom( |
320 data_manager_output, | |
321 kraken2_args, | 343 kraken2_args, |
322 args.custom_database_name, | 344 args.custom_database_name, |
323 target_directory, | 345 target_directory, |
324 ) | 346 ) |
325 else: | 347 else: |