Mercurial > repos > dfornika > data_manager_build_kraken2_database
comparison data_manager/kraken2_build_database.py @ 18:f005b6efd096 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/data_managers/data_manager_build_kraken2_database/ commit a6877055190331683367394d1d1ca6ff47cf4fa7-dirty
| author | dfornika |
|---|---|
| date | Fri, 24 May 2019 13:39:08 -0400 |
| parents | 4c9f9d6098eb |
| children | ffeb852407d6 |
Comparison legend: equal | deleted | inserted | replaced
| 17:4c9f9d6098eb | 18:f005b6efd096 |
|---|---|
| 48 | 48 |
| 49 def __str__(self): | 49 def __str__(self): |
| 50 return self.value | 50 return self.value |
| 51 | 51 |
| 52 | 52 |
| 53 def kraken2_build_standard(data_manager_dict, kraken2_args, target_directory, data_table_name=DATA_TABLE_NAME): | 53 def kraken2_build_standard(kraken2_args, target_directory, data_table_name=DATA_TABLE_NAME): |
| 54 now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ") | 54 now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ") |
| 55 | 55 |
| 56 database_value = "_".join([ | 56 database_value = "_".join([ |
| 57 now, | 57 now, |
| 58 "standard", | 58 "standard", |
| 90 ] | 90 ] |
| 91 | 91 |
| 92 subprocess.check_call(['kraken2-build'] + args, cwd=target_directory) | 92 subprocess.check_call(['kraken2-build'] + args, cwd=target_directory) |
| 93 | 93 |
| 94 data_table_entry = { | 94 data_table_entry = { |
| 95 "value": database_value, | 95 'data_tables': { |
| 96 "name": database_name, | 96 data_table_name: [ |
| 97 "path": database_path, | 97 { |
| 98 } | 98 "value": database_value, |
| 99 | 99 "name": database_name, |
| 100 _add_data_table_entry(data_manager_dict, data_table_entry) | 100 "path": database_path, |
| 101 | 101 } |
| 102 | 102 ] |
| 103 def kraken2_build_minikraken(data_manager_dict, minikraken2_version, target_directory, data_table_name=DATA_TABLE_NAME): | 103 } |
| 104 } | |
| 105 | |
| 106 return data_table_entry | |
| 107 | |
| 108 | |
| 109 def kraken2_build_minikraken(minikraken2_version, target_directory, data_table_name=DATA_TABLE_NAME): | |
| 104 | 110 |
| 105 now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ") | 111 now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ") |
| 106 | 112 |
| 107 database_value = "_".join([ | 113 database_value = "_".join([ |
| 108 now, | 114 now, |
| 115 "Minikraken2", | 121 "Minikraken2", |
| 116 minikraken2_version, | 122 minikraken2_version, |
| 117 "(Created:", | 123 "(Created:", |
| 118 now + ")" | 124 now + ")" |
| 119 ]) | 125 ]) |
| 126 | |
| 127 database_path = database_value | |
| 120 | 128 |
| 121 # download the minikraken2 data | 129 # download the minikraken2 data |
| 122 src = urlopen( | 130 src = urlopen( |
| 123 'ftp://ftp.ccb.jhu.edu/pub/data/kraken2_dbs/minikraken2_%s_8GB_201904_UPDATE.tgz' | 131 'ftp://ftp.ccb.jhu.edu/pub/data/kraken2_dbs/minikraken2_%s_8GB_201904_UPDATE.tgz' |
| 124 % minikraken2_version | 132 % minikraken2_version |
| 125 ) | 133 ) |
| 126 with open('tmp_data.tar.gz', 'wb') as dst: | 134 with open('tmp_data.tar.gz', 'wb') as dst: |
| 127 shutil.copyfileobj(src, dst) | 135 shutil.copyfileobj(src, dst) |
| 128 # unpack the downloaded archive to the target directory | 136 # unpack the downloaded archive to the target directory |
| 129 with tarfile.open('tmp_data.tar.gz', 'r:gz') as fh: | 137 with tarfile.open('tmp_data.tar.gz', 'r:gz') as fh: |
| 130 fh.extractall(target_directory) | 138 for member in fh.getmembers(): |
| 139 if member.isreg(): | |
| 140 member.name = os.path.basename(member.name) | |
| 141 fh.extract(member, os.path.join(target_directory, database_path)) | |
| 131 | 142 |
| 132 data_table_entry = { | 143 data_table_entry = { |
| 133 "value": database_value, | 144 'data_tables': { |
| 134 "name": database_name, | 145 data_table_name: [ |
| 135 "path": database_value, | 146 { |
| 136 } | 147 "value": database_value, |
| 137 | 148 "name": database_name, |
| 138 _add_data_table_entry(data_manager_dict, data_table_entry) | 149 "path": database_path, |
| 139 | 150 } |
| 140 | 151 ] |
| 141 def kraken2_build_special(data_manager_dict, kraken2_args, target_directory, data_table_name=DATA_TABLE_NAME): | 152 } |
| 153 } | |
| 154 | |
| 155 return data_table_entry | |
| 156 | |
| 157 | |
| 158 def kraken2_build_special(kraken2_args, target_directory, data_table_name=DATA_TABLE_NAME): | |
| 142 | 159 |
| 143 now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ") | 160 now = datetime.datetime.utcnow().strftime("%Y-%m-%dT%H%M%SZ") |
| 144 | 161 |
| 145 special_database_names = { | 162 special_database_names = { |
| 146 "rdp": "RDP", | 163 "rdp": "RDP", |
| 185 ] | 202 ] |
| 186 | 203 |
| 187 subprocess.check_call(['kraken2-build'] + args, cwd=target_directory) | 204 subprocess.check_call(['kraken2-build'] + args, cwd=target_directory) |
| 188 | 205 |
| 189 data_table_entry = { | 206 data_table_entry = { |
| 190 "value": database_value, | 207 'data_tables': { |
| 191 "name": database_name, | 208 data_table_name: [ |
| 192 "path": database_path, | 209 { |
| 193 } | 210 "value": database_value, |
| 194 | 211 "name": database_name, |
| 195 _add_data_table_entry(data_manager_dict, data_table_entry) | 212 "path": database_path, |
| 196 | 213 } |
| 197 | 214 ] |
| 198 def kraken2_build_custom(data_manager_dict, kraken2_args, custom_database_name, target_directory, data_table_name=DATA_TABLE_NAME): | 215 } |
| 216 } | |
| 217 | |
| 218 return data_table_entry | |
| 219 | |
| 220 | |
| 221 def kraken2_build_custom(kraken2_args, custom_database_name, target_directory, data_table_name=DATA_TABLE_NAME): | |
| 199 | 222 |
| 200 args = [ | 223 args = [ |
| 201 '--threads', str(kraken2_args["threads"]), | 224 '--threads', str(kraken2_args["threads"]), |
| 202 '--download-taxonomy', | 225 '--download-taxonomy', |
| 203 '--db', custom_database_name | 226 '--db', custom_database_name, |
| 204 ] | 227 ] |
| 228 | |
| 229 if kraken2_args['skip_maps']: | |
| 230 args.append('--skip-maps') | |
| 205 | 231 |
| 206 subprocess.check_call(['kraken2-build'] + args, cwd=target_directory) | 232 subprocess.check_call(['kraken2-build'] + args, cwd=target_directory) |
| 207 | 233 |
| 208 args = [ | 234 args = [ |
| 209 '--threads', str(kraken2_args["threads"]), | 235 '--threads', str(kraken2_args["threads"]), |
| 228 '--threads', str(kraken2_args["threads"]), | 254 '--threads', str(kraken2_args["threads"]), |
| 229 '--clean', | 255 '--clean', |
| 230 '--db', custom_database_name | 256 '--db', custom_database_name |
| 231 ] | 257 ] |
| 232 | 258 |
| 233 subprocess.check_call(['kraken2-build'] + args, target_directory) | 259 subprocess.check_call(['kraken2-build'] + args, cwd=target_directory) |
| 234 | 260 |
| 235 data_table_entry = { | 261 data_table_entry = { |
| 236 "value": custom_database_name, | 262 'data_tables': { |
| 237 "name": custom_database_name, | 263 data_table_name: [ |
| 238 "path": custom_database_name | 264 { |
| 239 } | 265 "value": custom_database_name, |
| 240 | 266 "name": custom_database_name, |
| 241 _add_data_table_entry(data_manager_dict, data_table_name, data_table_entry) | 267 "path": custom_database_name |
| 242 | 268 } |
| 243 | 269 ] |
| 244 def _add_data_table_entry(data_manager_dict, data_table_entry, data_table_name=DATA_TABLE_NAME): | 270 } |
| 245 data_manager_dict['data_tables'] = data_manager_dict.get( 'data_tables', {} ) | 271 } |
| 246 data_manager_dict['data_tables'][data_table_name] = data_manager_dict['data_tables'].get( data_table_name, [] ) | 272 |
| 247 data_manager_dict['data_tables'][data_table_name].append( data_table_entry ) | 273 return data_table_entry |
| 248 return data_manager_dict | |
| 249 | 274 |
| 250 | 275 |
| 251 def main(): | 276 def main(): |
| 252 parser = argparse.ArgumentParser() | 277 parser = argparse.ArgumentParser() |
| 253 parser.add_argument('data_manager_json') | 278 parser.add_argument('data_manager_json') |
| 257 parser.add_argument('--threads', dest='threads', default=1, help='threads') | 282 parser.add_argument('--threads', dest='threads', default=1, help='threads') |
| 258 parser.add_argument('--database-type', dest='database_type', type=KrakenDatabaseTypes, choices=list(KrakenDatabaseTypes), required=True, help='type of kraken database to build') | 283 parser.add_argument('--database-type', dest='database_type', type=KrakenDatabaseTypes, choices=list(KrakenDatabaseTypes), required=True, help='type of kraken database to build') |
| 259 parser.add_argument('--minikraken2-version', dest='minikraken2_version', type=Minikraken2Versions, choices=list(Minikraken2Versions), help='MiniKraken2 version (only applies to --database-type minikraken)') | 284 parser.add_argument('--minikraken2-version', dest='minikraken2_version', type=Minikraken2Versions, choices=list(Minikraken2Versions), help='MiniKraken2 version (only applies to --database-type minikraken)') |
| 260 parser.add_argument('--special-database-type', dest='special_database_type', type=SpecialDatabaseTypes, choices=list(SpecialDatabaseTypes), help='type of special database to build (only applies to --database-type special)') | 285 parser.add_argument('--special-database-type', dest='special_database_type', type=SpecialDatabaseTypes, choices=list(SpecialDatabaseTypes), help='type of special database to build (only applies to --database-type special)') |
| 261 parser.add_argument('--custom-fasta', dest='custom_fasta', help='fasta file for custom database (only applies to --database-type custom)') | 286 parser.add_argument('--custom-fasta', dest='custom_fasta', help='fasta file for custom database (only applies to --database-type custom)') |
| 262 parser.add_argument( '--custom-database-name', dest='custom_database_name', help='Name for custom database (only applies to --database-type custom)' ) | 287 parser.add_argument('--custom-database-name', dest='custom_database_name', help='Name for custom database (only applies to --database-type custom)') |
| 288 parser.add_argument('--skip-maps', dest='skip_maps', action='store_true', help='') | |
| 263 args = parser.parse_args() | 289 args = parser.parse_args() |
| 264 | 290 |
| 265 data_manager_input = json.loads(open(args.data_manager_json).read()) | 291 data_manager_input = json.loads(open(args.data_manager_json).read()) |
| 266 | 292 |
| 267 target_directory = data_manager_input['output_data'][0]['extra_files_path'] | 293 target_directory = data_manager_input['output_data'][0]['extra_files_path'] |
| 274 else: | 300 else: |
| 275 raise | 301 raise |
| 276 | 302 |
| 277 data_manager_output = {} | 303 data_manager_output = {} |
| 278 | 304 |
| 279 print(args.database_type) | |
| 280 if str(args.database_type) == 'standard': | 305 if str(args.database_type) == 'standard': |
| 281 kraken2_args = { | 306 kraken2_args = { |
| 282 "kmer_len": args.kmer_len, | 307 "kmer_len": args.kmer_len, |
| 283 "minimizer_len": args.minimizer_len, | 308 "minimizer_len": args.minimizer_len, |
| 284 "minimizer_spaces": args.minimizer_spaces, | 309 "minimizer_spaces": args.minimizer_spaces, |
| 285 "threads": args.threads, | 310 "threads": args.threads, |
| 286 } | 311 } |
| 287 kraken2_build_standard( | 312 data_manager_output = kraken2_build_standard( |
| 288 data_manager_output, | |
| 289 kraken2_args, | 313 kraken2_args, |
| 290 target_directory, | 314 target_directory, |
| 291 ) | 315 ) |
| 292 elif str(args.database_type) == 'minikraken': | 316 elif str(args.database_type) == 'minikraken': |
| 293 kraken2_build_minikraken( | 317 data_manager_output = kraken2_build_minikraken( |
| 294 data_manager_output, | |
| 295 str(args.minikraken2_version), | 318 str(args.minikraken2_version), |
| 296 target_directory | 319 target_directory |
| 297 ) | 320 ) |
| 298 elif str(args.database_type) == 'special': | 321 elif str(args.database_type) == 'special': |
| 299 kraken2_args = { | 322 kraken2_args = { |
| 301 "kmer_len": args.kmer_len, | 324 "kmer_len": args.kmer_len, |
| 302 "minimizer_len": args.minimizer_len, | 325 "minimizer_len": args.minimizer_len, |
| 303 "minimizer_spaces": args.minimizer_spaces, | 326 "minimizer_spaces": args.minimizer_spaces, |
| 304 "threads": args.threads, | 327 "threads": args.threads, |
| 305 } | 328 } |
| 306 kraken2_build_special( | 329 data_manager_output = kraken2_build_special( |
| 307 data_manager_output, | |
| 308 kraken2_args, | 330 kraken2_args, |
| 309 target_directory, | 331 target_directory, |
| 310 ) | 332 ) |
| 311 elif str(args.database_type) == 'custom': | 333 elif str(args.database_type) == 'custom': |
| 312 kraken2_args = { | 334 kraken2_args = { |
| 313 "custom_fasta": args.custom_fasta, | 335 "custom_fasta": args.custom_fasta, |
| 336 "skip_maps": args.skip_maps, | |
| 314 "kmer_len": args.kmer_len, | 337 "kmer_len": args.kmer_len, |
| 315 "minimizer_len": args.minimizer_len, | 338 "minimizer_len": args.minimizer_len, |
| 316 "minimizer_spaces": args.minimizer_spaces, | 339 "minimizer_spaces": args.minimizer_spaces, |
| 317 "threads": args.threads, | 340 "threads": args.threads, |
| 318 } | 341 } |
| 319 kraken2_build_custom( | 342 data_manager_output = kraken2_build_custom( |
| 320 data_manager_output, | |
| 321 kraken2_args, | 343 kraken2_args, |
| 322 args.custom_database_name, | 344 args.custom_database_name, |
| 323 target_directory, | 345 target_directory, |
| 324 ) | 346 ) |
| 325 else: | 347 else: |
