Mercurial > repos > iuc > virannot_blast2tsv
comparison otu.py @ 3:f8ebd1e802d7 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 16701bfbffd605805e847897799251ab748f559f
author | iuc |
---|---|
date | Sun, 08 Sep 2024 14:09:19 +0000 |
parents | e889010415a1 |
children | bb29ae8708b5 |
comparison
equal
deleted
inserted
replaced
2:77c3ef9b0ed7 | 3:f8ebd1e802d7 |
---|---|
184 cdd_output = options.output + "/" + hits_collection[cdd_id]["short_description"].replace(" ", "_") | 184 cdd_output = options.output + "/" + hits_collection[cdd_id]["short_description"].replace(" ", "_") |
185 if not os.path.exists(cdd_output): | 185 if not os.path.exists(cdd_output): |
186 os.mkdir(cdd_output) | 186 os.mkdir(cdd_output) |
187 if os.path.exists(cdd_output + "/seq_to_align.fasta"): | 187 if os.path.exists(cdd_output + "/seq_to_align.fasta"): |
188 os.remove(cdd_output + "/seq_to_align.fasta") | 188 os.remove(cdd_output + "/seq_to_align.fasta") |
| | 189 if os.path.exists(cdd_output + "/seq_nucc.fasta"): |
| | 190 os.remove(cdd_output + "/seq_nucc.fasta") |
189 file_seq_to_align = cdd_output + "/seq_to_align.fasta" | 191 file_seq_to_align = cdd_output + "/seq_to_align.fasta" |
190 file_color_config = cdd_output + "/color_config.txt" | 192 file_color_config = cdd_output + "/color_config.txt" |
191 f = open(file_seq_to_align, "a") | 193 f = open(file_seq_to_align, "a") |
192 f_c = open(file_color_config, "w+") | 194 f_c = open(file_color_config, "w+") |
193 log.info("Writing to " + file_seq_to_align) | 195 log.info("Writing to " + file_seq_to_align) |
296 for cdd_id in hits_collection: | 298 for cdd_id in hits_collection: |
297 otu_collection = {} | 299 otu_collection = {} |
298 cdd_output = options.output + "/" + hits_collection[cdd_id]["short_description"].replace(" ", "_") | 300 cdd_output = options.output + "/" + hits_collection[cdd_id]["short_description"].replace(" ", "_") |
299 worksheet = workbook.add_worksheet(hits_collection[cdd_id]["short_description"]) # add a worksheet | 301 worksheet = workbook.add_worksheet(hits_collection[cdd_id]["short_description"]) # add a worksheet |
300 file_cluster = cdd_output + '/otu_cluster.csv' | 302 file_cluster = cdd_output + '/otu_cluster.csv' |
| | 303 file_fasta_nucc = cdd_output + '/representative_nucc.fasta' |
301 with open(file_cluster, 'r') as clust: | 304 with open(file_cluster, 'r') as clust: |
302 otu_reader = csv.reader(clust, delimiter=',') | 305 otu_reader = csv.reader(clust, delimiter=',') |
303 samples_list = [] | 306 samples_list = [] |
304 for row in otu_reader: | 307 for row in otu_reader: |
305 contigs_list = row[2:len(row) - 1] # remove last empty column | 308 contigs_list = row[2:len(row) - 1] # remove last empty column |
340 for otu in otu_collection: | 343 for otu in otu_collection: |
341 for sample in otu_collection[otu]: | 344 for sample in otu_collection[otu]: |
342 if sample not in ['contigs_list', 'global_taxonomy']: | 345 if sample not in ['contigs_list', 'global_taxonomy']: |
343 total_nb_read = 0 | 346 total_nb_read = 0 |
344 for contig in otu_collection[otu][sample]: | 347 for contig in otu_collection[otu][sample]: |
| | 348 if otu_collection[otu][sample][contig]['nb'] == '': |
| | 349 otu_collection[otu][sample][contig]['nb'] = 0 |
345 total_nb_read += int(otu_collection[otu][sample][contig]['nb']) | 350 total_nb_read += int(otu_collection[otu][sample][contig]['nb']) |
346 otu_collection[otu][sample]['total_nb_read'] = total_nb_read | 351 otu_collection[otu][sample]['total_nb_read'] = total_nb_read |
347 row = 0 | 352 row = 0 |
348 column = 0 | 353 column = 0 |
349 item = '#OTU_name' | 354 item = '#OTU_name' |
353 worksheet.write(row, column, samp) | 358 worksheet.write(row, column, samp) |
354 worksheet.write(row, column + 1, 'taxonomy') | 359 worksheet.write(row, column + 1, 'taxonomy') |
355 worksheet.write(row, column + 2, 'contigs_list') | 360 worksheet.write(row, column + 2, 'contigs_list') |
356 row = 1 | 361 row = 1 |
357 # column = 0 | 362 # column = 0 |
358 for otu in otu_collection: | 363 with open(file_fasta_nucc, "w+") as f_nucc: |
359 if isinstance(otu_collection[otu], dict): | 364 for otu in otu_collection: |
360 column = 0 | 365 log.info(otu) |
361 worksheet.write(row, column, otu) | 366 if isinstance(otu_collection[otu], dict): |
362 # prepare table with 0 in each cells | 367 column = 0 |
363 for sample in otu_collection[otu]: | 368 worksheet.write(row, column, otu) |
364 column = 1 | 369 # prepare table with 0 in each cells |
365 for samp in samples_list: | 370 for sample in otu_collection[otu]: |
366 worksheet.write(row, column, 0) | 371 column = 1 |
367 column += 1 | 372 for samp in samples_list: |
368 # fill in table with nb of read for each sample and each OTU | 373 worksheet.write(row, column, 0) |
369 for sample in otu_collection[otu]: | 374 column += 1 |
370 column = 1 | 375 # fill in table with nb of read for each sample and each OTU |
371 for samp in samples_list: | 376 for sample in otu_collection[otu]: |
372 if samp == sample: | 377 column = 1 |
373 worksheet.write(row, column, otu_collection[otu][sample]['total_nb_read']) | 378 for samp in samples_list: |
374 column += 1 | 379 if samp == sample: |
375 worksheet.write(row, len(samples_list) + 1, otu_collection[otu]['global_taxonomy'].replace(';', ' ')) | 380 worksheet.write(row, column, otu_collection[otu][sample]['total_nb_read']) |
376 worksheet.write(row, len(samples_list) + 2, ",".join(otu_collection[otu]['contigs_list'])) | 381 column += 1 |
377 row += 1 | 382 worksheet.write(row, len(samples_list) + 1, otu_collection[otu]['global_taxonomy'].replace(';', ' ')) |
| | 383 worksheet.write(row, len(samples_list) + 2, ",".join(otu_collection[otu]['contigs_list'])) |
| | 384 row += 1 |
| | 385 f_nucc.write(">" + cdd_id + "_" + otu + "_" + otu_collection[otu]['contigs_list'][0] + "\n") |
| | 386 f_nucc.write(str(hits_collection[cdd_id][otu_collection[otu]['contigs_list'][0]]['nuccleotide']) + "\n") |
378 workbook.close() | 387 workbook.close() |
379 read_file = pd.ExcelFile(file_xlsx) | 388 read_file = pd.ExcelFile(file_xlsx) |
380 for sheet in read_file.sheet_names: | 389 for sheet in read_file.sheet_names: |
381 cluster_nb_reads_file = options.output + "/" + sheet.replace(" ", "_") + "/cluster_nb_reads_files.tab" | 390 cluster_nb_reads_file = options.output + "/" + sheet.replace(" ", "_") + "/cluster_nb_reads_files.tab" |
382 data_xls = pd.read_excel(file_xlsx, sheet, dtype=str, index_col=None) | 391 data_xls = pd.read_excel(file_xlsx, sheet, dtype=str, index_col=None) |
390 # create mapping file with all informations to use to create HTML report | 399 # create mapping file with all informations to use to create HTML report |
391 map_file_path = options.output + "/map.txt" | 400 map_file_path = options.output + "/map.txt" |
392 if os.path.exists(map_file_path): | 401 if os.path.exists(map_file_path): |
393 os.remove(map_file_path) | 402 os.remove(map_file_path) |
394 | 403 |
395 map_file = open(map_file_path, "w+") | 404 with open(map_file_path, "w+") as map_file: |
396 headers = ['#cdd_id', 'align_files', 'tree_files', 'cluster_files', 'cluster_nb_reads_files', 'pairwise_files', 'description', 'full_description\n'] | 405 headers = ['#cdd_id', 'align_files', 'tree_files', 'cluster_files', 'cluster_nb_reads_files', 'pairwise_files', 'description', 'full_description\n'] |
397 map_file.write("\t".join(headers)) | 406 map_file.write("\t".join(headers)) |
398 for cdd_id in hits_collection: | 407 for cdd_id in hits_collection: |
399 cdd_output = hits_collection[cdd_id]["short_description"].replace(" ", "_") | 408 cdd_output = hits_collection[cdd_id]["short_description"].replace(" ", "_") |
400 short_description = cdd_output | 409 short_description = cdd_output |
401 file_seq_aligned = cdd_output + '/seq_aligned.final_tree.fa' | 410 file_seq_aligned = cdd_output + '/seq_aligned.final_tree.fa' |
402 tree_file = cdd_output + '/tree.dnd.png' | 411 tree_file = cdd_output + '/tree.dnd.png' |
403 file_cluster = cdd_output + '/otu_cluster.csv' | 412 file_cluster = cdd_output + '/otu_cluster.csv' |
404 file_matrix = cdd_output + "/identity_matrix.csv" | 413 file_matrix = cdd_output + "/identity_matrix.csv" |
405 cluster_nb_reads_files = cdd_output + "/cluster_nb_reads_files.tab" | 414 cluster_nb_reads_files = cdd_output + "/cluster_nb_reads_files.tab" |
406 map_file.write(cdd_id + "\t" + file_seq_aligned + "\t" + tree_file + "\t") | 415 map_file.write(cdd_id + "\t" + file_seq_aligned + "\t" + tree_file + "\t") |
407 map_file.write(file_cluster + "\t" + cluster_nb_reads_files + "\t" + file_matrix + "\t") | 416 map_file.write(file_cluster + "\t" + cluster_nb_reads_files + "\t" + file_matrix + "\t") |
408 map_file.write(short_description + "\t" + hits_collection[cdd_id]["full_description"] + "\n") | 417 map_file.write(short_description + "\t" + hits_collection[cdd_id]["full_description"] + "\n") |
409 map_file.close() | |
410 log.info("Writing HTML report") | 418 log.info("Writing HTML report") |
411 html_cmd = os.path.join(options.tool_path, 'rps2tree_html.py') + ' -m ' + map_file_path + ' -o ' + options.output | 419 html_cmd = os.path.join(options.tool_path, 'rps2tree_html.py') + ' -m ' + map_file_path + ' -o ' + options.output |
412 log.debug(html_cmd) | 420 log.debug(html_cmd) |
413 os.system(html_cmd) | 421 os.system(html_cmd) |
414 | 422 |