virannot_blast2tsv: otu.py comparison

comparison otu.py @ 3:f8ebd1e802d7 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 16701bfbffd605805e847897799251ab748f559f

author	iuc
date	Sun, 08 Sep 2024 14:09:19 +0000
parents	e889010415a1
children	bb29ae8708b5

comparison

Legend: equal | deleted | inserted | replaced

-:77c3ef9b0ed7
+:f8ebd1e802d7
 cdd_output = options.output + "/" + hits_collection[cdd_id]["short_description"].replace(" ", "_")
 if not os.path.exists(cdd_output):
 os.mkdir(cdd_output)
 if os.path.exists(cdd_output + "/seq_to_align.fasta"):
 os.remove(cdd_output + "/seq_to_align.fasta")
+if os.path.exists(cdd_output + "/seq_nucc.fasta"):
+os.remove(cdd_output + "/seq_nucc.fasta")
 file_seq_to_align = cdd_output + "/seq_to_align.fasta"
 file_color_config = cdd_output + "/color_config.txt"
 f = open(file_seq_to_align, "a")
 f_c = open(file_color_config, "w+")
 log.info("Writing to " + file_seq_to_align)
 for cdd_id in hits_collection:
 otu_collection = {}
 cdd_output = options.output + "/" + hits_collection[cdd_id]["short_description"].replace(" ", "_")
 worksheet = workbook.add_worksheet(hits_collection[cdd_id]["short_description"])  # add a worksheet
 file_cluster = cdd_output + '/otu_cluster.csv'
+file_fasta_nucc = cdd_output + '/representative_nucc.fasta'
 with open(file_cluster, 'r') as clust:
 otu_reader = csv.reader(clust, delimiter=',')
 samples_list = []
 for row in otu_reader:
 contigs_list = row[2:len(row) - 1]  # remove last empty column
 for otu in otu_collection:
 for sample in otu_collection[otu]:
 if sample not in ['contigs_list', 'global_taxonomy']:
 total_nb_read = 0
 for contig in otu_collection[otu][sample]:
+if otu_collection[otu][sample][contig]['nb'] == '':
+otu_collection[otu][sample][contig]['nb'] = 0
 total_nb_read += int(otu_collection[otu][sample][contig]['nb'])
 otu_collection[otu][sample]['total_nb_read'] = total_nb_read
 row = 0
 column = 0
 item = '#OTU_name'
 worksheet.write(row, column, samp)
 worksheet.write(row, column + 1, 'taxonomy')
 worksheet.write(row, column + 2, 'contigs_list')
 row = 1
 # column = 0
-for otu in otu_collection:
+with open(file_fasta_nucc, "w+") as f_nucc:
-if isinstance(otu_collection[otu], dict):
+for otu in otu_collection:
-column = 0
+log.info(otu)
-worksheet.write(row, column, otu)
+if isinstance(otu_collection[otu], dict):
-# prepare table with 0 in each cells
+column = 0
-for sample in otu_collection[otu]:
+worksheet.write(row, column, otu)
-column = 1
+# prepare table with 0 in each cells
-for samp in samples_list:
+for sample in otu_collection[otu]:
-worksheet.write(row, column, 0)
+column = 1
-column += 1
+for samp in samples_list:
-# fill in table with nb of read for each sample and each OTU
+worksheet.write(row, column, 0)
-for sample in otu_collection[otu]:
+column += 1
-column = 1
+# fill in table with nb of read for each sample and each OTU
-for samp in samples_list:
+for sample in otu_collection[otu]:
-if samp == sample:
+column = 1
-worksheet.write(row, column, otu_collection[otu][sample]['total_nb_read'])
+for samp in samples_list:
-column += 1
+if samp == sample:
-worksheet.write(row, len(samples_list) + 1, otu_collection[otu]['global_taxonomy'].replace(';', ' '))
+worksheet.write(row, column, otu_collection[otu][sample]['total_nb_read'])
-worksheet.write(row, len(samples_list) + 2, ",".join(otu_collection[otu]['contigs_list']))
+column += 1
-row += 1
+worksheet.write(row, len(samples_list) + 1, otu_collection[otu]['global_taxonomy'].replace(';', ' '))
+worksheet.write(row, len(samples_list) + 2, ",".join(otu_collection[otu]['contigs_list']))
+row += 1
+f_nucc.write(">" + cdd_id + "_" + otu + "_" + otu_collection[otu]['contigs_list'][0] + "\n")
+f_nucc.write(str(hits_collection[cdd_id][otu_collection[otu]['contigs_list'][0]]['nuccleotide']) + "\n")
 workbook.close()
 read_file = pd.ExcelFile(file_xlsx)
 for sheet in read_file.sheet_names:
 cluster_nb_reads_file = options.output + "/" + sheet.replace(" ", "_") + "/cluster_nb_reads_files.tab"
 data_xls = pd.read_excel(file_xlsx, sheet, dtype=str, index_col=None)
 # create mapping file with all informations to use to create HTML report
 map_file_path = options.output + "/map.txt"
 if os.path.exists(map_file_path):
 os.remove(map_file_path)
-map_file = open(map_file_path, "w+")
+with open(map_file_path, "w+") as map_file:
 headers = ['#cdd_id', 'align_files', 'tree_files', 'cluster_files', 'cluster_nb_reads_files', 'pairwise_files', 'description', 'full_description\n']
 map_file.write("\t".join(headers))
 for cdd_id in hits_collection:
 cdd_output = hits_collection[cdd_id]["short_description"].replace(" ", "_")
 short_description = cdd_output
 file_seq_aligned = cdd_output + '/seq_aligned.final_tree.fa'
 tree_file = cdd_output + '/tree.dnd.png'
 file_cluster = cdd_output + '/otu_cluster.csv'
 file_matrix = cdd_output + "/identity_matrix.csv"
 cluster_nb_reads_files = cdd_output + "/cluster_nb_reads_files.tab"
 map_file.write(cdd_id + "\t" + file_seq_aligned + "\t" + tree_file + "\t")
 map_file.write(file_cluster + "\t" + cluster_nb_reads_files + "\t" + file_matrix + "\t")
 map_file.write(short_description + "\t" + hits_collection[cdd_id]["full_description"] + "\n")
-map_file.close()
 log.info("Writing HTML report")
 html_cmd = os.path.join(options.tool_path, 'rps2tree_html.py') + ' -m ' + map_file_path + ' -o ' + options.output
 log.debug(html_cmd)
 os.system(html_cmd)

Mercurial > repos > iuc > virannot_blast2tsv

comparison otu.py @ 3:f8ebd1e802d7 draft