Mercurial > repos > iuc > virannot_otu
diff otu.py @ 3:40fb54cc6628 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virAnnot commit 16701bfbffd605805e847897799251ab748f559f
author | iuc |
---|---|
date | Sun, 08 Sep 2024 14:09:31 +0000 |
parents | c9dac9b2e01c |
children | adcf06db3030 |
line wrap: on
line diff
--- a/otu.py Wed Aug 21 13:13:50 2024 +0000 +++ b/otu.py Sun Sep 08 14:09:31 2024 +0000 @@ -186,6 +186,8 @@ os.mkdir(cdd_output) if os.path.exists(cdd_output + "/seq_to_align.fasta"): os.remove(cdd_output + "/seq_to_align.fasta") + if os.path.exists(cdd_output + "/seq_nucc.fasta"): + os.remove(cdd_output + "/seq_nucc.fasta") file_seq_to_align = cdd_output + "/seq_to_align.fasta" file_color_config = cdd_output + "/color_config.txt" f = open(file_seq_to_align, "a") @@ -298,6 +300,7 @@ cdd_output = options.output + "/" + hits_collection[cdd_id]["short_description"].replace(" ", "_") worksheet = workbook.add_worksheet(hits_collection[cdd_id]["short_description"]) # add a worksheet file_cluster = cdd_output + '/otu_cluster.csv' + file_fasta_nucc = cdd_output + '/representative_nucc.fasta' with open(file_cluster, 'r') as clust: otu_reader = csv.reader(clust, delimiter=',') samples_list = [] @@ -342,6 +345,8 @@ if sample not in ['contigs_list', 'global_taxonomy']: total_nb_read = 0 for contig in otu_collection[otu][sample]: + if otu_collection[otu][sample][contig]['nb'] == '': + otu_collection[otu][sample][contig]['nb'] = 0 total_nb_read += int(otu_collection[otu][sample][contig]['nb']) otu_collection[otu][sample]['total_nb_read'] = total_nb_read row = 0 @@ -355,26 +360,30 @@ worksheet.write(row, column + 2, 'contigs_list') row = 1 # column = 0 - for otu in otu_collection: - if isinstance(otu_collection[otu], dict): - column = 0 - worksheet.write(row, column, otu) - # prepare table with 0 in each cells - for sample in otu_collection[otu]: - column = 1 - for samp in samples_list: - worksheet.write(row, column, 0) - column += 1 - # fill in table with nb of read for each sample and each OTU - for sample in otu_collection[otu]: - column = 1 - for samp in samples_list: - if samp == sample: - worksheet.write(row, column, otu_collection[otu][sample]['total_nb_read']) - column += 1 - worksheet.write(row, len(samples_list) + 1, otu_collection[otu]['global_taxonomy'].replace(';', ' ')) - worksheet.write(row, len(samples_list) + 2, ",".join(otu_collection[otu]['contigs_list'])) - row += 1 + with open(file_fasta_nucc, "w+") as f_nucc: + for otu in otu_collection: + log.info(otu) + if isinstance(otu_collection[otu], dict): + column = 0 + worksheet.write(row, column, otu) + # prepare table with 0 in each cells + for sample in otu_collection[otu]: + column = 1 + for samp in samples_list: + worksheet.write(row, column, 0) + column += 1 + # fill in table with nb of read for each sample and each OTU + for sample in otu_collection[otu]: + column = 1 + for samp in samples_list: + if samp == sample: + worksheet.write(row, column, otu_collection[otu][sample]['total_nb_read']) + column += 1 + worksheet.write(row, len(samples_list) + 1, otu_collection[otu]['global_taxonomy'].replace(';', ' ')) + worksheet.write(row, len(samples_list) + 2, ",".join(otu_collection[otu]['contigs_list'])) + row += 1 + f_nucc.write(">" + cdd_id + "_" + otu + "_" + otu_collection[otu]['contigs_list'][0] + "\n") + f_nucc.write(str(hits_collection[cdd_id][otu_collection[otu]['contigs_list'][0]]['nuccleotide']) + "\n") workbook.close() read_file = pd.ExcelFile(file_xlsx) for sheet in read_file.sheet_names: @@ -392,21 +401,20 @@ if os.path.exists(map_file_path): os.remove(map_file_path) - map_file = open(map_file_path, "w+") - headers = ['#cdd_id', 'align_files', 'tree_files', 'cluster_files', 'cluster_nb_reads_files', 'pairwise_files', 'description', 'full_description\n'] - map_file.write("\t".join(headers)) - for cdd_id in hits_collection: - cdd_output = hits_collection[cdd_id]["short_description"].replace(" ", "_") - short_description = cdd_output - file_seq_aligned = cdd_output + '/seq_aligned.final_tree.fa' - tree_file = cdd_output + '/tree.dnd.png' - file_cluster = cdd_output + '/otu_cluster.csv' - file_matrix = cdd_output + "/identity_matrix.csv" - cluster_nb_reads_files = cdd_output + "/cluster_nb_reads_files.tab" - map_file.write(cdd_id + "\t" + file_seq_aligned + "\t" + tree_file + "\t") - map_file.write(file_cluster + "\t" + cluster_nb_reads_files + "\t" + file_matrix + "\t") - map_file.write(short_description + "\t" + hits_collection[cdd_id]["full_description"] + "\n") - map_file.close() + with open(map_file_path, "w+") as map_file: + headers = ['#cdd_id', 'align_files', 'tree_files', 'cluster_files', 'cluster_nb_reads_files', 'pairwise_files', 'description', 'full_description\n'] + map_file.write("\t".join(headers)) + for cdd_id in hits_collection: + cdd_output = hits_collection[cdd_id]["short_description"].replace(" ", "_") + short_description = cdd_output + file_seq_aligned = cdd_output + '/seq_aligned.final_tree.fa' + tree_file = cdd_output + '/tree.dnd.png' + file_cluster = cdd_output + '/otu_cluster.csv' + file_matrix = cdd_output + "/identity_matrix.csv" + cluster_nb_reads_files = cdd_output + "/cluster_nb_reads_files.tab" + map_file.write(cdd_id + "\t" + file_seq_aligned + "\t" + tree_file + "\t") + map_file.write(file_cluster + "\t" + cluster_nb_reads_files + "\t" + file_matrix + "\t") + map_file.write(short_description + "\t" + hits_collection[cdd_id]["full_description"] + "\n") log.info("Writing HTML report") html_cmd = os.path.join(options.tool_path, 'rps2tree_html.py') + ' -m ' + map_file_path + ' -o ' + options.output log.debug(html_cmd)