Mercurial > repos > bebatut > compare_humann2_output
diff compare_humann2_output.py @ 3:eaa95ea1195c draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/compare_humann2_output commit eea46077010e699403ce6995d7d4aac77b2e0b43"
author | bgruening |
---|---|
date | Wed, 19 Oct 2022 14:49:06 +0000 |
parents | 05766022dfc4 |
children |
line wrap: on
line diff
--- a/compare_humann2_output.py Mon Sep 14 13:50:30 2020 +0000 +++ b/compare_humann2_output.py Wed Oct 19 14:49:06 2022 +0000 @@ -8,19 +8,19 @@ abundances = {} more_abund_charact = [] abund_sum = 0 - with open(fp, 'r') as abundance_f: + with open(fp, "r") as abundance_f: for line in abundance_f.readlines()[1:]: - split_line = line[:-1].split('\t') + split_line = line[:-1].split("\t") charact_id = split_line[0] abund = float(split_line[1]) - abundances[charact_id] = 100*abund + abundances[charact_id] = 100 * abund abund_sum += abundances[charact_id] if len(more_abund_charact) < nb_charact_to_extract: more_abund_charact.append(charact_id) else: best_pos = None - for i in range(len(more_abund_charact)-1, -1, -1): + for i in range(len(more_abund_charact) - 1, -1, -1): if abundances[more_abund_charact[i]] < abund: best_pos = i else: @@ -34,34 +34,34 @@ def format_characteristic_name(all_name): - if all_name.find(':') != -1: - charact_id = all_name.split(':')[0] - char_name = all_name.split(':')[1][1:] + if all_name.find(":") != -1: + charact_id = all_name.split(":")[0] + char_name = all_name.split(":")[1][1:] else: charact_id = all_name - char_name = '' + char_name = "" - char_name = char_name.replace('/', ' ') - char_name = char_name.replace('-', ' ') - char_name = char_name.replace("'", '') - if char_name.find('(') != -1 and char_name.find(')') != -1: - open_bracket = char_name.find('(') - close_bracket = char_name.find(')')+1 + char_name = char_name.replace("/", " ") + char_name = char_name.replace("-", " ") + char_name = char_name.replace("'", "") + if char_name.find("(") != -1 and char_name.find(")") != -1: + open_bracket = char_name.find("(") + close_bracket = char_name.find(")") + 1 char_name = char_name[:open_bracket] + char_name[close_bracket:] return charact_id, char_name def write_more_abundant_charat(abundances, more_abund_charact, output_fp): - with open(output_fp, 'w') as output_f: - output_f.write('id\tname\t%s\n' % '\t'.join(abundances.keys())) + with open(output_fp, "w") as output_f: + output_f.write("id\tname\t%s\n" % "\t".join(abundances.keys())) for mac in more_abund_charact: charact_id, charact_name = format_characteristic_name(mac) - output_f.write('%s\t%s' % (charact_id, charact_name)) + output_f.write("%s\t%s" % (charact_id, charact_name)) for sample in abundances: abund = abundances[sample].get(mac, 0) - output_f.write('\t%s' % (abund)) - output_f.write('\n') + output_f.write("\t%s" % (abund)) + output_f.write("\n") def extract_similar_characteristics(abund, sim_output_fp, output_files): @@ -69,38 +69,41 @@ sim_characteristics = set(abund[abund_keys[0]].keys()) for sample in abund_keys[1:]: sim_characteristics.intersection_update(abund[sample].keys()) - print('Similar between all samples: %s' % len(sim_characteristics)) + print("Similar between all samples: %s" % len(sim_characteristics)) - with open(sim_output_fp, 'w') as sim_output_f: - sim_output_f.write('id\tname\t%s\n' % '\t'.join(abund_keys)) + with open(sim_output_fp, "w") as sim_output_f: + sim_output_f.write("id\tname\t%s\n" % "\t".join(abund_keys)) for charact in list(sim_characteristics): charact_id, charact_name = format_characteristic_name(charact) - sim_output_f.write('%s\t%s' % (charact_id, charact_name)) + sim_output_f.write("%s\t%s" % (charact_id, charact_name)) for sample in abund_keys: - sim_output_f.write('\t%s' % abund[sample][charact]) - sim_output_f.write('\n') + sim_output_f.write("\t%s" % abund[sample][charact]) + sim_output_f.write("\n") - print('Specific to samples:') + print("Specific to samples:") diff_char = {} for i in range(len(abund_keys)): sample = abund_keys[i] - print(' %s' % sample ) - print(' All: %s' % len(abund[sample].keys())) + print(" %s" % sample) + print(" All: %s" % len(abund[sample].keys())) diff_char[sample] = set(abund[sample].keys()) diff_char[sample].difference_update(sim_characteristics) - perc = 100*len(diff_char[sample])/(1.*len(abund[sample].keys())) - print(' Number of specific characteristics: %s' % len(diff_char[sample])) - print(' Percentage of specific characteristics: %s' % perc) + perc = 100 * len(diff_char[sample]) / (1.0 * len(abund[sample].keys())) + print(" Number of specific characteristics: %s" % len(diff_char[sample])) + print(" Percentage of specific characteristics: %s" % perc) relative_abundance = 0 - with open(output_files[i], 'w') as output_f: - output_f.write('id\tname\tabundances\n') + with open(output_files[i], "w") as output_f: + output_f.write("id\tname\tabundances\n") for charact in list(diff_char[sample]): charact_id, charact_name = format_characteristic_name(charact) - output_f.write('%s\t%s' % (charact_id, charact_name)) - output_f.write('%s\n' % abund[sample][charact]) + output_f.write("%s\t%s" % (charact_id, charact_name)) + output_f.write("%s\n" % abund[sample][charact]) relative_abundance += abund[sample][charact] - print(' Relative abundance of specific characteristics: %s' % relative_abundance) + print( + " Relative abundance of specific characteristics: %s" + % relative_abundance + ) return sim_characteristics @@ -111,34 +114,28 @@ for i in range(len(args.sample_name)): abund[args.sample_name[i]], mac = extract_abundances( - args.charact_input_fp[i], - args.most_abundant_characteristics_to_extract) + args.charact_input_fp[i], args.most_abundant_characteristics_to_extract + ) more_abund_charact += mac write_more_abundant_charat( - abund, - list(set(more_abund_charact)), - args.more_abundant_output_fp) + abund, list(set(more_abund_charact)), args.more_abundant_output_fp + ) extract_similar_characteristics( - abund, - args.similar_output_fp, - args.specific_output_fp) + abund, args.similar_output_fp, args.specific_output_fp + ) -if __name__ == '__main__': +if __name__ == "__main__": parser = argparse.ArgumentParser() - parser.add_argument('--sample_name', required=True, action='append') - parser.add_argument('--charact_input_fp', required=True, action='append') + parser.add_argument("--sample_name", required=True, action="append") + parser.add_argument("--charact_input_fp", required=True, action="append") parser.add_argument( - '--most_abundant_characteristics_to_extract', - required=True, - type=int) - parser.add_argument('--more_abundant_output_fp', required=True) - parser.add_argument('--similar_output_fp', required=True) - parser.add_argument( - '--specific_output_fp', - required=True, - action='append') + "--most_abundant_characteristics_to_extract", required=True, type=int + ) + parser.add_argument("--more_abundant_output_fp", required=True) + parser.add_argument("--similar_output_fp", required=True) + parser.add_argument("--specific_output_fp", required=True, action="append") args = parser.parse_args() if len(args.sample_name) != len(args.charact_input_fp):