Mercurial > repos > bebatut > compare_humann2_output
comparison compare_humann2_output.py @ 3:eaa95ea1195c draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/compare_humann2_output commit eea46077010e699403ce6995d7d4aac77b2e0b43"
author | bgruening |
---|---|
date | Wed, 19 Oct 2022 14:49:06 +0000 |
parents | 05766022dfc4 |
children |
comparison
equal
deleted
inserted
replaced
2:05766022dfc4 | 3:eaa95ea1195c |
---|---|
6 | 6 |
7 def extract_abundances(fp, nb_charact_to_extract): | 7 def extract_abundances(fp, nb_charact_to_extract): |
8 abundances = {} | 8 abundances = {} |
9 more_abund_charact = [] | 9 more_abund_charact = [] |
10 abund_sum = 0 | 10 abund_sum = 0 |
11 with open(fp, 'r') as abundance_f: | 11 with open(fp, "r") as abundance_f: |
12 for line in abundance_f.readlines()[1:]: | 12 for line in abundance_f.readlines()[1:]: |
13 split_line = line[:-1].split('\t') | 13 split_line = line[:-1].split("\t") |
14 charact_id = split_line[0] | 14 charact_id = split_line[0] |
15 abund = float(split_line[1]) | 15 abund = float(split_line[1]) |
16 abundances[charact_id] = 100*abund | 16 abundances[charact_id] = 100 * abund |
17 abund_sum += abundances[charact_id] | 17 abund_sum += abundances[charact_id] |
18 | 18 |
19 if len(more_abund_charact) < nb_charact_to_extract: | 19 if len(more_abund_charact) < nb_charact_to_extract: |
20 more_abund_charact.append(charact_id) | 20 more_abund_charact.append(charact_id) |
21 else: | 21 else: |
22 best_pos = None | 22 best_pos = None |
23 for i in range(len(more_abund_charact)-1, -1, -1): | 23 for i in range(len(more_abund_charact) - 1, -1, -1): |
24 if abundances[more_abund_charact[i]] < abund: | 24 if abundances[more_abund_charact[i]] < abund: |
25 best_pos = i | 25 best_pos = i |
26 else: | 26 else: |
27 break | 27 break |
28 if best_pos is not None: | 28 if best_pos is not None: |
32 more_abund_charact += tmp_more_abund_charact[best_pos:-1] | 32 more_abund_charact += tmp_more_abund_charact[best_pos:-1] |
33 return abundances, more_abund_charact | 33 return abundances, more_abund_charact |
34 | 34 |
35 | 35 |
36 def format_characteristic_name(all_name): | 36 def format_characteristic_name(all_name): |
37 if all_name.find(':') != -1: | 37 if all_name.find(":") != -1: |
38 charact_id = all_name.split(':')[0] | 38 charact_id = all_name.split(":")[0] |
39 char_name = all_name.split(':')[1][1:] | 39 char_name = all_name.split(":")[1][1:] |
40 else: | 40 else: |
41 charact_id = all_name | 41 charact_id = all_name |
42 char_name = '' | 42 char_name = "" |
43 | 43 |
44 char_name = char_name.replace('/', ' ') | 44 char_name = char_name.replace("/", " ") |
45 char_name = char_name.replace('-', ' ') | 45 char_name = char_name.replace("-", " ") |
46 char_name = char_name.replace("'", '') | 46 char_name = char_name.replace("'", "") |
47 if char_name.find('(') != -1 and char_name.find(')') != -1: | 47 if char_name.find("(") != -1 and char_name.find(")") != -1: |
48 open_bracket = char_name.find('(') | 48 open_bracket = char_name.find("(") |
49 close_bracket = char_name.find(')')+1 | 49 close_bracket = char_name.find(")") + 1 |
50 char_name = char_name[:open_bracket] + char_name[close_bracket:] | 50 char_name = char_name[:open_bracket] + char_name[close_bracket:] |
51 return charact_id, char_name | 51 return charact_id, char_name |
52 | 52 |
53 | 53 |
54 def write_more_abundant_charat(abundances, more_abund_charact, output_fp): | 54 def write_more_abundant_charat(abundances, more_abund_charact, output_fp): |
55 with open(output_fp, 'w') as output_f: | 55 with open(output_fp, "w") as output_f: |
56 output_f.write('id\tname\t%s\n' % '\t'.join(abundances.keys())) | 56 output_f.write("id\tname\t%s\n" % "\t".join(abundances.keys())) |
57 | 57 |
58 for mac in more_abund_charact: | 58 for mac in more_abund_charact: |
59 charact_id, charact_name = format_characteristic_name(mac) | 59 charact_id, charact_name = format_characteristic_name(mac) |
60 output_f.write('%s\t%s' % (charact_id, charact_name)) | 60 output_f.write("%s\t%s" % (charact_id, charact_name)) |
61 for sample in abundances: | 61 for sample in abundances: |
62 abund = abundances[sample].get(mac, 0) | 62 abund = abundances[sample].get(mac, 0) |
63 output_f.write('\t%s' % (abund)) | 63 output_f.write("\t%s" % (abund)) |
64 output_f.write('\n') | 64 output_f.write("\n") |
65 | 65 |
66 | 66 |
67 def extract_similar_characteristics(abund, sim_output_fp, output_files): | 67 def extract_similar_characteristics(abund, sim_output_fp, output_files): |
68 abund_keys = list(abund) | 68 abund_keys = list(abund) |
69 sim_characteristics = set(abund[abund_keys[0]].keys()) | 69 sim_characteristics = set(abund[abund_keys[0]].keys()) |
70 for sample in abund_keys[1:]: | 70 for sample in abund_keys[1:]: |
71 sim_characteristics.intersection_update(abund[sample].keys()) | 71 sim_characteristics.intersection_update(abund[sample].keys()) |
72 print('Similar between all samples: %s' % len(sim_characteristics)) | 72 print("Similar between all samples: %s" % len(sim_characteristics)) |
73 | 73 |
74 with open(sim_output_fp, 'w') as sim_output_f: | 74 with open(sim_output_fp, "w") as sim_output_f: |
75 sim_output_f.write('id\tname\t%s\n' % '\t'.join(abund_keys)) | 75 sim_output_f.write("id\tname\t%s\n" % "\t".join(abund_keys)) |
76 for charact in list(sim_characteristics): | 76 for charact in list(sim_characteristics): |
77 charact_id, charact_name = format_characteristic_name(charact) | 77 charact_id, charact_name = format_characteristic_name(charact) |
78 sim_output_f.write('%s\t%s' % (charact_id, charact_name)) | 78 sim_output_f.write("%s\t%s" % (charact_id, charact_name)) |
79 for sample in abund_keys: | 79 for sample in abund_keys: |
80 sim_output_f.write('\t%s' % abund[sample][charact]) | 80 sim_output_f.write("\t%s" % abund[sample][charact]) |
81 sim_output_f.write('\n') | 81 sim_output_f.write("\n") |
82 | 82 |
83 print('Specific to samples:') | 83 print("Specific to samples:") |
84 diff_char = {} | 84 diff_char = {} |
85 for i in range(len(abund_keys)): | 85 for i in range(len(abund_keys)): |
86 sample = abund_keys[i] | 86 sample = abund_keys[i] |
87 print(' %s' % sample ) | 87 print(" %s" % sample) |
88 print(' All: %s' % len(abund[sample].keys())) | 88 print(" All: %s" % len(abund[sample].keys())) |
89 diff_char[sample] = set(abund[sample].keys()) | 89 diff_char[sample] = set(abund[sample].keys()) |
90 diff_char[sample].difference_update(sim_characteristics) | 90 diff_char[sample].difference_update(sim_characteristics) |
91 perc = 100*len(diff_char[sample])/(1.*len(abund[sample].keys())) | 91 perc = 100 * len(diff_char[sample]) / (1.0 * len(abund[sample].keys())) |
92 print(' Number of specific characteristics: %s' % len(diff_char[sample])) | 92 print(" Number of specific characteristics: %s" % len(diff_char[sample])) |
93 print(' Percentage of specific characteristics: %s' % perc) | 93 print(" Percentage of specific characteristics: %s" % perc) |
94 | 94 |
95 relative_abundance = 0 | 95 relative_abundance = 0 |
96 with open(output_files[i], 'w') as output_f: | 96 with open(output_files[i], "w") as output_f: |
97 output_f.write('id\tname\tabundances\n') | 97 output_f.write("id\tname\tabundances\n") |
98 for charact in list(diff_char[sample]): | 98 for charact in list(diff_char[sample]): |
99 charact_id, charact_name = format_characteristic_name(charact) | 99 charact_id, charact_name = format_characteristic_name(charact) |
100 output_f.write('%s\t%s' % (charact_id, charact_name)) | 100 output_f.write("%s\t%s" % (charact_id, charact_name)) |
101 output_f.write('%s\n' % abund[sample][charact]) | 101 output_f.write("%s\n" % abund[sample][charact]) |
102 relative_abundance += abund[sample][charact] | 102 relative_abundance += abund[sample][charact] |
103 print(' Relative abundance of specific characteristics: %s' % relative_abundance) | 103 print( |
104 " Relative abundance of specific characteristics: %s" | |
105 % relative_abundance | |
106 ) | |
104 | 107 |
105 return sim_characteristics | 108 return sim_characteristics |
106 | 109 |
107 | 110 |
108 def compare_humann2_output(args): | 111 def compare_humann2_output(args): |
109 abund = {} | 112 abund = {} |
110 more_abund_charact = [] | 113 more_abund_charact = [] |
111 | 114 |
112 for i in range(len(args.sample_name)): | 115 for i in range(len(args.sample_name)): |
113 abund[args.sample_name[i]], mac = extract_abundances( | 116 abund[args.sample_name[i]], mac = extract_abundances( |
114 args.charact_input_fp[i], | 117 args.charact_input_fp[i], args.most_abundant_characteristics_to_extract |
115 args.most_abundant_characteristics_to_extract) | 118 ) |
116 more_abund_charact += mac | 119 more_abund_charact += mac |
117 | 120 |
118 write_more_abundant_charat( | 121 write_more_abundant_charat( |
119 abund, | 122 abund, list(set(more_abund_charact)), args.more_abundant_output_fp |
120 list(set(more_abund_charact)), | 123 ) |
121 args.more_abundant_output_fp) | |
122 extract_similar_characteristics( | 124 extract_similar_characteristics( |
123 abund, | 125 abund, args.similar_output_fp, args.specific_output_fp |
124 args.similar_output_fp, | 126 ) |
125 args.specific_output_fp) | |
126 | 127 |
127 | 128 |
128 if __name__ == '__main__': | 129 if __name__ == "__main__": |
129 parser = argparse.ArgumentParser() | 130 parser = argparse.ArgumentParser() |
130 parser.add_argument('--sample_name', required=True, action='append') | 131 parser.add_argument("--sample_name", required=True, action="append") |
131 parser.add_argument('--charact_input_fp', required=True, action='append') | 132 parser.add_argument("--charact_input_fp", required=True, action="append") |
132 parser.add_argument( | 133 parser.add_argument( |
133 '--most_abundant_characteristics_to_extract', | 134 "--most_abundant_characteristics_to_extract", required=True, type=int |
134 required=True, | 135 ) |
135 type=int) | 136 parser.add_argument("--more_abundant_output_fp", required=True) |
136 parser.add_argument('--more_abundant_output_fp', required=True) | 137 parser.add_argument("--similar_output_fp", required=True) |
137 parser.add_argument('--similar_output_fp', required=True) | 138 parser.add_argument("--specific_output_fp", required=True, action="append") |
138 parser.add_argument( | |
139 '--specific_output_fp', | |
140 required=True, | |
141 action='append') | |
142 args = parser.parse_args() | 139 args = parser.parse_args() |
143 | 140 |
144 if len(args.sample_name) != len(args.charact_input_fp): | 141 if len(args.sample_name) != len(args.charact_input_fp): |
145 string = "Same number of values (in same order) are expected for " | 142 string = "Same number of values (in same order) are expected for " |
146 string += "--sample_name and --charact_input_fp" | 143 string += "--sample_name and --charact_input_fp" |