Mercurial > repos > bebatut > compare_humann2_output
comparison compare_humann2_output.py @ 0:9959fa526f1a draft
planemo upload for repository https://github.com/asaim/galaxytools/tree/master/tools/compare_humann2_output commit c16428041ae3d60b61b6570035c9268726730543-dirty
author | bebatut |
---|---|
date | Wed, 20 Apr 2016 08:30:08 -0400 |
parents | |
children | 05766022dfc4 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:9959fa526f1a |
---|---|
1 #!/usr/bin/env python | |
2 # -*- coding: utf-8 -*- | |
3 | |
4 import sys | |
5 import os | |
6 import argparse | |
7 import re | |
8 | |
def extract_abundances(filepath, nb_charact_to_extract):
    """Read a tab-separated abundance table and pick its most abundant rows.

    The first line of the file is treated as a header and skipped. Every
    other non-blank line must hold a characteristic identifier in its first
    column and a numeric abundance in its second column; any further
    column is ignored. When an identifier occurs several times, the last
    abundance read for it is kept.

    Args:
        filepath: path of the tab-separated abundance file.
        nb_charact_to_extract: maximal number of most abundant
            characteristics to report (values <= 0 yield an empty list).

    Returns:
        A tuple ``(abundances, more_abund_charact)``. ``abundances`` maps
        each identifier to its abundance multiplied by 100.
        ``more_abund_charact`` lists at most ``nb_charact_to_extract``
        identifiers by decreasing abundance; ties keep their file order.

    Raises:
        IndexError: if a non-blank line has fewer than two columns.
        ValueError: if an abundance cannot be converted to ``float``.
    """
    abundances = {}
    # Identifiers in order of first appearance, so that ties between equal
    # abundances are resolved the same way on every Python version.
    charact_order = []
    with open(filepath, 'r') as abundance_file:
        next(abundance_file, None)  # skip the header line
        for line in abundance_file:
            # Strip only the line terminator: the last line of a file may
            # have none, and chopping one character would corrupt its value.
            line = line.rstrip('\r\n')
            if not line.strip():
                continue
            split_line = line.split('\t')
            charact_id = split_line[0]
            if charact_id not in abundances:
                charact_order.append(charact_id)
            abundances[charact_id] = 100 * float(split_line[1])

    # Rank on the stored (scaled) values only, so that all comparisons are
    # made on the same scale. sorted() is stable, also with reverse=True.
    nb_to_keep = max(nb_charact_to_extract, 0)
    ranked = sorted(charact_order, key=abundances.get, reverse=True)
    more_abund_charact = ranked[:nb_to_keep]
    return abundances, more_abund_charact
36 | |
def format_characteristic_name(all_name):
    """Split a ``"<id>: <name>"`` label and clean up the name part.

    The label is cut at its first colon. Everything before it is returned
    untouched as the identifier; everything after it (leading whitespace
    removed) is the name. In the name, ``/`` and ``-`` are replaced by a
    space, single quotes are removed and the first ``(...)`` group is
    dropped. A label without colon is returned as the identifier with an
    empty name.

    Args:
        all_name: the full label, e.g. ``"PWY-1: some pathway"``.

    Returns:
        A tuple ``(charact_id, charact_name)`` of strings.
    """
    # partition() cuts at the first colon only, so a name that itself
    # contains a colon is kept whole instead of being truncated.
    charact_id, _separator, charact_name = all_name.partition(':')
    charact_name = charact_name.lstrip()

    for unwanted, substitute in (('/', ' '), ('-', ' '), ("'", '')):
        charact_name = charact_name.replace(unwanted, substitute)

    # Remove the first bracketed group. The closing bracket is searched
    # after the opening one; a stray ')' earlier in the name must not be
    # paired with it, otherwise part of the name would be duplicated.
    open_bracket = charact_name.find('(')
    if open_bracket != -1:
        close_bracket = charact_name.find(')', open_bracket)
        if close_bracket != -1:
            charact_name = (charact_name[:open_bracket]
                            + charact_name[close_bracket + 1:])
    return charact_id, charact_name
53 | |
def write_more_abundant_charat(abundances,more_abund_charact, output_filepath):
    """Write a table of the selected characteristics across all samples.

    The output is tab-separated: a header made of ``id``, ``name`` and one
    column per key of ``abundances``, then one row per entry of
    ``more_abund_charact``. A characteristic missing from a sample is
    reported with the value 0.

    Args:
        abundances: mapping of sample name to a mapping of characteristic
            label to abundance.
        more_abund_charact: iterable of characteristic labels to report.
        output_filepath: path of the file to (over)write.
    """
    sample_names = list(abundances.keys())
    with open(output_filepath, 'w') as table:
        table.write('id\tname\t')
        table.write('\t'.join(sample_names) + '\n')

        for label in more_abund_charact:
            charact_id, charact_name = format_characteristic_name(label)
            row = charact_id + '\t' + charact_name
            # One cell per sample, in the same order as the header columns
            row += ''.join('\t' + str(abundances[name].get(label, 0))
                           for name in sample_names)
            table.write(row + '\n')
66 | |
def extract_similar_characteristics(abundances, sim_output_filepath,
                                    specific_output_files):
    """Separate characteristics shared by all samples from the other ones.

    Writes one tab-separated table with the characteristics found in every
    sample (one abundance column per sample) and, for each sample, one
    table with the characteristics that are not shared by all samples.
    A short report is printed on the standard output. Rows are written in
    sorted label order so that the output is reproducible.

    Args:
        abundances: mapping of sample name to a mapping of characteristic
            label to abundance.
        sim_output_filepath: path of the table of shared characteristics.
        specific_output_files: sequence of output paths; the i-th path
            receives the table of the i-th sample in the iteration order
            of ``abundances``.

    Returns:
        The set of characteristic labels found in every sample (an empty
        set when ``abundances`` is empty).

    Raises:
        IndexError: if ``specific_output_files`` holds fewer paths than
            there are samples.
    """
    # Materialise the keys once: a list can be indexed and sliced on both
    # Python 2 and Python 3, and fixes the column order for the whole run.
    samples = list(abundances.keys())

    sim_characteristics = set()
    if samples:
        sim_characteristics.update(abundances[samples[0]].keys())
        for sample in samples[1:]:
            sim_characteristics.intersection_update(abundances[sample].keys())
    # print() with one pre-built string behaves the same on Python 2 and 3
    print('Similar between all samples: ' + str(len(sim_characteristics)))

    with open(sim_output_filepath, 'w') as sim_output_file:
        sim_output_file.write('id\tname\t' + '\t'.join(samples) + '\n')
        for charact in sorted(sim_characteristics):
            charact_id, charact_name = format_characteristic_name(charact)
            cells = [charact_id, charact_name]
            cells += [str(abundances[sample][charact]) for sample in samples]
            sim_output_file.write('\t'.join(cells) + '\n')

    print('Specific to samples:')
    for i, sample in enumerate(samples):
        sample_abundances = abundances[sample]
        nb_all = len(sample_abundances)
        specific = set(sample_abundances.keys()) - sim_characteristics
        # A sample without any characteristic has nothing specific; avoid
        # dividing by zero in that case.
        if nb_all:
            percentage = 100 * len(specific) / (1. * nb_all)
        else:
            percentage = 0.0

        print('  ' + sample + ' ')
        print(' All: ' + str(nb_all))
        print(' Number of specific characteristics: ' + str(len(specific)))
        print(' Percentage of specific characteristics: ' + str(percentage))

        relative_abundance = 0
        with open(specific_output_files[i], 'w') as output_file:
            output_file.write('id\tname\tabundances\n')
            for charact in sorted(specific):
                charact_id, charact_name = format_characteristic_name(charact)
                abund = sample_abundances[charact]
                output_file.write(charact_id + '\t' + charact_name + '\t')
                output_file.write(str(abund) + '\n')
                relative_abundance += abund
        print(' Relative abundance of specific characteristics(%): '
              + str(relative_abundance))

    return sim_characteristics
107 | |
def compare_humann2_output(args):
    """Compare the abundance tables of several samples.

    For every sample the abundance file is parsed, then three kinds of
    outputs are produced: the table of the most abundant characteristics,
    the table of the characteristics shared by all samples and one table
    of non-shared characteristics per sample.

    Args:
        args: parsed command-line arguments exposing ``sample_name``,
            ``charact_input_file`` and ``specific_output_file`` (lists
            given in the same order), plus
            ``most_abundant_characteristics_to_extract``,
            ``more_abundant_output_file`` and ``similar_output_file``.
    """
    from collections import OrderedDict

    # Keep the samples in command-line order: the i-th specific output file
    # is matched with the i-th sample downstream, and a plain dict only
    # guarantees insertion order from Python 3.7 on.
    abundances = OrderedDict()
    more_abund_charact = []

    for i, sample_name in enumerate(args.sample_name):
        abundances[sample_name], mac = extract_abundances(
            args.charact_input_file[i],
            args.most_abundant_characteristics_to_extract)
        more_abund_charact += mac

    # Drop duplicates while keeping first-seen order, so that the rows of
    # the output table come out in the same order on every run.
    unique_more_abund_charact = list(OrderedDict.fromkeys(more_abund_charact))

    write_more_abundant_charat(abundances, unique_more_abund_charact,
                               args.more_abundant_output_file)
    extract_similar_characteristics(abundances, args.similar_output_file,
                                    args.specific_output_file)
121 | |
if __name__ == '__main__':
    # Command-line entry point: declare the options, check that the
    # repeatable options were given the same number of times, then run
    # the comparison.
    cli = argparse.ArgumentParser()
    cli.add_argument('--sample_name', action='append', required=True)
    cli.add_argument('--charact_input_file', action='append', required=True)
    cli.add_argument('--most_abundant_characteristics_to_extract',
                     type=int, required=True)
    cli.add_argument('--more_abundant_output_file', required=True)
    cli.add_argument('--similar_output_file', required=True)
    cli.add_argument('--specific_output_file', action='append', required=True)
    args = cli.parse_args()

    nb_samples = len(args.sample_name)
    if nb_samples != len(args.charact_input_file):
        raise ValueError("Same number of values (in same order) are expected for --sample_name and --charact_input_file")
    if nb_samples != len(args.specific_output_file):
        raise ValueError("Same number of values (in same order) are expected for --sample_name and --specific_output_file")

    compare_humann2_output(args)