Mercurial > repos > iuc > extract_metaphlan_database
comparison formatoutput.py @ 1:1aaa9b943a83 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 2b87bc7417360e2b2c9ec0605d475909f6f0482f"
author | iuc |
---|---|
date | Mon, 17 May 2021 20:08:58 +0000 |
parents | |
children | b6ecdfac241f |
comparison
equal
deleted
inserted
replaced
0:5b2f8b6a3609 | 1:1aaa9b943a83 |
---|---|
1 #!/usr/bin/env python | |
2 # -*- coding: utf-8 -*- | |
3 | |
4 import argparse | |
5 import re | |
6 from pathlib import Path | |
7 | |
# One-letter rank prefix used in MetaPhlAn clade names -> name of the
# per-level report. Insertion order (broadest to most specific rank) is the
# column order of the "all" report.
taxo_level = {
    'k': 'kingdom',
    'p': 'phylum',
    'c': 'class',
    'o': 'order',
    'f': 'family',
    'g': 'genus',
    's': 'species',
    't': 'strains',
}


def split_levels(metaphlan_output_fp, out_dp, legacy_output):
    '''
    Split default MetaPhlAn into a report for each taxonomic level

    One tabular file per entry of ``taxo_level`` (named after its value) is
    written into ``out_dp``, each holding the clades whose most specific rank
    is that level. An additional ``all`` file holds the full lineage of every
    clade, one pair of columns (name, id) per level, or one column per level
    (name only) when ``legacy_output`` is set.

    Header lines (starting with ``#``) and rows whose clade name carries no
    ``x__`` rank prefix (e.g. ``UNKNOWN``, blank lines) are skipped.

    :param metaphlan_output_fp: Path default MetaPhlAn output
    :param out_dp: Path to output directory
    :param legacy_output: Boolean for legacy output (clade name and abundance
        only, no taxon id column)
    '''
    # local import keeps this block self-contained
    from contextlib import ExitStack

    levels_number = len(taxo_level)
    # every level owns a name column, plus an id column unless legacy
    cols_per_level = 1 if legacy_output else 2

    # ExitStack closes every opened file, even if parsing fails midway
    with ExitStack() as stack:
        metaphlan_output_f = stack.enter_context(
            open(metaphlan_output_fp, 'r'))
        all_level_f = stack.enter_context(open(out_dp / Path('all'), 'w'))
        abund_f = {
            level: stack.enter_context(open(out_dp / Path(name), 'w'))
            for level, name in taxo_level.items()}

        # write headers in the per-level files and in the all-level file
        for level, name in taxo_level.items():
            header = "%s\t" % name
            if not legacy_output:
                header += "%s_id\t" % name
            all_level_f.write(header)
            abund_f[level].write(header + "abundance\n")
        all_level_f.write("abundance\n")

        # parse metaphlan file
        for line in metaphlan_output_f:
            # skip headers
            if line.startswith("#"):
                continue

            # split line; only strip the line terminator so that a final
            # line without newline keeps its last character
            split_line = line.rstrip('\n').split('\t')
            taxo_n = split_line[0].split('|')

            # skip rows that are not a rank-prefixed lineage
            if not all('__' in taxon for taxon in taxo_n):
                continue

            if legacy_output:
                taxo_id = None
                abundance = split_line[1]
            else:
                taxo_id = split_line[1].split('|')
                abundance = split_line[2]

            # full lineage row: taxon names (and ids) for the present levels
            fields = []
            for i, taxon in enumerate(taxo_n):
                fields.append(taxon.split('__')[1].replace("_", " "))
                if not legacy_output:
                    fields.append(taxo_id[i])
            # empty cells for the missing levels, so that the abundance
            # stays under its header whatever the depth of the lineage
            missing = levels_number - len(taxo_n)
            fields.extend([''] * (cols_per_level * missing))
            fields.append(abundance)
            all_level_f.write('\t'.join(fields) + '\n')

            # per-level row, in the file of the most specific rank
            last_taxo_level = taxo_n[-1].split('__')
            level = last_taxo_level[0]
            row = [last_taxo_level[1].replace("_", " ")]
            if not legacy_output:
                row.append(taxo_id[-1])
            row.append(abundance)
            abund_f[level].write('\t'.join(row) + '\n')
96 | |
97 | |
def format_for_krona(metaphlan_output_fp, krona_out_fp):
    '''
    Format MetaPhlAn output into a tabular file for Krona

    Every non-header line containing ``s__`` is written as the abundance
    followed by the lineage, one taxon per tab-separated column, with the
    rank prefixes removed and underscores replaced by spaces.

    The abundance is read from the column named ``relative_abundance`` in the
    ``#`` header when there is one, so that a filled ``additional_species``
    column is not mistaken for it. Without such a header (e.g. two-column
    legacy output) the last field of the line is used.

    :param metaphlan_output_fp: Path default MetaPhlAn output
    :param krona_out_fp: Path to output file for Krona
    '''
    re_replace = re.compile(r"\w__")

    # index of the abundance field; last field until a header says otherwise
    abundance_col = -1

    with open(metaphlan_output_fp, 'r') as metaphlan_output_f:
        with open(krona_out_fp, 'w') as krona_out_f:
            for line in metaphlan_output_f:
                # header lines are never data; only use them to locate the
                # abundance column
                if line.startswith("#"):
                    header = line[1:].rstrip().split('\t')
                    if 'relative_abundance' in header:
                        abundance_col = header.index('relative_abundance')
                    continue

                if "s__" not in line:
                    continue

                x = line.rstrip().split('\t')
                # fall back to the last field on rows shorter than the header
                if abundance_col < len(x):
                    abundance = x[abundance_col]
                else:
                    abundance = x[-1]

                # drop the level separators, then turn every rank prefix
                # into a column separator; the leading prefix also becomes a
                # tab, so an empty column follows the abundance
                lineage = x[0].replace('|', '')
                lineage = re_replace.sub('\t', lineage)
                lineage = lineage.replace('_', ' ')
                krona_out_f.write("%s\t%s\n" % (abundance, lineage))
118 | |
119 | |
if __name__ == '__main__':
    # Command line entry point: one subcommand per formatting function.
    parser = argparse.ArgumentParser(description='Format MetaPhlAn output')
    subparsers = parser.add_subparsers(dest='function')
    # without a subcommand there is nothing to do: report a usage error
    # instead of exiting silently
    subparsers.required = True

    # split_levels
    split_levels_parser = subparsers.add_parser(
        'split_levels',
        help='Split default MetaPhlAn into a report for each taxonomic level')
    split_levels_parser.add_argument(
        '--metaphlan_output',
        required=True,
        help="Path to default MetaPhlAn output")
    split_levels_parser.add_argument(
        '--outdir',
        required=True,
        help="Path to output directory")
    split_levels_parser.add_argument(
        '--legacy-output',
        dest='legacy_output',
        action='store_true',
        help="Old MetaPhlAn2 two columns output")
    split_levels_parser.set_defaults(legacy_output=False)

    # format_for_krona
    format_for_krona_parser = subparsers.add_parser(
        'format_for_krona',
        help='Format default MetaPhlAn output for Krona')
    format_for_krona_parser.add_argument(
        '--metaphlan_output',
        required=True,
        help="Path to default MetaPhlAn output")
    # the value is opened as a file for writing, not used as a directory
    format_for_krona_parser.add_argument(
        '--krona_output',
        required=True,
        help="Path to Krona output file")

    args = parser.parse_args()

    if args.function == 'split_levels':
        split_levels(
            Path(args.metaphlan_output),
            Path(args.outdir),
            args.legacy_output)
    elif args.function == 'format_for_krona':
        format_for_krona(
            Path(args.metaphlan_output),
            Path(args.krona_output))