annotate formatoutput.py @ 2:487da152fb43 draft

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit f1c6f4fe1e572ace84cf9106bc253603f55aac55"
author iuc
date Mon, 14 Jun 2021 12:48:54 +0000
parents 025b0113ee97
children 892a0629e2ed
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
025b0113ee97 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 2b87bc7417360e2b2c9ec0605d475909f6f0482f"
iuc
parents:
diff changeset
1 #!/usr/bin/env python
025b0113ee97 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 2b87bc7417360e2b2c9ec0605d475909f6f0482f"
iuc
parents:
diff changeset
2 # -*- coding: utf-8 -*-
025b0113ee97 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 2b87bc7417360e2b2c9ec0605d475909f6f0482f"
iuc
parents:
diff changeset
3
025b0113ee97 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 2b87bc7417360e2b2c9ec0605d475909f6f0482f"
iuc
parents:
diff changeset
4 import argparse
025b0113ee97 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 2b87bc7417360e2b2c9ec0605d475909f6f0482f"
iuc
parents:
diff changeset
5 import re
025b0113ee97 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 2b87bc7417360e2b2c9ec0605d475909f6f0482f"
iuc
parents:
diff changeset
6 from pathlib import Path
025b0113ee97 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 2b87bc7417360e2b2c9ec0605d475909f6f0482f"
iuc
parents:
diff changeset
7
025b0113ee97 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 2b87bc7417360e2b2c9ec0605d475909f6f0482f"
iuc
parents:
diff changeset
# One-letter rank prefix found in MetaPhlAn clade names (the part before
# "__") mapped to the rank name used for per-level file names and column
# headers. Insertion order runs from the broadest rank to the deepest one.
taxo_level = dict(
    k='kingdom',
    p='phylum',
    c='class',
    o='order',
    f='family',
    g='genus',
    s='species',
    t='strains',
)
025b0113ee97 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 2b87bc7417360e2b2c9ec0605d475909f6f0482f"
iuc
parents:
diff changeset
17
025b0113ee97 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 2b87bc7417360e2b2c9ec0605d475909f6f0482f"
iuc
parents:
diff changeset
18
025b0113ee97 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 2b87bc7417360e2b2c9ec0605d475909f6f0482f"
iuc
parents:
diff changeset
def split_levels(metaphlan_output_fp, out_dp, legacy_output):
    '''
    Split default MetaPhlAn into a report for each taxonomic level

    One tab-separated file per level (named after the values of
    ``taxo_level``) is written into ``out_dp``; each receives the clades
    whose deepest rank is that level. An extra ``all`` file receives one row
    per clade with the lineage spread over one column group per level.

    Lines starting with "#", lines containing "UNKNOWN" and blank lines are
    ignored.

    :param metaphlan_output_fp: Path default MetaPhlAn output
    :param out_dp: Path to output directory
    :param legacy_output: Boolean for legacy output (clade name and abundance
        columns only, no taxon id column)
    '''
    out_dp = Path(out_dp)
    # column-group order of the "all" table, broadest rank first
    level_order = ['k', 'p', 'c', 'o', 'f', 'g', 's', 't']
    # every level owns a name column, plus an id column unless legacy
    columns_per_level = 1 if legacy_output else 2

    abund_f = {}
    try:
        # prepare output files; the dict is filled one file at a time so the
        # finally clause can close whatever was opened if a later open() or
        # a malformed input line raises
        for level, level_name in taxo_level.items():
            level_f = open(out_dp / level_name, 'w')
            abund_f[level] = level_f
            level_f.write("%s\t" % level_name)
            if not legacy_output:
                level_f.write("%s_id\t" % level_name)
            level_f.write("abundance\n")

        with open(metaphlan_output_fp, 'r') as metaphlan_output_f, \
                open(out_dp / 'all', 'w') as all_level_f:
            # write header in all level file
            for level in level_order:
                all_level_f.write("%s\t" % taxo_level[level])
                if not legacy_output:
                    all_level_f.write("%s_id\t" % taxo_level[level])
            all_level_f.write("abundance\n")

            # parse metaphlan file (streamed line by line)
            for line in metaphlan_output_f:
                # skip headers
                if line.startswith("#"):
                    continue
                # skip UNKNOWN lines in Predicted taxon relative abundances
                if "UNKNOWN" in line:
                    continue
                # skip blank lines instead of failing on a missing column
                if not line.strip():
                    continue
                # split lines; only the newline is removed so that a final
                # line without trailing newline keeps its last character
                split_line = line.rstrip('\n').split('\t')
                taxo_n = split_line[0].split('|')
                if legacy_output:
                    taxo_id = None
                    abundance = split_line[1]
                else:
                    taxo_id = split_line[1].split('|')
                    abundance = split_line[2]

                # get taxon name and ids
                row = []
                for i, clade in enumerate(taxo_n):
                    row.append(clade.split('__')[1].replace("_", " "))
                    if not legacy_output:
                        row.append(taxo_id[i])

                # if not all taxon levels: pad every missing level with as
                # many empty cells as the header declares for it, so the
                # abundance always lands under the "abundance" column
                missing_levels = len(level_order) - len(taxo_n)
                row.extend([''] * (missing_levels * columns_per_level))

                row.append(abundance)
                all_level_f.write('\t'.join(row) + '\n')

                # write the clade in the file of its deepest level
                last_taxo_level = taxo_n[-1].split('__')
                fields = [last_taxo_level[1].replace("_", " ")]
                if not legacy_output:
                    fields.append(taxo_id[-1])
                fields.append(abundance)
                abund_f[last_taxo_level[0]].write('\t'.join(fields) + '\n')
    finally:
        # close files
        for level_f in abund_f.values():
            level_f.close()
025b0113ee97 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 2b87bc7417360e2b2c9ec0605d475909f6f0482f"
iuc
parents:
diff changeset
98
025b0113ee97 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 2b87bc7417360e2b2c9ec0605d475909f6f0482f"
iuc
parents:
diff changeset
99
025b0113ee97 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 2b87bc7417360e2b2c9ec0605d475909f6f0482f"
iuc
parents:
diff changeset
def format_for_krona(metaphlan_output_fp, krona_out_fp):
    '''
    Convert default MetaPhlAn output into a tab-separated file for Krona

    Only lines containing a species prefix ("s__") are kept; lines starting
    with "#" are always ignored. For every kept line the output holds the
    value of the last column followed by the lineage, in which the "|"
    separators are removed, each rank prefix ("k__", "p__", ...) is replaced
    by a tab and underscores are replaced by spaces.

    :param metaphlan_output_fp: Path default MetaPhlAn output
    :param krona_out_fp: Path to output file for Krona
    '''
    re_rank_prefix = re.compile(r"\w__")

    with open(metaphlan_output_fp, 'r') as metaphlan_output_f, \
            open(krona_out_fp, 'w') as krona_out_f:
        for line in metaphlan_output_f:
            # a header line can contain "s__" by accident (e.g. a file name
            # such as reads__1.fastq in the command line), so headers are
            # skipped explicitly, as split_levels does
            if line.startswith("#"):
                continue
            if "s__" not in line:
                continue
            x = line.rstrip().split('\t')
            lineage = x[0].replace('|', '')
            lineage = re_rank_prefix.sub('\t', lineage)
            lineage = lineage.replace('_', ' ')
            krona_out_f.write("%s\t%s\n" % (x[-1], lineage))
025b0113ee97 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 2b87bc7417360e2b2c9ec0605d475909f6f0482f"
iuc
parents:
diff changeset
120
025b0113ee97 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 2b87bc7417360e2b2c9ec0605d475909f6f0482f"
iuc
parents:
diff changeset
121
025b0113ee97 "planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 2b87bc7417360e2b2c9ec0605d475909f6f0482f"
iuc
parents:
diff changeset
# Command-line entry point: one sub-command per formatting function.
if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='Format MetaPhlAn output')
    subparsers = parser.add_subparsers(dest='function')
    # split_levels
    split_levels_parser = subparsers.add_parser('split_levels', help='Split default MetaPhlAn into a report for each taxonomic level')
    # path options are required so that a missing one is reported by
    # argparse instead of failing later in Path(None)
    split_levels_parser.add_argument('--metaphlan_output', required=True, help="Path to default MetaPhlAn output")
    split_levels_parser.add_argument('--outdir', required=True, help="Path to output directory")
    # store_true already defaults to False
    split_levels_parser.add_argument('--legacy-output', dest='legacy_output', action='store_true', help="Old MetaPhlAn2 two columns output")
    # format_for_krona
    format_for_krona_parser = subparsers.add_parser('format_for_krona', help='Format default MetaPhlAn output for Krona')
    format_for_krona_parser.add_argument('--metaphlan_output', required=True, help="Path to default MetaPhlAn output")
    format_for_krona_parser.add_argument('--krona_output', required=True, help="Path to Krona output file")

    args = parser.parse_args()

    if args.function == 'split_levels':
        split_levels(
            Path(args.metaphlan_output),
            Path(args.outdir),
            args.legacy_output)
    elif args.function == 'format_for_krona':
        format_for_krona(
            Path(args.metaphlan_output),
            Path(args.krona_output))
    else:
        # no sub-command given: report it rather than exit silently
        parser.error("a sub-command is required: split_levels or format_for_krona")