Mercurial > repos > iuc > extract_metaphlan_database
annotate formatoutput.py @ 11:4b6ac1f0042b draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 3c1a0c4a94f78437c6df74b5348826e33e734a05
author | iuc |
---|---|
date | Mon, 29 Jul 2024 07:13:48 +0000 |
parents | b4c750fe0fe3 |
children |
rev | line source |
---|---|
1
1aaa9b943a83
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 2b87bc7417360e2b2c9ec0605d475909f6f0482f"
iuc
parents:
diff
changeset
|
1 #!/usr/bin/env python |
1aaa9b943a83
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 2b87bc7417360e2b2c9ec0605d475909f6f0482f"
iuc
parents:
diff
changeset
|
2 # -*- coding: utf-8 -*- |
1aaa9b943a83
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 2b87bc7417360e2b2c9ec0605d475909f6f0482f"
iuc
parents:
diff
changeset
|
3 |
1aaa9b943a83
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 2b87bc7417360e2b2c9ec0605d475909f6f0482f"
iuc
parents:
diff
changeset
|
4 import argparse |
1aaa9b943a83
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 2b87bc7417360e2b2c9ec0605d475909f6f0482f"
iuc
parents:
diff
changeset
|
5 import re |
1aaa9b943a83
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 2b87bc7417360e2b2c9ec0605d475909f6f0482f"
iuc
parents:
diff
changeset
|
6 from pathlib import Path |
1aaa9b943a83
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 2b87bc7417360e2b2c9ec0605d475909f6f0482f"
iuc
parents:
diff
changeset
|
7 |
1aaa9b943a83
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 2b87bc7417360e2b2c9ec0605d475909f6f0482f"
iuc
parents:
diff
changeset
|
# One-letter clade prefixes used in MetaPhlAn lineages mapped to the level
# name used for output file names and column headers.
taxo_level = {
    'k': 'kingdom',
    'p': 'phylum',
    'c': 'class',
    'o': 'order',
    'f': 'family',
    'g': 'genus',
    's': 'species',
    't': 'strains'}


def split_levels(metaphlan_output_fp, out_dp, legacy_output):
    '''
    Split default MetaPhlAn into a report for each taxonomic level

    One tab-separated file named after each level (``kingdom`` ... ``strains``)
    and one combined file named ``all`` are written into ``out_dp``. Lines
    starting with ``#``, blank lines and lines containing ``UNKNOWN`` or
    ``UNCLASSIFIED`` are skipped. Each remaining line is written to ``all``
    and to the file of the last (deepest) level of its lineage.

    :param metaphlan_output_fp: Path default MetaPhlAn output
    :param out_dp: Path to output directory
    :param legacy_output: Boolean for legacy output (two columns: lineage and
        abundance); otherwise the columns read are lineage, taxon ids and
        abundance
    :raises KeyError: if the last clade prefix of a line is not in taxo_level
    :raises IndexError: if a line has fewer columns, ids or ``__`` separators
        than expected
    '''
    # Imported here so that the function does not depend on an extra
    # module-level import.
    from contextlib import ExitStack

    out_dp = Path(out_dp)
    levels_number = len(taxo_level)
    with_ids = not legacy_output

    def header_cells(level):
        # Column titles contributed by one taxonomic level.
        name = taxo_level[level]
        return [name, "%s_id" % name] if with_ids else [name]

    # ExitStack closes every file that was opened, even if parsing fails.
    with ExitStack() as stack:
        # prepare output files
        abund_f = {
            level: stack.enter_context(open(out_dp / name, 'w'))
            for level, name in taxo_level.items()}
        for level, level_f in abund_f.items():
            level_f.write('\t'.join(header_cells(level) + ['abundance']) + '\n')

        metaphlan_output_f = stack.enter_context(
            open(metaphlan_output_fp, 'r'))
        all_level_f = stack.enter_context(open(out_dp / 'all', 'w'))

        # write header in all level file
        all_header = []
        for level in ['k', 'p', 'c', 'o', 'f', 'g', 's', 't']:
            all_header.extend(header_cells(level))
        all_level_f.write('\t'.join(all_header + ['abundance']) + '\n')

        # parse metaphlan file
        for line in metaphlan_output_f:
            # Only strip the line terminator: the last line of a file may
            # not have one, and trailing empty columns must be kept.
            line = line.rstrip('\n')

            # skip headers and blank lines
            if line.startswith("#") or not line.strip():
                continue

            # skip UNKNOWN (v3) or UNCLASSIFIED (v4) lines in predicted
            # taxon relative abundances
            if "UNKNOWN" in line or 'UNCLASSIFIED' in line:
                continue

            # split lines
            split_line = line.split('\t')
            taxo_n = split_line[0].split('|')
            if legacy_output:
                taxo_id = None
                abundance = split_line[1]
            else:
                taxo_id = split_line[1].split('|')
                abundance = split_line[2]

            # get taxon names; split only on the first "__" so that names
            # which themselves contain "__" are not truncated
            names = [
                clade.split('__', 1)[1].replace("_", " ") for clade in taxo_n]

            cells = []
            for i, name in enumerate(names):
                cells.append(name)
                if with_ids:
                    cells.append(taxo_id[i])

            # if not all taxon levels: one empty cell per missing level
            # NOTE(review): with ids enabled the header has two columns per
            # level but only one empty cell is added per missing level, so
            # the abundance is not under its header for shallow lineages.
            # Kept as-is to preserve the existing output format.
            cells.extend([''] * (levels_number - len(taxo_n)))
            cells.append(abundance)
            all_level_f.write('\t'.join(cells) + '\n')

            # write the line in the file of its deepest level
            level = taxo_n[-1].split('__', 1)[0]
            row = [names[-1]]
            if with_ids:
                row.append(taxo_id[-1])
            row.append(abundance)
            abund_f[level].write('\t'.join(row) + '\n')
1aaa9b943a83
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 2b87bc7417360e2b2c9ec0605d475909f6f0482f"
iuc
parents:
diff
changeset
|
100 |
1aaa9b943a83
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 2b87bc7417360e2b2c9ec0605d475909f6f0482f"
iuc
parents:
diff
changeset
|
101 |
1aaa9b943a83
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 2b87bc7417360e2b2c9ec0605d475909f6f0482f"
iuc
parents:
diff
changeset
|
def format_for_krona(metaphlan_output_fp, krona_out_fp):
    '''
    Format default MetaPhlAn output into a tabular file for Krona

    Every non-header line containing ``s__`` is written as one row: the last
    column of the line, then the lineage of the first column with the ``|``
    separators removed, each ``<letter>__`` clade prefix replaced by a tab
    and the remaining underscores replaced by spaces.

    :param metaphlan_output_fp: Path default MetaPhlAn output
    :param krona_out_fp: Path to output file for Krona
    '''
    re_replace = re.compile(r"\w__")

    with open(metaphlan_output_fp, 'r') as metaphlan_output_f, \
            open(krona_out_fp, 'w') as krona_out_f:
        for line in metaphlan_output_f:
            # Header lines can contain "s__" by accident (e.g. a file name
            # such as "reads__1.fastq" in the logged command line) and must
            # not be emitted as data rows.
            if line.startswith("#") or "s__" not in line:
                continue

            # rstrip() also drops trailing empty columns, so x[-1] is the
            # last non-empty column.
            # NOTE(review): presumably that column is the abundance; if the
            # input has a non-empty column after it, that one is written
            # instead -- confirm against the MetaPhlAn versions in use.
            x = line.rstrip().split('\t')
            lineage = x[0].replace('|', '')
            lineage = re_replace.sub('\t', lineage)
            lineage = lineage.replace('_', ' ')
            krona_out_f.write("%s\t%s\n" % (x[-1], lineage))
1aaa9b943a83
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 2b87bc7417360e2b2c9ec0605d475909f6f0482f"
iuc
parents:
diff
changeset
|
122 |
1aaa9b943a83
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 2b87bc7417360e2b2c9ec0605d475909f6f0482f"
iuc
parents:
diff
changeset
|
123 |
1aaa9b943a83
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 2b87bc7417360e2b2c9ec0605d475909f6f0482f"
iuc
parents:
diff
changeset
|
if __name__ == '__main__':
    # Command line entry point: one sub-command per formatting function.
    parser = argparse.ArgumentParser(description='Format MetaPhlAn output')
    subparsers = parser.add_subparsers(dest='function')
    # split_levels
    split_levels_parser = subparsers.add_parser('split_levels', help='Split default MetaPhlAn into a report for each taxonomic level')
    # The paths are mandatory: without them Path(None) would raise a
    # TypeError further down instead of a usage message.
    split_levels_parser.add_argument('--metaphlan_output', required=True, help="Path to default MetaPhlAn output")
    split_levels_parser.add_argument('--outdir', required=True, help="Path to output directory")
    split_levels_parser.add_argument('--legacy-output', dest='legacy_output', action='store_true', help="Old MetaPhlAn2 two columns output")
    split_levels_parser.set_defaults(legacy_output=False)
    # format_for_krona
    format_for_krona_parser = subparsers.add_parser('format_for_krona', help='Format default MetaPhlAn output for Krona')
    format_for_krona_parser.add_argument('--metaphlan_output', required=True, help="Path to default MetaPhlAn output")
    format_for_krona_parser.add_argument('--krona_output', required=True, help="Path to Krona output file")

    args = parser.parse_args()

    if args.function == 'split_levels':
        split_levels(
            Path(args.metaphlan_output),
            Path(args.outdir),
            args.legacy_output)
    elif args.function == 'format_for_krona':
        format_for_krona(
            Path(args.metaphlan_output),
            Path(args.krona_output))
    else:
        # No sub-command given: report it instead of exiting silently
        # without producing any output.
        parser.error("a sub-command is required: split_levels or format_for_krona")