# HG changeset patch
# User iuc
# Date 1621282224 0
# Node ID b89b0765695d3344e26b8beb3f499c11e7ed6264
# Parent f5df500fcc3ced9d2ea7a4a9067b9e274532e739
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/metaphlan/ commit 2b87bc7417360e2b2c9ec0605d475909f6f0482f"
diff -r f5df500fcc3c -r b89b0765695d formatoutput.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/formatoutput.py Mon May 17 20:10:24 2021 +0000
@@ -0,0 +1,144 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+import argparse
+import re
+from pathlib import Path
+
+# Taxonomic levels found in MetaPhlAn clade names, keyed by the one-letter
+# code that precedes "__" (e.g. "k__Bacteria" is at the kingdom level).
+# The values are used as column headers in the generated reports.
+taxo_level = {
+    'k': 'kingdom', 'p': 'phylum',
+    'c': 'class', 'o': 'order',
+    'f': 'family', 'g': 'genus',
+    's': 'species', 't': 'strains',
+}
+
+
+def split_levels(metaphlan_output_fp, out_dp, legacy_output):
+    '''
+    Split default MetaPhlAn into a report for each taxonomic level
+
+    One tab-separated file per level (named after the level) receives the
+    clades whose deepest level is that one; an 'all' file receives every
+    clade with one group of columns per level.
+
+    :param metaphlan_output_fp: Path default MetaPhlAn output
+    :param out_dp: Path to output directory
+    :param legacy_output: Boolean for legacy output (no taxon id column)
+    '''
+    level_order = ['k', 'p', 'c', 'o', 'f', 'g', 's', 't']
+    # a level spans one column (name) or two (name and id)
+    cols_per_level = 1 if legacy_output else 2
+
+    # prepare output files
+    abund_f = {}
+    # opened inside the try so a failure still closes those already open
+    try:
+        for level in level_order:
+            abund_f[level] = open(out_dp / Path(taxo_level[level]), 'w')
+            abund_f[level].write("%s\t" % taxo_level[level])
+            if not legacy_output:
+                abund_f[level].write("%s_id\t" % taxo_level[level])
+            abund_f[level].write("abundance\n")
+
+        with open(metaphlan_output_fp, 'r') as metaphlan_output_f:
+            with open(out_dp / Path('all'), 'w') as all_level_f:
+                # write header in all levels file
+                for level in level_order:
+                    all_level_f.write("%s\t" % taxo_level[level])
+                    if not legacy_output:
+                        all_level_f.write("%s_id\t" % taxo_level[level])
+                all_level_f.write("abundance\n")
+
+                # parse metaphlan file
+                for line in metaphlan_output_f:
+                    # skip headers and blank lines
+                    if line.startswith("#") or not line.strip():
+                        continue
+
+                    # split lines; rstrip (not [:-1]) keeps the last
+                    # character when the file lacks a final newline
+                    split_line = line.rstrip('\n').split('\t')
+                    taxo_n = split_line[0].split('|')
+                    if legacy_output:
+                        abundance = split_line[1]
+                    else:
+                        taxo_id = split_line[1].split('|')
+                        abundance = split_line[2]
+
+                    # get taxon name and ids
+                    for i, clade in enumerate(taxo_n):
+                        taxo = clade.split('__')[1].replace("_", " ")
+                        all_level_f.write("%s\t" % taxo)
+                        if not legacy_output:
+                            all_level_f.write("%s\t" % taxo_id[i])
+
+                    # pad missing levels so abundance stays under its header
+                    missing = len(level_order) - len(taxo_n)
+                    all_level_f.write('\t' * (missing * cols_per_level))
+
+                    all_level_f.write("%s\n" % abundance)
+
+                    # write the clade to the file of its deepest level
+                    last_taxo_level = taxo_n[-1].split('__')
+                    taxo = last_taxo_level[1].replace("_", " ")
+                    level = last_taxo_level[0]
+                    abund_f[level].write("%s\t" % taxo)
+                    if not legacy_output:
+                        abund_f[level].write("%s\t" % taxo_id[-1])
+                    abund_f[level].write("%s\n" % abundance)
+    finally:
+        # close files even when parsing fails
+        for level_f in abund_f.values():
+            level_f.close()
+
+
+def format_for_krona(metaphlan_output_fp, krona_out_fp):
+    '''
+    Write the species-resolved lines of a MetaPhlAn report as Krona text
+
+    :param metaphlan_output_fp: Path default MetaPhlAn output
+    :param krona_out_fp: Path to output file for Krona
+    '''
+    # substitutions are applied to the clade column in this order
+    pipe = re.compile(r"\|")
+    level_prefix = re.compile(r"\w__")
+    underscore = re.compile(r"_")
+
+    with open(metaphlan_output_fp, 'r') as report, open(krona_out_fp, 'w') as krona:
+        for record in report:
+            # only lines that contain a species-level marker are kept
+            if "s__" not in record:
+                continue
+            fields = record.rstrip().split('\t')
+            clades = level_prefix.sub('\t', pipe.sub('', fields[0]))
+            krona.write("%s\t%s\n" % (fields[-1], underscore.sub(' ', clades)))
+
+
+if __name__ == '__main__':
+    # command-line entry point: one sub-command per formatting function
+    parser = argparse.ArgumentParser(description='Format MetaPhlAn output')
+    subparsers = parser.add_subparsers(dest='function')
+    # split_levels
+    split_levels_parser = subparsers.add_parser('split_levels', help='Split default MetaPhlAn into a report for each taxonomic level')
+    split_levels_parser.add_argument('--metaphlan_output', required=True, help="Path to default MetaPhlAn output")
+    split_levels_parser.add_argument('--outdir', required=True, help="Path to output directory")
+    # store_true already defaults to False, so no set_defaults is needed
+    split_levels_parser.add_argument('--legacy-output', dest='legacy_output', action='store_true', help="Old MetaPhlAn2 two columns output")
+    # format_for_krona
+    format_for_krona_parser = subparsers.add_parser('format_for_krona', help='Format default MetaPhlAn output for Krona')
+    format_for_krona_parser.add_argument('--metaphlan_output', required=True, help="Path to default MetaPhlAn output")
+    format_for_krona_parser.add_argument('--krona_output', required=True, help="Path to Krona output file")
+
+    args = parser.parse_args()
+
+    # dispatch to the function selected by the sub-command
+    if args.function == 'split_levels':
+        split_levels(Path(args.metaphlan_output), Path(args.outdir), args.legacy_output)
+    elif args.function == 'format_for_krona':
+        format_for_krona(Path(args.metaphlan_output), Path(args.krona_output))
+    else:
+        # no sub-command given: fail loudly instead of silently doing nothing
+        parser.error('a sub-command is required: split_levels or format_for_krona')
diff -r f5df500fcc3c -r b89b0765695d macros.xml
--- a/macros.xml Mon Apr 19 20:56:20 2021 +0000
+++ b/macros.xml Mon May 17 20:10:24 2021 +0000
@@ -1,6 +1,6 @@
- 3.0.7
+ 3.0.8
0
20.01
@@ -24,17 +24,4 @@
1101/2020.11.19.388223
- fastq,fastq.gz,fastq.bz2,fasta,fasta.gz,fasta.bz2
-
-
-
-
-
-
-
-
-
-
-
-
diff -r f5df500fcc3c -r b89b0765695d metaphlan.xml
--- a/metaphlan.xml Mon Apr 19 20:56:20 2021 +0000
+++ b/metaphlan.xml Mon May 17 20:10:24 2021 +0000
@@ -2,6 +2,32 @@
to profile the composition of microbial communities
macros.xml
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ fastq,fastq.gz,fastq.bz2,fasta,fasta.gz,fasta.bz2
@@ -110,7 +136,7 @@
#end if
-t '$analysis.analysis_type.t'
#if $analysis.analysis_type.t == "rel_ab" or $analysis.analysis_type.t == "rel_ab_w_read_stats"
- --tax_lev '$analysis.analysis_type.tax_lev'
+ --tax_lev '$analysis.analysis_type.tax_lev.tax_lev'
#else if $analysis.analysis_type.t == "clade_specific_strain_tracker"
--clade '$analysis.analysis_type.clade'
#if str($analysis.analysis_type.min_ab) != ''
@@ -159,6 +185,27 @@
&&
mv 'bowtie2out' '$bowtie2out'
#end if
+
+#if $analysis.analysis_type.tax_lev.tax_lev == 'a' and $analysis.analysis_type.tax_lev.split_levels
+&&
+mkdir 'split_levels'
+&&
+python '$__tool_directory__/formatoutput.py'
+ split_levels
+ --metaphlan_output '$output_file'
+ --outdir 'split_levels'
+ $out.legacy_output
+&&
+ls split_levels
+#end if
+
+#if $out.krona_output
+&&
+python '$__tool_directory__/formatoutput.py'
+ format_for_krona
+ --metaphlan_output '$output_file'
+ --krona_output '$krona_output_file'
+#end if
]]>
@@ -294,6 +341,7 @@
label="Report the profiling using the CAMI output format?"/>
+
@@ -305,9 +353,16 @@
inputs['in']['selector'] == "raw"
+
+
+ analysis['analysis_type']['tax_lev']['tax_lev'] == "a" and analysis['analysis_type']['tax_lev']['split_levels']
+
+
+ out['krona_output']
+
-
+
+
+
+
+
+
+
+
@@ -343,6 +405,7 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
@@ -403,6 +551,7 @@
+
-
+
+
+
+
+
+
+
+
@@ -623,6 +803,7 @@
+
@@ -647,6 +828,77 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+