diff combine_metaphlan2_humann2.py @ 2:fdfb35745104 draft

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/combine_metaphlan2_humann2 commit eea46077010e699403ce6995d7d4aac77b2e0b43"
author bgruening
date Wed, 19 Oct 2022 14:44:00 +0000
parents e25efca0a49c
children
line wrap: on
line diff
--- a/combine_metaphlan2_humann2.py	Mon Sep 14 12:19:49 2020 +0000
+++ b/combine_metaphlan2_humann2.py	Wed Oct 19 14:44:00 2022 +0000
@@ -6,50 +6,48 @@
 
 def extract_clade_abundance(metaphlan2_fp):
     clade_abund = {}
-    with open(metaphlan2_fp, 'r') as metaphlan2_f:
+    with open(metaphlan2_fp, "r") as metaphlan2_f:
         for line in metaphlan2_f.readlines():
-            if line.find('g__') == -1:
+            if line.find("g__") == -1:
                 continue
 
-            split_line = line[:-1].split('\t')
+            split_line = line[:-1].split("\t")
             taxo = split_line[0]
             abundance = split_line[1]
 
-            genus = taxo[(taxo.find('g__')+3):]
-            if genus.find('|') != -1:
-                genus = genus[:(genus.find('|'))]
-            clade_abund.setdefault(genus, {'abundance': 0, 'species': {}})
-            if taxo.find('t__') != -1:
+            genus = taxo[(taxo.find("g__") + 3):]
+            if genus.find("|") != -1:
+                genus = genus[: (genus.find("|"))]
+            clade_abund.setdefault(genus, {"abundance": 0, "species": {}})
+            if taxo.find("t__") != -1:
                 continue
-            elif taxo.find('s__') != -1:
-                species = taxo[(taxo.find('s__')+3):]
-                clade_abund[genus]['species'].setdefault(
-                    species,
-                    abundance)
+            elif taxo.find("s__") != -1:
+                species = taxo[(taxo.find("s__") + 3):]
+                clade_abund[genus]["species"].setdefault(species, abundance)
             else:
-                clade_abund[genus]['abundance'] = abundance
+                clade_abund[genus]["abundance"] = abundance
     return clade_abund
 
 
 def compute_overall_abundance(humann2_fp):
     overall_abundance = 0
-    with open(humann2_fp, 'r') as humann2_f:
+    with open(humann2_fp, "r") as humann2_f:
         for line in humann2_f.readlines():
-            if line.find('|') != -1 or line.startswith('#'):
+            if line.find("|") != -1 or line.startswith("#"):
                 continue
-            split_line = line[:-1].split('\t')
+            split_line = line[:-1].split("\t")
             overall_abundance += float(split_line[1])
     return overall_abundance
 
 
 def format_characteristic_name(name):
     formatted_n = name
-    formatted_n = formatted_n.replace('/', ' ')
-    formatted_n = formatted_n.replace('-', ' ')
-    formatted_n = formatted_n.replace("'", '')
-    if formatted_n.find('(') != -1 and formatted_n.find(')') != -1:
-        open_bracket = formatted_n.find('(')
-        close_bracket = formatted_n.find(')')+1
+    formatted_n = formatted_n.replace("/", " ")
+    formatted_n = formatted_n.replace("-", " ")
+    formatted_n = formatted_n.replace("'", "")
+    if formatted_n.find("(") != -1 and formatted_n.find(")") != -1:
+        open_bracket = formatted_n.find("(")
+        close_bracket = formatted_n.find(")") + 1
         formatted_n = formatted_n[:open_bracket] + formatted_n[close_bracket:]
     return formatted_n
 
@@ -58,26 +56,26 @@
     clade_abund = extract_clade_abundance(args.metaphlan2_fp)
     overall_abund = compute_overall_abundance(args.humann2_fp)
 
-    with open(args.output_fp, 'w') as output_f:
-        s = 'genus\tgenus_abundance\tspecies\tspecies_abundance\t'
-        s = '%s\t%s_id\t%s_name\t%s_abundance\n' % (s, args.type, args.type, args.type)
+    with open(args.output_fp, "w") as output_f:
+        s = "genus\tgenus_abundance\tspecies\tspecies_abundance\t"
+        s = "%s\t%s_id\t%s_name\t%s_abundance\n" % (s, args.type, args.type, args.type)
         output_f.write(s)
-        with open(args.humann2_fp, 'r') as humann2_f:
+        with open(args.humann2_fp, "r") as humann2_f:
             for line in humann2_f.readlines():
-                if line.find('|') == -1:
+                if line.find("|") == -1:
                     continue
 
-                split_line = line[:-1].split('\t')
-                abundance = 100*float(split_line[1])/overall_abund
-                annotation = split_line[0].split('|')
-                charact = annotation[0].split(':')
+                split_line = line[:-1].split("\t")
+                abundance = 100 * float(split_line[1]) / overall_abund
+                annotation = split_line[0].split("|")
+                charact = annotation[0].split(":")
                 charact_id = charact[0]
-                char_name = ''
+                char_name = ""
                 if len(charact) > 1:
                     char_name = format_characteristic_name(charact[-1])
-                taxo = annotation[1].split('.')
+                taxo = annotation[1].split(".")
 
-                if taxo[0] == 'unclassified':
+                if taxo[0] == "unclassified":
                     continue
                 genus = taxo[0][3:]
                 species = taxo[1][3:]
@@ -85,25 +83,25 @@
                 if genus not in clade_abund:
                     print("no %s found in %s" % (genus, args.metaphlan2_fp))
                     continue
-                if species not in clade_abund[genus]['species']:
-                    print("no %s found in %s for % s" % (species, args.metaphlan2_fp, genus))
+                if species not in clade_abund[genus]["species"]:
+                    print(
+                        "no %s found in %s for % s"
+                        % (species, args.metaphlan2_fp, genus)
+                    )
                     continue
 
-                s = "%s\t%s\t" % (genus, clade_abund[genus]['abundance'])
-                s += "%s\t%s\t" % (species, clade_abund[genus]['species'][species])
+                s = "%s\t%s\t" % (genus, clade_abund[genus]["abundance"])
+                s += "%s\t%s\t" % (species, clade_abund[genus]["species"][species])
                 s += "%s\t%s\t%s\n" % (charact_id, char_name, abundance)
                 output_f.write(s)
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     parser = argparse.ArgumentParser()
-    parser.add_argument('--humann2_fp', required=True)
-    parser.add_argument('--metaphlan2_fp', required=True)
-    parser.add_argument('--output_fp', required=True)
-    parser.add_argument(
-        '--type',
-        required=True,
-        choices=['gene_families', 'pathways'])
+    parser.add_argument("--humann2_fp", required=True)
+    parser.add_argument("--metaphlan2_fp", required=True)
+    parser.add_argument("--output_fp", required=True)
+    parser.add_argument("--type", required=True, choices=["gene_families", "pathways"])
     args = parser.parse_args()
 
     combine_metaphlan2_humann2(args)