diff compare_humann2_output.py @ 3:eaa95ea1195c draft

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/compare_humann2_output commit eea46077010e699403ce6995d7d4aac77b2e0b43"
author bgruening
date Wed, 19 Oct 2022 14:49:06 +0000
parents 05766022dfc4
children
line wrap: on
line diff
--- a/compare_humann2_output.py	Mon Sep 14 13:50:30 2020 +0000
+++ b/compare_humann2_output.py	Wed Oct 19 14:49:06 2022 +0000
@@ -8,19 +8,19 @@
     abundances = {}
     more_abund_charact = []
     abund_sum = 0
-    with open(fp, 'r') as abundance_f:
+    with open(fp, "r") as abundance_f:
         for line in abundance_f.readlines()[1:]:
-            split_line = line[:-1].split('\t')
+            split_line = line[:-1].split("\t")
             charact_id = split_line[0]
             abund = float(split_line[1])
-            abundances[charact_id] = 100*abund
+            abundances[charact_id] = 100 * abund
             abund_sum += abundances[charact_id]
 
             if len(more_abund_charact) < nb_charact_to_extract:
                 more_abund_charact.append(charact_id)
             else:
                 best_pos = None
-                for i in range(len(more_abund_charact)-1, -1, -1):
+                for i in range(len(more_abund_charact) - 1, -1, -1):
                     if abundances[more_abund_charact[i]] < abund:
                         best_pos = i
                     else:
@@ -34,34 +34,34 @@
 
 
 def format_characteristic_name(all_name):
-    if all_name.find(':') != -1:
-        charact_id = all_name.split(':')[0]
-        char_name = all_name.split(':')[1][1:]
+    if all_name.find(":") != -1:
+        charact_id = all_name.split(":")[0]
+        char_name = all_name.split(":")[1][1:]
     else:
         charact_id = all_name
-        char_name = ''
+        char_name = ""
 
-    char_name = char_name.replace('/', ' ')
-    char_name = char_name.replace('-', ' ')
-    char_name = char_name.replace("'", '')
-    if char_name.find('(') != -1 and char_name.find(')') != -1:
-        open_bracket = char_name.find('(')
-        close_bracket = char_name.find(')')+1
+    char_name = char_name.replace("/", " ")
+    char_name = char_name.replace("-", " ")
+    char_name = char_name.replace("'", "")
+    if char_name.find("(") != -1 and char_name.find(")") != -1:
+        open_bracket = char_name.find("(")
+        close_bracket = char_name.find(")") + 1
         char_name = char_name[:open_bracket] + char_name[close_bracket:]
     return charact_id, char_name
 
 
 def write_more_abundant_charat(abundances, more_abund_charact, output_fp):
-    with open(output_fp, 'w') as output_f:
-        output_f.write('id\tname\t%s\n' % '\t'.join(abundances.keys()))
+    with open(output_fp, "w") as output_f:
+        output_f.write("id\tname\t%s\n" % "\t".join(abundances.keys()))
 
         for mac in more_abund_charact:
             charact_id, charact_name = format_characteristic_name(mac)
-            output_f.write('%s\t%s' % (charact_id, charact_name))
+            output_f.write("%s\t%s" % (charact_id, charact_name))
             for sample in abundances:
                 abund = abundances[sample].get(mac, 0)
-                output_f.write('\t%s' % (abund))
-            output_f.write('\n')
+                output_f.write("\t%s" % (abund))
+            output_f.write("\n")
 
 
 def extract_similar_characteristics(abund, sim_output_fp, output_files):
@@ -69,38 +69,41 @@
     sim_characteristics = set(abund[abund_keys[0]].keys())
     for sample in abund_keys[1:]:
         sim_characteristics.intersection_update(abund[sample].keys())
-    print('Similar between all samples: %s' % len(sim_characteristics))
+    print("Similar between all samples: %s" % len(sim_characteristics))
 
-    with open(sim_output_fp, 'w') as sim_output_f:
-        sim_output_f.write('id\tname\t%s\n' % '\t'.join(abund_keys))
+    with open(sim_output_fp, "w") as sim_output_f:
+        sim_output_f.write("id\tname\t%s\n" % "\t".join(abund_keys))
         for charact in list(sim_characteristics):
             charact_id, charact_name = format_characteristic_name(charact)
-            sim_output_f.write('%s\t%s' % (charact_id, charact_name))
+            sim_output_f.write("%s\t%s" % (charact_id, charact_name))
             for sample in abund_keys:
-                sim_output_f.write('\t%s' % abund[sample][charact])
-            sim_output_f.write('\n')
+                sim_output_f.write("\t%s" % abund[sample][charact])
+            sim_output_f.write("\n")
 
-    print('Specific to samples:')
+    print("Specific to samples:")
     diff_char = {}
     for i in range(len(abund_keys)):
         sample = abund_keys[i]
-        print(' %s' % sample )
-        print('    All: %s' % len(abund[sample].keys()))
+        print(" %s" % sample)
+        print("    All: %s" % len(abund[sample].keys()))
         diff_char[sample] = set(abund[sample].keys())
         diff_char[sample].difference_update(sim_characteristics)
-        perc = 100*len(diff_char[sample])/(1.*len(abund[sample].keys()))
-        print('    Number of specific characteristics: %s' % len(diff_char[sample]))
-        print('    Percentage of specific characteristics: %s' % perc)
+        perc = 100 * len(diff_char[sample]) / (1.0 * len(abund[sample].keys()))
+        print("    Number of specific characteristics: %s" % len(diff_char[sample]))
+        print("    Percentage of specific characteristics: %s" % perc)
 
         relative_abundance = 0
-        with open(output_files[i], 'w') as output_f:
-            output_f.write('id\tname\tabundances\n')
+        with open(output_files[i], "w") as output_f:
+            output_f.write("id\tname\tabundances\n")
             for charact in list(diff_char[sample]):
                 charact_id, charact_name = format_characteristic_name(charact)
-                output_f.write('%s\t%s' % (charact_id, charact_name))
-                output_f.write('%s\n' % abund[sample][charact])
+                output_f.write("%s\t%s" % (charact_id, charact_name))
+                output_f.write("%s\n" % abund[sample][charact])
                 relative_abundance += abund[sample][charact]
-        print('    Relative abundance of specific characteristics: %s' % relative_abundance)
+        print(
+            "    Relative abundance of specific characteristics: %s"
+            % relative_abundance
+        )
 
     return sim_characteristics
 
@@ -111,34 +114,28 @@
 
     for i in range(len(args.sample_name)):
         abund[args.sample_name[i]], mac = extract_abundances(
-            args.charact_input_fp[i],
-            args.most_abundant_characteristics_to_extract)
+            args.charact_input_fp[i], args.most_abundant_characteristics_to_extract
+        )
         more_abund_charact += mac
 
     write_more_abundant_charat(
-        abund,
-        list(set(more_abund_charact)),
-        args.more_abundant_output_fp)
+        abund, list(set(more_abund_charact)), args.more_abundant_output_fp
+    )
     extract_similar_characteristics(
-        abund,
-        args.similar_output_fp,
-        args.specific_output_fp)
+        abund, args.similar_output_fp, args.specific_output_fp
+    )
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     parser = argparse.ArgumentParser()
-    parser.add_argument('--sample_name', required=True, action='append')
-    parser.add_argument('--charact_input_fp', required=True, action='append')
+    parser.add_argument("--sample_name", required=True, action="append")
+    parser.add_argument("--charact_input_fp", required=True, action="append")
     parser.add_argument(
-        '--most_abundant_characteristics_to_extract',
-        required=True,
-        type=int)
-    parser.add_argument('--more_abundant_output_fp', required=True)
-    parser.add_argument('--similar_output_fp', required=True)
-    parser.add_argument(
-        '--specific_output_fp',
-        required=True,
-        action='append')
+        "--most_abundant_characteristics_to_extract", required=True, type=int
+    )
+    parser.add_argument("--more_abundant_output_fp", required=True)
+    parser.add_argument("--similar_output_fp", required=True)
+    parser.add_argument("--specific_output_fp", required=True, action="append")
     args = parser.parse_args()
 
     if len(args.sample_name) != len(args.charact_input_fp):