diff hcluster_sg_parser.py @ 3:f9e418125021 draft

planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/hcluster_sg_parser commit 66af14bc1642c1ca6ceb21f6018c8d665da890e8
author earlhaminst
date Fri, 28 Apr 2017 12:51:35 -0400
parents 17aa68582a05
children 02d73e6ca869
line wrap: on
line diff
--- a/hcluster_sg_parser.py	Fri Mar 24 12:33:12 2017 -0400
+++ b/hcluster_sg_parser.py	Fri Apr 28 12:51:35 2017 -0400
@@ -1,5 +1,5 @@
 """
-A simple parser to convert the hcluster_sg 3-column output into lists of IDs, one list for each cluster.
+A simple parser to convert the hcluster_sg output into lists of IDs, one list for each cluster.
 
 When a minimum and/or maximum number of cluster elements are specified, the IDs contained in the filtered-out clusters are collected in the "discarded IDS" output dataset.
 
@@ -21,9 +21,10 @@
         with open(args[0]) as fh:
             for line in fh:
                 line = line.rstrip()
-                (cluster_id, n_ids, id_list) = line.split('\t')
-                n_ids = int(n_ids)
-                id_list = id_list.replace(',', '\n')
+                line_cols = line.split('\t')
+                cluster_id = line_cols[0]
+                n_ids = int(line_cols[-2])
+                id_list = line_cols[-1].replace(',', '\n')
                 if n_ids >= options.min and n_ids <= options.max:
                     outfile = cluster_id + '_output.txt'
                     with open(outfile, 'w') as f: