Mercurial > repos > earlhaminst > hcluster_sg_parser
comparison hcluster_sg_parser.py @ 1:17aa68582a05 draft
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/hcluster_sg_parser commit a79c8f1799189754eae80aede6fbe5428570f36b
| author | earlhaminst |
|---|---|
| date | Fri, 20 Jan 2017 06:13:23 -0500 |
| parents | |
| children | f9e418125021 |
comparison
equal
deleted
inserted
replaced
| 0:dbc49bd1a3e9 | 1:17aa68582a05 |
|---|---|
| 1 """ | |
| 2 A simple parser to convert the hcluster_sg 3-column output into lists of IDs, one list for each cluster. | |
| 3 | |
| 4 When a minimum and/or maximum number of cluster elements is specified, the IDs contained in the filtered-out clusters are collected in the "discarded IDs" output dataset. | |
| 5 | |
| 6 Usage: | |
| 7 | |
| 8 python hcluster_sg_parser.py [-m <N>] [-M <N>] <file> <discarded_out> | |
| 9 """ | |
| 10 import optparse | |
| 11 import sys | |
| 12 | |
| 13 | |
def main():
    """Split hcluster_sg 3-column output into one ID list file per cluster.

    Command line: ``[-m <N>] [-M <N>] <file> <discarded_out>``

    Each non-empty line of ``<file>`` must hold three tab-separated columns:
    cluster ID, number of IDs in the cluster, and a comma-separated ID list.
    For every cluster whose size lies within ``[--min, --max]`` (inclusive)
    the IDs are written, one per line, to ``<cluster_id>_output.txt`` in the
    current working directory.  The IDs of all other clusters are appended,
    one per line, to ``<discarded_out>``.

    Raises:
        SystemExit: (status 2, via ``parser.error``) if fewer than two
            positional arguments are given.
        ValueError: if a non-empty line does not have exactly three
            tab-separated columns or its size column is not an integer.
    """
    parser = optparse.OptionParser()
    parser.add_option('-m', '--min', type='int', default=0, help='Minimum number of cluster elements')
    parser.add_option('-M', '--max', type='int', default=sys.maxsize, help='Maximum number of cluster elements')
    options, args = parser.parse_args()

    # Fail with a usage message instead of an IndexError further down.
    # Extra positional arguments are still ignored, as before.
    if len(args) < 2:
        parser.error('expected 2 positional arguments: <file> <discarded_out>')

    # The discarded-IDs file is opened first so that it is always created,
    # even when no cluster ends up being filtered out.
    with open(args[1], 'w') as discarded_out:
        with open(args[0]) as fh:
            for line in fh:
                line = line.rstrip()
                if not line:
                    # Tolerate blank lines (e.g. a trailing empty line).
                    continue
                cluster_id, n_ids, id_list = line.split('\t')
                n_ids = int(n_ids)
                # One ID per line; a trailing comma in the input becomes the
                # final newline.
                id_list = id_list.replace(',', '\n')
                if options.min <= n_ids <= options.max:
                    with open(cluster_id + '_output.txt', 'w') as f:
                        f.write(id_list)
                else:
                    discarded_out.write(id_list)
                    # All discarded clusters share one file: without a
                    # terminating newline the last ID of this cluster would
                    # be fused with the first ID of the next one.
                    if not id_list.endswith('\n'):
                        discarded_out.write('\n')


if __name__ == "__main__":
    main()
