Mercurial > repos > earlhaminst > hcluster_sg_parser
annotate hcluster_sg_parser.py @ 1:17aa68582a05 draft
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/hcluster_sg_parser commit a79c8f1799189754eae80aede6fbe5428570f36b
author | earlhaminst |
---|---|
date | Fri, 20 Jan 2017 06:13:23 -0500 |
parents | |
children | f9e418125021 |
rev | line source |
---|---|
1
17aa68582a05
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/hcluster_sg_parser commit a79c8f1799189754eae80aede6fbe5428570f36b
earlhaminst
parents:
diff
changeset
|
1 """ |
17aa68582a05
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/hcluster_sg_parser commit a79c8f1799189754eae80aede6fbe5428570f36b
earlhaminst
parents:
diff
changeset
|
2 A simple parser to convert the hcluster_sg 3-column output into lists of IDs, one list for each cluster. |
17aa68582a05
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/hcluster_sg_parser commit a79c8f1799189754eae80aede6fbe5428570f36b
earlhaminst
parents:
diff
changeset
|
3 |
17aa68582a05
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/hcluster_sg_parser commit a79c8f1799189754eae80aede6fbe5428570f36b
earlhaminst
parents:
diff
changeset
|
4 When a minimum and/or maximum number of cluster elements is specified, the IDs contained in the filtered-out clusters are collected in the "discarded IDs" output dataset. |
17aa68582a05
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/hcluster_sg_parser commit a79c8f1799189754eae80aede6fbe5428570f36b
earlhaminst
parents:
diff
changeset
|
5 |
17aa68582a05
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/hcluster_sg_parser commit a79c8f1799189754eae80aede6fbe5428570f36b
earlhaminst
parents:
diff
changeset
|
6 Usage: |
17aa68582a05
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/hcluster_sg_parser commit a79c8f1799189754eae80aede6fbe5428570f36b
earlhaminst
parents:
diff
changeset
|
7 |
17aa68582a05
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/hcluster_sg_parser commit a79c8f1799189754eae80aede6fbe5428570f36b
earlhaminst
parents:
diff
changeset
|
8 python hcluster_sg_parser.py [-m <N>] [-M <N>] <file> <discarded_out> |
17aa68582a05
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/hcluster_sg_parser commit a79c8f1799189754eae80aede6fbe5428570f36b
earlhaminst
parents:
diff
changeset
|
9 """ |
17aa68582a05
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/hcluster_sg_parser commit a79c8f1799189754eae80aede6fbe5428570f36b
earlhaminst
parents:
diff
changeset
|
10 import optparse |
17aa68582a05
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/hcluster_sg_parser commit a79c8f1799189754eae80aede6fbe5428570f36b
earlhaminst
parents:
diff
changeset
|
11 import sys |
17aa68582a05
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/hcluster_sg_parser commit a79c8f1799189754eae80aede6fbe5428570f36b
earlhaminst
parents:
diff
changeset
|
12 |
17aa68582a05
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/hcluster_sg_parser commit a79c8f1799189754eae80aede6fbe5428570f36b
earlhaminst
parents:
diff
changeset
|
13 |
17aa68582a05
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/hcluster_sg_parser commit a79c8f1799189754eae80aede6fbe5428570f36b
earlhaminst
parents:
diff
changeset
|
def main(argv=None):
    """Split hcluster_sg 3-column output into one ID list file per cluster.

    Each non-blank input line must hold three tab-separated columns:
    cluster ID, number of IDs in the cluster, and a comma-separated ID list.
    Clusters whose declared size lies within [--min, --max] are written, one
    ID per line, to ``<cluster_id>_output.txt`` (path relative to the current
    working directory). The IDs of every other cluster are appended, one per
    line, to the "discarded" output file.

    Args:
        argv: Command-line arguments excluding the program name. When None,
            optparse falls back to ``sys.argv[1:]``.

    Raises:
        SystemExit: If fewer than two positional arguments are given
            (reported through ``parser.error``).
        ValueError: If a non-blank line does not have exactly three columns
            or its size column is not an integer.
    """
    parser = optparse.OptionParser(usage='%prog [-m <N>] [-M <N>] <file> <discarded_out>')
    parser.add_option('-m', '--min', type='int', default=0, help='Minimum number of cluster elements')
    parser.add_option('-M', '--max', type='int', default=sys.maxsize, help='Maximum number of cluster elements')
    options, args = parser.parse_args(argv)

    # Fail with a usage message instead of an IndexError further down.
    if len(args) < 2:
        parser.error('expected 2 arguments: <file> <discarded_out>')
    input_path, discarded_path = args[0], args[1]

    with open(discarded_path, 'w') as discarded_out, open(input_path) as fh:
        for line_no, raw_line in enumerate(fh, 1):
            line = raw_line.rstrip('\r\n')
            if not line.strip():
                # Tolerate blank lines (e.g. a trailing empty line).
                continue

            columns = line.split('\t')
            if len(columns) != 3:
                raise ValueError(
                    'line %d: expected 3 tab-separated columns, found %d'
                    % (line_no, len(columns)))
            cluster_id, n_ids, id_list = columns
            n_ids = int(n_ids)

            # Drop empty entries so a trailing comma (or its absence) does not
            # matter, then always terminate the block with a newline; without
            # it, IDs of consecutive discarded clusters would be glued
            # together on a single line.
            ids = [item.strip() for item in id_list.split(',') if item.strip()]
            id_block = '\n'.join(ids) + '\n' if ids else ''

            if options.min <= n_ids <= options.max:
                with open(cluster_id + '_output.txt', 'w') as f:
                    f.write(id_block)
            else:
                discarded_out.write(id_block)
17aa68582a05
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/hcluster_sg_parser commit a79c8f1799189754eae80aede6fbe5428570f36b
earlhaminst
parents:
diff
changeset
|
33 |
17aa68582a05
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/hcluster_sg_parser commit a79c8f1799189754eae80aede6fbe5428570f36b
earlhaminst
parents:
diff
changeset
|
34 |
17aa68582a05
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/hcluster_sg_parser commit a79c8f1799189754eae80aede6fbe5428570f36b
earlhaminst
parents:
diff
changeset
|
# Script entry point: run the parser only on direct execution, not on import.
if __name__ == '__main__':
    main()