Mercurial > repos > earlhaminst > hcluster_sg_parser
annotate hcluster_sg_parser.py @ 5:07f5b2c5ac10 draft default tip
"planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/hcluster_sg_parser commit e015ac3e0d8c1bc62b1301527241ee5e377b91db"
author | earlhaminst |
---|---|
date | Fri, 10 Sep 2021 15:08:06 +0000 |
parents | 02d73e6ca869 |
children |
rev | line source |
---|---|
1
17aa68582a05
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/hcluster_sg_parser commit a79c8f1799189754eae80aede6fbe5428570f36b
earlhaminst
parents:
diff
changeset
|
1 """ |
3
f9e418125021
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/hcluster_sg_parser commit 66af14bc1642c1ca6ceb21f6018c8d665da890e8
earlhaminst
parents:
1
diff
changeset
|
2 A simple parser to convert the hcluster_sg output into lists of IDs, one list for each cluster. |
1
17aa68582a05
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/hcluster_sg_parser commit a79c8f1799189754eae80aede6fbe5428570f36b
earlhaminst
parents:
diff
changeset
|
3 |
17aa68582a05
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/hcluster_sg_parser commit a79c8f1799189754eae80aede6fbe5428570f36b
earlhaminst
parents:
diff
changeset
|
4 When a minimum and/or maximum number of cluster elements is specified, the IDs contained in the filtered-out clusters are collected in two "discarded IDs" output datasets: one for clusters smaller than the minimum and one for clusters larger than the maximum. |
17aa68582a05
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/hcluster_sg_parser commit a79c8f1799189754eae80aede6fbe5428570f36b
earlhaminst
parents:
diff
changeset
|
5 |
17aa68582a05
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/hcluster_sg_parser commit a79c8f1799189754eae80aede6fbe5428570f36b
earlhaminst
parents:
diff
changeset
|
6 Usage: |
17aa68582a05
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/hcluster_sg_parser commit a79c8f1799189754eae80aede6fbe5428570f36b
earlhaminst
parents:
diff
changeset
|
7 |
5
07f5b2c5ac10
"planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/hcluster_sg_parser commit e015ac3e0d8c1bc62b1301527241ee5e377b91db"
earlhaminst
parents:
4
diff
changeset
|
8 python hcluster_sg_parser.py [-m <N>] [-M <N>] [-d <dir>] <file> <discarded_min_out> <discarded_max_out> |
1
17aa68582a05
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/hcluster_sg_parser commit a79c8f1799189754eae80aede6fbe5428570f36b
earlhaminst
parents:
diff
changeset
|
9 """ |
17aa68582a05
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/hcluster_sg_parser commit a79c8f1799189754eae80aede6fbe5428570f36b
earlhaminst
parents:
diff
changeset
|
10 import optparse |
5
07f5b2c5ac10
"planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/hcluster_sg_parser commit e015ac3e0d8c1bc62b1301527241ee5e377b91db"
earlhaminst
parents:
4
diff
changeset
|
11 import os |
1
17aa68582a05
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/hcluster_sg_parser commit a79c8f1799189754eae80aede6fbe5428570f36b
earlhaminst
parents:
diff
changeset
|
12 import sys |
17aa68582a05
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/hcluster_sg_parser commit a79c8f1799189754eae80aede6fbe5428570f36b
earlhaminst
parents:
diff
changeset
|
13 |
17aa68582a05
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/hcluster_sg_parser commit a79c8f1799189754eae80aede6fbe5428570f36b
earlhaminst
parents:
diff
changeset
|
14 |
17aa68582a05
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/hcluster_sg_parser commit a79c8f1799189754eae80aede6fbe5428570f36b
earlhaminst
parents:
diff
changeset
|
def main(argv=None):
    """Split hcluster_sg output into one ID-list file per cluster.

    Each non-blank input line is tab-separated: the first column is used as
    the cluster ID, the second-to-last column as the number of IDs in the
    cluster, and the last column as a comma-separated list of IDs.

    Clusters with fewer than ``--min`` IDs are appended to the
    ``<discarded_min_out>`` file, clusters with more than ``--max`` IDs are
    appended to the ``<discarded_max_out>`` file, and every other cluster is
    written to its own ``<cluster_id>_output.txt`` file (inside ``--dir``
    when that option is given). IDs are written one per line.

    Args:
        argv: Command-line arguments without the program name. When None
            (the default), ``sys.argv[1:]`` is used.

    Raises:
        SystemExit: If fewer than three positional arguments are supplied.
        ValueError: If a non-blank line has fewer than two columns or its
            ID-count column is not an integer.
    """
    parser = optparse.OptionParser(
        usage='%prog [-m <N>] [-M <N>] [-d <dir>] <file> <discarded_min_out> <discarded_max_out>')
    parser.add_option('-m', '--min', type='int', default=0, help='Minimum number of cluster elements')
    parser.add_option('-M', '--max', type='int', default=sys.maxsize, help='Maximum number of cluster elements')
    parser.add_option('-d', '--dir', type='string', help="Absolute or relative path to output directory. If the directory does not exist, it will be created")
    options, args = parser.parse_args(argv)

    # Fail with a usage message instead of an IndexError further down.
    if len(args) < 3:
        parser.error('expected <file> <discarded_min_out> <discarded_max_out>, got %d argument(s)' % len(args))
    input_path, discarded_min_path, discarded_max_path = args[:3]

    # makedirs (rather than mkdir) so that a nested output path also works.
    if options.dir and not os.path.isdir(options.dir):
        os.makedirs(options.dir)

    with open(discarded_max_path, 'w') as discarded_max_out, \
            open(discarded_min_path, 'w') as discarded_min_out, \
            open(input_path) as fh:
        for line_no, line in enumerate(fh, 1):
            line = line.rstrip()
            if not line:
                # Tolerate blank lines (e.g. a trailing empty line).
                continue
            line_cols = line.split('\t')
            if len(line_cols) < 2:
                raise ValueError('line %d: expected at least 2 tab-separated columns' % line_no)
            cluster_id = line_cols[0]
            n_ids = int(line_cols[-2])
            id_list = line_cols[-1].replace(',', '\n')
            # The discarded files receive several clusters in a row: make sure
            # the last ID of one cluster is not glued to the first of the next
            # when the ID column has no trailing comma.
            if id_list and not id_list.endswith('\n'):
                id_list += '\n'
            if n_ids < options.min:
                discarded_min_out.write(id_list)
            elif n_ids > options.max:
                discarded_max_out.write(id_list)
            else:
                outfile = cluster_id + '_output.txt'
                if options.dir:
                    outfile = os.path.join(options.dir, outfile)
                with open(outfile, 'w') as f:
                    f.write(id_list)
1
17aa68582a05
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/hcluster_sg_parser commit a79c8f1799189754eae80aede6fbe5428570f36b
earlhaminst
parents:
diff
changeset
|
43 |
17aa68582a05
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/hcluster_sg_parser commit a79c8f1799189754eae80aede6fbe5428570f36b
earlhaminst
parents:
diff
changeset
|
44 |
17aa68582a05
planemo upload for repository https://github.com/TGAC/earlham-galaxytools/tree/master/tools/hcluster_sg_parser commit a79c8f1799189754eae80aede6fbe5428570f36b
earlhaminst
parents:
diff
changeset
|
# Run the parser only when executed as a script, not when imported.
if __name__ == "__main__":
    main()