Mercurial > repos > vipints > fml_mergeloci
annotate fml_gff_groomer/scripts/gff_available_limits.py @ 0:79726c328621 default tip
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
author | vipints |
---|---|
date | Tue, 07 Jun 2011 17:29:24 -0400 |
parents | |
children |
rev | line source |
---|---|
0
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
1 #!/usr/bin/env python |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
2 # |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
3 # This program is free software; you can redistribute it and/or modify |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
4 # it under the terms of the GNU General Public License as published by |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
5 # the Free Software Foundation; either version 3 of the License, or |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
6 # (at your option) any later version. |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
7 # |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
8 # Written (W) 2010 Vipin T Sreedharan, Friedrich Miescher Laboratory of the Max Planck Society |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
9 # Copyright (C) 2010 Max Planck Society |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
10 # |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
11 # Description : Provide available source, feature types from a GFF file |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
12 |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
13 import re, sys |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
14 import time |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
15 import collections |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
16 |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
def available_limits(gff_handle):
    """Count the sequence ids, sources and feature types found in GFF lines.

    Args:
        gff_handle: iterable of text lines (for example an open file) holding
            tab-separated, nine-column GFF records.

    Returns:
        A dict with the keys ``gff_id``, ``gff_source``, ``gff_type`` and
        ``gff_source_type``.  Each value is a plain dict mapping a tuple of
        column values (a 1-tuple, or ``(source, type)`` for
        ``gff_source_type``) to the number of records carrying that value.

    Raises:
        AssertionError: if a record line does not split into exactly nine
            tab-separated columns; the offending line is the exception
            argument.
    """
    # Zero-based column indexes that make up the key of each summary.
    filter_info = dict(gff_id=[0], gff_source_type=[1, 2],
                       gff_source=[1], gff_type=[2])
    cur_limits = dict((name, collections.defaultdict(int))
                      for name in filter_info)

    for line in gff_handle:
        record = line.strip('\n\r')
        # Skip blank lines (indexing [0] on them used to raise IndexError)
        # and comment/directive lines, which start with '#'.
        if not record.strip() or record[0] == '#':
            continue
        parts = [p.strip() for p in record.split('\t')]
        # A lone column containing word characters is treated as embedded
        # FASTA content (header or sequence) and ignored.
        if len(parts) == 1 and re.search(r'\w+', parts[0]):
            continue
        # Raised explicitly rather than via `assert` so the check is still
        # performed under `python -O`; the exception type is unchanged.
        if len(parts) != 9:
            raise AssertionError(line)
        for filter_key, cur_indexes in filter_info.items():
            cur_id = tuple(parts[i] for i in cur_indexes)
            cur_limits[filter_key][cur_id] += 1

    # Hand back plain dicts so callers do not get defaultdict semantics.
    return dict((name, dict(counts)) for name, counts in cur_limits.items())
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
41 |
79726c328621
Migrated tool version 1.0.0 from old tool shed archive to new tool shed repository
vipints
parents:
diff
changeset
|
if __name__ == '__main__':
    # Script entry point: print an overview of the sequence ids, sources and
    # feature types counted in the GFF file named as the first argument.
    # Single-argument print(...) calls emit the same text on Python 2 and 3.

    banner = '-------------------------------------------------------'
    stime = time.asctime(time.localtime(time.time()))
    print(banner)
    print('FeatureScan started on ' + stime)
    print(banner)

    try:
        # 'rU' asks Python 2 for universal newlines; Python 3 text mode does
        # that by default and Python 3.11+ rejects the 'U' flag.
        gff_handle = open(sys.argv[1], 'rU' if sys.version_info[0] < 3 else 'r')
    except (IndexError, IOError):
        # IndexError: no file argument given; IOError: file cannot be opened.
        sys.stderr.write("Can't open the GFF3 file, terminating...\n")
        sys.stderr.write("USAGE: gff_available_limits.py <gff file>\n")
        sys.exit(-1)
    try:
        final_dict = available_limits(gff_handle)
    finally:
        # Release the file even when parsing fails on a malformed record.
        gff_handle.close()

    print('')
    print("==Overview of available source(s) and feature type(s) from GFF file==")
    print('')
    print("Chromosome identifier(s) and corresponding count:")
    for contig, cnt in sorted(final_dict['gff_id'].items()):
        print('\t' + str(contig[0]) + '\t' + str(cnt))
    print('')
    print("Source(s) of feature and corresponding count:")
    for source, cnt in sorted(final_dict['gff_source'].items()):
        print('\t' + str(source[0]) + '\t' + str(cnt))
    print('')
    print("Feature type(s) and corresponding count:")
    for ftype, cnt in sorted(final_dict['gff_type'].items()):
        print('\t' + str(cnt) + '\t' + str(ftype[0]))
    print('')
    print("Unique combination of Feature type(s), Source(s) and corresponding count:")
    for sftype, cnt in sorted(final_dict['gff_source_type'].items()):
        print('\t' + str(cnt) + '\t' + str(sftype[0]) + ', ' + str(sftype[1]))
    print('')

    stime = time.asctime(time.localtime(time.time()))
    print(banner)
    print('FeatureScan finished at ' + stime)
    print(banner)