Mercurial > repos > jjohnson > gmap
annotate gmap/lib/galaxy/datatypes/gmap.py @ 5:f4b4c1712e39
GSNAP - added datatypes for tally related data and gnsap default output
author | Jim Johnson <jj@umn.edu> |
---|---|
date | Tue, 08 Nov 2011 13:07:25 -0600 |
parents | 52da588232b0 |
children |
rev | line source |
---|---|
0 | 1 """ |
2 GMAP indexes | |
3 """ | |
4 import logging | |
5 import os,os.path,re | |
2
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
6 import data |
0 | 7 from data import Text |
2
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
8 from galaxy import util |
0 | 9 from metadata import MetadataElement |
10 | |
11 log = logging.getLogger(__name__) | |
12 | |
class GmapDB( Text ):
    """
    A GMAP DB for indexes

    Composite datatype: the primary file is a generated HTML page, and
    set_meta() derives the metadata by scanning the directories found
    under the dataset's extra_files_path.
    """
    MetadataElement( name="db_name", desc="The db name for this index set", default='unknown', set_in_upload=True, readonly=True )
    MetadataElement( name="basesize", default="12", desc="The basesize for offsetscomp", visible=True, readonly=True )
    MetadataElement( name="kmers", default=[''], desc="The kmer sizes for indexes", visible=True, no_value=[''], readonly=True )
    MetadataElement( name="map_dir", desc="The maps directory", default='unknown', set_in_upload=True, readonly=True )
    MetadataElement( name="maps", default=[''], desc="The names of maps stored for this gmap gmapdb", visible=True, no_value=[''], readonly=True )
    MetadataElement( name="snps", default=[''], desc="The names of SNP indexes stored for this gmapdb", visible=True, no_value=[''], readonly=True )
    MetadataElement( name="cmet", default=False, desc="Has a cmet index", visible=True, readonly=True )
    MetadataElement( name="atoi", default=False, desc="Has a atoi index", visible=True, readonly=True )

    file_ext = 'gmapdb'
    is_binary = True
    composite_type = 'auto_primary_file'
    allow_datatype_change = False

    def generate_primary_file( self, dataset = None ):
        """
        Return the placeholder HTML used as the primary file.

        This is called only at upload to write the html file
        cannot rename the datasets here - they come with the default unfortunately
        """
        return '<html><head></head><body>AutoGenerated Primary File for Composite Dataset</body></html>'

    def regenerate_primary_file(self,dataset):
        """
        Rewrite the primary HTML file from the dataset's metadata
        (db_name, cmet, atoi and the list of maps).

        cannot do this until we are setting metadata
        """
        bn = dataset.metadata.db_name
        log.info( "GmapDB regenerate_primary_file %s" % (bn))
        rval = ['<html><head><title>GMAPDB %s</title></head><p/><H3>GMAPDB %s</H3><p/>cmet %s<br>atoi %s<H4>Maps:</H4><ul>' % (bn,bn,dataset.metadata.cmet,dataset.metadata.atoi)]
        for name in dataset.metadata.maps:
            rval.append( '<li>%s' % name)
        rval.append( '</ul></html>' )
        # open() rather than the Python-2-only file() builtin; try/finally
        # guarantees the handle is closed even when a write fails.
        f = open(dataset.file_name,'w')
        try:
            f.write("\n".join( rval ))
            f.write('\n')
        finally:
            f.close()

    def set_peek( self, dataset, is_multi_byte=False ):
        """
        Set dataset.peek and dataset.blurb from the metadata, or to
        fixed 'purged' messages when the underlying dataset is purged.
        """
        log.info( "GmapDB set_peek %s" % (dataset))
        if not dataset.dataset.purged:
            dataset.peek  = "GMAPDB index %s\n cmet %s\n atoi %s\n maps %s" % ( dataset.metadata.db_name,dataset.metadata.cmet,dataset.metadata.atoi,dataset.metadata.maps )
            dataset.blurb = "GMAPDB %s" % ( dataset.metadata.db_name )
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek( self, dataset ):
        """
        Return dataset.peek, falling back to a generic label when it
        cannot be read.
        """
        try:
            return dataset.peek
        except Exception:
            # Best effort: still show something, but do not swallow
            # KeyboardInterrupt/SystemExit the way a bare except would.
            return "GMAP index file"

    def sniff( self, filename ):
        """This datatype is never auto-detected from file content."""
        return False

    def set_meta( self, dataset, overwrite = True, **kwd ):
        """
        Scan extra_files_path and fill in the metadata elements.

        Expecting:
        extra_files_path/<db_name>/<db_name>.ref<basesize><kmer>3<index>
        extra_files_path/db_name/db_name.ref1[2345]1[2345]3offsetscomp
        extra_files_path/db_name/db_name.ref1[2345]1[2345]3positions
        extra_files_path/db_name/db_name.ref1[2345]1[2345]3gammaptrs
        index maps:
        extra_files_path/db_name/db_name.maps/*.iit

        Only the '...3positions' files are matched; every sub-directory of
        a db directory is treated as the maps directory.
        The overwrite argument is accepted but not consulted.
        """
        log.info( "GmapDB set_meta %s %s" % (dataset,dataset.extra_files_path))
        # Pattern groups: 1 db name, 3 'ref', 4 'met', 5 'a2i',
        # 7 basesize, 8 kmer, 10 snp index name (suffix after '3positions.').
        pat = re.compile(r'(.*)\.((ref)|(met)[atgc][atgc]|(a2i)[atgc][atgc])((\d\d)(\d\d))?3positions(\.(.+))?')
        efp = dataset.extra_files_path
        # Collect names in fresh lists and assign them at the end instead of
        # appending to dataset.metadata.maps/.snps in place: in-place appends
        # accumulate duplicates on every call and may mutate the declared
        # default list. The leading '' mirrors the declared default [''].
        maps = ['']
        snps = ['']
        for i,fname in enumerate(os.listdir(efp)):
            log.info( "GmapDB set_meta %s %s" % (i,fname))
            fpath = os.path.join(efp,fname)
            if not os.path.isdir(fpath):
                continue
            kmers = {'':'default'} # HACK  '' empty key  added so user has default choice when selecting kmer from metadata
            for j,iname in enumerate(os.listdir(fpath)):
                log.info( "GmapDB set_meta file %s %s" % (j,iname))
                ipath = os.path.join(fpath,iname)
                if os.path.isdir(ipath):  # find maps
                    dataset.metadata.map_dir = iname
                    for mapfile in os.listdir(ipath):
                        mapname = mapfile.replace('.iit','')
                        log.info( "GmapDB set_meta map %s %s" % (mapname,mapfile))
                        maps.append(mapname)
                    continue
                m = pat.match(iname)
                if not m:
                    continue
                log.info( "GmapDB set_meta m %s %s " % (iname, m))
                dataset.metadata.db_name = fname
                if m.group(3) == 'ref':
                    if m.group(10) is not None:
                        # reference index carrying a SNP suffix
                        snps.append(m.group(10))
                    else:
                        if m.group(8) is not None:
                            k = int(m.group(8))
                            kmers[k] = k
                        if m.group(7) is not None:
                            dataset.metadata.basesize = int( m.group(7))
                elif m.group(4) == 'met':
                    dataset.metadata.cmet = True
                elif m.group(5) == 'a2i':
                    dataset.metadata.atoi = True
            dataset.metadata.kmers = list(kmers.keys())
        dataset.metadata.maps = maps
        dataset.metadata.snps = snps
118 | |
2
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
class GmapSnpIndex( Text ):
    """
    A GMAP SNP index created by snpindex

    Composite datatype: the primary file is a generated HTML page, and
    set_meta() derives db_name and snps_name from the file names found in
    the dataset's extra_files_path.
    """
    MetadataElement( name="db_name", desc="The db name for this index set", default='unknown', set_in_upload=True, readonly=True )
    MetadataElement( name="snps_name", default='snps', desc="The name of SNP index", visible=True, no_value='', readonly=True )

    file_ext = 'gmapsnpindex'
    is_binary = True
    composite_type = 'auto_primary_file'
    allow_datatype_change = False

    def generate_primary_file( self, dataset = None ):
        """
        Return the placeholder HTML used as the primary file.

        This is called only at upload to write the html file
        cannot rename the datasets here - they come with the default unfortunately
        """
        return '<html><head></head><body>AutoGenerated Primary File for Composite Dataset</body></html>'

    def regenerate_primary_file(self,dataset):
        """
        Rewrite the primary HTML file from the dataset's metadata.

        cannot do this until we are setting metadata

        Only db_name and snps_name are used: they are the metadata elements
        this class declares. The previous body read cmet, atoi and maps,
        which are not declared on this class.
        """
        bn = dataset.metadata.db_name
        sn = dataset.metadata.snps_name
        log.info( "GmapSnpIndex regenerate_primary_file %s %s" % (sn,bn))
        rval = ['<html><head><title>GMAP SNPindex %s on %s</title></head><p/><H3>GMAP SNPindex %s on %s</H3><p/>' % (sn,bn,sn,bn)]
        rval.append( '</html>' )
        # open() rather than the Python-2-only file() builtin; try/finally
        # guarantees the handle is closed even when a write fails.
        f = open(dataset.file_name,'w')
        try:
            f.write("\n".join( rval ))
            f.write('\n')
        finally:
            f.close()

    def set_peek( self, dataset, is_multi_byte=False ):
        """
        Set dataset.peek and dataset.blurb from the metadata, or to
        fixed 'purged' messages when the underlying dataset is purged.
        """
        log.info( "GmapSnpIndex set_peek %s" % (dataset))
        if not dataset.dataset.purged:
            dataset.peek  = "GMAP SNPindex %s on %s\n" % ( dataset.metadata.snps_name,dataset.metadata.db_name)
            dataset.blurb = "GMAP SNPindex %s on %s\n" % ( dataset.metadata.snps_name,dataset.metadata.db_name)
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek( self, dataset ):
        """
        Return dataset.peek, falling back to a generic label when it
        cannot be read.
        """
        try:
            return dataset.peek
        except Exception:
            # Best effort: still show something, but do not swallow
            # KeyboardInterrupt/SystemExit the way a bare except would.
            return "GMAP SNP index"

    def sniff( self, filename ):
        """This datatype is never auto-detected from file content."""
        return False

    def set_meta( self, dataset, overwrite = True, **kwd ):
        """
        Set db_name and snps_name from the '...3positions.<snp_name>' file.

        Expecting:
        extra_files_path/snp_name.iit
        extra_files_path/db_name/db_name.ref1[2345]1[2345]3offsetscomp.snp_name
        extra_files_path/db_name/db_name.ref1[2345]1[2345]3positions.snp_name
        extra_files_path/db_name/db_name.ref1[2345]1[2345]3gammaptrs.snp_name

        The overwrite argument is accepted but not consulted.
        """
        log.info( "GmapSnpIndex set_meta %s %s" % (dataset,dataset.extra_files_path))
        # Pattern groups: 1 db name, 6 snp index name.
        pat = re.compile(r'(.*)\.(ref((\d\d)(\d\d))?3positions)\.(.+)?')
        # NOTE(review): only the top-level entries of extra_files_path are
        # matched here, while the layout above shows the index files under
        # a db_name/ sub-directory - confirm which layout the tools write.
        for fname in os.listdir(dataset.extra_files_path):
            m = pat.match(fname)
            if m:
                dataset.metadata.db_name = m.group(1)
                dataset.metadata.snps_name = m.group(6)
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
186 |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
187 |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
188 |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
189 |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
class IntervalIndexTree(Text):
    """
    GMAP Interval Index Tree (IIT) map, as produced by iit_store.

    Expected location on disk:
        (/path/to/map)/(mapname).iit
    """
    # Flagged as binary content.
    is_binary = True
    file_ext = 'iit'
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
198 |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
class SpliceSitesIntervalIndexTree(IntervalIndexTree):
    """
    GMAP Interval Index Tree map created by iit_store, registered under
    the 'splicesites.iit' extension.
    """
    file_ext = 'splicesites.iit'
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
205 |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
class IntronsIntervalIndexTree(IntervalIndexTree):
    """
    GMAP Interval Index Tree map created by iit_store, registered under
    the 'introns.iit' extension.
    """
    file_ext = 'introns.iit'
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
212 |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
class SNPsIntervalIndexTree(IntervalIndexTree):
    """
    GMAP Interval Index Tree map created by iit_store, registered under
    the 'snps.iit' extension.
    """
    file_ext = 'snps.iit'
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
219 |
5
f4b4c1712e39
GSNAP - added datatypes for tally related data and gnsap default output
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
class TallyIntervalIndexTree(IntervalIndexTree):
    """
    GMAP Interval Index Tree map created by iit_store, registered under
    the 'tally.iit' extension.
    """
    file_ext = 'tally.iit'
f4b4c1712e39
GSNAP - added datatypes for tally related data and gnsap default output
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
226 |
2
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
227 class IntervalAnnotation( Text ): |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
228 """ |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
229 Class describing a GMAP Interval format: |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
230 >label coords optional_tag |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
231 optional_annotation (which may be zero, one, or multiple lines) |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
232 The coords should be of the form: |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
233 chr:position |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
234 chr:startposition..endposition |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
235 """ |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
236 file_ext = 'gmap_annotation' |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
237 """Add metadata elements""" |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
238 MetadataElement( name="annotations", default=0, desc="Number of interval annotations", readonly=True, optional=True, visible=False, no_value=0 ) |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
239 |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
def set_meta( self, dataset, **kwd ):
    """
    Set the number of annotations and the number of data lines in dataset.

    Every line of the file (blank lines included) is counted as a data
    line.  A line whose first non-whitespace character is '>' is a header
    line and is additionally counted as one annotation.

    dataset -- object providing ``file_name`` (path of the file to scan)
               and ``metadata`` (receives ``data_lines`` and ``annotations``)
    kwd     -- accepted for interface compatibility; not used here
    """
    data_lines = 0
    annotations = 0
    # open() rather than the Python-2-only file() builtin, and close the
    # handle explicitly instead of leaving it to the garbage collector.
    fh = open( dataset.file_name )
    try:
        for line in fh:
            data_lines += 1
            if line.strip().startswith( '>' ):
                annotations += 1
    finally:
        fh.close()
    dataset.metadata.data_lines = data_lines
    dataset.metadata.annotations = annotations
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
def set_peek( self, dataset, is_multi_byte=False ):
    """
    Fill in dataset.peek and dataset.blurb.

    For a purged dataset both fields are set to fixed placeholder messages.
    Otherwise the peek comes from data.get_file_peek() and the blurb is the
    annotation count when the metadata has a non-zero one, or else the
    result of data.nice_size() on the dataset size.
    """
    # Purged datasets have nothing to read: use the placeholders and stop.
    if dataset.dataset.purged:
        dataset.peek = 'file does not exist'
        dataset.blurb = 'file purged from disk'
        return
    dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
    annotation_count = dataset.metadata.annotations
    if annotation_count:
        dataset.blurb = "%s annotations" % util.commaify( str( annotation_count ) )
    else:
        dataset.blurb = data.nice_size( dataset.get_size() )
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
265 |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
def sniff( self, filename ):
    """
    Determines whether the file is a gmap annotation file
    Format:
        >label coords optional_tag
        optional_annotation (which may be zero, one, or multiple lines)
    For example, the label may be an EST accession, with the coords
    representing its genomic position.  Labels may be duplicated if
    necessary.
    The coords should be of the form
        chr:position
        chr:startposition..endposition
    The term "chr:position" is equivalent to "chr:position..position".  If
    you want to indicate that the interval is on the minus strand or
    reverse direction, then <endposition> may be less than <startposition>.

    Only header lines (those starting with '>') are checked, and at most
    the first 10 of them.  Returns True when at least one header line was
    seen and every checked header matched the format, otherwise False.
    An error opening or reading the file propagates to the caller.
    """
    # >label chr:position[..endposition][ optional_tag]
    # The coords group is explicitly closed (an unbalanced pattern raises
    # re.error) and the optional tag may be a single character.
    pat = re.compile( r'>(\S+)\s((\S+):(\d+)(\.\.(\d+))?)(\s(.+))?$' )
    max_headers = 10
    headers = 0
    # Open before the try block so that a failed open() is reported as
    # itself rather than as a NameError raised from the finally clause.
    fh = open( filename )
    try:
        for line in fh:
            line = line.strip()
            if not line.startswith( '>' ):
                continue # blank line or free-form annotation text
            if pat.match( line ) is None: # Failed to match
                return False
            headers += 1
            if headers >= max_headers:
                break
    finally:
        fh.close()
    return headers > 0
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
299 |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
class SpliceSiteAnnotation(IntervalAnnotation):
    """
    Example:
        >NM_004448.ERBB2.exon1 17:35110090..35110091 donor 6678
        >NM_004448.ERBB2.exon2 17:35116768..35116769 acceptor 6678
        >NM_004448.ERBB2.exon2 17:35116920..35116921 donor 1179
        >NM_004448.ERBB2.exon3 17:35118099..35118100 acceptor 1179
        >NM_004449.ERG.exon1 21:38955452..38955451 donor 783
        >NM_004449.ERG.exon2 21:38878740..38878739 acceptor 783
        >NM_004449.ERG.exon2 21:38878638..38878637 donor 360
        >NM_004449.ERG.exon3 21:38869542..38869541 acceptor 360
    Each line must start with a ">" character, then be followed by an
    identifier, which may have duplicates and can have any format, with
    the gene name or exon number shown here only as a suggestion.  Then
    there should be the chromosomal coordinates which straddle the
    exon-intron boundary, so one coordinate is on the exon and one is on
    the intron.  (Coordinates are all 1-based, so the first character of a
    chromosome is number 1.)  Finally, there should be the splice type:
    "donor" or "acceptor".  You may optionally store the intron distance
    at the end.  GSNAP can use this intron distance, if it is longer than
    its value for --localsplicedist, to look for long introns at that
    splice site.  The same splice site may have different intron distances
    in the database; GSNAP will use the longest intron distance reported
    in searching for long introns.
    """
    file_ext = 'gmap_splicesites'

    def sniff( self, filename ):
        """
        Determines whether the file is a gmap splice site annotation file

        At most the first 10 non-empty lines are checked and every one of
        them must be a splice site header line.  Returns True when at
        least one line was checked and all checked lines matched,
        otherwise False.  An error opening or reading the file propagates
        to the caller.
        """
        # >label chr:position..position donor|acceptor[ intron_dist]
        # The label is any run of non-whitespace: the format places no
        # constraint on it and the documented examples use ".exonN" labels.
        pat = re.compile( r'>(\S+)\s((\S+):(\d+)\.\.(\d+))\s(donor|acceptor)(\s(\d+))?$' )
        max_lines = 10
        checked = 0
        # Open before the try block so that a failed open() is reported as
        # itself rather than as a NameError raised from the finally clause.
        fh = open( filename )
        try:
            for line in fh:
                line = line.strip()
                if not line:
                    continue
                if pat.match( line ) is None: # Failed to match
                    return False
                checked += 1
                if checked >= max_lines:
                    break
        finally:
            fh.close()
        return checked > 0
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
346 |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
class IntronAnnotation(IntervalAnnotation):
    """
    Example:
        >NM_004448.ERBB2.intron1 17:35110090..35116769
        >NM_004448.ERBB2.intron2 17:35116920..35118100
        >NM_004449.ERG.intron1 21:38955452..38878739
        >NM_004449.ERG.intron2 21:38878638..38869541
    The coordinates are 1-based, and specify the exon coordinates
    surrounding the intron, with the first coordinate being from the donor
    exon and the second one being from the acceptor exon.
    """
    file_ext = 'gmap_introns'

    def sniff( self, filename ):
        """
        Determines whether the file is a gmap Intron annotation file

        At most the first 10 non-empty lines are checked and every one of
        them must be an intron header line whose label ends in
        ".intron<N>".  Returns True when at least one line was checked and
        all checked lines matched, otherwise False.  An error opening or
        reading the file propagates to the caller.
        """
        # >label.intronN chr:position..position[ optional text]
        # The coords group is explicitly closed; an unbalanced pattern
        # raises re.error instead of matching anything.
        pat = re.compile( r'>(\S+\.intron\d+)\s((\S+):(\d+)\.\.(\d+))(\s(.)+)?$' )
        max_lines = 10
        checked = 0
        # Open before the try block so that a failed open() is reported as
        # itself rather than as a NameError raised from the finally clause.
        fh = open( filename )
        try:
            for line in fh:
                line = line.strip()
                if not line:
                    continue
                if pat.match( line ) is None: # Failed to match
                    return False
                checked += 1
                if checked >= max_lines:
                    break
        finally:
            fh.close()
        return checked > 0
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
379 |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
380 class SNPAnnotation(IntervalAnnotation): |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
381 file_ext = 'gmap_snps' |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
382 """ |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
383 Example: |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
384 >rs62211261 21:14379270 CG |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
385 >rs62211262 21:14379281 AT |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
386 >rs62211263 21:14379298 WN |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
387 Each line must start with a ">" character, then be followed by an |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
388 identifier (which may have duplicates). Then there should be the |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
389 chromosomal coordinate of the SNP. (Coordinates are all 1-based, so |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
390 the first character of a chromosome is number 1.) Finally, there |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
391 should be the two possible alleles. (Previous versions required that |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
392 these be in alphabetical order: "AC", "AG", "AT", "CG", "CT", or "GT", |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
393 but that is no longer a requirement.) These alleles must correspond |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
394 to the possible nucleotides on the plus strand of the genome. If the |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
395 one of these two letters does not match the allele in the reference |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
396 sequence, that SNP will be ignored in subsequent processing as a |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
397 probable error. |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
398 |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
399 GSNAP also supports the idea of a wildcard SNP. A wildcard SNP allows |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
400 all nucleotides to match at that position, not just a given reference |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
401 and alternate allele. It is essentially as if an "N" were recorded at |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
402 that genomic location, although the index files still keep track of |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
403 the reference allele. To indicate that a position has a wildcard SNP, |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
404 you can indicate the genotype as "WN", where "W" is the reference |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
405 allele. Another indication of a wildcard SNP is to provide two |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
406 separate lines at that position with the genotypes "WX" and "WY", |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
407 where "W" is the reference allele and "X" and "Y" are two different |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
408 alternate alleles. |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
409 """ |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
def sniff( self, filename ):
    """
    Determines whether the file is a gmap SNP annotation file.

    Up to the first 10 non-empty lines are examined; every one of them
    must have the form::

        >label chr:position alleles

    where ``alleles`` is a two-letter genotype such as "AT" or "WN".

    Returns True when at least one non-empty line was examined and all
    examined lines matched, otherwise False (including for an empty file).
    Raises IOError/OSError if the file cannot be opened.
    """
    # >label chr:position ATCG
    # The group around "chr:position" is now closed; the original pattern
    # had an unbalanced "(" and made re.match raise on every call.
    pat = re.compile(r'>(\S+)\s((\S+):(\d+))\s([TACGW][TACGN])$')
    # Open outside the try block so that a failed open() is reported as
    # itself rather than being masked by a NameError from fh.close().
    fh = open( filename )
    count = 0
    try:
        while count < 10:
            line = fh.readline()
            if not line:
                break #EOF
            line = line.strip()
            if line: # blank lines are skipped and not counted
                count += 1
                if pat.match(line) is None: # Failed to match
                    return False
    finally:
        fh.close()
    # Only claim the format when some content was actually checked.
    return count > 0
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
430 |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
431 |
5
f4b4c1712e39
GSNAP - added datatypes for tally related data and gnsap default output
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
class TallyAnnotation(IntervalAnnotation):
    """
    Output produced by gsnap_tally
    Example:
    >144 chr20:57268791..57268935
    G0
    A1(1@7|1Q-3)
    A2(1@36,1@1|1Q2,1Q-8)
    C2 0.889,0.912,0.889,0.889,0.933,0.912,0.912,0.889,0.889,0.889 -2.66,-2.89,-2.66,-2.66,-3.16,-2.89,-2.89,-2.66,-2.66,-2.66
    C1 T1 0.888,0.9,0.888,0.9,0.913,0.9,0.911,0.888,0.9,0.913 -2.66,-2.78,-2.66,-2.78,-2.91,-2.78,-2.89,-2.66,-2.78,-2.91
    """
    file_ext = 'gsnap_tally'

    def sniff( self, filename ):
        """
        Determines whether the file is a gsnap_tally output file.

        Up to the first 10 non-empty lines are examined; each must be
        either a block header (">total chr:start..end") or a per-position
        base count line (a base letter followed by a digit and optional
        details).

        Returns True when at least one non-empty line was examined and all
        examined lines matched, otherwise False (including for an empty
        file). Raises IOError/OSError if the file cannot be opened.
        """
        header_pat = re.compile(r'^>(\d+)\s((\S+):(\d+)\.\.(\d+))$') #>total chr:position..position
        base_pat = re.compile(r'^[GATCN]\d.*$') #BaseCountDetails
        # Open outside the try block so that a failed open() is reported
        # as itself rather than being masked by a NameError from fh.close().
        fh = open( filename )
        count = 0
        try:
            while count < 10:
                line = fh.readline()
                if not line:
                    break #EOF
                line = line.strip()
                if line: # blank lines are skipped and not counted
                    count += 1
                    if header_pat.match(line) is None and base_pat.match(line) is None: # Failed to match
                        return False
        finally:
            fh.close()
        # Only claim the format when some content was actually checked.
        return count > 0
f4b4c1712e39
GSNAP - added datatypes for tally related data and gnsap default output
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
465 |
f4b4c1712e39
GSNAP - added datatypes for tally related data and gnsap default output
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
class GsnapResult( Text ):
    """
    Datatype for the default output format written by gsnap.

    Files of this type can be used as input for gsnap_tally.
    """
    # Extension under which this datatype is registered.
    file_ext = 'gsnap'
f4b4c1712e39
GSNAP - added datatypes for tally related data and gnsap default output
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
471 |
f4b4c1712e39
GSNAP - added datatypes for tally related data and gnsap default output
Jim Johnson <jj@umn.edu>
parents:
2
diff
changeset
|
472 |