Mercurial > repos > jjohnson > gmap
annotate gmap/lib/galaxy/datatypes/gmap.py @ 2:52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
author | Jim Johnson <jj@umn.edu> |
---|---|
date | Fri, 21 Oct 2011 11:38:55 -0500 |
parents | d58d272914e7 |
children | f4b4c1712e39 |
rev | line source |
---|---|
0 | 1 """ |
2 GMAP indexes | |
3 """ | |
4 import logging | |
5 import os,os.path,re | |
2
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
6 import data |
0 | 7 from data import Text |
2
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
8 from galaxy import util |
0 | 9 from metadata import MetadataElement |
10 | |
11 log = logging.getLogger(__name__) | |
12 | |
class GmapDB( Text ):
    """
    A GMAP DB for indexes.

    Composite datatype: the primary file is an auto-generated HTML page and
    the index files are read from ``dataset.extra_files_path`` by set_meta.
    """
    MetadataElement( name="db_name", desc="The db name for this index set", default='unknown', set_in_upload=True, readonly=True )
    MetadataElement( name="basesize", default="12", desc="The basesize for offsetscomp", visible=True, readonly=True )
    MetadataElement( name="kmers", default=[''], desc="The kmer sizes for indexes", visible=True, no_value=[''], readonly=True )
    MetadataElement( name="map_dir", desc="The maps directory", default='unknown', set_in_upload=True, readonly=True )
    MetadataElement( name="maps", default=[''], desc="The names of maps stored for this gmap gmapdb", visible=True, no_value=[''], readonly=True )
    MetadataElement( name="snps", default=[''], desc="The names of SNP indexes stored for this gmapdb", visible=True, no_value=[''], readonly=True )
    MetadataElement( name="cmet", default=False, desc="Has a cmet index", visible=True, readonly=True )
    MetadataElement( name="atoi", default=False, desc="Has a atoi index", visible=True, readonly=True )

    file_ext = 'gmapdb'
    is_binary = True
    composite_type = 'auto_primary_file'
    allow_datatype_change = False

    def generate_primary_file( self, dataset = None ):
        """
        Return the placeholder HTML used as the primary file.

        This is called only at upload to write the html file
        cannot rename the datasets here - they come with the default unfortunately
        """
        return '<html><head></head><body>AutoGenerated Primary File for Composite Dataset</body></html>'

    def regenerate_primary_file(self,dataset):
        """
        Rewrite dataset.file_name as an HTML summary of the metadata
        (db_name, cmet, atoi and one list item per map name).

        cannot do this until we are setting metadata
        """
        bn = dataset.metadata.db_name
        log.info( "GmapDB regenerate_primary_file %s" % (bn))
        rval = ['<html><head><title>GMAPDB %s</title></head><p/><H3>GMAPDB %s</H3><p/>cmet %s<br>atoi %s<H4>Maps:</H4><ul>' % (bn,bn,dataset.metadata.cmet,dataset.metadata.atoi)]
        for name in dataset.metadata.maps:
            rval.append( '<li>%s' % name)
        rval.append( '</ul></html>' )
        # open() replaces the Python 2-only file() builtin; the with-block
        # closes the handle even when a write raises.
        with open(dataset.file_name,'w') as f:
            f.write("\n".join( rval ))
            f.write('\n')

    def set_peek( self, dataset, is_multi_byte=False ):
        """
        Set dataset.peek and dataset.blurb from the metadata, or to
        'purged' messages when the underlying dataset has been purged.
        """
        log.info( "GmapDB set_peek %s" % (dataset))
        if not dataset.dataset.purged:
            dataset.peek = "GMAPDB index %s\n cmet %s\n atoi %s\n maps %s" % ( dataset.metadata.db_name,dataset.metadata.cmet,dataset.metadata.atoi,dataset.metadata.maps )
            dataset.blurb = "GMAPDB %s" % ( dataset.metadata.db_name )
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek( self, dataset ):
        """
        Return dataset.peek, falling back to a fixed label if reading it fails.
        """
        try:
            return dataset.peek
        except Exception:
            # Deliberate best-effort fallback, but no longer swallows
            # KeyboardInterrupt / SystemExit like the bare except did.
            return "GMAP index file"

    def sniff( self, filename ):
        """
        Always return False: this datatype is never detected by sniffing.
        """
        return False

    def set_meta( self, dataset, overwrite = True, **kwd ):
        """
        Populate the metadata by scanning dataset.extra_files_path.

        Expecting:
        extra_files_path/<db_name>/<db_name>.ref<basesize><kmer>3<index>
        extra_files_path/db_name/db_name.ref1[2345]1[2345]3offsetscomp
        extra_files_path/db_name/db_name.ref1[2345]1[2345]3positions
        extra_files_path/db_name/db_name.ref1[2345]1[2345]3gammaptrs
        index maps:
        extra_files_path/db_name/db_name.maps/*.iit

        The maps and snps lists are rebuilt from what is on disk and then
        assigned, so calling set_meta again does not duplicate entries and
        the shared [''] default lists are never mutated in place.
        The overwrite argument is accepted but not consulted.
        """
        log.info( "GmapDB set_meta %s %s" % (dataset,dataset.extra_files_path))
        pat = r'(.*)\.((ref)|(met)[atgc][atgc]|(a2i)[atgc][atgc])((\d\d)(\d\d))?3positions(\.(.+))?'
        efp = dataset.extra_files_path
        # Seeded with '' to match the declared defaults, so the first entry
        # stays the empty one the in-place appends used to leave there.
        maps = ['']
        snps = ['']
        for i,fname in enumerate(os.listdir(efp)):
            log.info( "GmapDB set_meta %s %s" % (i,fname))
            fpath = os.path.join(efp,fname)
            if not os.path.isdir(fpath):
                continue
            kmers = {'':'default'} # HACK '' empty key added so user has default choice when selecting kmer from metadata
            for j,iname in enumerate(os.listdir(fpath)):
                log.info( "GmapDB set_meta file %s %s" % (j,iname))
                ipath = os.path.join(fpath,iname)
                if os.path.isdir(ipath): # find maps
                    dataset.metadata.map_dir = iname
                    for mapfile in os.listdir(ipath):
                        # Strip only a trailing '.iit'; replace() would also
                        # remove the text from the middle of a name.
                        if mapfile.endswith('.iit'):
                            mapname = mapfile[:-len('.iit')]
                        else:
                            mapname = mapfile
                        log.info( "GmapDB set_meta map %s %s" % (mapname,mapfile))
                        maps.append(mapname)
                    continue
                m = re.match(pat,iname)
                if not m:
                    continue
                log.info( "GmapDB set_meta m %s %s " % (iname, m))
                dataset.metadata.db_name = fname
                # Group numbers follow the parentheses in pat:
                # 3=ref, 4=met, 5=a2i, 7=basesize, 8=kmer, 10=snp suffix.
                if m.group(3) == 'ref':
                    if m.group(10) is not None:
                        snps.append(m.group(10))
                    else:
                        if m.group(8) is not None:
                            k = int(m.group(8))
                            kmers[k] = k
                        if m.group(7) is not None:
                            dataset.metadata.basesize = int( m.group(7))
                elif m.group(4) == 'met':
                    dataset.metadata.cmet = True
                elif m.group(5) == 'a2i':
                    dataset.metadata.atoi = True
            # list() keeps this a real list on interpreters where keys() is a view
            dataset.metadata.kmers = list(kmers.keys())
        dataset.metadata.maps = maps
        dataset.metadata.snps = snps
118 | |
2
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
class GmapSnpIndex( Text ):
    """
    A GMAP SNP index created by snpindex.

    Composite datatype: the primary file is an auto-generated HTML page and
    the index files are read from ``dataset.extra_files_path`` by set_meta.
    """
    MetadataElement( name="db_name", desc="The db name for this index set", default='unknown', set_in_upload=True, readonly=True )
    MetadataElement( name="snps_name", default='snps', desc="The name of SNP index", visible=True, no_value='', readonly=True )

    file_ext = 'gmapsnpindex'
    is_binary = True
    composite_type = 'auto_primary_file'
    allow_datatype_change = False

    def generate_primary_file( self, dataset = None ):
        """
        Return the placeholder HTML used as the primary file.

        This is called only at upload to write the html file
        cannot rename the datasets here - they come with the default unfortunately
        """
        return '<html><head></head><body>AutoGenerated Primary File for Composite Dataset</body></html>'

    def regenerate_primary_file(self,dataset):
        """
        Rewrite dataset.file_name as an HTML summary naming the SNP index
        and the db it belongs to.

        cannot do this until we are setting metadata

        Only db_name and snps_name are reported: they are the only metadata
        elements this class declares. The previous body was copied from
        GmapDB and read cmet/atoi/maps, which are not declared here.
        """
        bn = dataset.metadata.db_name
        sn = dataset.metadata.snps_name
        log.info( "GmapSnpIndex regenerate_primary_file %s %s" % (sn,bn))
        rval = '<html><head><title>GMAP SNPindex %s on %s</title></head><p/><H3>GMAP SNPindex %s on %s</H3></html>' % (sn,bn,sn,bn)
        # open() replaces the Python 2-only file() builtin; the with-block
        # closes the handle even when a write raises.
        with open(dataset.file_name,'w') as f:
            f.write(rval)
            f.write('\n')

    def set_peek( self, dataset, is_multi_byte=False ):
        """
        Set dataset.peek and dataset.blurb from the metadata, or to
        'purged' messages when the underlying dataset has been purged.
        """
        log.info( "GmapSnpIndex set_peek %s" % (dataset))
        if not dataset.dataset.purged:
            dataset.peek = "GMAP SNPindex %s on %s\n" % ( dataset.metadata.snps_name,dataset.metadata.db_name)
            dataset.blurb = "GMAP SNPindex %s on %s\n" % ( dataset.metadata.snps_name,dataset.metadata.db_name)
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek( self, dataset ):
        """
        Return dataset.peek, falling back to a fixed label if reading it fails.
        """
        try:
            return dataset.peek
        except Exception:
            # Deliberate best-effort fallback, but no longer swallows
            # KeyboardInterrupt / SystemExit like the bare except did.
            return "GMAP SNP index"

    def sniff( self, filename ):
        """
        Always return False: this datatype is never detected by sniffing.
        """
        return False

    def set_meta( self, dataset, overwrite = True, **kwd ):
        """
        Set db_name and snps_name from the file names in extra_files_path.

        Expecting:
        extra_files_path/snp_name.iit
        extra_files_path/db_name/db_name.ref1[2345]1[2345]3offsetscomp.snp_name
        extra_files_path/db_name/db_name.ref1[2345]1[2345]3positions.snp_name
        extra_files_path/db_name/db_name.ref1[2345]1[2345]3gammaptrs.snp_name

        The overwrite argument is accepted but not consulted.
        """
        log.info( "GmapSnpIndex set_meta %s %s" % (dataset,dataset.extra_files_path))
        pat = r'(.*)\.(ref((\d\d)(\d\d))?3positions)\.(.+)?'
        efp = dataset.extra_files_path
        # NOTE(review): only the top level of extra_files_path is listed, while
        # the layout above puts the *3positions.snp_name files inside a db_name
        # subdirectory - confirm which layout the snpindex tool really writes.
        for fname in os.listdir(efp):
            m = re.match(pat,fname)
            if m:
                # group 1 is the text before '.ref...', group 6 the snp suffix
                # (None when nothing follows the final '.')
                dataset.metadata.db_name = m.group(1)
                dataset.metadata.snps_name = m.group(6)
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
186 |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
187 |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
188 |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
189 |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
class IntervalIndexTree( Text ):
    """
    GMAP Interval Index Tree (IIT) map, as created by iit_store.

    Path layout: (/path/to/map)/(mapname).iit
    """
    is_binary = True
    file_ext = 'iit'
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
198 |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
class SpliceSitesIntervalIndexTree( IntervalIndexTree ):
    """
    Interval Index Tree map created by iit_store,
    registered under the 'splicesites.iit' extension.
    """
    file_ext = 'splicesites.iit'
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
205 |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
class IntronsIntervalIndexTree( IntervalIndexTree ):
    """
    Interval Index Tree map created by iit_store,
    registered under the 'introns.iit' extension.
    """
    file_ext = 'introns.iit'
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
212 |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
class SNPsIntervalIndexTree( IntervalIndexTree ):
    """
    Interval Index Tree map created by iit_store,
    registered under the 'snps.iit' extension.
    """
    file_ext = 'snps.iit'
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
219 |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
220 class IntervalAnnotation( Text ): |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
221 """ |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
222 Class describing a GMAP Interval format: |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
223 >label coords optional_tag |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
224 optional_annotation (which may be zero, one, or multiple lines) |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
225 The coords should be of the form: |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
226 chr:position |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
227 chr:startposition..endposition |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
228 """ |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
229 file_ext = 'gmap_annotation' |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
230 """Add metadata elements""" |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
231 MetadataElement( name="annotations", default=0, desc="Number of interval annotations", readonly=True, optional=True, visible=False, no_value=0 ) |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
232 |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
def set_meta( self, dataset, **kwd ):
    """
    Set the number of annotations and the number of data lines in dataset.

    Every line of dataset.file_name, blank lines included, counts as a
    data line; a line whose stripped text starts with '>' also counts as
    one annotation. The totals are stored in dataset.metadata.data_lines
    and dataset.metadata.annotations.
    """
    data_lines = 0
    annotations = 0
    # open() replaces the Python 2-only file() builtin; the with-block
    # closes the handle, which the bare iteration never did.
    with open( dataset.file_name ) as fh:
        for line in fh:
            data_lines += 1
            if line.strip().startswith( '>' ):
                annotations += 1
    dataset.metadata.data_lines = data_lines
    dataset.metadata.annotations = annotations
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
def set_peek(self, dataset, is_multi_byte=False):
    """
    Fill in ``dataset.peek`` and ``dataset.blurb``.

    For a purged dataset both are set to fixed placeholder messages.
    Otherwise the peek comes from ``data.get_file_peek`` and the blurb is
    the annotation count when ``dataset.metadata.annotations`` is truthy,
    or the size reported by ``data.nice_size`` when it is not.
    """
    # Guard clause: nothing on disk to inspect once the dataset is purged.
    if dataset.dataset.purged:
        dataset.peek = 'file does not exist'
        dataset.blurb = 'file purged from disk'
        return

    dataset.peek = data.get_file_peek(dataset.file_name, is_multi_byte=is_multi_byte)
    if not dataset.metadata.annotations:
        dataset.blurb = data.nice_size(dataset.get_size())
        return
    annotation_total = util.commaify(str(dataset.metadata.annotations))
    dataset.blurb = "%s annotations" % annotation_total
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
258 |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
def sniff(self, filename):
    """
    Determines whether the file is a gmap annotation file
    Format:
        >label coords optional_tag
        optional_annotation (which may be zero, one, or multiple lines)
    For example, the label may be an EST accession, with the coords
    representing its genomic position.  Labels may be duplicated if
    necessary.
    The coords should be of the form
        chr:position
        chr:startposition..endposition
    The term "chr:position" is equivalent to "chr:position..position".  If
    you want to indicate that the interval is on the minus strand or
    reverse direction, then <endposition> may be less than <startposition>.

    Only lines starting with '>' are checked; other lines are treated as
    free-form annotation text and skipped.  At most the first 10 header
    lines are examined.

    Returns True when at least one header line was found and every
    examined header line matches the format, otherwise False.
    """
    # >label chr:position[..endposition][ optional_tag]
    # The coords group is now closed (the original pattern had an
    # unbalanced parenthesis and raised re.error), and the optional tag
    # may be a single character.
    pat = re.compile(r'>(\S+)\s((\S+):(\d+)(\.\.(\d+))?)(\s+(.+))?$')
    max_headers = 10
    count = 0
    # Open before the try so a failed open() is reported as itself rather
    # than being masked by a NameError on fh in the finally clause.
    fh = open(filename)
    try:
        while count < max_headers:
            line = fh.readline()
            if not line:
                break  # EOF
            line = line.strip()
            if line.startswith('>'):
                count += 1
                if pat.match(line) is None:  # Failed to match
                    return False
    finally:
        fh.close()
    # A file without any header line gives no evidence of the format.
    return count > 0
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
292 |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
class SpliceSiteAnnotation(IntervalAnnotation):
    """
    GMAP splice site annotation file.

    Example:
        >NM_004448.ERBB2.exon1 17:35110090..35110091 donor 6678
        >NM_004448.ERBB2.exon2 17:35116768..35116769 acceptor 6678
        >NM_004448.ERBB2.exon2 17:35116920..35116921 donor 1179
        >NM_004448.ERBB2.exon3 17:35118099..35118100 acceptor 1179
        >NM_004449.ERG.exon1 21:38955452..38955451 donor 783
        >NM_004449.ERG.exon2 21:38878740..38878739 acceptor 783
        >NM_004449.ERG.exon2 21:38878638..38878637 donor 360
        >NM_004449.ERG.exon3 21:38869542..38869541 acceptor 360
    Each line must start with a ">" character, then be followed by an
    identifier, which may have duplicates and can have any format, with
    the gene name or exon number shown here only as a suggestion.  Then
    there should be the chromosomal coordinates which straddle the
    exon-intron boundary, so one coordinate is on the exon and one is on
    the intron.  (Coordinates are all 1-based, so the first character of a
    chromosome is number 1.)  Finally, there should be the splice type:
    "donor" or "acceptor".  You may optionally store the intron distance
    at the end.  GSNAP can use this intron distance, if it is longer than
    its value for --localsplicedist, to look for long introns at that
    splice site.  The same splice site may have different intron distances
    in the database; GSNAP will use the longest intron distance reported
    in searching for long introns.
    """
    file_ext = 'gmap_splicesites'

    def sniff(self, filename):
        """
        Determines whether the file is a gmap splice site annotation file

        Up to the first 10 non-empty lines are examined and each of them
        must be a splice site header line.

        Returns True when at least one non-empty line was found and all
        examined lines match, otherwise False.
        """
        # >label chr:position..position donor|acceptor[ intron_dist]
        # The label may have any format; the original pattern demanded a
        # ".intron<N>" suffix, which rejected the documented exon labels.
        pat = re.compile(r'>(\S+)\s((\S+):(\d+)\.\.(\d+))\s(donor|acceptor)(\s(\d+))?$')
        max_lines = 10
        count = 0
        # Open before the try so a failed open() is not masked by a
        # NameError on fh in the finally clause.
        fh = open(filename)
        try:
            while count < max_lines:
                line = fh.readline()
                if not line:
                    break  # EOF
                line = line.strip()
                if line:  # blank lines are skipped and not counted
                    count += 1
                    if pat.match(line) is None:  # Failed to match
                        return False
        finally:
            fh.close()
        # An empty file gives no evidence of the format.
        return count > 0
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
339 |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
class IntronAnnotation(IntervalAnnotation):
    """
    GMAP intron annotation file.

    Example:
        >NM_004448.ERBB2.intron1 17:35110090..35116769
        >NM_004448.ERBB2.intron2 17:35116920..35118100
        >NM_004449.ERG.intron1 21:38955452..38878739
        >NM_004449.ERG.intron2 21:38878638..38869541
    The coordinates are 1-based, and specify the exon coordinates
    surrounding the intron, with the first coordinate being from the donor
    exon and the second one being from the acceptor exon.
    """
    file_ext = 'gmap_introns'

    def sniff(self, filename):
        """
        Determines whether the file is a gmap Intron annotation file

        Up to the first 10 non-empty lines are examined and each of them
        must be an intron header line whose label ends in ".intron<N>".

        Returns True when at least one non-empty line was found and all
        examined lines match, otherwise False.
        """
        # >label.intronN chr:position..position[ optional text]
        # The coords group is now closed; the original pattern had an
        # unbalanced parenthesis and raised re.error when used.
        pat = re.compile(r'>(\S+\.intron\d+)\s((\S+):(\d+)\.\.(\d+))(\s(.)+)?$')
        max_lines = 10
        count = 0
        # Open before the try so a failed open() is not masked by a
        # NameError on fh in the finally clause.
        fh = open(filename)
        try:
            while count < max_lines:
                line = fh.readline()
                if not line:
                    break  # EOF
                line = line.strip()
                if line:  # blank lines are skipped and not counted
                    count += 1
                    if pat.match(line) is None:  # Failed to match
                        return False
        finally:
            fh.close()
        # An empty file gives no evidence of the format.
        return count > 0
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
372 |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
373 class SNPAnnotation(IntervalAnnotation): |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
374 file_ext = 'gmap_snps' |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
375 """ |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
376 Example: |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
377 >rs62211261 21:14379270 CG |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
378 >rs62211262 21:14379281 AT |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
379 >rs62211263 21:14379298 WN |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
380 Each line must start with a ">" character, then be followed by an |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
381 identifier (which may have duplicates). Then there should be the |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
382 chromosomal coordinate of the SNP. (Coordinates are all 1-based, so |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
383 the first character of a chromosome is number 1.) Finally, there |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
384 should be the two possible alleles. (Previous versions required that |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
385 these be in alphabetical order: "AC", "AG", "AT", "CG", "CT", or "GT", |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
386 but that is no longer a requirement.) These alleles must correspond |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
387 to the possible nucleotides on the plus strand of the genome. If the |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
388 one of these two letters does not match the allele in the reference |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
389 sequence, that SNP will be ignored in subsequent processing as a |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
390 probable error. |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
391 |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
392 GSNAP also supports the idea of a wildcard SNP. A wildcard SNP allows |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
393 all nucleotides to match at that position, not just a given reference |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
394 and alternate allele. It is essentially as if an "N" were recorded at |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
395 that genomic location, although the index files still keep track of |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
396 the reference allele. To indicate that a position has a wildcard SNP, |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
397 you can indicate the genotype as "WN", where "W" is the reference |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
398 allele. Another indication of a wildcard SNP is to provide two |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
399 separate lines at that position with the genotypes "WX" and "WY", |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
400 where "W" is the reference allele and "X" and "Y" are two different |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
401 alternate alleles. |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
402 """ |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
def sniff( self, filename ):
    """
    Determines whether the file is a gmap SNP annotation file

    Up to the first 10 non-empty lines of the file are inspected. Each of
    them must be a SNP header of the form:
        >label chr:position alleles
    where alleles is a two-letter genotype such as "CG" or "AN".

    Returns True when at least one non-empty line was inspected and every
    inspected line matched the header pattern. Returns False as soon as a
    non-matching line is found, or when the file has no non-empty lines.
    Errors raised while opening or reading the file propagate to the caller.
    """
    # Raw string so the regex escapes (\S, \s, \d) reach the re module intact.
    pat = r'>(\S+)\s(\S+):(\d+)\s([TACGW][TACGN])$' #>label chr:position ATCG
    # Open before entering the try block: if open() fails there is no handle
    # to close, and the caller sees the real I/O error.
    fh = open( filename )
    try:
        count = 0 # number of non-empty lines inspected so far
        while count < 10:
            line = fh.readline()
            if not line:
                break #EOF
            line = line.strip()
            if line: # blank lines are skipped and do not count
                count += 1
                if re.match( pat, line ) is None: # Failed to match
                    return False
        # Every inspected line matched; an empty file is not a SNP file.
        return count > 0
    finally:
        fh.close()
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
423 |
52da588232b0
Add datatypes for maps and snpindex, add iit_store and snpindex tools, update GMAP and GSNAP to use these.
Jim Johnson <jj@umn.edu>
parents:
0
diff
changeset
|
424 |