annotate lib/galaxy/datatypes/proteomics.py @ 2:418f42b34049 draft

Reuploading
author iracooke
date Mon, 23 Jul 2012 00:20:58 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
418f42b34049 Reuploading
iracooke
parents:
diff changeset
1 """
418f42b34049 Reuploading
iracooke
parents:
diff changeset
2 Proteomics format classes
418f42b34049 Reuploading
iracooke
parents:
diff changeset
3 """
418f42b34049 Reuploading
iracooke
parents:
diff changeset
4 import logging
418f42b34049 Reuploading
iracooke
parents:
diff changeset
5 import re
418f42b34049 Reuploading
iracooke
parents:
diff changeset
6 from galaxy.datatypes.data import *
418f42b34049 Reuploading
iracooke
parents:
diff changeset
7 from galaxy.datatypes.xml import *
418f42b34049 Reuploading
iracooke
parents:
diff changeset
8 from galaxy.datatypes.sniff import *
418f42b34049 Reuploading
iracooke
parents:
diff changeset
9 from galaxy.datatypes.binary import *
418f42b34049 Reuploading
iracooke
parents:
diff changeset
10
418f42b34049 Reuploading
iracooke
parents:
diff changeset
11 log = logging.getLogger(__name__)
418f42b34049 Reuploading
iracooke
parents:
diff changeset
12
418f42b34049 Reuploading
iracooke
parents:
diff changeset
13
418f42b34049 Reuploading
iracooke
parents:
diff changeset
class Xls( Binary ):
    """Class describing a binary excel spreadsheet file"""
    file_ext = "xls"

    def set_peek( self, dataset, is_multi_byte=False ):
        """Set the peek and blurb text for ``dataset``.

        A purged dataset gets placeholder text. Otherwise the peek is a fixed
        label and the blurb is whatever ``data.nice_size`` returns for the
        dataset's size. ``is_multi_byte`` is accepted but not used here.
        """
        if not dataset.dataset.purged:
            dataset.peek = "Excel Spreadsheet file"
            dataset.blurb = data.nice_size( dataset.get_size() )
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek( self, dataset ):
        """Return the stored peek, or a size-based description if reading it fails."""
        try:
            return dataset.peek
        except Exception:
            # Best-effort fallback; unlike a bare ``except`` this lets
            # SystemExit and KeyboardInterrupt propagate.
            return "Binary xls file (%s)" % ( data.nice_size( dataset.get_size() ) )
418f42b34049 Reuploading
iracooke
parents:
diff changeset
30
418f42b34049 Reuploading
iracooke
parents:
diff changeset
class PepXml(GenericXml):
    """pepXML data"""
    file_ext = "pepxml"

    def set_peek( self, dataset, is_multi_byte=False ):
        """Set the peek and blurb text.

        The peek comes from ``data.get_file_peek`` on the dataset's file;
        a purged dataset gets placeholder text instead.
        """
        if not dataset.dataset.purged:
            dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
            dataset.blurb = 'pepXML data'
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def sniff( self, filename ):
        """
        Determines whether the file is pepXML

        Returns True if any of the first three lines (stripped of
        surrounding whitespace) contains the text ``pepXML"``, else False.
        """
        xmlns_re = re.compile(r'.*pepXML"')
        # The context manager closes the handle on every exit path,
        # including when readline() raises.
        with open(filename) as handle:
            for _ in range(3):
                if xmlns_re.match(handle.readline().strip()):
                    return True
        return False
418f42b34049 Reuploading
iracooke
parents:
diff changeset
58
418f42b34049 Reuploading
iracooke
parents:
diff changeset
class MzML( GenericXml ):
    """mzML data"""
    file_ext = "mzml"

    def set_peek( self, dataset, is_multi_byte=False ):
        """Set the peek and blurb text.

        The peek comes from ``data.get_file_peek`` on the dataset's file;
        a purged dataset gets placeholder text instead.
        """
        if not dataset.dataset.purged:
            dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
            dataset.blurb = 'mzML Mass Spectrometry data'
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def sniff( self, filename ):
        """Determine whether the file is mzML.

        Returns True if any of the first three lines, stripped of
        surrounding whitespace, starts with ``<mzML``; otherwise False.
        """
        xmlns_re = re.compile("^<mzML")
        # The context manager closes the handle on every exit path,
        # including when readline() raises.
        with open(filename) as handle:
            for _ in range(3):
                if xmlns_re.match(handle.readline().strip()):
                    return True
        return False
418f42b34049 Reuploading
iracooke
parents:
diff changeset
83
418f42b34049 Reuploading
iracooke
parents:
diff changeset
84
418f42b34049 Reuploading
iracooke
parents:
diff changeset
class ProtXML( Text ):
    """protXML data"""
    file_ext = "protxml"

    def set_peek( self, dataset, is_multi_byte=False ):
        """Set the peek and blurb text.

        The peek comes from ``data.get_file_peek`` on the dataset's file;
        a purged dataset gets placeholder text instead.
        """
        if not dataset.dataset.purged:
            dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
            dataset.blurb = 'prot XML Search Results'
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def sniff( self, filename ):
        """Determine whether the file looks like protXML.

        Each of the first lines of the file must contain the corresponding
        entry of ``protxml_header`` (substring match, line endings removed).
        Returns True when every header entry is found, False otherwise --
        including when the file has fewer lines than the header.
        """
        protxml_header = [ '<?xml version="1.0" encoding="ISO-8859-1"?>',
                           'xmlns="http://regis-web.systemsbiology.net/protXML"' ]

        with open( filename ) as handle:
            for expected in protxml_header:
                # readline() yields '' at EOF, which cannot contain the
                # expected text, so short files are rejected here.
                line = handle.readline().rstrip( '\n\r' )
                if expected not in line:
                    return False
        return True
418f42b34049 Reuploading
iracooke
parents:
diff changeset
107
418f42b34049 Reuploading
iracooke
parents:
diff changeset
108
418f42b34049 Reuploading
iracooke
parents:
diff changeset
109
418f42b34049 Reuploading
iracooke
parents:
diff changeset
class MzXML( Text ):
    """mzXML data"""
    file_ext = "mzXML"

    def set_peek( self, dataset, is_multi_byte=False ):
        """Set the peek and blurb text.

        The peek comes from ``data.get_file_peek`` on the dataset's file;
        a purged dataset gets placeholder text instead.
        """
        if not dataset.dataset.purged:
            dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
            dataset.blurb = 'mzXML Mass Spectrometry data'
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def sniff( self, filename ):
        """Determine whether the file looks like mzXML.

        The first lines of the file (line endings removed) must be exactly
        equal to the entries of ``mzxml_header``. Returns True when all of
        them match, False otherwise -- including when the file has fewer
        lines than the header.
        """
        mzxml_header = [ '<?xml version="1.0" encoding="ISO-8859-1"?>',
                         '<mzXML xmlns="http://sashimi.sourceforge.net/schema_revision/mzXML_2.1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://sashimi.sourceforge.net/schema_revision/mzXML_2.1 http://sashimi.sourceforge.net/schema_revision/mzXML_2.1/mzXML_idx_2.1.xsd">' ]
        with open( filename ) as handle:
            for expected in mzxml_header:
                # readline() yields '' at EOF, which never equals a header
                # entry, so short files are rejected here.
                if handle.readline().rstrip( '\n\r' ) != expected:
                    return False
        return True
418f42b34049 Reuploading
iracooke
parents:
diff changeset
131
418f42b34049 Reuploading
iracooke
parents:
diff changeset
class Mgf( Text ):
    """Mascot Generic Format data"""
    file_ext = "mgf"

    def set_peek( self, dataset, is_multi_byte=False ):
        """Set the peek and blurb text.

        The peek comes from ``data.get_file_peek`` on the dataset's file;
        a purged dataset gets placeholder text instead.
        """
        if not dataset.dataset.purged:
            dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
            dataset.blurb = 'mgf Mascot Generic Format'
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def sniff( self, filename ):
        """Determine whether the file looks like MGF.

        Returns True if a line equal to ``BEGIN IONS`` (line endings
        removed) is found before the line limit is exceeded, else False.
        """
        mgf_begin_ions = "BEGIN IONS"
        max_lines = 100

        with open( filename ) as handle:
            for i, line in enumerate( handle ):
                if line.rstrip( '\n\r' ) == mgf_begin_ions:
                    return True
                if i > max_lines:
                    return False
        # File ended before the marker was seen; answer explicitly rather
        # than falling off the end and returning None.
        return False
418f42b34049 Reuploading
iracooke
parents:
diff changeset
156
418f42b34049 Reuploading
iracooke
parents:
diff changeset
157
418f42b34049 Reuploading
iracooke
parents:
diff changeset
class MascotDat( Text ):
    """Mascot search results """
    file_ext = "mascotdat"

    def set_peek( self, dataset, is_multi_byte=False ):
        """Set the peek and blurb text.

        The peek comes from ``data.get_file_peek`` on the dataset's file;
        a purged dataset gets placeholder text instead.
        """
        if not dataset.dataset.purged:
            dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
            dataset.blurb = 'mascotdat Mascot Search Results'
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def sniff( self, filename ):
        """Determine whether the file looks like a Mascot results file.

        Returns True if a line exactly equal to the expected MIME-Version
        line (line endings removed) is found before the line limit is
        exceeded, else False.
        """
        mime_version = "MIME-Version: 1.0 (Generated by Mascot version 1.0)"
        max_lines = 10

        with open( filename ) as handle:
            for i, line in enumerate( handle ):
                if line.rstrip( '\n\r' ) == mime_version:
                    return True
                if i > max_lines:
                    return False
        # File ended before the marker was seen; answer explicitly rather
        # than falling off the end and returning None.
        return False
418f42b34049 Reuploading
iracooke
parents:
diff changeset
182
418f42b34049 Reuploading
iracooke
parents:
diff changeset
183
418f42b34049 Reuploading
iracooke
parents:
diff changeset
class RAW( Binary ):
    """Class describing a Thermo Finnigan binary RAW file"""
    file_ext = "raw"

    def sniff( self, filename ):
        """Determine whether the file looks like a Thermo Finnigan RAW file.

        Returns True if the first 20 bytes contain the byte sequence
        ``F\\0i\\0n\\0n\\0i\\0g\\0a\\0n``; returns False otherwise, or if
        the file cannot be read.
        """
        # Thermo Finnigan RAW format is proprietary and hence not well documented.
        # Files start with 2 bytes that seem to differ followed by F\0i\0n\0n\0i\0g\0a\0n
        # This combination represents 17 bytes, but to play safe we read 20 bytes from
        # the start of the file.
        finnigan = b'F\0i\0n\0n\0i\0g\0a\0n'
        try:
            # Binary mode: the header is raw bytes, not text.
            with open( filename, 'rb' ) as handle:
                header = handle.read( 20 )
        except ( IOError, OSError ):
            return False
        # Byte-wise search; comparing hex strings could match a signature
        # that starts in the middle of a byte.
        return finnigan in header

    def set_peek( self, dataset, is_multi_byte=False ):
        """Set the peek and blurb text for ``dataset``.

        A purged dataset gets placeholder text. Otherwise the peek is a fixed
        label and the blurb is whatever ``data.nice_size`` returns for the
        dataset's size. ``is_multi_byte`` is accepted but not used here.
        """
        if not dataset.dataset.purged:
            dataset.peek = "Thermo Finnigan RAW file"
            dataset.blurb = data.nice_size( dataset.get_size() )
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek( self, dataset ):
        """Return the stored peek, or a size-based description if reading it fails."""
        try:
            return dataset.peek
        except Exception:
            # Best-effort fallback; unlike a bare ``except`` this lets
            # SystemExit and KeyboardInterrupt propagate.
            return "Thermo Finnigan RAW file (%s)" % ( data.nice_size( dataset.get_size() ) )
418f42b34049 Reuploading
iracooke
parents:
diff changeset
213
418f42b34049 Reuploading
iracooke
parents:
diff changeset
214
418f42b34049 Reuploading
iracooke
parents:
diff changeset
class Msp(Text):
    """ Output of NIST MS Search Program chemdata.nist.gov/mass-spc/ftp/mass-spc/PepLib.pdf """
    file_ext = "msp"

    @staticmethod
    def next_line_starts_with(contents, prefix):
        """Read one line from ``contents`` and report whether it begins with ``prefix``."""
        candidate = contents.readline()
        if candidate is None:
            return False
        return candidate.startswith(prefix)

    def sniff(self, filename):
        """ Determines whether the file is a NIST MSP output file.

        The first line must start with "Name:" and the second with "MW:".

        >>> fname = get_test_fname('test.msp')
        >>> Msp().sniff(fname)
        True
        >>> fname = get_test_fname('test.mzXML')
        >>> Msp().sniff(fname)
        False
        """
        with open(filename, 'r') as contents:
            # The second line is only read when the first one matched.
            if not Msp.next_line_starts_with(contents, "Name:"):
                return False
            return Msp.next_line_starts_with(contents, "MW:")
418f42b34049 Reuploading
iracooke
parents:
diff changeset
236
418f42b34049 Reuploading
iracooke
parents:
diff changeset
class Ms2(Text):
    """Text datatype with the ``ms2`` file extension."""
    file_ext = "ms2"

    def sniff(self, filename):
        """ Determines whether the file is a valid ms2 file.

        Collects the leading run of lines that start with ``H<TAB>`` and
        returns True only if, for each required field name, at least one of
        those lines starts with ``H<TAB><field>``. An empty file, or one
        missing any required field, yields False.

        >>> fname = get_test_fname('test.msp')
        >>> Ms2().sniff(fname)
        False
        >>> fname = get_test_fname('test.ms2')
        >>> Ms2().sniff(fname)
        True
        """
        required_fields = ['CreationDate', 'Extractor', 'ExtractorVersion', 'ExtractorOptions']

        header_lines = []
        with open(filename, 'r') as contents:
            # Iterating the file stops at EOF, so a file made up only of
            # header lines (or an empty file) can no longer loop forever.
            for line in contents:
                if not line.startswith('H\t'):
                    break
                header_lines.append(line)

        for header_field in required_fields:
            prefix = 'H\t%s' % header_field
            # Prefix match only: 'H\tExtractor' is also satisfied by an
            # 'H\tExtractorVersion' line, as in the original check.
            if not any(header_line.startswith(prefix) for header_line in header_lines):
                return False

        return True
418f42b34049 Reuploading
iracooke
parents:
diff changeset
271
418f42b34049 Reuploading
iracooke
parents:
diff changeset
272 # unsniffable binary format, should do something about this
418f42b34049 Reuploading
iracooke
parents:
diff changeset
273 class XHunterAslFormat(Binary):
418f42b34049 Reuploading
iracooke
parents:
diff changeset
274 """ Annotated Spectra in the HLF format http://www.thegpm.org/HUNTER/format_2006_09_15.html """
418f42b34049 Reuploading
iracooke
parents:
diff changeset
275 file_ext = "hlf"