annotate proteomics.py @ 2:f74290b136fc

Uploaded
author iracooke
date Mon, 04 Mar 2013 18:34:32 -0500
parents c10a62c886b8
children 09b89b345de2
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
1 """
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
2 Proteomics format classes
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
3 """
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
4 import logging
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
5 import re
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
6 from galaxy.datatypes.data import *
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
7 from galaxy.datatypes.xml import *
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
8 from galaxy.datatypes.sniff import *
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
9 from galaxy.datatypes.binary import *
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
10
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
11 log = logging.getLogger(__name__)
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
12
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
13
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
class Xls( Binary ):
    """Binary Excel spreadsheet (.xls) datatype."""
    file_ext = "xls"

    def set_peek( self, dataset, is_multi_byte=False ):
        """Set the peek and blurb text for ``dataset``.

        The content is binary, so the peek is a fixed label instead of a
        preview of the file; the blurb is the dataset size formatted by
        ``data.nice_size``.  ``is_multi_byte`` is accepted but not used.
        """
        if dataset.dataset.purged:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'
        else:
            dataset.peek = "Excel Spreadsheet file"
            dataset.blurb = data.nice_size( dataset.get_size() )

    def display_peek( self, dataset ):
        """Return the stored peek, falling back to a generic description.

        The fallback is used when reading ``dataset.peek`` raises.
        """
        try:
            return dataset.peek
        except Exception:
            # Deliberately broad best-effort fallback, but no longer a bare
            # ``except:`` so SystemExit / KeyboardInterrupt still propagate.
            return "Binary xls file (%s)" % ( data.nice_size( dataset.get_size() ) )
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
30
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
class ProteomicsXml(GenericXml):
    """ An enhanced XML datatype used to reuse code across several
    proteomic/mass-spec datatypes.

    Subclasses are expected to define ``root`` (a regular-expression
    fragment matching the root element name) and ``blurb``.
    """

    def sniff(self, filename):
        """ Determines whether the file is the correct XML type.

        Leading lines that start with ``<?`` (XML declaration / processing
        instructions) are skipped; the first other line must begin with
        ``<root`` or ``<ns:root`` for any namespace prefix ``ns``.
        Returns False when the file ends before such a line is found.
        """
        with open(filename, 'r') as contents:
            line = contents.readline()
            # readline() returns '' at end of file, which does not start
            # with '<?', so this loop always terminates.
            while line.startswith('<?'):
                line = contents.readline()
        # Raw string: '\w' is not a valid escape in a normal string literal.
        pattern = r'^<(\w*:)?%s' % self.root
        return re.match(pattern, line) is not None

    def set_peek( self, dataset, is_multi_byte=False ):
        """Set the peek and blurb text"""
        if dataset.dataset.purged:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'
        else:
            dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
            dataset.blurb = self.blurb
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
53
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
class PepXml(ProteomicsXml):
    """pepXML data.

    ``root`` is the root element name looked for by the inherited sniffer.
    """
    file_ext = 'pepxml'
    blurb = "pepXML data"
    root = 'msms_pipeline_analysis'
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
59
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
60
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
class MzML(ProteomicsXml):
    """mzML data.

    ``root`` is a regular-expression alternation, so both plain and
    indexed mzML root elements are accepted by the inherited sniffer.
    """
    file_ext = 'mzml'
    blurb = "mzML Mass Spectrometry data"
    root = '(mzML|indexedmzML)'
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
66
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
67
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
class ProtXML(ProteomicsXml):
    """protXML data.

    ``root`` is the root element name looked for by the inherited sniffer.
    """
    file_ext = 'protxml'
    blurb = "prot XML Search Results"
    root = 'protein_summary'
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
73
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
74
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
class MzXML(ProteomicsXml):
    """mzXML data.

    ``root`` is the root element name looked for by the inherited sniffer.
    """
    file_ext = 'mzXML'
    blurb = 'mzXML Mass Spectrometry data'
    root = 'mzXML'
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
80
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
81 ## PSI datatypes
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
class MzIdentML(ProteomicsXml):
    """mzIdentML data (identified peptides and proteins).

    ``root`` is the root element name looked for by the inherited sniffer.
    """
    file_ext = 'mzid'
    blurb = 'XML identified peptides and proteins.'
    root = 'MzIdentML'
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
86
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
87
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
class TraML(ProteomicsXml):
    """TraML transition list data.

    ``root`` is the root element name looked for by the inherited sniffer.
    """
    file_ext = 'traML'
    blurb = 'TraML transition list'
    root = 'TraML'
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
92
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
93
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
class MzQuantML(ProteomicsXml):
    """mzQuantML quantification data.

    ``root`` is the root element name looked for by the inherited sniffer.
    """
    file_ext = 'mzq'
    blurb = 'XML quantification data'
    root = 'MzQuantML'
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
98
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
99
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
class Mgf( Text ):
    """Mascot Generic Format data"""
    file_ext = "mgf"

    def set_peek( self, dataset, is_multi_byte=False ):
        """Set the peek and blurb text"""
        if dataset.dataset.purged:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'
        else:
            dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
            dataset.blurb = 'mgf Mascot Generic Format'

    def sniff( self, filename ):
        """Return True if a line near the top of the file is exactly ``BEGIN IONS``.

        Only the start of the file is scanned.  The limit is tested after
        each comparison, so up to ``max_lines + 2`` lines are examined.
        Returns False when no such line is found, including when the file
        ends before the limit is reached.
        """
        mgf_begin_ions = "BEGIN IONS"
        max_lines = 100

        # open() in a with-block replaces the Python 2-only file() builtin
        # and guarantees the handle is closed on every return path.
        with open( filename ) as handle:
            for i, line in enumerate( handle ):
                if line.rstrip( '\n\r' ) == mgf_begin_ions:
                    return True
                if i > max_lines:
                    return False
        # File exhausted without a match: answer explicitly instead of
        # falling off the end and returning None.
        return False
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
124
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
125
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
class MascotDat( Text ):
    """Mascot search results """
    file_ext = "mascotdat"

    def set_peek( self, dataset, is_multi_byte=False ):
        """Set the peek and blurb text"""
        if dataset.dataset.purged:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'
        else:
            dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
            dataset.blurb = 'mascotdat Mascot Search Results'

    def sniff( self, filename ):
        """Return True if a line near the top of the file is the Mascot MIME header.

        The line must equal the expected ``MIME-Version`` string exactly
        (ignoring the line terminator).  The limit is tested after each
        comparison, so up to ``max_lines + 2`` lines are examined.  Returns
        False when no such line is found, including when the file ends
        before the limit is reached.
        """
        mime_version = "MIME-Version: 1.0 (Generated by Mascot version 1.0)"
        max_lines = 10

        # open() in a with-block replaces the Python 2-only file() builtin
        # and guarantees the handle is closed on every return path.
        with open( filename ) as handle:
            for i, line in enumerate( handle ):
                if line.rstrip( '\n\r' ) == mime_version:
                    return True
                if i > max_lines:
                    return False
        # File exhausted without a match: answer explicitly instead of
        # falling off the end and returning None.
        return False
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
150
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
151
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
class RAW( Binary ):
    """Class describing a Thermo Finnigan binary RAW file"""
    file_ext = "raw"

    def sniff( self, filename ):
        """Return True if the first 20 bytes contain the Finnigan signature.

        The signature is the text "Finnigan" with a NUL byte between each
        pair of letters.  Any error while reading the file yields False.
        """
        # Thermo Finnigan RAW format is proprietary and hence not well documented.
        # Files start with 2 bytes that seem to differ followed by F\0i\0n\0n\0i\0g\0a\0n
        # This combination represents 17 bytes, but to play safe we read 20 bytes from
        # the start of the file.
        signature = b'F\0i\0n\0n\0i\0g\0a\0n'
        try:
            # Binary mode avoids newline translation / text decoding of the
            # header, and the with-block closes the handle.
            with open( filename, 'rb' ) as handle:
                header = handle.read( 20 )
        except Exception:
            # Best effort: an unreadable file is simply not a RAW file.
            return False
        # Compare bytes directly; searching hex-encoded text could match at a
        # half-byte offset and fails outright on str input under Python 3.
        return signature in header

    def set_peek( self, dataset, is_multi_byte=False ):
        """Set the peek and blurb text for ``dataset``.

        The content is binary, so the peek is a fixed label; the blurb is
        the dataset size formatted by ``data.nice_size``.
        ``is_multi_byte`` is accepted but not used.
        """
        if dataset.dataset.purged:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'
        else:
            dataset.peek = "Thermo Finnigan RAW file"
            dataset.blurb = data.nice_size( dataset.get_size() )

    def display_peek( self, dataset ):
        """Return the stored peek, falling back to a generic description.

        The fallback is used when reading ``dataset.peek`` raises.
        """
        try:
            return dataset.peek
        except Exception:
            # Broad on purpose, but SystemExit / KeyboardInterrupt propagate.
            return "Thermo Finnigan RAW file (%s)" % ( data.nice_size( dataset.get_size() ) )


# Register the sniffer only when this Binary class provides the hook.
if hasattr(Binary, 'register_sniffable_binary_format'):
    Binary.register_sniffable_binary_format('RAW', 'RAW', RAW)
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
185
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
186
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
class Msp(Text):
    """ Output of NIST MS Search Program chemdata.nist.gov/mass-spc/ftp/mass-spc/PepLib.pdf """
    file_ext = 'msp'

    @staticmethod
    def next_line_starts_with(contents, prefix):
        """Read one line from ``contents`` and report whether it begins with ``prefix``."""
        candidate = contents.readline()
        if candidate is None:
            return False
        return candidate.startswith(prefix)

    def sniff(self, filename):
        """ Determines whether the file is a NIST MSP output file.

        The first line must start with ``Name:`` and the second with ``MW:``.

        >>> fname = get_test_fname('test.msp')
        >>> Msp().sniff(fname)
        True
        >>> fname = get_test_fname('test.mzXML')
        >>> Msp().sniff(fname)
        False
        """
        with open(filename, 'r') as handle:
            # The second line is only read when the first one matched.
            if not Msp.next_line_starts_with(handle, 'Name:'):
                return False
            return Msp.next_line_starts_with(handle, 'MW:')
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
208
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
class Ms2(Text):
    """MS2 text datatype, recognised by its ``H``-prefixed header lines."""
    file_ext = "ms2"

    def sniff(self, filename):
        """ Determines whether the file is a valid ms2 file.

        The leading run of lines starting with ``H<TAB>`` is collected as the
        header; the file is accepted only if every required header field
        appears as the start of at least one header line.

        >>> fname = get_test_fname('test.msp')
        >>> Ms2().sniff(fname)
        False
        >>> fname = get_test_fname('test.ms2')
        >>> Ms2().sniff(fname)
        True
        """
        required_fields = ('CreationDate', 'Extractor', 'ExtractorVersion', 'ExtractorOptions')

        header_lines = []
        with open(filename, 'r') as contents:
            # Iterating the file stops at end of file, so an empty file or a
            # file made up solely of header lines can no longer spin forever
            # (readline() returns '' at EOF, which previously never broke
            # out of the loop).
            for line in contents:
                if not line.startswith('H\t'):
                    break
                header_lines.append(line)

        # Prefix matching is intentional and unchanged: an 'ExtractorVersion'
        # line also satisfies the 'Extractor' requirement.
        for header_field in required_fields:
            wanted = 'H\t%s' % (header_field)
            if not any(header_line.startswith(wanted) for header_line in header_lines):
                return False
        return True
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
243
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
244 # unsniffable binary format, should do something about this
c10a62c886b8 Uploaded
iracooke
parents:
diff changeset
class XHunterAslFormat(Binary):
    """ Annotated Spectra in the HLF format http://www.thegpm.org/HUNTER/format_2006_09_15.html """
    file_ext = 'hlf'


# No sniffer exists for this format, so register the extension as an
# unsniffable binary type when this Binary class provides the hook.
if hasattr(Binary, 'register_unsniffable_binary_ext'):
    Binary.register_unsniffable_binary_ext('hlf')