comparison lib/galaxy/datatypes/proteomics.py @ 0:a929e27eb203 draft

Uploaded
author iracooke
date Thu, 21 Jun 2012 22:30:48 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:a929e27eb203
1 """
2 Proteomics format classes
3 """
4 import logging
5 import re
6 from galaxy.datatypes.data import *
7 from galaxy.datatypes.xml import *
8 from galaxy.datatypes.sniff import *
9 from galaxy.datatypes.binary import *
10
11 log = logging.getLogger(__name__)
12
13
class Xls(Binary):
    """Class describing a binary excel spreadsheet file"""
    file_ext = "xls"

    def set_peek(self, dataset, is_multi_byte=False):
        """Set the peek and blurb text.

        For a dataset that has not been purged, the peek is a fixed label and
        the blurb is the dataset size formatted by ``data.nice_size``.
        ``is_multi_byte`` is accepted but not used by this method.
        """
        if not dataset.dataset.purged:
            dataset.peek = "Excel Spreadsheet file"
            dataset.blurb = data.nice_size(dataset.get_size())
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek(self, dataset):
        """Return the stored peek, or a size-based description if reading it fails."""
        try:
            return dataset.peek
        except Exception:
            # Narrower than a bare ``except:`` so that KeyboardInterrupt and
            # SystemExit are not swallowed by the fallback.
            return "Binary xls file (%s)" % (data.nice_size(dataset.get_size()))
30
class PepXml(GenericXml):
    """pepXML data"""
    file_ext = "pepxml"

    def set_peek(self, dataset, is_multi_byte=False):
        """Set the peek and blurb text.

        For a dataset that has not been purged, the peek comes from
        ``data.get_file_peek`` on the dataset's file and the blurb is a fixed
        label; otherwise both are set to "purged" placeholder messages.
        """
        if not dataset.dataset.purged:
            dataset.peek = data.get_file_peek(dataset.file_name, is_multi_byte=is_multi_byte)
            dataset.blurb = 'pepXML data'
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def sniff(self, filename):
        """
        Determines whether the file is pepXML

        Returns True if any of the first three lines, once stripped of
        surrounding whitespace, contains the text ``pepXML"``; False otherwise.
        """
        xmlns_re = re.compile(r'.*pepXML"')
        handle = open(filename)
        # try/finally guarantees the handle is closed even if reading or
        # matching raises; it is used instead of a ``with`` block so that no
        # ``from __future__ import with_statement`` is required.
        try:
            for _ in range(3):
                if xmlns_re.match(handle.readline().strip()):
                    return True
            return False
        finally:
            handle.close()
58
class MzML(GenericXml):
    """mzML data"""
    file_ext = "mzml"

    def set_peek(self, dataset, is_multi_byte=False):
        """Set the peek and blurb text.

        For a dataset that has not been purged, the peek comes from
        ``data.get_file_peek`` on the dataset's file and the blurb is a fixed
        label; otherwise both are set to "purged" placeholder messages.
        """
        if not dataset.dataset.purged:
            dataset.peek = data.get_file_peek(dataset.file_name, is_multi_byte=is_multi_byte)
            dataset.blurb = 'mzML Mass Spectrometry data'
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def sniff(self, filename):
        """
        Determines whether the file is mzML

        Returns True if any of the first three lines, once stripped of
        surrounding whitespace, starts with ``<mzML``; False otherwise.
        """
        xmlns_re = re.compile("^<mzML")
        handle = open(filename)
        # try/finally guarantees the handle is closed even if reading or
        # matching raises.
        try:
            for _ in range(3):
                if xmlns_re.match(handle.readline().strip()):
                    return True
            return False
        finally:
            handle.close()
83
84
class ProtXML(Text):
    """protXML data"""
    file_ext = "protxml"

    def set_peek(self, dataset, is_multi_byte=False):
        """Set the peek and blurb text.

        For a dataset that has not been purged, the peek comes from
        ``data.get_file_peek`` on the dataset's file and the blurb is a fixed
        label; otherwise both are set to "purged" placeholder messages.
        """
        if not dataset.dataset.purged:
            dataset.peek = data.get_file_peek(dataset.file_name, is_multi_byte=is_multi_byte)
            dataset.blurb = 'prot XML Search Results'
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def sniff(self, filename):
        """
        Determines whether the file is protXML

        Line ``i`` of the file must contain ``protxml_header[i]`` as a
        substring. Returns True on reaching the first line past the header
        with every header line matched; returns False on a mismatch or when
        the file ends before a line past the header is reached.
        """
        protxml_header = ['<?xml version="1.0" encoding="ISO-8859-1"?>',
                          'xmlns="http://regis-web.systemsbiology.net/protXML"']

        handle = open(filename)
        # try/finally guarantees the handle is closed on every exit path.
        try:
            for i, line in enumerate(handle):
                # The bound must be the protXML header list itself (the
                # previous code named an undefined ``pepxml_header`` here).
                if i >= len(protxml_header):
                    return True
                if protxml_header[i] not in line.rstrip('\n\r'):
                    return False
        finally:
            handle.close()
        return False
107
108
109
class MzXML(Text):
    """mzXML data"""
    file_ext = "mzXML"

    def set_peek(self, dataset, is_multi_byte=False):
        """Set the peek and blurb text.

        For a dataset that has not been purged, the peek comes from
        ``data.get_file_peek`` on the dataset's file and the blurb is a fixed
        label; otherwise both are set to "purged" placeholder messages.
        """
        if not dataset.dataset.purged:
            dataset.peek = data.get_file_peek(dataset.file_name, is_multi_byte=is_multi_byte)
            dataset.blurb = 'mzXML Mass Spectrometry data'
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def sniff(self, filename):
        """
        Determines whether the file is mzXML

        Line ``i`` of the file (with trailing newline characters removed) must
        equal ``mzxml_header[i]`` exactly. Returns True on reaching the first
        line past the header with every header line matched; returns False on
        a mismatch or when the file ends before a line past the header is
        reached.
        """
        mzxml_header = ['<?xml version="1.0" encoding="ISO-8859-1"?>',
                        '<mzXML xmlns="http://sashimi.sourceforge.net/schema_revision/mzXML_2.1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://sashimi.sourceforge.net/schema_revision/mzXML_2.1 http://sashimi.sourceforge.net/schema_revision/mzXML_2.1/mzXML_idx_2.1.xsd">']

        handle = open(filename)
        # try/finally guarantees the handle is closed on every exit path.
        try:
            for i, line in enumerate(handle):
                if i >= len(mzxml_header):
                    return True
                if line.rstrip('\n\r') != mzxml_header[i]:
                    return False
        finally:
            handle.close()
        return False
131
class Mgf(Text):
    """Mascot Generic Format data"""
    file_ext = "mgf"

    def set_peek(self, dataset, is_multi_byte=False):
        """Set the peek and blurb text.

        For a dataset that has not been purged, the peek comes from
        ``data.get_file_peek`` on the dataset's file and the blurb is a fixed
        label; otherwise both are set to "purged" placeholder messages.
        """
        if not dataset.dataset.purged:
            dataset.peek = data.get_file_peek(dataset.file_name, is_multi_byte=is_multi_byte)
            dataset.blurb = 'mgf Mascot Generic Format'
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def sniff(self, filename):
        """
        Determines whether the file is in Mascot Generic Format

        Returns True as soon as a line (with trailing newline characters
        removed) equals ``BEGIN IONS``. Returns False once the line index
        exceeds ``max_lines`` without a match, or when the file ends first.
        """
        mgf_begin_ions = "BEGIN IONS"
        max_lines = 100

        handle = open(filename)
        # try/finally guarantees the handle is closed on every exit path.
        try:
            for i, line in enumerate(handle):
                if line.rstrip('\n\r') == mgf_begin_ions:
                    return True
                # Checked after the comparison, so the line at index
                # max_lines + 1 is still inspected before giving up.
                if i > max_lines:
                    return False
        finally:
            handle.close()
        return False
156
157
class MascotDat(Text):
    """Mascot search results """
    file_ext = "mascotdat"

    def set_peek(self, dataset, is_multi_byte=False):
        """Set the peek and blurb text.

        For a dataset that has not been purged, the peek comes from
        ``data.get_file_peek`` on the dataset's file and the blurb is a fixed
        label; otherwise both are set to "purged" placeholder messages.
        """
        if not dataset.dataset.purged:
            dataset.peek = data.get_file_peek(dataset.file_name, is_multi_byte=is_multi_byte)
            dataset.blurb = 'mascotdat Mascot Search Results'
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def sniff(self, filename):
        """
        Determines whether the file is a Mascot results file

        Returns True as soon as a line (with trailing newline characters
        removed) equals the expected ``MIME-Version`` header. Returns False
        once the line index exceeds ``max_lines`` without a match, or when
        the file ends first.
        """
        mime_version = "MIME-Version: 1.0 (Generated by Mascot version 1.0)"
        max_lines = 10

        handle = open(filename)
        # try/finally guarantees the handle is closed on every exit path.
        try:
            for i, line in enumerate(handle):
                if line.rstrip('\n\r') == mime_version:
                    return True
                # Checked after the comparison, so the line at index
                # max_lines + 1 is still inspected before giving up.
                if i > max_lines:
                    return False
        finally:
            handle.close()
        return False
172 def sniff( self, filename ):
173 mime_version = "MIME-Version: 1.0 (Generated by Mascot version 1.0)"
174 max_lines=10
175
176 for i, line in enumerate( file( filename ) ):
177 line = line.rstrip( '\n\r' )
178 if line==mime_version:
179 return True
180 if i>max_lines:
181 return False