Mercurial > repos > iracooke > protk
comparison lib/galaxy/datatypes/proteomics.py @ 0:a929e27eb203 draft
Uploaded
author | iracooke |
---|---|
date | Thu, 21 Jun 2012 22:30:48 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:a929e27eb203 |
---|---|
1 """ | |
2 Proteomics format classes | |
3 """ | |
4 import logging | |
5 import re | |
6 from galaxy.datatypes.data import * | |
7 from galaxy.datatypes.xml import * | |
8 from galaxy.datatypes.sniff import * | |
9 from galaxy.datatypes.binary import * | |
10 | |
11 log = logging.getLogger(__name__) | |
12 | |
13 | |
class Xls( Binary ):
    """Class describing a binary excel spreadsheet file"""
    file_ext = "xls"

    def set_peek( self, dataset, is_multi_byte=False ):
        """
        Set the peek and blurb text.

        For a dataset that has not been purged the peek is a fixed label and
        the blurb is the dataset size formatted by data.nice_size. For a
        purged dataset both are set to fixed "missing file" messages.
        is_multi_byte is accepted but not used by this method.
        """
        if not dataset.dataset.purged:
            dataset.peek = "Excel Spreadsheet file"
            dataset.blurb = data.nice_size( dataset.get_size() )
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def display_peek( self, dataset ):
        """
        Return the dataset's stored peek, or a size summary string if reading
        the peek raises.
        """
        try:
            return dataset.peek
        except Exception:
            # Catch Exception rather than using a bare except so that
            # KeyboardInterrupt / SystemExit are not swallowed here.
            return "Binary xls file (%s)" % ( data.nice_size( dataset.get_size() ) )
30 | |
class PepXml(GenericXml):
    """pepXML data"""
    file_ext = "pepxml"

    def set_peek( self, dataset, is_multi_byte=False ):
        """Set the peek and blurb text"""
        if not dataset.dataset.purged:
            dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
            dataset.blurb = 'pepXML data'
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def sniff( self, filename ):
        """
        Determines whether the file is pepXML

        Reads at most the first three lines of filename and returns True as
        soon as one of them, stripped of surrounding whitespace, matches the
        pattern below (any text followed by pepXML and a double quote).
        Returns False if none of the three lines match.
        """
        xmlns_re = re.compile(".*pepXML\"")
        handle = open(filename)
        # try/finally guarantees the handle is closed on every exit path,
        # including an exception raised while reading or matching.
        try:
            for i in range(3):
                line = handle.readline()
                if xmlns_re.match(line.strip()):
                    return True
        finally:
            handle.close()
        return False
58 | |
class MzML( GenericXml ):
    """mzML data"""
    file_ext = "mzml"

    def set_peek( self, dataset, is_multi_byte=False ):
        """Set the peek and blurb text"""
        if not dataset.dataset.purged:
            dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
            dataset.blurb = 'mzML Mass Spectrometry data'
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def sniff( self, filename ):
        """
        Determines whether the file is mzML

        Reads at most the first three lines of filename and returns True as
        soon as one of them, stripped of surrounding whitespace, starts with
        "<mzML". Returns False if none of the three lines do.
        """
        xmlns_re = re.compile("^<mzML")
        handle = open(filename)
        # try/finally guarantees the handle is closed on every exit path,
        # including an exception raised while reading or matching.
        try:
            for i in range(3):
                line = handle.readline()
                if xmlns_re.match(line.strip()):
                    return True
        finally:
            handle.close()
        return False
83 | |
84 | |
class ProtXML( Text ):
    """protXML data"""
    file_ext = "protxml"

    def set_peek( self, dataset, is_multi_byte=False ):
        """Set the peek and blurb text"""
        if not dataset.dataset.purged:
            dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
            dataset.blurb = 'prot XML Search Results'
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def sniff( self, filename ):
        """
        Determines whether the file is protXML

        Line i of the file must contain entry i of protxml_header as a
        substring. Returns True once a line beyond the header entries is
        reached with every header entry matched, and False on the first
        mismatch or if the file ends before a line beyond the header is
        reached.
        """
        protxml_header = [ '<?xml version="1.0" encoding="ISO-8859-1"?>',
                           'xmlns="http://regis-web.systemsbiology.net/protXML"' ]

        handle = open( filename )
        # try/finally guarantees the handle is closed on every exit path.
        try:
            for i, line in enumerate( handle ):
                # The bound must use this method's own header list; the name
                # pepxml_header is not defined here and raised NameError.
                if i >= len( protxml_header ):
                    return True
                line = line.rstrip( '\n\r' )
                if protxml_header[ i ] not in line:
                    return False
        finally:
            handle.close()
        # File ended before any line past the header was seen.
        return False
107 | |
108 | |
109 | |
class MzXML( Text ):
    """mzXML data"""
    file_ext = "mzXML"

    def set_peek( self, dataset, is_multi_byte=False ):
        """Set the peek and blurb text"""
        if not dataset.dataset.purged:
            dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
            dataset.blurb = 'mzXML Mass Spectrometry data'
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'

    def sniff( self, filename ):
        """
        Determines whether the file is mzXML

        Line i of the file, with trailing newline characters removed, must be
        exactly equal to entry i of mzxml_header. Returns True once a line
        beyond the header entries is reached with every header entry matched,
        and False on the first mismatch or if the file ends before a line
        beyond the header is reached.
        """
        mzxml_header = [ '<?xml version="1.0" encoding="ISO-8859-1"?>',
                         '<mzXML xmlns="http://sashimi.sourceforge.net/schema_revision/mzXML_2.1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://sashimi.sourceforge.net/schema_revision/mzXML_2.1 http://sashimi.sourceforge.net/schema_revision/mzXML_2.1/mzXML_idx_2.1.xsd">' ]
        handle = open( filename )
        # try/finally guarantees the handle is closed on every exit path.
        try:
            for i, line in enumerate( handle ):
                if i >= len( mzxml_header ):
                    return True
                line = line.rstrip( '\n\r' )
                if line != mzxml_header[ i ]:
                    return False
        finally:
            handle.close()
        # File ended before any line past the header was seen.
        return False
131 | |
class Mgf( Text ):
    """Mascot Generic Format data"""
    file_ext = "mgf"

    def set_peek( self, dataset, is_multi_byte=False ):
        """Set the peek and blurb text"""
        if not dataset.dataset.purged:
            dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
            dataset.blurb = 'mgf Mascot Generic Format'
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'


    def sniff( self, filename ):
        """
        Determines whether the file is in Mascot Generic Format

        Returns True if a line near the start of the file, with trailing
        newline characters removed, is exactly "BEGIN IONS". Returns False if
        the scan limit is passed or the file ends without such a line.
        """
        mgf_begin_ions = "BEGIN IONS"
        max_lines=100

        handle = open( filename )
        # try/finally guarantees the handle is closed on every exit path.
        try:
            for i, line in enumerate( handle ):
                line = line.rstrip( '\n\r' )
                if line==mgf_begin_ions:
                    return True
                # The match is tested before this bound, so lines with index
                # 0 through max_lines + 1 are examined before giving up.
                if i>max_lines:
                    return False
        finally:
            handle.close()
        # File ended before the marker or the scan limit was reached.
        return False
156 | |
157 | |
class MascotDat( Text ):
    """Mascot search results """
    file_ext = "mascotdat"

    def set_peek( self, dataset, is_multi_byte=False ):
        """Set the peek and blurb text"""
        if not dataset.dataset.purged:
            dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
            dataset.blurb = 'mascotdat Mascot Search Results'
        else:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'


    def sniff( self, filename ):
        """
        Determines whether the file is a Mascot results (.dat) file

        Returns True if a line near the start of the file, with trailing
        newline characters removed, is exactly the MIME-Version line below.
        Returns False if the scan limit is passed or the file ends without
        such a line.
        """
        mime_version = "MIME-Version: 1.0 (Generated by Mascot version 1.0)"
        max_lines=10

        handle = open( filename )
        # try/finally guarantees the handle is closed on every exit path.
        try:
            for i, line in enumerate( handle ):
                line = line.rstrip( '\n\r' )
                if line==mime_version:
                    return True
                # The match is tested before this bound, so lines with index
                # 0 through max_lines + 1 are examined before giving up.
                if i>max_lines:
                    return False
        finally:
            handle.close()
        # File ended before the marker or the scan limit was reached.
        return False