annotate prims_proteomics_datatypes.py @ 18:ad911e9aaf33

small fix in msfilt report output
author pieter.lukasse@wur.nl
date Fri, 01 Aug 2014 17:22:37 +0200
parents d50f079096ee
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
d50f079096ee Push to main toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
1 """
d50f079096ee Push to main toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
2 PRIMS proteomics classes for types defined in datatypes_conf.xml
d50f079096ee Push to main toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
3 """
d50f079096ee Push to main toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
4 import logging
d50f079096ee Push to main toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
5 import re
d50f079096ee Push to main toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
6 from galaxy.datatypes.data import *
d50f079096ee Push to main toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
7 from galaxy.datatypes.xml import *
d50f079096ee Push to main toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
8 from galaxy.datatypes.sniff import *
d50f079096ee Push to main toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
9 from galaxy.datatypes.binary import *
d50f079096ee Push to main toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
10 from galaxy.datatypes.interval import *
d50f079096ee Push to main toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
11
d50f079096ee Push to main toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
12 log = logging.getLogger(__name__)
d50f079096ee Push to main toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
13
d50f079096ee Push to main toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
14
d50f079096ee Push to main toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
class ProteomicsXml(GenericXml):
    """An enhanced XML datatype used to reuse code across several
    proteomic/mass-spec datatypes.

    The methods below read two attributes from ``self``: ``root`` (the name
    of the XML root element, used by ``sniff``) and ``blurb`` (the text
    stored on the dataset by ``set_peek``).
    (This part of the code is taken from the protk proteomics datatypes
    package.)
    """

    def sniff(self, filename):
        """Determine whether the file is the correct XML type.

        Blank lines and lines starting with ``<?`` (XML declaration or
        processing instructions) are skipped; the first remaining line
        decides the result.

        :param filename: path of the file to inspect.
        :returns: True when that line starts with ``<root`` or ``<ns:root``
            (for any namespace prefix ``ns``), False otherwise, including
            for an empty file.
        """
        # Raw string so that ``\w`` reaches the regex engine instead of being
        # an invalid string escape. ``self.root`` is interpolated as-is.
        pattern = r'^<(\w*:)?%s' % self.root
        with open(filename, 'r') as contents:
            for line in contents:
                # Tolerate indentation and empty lines before the root element.
                candidate = line.lstrip()
                if not candidate or candidate.startswith('<?'):
                    continue
                return re.match(pattern, candidate) is not None
        # Reached end of file without finding any element line.
        return False

    def set_peek(self, dataset, is_multi_byte=False):
        """Set the peek and blurb text on ``dataset``.

        For a purged dataset fixed placeholder texts are stored; otherwise
        the peek comes from ``data.get_file_peek`` and the blurb from
        ``self.blurb``.
        """
        if dataset.dataset.purged:
            dataset.peek = 'file does not exist'
            dataset.blurb = 'file purged from disk'
            return
        dataset.peek = data.get_file_peek(dataset.file_name, is_multi_byte=is_multi_byte)
        dataset.blurb = self.blurb
d50f079096ee Push to main toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
37
d50f079096ee Push to main toolshed
pieter.lukasse@wur.nl
parents:
diff changeset
class Apml(ProteomicsXml):
    """APML data"""
    # Root element name that ProteomicsXml.sniff looks for in a file.
    root = "apml"
    # Text that ProteomicsXml.set_peek stores as the dataset blurb.
    blurb = 'PRIMS APML proteomics data'
    # Presumably the extension registered in datatypes_conf.xml -- TODO confirm.
    file_ext = "apml"