Mercurial > repos > iracooke > protk
diff lib/galaxy/datatypes/proteomics.py @ 0:a929e27eb203 draft
Uploaded
author | iracooke |
---|---|
date | Thu, 21 Jun 2012 22:30:48 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/lib/galaxy/datatypes/proteomics.py Thu Jun 21 22:30:48 2012 -0400 @@ -0,0 +1,181 @@ +""" +Proteomics format classes +""" +import logging +import re +from galaxy.datatypes.data import * +from galaxy.datatypes.xml import * +from galaxy.datatypes.sniff import * +from galaxy.datatypes.binary import * + +log = logging.getLogger(__name__) + + +class Xls( Binary ): + """Class describing a binary excel spreadsheet file""" + file_ext = "xls" + + def set_peek( self, dataset, is_multi_byte=False ): + if not dataset.dataset.purged: + dataset.peek = "Excel Spreadsheet file" + dataset.blurb = data.nice_size( dataset.get_size() ) + else: + dataset.peek = 'file does not exist' + dataset.blurb = 'file purged from disk' + def display_peek( self, dataset ): + try: + return dataset.peek + except: + return "Binary xls file (%s)" % ( data.nice_size( dataset.get_size() ) ) + +class PepXml(GenericXml): + """pepXML data""" + file_ext = "pepxml" + + def set_peek( self, dataset, is_multi_byte=False ): + """Set the peek and blurb text""" + if not dataset.dataset.purged: + dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte ) + dataset.blurb = 'pepXML data' + else: + dataset.peek = 'file does not exist' + dataset.blurb = 'file purged from disk' + def sniff( self, filename ): + """ + Determines whether the file is pepXML + """ + #TODO - Use a context manager on Python 2.5+ to close handle + handle = open(filename) + xmlns_re = re.compile(".*pepXML\"") + for i in range(3): + line = handle.readline() + if xmlns_re.match(line.strip()): + handle.close() + return True + + handle.close() + return False + +class MzML( GenericXml ): + """mzML data""" + file_ext = "mzml" + + def set_peek( self, dataset, is_multi_byte=False ): + """Set the peek and blurb text""" + if not dataset.dataset.purged: + dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte ) + dataset.blurb = 'mzML Mass Spectrometry data' + else: + dataset.peek = 'file does not exist' + dataset.blurb = 'file purged from disk' + + def sniff( self, filename ): + handle = open(filename) + xmlns_re = re.compile("^<mzML") + for i in range(3): + line = handle.readline() + if xmlns_re.match(line.strip()): + handle.close() + return True + + handle.close() + return False + + +class ProtXML( Text ): + """protXML data""" + file_ext = "protxml" + + def set_peek( self, dataset, is_multi_byte=False ): + """Set the peek and blurb text""" + if not dataset.dataset.purged: + dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte ) + dataset.blurb = 'prot XML Search Results' + else: + dataset.peek = 'file does not exist' + dataset.blurb = 'file purged from disk' + def sniff( self, filename ): + protxml_header = [ '<?xml version="1.0" encoding="ISO-8859-1"?>', + 'xmlns="http://regis-web.systemsbiology.net/protXML"' ] + + for i, line in enumerate( file( filename ) ): + if i >= len( pepxml_header ): + return True + line = line.rstrip( '\n\r' ) + if protxml_header[ i ] not in line: + return False + + + +class MzXML( Text ): + """mzXML data""" + file_ext = "mzXML" + + def set_peek( self, dataset, is_multi_byte=False ): + """Set the peek and blurb text""" + if not dataset.dataset.purged: + dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte ) + dataset.blurb = 'mzXML Mass Spectrometry data' + else: + dataset.peek = 'file does not exist' + dataset.blurb = 'file purged from disk' + def sniff( self, filename ): + mzxml_header = [ '<?xml version="1.0" encoding="ISO-8859-1"?>', + '<mzXML xmlns="http://sashimi.sourceforge.net/schema_revision/mzXML_2.1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://sashimi.sourceforge.net/schema_revision/mzXML_2.1 http://sashimi.sourceforge.net/schema_revision/mzXML_2.1/mzXML_idx_2.1.xsd">' ] + for i, line in enumerate( file( filename ) ): + if i >= len( mzxml_header ): + return True + line = line.rstrip( '\n\r' ) + if line != mzxml_header[ i ]: + return False + +class Mgf( Text ): + """Mascot Generic Format data""" + file_ext = "mgf" + + def set_peek( self, dataset, is_multi_byte=False ): + """Set the peek and blurb text""" + if not dataset.dataset.purged: + dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte ) + dataset.blurb = 'mgf Mascot Generic Format' + else: + dataset.peek = 'file does not exist' + dataset.blurb = 'file purged from disk' + + + def sniff( self, filename ): + mgf_begin_ions = "BEGIN IONS" + max_lines=100 + + for i, line in enumerate( file( filename ) ): + line = line.rstrip( '\n\r' ) + if line==mgf_begin_ions: + return True + if i>max_lines: + return False + + +class MascotDat( Text ): + """Mascot search results """ + file_ext = "mascotdat" + + def set_peek( self, dataset, is_multi_byte=False ): + """Set the peek and blurb text""" + if not dataset.dataset.purged: + dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte ) + dataset.blurb = 'mascotdat Mascot Search Results' + else: + dataset.peek = 'file does not exist' + dataset.blurb = 'file purged from disk' + + + def sniff( self, filename ): + mime_version = "MIME-Version: 1.0 (Generated by Mascot version 1.0)" + max_lines=10 + + for i, line in enumerate( file( filename ) ): + line = line.rstrip( '\n\r' ) + if line==mime_version: + return True + if i>max_lines: + return False