Mercurial > repos > iracooke > protk
view lib/galaxy/datatypes/proteomics.py @ 0:a929e27eb203 draft
Uploaded
author | iracooke |
---|---|
date | Thu, 21 Jun 2012 22:30:48 -0400 |
parents | |
children |
line wrap: on
line source
""" Proteomics format classes """ import logging import re from galaxy.datatypes.data import * from galaxy.datatypes.xml import * from galaxy.datatypes.sniff import * from galaxy.datatypes.binary import * log = logging.getLogger(__name__) class Xls( Binary ): """Class describing a binary excel spreadsheet file""" file_ext = "xls" def set_peek( self, dataset, is_multi_byte=False ): if not dataset.dataset.purged: dataset.peek = "Excel Spreadsheet file" dataset.blurb = data.nice_size( dataset.get_size() ) else: dataset.peek = 'file does not exist' dataset.blurb = 'file purged from disk' def display_peek( self, dataset ): try: return dataset.peek except: return "Binary xls file (%s)" % ( data.nice_size( dataset.get_size() ) ) class PepXml(GenericXml): """pepXML data""" file_ext = "pepxml" def set_peek( self, dataset, is_multi_byte=False ): """Set the peek and blurb text""" if not dataset.dataset.purged: dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte ) dataset.blurb = 'pepXML data' else: dataset.peek = 'file does not exist' dataset.blurb = 'file purged from disk' def sniff( self, filename ): """ Determines whether the file is pepXML """ #TODO - Use a context manager on Python 2.5+ to close handle handle = open(filename) xmlns_re = re.compile(".*pepXML\"") for i in range(3): line = handle.readline() if xmlns_re.match(line.strip()): handle.close() return True handle.close() return False class MzML( GenericXml ): """mzML data""" file_ext = "mzml" def set_peek( self, dataset, is_multi_byte=False ): """Set the peek and blurb text""" if not dataset.dataset.purged: dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte ) dataset.blurb = 'mzML Mass Spectrometry data' else: dataset.peek = 'file does not exist' dataset.blurb = 'file purged from disk' def sniff( self, filename ): handle = open(filename) xmlns_re = re.compile("^<mzML") for i in range(3): line = handle.readline() if xmlns_re.match(line.strip()): handle.close() return True handle.close() return False class ProtXML( Text ): """protXML data""" file_ext = "protxml" def set_peek( self, dataset, is_multi_byte=False ): """Set the peek and blurb text""" if not dataset.dataset.purged: dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte ) dataset.blurb = 'prot XML Search Results' else: dataset.peek = 'file does not exist' dataset.blurb = 'file purged from disk' def sniff( self, filename ): protxml_header = [ '<?xml version="1.0" encoding="ISO-8859-1"?>', 'xmlns="http://regis-web.systemsbiology.net/protXML"' ] for i, line in enumerate( file( filename ) ): if i >= len( pepxml_header ): return True line = line.rstrip( '\n\r' ) if protxml_header[ i ] not in line: return False class MzXML( Text ): """mzXML data""" file_ext = "mzXML" def set_peek( self, dataset, is_multi_byte=False ): """Set the peek and blurb text""" if not dataset.dataset.purged: dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte ) dataset.blurb = 'mzXML Mass Spectrometry data' else: dataset.peek = 'file does not exist' dataset.blurb = 'file purged from disk' def sniff( self, filename ): mzxml_header = [ '<?xml version="1.0" encoding="ISO-8859-1"?>', '<mzXML xmlns="http://sashimi.sourceforge.net/schema_revision/mzXML_2.1" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://sashimi.sourceforge.net/schema_revision/mzXML_2.1 http://sashimi.sourceforge.net/schema_revision/mzXML_2.1/mzXML_idx_2.1.xsd">' ] for i, line in enumerate( file( filename ) ): if i >= len( mzxml_header ): return True line = line.rstrip( '\n\r' ) if line != mzxml_header[ i ]: return False class Mgf( Text ): """Mascot Generic Format data""" file_ext = "mgf" def set_peek( self, dataset, is_multi_byte=False ): """Set the peek and blurb text""" if not dataset.dataset.purged: dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte ) dataset.blurb = 'mgf Mascot Generic Format' else: dataset.peek = 'file does not exist' dataset.blurb = 'file purged from disk' def sniff( self, filename ): mgf_begin_ions = "BEGIN IONS" max_lines=100 for i, line in enumerate( file( filename ) ): line = line.rstrip( '\n\r' ) if line==mgf_begin_ions: return True if i>max_lines: return False class MascotDat( Text ): """Mascot search results """ file_ext = "mascotdat" def set_peek( self, dataset, is_multi_byte=False ): """Set the peek and blurb text""" if not dataset.dataset.purged: dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte ) dataset.blurb = 'mascotdat Mascot Search Results' else: dataset.peek = 'file does not exist' dataset.blurb = 'file purged from disk' def sniff( self, filename ): mime_version = "MIME-Version: 1.0 (Generated by Mascot version 1.0)" max_lines=10 for i, line in enumerate( file( filename ) ): line = line.rstrip( '\n\r' ) if line==mime_version: return True if i>max_lines: return False