# HG changeset patch
# User iracooke
# Date 1357517242 18000
# Node ID c10a62c886b8c5a3d1341231c2b7419e1d0aa1ca
Uploaded
diff -r 000000000000 -r c10a62c886b8 datatypes_conf.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes_conf.xml Sun Jan 06 19:07:22 2013 -0500
@@ -0,0 +1,48 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff -r 000000000000 -r c10a62c886b8 display_applications/proteomics/PepXml.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/display_applications/proteomics/PepXml.xml Sun Jan 06 19:07:22 2013 -0500
@@ -0,0 +1,18 @@
+
+
+
+
+
+
+ ${site_url}/init_local?file=${encoded_filename.qp}&type=pepxml
+
+
+
+ #import binascii
+ ${binascii.hexlify( $pepxml_file.file_name )}
+
+
+ ${BASE_URL.split(":")[1][2:]}
+
+
+
diff -r 000000000000 -r c10a62c886b8 display_applications/proteomics/ProtXml.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/display_applications/proteomics/ProtXml.xml Sun Jan 06 19:07:22 2013 -0500
@@ -0,0 +1,18 @@
+
+
+
+
+
+
+ ${site_url}/init_local?file=${encoded_filename.qp}&type=protxml
+
+
+
+ #import binascii
+ ${binascii.hexlify( $protxml_file.file_name )}
+
+
+ ${BASE_URL.split(":")[1][2:]}
+
+
+
\ No newline at end of file
diff -r 000000000000 -r c10a62c886b8 display_applications/proteomics/mzML.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/display_applications/proteomics/mzML.xml Sun Jan 06 19:07:22 2013 -0500
@@ -0,0 +1,18 @@
+
+
+
+
+
+
+ ${site_url}/init_local?file=${encoded_filename.qp}&type=mzml
+
+
+
+ #import binascii
+ ${binascii.hexlify( $mzml_file.file_name )}
+
+
+ ${BASE_URL.split(":")[1][2:]}
+
+
+
\ No newline at end of file
diff -r 000000000000 -r c10a62c886b8 pepxml_to_table.xml
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/pepxml_to_table.xml Sun Jan 06 19:07:22 2013 -0500
@@ -0,0 +1,23 @@
+
+ protkgem
+ Converts a pepXML file to a tab delimited text file
+
+
+
+pepxml_to_table.rb $input_file -o $output
+
+
+
+
+
+
+
+
+
+
+
+
+ Convert a pepXML file to Tab delimited text
+
+
+
diff -r 000000000000 -r c10a62c886b8 proteomics.py
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/proteomics.py Sun Jan 06 19:07:22 2013 -0500
@@ -0,0 +1,251 @@
+"""
+Proteomics format classes
+"""
+import logging
+import re
+from galaxy.datatypes.data import *
+from galaxy.datatypes.xml import *
+from galaxy.datatypes.sniff import *
+from galaxy.datatypes.binary import *
+
+log = logging.getLogger(__name__)
+
+
+class Xls( Binary ):
+    """Class describing a binary excel spreadsheet file"""
+    file_ext = "xls"
+
+    def set_peek( self, dataset, is_multi_byte=False ):
+        """Set the peek and blurb text for the dataset.
+
+        A fixed description and the dataset size are used while the
+        underlying file exists; placeholder text is used once it is purged.
+        ``is_multi_byte`` is accepted but not used by this method.
+        """
+        if not dataset.dataset.purged:
+            dataset.peek = "Excel Spreadsheet file"
+            dataset.blurb = data.nice_size( dataset.get_size() )
+        else:
+            dataset.peek = 'file does not exist'
+            dataset.blurb = 'file purged from disk'
+
+    def display_peek( self, dataset ):
+        """Return the stored peek, or a size-based description if reading it fails."""
+        # Catch Exception rather than using a bare except so that
+        # KeyboardInterrupt and SystemExit are not swallowed here.
+        try:
+            return dataset.peek
+        except Exception:
+            return "Binary xls file (%s)" % ( data.nice_size( dataset.get_size() ) )
+
+class ProteomicsXml(GenericXml):
+ """ An enhanced XML datatype used to reuse code across several
+ proteomic/mass-spec datatypes. """
+
+ def sniff(self, filename):
+ """ Determines whether the file is the correct XML type. """
+ # NOTE(review): this method body looks incomplete/garbled in this view --
+ # confirm against the original proteomics.py before relying on it.
+ with open(filename, 'r') as contents:
+ while True:
+ line = contents.readline()
+ # NOTE(review): readline() returns '' at end of file, never None, and
+ # startswith('') is true for every string, so as written this
+ # condition cannot trigger the break. The prefix literal was
+ # presumably non-empty originally -- TODO confirm.
+ if line == None or not line.startswith(''):
+ break
+ # NOTE(review): `pattern` is built from self.root but never used in the
+ # visible code, and the trailing comment text reads like the remains of
+ # further statements (a regex match and a max_lines check) -- TODO confirm.
+ pattern = '^<(\w*:)?%s' % self.root # pattern match max_lines:
+ return False
+
+
+class MascotDat( Text ):
+    """Mascot search results """
+    file_ext = "mascotdat"
+
+    def set_peek( self, dataset, is_multi_byte=False ):
+        """Set the peek and blurb text"""
+        if not dataset.dataset.purged:
+            dataset.peek = data.get_file_peek( dataset.file_name, is_multi_byte=is_multi_byte )
+            dataset.blurb = 'mascotdat Mascot Search Results'
+        else:
+            dataset.peek = 'file does not exist'
+            dataset.blurb = 'file purged from disk'
+
+    def sniff( self, filename ):
+        """Return True if the Mascot MIME header line appears near the top of the file.
+
+        Each line is compared, with trailing newline characters removed,
+        against the exact header text. Scanning stops once the zero-based
+        line index exceeds ``max_lines``. False is returned when no line
+        matches, including when the file is shorter than the scan window.
+        """
+        mime_version = "MIME-Version: 1.0 (Generated by Mascot version 1.0)"
+        max_lines = 10
+
+        # Use a context manager so the handle is closed on every return path.
+        with open( filename ) as handle:
+            for i, line in enumerate( handle ):
+                if line.rstrip( '\n\r' ) == mime_version:
+                    return True
+                if i > max_lines:
+                    break
+        # Always answer with a real boolean, never an implicit None.
+        return False
+
+
+class RAW( Binary ):
+    """Class describing a Thermo Finnigan binary RAW file"""
+    file_ext = "raw"
+
+    def sniff( self, filename ):
+        """Return True if the start of the file contains the Finnigan signature.
+
+        Returns False when the signature is absent or the file cannot be read.
+        """
+        # Thermo Finnigan RAW format is proprietary and hence not well documented.
+        # Files start with 2 bytes that seem to differ followed by F\0i\0n\0n\0i\0g\0a\0n
+        # This combination represents 17 bytes, but to play safe we read 20 bytes from
+        # the start of the file.
+        try:
+            # Binary mode avoids newline/encoding translation of the header,
+            # and the context manager closes the handle.
+            with open( filename, 'rb' ) as handle:
+                header = handle.read(20)
+        except (IOError, OSError):
+            return False
+        # Search the raw bytes directly; no hex conversion is needed, so the
+        # check does not depend on binascii being in scope.
+        return b'F\0i\0n\0n\0i\0g\0a\0n' in header
+
+    def set_peek( self, dataset, is_multi_byte=False ):
+        """Set the peek and blurb text for the dataset.
+
+        ``is_multi_byte`` is accepted but not used by this method.
+        """
+        if not dataset.dataset.purged:
+            dataset.peek = "Thermo Finnigan RAW file"
+            dataset.blurb = data.nice_size( dataset.get_size() )
+        else:
+            dataset.peek = 'file does not exist'
+            dataset.blurb = 'file purged from disk'
+
+    def display_peek( self, dataset ):
+        """Return the stored peek, or a size-based description if reading it fails."""
+        # Catch Exception rather than using a bare except so that
+        # KeyboardInterrupt and SystemExit are not swallowed here.
+        try:
+            return dataset.peek
+        except Exception:
+            return "Thermo Finnigan RAW file (%s)" % ( data.nice_size( dataset.get_size() ) )
+
+
+# Register the RAW sniffer only when the Binary class in scope exposes the
+# registration hook; the hasattr guard skips registration otherwise.
+if hasattr(Binary, 'register_sniffable_binary_format'):
+ Binary.register_sniffable_binary_format('RAW', 'RAW', RAW)
+
+
+class Msp(Text):
+    """ Output of NIST MS Search Program chemdata.nist.gov/mass-spc/ftp/mass-spc/PepLib.pdf """
+    file_ext = "msp"
+
+    @staticmethod
+    def next_line_starts_with(contents, prefix):
+        """Read one line from ``contents`` and report whether it begins with ``prefix``."""
+        # readline() returns '' at end of file (never None), so no None
+        # check is needed before calling startswith().
+        return contents.readline().startswith(prefix)
+
+    def sniff(self, filename):
+        """ Determines whether the file is a NIST MSP output file.
+
+        The first line must start with "Name:" and the second with "MW:".
+
+        >>> fname = get_test_fname('test.msp')
+        >>> Msp().sniff(fname)
+        True
+        >>> fname = get_test_fname('test.mzXML')
+        >>> Msp().sniff(fname)
+        False
+        """
+        with open(filename, 'r') as contents:
+            # `and` short-circuits: the second line is only read when the
+            # first one matched.
+            return Msp.next_line_starts_with(contents, "Name:") and Msp.next_line_starts_with(contents, "MW:")
+
+class Ms2(Text):
+    """Text datatype for ms2 files, recognised by their leading 'H' header lines."""
+    file_ext = "ms2"
+
+    def sniff(self, filename):
+        """ Determines whether the file is a valid ms2 file.
+
+        The leading run of lines starting with 'H<tab>' is collected, and
+        the file is accepted only if every required header field appears
+        among them.
+
+        >>> fname = get_test_fname('test.msp')
+        >>> Ms2().sniff(fname)
+        False
+        >>> fname = get_test_fname('test.ms2')
+        >>> Ms2().sniff(fname)
+        True
+        """
+        header_lines = []
+        with open(filename, 'r') as contents:
+            # Iterating the file stops at end of file, so an empty file or
+            # one made up only of header lines can no longer loop forever.
+            for line in contents:
+                if not line.startswith('H\t'):
+                    break
+                header_lines.append(line)
+
+        for header_field in ['CreationDate', 'Extractor', 'ExtractorVersion', 'ExtractorOptions']:
+            wanted = 'H\t%s' % (header_field)
+            if not any(header_line.startswith(wanted) for header_line in header_lines):
+                return False
+
+        return True
+
+# TODO: this binary format has no sniff() implementation yet, so it cannot be
+# detected from file content.
+class XHunterAslFormat(Binary):
+ """ Annotated Spectra in the HLF format http://www.thegpm.org/HUNTER/format_2006_09_15.html """
+ file_ext = "hlf"
+
+
+# Register the 'hlf' extension as an unsniffable binary type, but only when
+# the Binary class in scope exposes the registration hook.
+if hasattr(Binary, 'register_unsniffable_binary_ext'):
+ Binary.register_unsniffable_binary_ext('hlf')
diff -r 000000000000 -r c10a62c886b8 tool-data/protk_display_site.txt.sample
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/protk_display_site.txt.sample Sun Jan 06 19:07:22 2013 -0500
@@ -0,0 +1,3 @@
+#The proteomic visualization application should be hosted on the same server as Galaxy
+#Entries in this file are of the format "site_id" site_url
+Proteomics Visualize http://127.0.0.1:8500