changeset 8:61d634d123c0 draft

Added datatypes_conf and sra class.
author matt-shirley <mdshw5@gmail.com>
date Sun, 18 Nov 2012 17:12:24 -0500
parents d7708f338c82
children cf0980cfd040
files datatypes_conf.xml sra.py
diffstat 2 files changed, 58 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/datatypes_conf.xml	Sun Nov 18 17:12:24 2012 -0500
@@ -0,0 +1,12 @@
+<?xml version="1.0"?>
+<datatypes>
+  <datatype_files>
+    <datatype_file name="sra.py"/>
+  </datatype_files>
+  <registration>
+    <datatype extension="sra" type="galaxy.datatypes.binary:Sra" display_in_upload="true"/>
+  </registration>
+  <sniffers>
+    <sniffer type="galaxy.datatypes.binary:Sra"/>
+  </sniffers>
+</datatypes>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/sra.py	Sun Nov 18 17:12:24 2012 -0500
@@ -0,0 +1,46 @@
+"""
+Sra class
+"""
+
+import galaxy.datatypes.binary
+from galaxy.datatypes.binary import Binary
+import data, logging, binascii
+from galaxy.datatypes.metadata import MetadataElement
+from galaxy.datatypes import metadata
+from galaxy.datatypes.sniff import *
+from galaxy import eggs
+import pkg_resources
+pkg_resources.require( "bx-python" )
+import os, subprocess, tempfile
+import struct
+
+class Sra( Binary ):
+    """ Sequence Read Archive (SRA) """
+    file_ext = "sra"
+
+    def __init__( self, **kwd ):
+        Binary.__init__( self, **kwd )
+    def sniff( self, filename ):
+        # The first 8 bytes of any NCBI sra file are 'NCBI.sra', and the file is binary. EBI and DDBJ files may differ. For details
+        # about the format, see http://www.ncbi.nlm.nih.gov/books/n/helpsra/SRA_Overview_BK/#SRA_Overview_BK.4_SRA_Data_Structure
+        try:
+            header = open( filename ).read(8)
+            if binascii.b2a_hex( header ) == binascii.hexlify( 'NCBI.sra' ):
+                return True
+            return False
+        except:
+            return False
+    def set_peek( self, dataset, is_multi_byte=False ):
+        if not dataset.dataset.purged:
+            dataset.peek  = "Binary sra file" 
+            dataset.blurb = data.nice_size( dataset.get_size() )
+        else:
+            dataset.peek = 'file does not exist'
+            dataset.blurb = 'file purged from disk'
+    def display_peek( self, dataset ):
+        try:
+            return dataset.peek
+        except:
+            return "Binary sra file (%s)" % ( data.nice_size( dataset.get_size() ) )
+
+Binary.register_sniffable_binary_format("sra", "sra", Sra)