diff snpeff.py @ 0:d78b2b2a3388 draft

Uploaded
author iuc
date Thu, 22 Jan 2015 07:58:16 -0500
parents
children 2b53f59de80c
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/snpeff.py	Thu Jan 22 07:58:16 2015 -0500
@@ -0,0 +1,77 @@
+"""
+SnpEff datatypes
+"""
+import os,os.path,re,sys,gzip,logging
+import galaxy.datatypes.data
+from galaxy.datatypes.data import Text
+from galaxy.datatypes.metadata import MetadataElement
+
+log = logging.getLogger(__name__)
+
+class SnpEffDb( Text ):
+    """Class describing a SnpEff genome build"""
+    file_ext = "snpeffdb"
+    MetadataElement( name="genome_version", default=None, desc="Genome Version", readonly=True, visible=True, no_value=None )
+    MetadataElement( name="snpeff_version", default="SnpEff4.0", desc="SnpEff Version", readonly=True, visible=True, no_value=None )
+    MetadataElement( name="regulation", default=[], desc="Regulation Names", readonly=True, visible=True, no_value=[], optional=True)
+    MetadataElement( name="annotation", default=[], desc="Annotation Names", readonly=True, visible=True, no_value=[], optional=True)
+
+    def __init__( self, **kwd ):
+        Text.__init__( self, **kwd )
+
+    "" The SnpEff version line was added in SnpEff version 4.1 
+    def getSnpeffVersionFromFile(self, path):
+        snpeff_version = None
+        try:
+            fh = gzip.open(path, 'rb')
+            buf = fh.read(100)
+            lines = buf.splitlines()
+            m = re.match('^(SnpEff)\s+(\d+\.\d+).*$',lines[0].strip())
+            if m:
+                snpeff_version = m.groups()[0] + m.groups()[1]
+            fh.close()
+        except Exception, e:
+            pass
+        return snpeff_version
+
+    def set_meta( self, dataset, **kwd ):
+        Text.set_meta(self, dataset, **kwd )
+        data_dir = dataset.extra_files_path
+        ## search data_dir/genome_version for files
+        regulation_pattern = 'regulation_(.+).bin'
+        #  annotation files that are included in snpEff by a flag
+        annotations_dict = {'nextProt.bin' : '-nextprot','motif.bin': '-motif'}
+        regulations = []
+        annotations = []
+        genome_version = None
+        snpeff_version = None
+        if data_dir and os.path.isdir(data_dir):
+            for root, dirs, files in os.walk(data_dir):
+                for fname in files:
+                    if fname.startswith('snpEffectPredictor'):
+                        # if snpEffectPredictor.bin download succeeded
+                        genome_version = os.path.basename(root)
+                        dataset.metadata.genome_version = genome_version
+                    else:
+                        m = re.match(regulation_pattern,fname)
+                        if m:
+                            name = m.groups()[0]
+                            regulations.append(name)
+                        elif fname in annotations_dict:
+                            value = annotations_dict[fname]
+                            name = value.lstrip('-')
+                            annotations.append(name)
+            dataset.metadata.regulation = regulations
+            dataset.metadata.annotation = annotations
+            try:
+                fh = file(dataset.file_name,'w')
+                fh.write("%s\n" % genome_version if genome_version else 'Genome unknown')
+                fh.write("%s\n" % snpeff_version if snpeff_version else 'SnpEff version unknown')
+                if annotations:
+                    fh.write("annotations: %s\n" % ','.join(annotations))
+                if regulations:
+                    fh.write("regulations: %s\n" % ','.join(regulations))
+                fh.close()
+            except:
+                pass
+