diff commons/pyRepetUnit/hmmer/hmmOutput/HmmpfamOutputProcessing.py @ 31:0ab839023fe4

Uploaded
author m-zytnicki
date Tue, 30 Apr 2013 14:33:21 -0400
parents 94ab73e8a190
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/commons/pyRepetUnit/hmmer/hmmOutput/HmmpfamOutputProcessing.py	Tue Apr 30 14:33:21 2013 -0400
@@ -0,0 +1,36 @@
+from commons.pyRepetUnit.hmmer.hmmOutput.HmmOutput import HmmOutput
+import re
+from commons.pyRepetUnit.hmmer.hmmOutput.HmmOutputProcessing import HmmOutputProcessing
+
+## Concrete implementation of HmmOutputProcessing for output produced by hmmpfam
+#
+class HmmpfamOutputProcessing (HmmOutputProcessing):
+    
+    ## Read hmmpfam output up to the "Alignments of top-scoring domains:" line and return the rows useful to build a .align file
+    #
+    # @param hmmerOutputFile file handle of the file generated by the software searching hmm profiles
+    # @return HmmOutput holding one 8-item list per matching table row, or None if end of file is reached first
+    def readHmmOutput( self, hmmerOutputFile ):
+        #Tested with HMMER 2.3.2 on Linux (Debian) and on Unix (Solaris)
+        line = hmmerOutputFile.readline()
+        if line == "":  # readline() returns "" only at end of file: nothing left to parse
+            return None
+        inDomainTable = False  # set once the dashed separator row above the table rows has been seen
+        tabResult = HmmOutput()
+        # patterns are raw strings so that \s, \d, \S, \. and \/ reach the regex engine instead of being invalid string escapes
+        while not re.match(r"Alignments of top-scoring domains:.*", line):
+            line = hmmerOutputFile.readline()
+            m = re.match(r"Query sequence:\s*(.*)", line)
+            if m:
+                seqName = m.group(1)
+            #warning: do not match on the end-of-line character, because it differs between operating systems
+            if re.match(r"--------\s+-------\s+-----\s+-----\s+-----\s+-----\s+-----\s+-------.*", line):
+                inDomainTable = True
+            # NOTE(review): row order looks like [query, seq-from, seq-to, model, hmm-from, hmm-to, E-value, score] - confirm against hmmpfam output
+            m = re.match(r"(\S+)\s+\d+\/\d+\s+(\d+)\s+(\d+)\s+.+\s+(\d+)\s+(\d+)\s+.+\s+(.+\.\d)\s+(.+)", line)
+            if m and inDomainTable:
+                tabResult.append([seqName, m.group(2), m.group(3), m.group(1), m.group(4), m.group(5), m.group(7), m.group(6)])
+            if line == "":
+                # end of file reached before the "Alignments" line: the rows gathered so far are discarded
+                return None
+        return tabResult