view commons/pyRepetUnit/hmmer/hmmOutput/HmmpfamOutputProcessing.py @ 18:94ab73e8a190

Uploaded
author m-zytnicki
date Mon, 29 Apr 2013 03:20:15 -0400
parents
children
line wrap: on
line source

from commons.pyRepetUnit.hmmer.hmmOutput.HmmOutput import HmmOutput
import re
from commons.pyRepetUnit.hmmer.hmmOutput.HmmOutputProcessing import HmmOutputProcessing

##Concrete implementation for specific methods
#
class HmmpfamOutputProcessing (HmmOutputProcessing):

    # Patterns are written as raw strings so that regex escapes such as \s and \d
    # are not interpreted (and warned about) as Python string escapes, and they
    # are compiled once here instead of on every line read.

    # Line that ends the per-query section this parser is interested in.
    _ALIGNMENTS_HEADER = re.compile(r"Alignments of top-scoring domains:.*")
    # Line carrying the name of the query sequence.
    _QUERY_SEQUENCE = re.compile(r"Query sequence:\s*(.*)")
    # Dashed separator line that precedes the rows to collect.
    # Warning: do not put an end-of-line character in this pattern, because
    # line endings differ between operating systems.
    _TABLE_SEPARATOR = re.compile(r"--------\s+-------\s+-----\s+-----\s+-----\s+-----\s+-----\s+-------.*")
    # One result row; 7 capture groups, see readHmmOutput for how they are used.
    _DOMAIN_HIT = re.compile(r"(\S+)\s+\d+\/\d+\s+(\d+)\s+(\d+)\s+.+\s+(\d+)\s+(\d+)\s+.+\s+(.+\.\d)\s+(.+)")

    ## Read an output from hmmpfam and return an array with results useful to build a .align file
    #
    # Reads the handle line by line until a line starting with
    # "Alignments of top-scoring domains:" is found. Rows are collected only
    # after the dashed separator line has been seen.
    #
    # Each collected row is appended to the result as a list:
    # [query name, group 2, group 3, group 1, group 4, group 5, group 7, group 6]
    # where "group N" is the N-th capture group of the row pattern.
    # NOTE(review): presumably these are seq-from, seq-to, model name, hmm-from,
    # hmm-to, E-value and score of the hmmpfam domain table -- confirm against
    # real HMMER 2.3.2 output.
    #
    # @param hmmerOutputFile file handle of file generated by software searching hmm profiles
    # @return a HmmOutput instance filled with the collected rows, or None when
    #         the handle is already at end of file or when end of file is
    #         reached before the "Alignments of top-scoring domains:" line
    #
    def readHmmOutput( self, hmmerOutputFile ):
        #Tested with HMMER 2.3.2 on Linux (Debian) and on Unix (Solaris)
        line = hmmerOutputFile.readline()
        if line == "":
            # Nothing (left) to read.
            return None
        tabResult = HmmOutput()
        # Bound up front so that a result row met before any "Query sequence:"
        # line is recorded with a None name instead of raising UnboundLocalError.
        seqName = None
        inDomainTable = False
        # The line read above is only tested against the section header; every
        # following line goes through the full set of checks below.
        while not self._ALIGNMENTS_HEADER.match(line):
            line = hmmerOutputFile.readline()
            if line == "":
                # End of file before the alignments section: discard what was collected.
                return None
            queryMatch = self._QUERY_SEQUENCE.match(line)
            if queryMatch:
                seqName = queryMatch.group(1)
            if self._TABLE_SEPARATOR.match(line):
                inDomainTable = True
            hitMatch = self._DOMAIN_HIT.match(line)
            if hitMatch and inDomainTable:
                tabResult.append([seqName, hitMatch.group(2), hitMatch.group(3), hitMatch.group(1), hitMatch.group(4), hitMatch.group(5), hitMatch.group(7), hitMatch.group(6)])
        return tabResult