18
|
1 from commons.pyRepetUnit.hmmer.hmmOutput.HmmOutput import HmmOutput
|
|
2 import re
|
|
3 from commons.pyRepetUnit.hmmer.hmmOutput.HmmOutputProcessing import HmmOutputProcessing
|
|
4
|
|
5 ##Concrete implementation for specific methods
|
|
6 #
|
|
class HmmpfamOutputProcessing (HmmOutputProcessing):

    # The patterns below are raw strings, so that "\s", "\d", "\/" ... reach the
    # regex engine untouched instead of relying on Python leaving unknown string
    # escapes alone (which raises a warning on recent interpreters). They are
    # compiled once here rather than on every line read.
    #
    # warning : do not use a return character in these patterns because it
    # changes between several OS

    ## line that ends the part of the report parsed by readHmmOutput
    _ALIGNMENTS_HEADER = re.compile(r"Alignments of top-scoring domains:.*")

    ## line giving the name of the query sequence (captured in group 1)
    _QUERY_SEQUENCE = re.compile(r"Query sequence:\s*(.*)")

    ## dashed rule with 8 columns; hit rows are only collected once it was seen
    _HITS_TABLE_RULE = re.compile(r"--------\s+-------\s+-----\s+-----\s+-----\s+-----\s+-----\s+-------.*")

    ## one hit row, 7 captured groups
    #  NOTE(review): in the HMMER 2 "Parsed for domains" table these presumably
    #  are model, seq-from, seq-to, hmm-from, hmm-to, score and E-value; confirm
    #  against the hmmpfam documentation.
    _HIT_ROW = re.compile(r"(\S+)\s+\d+\/\d+\s+(\d+)\s+(\d+)\s+.+\s+(\d+)\s+(\d+)\s+.+\s+(.+\.\d)\s+(.+)")

    ## read an output from hmmpfam and return an array with results useful to build a .align file
    #
    # The file is read line by line until a line starting with
    # "Alignments of top-scoring domains:" is found. Each hit row met after the
    # dashed table rule is appended as
    # [query name, group 2, group 3, group 1, group 4, group 5, group 7, group 6]
    # (groups of the hit row pattern; the last two are stored in swapped order).
    #
    # @param hmmerOutputFile file handle of file generated by software searching hmm profiles
    # @return a HmmOutput instance filled with one entry per hit row, or None
    #         if the file is empty or ends before the "Alignments of
    #         top-scoring domains:" line
    #
    def readHmmOutput( self, hmmerOutputFile ):
        #Tested with HMMER 2.3.2 on Linux (Debian) and on Unix (Solaris)
        line = hmmerOutputFile.readline()
        if line == "":
            # empty file: nothing to parse
            return None

        tabResult = HmmOutput()
        inHitsTable = False
        # The very first line is only tested against the "Alignments" header;
        # the other patterns are applied to the lines read inside the loop.
        while not self._ALIGNMENTS_HEADER.match(line):
            line = hmmerOutputFile.readline()
            if line == "":
                # end of file reached before the "Alignments" header:
                # the report is considered incomplete and discarded
                return None

            queryMatch = self._QUERY_SEQUENCE.match(line)
            if queryMatch:
                seqName = queryMatch.group(1)

            if self._HITS_TABLE_RULE.match(line):
                inHitsTable = True

            hitMatch = self._HIT_ROW.match(line)
            if hitMatch and inHitsTable:
                # seqName is only bound by a "Query sequence:" line; this
                # assumes such a line always comes before the first hit row
                tabResult.append([seqName,
                                  hitMatch.group(2), hitMatch.group(3),
                                  hitMatch.group(1),
                                  hitMatch.group(4), hitMatch.group(5),
                                  hitMatch.group(7), hitMatch.group(6)])
        return tabResult
|