annotate commons/pyRepetUnit/hmmer/hmmOutput/HmmpfamOutputProcessing.py @ 31:0ab839023fe4

Uploaded
author m-zytnicki
date Tue, 30 Apr 2013 14:33:21 -0400
parents 94ab73e8a190
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
18
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
1 from commons.pyRepetUnit.hmmer.hmmOutput.HmmOutput import HmmOutput
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
2 import re
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
3 from commons.pyRepetUnit.hmmer.hmmOutput.HmmOutputProcessing import HmmOutputProcessing
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
4
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
##Concrete implementation of HmmOutputProcessing for reports written by hmmpfam
#
class HmmpfamOutputProcessing(HmmOutputProcessing):

    # The patterns are raw strings so that "\s", "\d", "\S" and "\/" reach the
    # regex engine verbatim instead of being parsed as (invalid) string escapes.
    # They are compiled once here because each one is tried on every line read.

    # Line that ends the part of a report scanned by readHmmOutput.
    _ALIGNMENTS_HEADER_RE = re.compile(r"Alignments of top-scoring domains:.*")

    # Line carrying the name of the query sequence (captured in group 1).
    _QUERY_SEQUENCE_RE = re.compile(r"Query sequence:\s*(.*)")

    # Row of dashes; rows are only collected once this line has been seen.
    #warning : do no use a return character because they change between several OS
    _TABLE_SEPARATOR_RE = re.compile(r"--------\s+-------\s+-----\s+-----\s+-----\s+-----\s+-----\s+-------.*")

    # One result row. Groups: 1 = first token, 2-3 = first pair of integers,
    # 4-5 = second pair of integers, 6 = value ending in ".<digit>",
    # 7 = rest of the line.
    # NOTE(review): in the hmmpfam domain table these presumably are the model
    # name, seq-f/seq-t, hmm-f/hmm-t, score and E-value - confirm against the
    # HMMER 2 documentation.
    _DOMAIN_ROW_RE = re.compile(r"(\S+)\s+\d+\/\d+\s+(\d+)\s+(\d+)\s+.+\s+(\d+)\s+(\d+)\s+.+\s+(.+\.\d)\s+(.+)")

    ##read an output from hmmpfam and return a array with results useful to build a .align file
    #
    # Lines are consumed from the handle up to and including the first line that
    # starts with "Alignments of top-scoring domains:". The very first line read
    # is only compared with that marker; it is not parsed for results.
    #
    # @param hmmerOutputFile file handle of file generated by software searching hmm profiles
    # @return a HmmOutput filled, for every matching row found after the dashed
    #         separator line, with
    #         [query name, group 2, group 3, group 1, group 4, group 5, group 7, group 6]
    #         (all regex captures are strings), or None when the handle is already
    #         at end of file or when end of file is reached before the
    #         "Alignments of top-scoring domains:" line
    #
    def readHmmOutput(self, hmmerOutputFile):
        #Tested with HMMER 2.3.2 on Linux (Debian) and on Unix (Solaris)
        line = hmmerOutputFile.readline()
        if line == "":
            # Nothing left to read.
            return None

        tabResult = HmmOutput()
        # Bound up front so that a row met before any "Query sequence:" line is
        # stored with a None name instead of raising UnboundLocalError.
        seqName = None
        inDomainTable = False

        while not self._ALIGNMENTS_HEADER_RE.match(line):
            line = hmmerOutputFile.readline()
            if line == "":
                # End of file before the alignments section: the report is
                # incomplete, so everything collected so far is discarded.
                return None

            queryMatch = self._QUERY_SEQUENCE_RE.match(line)
            if queryMatch:
                seqName = queryMatch.group(1)

            if self._TABLE_SEPARATOR_RE.match(line):
                inDomainTable = True

            rowMatch = self._DOMAIN_ROW_RE.match(line)
            if rowMatch and inDomainTable:
                tabResult.append([
                    seqName,
                    rowMatch.group(2),
                    rowMatch.group(3),
                    rowMatch.group(1),
                    rowMatch.group(4),
                    rowMatch.group(5),
                    rowMatch.group(7),
                    rowMatch.group(6),
                ])

        return tabResult