annotate commons/pyRepetUnit/profilesDB/ProfilesDB4Repet.py @ 18:94ab73e8a190

Uploaded
author m-zytnicki
date Mon, 29 Apr 2013 03:20:15 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
18
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
1 #!/usr/bin/env python
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
2
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
3 import re
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
4 import getopt
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
5 import sys
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
6 from commons.pyRepetUnit.profilesDB.Profiles import Profiles
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
7 from commons.core.LoggerFactory import LoggerFactory
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
8
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
# Prefix of the logger name; ProfilesDB4Repet.__init__ appends the class name to it.
LOG_DEPTH = "commons.pyRepetUnit.profiles"
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
10
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
## Format a profiles DB for pipelines in REPET
#
# Every profile read from the input DB is assigned a TE protein-domain tag
# (GAG, AP, INT, RT, RH, ENV, ...) and written to the output DB with its
# second line rewritten as "NAME <accNumber>_<name>_<domain>_<GA_cut_off>".
#
class ProfilesDB4Repet( object ):

    ## Constructor: empty file names, verbosity 2, logger named after the class
    #
    def __init__(self):
        self.profile = Profiles()
        self._inputFile = ""
        self._outputFile = ""
        self._verbosity = 2
        self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self._verbosity)


    ## Print the command-line usage on the standard output
    #
    def _help( self ):
        # print("...") with a single argument behaves identically under
        # Python 2 and Python 3; print("") emits the blank line.
        print("")
        print("usage: %s.py [ options ]" % ( type(self).__name__ ))
        print("options:")
        print(" -h: this help")
        print(" -i: name of the profiles DB to format for Repet")
        print(" -o: name of the output profiles DB for Repet")
        print("")


    ## Set the input and output file names from the command line (sys.argv)
    #
    # Exits with status 1 (after printing the error and the help) on an
    # unknown option, and with status 0 after printing the help for "-h".
    #
    def _setAttributesFromCmdLine( self ):
        try:
            # Positional arguments are accepted by getopt but not used.
            opts, _ = getopt.getopt(sys.argv[1:], "hi:o:")
        except getopt.GetoptError as err:
            print(str(err))
            self._help()
            sys.exit(1)
        for option, value in opts:
            if option == "-h":
                self._help()
                sys.exit(0)
            elif option == "-i":
                self.setInputFile( value )
            elif option == "-o":
                self.setOutputFile( value )


    ## Guess the TE protein domain a profile belongs to
    #
    # The rules are tried in a fixed order and the first one that matches
    # wins, so the order of the tests below is significant.
    #
    # profile[1] is the line that _writeModifiedProfile replaces by the new
    # NAME line; profile[2] is searched for PFxxxxx accession numbers and
    # profile[3] for descriptive key words -- presumably the ACC and DESC
    # lines of the profile (TODO confirm against Profiles).
    # A missing third or fourth line is treated as empty.
    #
    # @param profile list of strings (the lines of one profile)
    # @return string domain tag, "OTHER" when no rule matches
    #
    #TODO: add nb of each domain in log file, verbose...
    def _searchCurrentDomain(self, profile):
        #TODO: pattern GAGA and GAGE should be excluded !
        #TODO: add new tag like "ORF1_LTR" for ATHILA as key word in Pfam
        #TODO: add new tags from GypsyDB (MOV etc...)
        name = profile[1]
        acc = profile[2] if len(profile) > 2 else ""
        desc = profile[3] if len(profile) > 3 else ""

        # True if at least one pattern matches the name line or the description line.
        def found(*patterns):
            return any(re.search(p, name) or re.search(p, desc) for p in patterns)

        # True if at least one pattern matches the name line.
        def inName(*patterns):
            return any(re.search(p, name) for p in patterns)

        # True if at least one pattern matches the accession line.
        def inAcc(*patterns):
            return any(re.search(p, acc) for p in patterns)

        if (found("[gG][aA][Gg]", "Zinc knuckle")
                or inAcc("PF02813", "PF01021")
                or inName("GAG_", "GAGCOAT_")):
            return "GAG"

        # The description-line test used to be "[aA]aspartic" (typo), so
        # "Aspartic proteinase" descriptions were never recognised.
        asparticWord = found("[aA]spartic", "[aA]spartyl")
        proteaseWord = found("[pP]roteinase", "[pP]rotease", "[pP]eptidase", "[eE]ndopeptidase")
        if ((asparticWord and proteaseWord)
                or (found("[rR]etrotransposon") and found("[pP]eptidase"))
                or (inName("AP_") and not re.search("[eE]ndonuclease", desc))):
            return "AP"

        # An integrase flagged with an isolated " C " is not tagged INT here.
        if ((found("[iI]ntegrase") and not found(" C "))
                or found(".*[Cc]hromo.*")
                or inName("INT_")):
            return "INT"

        if (found("[rR]everse") and found("[tT]ranscriptase")) or inName("RT_"):
            return "RT"

        # The former extra test on "H " is implied by the test on "H".
        if (found("R[nN]ase") and found("H")) or inName("RNaseH_"):
            return "RH"

        if ((found("[eE]nvelope") and found("[pP]rotein", "[pP]olyprotein", "[gG]lycoprotein"))
                or (found("ENV") and found("[pP]olyprotein"))
                or inAcc("PF07253", "PF03811")
                or inName("ENV_")):
            return "ENV"

        if found("[tT]yrosine") and found("[rR]ecombinase"):
            return "YR"

        # Endonucleases: apurinic ("AP ") ones are APE, all the others EN.
        if found("[eE]ndonuclease"):
            if found("AP ", "[aA]purinic"):
                return "APE"
            return "EN"

        transposase = found("[tT]ransposase")
        dde = found("DDE ")
        if ((transposase and not dde)
                or inName("DUF659")
                or inAcc("PF01695", "PF02316", "PF09039", "PF04827", "PF05699")):
            return "Tase"
        if transposase and dde:
            return "Tase*"

        if ((found("[rR]eplication") and found("[pP]rotein") and found("A "))
                or found("[rR]epA ", "RPA")):
            return "RPA"

        if found("[cC]-integrase"):
            return "C-INT"

        if found("[pP]ackaging") and found("ATPase"):
            return "ATP"

        if ((found("[cC]ysteine") and found("[pP]rotease", "[pP]eptidase"))
                or found("[pP]eptidase_C")
                or inAcc("PF00559")):
            return "CYP"

        if (found(r"[pP]ol\S*_*B")
                or (found("[pP]olymerase") and found("B "))):
            return "POLB"

        if found("[hH]elicase"):
            return "HEL"

        return "OTHER"


    ## Replace the old profile name by accession number, name, domain and gather cut off
    #
    # All the lines are written unchanged except the second one (index 1),
    # which is replaced by "NAME <accNumber>_<name>_<domain>_<GA_cut_off>".
    # The accession number, name and cut off are read from self.profile.
    #
    # @param fout file handle
    # @param profile list of strings (the lines of one profile)
    # @param currentDomain string
    #
    def _writeModifiedProfile(self, fout, profile, currentDomain):
        for index, line in enumerate(profile):
            if index != 1:
                fout.write(line)
            else:
                fout.write("NAME " + self.profile.accNumber + "_"
                           + self.profile.name + "_"
                           + currentDomain + "_"
                           + str(self.profile.GA_cut_off) + "\n")


    ## Set input file name
    #
    # @param inputFileName string
    #
    def setInputFile(self, inputFileName):
        self._inputFile = inputFileName


    ## Set output file name
    #
    # @param outputFileName string
    #
    def setOutputFile(self, outputFileName):
        self._outputFile = outputFileName


    ## Read a profiles DB file, parse it and, write a new profiles DB with TE domain information and GA score cut_off placed side by side of the name
    #
    # A warning is logged for every profile tagged "OTHER".
    # Both files are closed when the method returns, even on error.
    #
    def run( self ):
        LoggerFactory.setLevel(self._log, self._verbosity)
        # Nested "with" blocks (rather than a single two-item "with") keep
        # the code valid on Python 2.6 as well.
        with open( self._inputFile ) as fileIn:
            with open( self._outputFile, "w" ) as fout:
                # NOTE(review): the first profile is fetched with
                # readAndRetrieve() and the following ones with read();
                # presumably readAndRetrieve() switches the Profiles instance
                # into a "retrieve" mode that read() keeps -- confirm.
                profile = self.profile.readAndRetrieve( fileIn )
                while profile is not None:
                    currentDomain = self._searchCurrentDomain(profile)
                    if currentDomain == "OTHER":
                        self._log.warning(self.profile.accNumber + " " + self.profile.name + " has no associated domain")
                    self._writeModifiedProfile(fout, profile, currentDomain)
                    profile = self.profile.read( fileIn )
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
221
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
222
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
if __name__ == "__main__":
    # Script entry point: take -i / -o from the command line, then format the DB.
    formatter = ProfilesDB4Repet()
    formatter._setAttributesFromCmdLine()
    formatter.run()