6
|
1 # Copyright INRA (Institut National de la Recherche Agronomique)
|
|
2 # http://www.inra.fr
|
|
3 # http://urgi.versailles.inra.fr
|
|
4 #
|
|
5 # This software is governed by the CeCILL license under French law and
|
|
6 # abiding by the rules of distribution of free software. You can use,
|
|
7 # modify and/ or redistribute the software under the terms of the CeCILL
|
|
8 # license as circulated by CEA, CNRS and INRIA at the following URL
|
|
9 # "http://www.cecill.info".
|
|
10 #
|
|
11 # As a counterpart to the access to the source code and rights to copy,
|
|
12 # modify and redistribute granted by the license, users are provided only
|
|
13 # with a limited warranty and the software's author, the holder of the
|
|
14 # economic rights, and the successive licensors have only limited
|
|
15 # liability.
|
|
16 #
|
|
17 # In this respect, the user's attention is drawn to the risks associated
|
|
18 # with loading, using, modifying and/or developing or reproducing the
|
|
19 # software by the user in light of its specific status of free software,
|
|
20 # that may mean that it is complicated to manipulate, and that also
|
|
21 # therefore means that it is reserved for developers and experienced
|
|
22 # professionals having in-depth computer knowledge. Users are therefore
|
|
23 # encouraged to load and test the software's suitability as regards their
|
|
24 # requirements in conditions enabling the security of their systems and/or
|
|
25 # data to be ensured and, more generally, to use and operate it in the
|
|
26 # same conditions as regards security.
|
|
27 #
|
|
28 # The fact that you are presently reading this means that you have had
|
|
29 # knowledge of the CeCILL license and that you accept its terms.
|
|
30
|
|
31
|
|
32 from commons.core.checker.CheckerException import CheckerException
|
|
33 from commons.core.parsing.VarscanHit import VarscanHit
|
|
34 import re
|
|
35
|
|
## One line of a VarScan result file, enriched with the values needed for GnpSNP
#
# On top of the fields handled by VarscanHit, this class stores the fields
# Reads1, Reads2, VarFreq, Strands1, Strands2, Qual1, Qual2 and Pvalue
# (indexes 4 to 11 of a split line), the 5' and 3' flanks, and the
# GnpSNP-formatted alleles, position, polymorphism type/length and occurrence.
#
class VarscanHitForGnpSNP(VarscanHit):

    # Pattern checked at the START of the VarFreq field (re.match is only
    # anchored at the beginning): digits, commas and percent signs.
    _VAR_FREQ_PATTERN = re.compile(r"[0-9,%]+")

    ## Constructor: every attribute starts empty, except the occurrence which starts at 1
    #
    def __init__(self):
        VarscanHit.__init__(self)
        self._reads1 = ''
        self._reads2 = ''
        self._varFreq = ''
        self._strands1 = ''
        self._strands2 = ''
        self._qual1 = ''
        self._qual2 = ''
        self._pvalue = ''
        self._5flank = ''
        self._3flank = ''
        self._gnpSnp_ref = ''
        self._gnpSnp_var = ''
        self._gnpSnp_position = 0
        self._polymType = ''
        self._polymLength = 0
        self._occurrence = 1

    ## Equal operator
    #
    # @param o a VarscanHitForGnpSNP instance
    # @return True if the VarscanHit part and every attribute of this class are equal;
    #         False if o is not a VarscanHitForGnpSNP instance
    #
    def __eq__(self, o):
        # Anything else (None, a plain VarscanHit, ...) lacks the attributes
        # compared below, so it cannot be equal.
        if not isinstance(o, VarscanHitForGnpSNP):
            return False
        return VarscanHit.__eq__(self, o) \
            and self._reads1 == o._reads1 and self._reads2 == o._reads2 \
            and self._varFreq == o._varFreq and self._strands1 == o._strands1 \
            and self._strands2 == o._strands2 and self._qual1 == o._qual1 \
            and self._qual2 == o._qual2 and self._pvalue == o._pvalue \
            and self._3flank == o._3flank and self._5flank == o._5flank \
            and self._gnpSnp_position == o._gnpSnp_position and self._gnpSnp_ref == o._gnpSnp_ref \
            and self._gnpSnp_var == o._gnpSnp_var and self._polymLength == o._polymLength \
            and self._polymType == o._polymType and self._occurrence == o._occurrence

    ## Tell whether another hit has the same chromosome, position and polymorphism type
    #
    # @param iVarscanHitForGnpSNP a VarscanHitForGnpSNP instance
    # @return True if the three values are equal
    #
    def isPolymTypeAlreadyFoundAtThisChromAndThisPosition(self, iVarscanHitForGnpSNP):
        return self._chrom == iVarscanHitForGnpSNP.getChrom() \
            and self._position == iVarscanHitForGnpSNP.getPosition() \
            and self._polymType == iVarscanHitForGnpSNP.getPolymType()

    ## Set the occurrence to the one of the given hit plus one, when that hit
    #  has the same chromosome, position and polymorphism type
    #
    # @param iVarscanHitForGnpSNP a VarscanHitForGnpSNP instance, or None (nothing is done)
    #
    def manageOccurrence(self, iVarscanHitForGnpSNP=None):
        # Identity test on purpose: "!= None" would go through the custom __eq__.
        if iVarscanHitForGnpSNP is not None \
                and self.isPolymTypeAlreadyFoundAtThisChromAndThisPosition(iVarscanHitForGnpSNP):
            self._occurrence = iVarscanHitForGnpSNP.getOccurrence() + 1

    ## Compute polymorphism type, GnpSNP position, GnpSNP alleles and polymorphism length
    #  from the variant allele
    #
    # A variant containing "-" is a DELETION, a variant containing "+" is an
    # INSERTION, anything else is a SNP.
    #
    def formatAlleles2GnpSnp(self):
        sVar = self.getVar()
        if "-" in sVar:
            self._polymType = "DELETION"
            # the GnpSNP position is shifted by one compared to the VarScan position
            self._gnpSnp_position = int(self._position) + 1
            # the first character of the variant is dropped
            self._gnpSnp_ref = self._var[1:]
            self._gnpSnp_var = "-" * len(self._gnpSnp_ref)
            self._polymLength = len(self._gnpSnp_ref)
        elif "+" in sVar:
            self._polymType = "INSERTION"
            self._gnpSnp_position = int(self._position)
            # the first character of the variant is dropped
            self._gnpSnp_var = self._var[1:]
            self._gnpSnp_ref = "-" * len(self._gnpSnp_var)
            # NOTE(review): the length is 1 whatever the inserted sequence is,
            # unlike the DELETION case - presumably intended, to be confirmed.
            self._polymLength = 1
        else:
            self._polymType = "SNP"
            self._gnpSnp_position = int(self._position)
            self._gnpSnp_var = self._var
            self._gnpSnp_ref = self._ref
            self._polymLength = 1

    ## Set the Reads1 field
    #
    # @param nbReadsLikeRef value of the Reads1 field
    #
    def setReads1(self, nbReadsLikeRef):
        self._reads1 = nbReadsLikeRef

    ## Set the Reads2 field
    #
    # @param nbReadsLikeVar value of the Reads2 field
    #
    def setReads2(self, nbReadsLikeVar):
        self._reads2 = nbReadsLikeVar

    ## Set the VarFreq field, stored as a float
    #
    # The "%" characters are removed and "," is replaced by "." before the conversion.
    #
    # @param frequencyOfVariantAllele string such as "12,5%"
    # @exception ValueError if the cleaned string cannot be converted to a float
    #
    def setVarFreq(self, frequencyOfVariantAllele):
        sCleanedFrequency = frequencyOfVariantAllele.replace("%", "").replace(",", ".")
        self._varFreq = float(sCleanedFrequency)

    ## Set the Strands1 field
    #
    # @param strandsOfReferenceAllele value of the Strands1 field
    #
    def setStrands1(self, strandsOfReferenceAllele):
        self._strands1 = strandsOfReferenceAllele

    ## Set the Strands2 field
    #
    # @param strandsOfVariantAllele value of the Strands2 field
    #
    def setStrands2(self, strandsOfVariantAllele):
        self._strands2 = strandsOfVariantAllele

    ## Set the Qual1 field
    #
    # @param averageQualityOfRef value of the Qual1 field
    #
    def setQual1(self, averageQualityOfRef):
        self._qual1 = averageQualityOfRef

    ## Set the Qual2 field
    #
    # @param averageQualityOfVar value of the Qual2 field
    #
    def setQual2(self, averageQualityOfVar):
        self._qual2 = averageQualityOfVar

    ## Set the Pvalue field
    #
    # @param pvalue value of the Pvalue field
    #
    def setPvalue(self, pvalue):
        self._pvalue = pvalue

    ## Set the 5' flank
    #
    # @param s5flank the 5' flank
    #
    def set5flank(self, s5flank):
        self._5flank = s5flank

    ## Set the 3' flank
    #
    # @param s3flank the 3' flank
    #
    def set3flank(self, s3flank):
        self._3flank = s3flank

    ## Set the GnpSNP reference allele
    #
    # @param ref the GnpSNP reference allele
    #
    def setGnpSNPRef(self, ref):
        self._gnpSnp_ref = ref

    ## Set the GnpSNP variant allele
    #
    # @param var the GnpSNP variant allele
    #
    def setGnpSNPVar(self, var):
        self._gnpSnp_var = var

    ## Set the GnpSNP position
    #
    # @param position the GnpSNP position
    #
    def setGnpSNPPosition(self, position):
        self._gnpSnp_position = position

    ## Set the occurrence
    #
    # @param occurrence the occurrence
    #
    def setOccurrence(self, occurrence):
        self._occurrence = occurrence

    ## Set the polymorphism type
    #
    # @param polymType the polymorphism type ("DELETION", "INSERTION" or "SNP"
    #        when computed by formatAlleles2GnpSnp)
    #
    def setPolymType(self, polymType):
        self._polymType = polymType

    ## Set the polymorphism length
    #
    # @param polymLength the polymorphism length
    #
    def setPolymLength(self, polymLength):
        self._polymLength = polymLength

    ## @return the Reads1 field
    #
    def getReads1(self):
        return self._reads1

    ## @return the Reads2 field
    #
    def getReads2(self):
        return self._reads2

    ## @return the VarFreq field (a float once set with setVarFreq)
    #
    def getVarFreq(self):
        return self._varFreq

    ## @return the Strands1 field
    #
    def getStrands1(self):
        return self._strands1

    ## @return the Strands2 field
    #
    def getStrands2(self):
        return self._strands2

    ## @return the Qual1 field
    #
    def getQual1(self):
        return self._qual1

    ## @return the Qual2 field
    #
    def getQual2(self):
        return self._qual2

    ## @return the Pvalue field
    #
    def getPvalue(self):
        return self._pvalue

    ## @return the 5' flank
    #
    def get5flank(self):
        return self._5flank

    ## @return the 3' flank
    #
    def get3flank(self):
        return self._3flank

    ## @return the polymorphism type
    #
    def getPolymType(self):
        return self._polymType

    ## @return the GnpSNP variant allele
    #
    def getGnpSnpVar(self):
        return self._gnpSnp_var

    ## @return the GnpSNP reference allele
    #
    def getGnpSnpRef(self):
        return self._gnpSnp_ref

    ## @return the GnpSNP position
    #
    def getGnpSnpPosition(self):
        return self._gnpSnp_position

    ## @return the polymorphism length
    #
    def getPolymLength(self):
        return self._polymLength

    ## @return the occurrence
    #
    def getOccurrence(self):
        return self._occurrence

    ## Return a mandatory field of a split line
    #
    # @param lResults list of the fields of the line
    # @param index index of the field in lResults
    # @param fieldName name of the field, used in the error message
    # @param iCurrentLineNumber line number, used in the error message
    # @return the value of the field
    # @exception CheckerException if the field is missing or is an empty string
    #
    def _getMandatoryField(self, lResults, index, fieldName, iCurrentLineNumber):
        # A line that is too short is reported like an empty field instead of
        # failing with an IndexError.
        if index >= len(lResults) or lResults[index] == '':
            raise CheckerException("The field %s is empty in varscan file in line %s" % (fieldName, iCurrentLineNumber))
        return lResults[index]

    ## Set all the attributes from the fields of one line of a VarScan file
    #
    # The first fields are handled by VarscanHit.setAttributes; this method
    # then reads, in order, Reads1 (index 4), Reads2 (5), VarFreq (6),
    # Strands1 (7), Strands2 (8), Qual1 (9), Qual2 (10) and Pvalue (11).
    # The attributes read before an invalid field are already set when the
    # exception is raised.
    #
    # @param lResults list of the fields of the line
    # @param iCurrentLineNumber line number, used in the error messages
    # @exception CheckerException if a field is missing or empty, or if VarFreq is in bad format
    #
    def setAttributes(self, lResults, iCurrentLineNumber):
        VarscanHit.setAttributes(self, lResults, iCurrentLineNumber)
        self.setReads1(self._getMandatoryField(lResults, 4, "Reads1", iCurrentLineNumber))
        self.setReads2(self._getMandatoryField(lResults, 5, "Reads2", iCurrentLineNumber))

        # VarFreq has its own message because its format is checked as well.
        sVarFreqError = "The field VarFreq is empty or in bad format in varscan file in line %s" % (iCurrentLineNumber)
        sVarFreq = lResults[6] if len(lResults) > 6 else ''
        if sVarFreq == '' or not self._VAR_FREQ_PATTERN.match(sVarFreq):
            raise CheckerException(sVarFreqError)
        try:
            self.setVarFreq(sVarFreq)
        except ValueError:
            # The pattern only checks the beginning of the field: a value such
            # as "12abc" gets here and is reported as a bad format.
            raise CheckerException(sVarFreqError)

        self.setStrands1(self._getMandatoryField(lResults, 7, "Strands1", iCurrentLineNumber))
        self.setStrands2(self._getMandatoryField(lResults, 8, "Strands2", iCurrentLineNumber))
        self.setQual1(self._getMandatoryField(lResults, 9, "Qual1", iCurrentLineNumber))
        self.setQual2(self._getMandatoryField(lResults, 10, "Qual2", iCurrentLineNumber))
        self.setPvalue(self._getMandatoryField(lResults, 11, "Pvalue", iCurrentLineNumber))