18
|
1 #!/usr/bin/env python
|
|
2
|
|
3 # Copyright INRA (Institut National de la Recherche Agronomique)
|
|
4 # http://www.inra.fr
|
|
5 # http://urgi.versailles.inra.fr
|
|
6 #
|
|
7 # This software is governed by the CeCILL license under French law and
|
|
8 # abiding by the rules of distribution of free software. You can use,
|
|
9 # modify and/ or redistribute the software under the terms of the CeCILL
|
|
10 # license as circulated by CEA, CNRS and INRIA at the following URL
|
|
11 # "http://www.cecill.info".
|
|
12 #
|
|
13 # As a counterpart to the access to the source code and rights to copy,
|
|
14 # modify and redistribute granted by the license, users are provided only
|
|
15 # with a limited warranty and the software's author, the holder of the
|
|
16 # economic rights, and the successive licensors have only limited
|
|
17 # liability.
|
|
18 #
|
|
19 # In this respect, the user's attention is drawn to the risks associated
|
|
20 # with loading, using, modifying and/or developing or reproducing the
|
|
21 # software by the user in light of its specific status of free software,
|
|
22 # that may mean that it is complicated to manipulate, and that also
|
|
23 # therefore means that it is reserved for developers and experienced
|
|
24 # professionals having in-depth computer knowledge. Users are therefore
|
|
25 # encouraged to load and test the software's suitability as regards their
|
|
26 # requirements in conditions enabling the security of their systems and/or
|
|
27 # data to be ensured and, more generally, to use and operate it in the
|
|
28 # same conditions as regards security.
|
|
29 #
|
|
30 # The fact that you are presently reading this means that you have had
|
|
31 # knowledge of the CeCILL license and that you accept its terms.
|
|
32
|
|
33 import math
|
|
34 from commons.core.LoggerFactory import LoggerFactory
|
|
35 from commons.core.utils.RepetOptionParser import RepetOptionParser
|
|
36 from commons.core.utils.FileUtils import FileUtils
|
|
37 from commons.core.parsing.VarscanFile import VarscanFile
|
|
38 from commons.core.seq.Bioseq import Bioseq
|
|
39
|
|
40 LOG_DEPTH = "core.parsing"
|
|
41
|
|
42 ##Reference launcher implementation
|
|
43 #
|
|
44 class VarscanToVCF(object):
|
|
45
|
|
46 def __init__(self, varscanFileName = "", vcfFileName = "", doClean = False, verbosity = 0):
|
|
47 self._varscanFileName = varscanFileName
|
|
48 self.setvcfFileName(vcfFileName)
|
|
49 self._doClean = doClean
|
|
50 self._verbosity = verbosity
|
|
51
|
|
52 self._vcfRevision = "VCFv4.1"
|
|
53 self._vcfHeader = "#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO"
|
|
54
|
|
55 self._log = LoggerFactory.createLogger("%s.%s" % (LOG_DEPTH, self.__class__.__name__), self._verbosity)
|
|
56
|
|
57 def setAttributesFromCmdLine(self):
|
|
58 description = "Conver Varscan file to VCF file."
|
|
59 epilog = "\t$ python VarscanToVCF.py -i varscanFileName -v 2"
|
|
60 parser = RepetOptionParser(description = description, epilog = epilog)
|
|
61 parser.add_option("-i", "--Varscan", dest = "varscanFileName", action = "store", type = "string", help = "input Varscan file name [compulsory] [format: varscan2.2.8]", default = "")
|
|
62 parser.add_option("-o", "--vcfFileName",dest = "vcfFileName", action = "store", type = "string", help = "vcfFileName file name [default: <input>.vcf]", default = "")
|
|
63 parser.add_option("-c", "--clean", dest = "doClean", action = "store_true", help = "clean temporary files [optional] [default: False]", default = False)
|
|
64 parser.add_option("-v", "--verbosity", dest = "verbosity", action = "store", type = "int", help = "verbosity [optional] [default: 1]", default = 1)
|
|
65 options = parser.parse_args()[0]
|
|
66 self._setAttributesFromOptions(options)
|
|
67
|
|
68 def _setAttributesFromOptions(self, options):
|
|
69 self.setvarscanFileName(options.varscanFileName)
|
|
70 self.setvcfFileName(options.vcfFileName)
|
|
71 self.setDoClean(options.doClean)
|
|
72 self.setVerbosity(options.verbosity)
|
|
73
|
|
74 def setvarscanFileName(self, varscanFileName):
|
|
75 self._varscanFileName = varscanFileName
|
|
76
|
|
77 def setvcfFileName(self, vcfFileName):
|
|
78 if vcfFileName == "":
|
|
79 self._vcfFileName = "%s.vcf" % self._varscanFileName
|
|
80 else:
|
|
81 self._vcfFileName = vcfFileName
|
|
82
|
|
83 def setDoClean(self, doClean):
|
|
84 self._doClean = doClean
|
|
85
|
|
86 def setVerbosity(self, verbosity):
|
|
87 self._verbosity = verbosity
|
|
88
|
|
89 def _checkOptions(self):
|
|
90 if self._varscanFileName == "":
|
|
91 self._logAndRaise("ERROR: Missing input file name")
|
|
92 else:
|
|
93 if not FileUtils.isRessourceExists(self._varscanFileName):
|
|
94 self._logAndRaise("ERROR: Input Varscan file '%s' does not exist!" % self._varscanFileName)
|
|
95
|
|
96 def _logAndRaise(self, errorMsg):
|
|
97 self._log.error(errorMsg)
|
|
98 raise Exception(errorMsg)
|
|
99
|
|
100 def _convertVarscanLineToVCFRecord(self, varscanLine, lineNumber):
|
|
101 iVarscanFile = VarscanFile()
|
|
102 iVarscanFile.setTypeOfVarscanFile("Varscan_2_2_8")
|
|
103 iVarscanHit = iVarscanFile.createVarscanObjectFromLine(varscanLine, lineNumber)
|
|
104 Chrom = iVarscanHit.getChrom()
|
|
105 Pos = int(iVarscanHit.getPosition())
|
|
106 #ID = str(lineNumber)
|
|
107 ID = "."
|
|
108 Ref = iVarscanHit.getRef()
|
|
109 Alt = iVarscanHit.getVar()
|
|
110 Qual = -10*math.log10(float(iVarscanHit.getPValue()))
|
|
111 Filter = "."
|
|
112 AF = float(iVarscanHit.getVarFreq()[:-1])/100
|
|
113 DP = int(iVarscanHit.getReadsRef()) + int(iVarscanHit.getReadsVar())
|
|
114 RBQ = iVarscanHit.getQualRef()
|
|
115 ABQ = iVarscanHit.getQualVar()
|
|
116 #MQ = iVarscanHit.getMapQualRef()
|
|
117 Info = ";".join(["AF=%.4f" %AF,"DP=%d" %DP,"RBQ=%s" %RBQ, "ABQ=%s" %ABQ])
|
|
118
|
|
119 allel = Bioseq().getATGCNFromIUPACandATGCN(iVarscanHit.getCns(), Ref)
|
|
120 if allel != Alt:
|
|
121 self._log.warning("'VarAllele' attribute of Varscan file line '%d' was not correct. Correcting using '%s' instead of '%s'." % (lineNumber, allel, Alt))
|
|
122 Alt = allel
|
|
123
|
|
124 vcfLine = "%s\t%s\t%s\t%s\t%s\t%.9f\t%s\t%s\n" % (Chrom, Pos, ID, Ref, Alt, Qual, Filter, Info)
|
|
125 return vcfLine
|
|
126
|
|
127 def run(self):
|
|
128 LoggerFactory.setLevel(self._log, self._verbosity)
|
|
129 self._checkOptions()
|
|
130 self._log.info("START Varscan To VCF")
|
|
131 self._log.debug("Input file name: %s" % self._varscanFileName)
|
|
132
|
|
133 with open(self._vcfFileName, "w") as fVCF:
|
|
134 fVCF.write("##fileformat=%s\n" % self._vcfRevision)
|
|
135 fVCF.write("%s\n" % self._vcfHeader)
|
|
136
|
|
137 with open(self._varscanFileName, "r") as fVarscan:
|
|
138 lineNumber = 1
|
|
139 line = fVarscan.readline()
|
|
140 while line:
|
|
141 if line[0] != "#" and "Chrom\tPosition\tRef\tCons" not in line:
|
|
142 vcfLine = self._convertVarscanLineToVCFRecord(line, lineNumber)
|
|
143 fVCF.write(vcfLine)
|
|
144 line = fVarscan.readline()
|
|
145 lineNumber += 1
|
|
146
|
|
147 self._log.info("END Varscan To VCF")
|
|
148
|
|
149 if __name__ == "__main__":
|
|
150 iLaunch = VarscanToVCF()
|
|
151 iLaunch.setAttributesFromCmdLine()
|
|
152 iLaunch.run() |