Mercurial > repos > urgi-team > gandalfworkflow
diff VCFFiltering_wrapper.py @ 7:a6b557df86db draft
Uploaded
author | urgi-team |
---|---|
date | Tue, 15 Dec 2015 05:35:36 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/VCFFiltering_wrapper.py Tue Dec 15 05:35:36 2015 -0500 @@ -0,0 +1,110 @@ +#!/usr/bin/env python + + +import subprocess, tempfile, sys, os, glob, shutil, time +from optparse import OptionParser +from optparse import Option, OptionValueError + +class VCFFilteringWrapper(object): + + def __init__(self): + self._options = None + + + def stop_err(self, msg ): + sys.stderr.write( "%s\n" % msg ) + sys.exit() + + + def setAttributesFromCmdLine(self): + description = "VCFFiltering_wrapper" + description += "\nWrapper for VCFFiltering ;\n VCFFiltering filters SNP on a VCF depending on depth (DP) allele number (AN), allele frequency (AF) and SNP quality.\n" + description += "example 1 : VCFFiltering.py -f myVCF.vcf -o FilteredVCF.vcf\n" + description += "example 2 : VCFFiltering.py -f myVCF.vcf -N 2 -F 0.87 -b bed1.bed bed2.bed -o FilteredVCF.vcf\n" + parser = OptionParser(description = description, version = "0.1") + parser.add_option("-f", "--vcf", dest = "VCFFile", action = "store", type = "string", help = "Input VCF File name [compulsory] [format: VCF]", default = "") + parser.add_option("-o", "--output", dest = "outFile", action = "store", type = "string", help = "output VCF File name [compulsory] [format: VCF]", default = "") + parser.add_option("-m", "--minDP", dest = "minDP", action = "store", type = "int", help = "minimum of depth ; if both minDP and maxDP are set, optimal DP will not be calculated ", default = 0) + parser.add_option("-M", "--maxDP", dest = "maxDP", action = "store", type = "int", help = "maximum of depth ; if both minDP and maxDP are set, optimal DP will not be calculated ", default = 0) + parser.add_option("-N", "--AN", dest = "AN", action = "store", type = "int", help = "maximum number of allele for a SNP; default = 2", default = 2) + parser.add_option("-F", "--AF", dest = "AF", action = "store", type = "float", help = "minimum frequency for the alternative allele of a SNP; default = 0.9", default = 0.9) + parser.add_option("-b", "--bed", dest = "bedFiles", action = "append", type = "string", help = "bed files: list of coordinates to filter, multiple arguments allowed '-b file1 file2' ", default = []) + parser.add_option("-G", "--graphHTML", dest = "graphHTML", action = "store", type = "string", help = "name of the HTML linking to graphs ", default = "") + parser.add_option("-d", "--dirGraphs", dest = "dirGraphs", action = "store", type = "string", help = "name of the folder containing graphs ", default = "") + options = parser.parse_args()[0] + self._setAttributesFromOptions(options) + + + def _setAttributesFromOptions(self, options): + self._options = options + + def run(self): + if self._options.minDP and self._options.maxDP : + if self._options.minDP > self._options.maxDP : + self.stop_err( 'error in options : minDP > max DP (%s > %s)' %(self._options.minDP,self._options.maxDP)) + + prg = "VCFFiltering.py -g -G 'png' " + args = "" + args += "-f %s" % self._options.VCFFile + args += " " + args += "-o %s" % self._options.outFile + if self._options.AF : + args += " " + args += "-F %s" % self._options.AF + if self._options.AN : + args += " " + args += "-N %s" % self._options.AN + if self._options.minDP : + args += " " + args += "-m %s" % self._options.minDP + if self._options.maxDP : + args += " " + args += "-M %s" % self._options.maxDP + if self._options.bedFiles : + if self._options.bedFiles == "": + pass + else : + self._lBedFiles = self._options.bedFiles + cmd = "%s %s" %(prg, args) + + print cmd + + try: + tmp_err = tempfile.NamedTemporaryFile().name + tmp_stderr = open( tmp_err, 'wb' ) + proc = subprocess.Popen( args=cmd, shell=True, cwd=".", stderr=tmp_stderr ) + returncode = proc.wait() + tmp_stderr.close() + # get stderr, allowing for case where it's very large + tmp_stderr = open( tmp_err, 'rb' ) + stderr = '' + buffsize = 1048576 + try: + while True: + stderr += tmp_stderr.read( buffsize ) + if not stderr or len( stderr ) % buffsize != 0: + break + except OverflowError: + pass + tmp_stderr.close() + if stderr: + raise Exception, stderr + except Exception, e: + self.stop_err( 'Error in VCFFiltering:\n' + str( e ) ) + + if True : + html = open(self._options.graphHTML, "w") + + os.mkdir(self._options.dirGraphs) + lGraphsFiles = glob.glob("VCFFiltering_graphs/*") + for file in lGraphsFiles : + baseName = os.path.basename(file) + shutil.move( file ,"%s/%s" %(self._options.dirGraphs, baseName)) + line = "<img src=\"%s\" > \n" %(baseName) + html.write(line) + + +if __name__ == "__main__": + iWrapper = VCFFilteringWrapper() + iWrapper.setAttributesFromCmdLine() + iWrapper.run()