Mercurial > repos > yufei-luo > s_mart
view SMART/DiffExpAnal/compareOverlapping_parallel.py @ 34:529e3e6a0954
Deleted selected files
author | m-zytnicki |
---|---|
date | Tue, 30 Apr 2013 14:35:27 -0400 |
parents | 94ab73e8a190 |
children |
line wrap: on
line source
#! /usr/bin/env python #This program is a wrapp for CompareOverlapping.py. import optparse, os, sys, subprocess, tempfile, shutil, tarfile, glob import os, struct, time, random from optparse import OptionParser from commons.core.parsing.ParserChooser import ParserChooser from commons.core.writer.Gff3Writer import Gff3Writer from SMART.Java.Python.CompareOverlapping import CompareOverlapping from SMART.Java.Python.structure.Transcript import Transcript from SMART.Java.Python.structure.Interval import Interval from SMART.Java.Python.ncList.NCList import NCList from SMART.Java.Python.ncList.NCListCursor import NCListCursor from SMART.Java.Python.ncList.NCListFilePickle import NCListFilePickle, NCListFileUnpickle from SMART.Java.Python.ncList.FileSorter import FileSorter from SMART.Java.Python.misc.Progress import Progress from SMART.Java.Python.misc.UnlimitedProgress import UnlimitedProgress from SMART.Java.Python.misc import Utils def stop_err( msg ): sys.stderr.write( "%s\n" % msg ) sys.exit() def toTar(tarFileName, overlapOutputNames): dir = os.path.dirname(tarFileName) tfile = tarfile.open(tarFileName + ".tmp.tar", "w") currentPath = os.getcwd() os.chdir(dir) for file in overlapOutputNames: relativeFileName = os.path.basename(file) tfile.add(relativeFileName) os.system("mv %s %s" % (tarFileName + ".tmp.tar", tarFileName)) tfile.close() os.chdir(currentPath) def __main__(): description = "Compare Overlapping wrapp script: Get the a list of data which overlap with a reference set. [Category: Data Comparison]" parser = OptionParser(description = description) parser.add_option("-i", "--input1", dest="inputFileName1", action="store", type="string", help="input file 1 (for annotation) [compulsory] [format: file in transcript format given by -f]") parser.add_option("-f", "--format1", dest="format1", action="store", type="string", help="format of file 1 [compulsory] [format: transcript file format]") parser.add_option("", "--inputTxt", dest="inputTxt", action="store", type="string", help="input, a txt file for a list of input reads files. Should identify all reads files format, given by -g [compulsory]") #parser.add_option("-j", "--input2", dest="inputFileName2", action="store", default="inputRead", type="string", help="input file 2 [compulsory] [format: file in transcript format given by -g]") parser.add_option("-g", "--format2", dest="format2", action="store", type="string", help="format of file 2 [compulsory] [format: transcript file format]") #parser.add_option("-o", "--output", dest="output", action="store", default=None, type="string", help="output file [compulsory] [format: output file in GFF3 format]") parser.add_option("-S", "--start1", dest="start1", action="store", default=None, type="int", help="only consider the n first nucleotides of the transcripts in file 1 (do not use it with -U) [format: int]") parser.add_option("-s", "--start2", dest="start2", action="store", default=None, type="int", help="only consider the n first nucleotides of the transcripts in file 2 (do not use it with -u) [format: int]") parser.add_option("-U", "--end1", dest="end1", action="store", default=None, type="int", help="only consider the n last nucleotides of the transcripts in file 1 (do not use it with -S) [format: int]") parser.add_option("-u", "--end2", dest="end2", action="store", default=None, type="int", help="only consider the n last nucleotides of the transcripts in file 2 (do not use it with -s) [format: int]") parser.add_option("-t", "--intron", dest="introns", action="store_true", default=False, help="also report introns [format: bool] [default: false]") parser.add_option("-E", "--5primeExtension1", dest="fivePrime1", action="store", default=None, type="int", help="extension towards 5' in file 1 [format: int]") parser.add_option("-e", "--5primeExtension2", dest="fivePrime2", action="store", default=None, type="int", help="extension towards 5' in file 2 [format: int]") parser.add_option("-N", "--3primeExtension1", dest="threePrime1", action="store", default=None, type="int", help="extension towards 3' in file 1 [format: int]") parser.add_option("-n", "--3primeExtension2", dest="threePrime2", action="store", default=None, type="int", help="extension towards 3' in file 2 [format: int]") parser.add_option("-c", "--colinear", dest="colinear", action="store_true", default=False, help="colinear only [format: bool] [default: false]") parser.add_option("-a", "--antisense", dest="antisense", action="store_true", default=False, help="antisense only [format: bool] [default: false]") parser.add_option("-d", "--distance", dest="distance", action="store", default=None, type="int", help="accept some distance between query and reference [format: int]") parser.add_option("-k", "--included", dest="included", action="store_true", default=False, help="keep only elements from file 1 which are included in an element of file 2 [format: bool] [default: false]") parser.add_option("-K", "--including", dest="including", action="store_true", default=False, help="keep only elements from file 2 which are included in an element of file 1 [format: bool] [default: false]") parser.add_option("-m", "--minOverlap", dest="minOverlap", action="store", default=None, type="int", help="minimum number of nucleotides overlapping to declare an overlap [format: int] [default: 1]") parser.add_option("-p", "--pcOverlap", dest="pcOverlap", action="store", default=None, type="int", help="minimum percentage of nucleotides to overlap to declare an overlap [format: int]") parser.add_option("-O", "--notOverlapping", dest="notOverlapping", action="store_true", default=False, help="also output not overlapping data [format: bool] [default: false]") parser.add_option("-x", "--exclude", dest="exclude", action="store_true", default=False, help="invert the match [format: bool] [default: false]") parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int]") parser.add_option('', '--tar', dest='outputTar', default=None, help='output all SAM results in a tar file.' ) parser.add_option( '', '--outTxt', dest='outTxtFile', help='The output list of results files on txt format.[compulsory]' ) (options, args) = parser.parse_args() #Parse the input txt file and read a list of BAM files. file = open(options.inputTxt, "r") lines = file.readlines() inputFileNames = [] overlapOutputNames = [] outputName = options.outTxtFile resDirName = os.path.dirname(outputName) + "/" #Write output txt file and define all output sam file names. out = open(outputName, "w") for line in lines: tab = line.split() inputFileNames.append(tab[1]) overlapOutName = resDirName + tab[0] + '_overlapOut_%s.gff3' % random.randrange(0, 10000) overlapOutputNames.append(overlapOutName) out.write(tab[0] + '\t' + overlapOutName + '\n') file.close() out.close() #construction the commandes for each input file cmds = [] for i in range(len(inputFileNames)): absFile = sys.argv[0] absDir = os.path.dirname(absFile) parentDir = os.path.abspath(os.path.join(absDir, os.path.pardir)) cmd = "python %s/Java/Python/CompareOverlappingSmallQuery.py " % parentDir opts = "-i %s -f %s -j %s -g %s -o %s " % (options.inputFileName1, options.format1, inputFileNames[i], options.format2, overlapOutputNames[i]) #if options.start1 != None: # opts += "-S %s " % options.start1 #if options.start2 != None: # opts += "-s %s " % options.start2 #if options.end1 != None: # opts += "-U %s " % options.end1 #if options.end2 != None: # opts += "-u %s " % options.end2 #if options.fivePrime1 != None: # opts += "-E %s " % options.fivePrime1 #if options.fivePrime2 != None: # opts += "-e %s " % options.fivePrime2 #if options.threePrime1 != None: # opts += "-N %s " % options.threePrime1 #if options.threePrime2 != None: # opts += "-n %s " % options.threePrime2 #if options.colinear: # opts += "-c " #if options.antisense: # opts +="-a " #if options.included: # opts += "-k " #if options.including: # opts += "-K " #if options.pcOverlap != None: # opts += "-p %s " % options.pcOverlap if options.notOverlapping: opts += "-O " if options.exclude: opts += "-x " if options.distance != None: opts += "-d %s " % options.distance #if options.minOverlap != None: # opts += "-m %s " % options.minOverlap cmd += opts cmds.append(cmd) print "les commandes sont %s \n" % cmds tmp_files = [] for i in range(len(cmds)): try: tmp_out = tempfile.NamedTemporaryFile().name tmp_files.append(tmp_out) tmp_stdout = open( tmp_out, 'wb' ) tmp_err = tempfile.NamedTemporaryFile().name tmp_files.append(tmp_err) tmp_stderr = open( tmp_err, 'wb' ) proc = subprocess.Popen( args=cmds[i], shell=True, cwd=".", stdout=tmp_stdout, stderr=tmp_stderr ) returncode = proc.wait() tmp_stderr.close() # get stderr, allowing for case where it's very large tmp_stderr = open( tmp_err, 'rb' ) stderr = '' buffsize = 1048576 try: while True: stderr += tmp_stderr.read( buffsize ) if not stderr or len( stderr ) % buffsize != 0: break except OverflowError: pass tmp_stdout.close() tmp_stderr.close() if returncode != 0: raise Exception, stderr except Exception, e: stop_err( 'Error in :\n' + str( e ) ) if options.outputTar != None: toTar(options.outputTar, overlapOutputNames) for tmp_file in tmp_files: os.remove(tmp_file) if __name__=="__main__": __main__()