annotate SMART/DiffExpAnal/countNumber_parallel.py @ 31:0ab839023fe4

Uploaded
author m-zytnicki
date Tue, 30 Apr 2013 14:33:21 -0400
parents 94ab73e8a190
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
18
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
1 #! /usr/bin/env python
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
2
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
3
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
4 import optparse, os, sys, subprocess, tempfile, shutil, tarfile, random
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
5 from optparse import OptionParser
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
6
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def stop_err(msg):
    """Report a fatal error and terminate the script.

    Writes ``msg`` followed by a newline to standard error, then raises
    ``SystemExit`` with status 1 so the calling process can detect the
    failure (a bare ``sys.exit()`` would exit with status 0).

    :param msg: text describing the error.
    :raises SystemExit: always.
    """
    sys.stderr.write('%s\n' % msg)
    sys.exit(1)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
10
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def toTar(tarFileName, outCountNames):
    """Bundle the count files into an uncompressed tar archive.

    The archive is first written to ``tarFileName + ".tmp.tar"`` and moved
    onto ``tarFileName`` only after it has been completely written and
    closed. Every member is stored under its base name and is read from the
    directory that holds ``tarFileName``; any directory part of the names in
    ``outCountNames`` is ignored.

    The process working directory is never changed, so relative paths keep
    their meaning and nothing has to be restored if an error occurs.

    :param tarFileName: path of the archive to create.
    :param outCountNames: iterable of count file names to archive.
    """
    tmpTarName = tarFileName + ".tmp.tar"
    # An empty dirname means "current directory"; os.path.join copes with it.
    sourceDir = os.path.dirname(tarFileName)
    try:
        archive = tarfile.open(tmpTarName, "w")
        try:
            for countName in outCountNames:
                memberName = os.path.basename(countName)
                archive.add(os.path.join(sourceDir, memberName),
                            arcname=memberName)
        finally:
            # Closing writes the end-of-archive blocks; it must happen
            # before the file is moved into place.
            archive.close()
    except Exception:
        # Do not leave a partial archive behind.
        if os.path.exists(tmpTarName):
            os.remove(tmpTarName)
        raise
    shutil.move(tmpTarName, tarFileName)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
22
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
23
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
def _runCountCommand(cmd):
    """Run one command and return ``(returncode, stderr_text)``.

    Standard output and standard error of the child are redirected to
    anonymous temporary files, so large outputs cannot fill a pipe and
    block the child; the files disappear as soon as they are closed.

    :param cmd: argument list handed to ``subprocess.Popen`` (no shell).
    """
    with tempfile.TemporaryFile() as tmp_stdout:
        with tempfile.TemporaryFile() as tmp_stderr:
            proc = subprocess.Popen(args=cmd, shell=False, cwd=".",
                                    stdout=tmp_stdout, stderr=tmp_stderr)
            returncode = proc.wait()
            tmp_stderr.seek(0)
            stderr = tmp_stderr.read()
    if not isinstance(stderr, str):
        # Binary read returns bytes on Python 3; make it printable text.
        stderr = stderr.decode("utf-8", "replace")
    return returncode, stderr


def __main__():
    """Run countNumber.pl on every file listed in the input file.

    Each non-blank line of the input file is split on whitespace: the first
    field is a label and the second the path of a file to count. For every
    such line a count file name ``<label>_outCount_<random>.csv`` is chosen
    in the directory of the output file, and a row
    ``label <TAB> count file <TAB> group`` is written to the output file
    below a ``label/files/group`` header.

    The perl script ``$REPET_PATH/SMART/DiffExpAnal/countNumber.pl`` is then
    run once per listed file, one run after the other, with the listed file
    and its count file as arguments. The first failing run aborts the script
    through ``stop_err`` with the captured standard error. If ``--tar`` was
    given, all count files are finally archived with ``toTar``.
    """
    #Parse Command Line
    parser = optparse.OptionParser()
    parser.add_option("-i", "--input", dest="inputFile", help="input txt file, a list of overlapping results files.")
    parser.add_option("-o", "--output", dest="outputFile", help="Out txt file.")
    parser.add_option("-t", "--tar", dest="outputTar", default=None, help="output all count results in a tar file.")
    (options, _unused) = parser.parse_args()
    if options.inputFile is None or options.outputFile is None:
        parser.error("both --input and --output are required")

    #Parse the input txt file and read a list of transcripts files.
    with open(options.inputFile, "r") as listing:
        lines = listing.readlines()
    inputFileNames = []
    outCountNames = []
    outputName = options.outputFile
    # No trailing '/' appended: with an output file in the current directory
    # that would send the count files to the file-system root.
    resDirName = os.path.dirname(outputName)

    #Write output txt file and define all output count file names
    with open(outputName, "w") as out:
        out.write("label\tfiles\tgroup\n")
        for line in lines:
            tab = line.split()
            if not tab:
                # Blank line (e.g. trailing newline at end of the list).
                continue
            label = tab[0]
            inputFileNames.append(tab[1])
            outCountName = os.path.join(resDirName, label + "_outCount_%s.csv" % random.randrange(0, 10000))
            outCountNames.append(outCountName)
            # The group column is the sixth character of the label.
            # NOTE(review): presumably labels follow a fixed naming scheme
            # of at least six characters -- confirm with the tool's users.
            out.write(label + '\t' + outCountName + '\t' + label[5] + '\n')

    #Run the counting script on every input file
    for inputFileName, outCountName in zip(inputFileNames, outCountNames):
        # Argument list instead of a shell string, so paths containing
        # spaces or shell metacharacters are passed through untouched.
        cmd = ["perl",
               "%s/SMART/DiffExpAnal/countNumber.pl" % os.environ["REPET_PATH"],
               inputFileName,
               outCountName]
        try:
            returncode, stderr = _runCountCommand(cmd)
            if returncode != 0:
                raise Exception(stderr)
        except Exception as e:
            stop_err('Error in :\n' + str(e))

    if options.outputTar is not None:
        toTar(options.outputTar, outCountNames)
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
95
94ab73e8a190 Uploaded
m-zytnicki
parents:
diff changeset
# Run the tool only when this file is executed as a script.
if __name__ == "__main__":
    __main__()