comparison deseq/differential_expression_analysis_pipeline_for_rnaseq_data-a03838a6eb54/DiffExpAnal/countNumber_parallel.py @ 10:6e573fd3c41b draft

Uploaded
author yufei-luo
date Mon, 13 May 2013 10:06:30 -0400
parents
children
comparison
equal deleted inserted replaced
9:a03838a6eb54 10:6e573fd3c41b
1 #! /usr/bin/env python
2 """
3 Yufei LUO
4 """
5
6 import optparse, os, sys, subprocess, tempfile, shutil, tarfile, random
7 from optparse import OptionParser
8
def stop_err(msg):
    """Report a fatal error and terminate the script.

    Writes ``msg`` followed by a newline to standard error, then raises
    ``SystemExit`` with exit status 1 so that the calling process can tell
    the run failed (a bare ``sys.exit()`` would report status 0).

    msg -- text describing the failure.
    """
    sys.stderr.write('%s\n' % msg)
    sys.exit(1)
12
def toTar(tarFileName, outCountNames):
    """Bundle the count files into an uncompressed tar archive.

    Each entry of ``outCountNames`` is looked up by its base name inside the
    directory that holds ``tarFileName`` and stored in the archive under that
    base name.  The archive is first written to ``tarFileName + ".tmp.tar"``
    and only moved to ``tarFileName`` once it has been completely written and
    closed.

    tarFileName   -- path of the archive to create.
    outCountNames -- iterable of count file paths; only the base names are
                     used to locate and name the archive members.

    Raises whatever ``tarfile``/``shutil`` raise when a member is missing or
    the archive cannot be written or moved; in that case the temporary
    archive is removed.
    """
    tarDir = os.path.dirname(tarFileName)
    tmpTarName = tarFileName + ".tmp.tar"

    tfile = tarfile.open(tmpTarName, "w")
    completed = False
    try:
        for countFile in outCountNames:
            memberName = os.path.basename(countFile)
            # arcname keeps the member names relative without having to
            # change the process-wide working directory.
            tfile.add(os.path.join(tarDir, memberName), arcname=memberName)
        completed = True
    finally:
        # Close before moving so the end-of-archive blocks are on disk.
        tfile.close()
        if not completed:
            os.remove(tmpTarName)

    shutil.move(tmpTarName, tarFileName)
24
25
def _run_count_command(argv):
    """Run one count command and return ``(returncode, stderr_text)``.

    The child's standard output is discarded.  Its standard error is written
    to an anonymous temporary file and read back after the child exits.
    """
    with open(os.devnull, "wb") as sink:
        with tempfile.TemporaryFile() as errFile:
            proc = subprocess.Popen(args=argv, cwd=".", stdout=sink, stderr=errFile)
            returncode = proc.wait()
            errFile.seek(0)
            raw = errFile.read()
    if not isinstance(raw, str):
        # Python 3 hands back bytes; the message is only used for reporting.
        raw = raw.decode("utf-8", "replace")
    return returncode, raw


def __main__():
    """Command-line entry point: count every listed file with countNumber.pl.

    Options (read from ``sys.argv``):
      -i/--input   text file with one whitespace-separated record per line;
                   the first field is a label, the second an input file path.
      -o/--output  tab-separated table to write, with the header
                   ``label  files  group`` and one row per input record.
      -t/--tar     optional path of a tar archive that receives all count
                   files.

    For each record a count file named
    ``<label>_outCount_<random 0..9999>.csv`` is placed next to the output
    table and ``perl countNumber.pl <input> <count file>`` is run in the
    current directory, one command after another.  The first command that
    fails aborts the script through ``stop_err`` with the child's stderr.
    """
    # Parse Command Line
    parser = optparse.OptionParser()
    parser.add_option("-i", "--input", dest="inputFile", help="input txt file, a list of overlapping results files.")
    parser.add_option("-o", "--output", dest="outputFile", help="Out txt file.")
    parser.add_option("-t", "--tar", dest="outputTar", default=None, help="output all count results in a tar file.")
    options, _ = parser.parse_args()
    if options.inputFile is None or options.outputFile is None:
        parser.error("both --input and --output are required")

    outputName = options.outputFile
    # May be empty when the output has no directory part; os.path.join then
    # yields a path relative to the current directory instead of "/".
    resDirName = os.path.dirname(outputName)

    # Parse the input txt file and read a list of transcripts files.
    with open(options.inputFile, "r") as listing:
        lines = listing.readlines()

    inputFileNames = []
    outCountNames = []

    # Write output txt file and define all output count file names
    with open(outputName, "w") as out:
        out.write("label\tfiles\tgroup\n")
        for lineNumber, line in enumerate(lines, 1):
            tab = line.split()
            if not tab:
                continue  # tolerate blank lines in the list file
            if len(tab) < 2 or len(tab[0]) < 6:
                stop_err('Error in :\n' + "line %d of %s must hold a label of at least "
                         "6 characters and a file name" % (lineNumber, options.inputFile))
            label = tab[0]
            countBase = "%s_outCount_%s.csv" % (label, random.randrange(0, 10000))
            outCountName = os.path.join(resDirName, countBase)
            inputFileNames.append(tab[1])
            outCountNames.append(outCountName)
            # The group column is the sixth character of the label.
            # NOTE(review): presumably labels follow a fixed naming scheme
            # that encodes the group at that position -- confirm with callers.
            out.write(label + '\t' + outCountName + '\t' + label[5] + '\n')

    # Run the count commands; an argument list (no shell) keeps paths with
    # spaces or shell metacharacters intact.
    for inputFileName, outCountName in zip(inputFileNames, outCountNames):
        try:
            returncode, stderr = _run_count_command(
                ["perl", "countNumber.pl", inputFileName, outCountName])
            if returncode != 0:
                raise Exception(stderr)
        except Exception as e:
            stop_err('Error in :\n' + str(e))

    if options.outputTar is not None:
        toTar(options.outputTar, outCountNames)
97
# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    __main__()