Mercurial > repos > yufei-luo > differential_expression_analysis_pipeline_for_rnaseq_data
comparison deseq/differential_expression_analysis_pipeline_for_rnaseq_data-a03838a6eb54/DiffExpAnal/countNumber_parallel.py @ 10:6e573fd3c41b draft
Uploaded
author | yufei-luo |
---|---|
date | Mon, 13 May 2013 10:06:30 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
9:a03838a6eb54 | 10:6e573fd3c41b |
---|---|
1 #! /usr/bin/env python | |
2 """ | |
3 Yufei LUO | |
4 """ | |
5 | |
6 import optparse, os, sys, subprocess, tempfile, shutil, tarfile, random | |
7 from optparse import OptionParser | |
8 | |
def stop_err(msg):
    """Write *msg* to standard error and terminate the program.

    The process exits with status 1 so that callers (shell scripts, workflow
    engines) can tell the run failed; a bare ``sys.exit()`` would report
    success (status 0) even though an error message was printed.

    Raises:
        SystemExit: always, with exit code 1.
    """
    sys.stderr.write('%s\n' % msg)
    sys.exit(1)
12 | |
def toTar(tarFileName, outCountNames):
    """Bundle the count files into an uncompressed tar archive.

    The archive is first written to ``tarFileName + ".tmp.tar"``, closed, and
    only then moved to ``tarFileName``, so the final file is always a complete
    archive.

    Args:
        tarFileName: path of the tar archive to create.
        outCountNames: paths of the files to store. Only the base name of
            each path is used: the file is looked up in the directory that
            holds the archive and stored under that base name.

    Raises:
        OSError / IOError: if a file to add does not exist or the archive
            cannot be written or moved.
    """
    tarDir = os.path.dirname(tarFileName)
    tmpTarName = tarFileName + ".tmp.tar"
    tfile = tarfile.open(tmpTarName, "w")
    try:
        for countName in outCountNames:
            relativeFileName = os.path.basename(countName)
            # Joining with the archive directory replaces the former
            # os.chdir() dance: same file is picked up, but the process
            # working directory is never touched (and an empty directory
            # part simply means "current directory").
            tfile.add(os.path.join(tarDir, relativeFileName),
                      arcname=relativeFileName)
    finally:
        # Close before moving so the end-of-archive blocks are on disk.
        tfile.close()
    # shutil.move instead of a shell "mv": no quoting problems with spaces
    # or shell metacharacters, and failures raise instead of being ignored.
    shutil.move(tmpTarName, tarFileName)
24 | |
25 | |
def _run_count_command(cmd_args):
    """Run one command and return ``(returncode, stderr_text)``.

    stdout and stderr are redirected to anonymous temporary files (stderr can
    be very large, so it is not piped through memory while the child runs).
    The temporary files are removed automatically when closed, including on
    errors.
    """
    tmp_stdout = tempfile.TemporaryFile()
    tmp_stderr = tempfile.TemporaryFile()
    try:
        proc = subprocess.Popen(args=cmd_args, shell=False, cwd=".",
                                stdout=tmp_stdout, stderr=tmp_stderr)
        returncode = proc.wait()
        tmp_stderr.seek(0)
        chunks = []
        buffsize = 1048576
        while True:
            chunk = tmp_stderr.read(buffsize)
            # Stop on the first empty read; testing the accumulated length
            # instead would loop forever when stderr is an exact multiple
            # of buffsize.
            if not chunk:
                break
            chunks.append(chunk)
        stderr = b"".join(chunks)
    finally:
        tmp_stdout.close()
        tmp_stderr.close()
    if not isinstance(stderr, str):
        # Python 3: the file was read as bytes.
        stderr = stderr.decode("utf-8", "replace")
    return returncode, stderr


def __main__():
    """Run ``perl countNumber.pl`` on every file listed in the input table.

    Command-line options:
        -i/--input   text file; each non-blank line is
                     ``<label> <overlap result file>`` (whitespace separated).
        -o/--output  tab-separated table to write, with the columns
                     ``label``, ``files`` (the generated count file) and
                     ``group`` (the sixth character of the label).
        -t/--tar     optional tar archive collecting all count files.

    Count files are created next to the output table and named
    ``<label>_outCount_<random number>.csv``. Any failure is reported through
    ``stop_err``.
    """
    # Parse Command Line
    parser = optparse.OptionParser()
    parser.add_option("-i", "--input", dest="inputFile", help="input txt file, a list of overlapping results files.")
    parser.add_option("-o", "--output", dest="outputFile", help="Out txt file.")
    parser.add_option("-t", "--tar", dest="outputTar", default=None, help="output all count results in a tar file.")
    (options, args) = parser.parse_args()

    if options.inputFile is None or options.outputFile is None:
        stop_err('Error in :\nboth --input and --output are required')

    # Parse the input txt file and read a list of transcripts files.
    samples = []
    with open(options.inputFile, "r") as listing:
        for lineIndex, line in enumerate(listing):
            tab = line.split()
            if not tab:
                # Blank lines (e.g. a trailing newline) carry no sample.
                continue
            # The group column is taken from the sixth character of the
            # label, so the label needs at least six characters.
            if len(tab) < 2 or len(tab[0]) < 6:
                stop_err('Error in :\nmalformed line %d in %s: %r'
                         % (lineIndex + 1, options.inputFile, line))
            samples.append((tab[0], tab[1]))

    # Write output txt file and define all output count file names
    outputName = options.outputFile
    # os.path.join keeps count files beside the output table even when the
    # output path has no directory part (plain concatenation with '/' would
    # then point at the filesystem root).
    resDirName = os.path.dirname(outputName)
    inputFileNames = []
    outCountNames = []
    with open(outputName, "w") as out:
        out.write("label\tfiles\tgroup\n")
        for label, inputFileName in samples:
            outCountName = os.path.join(
                resDirName,
                "%s_outCount_%s.csv" % (label, random.randrange(0, 10000)))
            inputFileNames.append(inputFileName)
            outCountNames.append(outCountName)
            out.write(label + '\t' + outCountName + '\t' + label[5] + '\n')

    # Run the counting script once per input file. The command is passed as
    # an argument list (no shell), so file names containing spaces or shell
    # metacharacters are handled safely.
    for inputFileName, outCountName in zip(inputFileNames, outCountNames):
        cmd = ["perl", "countNumber.pl", inputFileName, outCountName]
        try:
            returncode, stderr = _run_count_command(cmd)
        except EnvironmentError as e:
            # e.g. perl is not installed or a temporary file cannot be made
            stop_err('Error in :\n' + str(e))
        if returncode != 0:
            stop_err('Error in :\n' + stderr)

    if options.outputTar is not None:
        toTar(options.outputTar, outCountNames)
97 | |
# Run the tool only when executed as a script, not when imported.
if __name__ == "__main__":
    __main__()