diff deseq/differential_expression_analysis_pipeline_for_rnaseq_data-a03838a6eb54/DiffExpAnal/countNumber_parallel.py @ 10:6e573fd3c41b draft

Uploaded
author yufei-luo
date Mon, 13 May 2013 10:06:30 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/deseq/differential_expression_analysis_pipeline_for_rnaseq_data-a03838a6eb54/DiffExpAnal/countNumber_parallel.py	Mon May 13 10:06:30 2013 -0400
@@ -0,0 +1,98 @@
+#! /usr/bin/env python
+"""
+Yufei LUO
+"""
+
+import optparse, os, sys, subprocess, tempfile, shutil, tarfile, random
+from optparse import OptionParser
+
+def stop_err(msg):
+	"""Write msg to stderr and terminate the script with a failing status.
+
+	msg -- text to report; a newline is appended.
+
+	Raises SystemExit with exit code 1, so the calling process sees the run
+	as failed (a bare sys.exit() would report success).
+	"""
+	sys.stderr.write('%s\n' % msg)
+	sys.exit(1)
+
+def toTar(tarFileName, outCountNames):
+	"""Pack the count files into an uncompressed tar archive at tarFileName.
+
+	tarFileName   -- path of the archive to create.
+	outCountNames -- paths of the count files. Only the base name of each
+	                 path is used: the file is read from the directory that
+	                 holds tarFileName and stored without a directory prefix.
+
+	The archive is first written to tarFileName + ".tmp.tar" and moved into
+	place only once it has been closed, so tarFileName never holds a
+	partially written archive. The working directory is left untouched.
+	"""
+	# An empty dirname means the archive lives in the current directory.
+	tarDir = os.path.dirname(tarFileName) or "."
+	tmpTarName = tarFileName + ".tmp.tar"
+	tfile = tarfile.open(tmpTarName, "w")
+	try:
+		for countName in outCountNames:
+			memberName = os.path.basename(countName)
+			# arcname gives the member its bare name without needing chdir.
+			tfile.add(os.path.join(tarDir, memberName), arcname=memberName)
+	finally:
+		# Close before moving so every block is flushed to disk.
+		tfile.close()
+	shutil.move(tmpTarName, tarFileName)
+
+
+def __main__():
+	"""Run countNumber.pl on every file named in the input list.
+
+	Command-line options:
+	-i/--input   text file with one whitespace-separated record per line:
+	             a label followed by the path of an overlap result file.
+	-o/--output  tab-separated table (label, files, group) to write; the
+	             count files are created in the same directory.
+	-t/--tar     optional path of a tar archive collecting the count files.
+
+	The commands are run one after another. If a command cannot be started
+	or returns a non-zero status, its stderr is reported through stop_err(),
+	which terminates the script. Temporary capture files are removed in all
+	cases.
+	"""
+	#Parse Command Line
+	parser = optparse.OptionParser()
+	parser.add_option("-i", "--input", dest="inputFile", help="input txt file, a list of overlapping results files.")
+	parser.add_option("-o", "--output", dest="outputFile", help="Out txt file.")
+	parser.add_option("-t", "--tar", dest="outputTar", default=None, help="output all count results in a tar file.")
+	(options, args) = parser.parse_args()
+	if not options.inputFile or not options.outputFile:
+		# Fail with a usage message instead of a TypeError from open(None).
+		parser.error("both --input and --output are required")
+
+	#Parse the input txt file and read a list of transcripts files.
+	with open(options.inputFile, "r") as listing:
+		# Blank lines carry no record; skip them.
+		records = [line.split() for line in listing if line.strip()]
+	for tab in records:
+		# The group column is the 6th character of the label, so a record
+		# needs two fields and a label of at least six characters.
+		# NOTE(review): presumably labels follow a fixed naming scheme -- confirm.
+		if len(tab) < 2 or len(tab[0]) < 6:
+			stop_err("Malformed line in %s: %s" % (options.inputFile, ' '.join(tab)))
+
+	outputName = options.outputFile
+	# os.path.join keeps the count files next to the output table even when
+	# outputName has no directory part (dirname + '/' would point at '/').
+	resDirName = os.path.dirname(outputName)
+
+	#Write output txt file and define all output count file names
+	inputFileNames = []
+	outCountNames = []
+	with open(outputName, "w") as out:
+		out.write("label\tfiles\tgroup\n")
+		for tab in records:
+			inputFileNames.append(tab[1])
+			outCountName = os.path.join(resDirName, tab[0] + "_outCount_%s.csv" % random.randrange(0, 10000))
+			outCountNames.append(outCountName)
+			out.write(tab[0] + '\t' + outCountName + '\t' + tab[0][5] + '\n')
+
+	tmp_files = []
+	try:
+		try:
+			for inputFileName, outCountName in zip(inputFileNames, outCountNames):
+				# Argument list without a shell: paths containing spaces or
+				# shell metacharacters reach perl unchanged.
+				cmd = ["perl", "countNumber.pl", inputFileName, outCountName]
+				# delete=False so the capture files survive close() and
+				# stderr can be read back; they are removed in the outer
+				# finally clause.
+				tmp_stdout = tempfile.NamedTemporaryFile(delete=False)
+				tmp_files.append(tmp_stdout.name)
+				tmp_stderr = tempfile.NamedTemporaryFile(delete=False)
+				tmp_files.append(tmp_stderr.name)
+				try:
+					proc = subprocess.Popen(args=cmd, cwd=".", stdout=tmp_stdout, stderr=tmp_stderr)
+					returncode = proc.wait()
+				finally:
+					tmp_stdout.close()
+					tmp_stderr.close()
+				if returncode != 0:
+					with open(tmp_stderr.name, 'rb') as errFile:
+						stderr = errFile.read()
+					# Python 3 returns bytes here; Python 2 already gives str.
+					if not isinstance(stderr, str):
+						stderr = stderr.decode('utf-8', 'replace')
+					raise Exception(stderr)
+		except Exception as e:
+			stop_err('Error in :\n' + str(e))
+
+		if options.outputTar is not None:
+			toTar(options.outputTar, outCountNames)
+	finally:
+		# Runs on the SystemExit raised by stop_err() as well.
+		for tmp_file in tmp_files:
+			if os.path.exists(tmp_file):
+				os.remove(tmp_file)
+
+# Run the pipeline only when executed as a script, not when imported.
+if __name__ == "__main__":
+	__main__()