Mercurial > repos > fcaramia > contra

diff Contra/scripts/convert_gene_coordinate.py @ 0:7564f3b1e675
Uploaded
author: fcaramia
date: Thu, 13 Sep 2012 02:31:43 -0400
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/Contra/scripts/convert_gene_coordinate.py	Thu Sep 13 02:31:43 2012 -0400
@@ -0,0 +1,218 @@
+# ----------------------------------------------------------------------#
+# Copyright (c) 2011, Richard Lupat & Jason Li.
+#
+# > Source License <
+# This file is part of CONTRA.
+#
+#    CONTRA is free software: you can redistribute it and/or modify
+#    it under the terms of the GNU General Public License as published by
+#    the Free Software Foundation, either version 3 of the License, or
+#    (at your option) any later version.
+#
+#    CONTRA is distributed in the hope that it will be useful,
+#    but WITHOUT ANY WARRANTY; without even the implied warranty of
+#    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#    GNU General Public License for more details.
+#
+#    You should have received a copy of the GNU General Public License
+#    along with CONTRA.  If not, see <http://www.gnu.org/licenses/>.
+#
+# 
+#-----------------------------------------------------------------------#
+# Last Updated : 03 Oct 2011 11:00AM
+
+import sys
+
+def outputwrite(output, gene,chr,a,b,c,id,n,sOri, eOri):
+        id = str(id)
+	n = str(n)
+        output.write(gene+"\t"+chr+"\t"+a+"\t"+b+"\t"+c+"\t"+id+"\t"+n+"\t"+sOri+"\t"+eOri+"\n")
+
+def outputwrite2(output2, chr,c,sOri, eOri):
+        output2.write(chr+"\t"+sOri+"\t"+eOri+"\t"+c+"\n")
+
+
+def convertGeneCoordinate(targetList, bufLocFolder): 
+	inputfile2 = bufLocFolder + "chr/"
+	outputfile = bufLocFolder + "geneRefCoverage.txt"
+	outputfile2= bufLocFolder + "geneRefCoverage2.txt"
+
+	output= open(outputfile,"w")
+	output2 = open(outputfile2 , "w")
+
+	rn = 1
+	prevchr = ""
+	for target in targetList:
+		chr = target.chr
+		starts = target.oriStart.split(",")
+		ends = target.oriEnd.split(",")
+
+		if ((len(chr) > 5) or (chr[len(chr)-1] == "M")):
+			continue
+
+		if (prevchr != chr):
+			print chr #progress checking
+			prevchr = chr
+			t = 0
+			covFile = file.readlines(open(inputfile2+chr+".txt","r"))
+
+		for n in range(0,target.numberExon):
+			if t >= len(covFile):
+				break
+			cov = covFile[t].split()
+			while  ((int(starts[n]) < int(cov[1])) or (int(starts[n]) >= int(cov[2]))):
+				if (int(cov[1]) > int(starts[n])):
+					t-=1
+				else:
+					t+=1
+				cov = covFile[t].split() 
+	
+			while  ((int(ends[n]) < int(cov[1])) or (int(ends[n]) >  int(cov[2]))):
+                		# print output #
+				if (rn == 1):
+					prev = target.id
+                		nID = target.id
+                		if (nID != prev):
+					rn = 1
+				ref1 = str(rn)
+				ref2 = str(int(cov[2]) - int(starts[n]) + rn)
+				outputwrite(output, target.gene,chr,ref1,ref2,cov[3],target.id,n,starts[n],cov[2])
+
+				outputwrite2(output2, chr,cov[3],starts[n],cov[2])
+
+
+				rn = int(ref2)
+				prev = nID
+				# -- #
+
+				# get to the next line of inputfile#
+				t+= 1
+				cov = covFile[t].split()
+				starts[n] = cov[1]	
+		
+		#print output #
+		if (t == 0) and (t >= len(covFile)):
+			continue
+
+		if (rn == 1):
+			prev = target.id
+		nID = target.id
+		if (nID != prev):
+			rn = 1
+		ref1 = str(rn)
+		ref2 = str(int(ends[n]) - int(starts[n]) + rn)	
+		outputwrite(output, target.gene, chr, ref1, ref2, cov[3], target.id, n, starts[n], ends[n])	
+
+		outputwrite2(output2, chr, cov[3], starts[n], ends[n])
+
+
+		rn = int(ref2)
+		prev = nID
+		# -- #
+	output.close()	
+	output2.close()
+
+
+def convertGeneCoordinate2(targetList, bufLocFolder):
+        inputfile2 = bufLocFolder + "chr/"
+        outputfile = bufLocFolder + "geneRefCoverage.txt"
+        outputfile_avg = bufLocFolder + "geneRefCoverage_targetAverage.txt"
+
+        output= open(outputfile,"w")
+        output_avg = open(outputfile_avg,"w")
+
+        rn = 1
+        prevchr = ""
+        for target in targetList:
+                chr = target.chr
+                starts = target.oriStart.split(",")
+                ends = target.oriEnd.split(",")
+                target_ttl_readdepth = 0
+                starts_leftmost = starts[0]
+
+
+                if ((len(chr) > 5) or (chr[len(chr)-1] == "M")):
+                        continue
+
+                if (prevchr != chr):
+                        print chr #progress checking
+                        prevchr = chr
+                        t = 0
+                        covFile = file.readlines(open(inputfile2+chr+".txt","r"))
+
+                for n in range(0,target.numberExon):
+                        if t >= len(covFile):
+                                break
+                        cov = covFile[t].split()
+                        while  ((int(starts[n]) < int(cov[1])) or (int(starts[n]) >= int(cov[2]))):
+                                if (int(cov[1]) > int(starts[n])):                                        t-=1
+                                else:
+                                        t+=1
+                                cov = covFile[t].split()
+
+                        while  ((int(ends[n]) < int(cov[1])) or (int(ends[n]) >  int(cov[2]))):
+                                # print output #
+                                if (rn == 1):
+                                        prev = target.id
+                                nID = target.id
+                                if (nID != prev):
+                                        rn = 1
+                                ref1 = str(rn)
+                                ref2 = str(int(cov[2]) - int(starts[n]) + rn)
+                                outputwrite2(output, chr,cov[3],starts[n],cov[2])
+                                tmprange=int(cov[2])-int(starts[n])+1
+                                target_ttl_readdepth+=int(cov[3])*tmprange
+                                #target_length+=tmprange
+
+                                rn = int(ref2)
+                                prev = nID
+                                # -- #
+
+                                # get to the next line of inputfile#
+                                t+= 1
+                                cov = covFile[t].split()
+                                starts[n] = cov[1]
+
+                #print output #
+                if (t == 0) and (t >= len(covFile)):
+                        continue
+
+                if (rn == 1):
+                        prev = target.id
+                nID = target.id
+                if (nID != prev):
+                        rn = 1
+                ref1 = str(rn)
+                ref2 = str(int(ends[n]) - int(starts[n]) + rn)
+                outputwrite2(output, chr, cov[3], starts[n], ends[n])
+                tmprange=int(ends[n])-int(starts[n])+1
+                target_ttl_readdepth+=int(cov[3])*tmprange
+                #target_length+=tmprange
+                target_length = int(ends[n])-int(starts_leftmost)+1
+                output_avg.write("\t".join([chr,starts_leftmost,ends[n],str(target_ttl_readdepth/target_length),str(target_length)])+"\n")
+
+                rn = int(ref2)
+                prev = nID
+                # -- #
+        output.close()
+        output_avg.close()
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
author	fcaramia
date	Thu, 13 Sep 2012 02:31:43 -0400
parents
children