Mercurial > repos > fcaramia > contra
diff Contra/scripts/convert_gene_coordinate.py @ 0:7564f3b1e675
Uploaded
author | fcaramia |
---|---|
date | Thu, 13 Sep 2012 02:31:43 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Contra/scripts/convert_gene_coordinate.py Thu Sep 13 02:31:43 2012 -0400 @@ -0,0 +1,218 @@ +# ----------------------------------------------------------------------# +# Copyright (c) 2011, Richard Lupat & Jason Li. +# +# > Source License < +# This file is part of CONTRA. +# +# CONTRA is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# CONTRA is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with CONTRA. If not, see <http://www.gnu.org/licenses/>. +# +# +#-----------------------------------------------------------------------# +# Last Updated : 03 Oct 2011 11:00AM + +import sys + +def outputwrite(output, gene,chr,a,b,c,id,n,sOri, eOri): + id = str(id) + n = str(n) + output.write(gene+"\t"+chr+"\t"+a+"\t"+b+"\t"+c+"\t"+id+"\t"+n+"\t"+sOri+"\t"+eOri+"\n") + +def outputwrite2(output2, chr,c,sOri, eOri): + output2.write(chr+"\t"+sOri+"\t"+eOri+"\t"+c+"\n") + + +def convertGeneCoordinate(targetList, bufLocFolder): + inputfile2 = bufLocFolder + "chr/" + outputfile = bufLocFolder + "geneRefCoverage.txt" + outputfile2= bufLocFolder + "geneRefCoverage2.txt" + + output= open(outputfile,"w") + output2 = open(outputfile2 , "w") + + rn = 1 + prevchr = "" + for target in targetList: + chr = target.chr + starts = target.oriStart.split(",") + ends = target.oriEnd.split(",") + + if ((len(chr) > 5) or (chr[len(chr)-1] == "M")): + continue + + if (prevchr != chr): + print chr #progress checking + prevchr = chr + t = 0 + covFile = file.readlines(open(inputfile2+chr+".txt","r")) + + for n in range(0,target.numberExon): + if t >= len(covFile): + break + cov = covFile[t].split() + while ((int(starts[n]) < int(cov[1])) or (int(starts[n]) >= int(cov[2]))): + if (int(cov[1]) > int(starts[n])): + t-=1 + else: + t+=1 + cov = covFile[t].split() + + while ((int(ends[n]) < int(cov[1])) or (int(ends[n]) > int(cov[2]))): + # print output # + if (rn == 1): + prev = target.id + nID = target.id + if (nID != prev): + rn = 1 + ref1 = str(rn) + ref2 = str(int(cov[2]) - int(starts[n]) + rn) + outputwrite(output, target.gene,chr,ref1,ref2,cov[3],target.id,n,starts[n],cov[2]) + + outputwrite2(output2, chr,cov[3],starts[n],cov[2]) + + + rn = int(ref2) + prev = nID + # -- # + + # get to the next line of inputfile# + t+= 1 + cov = covFile[t].split() + starts[n] = cov[1] + + #print output # + if (t == 0) and (t >= len(covFile)): + continue + + if (rn == 1): + prev = target.id + nID = target.id + if (nID != prev): + rn = 1 + ref1 = str(rn) + ref2 = str(int(ends[n]) - int(starts[n]) + rn) + outputwrite(output, target.gene, chr, ref1, ref2, cov[3], target.id, n, starts[n], ends[n]) + + outputwrite2(output2, chr, cov[3], starts[n], ends[n]) + + + rn = int(ref2) + prev = nID + # -- # + output.close() + output2.close() + + +def convertGeneCoordinate2(targetList, bufLocFolder): + inputfile2 = bufLocFolder + "chr/" + outputfile = bufLocFolder + "geneRefCoverage.txt" + outputfile_avg = bufLocFolder + "geneRefCoverage_targetAverage.txt" + + output= open(outputfile,"w") + output_avg = open(outputfile_avg,"w") + + rn = 1 + prevchr = "" + for target in targetList: + chr = target.chr + starts = target.oriStart.split(",") + ends = target.oriEnd.split(",") + target_ttl_readdepth = 0 + starts_leftmost = starts[0] + + + if ((len(chr) > 5) or (chr[len(chr)-1] == "M")): + continue + + if (prevchr != chr): + print chr #progress checking + prevchr = chr + t = 0 + covFile = file.readlines(open(inputfile2+chr+".txt","r")) + + for n in range(0,target.numberExon): + if t >= len(covFile): + break + cov = covFile[t].split() + while ((int(starts[n]) < int(cov[1])) or (int(starts[n]) >= int(cov[2]))): + if (int(cov[1]) > int(starts[n])): t-=1 + else: + t+=1 + cov = covFile[t].split() + + while ((int(ends[n]) < int(cov[1])) or (int(ends[n]) > int(cov[2]))): + # print output # + if (rn == 1): + prev = target.id + nID = target.id + if (nID != prev): + rn = 1 + ref1 = str(rn) + ref2 = str(int(cov[2]) - int(starts[n]) + rn) + outputwrite2(output, chr,cov[3],starts[n],cov[2]) + tmprange=int(cov[2])-int(starts[n])+1 + target_ttl_readdepth+=int(cov[3])*tmprange + #target_length+=tmprange + + rn = int(ref2) + prev = nID + # -- # + + # get to the next line of inputfile# + t+= 1 + cov = covFile[t].split() + starts[n] = cov[1] + + #print output # + if (t == 0) and (t >= len(covFile)): + continue + + if (rn == 1): + prev = target.id + nID = target.id + if (nID != prev): + rn = 1 + ref1 = str(rn) + ref2 = str(int(ends[n]) - int(starts[n]) + rn) + outputwrite2(output, chr, cov[3], starts[n], ends[n]) + tmprange=int(ends[n])-int(starts[n])+1 + target_ttl_readdepth+=int(cov[3])*tmprange + #target_length+=tmprange + target_length = int(ends[n])-int(starts_leftmost)+1 + output_avg.write("\t".join([chr,starts_leftmost,ends[n],str(target_ttl_readdepth/target_length),str(target_length)])+"\n") + + rn = int(ref2) + prev = nID + # -- # + output.close() + output_avg.close() + + + + + + + + + + + + + + + + + + + +