Mercurial > repos > fcaramia > contra
view Contra/scripts/convert_gene_coordinate.py @ 0:7564f3b1e675
Uploaded
author | fcaramia |
---|---|
date | Thu, 13 Sep 2012 02:31:43 -0400 |
parents | |
children |
line wrap: on
line source
# ----------------------------------------------------------------------# # Copyright (c) 2011, Richard Lupat & Jason Li. # # > Source License < # This file is part of CONTRA. # # CONTRA is free software: you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation, either version 3 of the License, or # (at your option) any later version. # # CONTRA is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with CONTRA. If not, see <http://www.gnu.org/licenses/>. # # #-----------------------------------------------------------------------# # Last Updated : 03 Oct 2011 11:00AM import sys def outputwrite(output, gene,chr,a,b,c,id,n,sOri, eOri): id = str(id) n = str(n) output.write(gene+"\t"+chr+"\t"+a+"\t"+b+"\t"+c+"\t"+id+"\t"+n+"\t"+sOri+"\t"+eOri+"\n") def outputwrite2(output2, chr,c,sOri, eOri): output2.write(chr+"\t"+sOri+"\t"+eOri+"\t"+c+"\n") def convertGeneCoordinate(targetList, bufLocFolder): inputfile2 = bufLocFolder + "chr/" outputfile = bufLocFolder + "geneRefCoverage.txt" outputfile2= bufLocFolder + "geneRefCoverage2.txt" output= open(outputfile,"w") output2 = open(outputfile2 , "w") rn = 1 prevchr = "" for target in targetList: chr = target.chr starts = target.oriStart.split(",") ends = target.oriEnd.split(",") if ((len(chr) > 5) or (chr[len(chr)-1] == "M")): continue if (prevchr != chr): print chr #progress checking prevchr = chr t = 0 covFile = file.readlines(open(inputfile2+chr+".txt","r")) for n in range(0,target.numberExon): if t >= len(covFile): break cov = covFile[t].split() while ((int(starts[n]) < int(cov[1])) or (int(starts[n]) >= int(cov[2]))): if (int(cov[1]) > int(starts[n])): t-=1 else: t+=1 cov = covFile[t].split() while ((int(ends[n]) < int(cov[1])) or (int(ends[n]) > int(cov[2]))): # print output # if (rn == 1): prev = target.id nID = target.id if (nID != prev): rn = 1 ref1 = str(rn) ref2 = str(int(cov[2]) - int(starts[n]) + rn) outputwrite(output, target.gene,chr,ref1,ref2,cov[3],target.id,n,starts[n],cov[2]) outputwrite2(output2, chr,cov[3],starts[n],cov[2]) rn = int(ref2) prev = nID # -- # # get to the next line of inputfile# t+= 1 cov = covFile[t].split() starts[n] = cov[1] #print output # if (t == 0) and (t >= len(covFile)): continue if (rn == 1): prev = target.id nID = target.id if (nID != prev): rn = 1 ref1 = str(rn) ref2 = str(int(ends[n]) - int(starts[n]) + rn) outputwrite(output, target.gene, chr, ref1, ref2, cov[3], target.id, n, starts[n], ends[n]) outputwrite2(output2, chr, cov[3], starts[n], ends[n]) rn = int(ref2) prev = nID # -- # output.close() output2.close() def convertGeneCoordinate2(targetList, bufLocFolder): inputfile2 = bufLocFolder + "chr/" outputfile = bufLocFolder + "geneRefCoverage.txt" outputfile_avg = bufLocFolder + "geneRefCoverage_targetAverage.txt" output= open(outputfile,"w") output_avg = open(outputfile_avg,"w") rn = 1 prevchr = "" for target in targetList: chr = target.chr starts = target.oriStart.split(",") ends = target.oriEnd.split(",") target_ttl_readdepth = 0 starts_leftmost = starts[0] if ((len(chr) > 5) or (chr[len(chr)-1] == "M")): continue if (prevchr != chr): print chr #progress checking prevchr = chr t = 0 covFile = file.readlines(open(inputfile2+chr+".txt","r")) for n in range(0,target.numberExon): if t >= len(covFile): break cov = covFile[t].split() while ((int(starts[n]) < int(cov[1])) or (int(starts[n]) >= int(cov[2]))): if (int(cov[1]) > int(starts[n])): t-=1 else: t+=1 cov = covFile[t].split() while ((int(ends[n]) < int(cov[1])) or (int(ends[n]) > int(cov[2]))): # print output # if (rn == 1): prev = target.id nID = target.id if (nID != prev): rn = 1 ref1 = str(rn) ref2 = str(int(cov[2]) - int(starts[n]) + rn) outputwrite2(output, chr,cov[3],starts[n],cov[2]) tmprange=int(cov[2])-int(starts[n])+1 target_ttl_readdepth+=int(cov[3])*tmprange #target_length+=tmprange rn = int(ref2) prev = nID # -- # # get to the next line of inputfile# t+= 1 cov = covFile[t].split() starts[n] = cov[1] #print output # if (t == 0) and (t >= len(covFile)): continue if (rn == 1): prev = target.id nID = target.id if (nID != prev): rn = 1 ref1 = str(rn) ref2 = str(int(ends[n]) - int(starts[n]) + rn) outputwrite2(output, chr, cov[3], starts[n], ends[n]) tmprange=int(ends[n])-int(starts[n])+1 target_ttl_readdepth+=int(cov[3])*tmprange #target_length+=tmprange target_length = int(ends[n])-int(starts_leftmost)+1 output_avg.write("\t".join([chr,starts_leftmost,ends[n],str(target_ttl_readdepth/target_length),str(target_length)])+"\n") rn = int(ref2) prev = nID # -- # output.close() output_avg.close()