Mercurial > repos > fcaramia > contra
diff Contra/scripts/cn_apply_threshold.py @ 0:7564f3b1e675
Uploaded
author | fcaramia |
---|---|
date | Thu, 13 Sep 2012 02:31:43 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/Contra/scripts/cn_apply_threshold.py Thu Sep 13 02:31:43 2012 -0400 @@ -0,0 +1,111 @@ +# ----------------------------------------------------------------------# +# Copyright (c) 2011, Richard Lupat & Jason Li. +# +# > Source License < +# This file is part of CONTRA. +# +# CONTRA is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# CONTRA is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with CONTRA. If not, see <http://www.gnu.org/licenses/>. +# +# +#-----------------------------------------------------------------------# +# Last Updated : 12 Oct 2011 11:00AM + +def applyThreshold(outputName, bufTable, threshold, maxGap): + srcFile = outputName + ".txt" + outFile = bufTable + ".LargeVariations.txt" + bedFile = bufTable + ".BED" + fFile = outputName + ".DetailsFILTERED.txt" + ts = float(threshold) + + # Read and open files + srcTable = file.readlines(open(srcFile)) + outTable = open(outFile, "w") + bedOut = open(bedFile, "w") + filteredTable = open(fFile, "w") + + + #header + outTable.write("Chr \tStartCoordinate \tEndCoordinate \tGenes \tGain.Loss \n") + filteredTable.write(srcTable[0]) + + prevChr = '' + prevStatus = '' + prevEnd = -1 + genes = [] + chrList = [] + + for exons in srcTable: + exon = exons.split() + try: + adjPVal = float(exon[12]) + except: + continue + + if adjPVal <= ts: + chr = exon[3] + gene = exon[2] + status = exon[13] + start = exon[4] + end = exon[5] + + # For first row + if prevEnd == -1: + gap = 0 + else: + gap = int(prevEnd) - int(start) + + # Write Filtered Table + filteredTable.write(exons) + + # Write Bed File + bedOut.write(chr.strip("chr") +"\t" +start +"\t"+ end+"\t"+ + chr.strip("chr")+":"+start+"-"+end+":"+str(adjPVal)+"\n") + + if prevChr == '' and prevStatus == '': + if chr not in chrList: + print chr + chrList.append(chr) + elif (chr == prevChr) and (status == prevStatus) and (gap < maxGap): + start = prevStart + else: + outTable.write(prevChr +"\t" +prevStart +"\t" +prevEnd + "\t") + for gsym in genes: + outTable.write(gsym + ", ") + outTable.write("\t" + prevStatus + "\n") + genes=[] + + if gene not in genes: + genes.append(gene) + prevChr = chr + prevStatus = status + prevStart = start + prevEnd = end + elif len(genes) > 0: + outTable.write(prevChr +"\t" +prevStart +"\t" +prevEnd + "\t") + for gsym in genes: + outTable.write(gsym + ", " ) + outTable.write("\t" + prevStatus + "\n") + prevChr = '' + prevStatus = '' + genes = [] + + if len(genes) > 0: + outTable.write(prevChr +"\t" +prevStart +"\t" +prevEnd + "\t") + for gsym in genes: + outTable.write(gsym + ", ") + outTable.write("\t" + prevStatus + "\n") + + filteredTable.close() + bedOut.close() + outTable.close()