annotate Contra/scripts/cn_apply_threshold.py @ 5:be94ae8dac28

Uploaded
author fcaramia
date Sun, 02 Dec 2012 19:44:11 -0500
parents 7564f3b1e675
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
1 # ----------------------------------------------------------------------#
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
2 # Copyright (c) 2011, Richard Lupat & Jason Li.
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
3 #
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
4 # > Source License <
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
5 # This file is part of CONTRA.
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
6 #
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
7 # CONTRA is free software: you can redistribute it and/or modify
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
8 # it under the terms of the GNU General Public License as published by
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
9 # the Free Software Foundation, either version 3 of the License, or
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
10 # (at your option) any later version.
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
11 #
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
12 # CONTRA is distributed in the hope that it will be useful,
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
15 # GNU General Public License for more details.
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
16 #
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
17 # You should have received a copy of the GNU General Public License
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
18 # along with CONTRA. If not, see <http://www.gnu.org/licenses/>.
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
19 #
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
20 #
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
21 #-----------------------------------------------------------------------#
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
22 # Last Updated : 12 Oct 2011 11:00AM
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
23
7564f3b1e675 Uploaded
fcaramia
parents:
diff changeset
def applyThreshold(outputName, bufTable, threshold, maxGap):
    """Filter exon rows by adjusted p-value and merge them into regions.

    Reads ``outputName + ".txt"``, a whitespace-separated table whose first
    line is copied through as the header.  The fields used from each row
    are (0-based): 2 = gene, 3 = chromosome, 4 = start, 5 = end,
    12 = adjusted p-value, 13 = gain/loss status.  Rows whose field 12 is
    missing or not a number (the header, short lines) are ignored.

    Three files are written:

    * ``outputName + ".DetailsFILTERED.txt"`` -- the header followed by every
      row whose adjusted p-value is <= ``threshold``, verbatim.
    * ``bufTable + ".BED"`` -- one line per kept row: chromosome (without a
      leading "chr"), start, end and a ``chrom:start-end:pvalue`` label.
    * ``bufTable + ".LargeVariations.txt"`` -- kept rows that directly follow
      each other, share chromosome and status, and are less than ``maxGap``
      apart are merged into one region listing the distinct genes seen.
      A row above the threshold always ends the current region.

    Each chromosome that starts a new run of kept rows is printed once to
    stdout as a progress indicator.

    Parameters:
        outputName: path prefix of the input table and the filtered table.
        bufTable:   path prefix of the BED and LargeVariations outputs.
        threshold:  maximum adjusted p-value to keep (anything ``float()``
                    accepts).
        maxGap:     merge limit on the distance between the previous kept
                    row's end and the next kept row's start (anything
                    ``float()`` accepts).

    Raises:
        IOError/OSError if a file cannot be opened; ValueError if
        ``threshold``/``maxGap`` or a kept row's coordinates are not numeric.
    """
    srcFile = outputName + ".txt"
    outFile = bufTable + ".LargeVariations.txt"
    bedFile = bufTable + ".BED"
    fFile = outputName + ".DetailsFILTERED.txt"
    ts = float(threshold)
    gapLimit = float(maxGap)

    # Read the whole table first so a missing input fails before any
    # output file is created; 'with' guarantees the handle is closed.
    with open(srcFile) as src:
        srcTable = src.readlines()

    with open(outFile, "w") as outTable, \
            open(bedFile, "w") as bedOut, \
            open(fFile, "w") as filteredTable:
        # Headers
        outTable.write("Chr \tStartCoordinate \tEndCoordinate \tGenes \tGain.Loss \n")
        if srcTable:
            # An empty input simply yields an empty filtered table.
            filteredTable.write(srcTable[0])

        prevChr = ''
        prevStatus = ''
        prevStart = ''
        prevEnd = -1
        genes = []
        chrList = []

        for line in srcTable:
            fields = line.split()
            try:
                adjPVal = float(fields[12])
            except (IndexError, ValueError):
                # Header, blank or otherwise non-data line.
                continue

            if adjPVal <= ts:
                chrom = fields[3]
                gene = fields[2]
                status = fields[13]
                start = fields[4]
                end = fields[5]

                # Distance from the previous kept row to this one.  Must be
                # start - prevEnd: the reverse is never positive for
                # ascending coordinates, which would make maxGap a no-op.
                if prevEnd == -1:
                    gap = 0
                else:
                    gap = int(start) - int(prevEnd)

                # Filtered table: the row exactly as read.
                filteredTable.write(line)

                # BED file: drop only a leading "chr".  str.strip("chr")
                # would remove any of the letters c/h/r from both ends.
                if chrom.startswith("chr"):
                    bedChrom = chrom[3:]
                else:
                    bedChrom = chrom
                bedOut.write(bedChrom + "\t" + start + "\t" + end + "\t" +
                             bedChrom + ":" + start + "-" + end + ":" +
                             str(adjPVal) + "\n")

                if prevChr == '' and prevStatus == '':
                    # No open region: this row starts one.
                    if chrom not in chrList:
                        print(chrom)
                        chrList.append(chrom)
                elif chrom == prevChr and status == prevStatus and gap < gapLimit:
                    # Extend the open region: keep its original start.
                    start = prevStart
                else:
                    # Different chromosome/status or too far away: flush the
                    # open region and start a new one with this row.
                    _writeRegion(outTable, prevChr, prevStart, prevEnd,
                                 genes, prevStatus)
                    genes = []

                if gene not in genes:
                    genes.append(gene)
                prevChr = chrom
                prevStatus = status
                prevStart = start
                prevEnd = end
            elif genes:
                # A row above the threshold closes the open region.
                _writeRegion(outTable, prevChr, prevStart, prevEnd,
                             genes, prevStatus)
                prevChr = ''
                prevStatus = ''
                genes = []

        # Flush the region still open at end of input.
        if genes:
            _writeRegion(outTable, prevChr, prevStart, prevEnd,
                         genes, prevStatus)


def _writeRegion(outTable, chrom, start, end, genes, status):
    """Write one merged region as a LargeVariations line.

    Format: chrom, start, end, each gene followed by ", ", then status,
    tab-separated and newline-terminated.
    """
    outTable.write(chrom + "\t" + start + "\t" + end + "\t")
    outTable.write("".join(gsym + ", " for gsym in genes))
    outTable.write("\t" + status + "\n")