annotate pyPRADA_1.2/gfclass.py @ 1:03815b87eb65 draft

Uploaded
author siyuan
date Fri, 21 Feb 2014 18:08:37 -0500
parents acc2ca1a3ba4
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
1
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
2 class Junction(object):
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
3 def __init__(self,init_str):
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
4 'init_str looks like PCBP2:12:53873398_GFAP:17:42985517'
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
5 part1,part2=init_str.split('_')
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
6 info1,info2=part1.split(':'), part2.split(':')
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
7 self.gene1=info1[0]
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
8 self.end1_chr=info1[1]
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
9 self.end1_pos=int(info1[2])
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
10 self.gene2=info2[0]
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
11 self.end2_chr=info2[1]
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
12 self.end2_pos=int(info2[2])
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
13 self.name='%s.%s.%s.%s.%s.%s'%(self.gene1,self.end1_chr,self.end1_pos,self.gene2,self.end2_chr,self.end2_pos)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
14 def distance(self):
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
15 betwn_junc_dist=None
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
16 if self.end1_chr==self.end2_chr:
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
17 betwn_junc_dist=abs(int(self.end1_pos) - int(self.end2_pos))
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
18 return betwn_junc_dist
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
19 def junc_category(self):
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
20 cat=None
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
21 if self.end1_chr==self.end2_chr:
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
22 cat='intrachromosome'
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
23 else:
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
24 cat='interchromosome'
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
25 return cat
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
26
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
27 class JSR(object):
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
28 '''Ideally it should extend pysam.AlignedRead class, but run into segment error.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
29 Read is pysam.AlignedRead object'''
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
30 def __init__(self,read,junction):
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
31 self.read=read
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
32 self.junction=junction
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
33
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
34 class GeneFusion(object):
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
35 '''discs is [(r1,r2),...]; junc_rds is [jsr1,jsr2,...];'''
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
36 def __init__(self,gene1,gene2,discordantpairs=[],junc_reads=[]):
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
37 self.gene1=gene1
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
38 self.gene2=gene2
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
39 self.discordantpairs=discordantpairs
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
40 self.fusionreads=junc_reads
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
41 def update(self,mm=1):
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
42 '''Generate a new PRADA object with the update parameter. Extendable.'''
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
43 filtdp,filtfus=[],[] #hold updated elements
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
44 #apply mm filter
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
45 for rp in self.discordantpairs:
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
46 r1,r2=rp
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
47 nm1=[x[1] for x in r1.tags if x[0]=='NM'][0]
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
48 nm2=[x[1] for x in r2.tags if x[0]=='NM'][0]
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
49 if nm1 <= mm and nm2 <= mm:
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
50 filtdp.append(rp)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
51 for fp in self.fusionreads:
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
52 nm=[x[1] for x in fp.read.tags if x[0]=='NM'][0]
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
53 if nm <= mm:
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
54 filtfus.append(fp)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
55 newobject=GeneFusion(self.gene1,self.gene2,filtdp,filtfus)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
56 return newobject
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
57 def get_junction_freq(self):
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
58 juncs={}
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
59 for item in self.fusionreads:
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
60 if juncs.has_key(item.junction):
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
61 juncs[item.junction]+=1
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
62 else:
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
63 juncs[item.junction]=1
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
64 return juncs.items()
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
65 def get_junctions(self):
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
66 juncs=set([])
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
67 for item in self.fusionreads:
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
68 juncs.add(item.junction)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
69 junobjdb=[Junction(x) for x in juncs]
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
70 return junobjdb
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
71 def get_perfect_JSR(self):
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
72 pjsr=[]
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
73 for item in self.fusionreads:
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
74 r=item.read
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
75 nm=[x[1] for x in r.tags if x[0]=='NM'][0]
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
76 if nm==0:
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
77 pjsr.append(item)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
78 return pjsr
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
79 def positioncheck(self):
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
80 if len(self.fusionreads)==0: #no junction and junction spanning reads.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
81 return 'NA'
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
82 if len(self.discordantpairs)==0:
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
83 return 'NA'
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
84 junctions=self.get_junctions()
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
85 jA=[x.end1_pos for x in junctions]
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
86 jA_min,jA_max=min(jA),max(jA)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
87 jB=[x.end2_pos for x in junctions]
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
88 jB_min,jB_max=min(jB),max(jB) ##
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
89 fwd=[x[0].pos for x in self.discordantpairs]
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
90 fwd_min,fwd_max=min(fwd),max(fwd)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
91 rev=[x[1].pos for x in self.discordantpairs]
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
92 rev_min,rev_max=min(rev),max(rev)
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
93 #print 'junctionA',jA_min,jA_max
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
94 #print 'junctionB',jB_min,jB_max
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
95 #print 'Fwd Read',fwd_min,fwd_max
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
96 #print 'Rev Read',rev_min,rev_max
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
97 #####################################
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
98 #The following scoring process is translated from M. Berger's perl script.
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
99 const_score=0
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
100 if not self.discordantpairs[0][0].is_reverse: #gene A on + strand
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
101 if jA_min > fwd_max:
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
102 const_score=const_score+3
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
103 elif jA_max > fwd_min:
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
104 const_score=const_score+2
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
105 elif self.discordantpairs[0][0].is_reverse: #gene A on - strand
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
106 if jA_max < fwd_min:
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
107 const_score=const_score+3
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
108 elif jA_min < fwd_max:
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
109 const_score=const_score+2
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
110 if self.discordantpairs[0][1].is_reverse: #gene B on + strand // disc read map to -
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
111 if jB_max < rev_min:
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
112 const_score=const_score+3
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
113 elif jB_min < rev_max:
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
114 const_score=const_score+2
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
115 elif not self.discordantpairs[0][1].is_reverse: #gene B on - strand
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
116 if jB_min > rev_max:
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
117 const_score=const_score+3
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
118 elif jB_max > rev_min:
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
119 const_score=const_score+2
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
120 if const_score==6:
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
121 tag='YES'
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
122 elif const_score>=4:
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
123 tag='PARTIALLY'
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
124 else:
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
125 tag='NO'
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
126 return tag
acc2ca1a3ba4 Uploaded
siyuan
parents:
diff changeset
127