comparison pyPRADA_1.2/gfclass.py @ 0:acc2ca1a3ba4

Uploaded
author siyuan
date Thu, 20 Feb 2014 00:44:58 -0500
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:acc2ca1a3ba4
1
2 class Junction(object):
3 def __init__(self,init_str):
4 'init_str looks like PCBP2:12:53873398_GFAP:17:42985517'
5 part1,part2=init_str.split('_')
6 info1,info2=part1.split(':'), part2.split(':')
7 self.gene1=info1[0]
8 self.end1_chr=info1[1]
9 self.end1_pos=int(info1[2])
10 self.gene2=info2[0]
11 self.end2_chr=info2[1]
12 self.end2_pos=int(info2[2])
13 self.name='%s.%s.%s.%s.%s.%s'%(self.gene1,self.end1_chr,self.end1_pos,self.gene2,self.end2_chr,self.end2_pos)
14 def distance(self):
15 betwn_junc_dist=None
16 if self.end1_chr==self.end2_chr:
17 betwn_junc_dist=abs(int(self.end1_pos) - int(self.end2_pos))
18 return betwn_junc_dist
19 def junc_category(self):
20 cat=None
21 if self.end1_chr==self.end2_chr:
22 cat='intrachromosome'
23 else:
24 cat='interchromosome'
25 return cat
26
27 class JSR(object):
28 '''Ideally it should extend pysam.AlignedRead class, but run into segment error.
29 Read is pysam.AlignedRead object'''
30 def __init__(self,read,junction):
31 self.read=read
32 self.junction=junction
33
34 class GeneFusion(object):
35 '''discs is [(r1,r2),...]; junc_rds is [jsr1,jsr2,...];'''
36 def __init__(self,gene1,gene2,discordantpairs=[],junc_reads=[]):
37 self.gene1=gene1
38 self.gene2=gene2
39 self.discordantpairs=discordantpairs
40 self.fusionreads=junc_reads
41 def update(self,mm=1):
42 '''Generate a new PRADA object with the update parameter. Extendable.'''
43 filtdp,filtfus=[],[] #hold updated elements
44 #apply mm filter
45 for rp in self.discordantpairs:
46 r1,r2=rp
47 nm1=[x[1] for x in r1.tags if x[0]=='NM'][0]
48 nm2=[x[1] for x in r2.tags if x[0]=='NM'][0]
49 if nm1 <= mm and nm2 <= mm:
50 filtdp.append(rp)
51 for fp in self.fusionreads:
52 nm=[x[1] for x in fp.read.tags if x[0]=='NM'][0]
53 if nm <= mm:
54 filtfus.append(fp)
55 newobject=GeneFusion(self.gene1,self.gene2,filtdp,filtfus)
56 return newobject
57 def get_junction_freq(self):
58 juncs={}
59 for item in self.fusionreads:
60 if juncs.has_key(item.junction):
61 juncs[item.junction]+=1
62 else:
63 juncs[item.junction]=1
64 return juncs.items()
65 def get_junctions(self):
66 juncs=set([])
67 for item in self.fusionreads:
68 juncs.add(item.junction)
69 junobjdb=[Junction(x) for x in juncs]
70 return junobjdb
71 def get_perfect_JSR(self):
72 pjsr=[]
73 for item in self.fusionreads:
74 r=item.read
75 nm=[x[1] for x in r.tags if x[0]=='NM'][0]
76 if nm==0:
77 pjsr.append(item)
78 return pjsr
79 def positioncheck(self):
80 if len(self.fusionreads)==0: #no junction and junction spanning reads.
81 return 'NA'
82 if len(self.discordantpairs)==0:
83 return 'NA'
84 junctions=self.get_junctions()
85 jA=[x.end1_pos for x in junctions]
86 jA_min,jA_max=min(jA),max(jA)
87 jB=[x.end2_pos for x in junctions]
88 jB_min,jB_max=min(jB),max(jB) ##
89 fwd=[x[0].pos for x in self.discordantpairs]
90 fwd_min,fwd_max=min(fwd),max(fwd)
91 rev=[x[1].pos for x in self.discordantpairs]
92 rev_min,rev_max=min(rev),max(rev)
93 #print 'junctionA',jA_min,jA_max
94 #print 'junctionB',jB_min,jB_max
95 #print 'Fwd Read',fwd_min,fwd_max
96 #print 'Rev Read',rev_min,rev_max
97 #####################################
98 #The following scoring process is translated from M. Berger's perl script.
99 const_score=0
100 if not self.discordantpairs[0][0].is_reverse: #gene A on + strand
101 if jA_min > fwd_max:
102 const_score=const_score+3
103 elif jA_max > fwd_min:
104 const_score=const_score+2
105 elif self.discordantpairs[0][0].is_reverse: #gene A on - strand
106 if jA_max < fwd_min:
107 const_score=const_score+3
108 elif jA_min < fwd_max:
109 const_score=const_score+2
110 if self.discordantpairs[0][1].is_reverse: #gene B on + strand // disc read map to -
111 if jB_max < rev_min:
112 const_score=const_score+3
113 elif jB_min < rev_max:
114 const_score=const_score+2
115 elif not self.discordantpairs[0][1].is_reverse: #gene B on - strand
116 if jB_min > rev_max:
117 const_score=const_score+3
118 elif jB_max > rev_min:
119 const_score=const_score+2
120 if const_score==6:
121 tag='YES'
122 elif const_score>=4:
123 tag='PARTIALLY'
124 else:
125 tag='NO'
126 return tag
127