Mercurial > repos > siyuan > prada
comparison pyPRADA_1.2/gfclass.py @ 0:acc2ca1a3ba4
Uploaded
author | siyuan |
---|---|
date | Thu, 20 Feb 2014 00:44:58 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:acc2ca1a3ba4 |
---|---|
1 | |
2 class Junction(object): | |
3 def __init__(self,init_str): | |
4 'init_str looks like PCBP2:12:53873398_GFAP:17:42985517' | |
5 part1,part2=init_str.split('_') | |
6 info1,info2=part1.split(':'), part2.split(':') | |
7 self.gene1=info1[0] | |
8 self.end1_chr=info1[1] | |
9 self.end1_pos=int(info1[2]) | |
10 self.gene2=info2[0] | |
11 self.end2_chr=info2[1] | |
12 self.end2_pos=int(info2[2]) | |
13 self.name='%s.%s.%s.%s.%s.%s'%(self.gene1,self.end1_chr,self.end1_pos,self.gene2,self.end2_chr,self.end2_pos) | |
14 def distance(self): | |
15 betwn_junc_dist=None | |
16 if self.end1_chr==self.end2_chr: | |
17 betwn_junc_dist=abs(int(self.end1_pos) - int(self.end2_pos)) | |
18 return betwn_junc_dist | |
19 def junc_category(self): | |
20 cat=None | |
21 if self.end1_chr==self.end2_chr: | |
22 cat='intrachromosome' | |
23 else: | |
24 cat='interchromosome' | |
25 return cat | |
26 | |
27 class JSR(object): | |
28 '''Ideally it should extend pysam.AlignedRead class, but run into segment error. | |
29 Read is pysam.AlignedRead object''' | |
30 def __init__(self,read,junction): | |
31 self.read=read | |
32 self.junction=junction | |
33 | |
34 class GeneFusion(object): | |
35 '''discs is [(r1,r2),...]; junc_rds is [jsr1,jsr2,...];''' | |
36 def __init__(self,gene1,gene2,discordantpairs=[],junc_reads=[]): | |
37 self.gene1=gene1 | |
38 self.gene2=gene2 | |
39 self.discordantpairs=discordantpairs | |
40 self.fusionreads=junc_reads | |
41 def update(self,mm=1): | |
42 '''Generate a new PRADA object with the update parameter. Extendable.''' | |
43 filtdp,filtfus=[],[] #hold updated elements | |
44 #apply mm filter | |
45 for rp in self.discordantpairs: | |
46 r1,r2=rp | |
47 nm1=[x[1] for x in r1.tags if x[0]=='NM'][0] | |
48 nm2=[x[1] for x in r2.tags if x[0]=='NM'][0] | |
49 if nm1 <= mm and nm2 <= mm: | |
50 filtdp.append(rp) | |
51 for fp in self.fusionreads: | |
52 nm=[x[1] for x in fp.read.tags if x[0]=='NM'][0] | |
53 if nm <= mm: | |
54 filtfus.append(fp) | |
55 newobject=GeneFusion(self.gene1,self.gene2,filtdp,filtfus) | |
56 return newobject | |
57 def get_junction_freq(self): | |
58 juncs={} | |
59 for item in self.fusionreads: | |
60 if juncs.has_key(item.junction): | |
61 juncs[item.junction]+=1 | |
62 else: | |
63 juncs[item.junction]=1 | |
64 return juncs.items() | |
65 def get_junctions(self): | |
66 juncs=set([]) | |
67 for item in self.fusionreads: | |
68 juncs.add(item.junction) | |
69 junobjdb=[Junction(x) for x in juncs] | |
70 return junobjdb | |
71 def get_perfect_JSR(self): | |
72 pjsr=[] | |
73 for item in self.fusionreads: | |
74 r=item.read | |
75 nm=[x[1] for x in r.tags if x[0]=='NM'][0] | |
76 if nm==0: | |
77 pjsr.append(item) | |
78 return pjsr | |
79 def positioncheck(self): | |
80 if len(self.fusionreads)==0: #no junction and junction spanning reads. | |
81 return 'NA' | |
82 if len(self.discordantpairs)==0: | |
83 return 'NA' | |
84 junctions=self.get_junctions() | |
85 jA=[x.end1_pos for x in junctions] | |
86 jA_min,jA_max=min(jA),max(jA) | |
87 jB=[x.end2_pos for x in junctions] | |
88 jB_min,jB_max=min(jB),max(jB) ## | |
89 fwd=[x[0].pos for x in self.discordantpairs] | |
90 fwd_min,fwd_max=min(fwd),max(fwd) | |
91 rev=[x[1].pos for x in self.discordantpairs] | |
92 rev_min,rev_max=min(rev),max(rev) | |
93 #print 'junctionA',jA_min,jA_max | |
94 #print 'junctionB',jB_min,jB_max | |
95 #print 'Fwd Read',fwd_min,fwd_max | |
96 #print 'Rev Read',rev_min,rev_max | |
97 ##################################### | |
98 #The following scoring process is translated from M. Berger's perl script. | |
99 const_score=0 | |
100 if not self.discordantpairs[0][0].is_reverse: #gene A on + strand | |
101 if jA_min > fwd_max: | |
102 const_score=const_score+3 | |
103 elif jA_max > fwd_min: | |
104 const_score=const_score+2 | |
105 elif self.discordantpairs[0][0].is_reverse: #gene A on - strand | |
106 if jA_max < fwd_min: | |
107 const_score=const_score+3 | |
108 elif jA_min < fwd_max: | |
109 const_score=const_score+2 | |
110 if self.discordantpairs[0][1].is_reverse: #gene B on + strand // disc read map to - | |
111 if jB_max < rev_min: | |
112 const_score=const_score+3 | |
113 elif jB_min < rev_max: | |
114 const_score=const_score+2 | |
115 elif not self.discordantpairs[0][1].is_reverse: #gene B on - strand | |
116 if jB_min > rev_max: | |
117 const_score=const_score+3 | |
118 elif jB_max > rev_min: | |
119 const_score=const_score+2 | |
120 if const_score==6: | |
121 tag='YES' | |
122 elif const_score>=4: | |
123 tag='PARTIALLY' | |
124 else: | |
125 tag='NO' | |
126 return tag | |
127 |