Mercurial > repos > siyuan > prada
comparison pyPRADA_1.2/gfclass.py @ 0:acc2ca1a3ba4
Uploaded
| author | siyuan |
|---|---|
| date | Thu, 20 Feb 2014 00:44:58 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:acc2ca1a3ba4 |
|---|---|
| 1 | |
| 2 class Junction(object): | |
| 3 def __init__(self,init_str): | |
| 4 'init_str looks like PCBP2:12:53873398_GFAP:17:42985517' | |
| 5 part1,part2=init_str.split('_') | |
| 6 info1,info2=part1.split(':'), part2.split(':') | |
| 7 self.gene1=info1[0] | |
| 8 self.end1_chr=info1[1] | |
| 9 self.end1_pos=int(info1[2]) | |
| 10 self.gene2=info2[0] | |
| 11 self.end2_chr=info2[1] | |
| 12 self.end2_pos=int(info2[2]) | |
| 13 self.name='%s.%s.%s.%s.%s.%s'%(self.gene1,self.end1_chr,self.end1_pos,self.gene2,self.end2_chr,self.end2_pos) | |
| 14 def distance(self): | |
| 15 betwn_junc_dist=None | |
| 16 if self.end1_chr==self.end2_chr: | |
| 17 betwn_junc_dist=abs(int(self.end1_pos) - int(self.end2_pos)) | |
| 18 return betwn_junc_dist | |
| 19 def junc_category(self): | |
| 20 cat=None | |
| 21 if self.end1_chr==self.end2_chr: | |
| 22 cat='intrachromosome' | |
| 23 else: | |
| 24 cat='interchromosome' | |
| 25 return cat | |
| 26 | |
| 27 class JSR(object): | |
| 28 '''Ideally it should extend pysam.AlignedRead class, but run into segment error. | |
| 29 Read is pysam.AlignedRead object''' | |
| 30 def __init__(self,read,junction): | |
| 31 self.read=read | |
| 32 self.junction=junction | |
| 33 | |
| 34 class GeneFusion(object): | |
| 35 '''discs is [(r1,r2),...]; junc_rds is [jsr1,jsr2,...];''' | |
| 36 def __init__(self,gene1,gene2,discordantpairs=[],junc_reads=[]): | |
| 37 self.gene1=gene1 | |
| 38 self.gene2=gene2 | |
| 39 self.discordantpairs=discordantpairs | |
| 40 self.fusionreads=junc_reads | |
| 41 def update(self,mm=1): | |
| 42 '''Generate a new PRADA object with the update parameter. Extendable.''' | |
| 43 filtdp,filtfus=[],[] #hold updated elements | |
| 44 #apply mm filter | |
| 45 for rp in self.discordantpairs: | |
| 46 r1,r2=rp | |
| 47 nm1=[x[1] for x in r1.tags if x[0]=='NM'][0] | |
| 48 nm2=[x[1] for x in r2.tags if x[0]=='NM'][0] | |
| 49 if nm1 <= mm and nm2 <= mm: | |
| 50 filtdp.append(rp) | |
| 51 for fp in self.fusionreads: | |
| 52 nm=[x[1] for x in fp.read.tags if x[0]=='NM'][0] | |
| 53 if nm <= mm: | |
| 54 filtfus.append(fp) | |
| 55 newobject=GeneFusion(self.gene1,self.gene2,filtdp,filtfus) | |
| 56 return newobject | |
| 57 def get_junction_freq(self): | |
| 58 juncs={} | |
| 59 for item in self.fusionreads: | |
| 60 if juncs.has_key(item.junction): | |
| 61 juncs[item.junction]+=1 | |
| 62 else: | |
| 63 juncs[item.junction]=1 | |
| 64 return juncs.items() | |
| 65 def get_junctions(self): | |
| 66 juncs=set([]) | |
| 67 for item in self.fusionreads: | |
| 68 juncs.add(item.junction) | |
| 69 junobjdb=[Junction(x) for x in juncs] | |
| 70 return junobjdb | |
| 71 def get_perfect_JSR(self): | |
| 72 pjsr=[] | |
| 73 for item in self.fusionreads: | |
| 74 r=item.read | |
| 75 nm=[x[1] for x in r.tags if x[0]=='NM'][0] | |
| 76 if nm==0: | |
| 77 pjsr.append(item) | |
| 78 return pjsr | |
| 79 def positioncheck(self): | |
| 80 if len(self.fusionreads)==0: #no junction and junction spanning reads. | |
| 81 return 'NA' | |
| 82 if len(self.discordantpairs)==0: | |
| 83 return 'NA' | |
| 84 junctions=self.get_junctions() | |
| 85 jA=[x.end1_pos for x in junctions] | |
| 86 jA_min,jA_max=min(jA),max(jA) | |
| 87 jB=[x.end2_pos for x in junctions] | |
| 88 jB_min,jB_max=min(jB),max(jB) ## | |
| 89 fwd=[x[0].pos for x in self.discordantpairs] | |
| 90 fwd_min,fwd_max=min(fwd),max(fwd) | |
| 91 rev=[x[1].pos for x in self.discordantpairs] | |
| 92 rev_min,rev_max=min(rev),max(rev) | |
| 93 #print 'junctionA',jA_min,jA_max | |
| 94 #print 'junctionB',jB_min,jB_max | |
| 95 #print 'Fwd Read',fwd_min,fwd_max | |
| 96 #print 'Rev Read',rev_min,rev_max | |
| 97 ##################################### | |
| 98 #The following scoring process is translated from M. Berger's perl script. | |
| 99 const_score=0 | |
| 100 if not self.discordantpairs[0][0].is_reverse: #gene A on + strand | |
| 101 if jA_min > fwd_max: | |
| 102 const_score=const_score+3 | |
| 103 elif jA_max > fwd_min: | |
| 104 const_score=const_score+2 | |
| 105 elif self.discordantpairs[0][0].is_reverse: #gene A on - strand | |
| 106 if jA_max < fwd_min: | |
| 107 const_score=const_score+3 | |
| 108 elif jA_min < fwd_max: | |
| 109 const_score=const_score+2 | |
| 110 if self.discordantpairs[0][1].is_reverse: #gene B on + strand // disc read map to - | |
| 111 if jB_max < rev_min: | |
| 112 const_score=const_score+3 | |
| 113 elif jB_min < rev_max: | |
| 114 const_score=const_score+2 | |
| 115 elif not self.discordantpairs[0][1].is_reverse: #gene B on - strand | |
| 116 if jB_min > rev_max: | |
| 117 const_score=const_score+3 | |
| 118 elif jB_max > rev_min: | |
| 119 const_score=const_score+2 | |
| 120 if const_score==6: | |
| 121 tag='YES' | |
| 122 elif const_score>=4: | |
| 123 tag='PARTIALLY' | |
| 124 else: | |
| 125 tag='NO' | |
| 126 return tag | |
| 127 |
