diff pyPRADA_1.2/gfclass.py @ 0:acc2ca1a3ba4

Uploaded
author siyuan
date Thu, 20 Feb 2014 00:44:58 -0500
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/pyPRADA_1.2/gfclass.py	Thu Feb 20 00:44:58 2014 -0500
@@ -0,0 +1,127 @@
+
+class Junction(object):
+    def __init__(self,init_str):
+        'init_str looks like PCBP2:12:53873398_GFAP:17:42985517'
+        part1,part2=init_str.split('_')
+        info1,info2=part1.split(':'), part2.split(':')
+        self.gene1=info1[0]
+        self.end1_chr=info1[1]
+        self.end1_pos=int(info1[2])
+        self.gene2=info2[0]
+        self.end2_chr=info2[1]
+        self.end2_pos=int(info2[2])
+        self.name='%s.%s.%s.%s.%s.%s'%(self.gene1,self.end1_chr,self.end1_pos,self.gene2,self.end2_chr,self.end2_pos)
+    def distance(self): 
+        betwn_junc_dist=None
+        if self.end1_chr==self.end2_chr:
+            betwn_junc_dist=abs(int(self.end1_pos) - int(self.end2_pos))
+        return betwn_junc_dist
+    def junc_category(self):
+        cat=None
+        if self.end1_chr==self.end2_chr:
+            cat='intrachromosome'
+        else:
+            cat='interchromosome'
+        return cat
+
+class JSR(object):
+    '''Ideally it should extend pysam.AlignedRead class, but run into segment error.
+       Read is pysam.AlignedRead object'''
+    def __init__(self,read,junction):
+        self.read=read
+        self.junction=junction
+
+class GeneFusion(object):
+    '''discs is [(r1,r2),...]; junc_rds is [jsr1,jsr2,...];'''
+    def __init__(self,gene1,gene2,discordantpairs=[],junc_reads=[]):
+        self.gene1=gene1
+        self.gene2=gene2
+        self.discordantpairs=discordantpairs
+        self.fusionreads=junc_reads
+    def update(self,mm=1):
+        '''Generate a new PRADA object with the update parameter. Extendable.'''
+        filtdp,filtfus=[],[]  #hold updated elements
+        #apply mm filter
+        for rp in self.discordantpairs:
+            r1,r2=rp
+            nm1=[x[1] for x in r1.tags if x[0]=='NM'][0]
+            nm2=[x[1] for x in r2.tags if x[0]=='NM'][0]
+            if nm1 <= mm and nm2 <= mm:
+                filtdp.append(rp)
+        for fp in self.fusionreads:
+            nm=[x[1] for x in fp.read.tags if x[0]=='NM'][0]
+            if nm <= mm:
+                filtfus.append(fp)
+        newobject=GeneFusion(self.gene1,self.gene2,filtdp,filtfus)
+        return newobject
+    def get_junction_freq(self):
+        juncs={}
+        for item in self.fusionreads:
+            if juncs.has_key(item.junction):
+                juncs[item.junction]+=1
+            else:
+                juncs[item.junction]=1
+        return juncs.items()
+    def get_junctions(self):
+        juncs=set([])
+        for item in self.fusionreads:
+            juncs.add(item.junction)
+        junobjdb=[Junction(x) for x in juncs]
+        return junobjdb
+    def get_perfect_JSR(self):
+        pjsr=[]
+        for item in self.fusionreads:
+            r=item.read
+            nm=[x[1] for x in r.tags if x[0]=='NM'][0]
+            if nm==0:
+                pjsr.append(item)
+        return pjsr
+    def positioncheck(self):
+        if len(self.fusionreads)==0:  #no junction and junction spanning reads.
+            return 'NA'
+        if len(self.discordantpairs)==0:
+            return 'NA'
+        junctions=self.get_junctions()
+        jA=[x.end1_pos for x in junctions]
+        jA_min,jA_max=min(jA),max(jA)
+        jB=[x.end2_pos for x in junctions]
+        jB_min,jB_max=min(jB),max(jB)    ##
+        fwd=[x[0].pos for x in self.discordantpairs] 
+        fwd_min,fwd_max=min(fwd),max(fwd)
+        rev=[x[1].pos for x in self.discordantpairs]
+        rev_min,rev_max=min(rev),max(rev)
+        #print 'junctionA',jA_min,jA_max
+        #print 'junctionB',jB_min,jB_max
+        #print 'Fwd Read',fwd_min,fwd_max
+        #print 'Rev Read',rev_min,rev_max
+        #####################################
+        #The following scoring process is translated from M. Berger's perl script.
+        const_score=0
+        if not self.discordantpairs[0][0].is_reverse:	#gene A on + strand
+            if jA_min > fwd_max:
+                const_score=const_score+3
+            elif jA_max > fwd_min:
+                const_score=const_score+2
+        elif self.discordantpairs[0][0].is_reverse:		#gene A on - strand
+            if jA_max < fwd_min:
+                const_score=const_score+3
+            elif jA_min < fwd_max:
+                const_score=const_score+2
+        if self.discordantpairs[0][1].is_reverse:   #gene B on + strand // disc read map to -
+            if jB_max < rev_min:
+                const_score=const_score+3
+            elif jB_min < rev_max:
+                const_score=const_score+2
+        elif not self.discordantpairs[0][1].is_reverse:   #gene B on - strand
+            if jB_min > rev_max:
+                const_score=const_score+3
+            elif jB_max > rev_min:
+                const_score=const_score+2
+        if const_score==6:
+            tag='YES'
+        elif const_score>=4:
+            tag='PARTIALLY'
+        else:
+            tag='NO'
+        return tag
+