view pyPRADA_1.2/gfclass.py @ 0:acc2ca1a3ba4

Uploaded
author siyuan
date Thu, 20 Feb 2014 00:44:58 -0500
parents
children
line wrap: on
line source


class Junction(object):
    def __init__(self,init_str):
        'init_str looks like PCBP2:12:53873398_GFAP:17:42985517'
        part1,part2=init_str.split('_')
        info1,info2=part1.split(':'), part2.split(':')
        self.gene1=info1[0]
        self.end1_chr=info1[1]
        self.end1_pos=int(info1[2])
        self.gene2=info2[0]
        self.end2_chr=info2[1]
        self.end2_pos=int(info2[2])
        self.name='%s.%s.%s.%s.%s.%s'%(self.gene1,self.end1_chr,self.end1_pos,self.gene2,self.end2_chr,self.end2_pos)
    def distance(self): 
        betwn_junc_dist=None
        if self.end1_chr==self.end2_chr:
            betwn_junc_dist=abs(int(self.end1_pos) - int(self.end2_pos))
        return betwn_junc_dist
    def junc_category(self):
        cat=None
        if self.end1_chr==self.end2_chr:
            cat='intrachromosome'
        else:
            cat='interchromosome'
        return cat

class JSR(object):
    '''Ideally it should extend pysam.AlignedRead class, but run into segment error.
       Read is pysam.AlignedRead object'''
    def __init__(self,read,junction):
        self.read=read
        self.junction=junction

class GeneFusion(object):
    '''discs is [(r1,r2),...]; junc_rds is [jsr1,jsr2,...];'''
    def __init__(self,gene1,gene2,discordantpairs=[],junc_reads=[]):
        self.gene1=gene1
        self.gene2=gene2
        self.discordantpairs=discordantpairs
        self.fusionreads=junc_reads
    def update(self,mm=1):
        '''Generate a new PRADA object with the update parameter. Extendable.'''
        filtdp,filtfus=[],[]  #hold updated elements
        #apply mm filter
        for rp in self.discordantpairs:
            r1,r2=rp
            nm1=[x[1] for x in r1.tags if x[0]=='NM'][0]
            nm2=[x[1] for x in r2.tags if x[0]=='NM'][0]
            if nm1 <= mm and nm2 <= mm:
                filtdp.append(rp)
        for fp in self.fusionreads:
            nm=[x[1] for x in fp.read.tags if x[0]=='NM'][0]
            if nm <= mm:
                filtfus.append(fp)
        newobject=GeneFusion(self.gene1,self.gene2,filtdp,filtfus)
        return newobject
    def get_junction_freq(self):
        juncs={}
        for item in self.fusionreads:
            if juncs.has_key(item.junction):
                juncs[item.junction]+=1
            else:
                juncs[item.junction]=1
        return juncs.items()
    def get_junctions(self):
        juncs=set([])
        for item in self.fusionreads:
            juncs.add(item.junction)
        junobjdb=[Junction(x) for x in juncs]
        return junobjdb
    def get_perfect_JSR(self):
        pjsr=[]
        for item in self.fusionreads:
            r=item.read
            nm=[x[1] for x in r.tags if x[0]=='NM'][0]
            if nm==0:
                pjsr.append(item)
        return pjsr
    def positioncheck(self):
        if len(self.fusionreads)==0:  #no junction and junction spanning reads.
            return 'NA'
        if len(self.discordantpairs)==0:
            return 'NA'
        junctions=self.get_junctions()
        jA=[x.end1_pos for x in junctions]
        jA_min,jA_max=min(jA),max(jA)
        jB=[x.end2_pos for x in junctions]
        jB_min,jB_max=min(jB),max(jB)    ##
        fwd=[x[0].pos for x in self.discordantpairs] 
        fwd_min,fwd_max=min(fwd),max(fwd)
        rev=[x[1].pos for x in self.discordantpairs]
        rev_min,rev_max=min(rev),max(rev)
        #print 'junctionA',jA_min,jA_max
        #print 'junctionB',jB_min,jB_max
        #print 'Fwd Read',fwd_min,fwd_max
        #print 'Rev Read',rev_min,rev_max
        #####################################
        #The following scoring process is translated from M. Berger's perl script.
        const_score=0
        if not self.discordantpairs[0][0].is_reverse:	#gene A on + strand
            if jA_min > fwd_max:
                const_score=const_score+3
            elif jA_max > fwd_min:
                const_score=const_score+2
        elif self.discordantpairs[0][0].is_reverse:		#gene A on - strand
            if jA_max < fwd_min:
                const_score=const_score+3
            elif jA_min < fwd_max:
                const_score=const_score+2
        if self.discordantpairs[0][1].is_reverse:   #gene B on + strand // disc read map to -
            if jB_max < rev_min:
                const_score=const_score+3
            elif jB_min < rev_max:
                const_score=const_score+2
        elif not self.discordantpairs[0][1].is_reverse:   #gene B on - strand
            if jB_min > rev_max:
                const_score=const_score+3
            elif jB_max > rev_min:
                const_score=const_score+2
        if const_score==6:
            tag='YES'
        elif const_score>=4:
            tag='PARTIALLY'
        else:
            tag='NO'
        return tag