"""Sum per-record sampling values grouped by read depth, allele pair and motif.

Usage: python <this script> <tab-separated input file>

Each input line is split on tabs.  Records sharing the same
(read depth, sorted allele pair, motif) have their sampling values summed,
and one tab-separated row per group is printed to stdout in sorted key order,
preceded by a header row.
"""
import sys
import collections
import math

# 1-based column numbers in the tab-separated input file.
SAMPLINGCOL = 11
ALLELE1COL = 7
ALLELE2COL = 8
SIGNCOL = 4  # not read anywhere in this script; kept for reference
readprofileCOL = 2
motifCOL = 3


def combine_sampling_values(lines):
    """Group the sampling values of the input records.

    Args:
        lines: iterable of tab-separated text lines (e.g. an open file).

    Returns:
        A ``defaultdict(list)`` mapping ``(read_depth, allele_pair, motif)``
        to the list of sampling values seen for that key, in input order.
        ``read_depth`` is the number of comma-separated entries in the read
        profile column, and ``allele_pair`` is the two integer alleles in
        ascending order joined by ``'_'``.

    Raises:
        IndexError: if a non-blank line has fewer columns than required.
        ValueError: if an allele is not an integer or the sampling value is
            not a float.
    """
    combined = collections.defaultdict(list)
    for line in lines:
        # Blank lines (typically a trailing one) carry no record.
        if not line.strip():
            continue
        # Only strip the line terminator: a full strip() would also eat
        # leading tabs and silently shift every column when column 1 is empty.
        fields = line.rstrip('\r\n').split('\t')
        low, high = sorted((int(fields[ALLELE1COL - 1]),
                            int(fields[ALLELE2COL - 1])))
        allele_pair = '%d_%d' % (low, high)
        read_depth = len(fields[readprofileCOL - 1].split(','))
        motif = fields[motifCOL - 1]
        sampling_value = float(fields[SAMPLINGCOL - 1])
        combined[(read_depth, allele_pair, motif)].append(sampling_value)
    return combined


def format_report(combined):
    """Return the report rows (header first) for a grouping.

    Args:
        combined: mapping as returned by ``combine_sampling_values``.

    Returns:
        A list of tab-joined strings without trailing newlines: the header,
        then one row per key in sorted key order holding the read depth, the
        allele pair, the sum of the grouped sampling values and the motif.
    """
    rows = ['\t'.join(('read_depth', 'allele', 'heterozygous_prob', 'motif'))]
    # sorted() works on both Python 2 lists and Python 3 dict views.
    for key in sorted(combined):
        read_depth, allele_pair, motif = key
        rows.append('\t'.join((str(read_depth), allele_pair,
                               str(sum(combined[key])), motif)))
    return rows


def main(argv=None):
    """Read the file named by the first argument and print the report.

    Args:
        argv: argument vector including the program name; defaults to
            ``sys.argv``.

    Returns:
        Process exit status: 0 on success, 2 when the input path is missing.
    """
    args = sys.argv if argv is None else argv
    if len(args) < 2:
        sys.stderr.write('usage: %s <tab-separated input file>\n' % args[0]
                         if args else 'usage: <script> <input file>\n')
        return 2
    # The context manager guarantees the handle is closed even on a parse error.
    with open(args[1]) as handle:
        combined = combine_sampling_values(handle)
    for row in format_report(combined):
        # Single-argument print() behaves the same on Python 2 and Python 3.
        print(row)
    return 0


if __name__ == '__main__':
    sys.exit(main())