"""Enumerate allele combinations for a repeat motif from a counts table.

Usage:
    <script> <counts_file> <motif> <max_depth> <min_prob>

Input is a tab-separated file whose first three columns are read as
``key``, ``allele`` and ``count`` (the original script calls the key the
"major allele").  Processing steps:

1. Sum the counts per key.
2. Keep a row's allele when the key is a multiple of ``len(motif)``, the
   row's count is at least ``MININUMCOUNT`` and the row's share of the
   key's total count is at least ``min_prob``.
3. For every pair of consecutive retained keys (in sorted order) build the
   sorted union of their allele lists.
4. Drop duplicate unions and unions contained in another union.
5. For every depth from 2 to ``max_depth`` print each combination (with
   replacement) of every remaining union as ``chr<TAB>a,b,...<TAB>motif``.

The code is written to run unchanged on Python 2.7 and Python 3.
"""

import collections
import itertools
import sys

# Minimum count a row needs to be kept.  The (misspelled) name is kept
# exactly as in the original script.
MININUMCOUNT = 1


def read_rows(lines):
    """Parse tab-separated lines into ``(key, allele_text, count)`` tuples.

    Blank lines are skipped.  The allele column is kept as text and only
    converted to ``int`` for rows that pass the filters, so a non-numeric
    allele on a discarded row does not abort the run.

    Raises:
        ValueError: if the first or third column is not an integer.
        IndexError: if a non-blank line has fewer than three columns.
    """
    rows = []
    for line in lines:
        fields = line.strip().split('\t')
        if fields == ['']:
            # Empty or whitespace-only line (e.g. a trailing newline).
            continue
        rows.append((int(fields[0]), fields[1], int(fields[2])))
    return rows


def select_alleles(rows, motif_size, min_prob, min_count=MININUMCOUNT):
    """Return ``{key: [allele, ...]}`` for the rows that pass all filters.

    A row is kept when its key is a multiple of ``motif_size``, its count is
    at least ``min_count`` and ``count / total_count_of_key >= min_prob``.
    Alleles keep the order in which their rows appear in ``rows``.
    """
    totals = collections.defaultdict(int)
    for key, _allele, count in rows:
        totals[key] += count

    selected = {}
    for key, allele, count in rows:
        if key % motif_size != 0 or count < min_count:
            continue
        total = totals[key]
        if total == 0:
            # The share is undefined; skip instead of dividing by zero.
            continue
        # float() keeps true division on Python 2 as well.
        if count / float(total) >= min_prob:
            selected.setdefault(key, []).append(int(allele))
    return selected


def merge_adjacent_alleles(selected):
    """Return the sorted, de-duplicated allele unions of neighbouring keys.

    Keys are taken in ascending order and each key is paired with its
    successor; every pair contributes the sorted tuple of the union of both
    allele lists.  With fewer than two keys there are no pairs and the
    result is empty.  ``selected`` is not modified.
    """
    keys = sorted(selected)
    unions = set()
    for left, right in zip(keys, keys[1:]):
        unions.add(tuple(sorted(set(selected[left]) | set(selected[right]))))
    # Sorting makes the output order independent of set iteration order.
    return sorted(unions)


def drop_subsets(groups):
    """Return the groups that are not contained in a different group."""
    as_sets = [(group, set(group)) for group in groups]
    kept = []
    for group, members in as_sets:
        contained = any(
            members.issubset(other_members) and group != other
            for other, other_members in as_sets
        )
        if not contained:
            kept.append(group)
    return kept


def format_records(groups, motif, max_depth):
    """Yield one output line per combination, for depths 2..``max_depth``."""
    for depth in range(2, max_depth + 1):
        for group in groups:
            for combo in itertools.combinations_with_replacement(group, depth):
                yield 'chr' + '\t' + ','.join(map(str, combo)) + '\t' + motif


def generate_records(lines, motif, max_depth, min_prob,
                     min_count=MININUMCOUNT):
    """Run the whole pipeline on ``lines`` and return an iterator of records.

    Parsing and filtering happen immediately; only the final enumeration of
    combinations is lazy.
    """
    rows = read_rows(lines)
    selected = select_alleles(rows, len(motif), min_prob, min_count)
    groups = drop_subsets(merge_adjacent_alleles(selected))
    return format_records(groups, motif, max_depth)


def main(argv=None):
    """Command-line entry point; ``argv`` defaults to ``sys.argv``."""
    if argv is None:
        argv = sys.argv
    if len(argv) < 5:
        sys.exit('usage: <script> <counts_file> <motif> <max_depth> <min_prob>')

    filename, motif = argv[1], argv[2]
    if not motif:
        # len(motif) is used as a modulus, so it must not be zero.
        sys.exit('error: motif must be a non-empty string')
    max_depth = int(argv[3])
    min_prob = float(argv[4])

    with open(filename) as handle:
        lines = handle.readlines()

    for record in generate_records(lines, motif, max_depth, min_prob):
        # A single parenthesised argument prints identically on Python 2 and 3.
        print(record)


if __name__ == '__main__':
    main()