annotate profilegenerator.py @ 3:3d58c22ea6c9 draft

Uploaded
author arkarachai-fungtammasan
date Sat, 22 Aug 2015 12:12:35 -0400
parents 07588b899c13
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
import collections
import itertools
import sys

# Minimum read count a (major allele, observed value) row needs to be kept.
MINIMUM_COUNT = 1


def parse_allele_counts(lines):
    """Parse tab-separated rows into ``(major, observed_text, count)`` tuples.

    Column 0 and column 2 are converted to ``int`` immediately because every
    row contributes to the per-major total.  Column 1 is kept as text and only
    converted for rows that survive filtering, so a malformed value on a row
    that is filtered out does not abort the run.

    Blank lines (for example a trailing newline at the end of the file) are
    skipped instead of raising.

    NOTE(review): column 0 is presumably the major-allele length and column 1
    the observed length for that allele -- confirm against the producer of
    this file.
    """
    records = []
    for line in lines:
        stripped = line.strip()
        if not stripped:
            continue
        fields = stripped.split('\t')
        records.append((int(fields[0]), fields[1], int(fields[2])))
    return records


def select_observed_lengths(records, motif_size, min_prob,
                            min_count=MINIMUM_COUNT):
    """Return ``{major: [observed, ...]}`` for the rows that pass all filters.

    A row is kept when:
      * its major value is a multiple of ``motif_size``;
      * its count is at least ``min_count``;
      * its count divided by the summed count of its major value is at least
        ``min_prob``.

    The count filter is evaluated before the ratio so that a major value whose
    counts sum to zero is skipped rather than triggering a division by zero.
    """
    totals = collections.defaultdict(int)
    for major, _observed, count in records:
        totals[major] += count

    selected = collections.defaultdict(list)
    for major, observed, count in records:
        if major % motif_size != 0:
            continue
        total = totals[major]
        if count < min_count or total == 0:
            continue
        if count / float(total) >= min_prob:
            selected[major].append(int(observed))
    return dict(selected)


def combine_adjacent_majors(observed_by_major):
    """Merge the observed values of each pair of neighbouring major values.

    Major values are taken in ascending order and every consecutive pair is
    merged into one sorted tuple of distinct observed values.  Duplicate
    tuples are collapsed, and any tuple that is a proper subset of another is
    dropped, so only the maximal combinations remain.

    The input mapping and its lists are not modified.  A mapping with fewer
    than two keys has no pairs and yields an empty list.  The order of the
    returned tuples is whatever order the intermediate ``set`` produces.
    """
    ordered = sorted(observed_by_major)
    merged = []
    for lower, upper in zip(ordered, ordered[1:]):
        union = set(observed_by_major[lower]) | set(observed_by_major[upper])
        merged.append(tuple(sorted(union)))

    candidates = list(set(merged))
    # Build each set once instead of once per comparison.
    as_sets = [frozenset(candidate) for candidate in candidates]
    return [
        candidate
        for candidate, members in zip(candidates, as_sets)
        if not any(members < other for other in as_sets)
    ]


def iter_profile_lines(length_sets, motif, max_depth):
    """Yield one output line per multiset of observed values.

    For every depth from 2 to ``max_depth`` inclusive, and every tuple in
    ``length_sets``, each combination-with-replacement of that tuple becomes
    a line of the form ``chr<TAB>v1,v2,...<TAB>motif`` (no trailing newline).
    Nothing is yielded when ``max_depth`` is below 2.
    """
    for depth in range(2, max_depth + 1):
        for lengths in length_sets:
            for combo in itertools.combinations_with_replacement(lengths,
                                                                 depth):
                yield 'chr' + '\t' + ','.join(map(str, combo)) + '\t' + motif


def main(argv=None):
    """Command-line entry point.

    Usage: profilegenerator.py <counts_file> <motif> <max_depth> <min_prob>
    """
    argv = sys.argv if argv is None else argv
    if len(argv) < 5:
        sys.exit('usage: %s <counts_file> <motif> <max_depth> <min_prob>'
                 % argv[0])

    filename = argv[1]
    motif = argv[2]
    max_depth = int(argv[3])
    min_prob = float(argv[4])
    if not motif:
        # The motif length is used as a modulus, so it cannot be zero.
        sys.exit('motif must not be empty')

    # Read the file once; the handle is closed even if parsing fails.
    with open(filename) as handle:
        records = parse_allele_counts(handle)

    observed_by_major = select_observed_lengths(records, len(motif), min_prob)
    length_sets = combine_adjacent_majors(observed_by_major)

    # sys.stdout.write keeps the script runnable on both Python 2 and 3.
    for line in iter_profile_lines(length_sets, motif, max_depth):
        sys.stdout.write(line + '\n')


if __name__ == '__main__':
    main()
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
65
07588b899c13 Uploaded
arkarachai-fungtammasan
parents:
diff changeset
66