annotate profilegenerator.py @ 1:f265e26ab550

Uploaded
author arkarachai-fungtammasan
date Fri, 24 Oct 2014 15:44:34 -0400
parents 20ab85af9505
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
"""Generate a profile of allele-value combinations from a count table.

Usage:
    profilegenerator.py <counts_file> <motif> <max_depth> <min_prob>

Input rows are whitespace-separated with at least three integer columns:
column 0 is the grouping key (called the "major allele" in the original
variable names), column 1 is the value collected for that key, and column 2
is a count.  Extra columns are ignored.

For every pair of neighbouring keys (neighbouring in sorted order) the kept
values of both keys are merged into one sorted, de-duplicated group.  Groups
that are proper subsets of another group are discarded.  For each depth from
2 to <max_depth>, every combination-with-replacement of each remaining group
is written to stdout as ``chr<TAB>v1,v2,...<TAB><motif>``.

NOTE(review): with fewer than two kept keys there are no neighbouring pairs,
so nothing is printed.  This matches the original script; confirm it is the
intended behaviour for single-key inputs.
"""
import collections
import itertools
import sys

# Rows with a count below this are never kept (MININUMCOUNT in the original).
MINIMUM_COUNT = 1

USAGE = "usage: profilegenerator.py <counts_file> <motif> <max_depth> <min_prob>\n"


def parse_rows(lines):
    """Return ``(key, raw_value, count)`` tuples parsed from *lines*.

    Blank lines are skipped.  Column 1 is kept as text and only converted to
    ``int`` for rows that survive filtering, as the original script did.
    """
    rows = []
    for line in lines:
        fields = line.split()
        if not fields:
            continue
        rows.append((int(fields[0]), fields[1], int(fields[2])))
    return rows


def select_values(rows, motif_size, min_prob):
    """Group the column-1 values of the rows that pass all filters by key.

    A row is kept when its key is a multiple of *motif_size*, its count is at
    least ``MINIMUM_COUNT``, and its count divided by the summed count of all
    rows sharing the key is at least *min_prob*.
    """
    totals = collections.defaultdict(int)
    for key, _, count in rows:
        totals[key] += count

    kept = collections.defaultdict(list)
    for key, raw_value, count in rows:
        if key % motif_size != 0 or count < MINIMUM_COUNT:
            continue
        total = totals[key]
        if total == 0:
            # Avoid dividing by zero when a key's counts cancel out.
            continue
        if count / (total * 1.0) >= min_prob:
            kept[key].append(int(raw_value))
    return kept


def merge_neighbours(kept):
    """Return the set of sorted value tuples merged from neighbouring keys."""
    keys = sorted(kept)
    merged = set()
    for low, high in zip(keys, keys[1:]):
        # Build a fresh union so the lists stored in *kept* are not mutated.
        merged.add(tuple(sorted(set(kept[low]) | set(kept[high]))))
    return merged


def drop_subsets(groups):
    """Return *groups* minus every proper subset of another group, sorted."""
    as_sets = [(group, set(group)) for group in groups]
    survivors = [
        group
        for group, members in as_sets
        if not any(members < other for _, other in as_sets)
    ]
    # Sorting makes the output order independent of set iteration order.
    return sorted(survivors)


def _format_lines(groups, motif, max_depth):
    """Yield one output line per combination, depth by depth."""
    for depth in range(2, max_depth + 1):
        for group in groups:
            for combo in itertools.combinations_with_replacement(group, depth):
                yield 'chr' + '\t' + ','.join(map(str, combo)) + '\t' + motif


def generate_profile(lines, motif, max_depth, min_prob):
    """Return an iterator over the profile lines built from *lines*.

    *lines* is any iterable of text rows (an open file works).  Raises
    ``ValueError`` if *motif* is empty, because its length is used as a
    modulus.
    """
    if not motif:
        raise ValueError("motif must be a non-empty string")
    kept = select_values(parse_rows(lines), len(motif), min_prob)
    groups = drop_subsets(merge_neighbours(kept))
    return _format_lines(groups, motif, max_depth)


def main(argv=None):
    """Command-line entry point; returns the process exit status."""
    args = sys.argv[1:] if argv is None else list(argv)
    if len(args) < 4 or not args[1]:
        sys.stderr.write(USAGE)
        return 2
    filename, motif = args[0], args[1]
    max_depth = int(args[2])
    min_prob = float(args[3])
    # Read the file once and close it even if parsing fails.
    with open(filename) as handle:
        for text in generate_profile(handle, motif, max_depth, min_prob):
            sys.stdout.write(text + '\n')
    return 0


if __name__ == "__main__":
    sys.exit(main())