comparison profilegenerator.py @ 0:07588b899c13 draft

Uploaded
author arkarachai-fungtammasan
date Wed, 01 Apr 2015 17:05:51 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:07588b899c13
"""Generate allele-combination profile lines for one repeat motif.

Usage:
    profilegenerator.py <input> <motif> <max_depth> <min_prob>

The input is a whitespace/tab separated table whose first three columns are
integers: the major allele, a value recorded for that major allele
(presumably an observed allele length -- confirm against the producer of the
file), and a count.  One line ``chr<TAB>v1,v2,...<TAB>motif`` is written for
every combination (with replacement) of the selected values, for every
depth from 2 up to ``max_depth``.

The code runs unchanged on Python 2 and Python 3.
"""
import collections
import itertools
import sys

# Minimum count a row needs in order to be kept.  The (misspelt) name is
# retained from the original script so anything referring to it keeps working.
MININUMCOUNT = 1


def _read_records(filename):
    """Return the rows of *filename* as ``(major, value, count)`` int triples.

    Blank lines are skipped.  Fields may be separated by tabs or any other
    whitespace; columns after the third are ignored.
    """
    records = []
    # The context manager guarantees the handle is closed, and the file is
    # read only once even though the data is walked twice afterwards.
    with open(filename) as handle:
        for line in handle:
            fields = line.split()
            if not fields:
                continue
            records.append((int(fields[0]), int(fields[1]), int(fields[2])))
    return records


def _select_alleles(records, motif_size, min_prob, min_count=MININUMCOUNT):
    """Group the values that pass the filters by their major allele.

    A row is kept when its major allele is a multiple of *motif_size*, its
    count divided by the summed count of that major allele is at least
    *min_prob*, and its count is at least *min_count*.
    """
    totals = collections.defaultdict(int)
    for major, _value, count in records:
        totals[major] += count

    kept = collections.defaultdict(list)
    for major, value, count in records:
        if major % motif_size:
            continue
        total = totals[major]
        if total == 0:
            # No frequency can be computed for this major allele; skip the
            # row instead of dividing by zero.
            continue
        if count / float(total) >= min_prob and count >= min_count:
            kept[major].append(value)
    return kept


def _merge_neighbours(kept):
    """Merge the value lists of consecutive major alleles into maximal groups.

    Each pair of neighbouring keys (in sorted order) contributes the sorted
    union of its two value lists.  Duplicated groups are collapsed and any
    group that is a proper subset of another one is dropped.  With fewer
    than two keys there is no pair, so the result is empty.
    """
    majors = sorted(kept)
    merged = [
        tuple(sorted(set(kept[lower]) | set(kept[upper])))
        for lower, upper in zip(majors, majors[1:])
    ]
    # De-duplicate through a set, exactly as the original script did, so the
    # order in which groups are emitted is unchanged for a given interpreter.
    unique = list(set(merged))
    as_sets = [set(group) for group in unique]
    return [
        group
        for group, members in zip(unique, as_sets)
        if not any(members < other for other in as_sets)
    ]


def _profile_lines(groups, motif, max_depth):
    """Yield one output line per combination, for depths 2..*max_depth*."""
    for depth in range(2, max_depth + 1):
        for group in groups:
            for combo in itertools.combinations_with_replacement(group, depth):
                yield 'chr' + '\t' + ','.join(map(str, combo)) + '\t' + MOTIF_JOIN(motif)


def MOTIF_JOIN(motif):
    """Return *motif* unchanged; kept tiny so the line format lives in one place."""
    return motif


def main(argv=None, out=None):
    """Run the profile generator.

    argv -- argument vector laid out like ``sys.argv`` (program name first);
            defaults to ``sys.argv``.
    out  -- writable text stream receiving the profile lines; defaults to
            ``sys.stdout``.

    Returns 0 on success and 2 when the arguments are unusable.  Malformed
    numeric arguments or table cells raise ``ValueError``.
    """
    if argv is None:
        argv = sys.argv
    if out is None:
        out = sys.stdout

    if len(argv) < 5:
        sys.stderr.write(
            'usage: %s <input> <motif> <max_depth> <min_prob>\n' % argv[0]
            if argv else 'usage: profilegenerator.py <input> <motif> '
                         '<max_depth> <min_prob>\n')
        return 2

    filename = argv[1]
    motif = argv[2]
    max_depth = int(argv[3])
    min_prob = float(argv[4])
    if not motif:
        # An empty motif would make the multiple-of-motif-size test divide
        # by zero.
        sys.stderr.write('error: motif must not be empty\n')
        return 2

    records = _read_records(filename)
    kept = _select_alleles(records, len(motif), min_prob)
    groups = _merge_neighbours(kept)
    for line in _profile_lines(groups, motif, max_depth):
        out.write(line + '\n')
    return 0


if __name__ == '__main__':
    sys.exit(main())