Mercurial > repos > arkarachai-fungtammasan > microsatellite_ngs
comparison profilegenerator.py @ 0:20ab85af9505
Uploaded
author | arkarachai-fungtammasan |
---|---|
date | Fri, 03 Oct 2014 20:54:30 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:20ab85af9505 |
---|---|
1 import collections | |
2 import itertools | |
3 import sys | |
4 | |
# Minimum count a row must have to be kept.  The (misspelled) name is the one
# the original script used and is preserved for anyone referring to it.
MININUMCOUNT = 1


def _parse_rows(lines):
    """Yield ``(major, observed_field, count)`` for every non-blank line.

    Each line is expected to hold at least three tab-separated fields.
    Field 0 and field 2 are converted to ``int`` immediately; field 1 is
    returned as text and only converted for rows that survive filtering,
    mirroring when the original script parsed it.

    NOTE(review): field 0 is named "major allele" in the original code and
    field 1 is presumably the observed allele length -- confirm against the
    tool that produces the input file.
    """
    for line in lines:
        stripped = line.strip()
        if not stripped:
            # A blank line used to crash with ValueError on int('').
            continue
        fields = stripped.split('\t')
        yield int(fields[0]), fields[1], int(fields[2])


def _observed_by_major(rows, motif_size, minimum_prob, minimum_count):
    """Group the field-1 values of the retained rows by their field-0 value.

    A row is retained when its field-0 value is a multiple of ``motif_size``,
    its count is at least ``minimum_count``, and its count divided by the
    total count of all rows sharing the same field-0 value is at least
    ``minimum_prob``.
    """
    totals = collections.defaultdict(int)
    for major, _observed, count in rows:
        totals[major] += count

    kept = collections.defaultdict(list)
    for major, observed, count in rows:
        if major % motif_size != 0 or count < minimum_count:
            continue
        total = totals[major]
        # total == 0 would make the ratio undefined; such rows are skipped
        # instead of raising ZeroDivisionError.
        if total != 0 and count / float(total) >= minimum_prob:
            kept[major].append(int(observed))
    return kept


def _merge_adjacent(observed_by_major):
    """Union the value lists of neighbouring keys and drop redundant unions.

    Keys are sorted and every consecutive pair contributes one sorted tuple
    holding the distinct values of both keys.  Duplicate tuples are removed,
    as is any tuple that is a proper subset of another.  The input mapping
    is not modified.
    """
    majors = sorted(observed_by_major)
    merged = []
    for left, right in zip(majors, majors[1:]):
        union = set(observed_by_major[left]) | set(observed_by_major[right])
        merged.append(tuple(sorted(union)))
    # list(set(...)) is kept on purpose so the order of the groups (and hence
    # of the printed lines) is the same as in the original script.
    unique = list(set(merged))
    return [
        candidate
        for candidate in unique
        if not any(set(candidate) < set(other) for other in unique)
    ]


def generate_profile(lines, motif, max_depth, minimum_prob,
                     minimum_count=MININUMCOUNT):
    """Yield the profile lines for the given input rows.

    Args:
        lines: iterable of tab-separated text lines (see ``_parse_rows``).
        motif: motif string; its length is the divisor applied to field 0
            and it is echoed as the last column of every output line.
        max_depth: largest combination size; sizes 2..max_depth are emitted.
        minimum_prob: minimum count fraction a row needs to be retained.
        minimum_count: minimum count a row needs to be retained.

    Yields:
        Strings of the form ``'chr\\t<v1,v2,...>\\t<motif>'``, one for every
        combination with replacement of each merged value group, for each
        depth in increasing order.

    Raises:
        ValueError: if ``motif`` is empty or a numeric field is malformed.
        IndexError: if a non-blank line has fewer than three fields.
    """
    if not motif:
        raise ValueError('motif must not be empty')
    rows = list(_parse_rows(lines))
    groups = _merge_adjacent(
        _observed_by_major(rows, len(motif), minimum_prob, minimum_count))
    for depth in range(2, max_depth + 1):
        for group in groups:
            for combo in itertools.combinations_with_replacement(group, depth):
                yield 'chr' + '\t' + ','.join(map(str, combo)) + '\t' + motif


def main(argv):
    """Command-line entry point.

    ``argv[1]`` is the input file, ``argv[2]`` the motif, ``argv[3]`` the
    maximum depth (int) and ``argv[4]`` the minimum probability (float).
    Extra arguments are ignored, as before.  Output goes to stdout.
    """
    if len(argv) < 5:
        sys.exit('usage: %s <input_file> <motif> <max_depth> <minimum_prob>'
                 % argv[0])
    filename, motif = argv[1], argv[2]
    max_depth = int(argv[3])
    minimum_prob = float(argv[4])
    # Read the file once and let the context manager close the handle.
    with open(filename) as handle:
        lines = handle.readlines()
    for profile_line in generate_profile(lines, motif, max_depth, minimum_prob):
        # Single-argument print() behaves identically on Python 2 and 3.
        print(profile_line)


if __name__ == '__main__':
    main(sys.argv)
65 | |
66 |