"""Keep only the records whose microsatellite sequence is a pure repeat.

Usage:
    script.py INPUT_TSV PERIOD SEQ_COLUMN

INPUT_TSV   tab-separated text file, one record per line.
PERIOD      length of the repeat unit (positive integer).
SEQ_COLUMN  1-based column holding the sequence to test (named
            ``tr_ref_seq`` in the code; presumably the tandem-repeat
            reference sequence -- confirm against the producer of the file).

Each line is judged on its own.  A record is written to stdout, stripped of
surrounding whitespace, when its sequence is an exact repetition of its own
first PERIOD characters; a truncated final copy of the unit is allowed.
Records that fail this test are dropped.
"""
import sys


def is_pure_microsat(tr_ref_seq, period):
    """Return True if *tr_ref_seq* is a perfect repeat of its first *period* characters.

    The candidate motif is ``tr_ref_seq[:period]``.  The sequence is pure when
    it equals that motif repeated as many whole times as fit, followed by a
    prefix of the motif covering the remaining characters.  A sequence shorter
    than *period* is therefore always pure (it is a prefix of itself).

    Raises:
        ValueError: if *period* is smaller than 1.
    """
    if period < 1:
        raise ValueError("period must be a positive integer, got %r" % (period,))
    cand_motif = tr_ref_seq[:period]
    # divmod uses floor division, so this gives the same integer result on
    # Python 2 and Python 3 (a plain `/` would produce a float on Python 3).
    full_copies, leftover = divmod(len(tr_ref_seq), period)
    return tr_ref_seq == cand_motif * full_copies + cand_motif[:leftover]


def main(argv=None):
    """Filter the input file named on the command line; return an exit status.

    *argv* defaults to ``sys.argv``.  Arguments beyond the third are ignored.
    Returns 0 on success and 2 when required arguments are missing.
    """
    if argv is None:
        argv = sys.argv
    if len(argv) < 4:
        sys.stderr.write("usage: %s INPUT_TSV PERIOD SEQ_COLUMN\n" % argv[0])
        return 2

    period = int(argv[2])
    # The column is given 1-based on the command line.
    tr_ref_seq_ix = int(argv[3]) - 1

    # The context manager closes the file even if a record raises.
    with open(argv[1]) as fd:
        for line in fd:
            record = line.strip()
            # Empty fields are dropped *before* the column is looked up, so
            # consecutive tabs do not count as a column.
            fields = [field for field in record.split('\t') if field]
            if not fields:
                # Blank line: nothing to test.
                continue
            if is_pure_microsat(fields[tr_ref_seq_ix], period):
                # Single-argument call form prints identically on Python 2 and 3.
                print(record)
    return 0


if __name__ == "__main__":
    sys.exit(main())