'''
#pattern name sequence name start stop score p-value q-value matched sequence
constitutive-donor mm9_chr1_39533592_39535592_- 1815 1823 12.032 4.26e-06 0.397 CAGGTAAGT
constitutive-donor mm9_chr1_59313750_59315750_+ 1889 1897 12.032 4.26e-06 0.397 CAGGTAAGT

#pattern name sequence name start stop score p-value q-value matched sequence
constitutive-donor mm9_chr1_172019075_172021075_- 1947 1955 12.032 4.26e-06 0.843 CAGGTAAGT
constitutive-donor mm9_chr1_15300532_15302532_+ 156 164 12.032 4.26e-06 0.843 CAGGTAAGT
'''
|
|
10
|
|
11 import sys
|
|
12
|
|
def fimo2bed(filename,rc):
    '''
    Parse FIMO text output and print one BED-style line per hit to stdout.

    Every data row must hold eight tab-separated columns: pattern name,
    sequence name, start, stop, score, p-value, q-value, matched sequence.
    The sequence name must have the form
    <prefix>_<chrom>_<start>_<end>_<strand>, for example
    mm9_chr1_39533592_39535592_-; the prefix is dropped and the chromosome
    part may itself contain underscores (chrX_random).

    filename: path of the FIMO output file. The first line is always
        discarded as the header. Blank lines and any later line starting
        with '#' (e.g. a repeated header) are skipped.
    rc: true when the scanned sequences have been reverse complemented;
        the strand written to the output is then the opposite of the one
        in the sequence name.

    Printed columns (tab-separated): chrom, start, end, matched sequence,
    score, strand.

    Raises ValueError if a data row does not have exactly eight columns or
    a coordinate is not an integer.
    '''
    # 'with' guarantees the handle is closed even if a row fails to parse.
    with open(filename) as f:
        f.readline()  # the first line is the column header
        for line in f:
            line = line.strip()
            if not line or line.startswith('#'):
                continue
            _pattern,posi,begin,stop,score,_pv,_qv,seq = line.split('\t')
            flds = posi.split('_')
            start = flds[-3]
            end = flds[-2]
            strand = flds[-1]
            chrom = '_'.join(flds[1:-3]) #'chrX_random'
            # Offsets are counted from the window start when the scanned
            # sequence runs along the '+' strand, otherwise from its end.
            # Reverse complementing the input swaps which strand that is.
            if rc:
                from_start = strand == '-'
                out_strand = '+' if from_start else '-'
            else:
                from_start = strand == '+'
                out_strand = strand
            if from_start:
                # begin/stop look 1-based inclusive (stop - begin + 1 equals
                # the motif length in the sample rows); the -1 makes the
                # output start 0-based -- presumably BED convention.
                start1 = str(int(start) + int(begin)-1)
                end1 = str(int(start) + int(stop))
            else:
                start1 = str(int(end) - int(stop))
                end1 = str(int(end) - int(begin)+1)
            # Single-argument print call behaves the same on Python 2 and 3.
            print('\t'.join([chrom,start1,end1,seq,score,out_strand]))
|
|
45
|
|
if __name__ == '__main__':
    # Usage: <script> FIMO_OUTPUT [rc]
    # Guarded so that importing this module does not run the conversion.
    # A second argument equal to 'rc' marks the input sequences as reverse
    # complemented; when it is omitted they are treated as not reversed.
    fimo2bed(sys.argv[1], len(sys.argv) > 2 and sys.argv[2] == 'rc')
|