# HG changeset patch
# User xuebing
# Date 1333244443 14400
# Node ID e84f946fd52c33de09731158239256cb3c8752d8
# Parent 7147b6e3ba069f1814c69db4c1ce84eb4406ccfe
# Uploaded
# diff -r 7147b6e3ba06 -r e84f946fd52c fimo_to_bed.py
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/fimo_to_bed.py Sat Mar 31 21:40:43 2012 -0400
# @@ -0,0 +1,46 @@
"""Convert FIMO text output into BED records.

Usage: fimo_to_bed.py <fimo_output> [rc]

The input is tab-separated with eight columns.  Example (columns are shown
separated by spaces here; the parser splits on tabs):

#pattern name sequence name start stop score p-value q-value matched sequence
constitutive-donor mm9_chr1_39533592_39535592_- 1815 1823 12.032 4.26e-06 0.397 CAGGTAAGT
constitutive-donor mm9_chr1_59313750_59315750_+ 1889 1897 12.032 4.26e-06 0.397 CAGGTAAGT

#pattern name sequence name start stop score p-value q-value matched sequence
constitutive-donor mm9_chr1_172019075_172021075_- 1947 1955 12.032 4.26e-06 0.843 CAGGTAAGT
constitutive-donor mm9_chr1_15300532_15302532_+ 156 164 12.032 4.26e-06 0.843 CAGGTAAGT

As the example shows, a file may contain blank lines and repeated '#' header
lines; both are skipped.
"""

import sys


def _bed_fields(line, rc):
    """Turn one stripped, tab-separated FIMO hit line into six BED fields.

    Returns [chrom, start, end, matched_sequence, score, strand] as strings.
    Raises ValueError if the line does not have exactly eight columns or a
    coordinate is not an integer.
    """
    _pattern, posi, begin, stop, score, _pv, _qv, seq = line.split('\t')

    # Sequence names look like <prefix>_<chrom>_<start>_<end>_<strand>.  The
    # chromosome itself may contain underscores (e.g. 'chrX_random'), so the
    # last three pieces are taken from the right and the first is dropped.
    pieces = posi.split('_')
    region_start = int(pieces[-3])
    region_end = int(pieces[-2])
    strand = pieces[-1]
    chrom = '_'.join(pieces[1:-3])

    if rc:
        # The scanned sequences were reverse complemented, so the strand
        # named in the sequence id is flipped for both the coordinate
        # arithmetic and the reported strand.
        forward = strand == '-'
        out_strand = '+' if forward else '-'
    else:
        forward = strand == '+'
        out_strand = strand

    first = int(begin)
    last = int(stop)
    if forward:
        # Motif offsets count from the left edge of the region.
        bed_start = region_start + first - 1
        bed_end = region_start + last
    else:
        # Motif offsets count backwards from the right edge of the region.
        bed_start = region_end - last
        bed_end = region_end - first + 1

    return [chrom, str(bed_start), str(bed_end), seq, score, out_strand]


def fimo2bed(filename, rc, out=None):
    """Parse FIMO output and write one BED line per motif hit.

    filename: path to the tab-separated FIMO output file.
    rc:       true if the scanned sequences have been reverse complemented.
    out:      writable text stream for the BED lines; defaults to sys.stdout.

    The first line of the file is always discarded as the header.  After
    that, blank lines and lines starting with '#' are skipped.  Any other
    line that does not have exactly eight tab-separated columns raises
    ValueError.
    """
    if out is None:
        # Resolved at call time so a redirected sys.stdout is honoured.
        out = sys.stdout

    with open(filename) as handle:
        next(handle, None)  # discard the header line
        for raw in handle:
            line = raw.strip()
            if not line or line.startswith('#'):
                continue
            out.write('\t'.join(_bed_fields(line, rc)) + '\n')


if __name__ == '__main__':
    # The second argument is optional; only the literal 'rc' enables
    # reverse-complement mode.
    fimo2bed(sys.argv[1], len(sys.argv) > 2 and sys.argv[2] == 'rc')