diff fimo_to_bed.py @ 1:e84f946fd52c default tip

Uploaded
author xuebing
date Sat, 31 Mar 2012 21:40:43 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/fimo_to_bed.py	Sat Mar 31 21:40:43 2012 -0400
@@ -0,0 +1,46 @@
+'''
+#pattern name	sequence name	start	stop	score	p-value	q-value	matched sequence
+constitutive-donor	mm9_chr1_39533592_39535592_-	1815	1823	12.032	4.26e-06	0.397	CAGGTAAGT
+constitutive-donor	mm9_chr1_59313750_59315750_+	1889	1897	12.032	4.26e-06	0.397	CAGGTAAGT
+
+#pattern name	sequence name	start	stop	score	p-value	q-value	matched sequence
+constitutive-donor	mm9_chr1_172019075_172021075_-	1947	1955	12.032	4.26e-06	0.843	CAGGTAAGT
+constitutive-donor	mm9_chr1_15300532_15302532_+	156	164	12.032	4.26e-06	0.843	CAGGTAAGT
+'''
+
+import sys
+
+def fimo2bed(filename,rc):
+    '''
+    parse fimo output to make a bed file
+    rc: the sequence have been reverse complemented
+    '''
+    f = open(filename)
+    header = f.readline()
+    for line in f:
+        pattern,posi,begin,stop,score,pv,qv,seq = line.strip().split('\t')
+        flds = posi.split('_')
+        start = flds[-3]
+        end = flds[-2]
+        strand = flds[-1]
+        chrom = '_'.join(flds[1:-3]) #'chrX_random'
+        if not rc:
+            if strand == '+':
+                start1 = str(int(start) + int(begin)-1)
+                end1 = str(int(start) + int(stop))
+                print '\t'.join([chrom,start1,end1,seq,score,strand]) 
+            else:
+                start1 = str(int(end) - int(stop))
+                end1 = str(int(end) - int(begin)+1)
+                print '\t'.join([chrom,start1,end1,seq,score,strand])
+        else:
+            if strand == '-':
+                start1 = str(int(start) + int(begin)-1)
+                end1 = str(int(start) + int(stop))
+                print '\t'.join([chrom,start1,end1,seq,score,'+']) 
+            else:
+                start1 = str(int(end) - int(stop))
+                end1 = str(int(end) - int(begin)+1)
+                print '\t'.join([chrom,start1,end1,seq,score,'-'])      
+
+fimo2bed(sys.argv[1], len(sys.argv) > 2 and sys.argv[2] == 'rc')  # argv: <fimo output> [rc]; 'rc' is optional