comparison find_intervals.py @ 12:4b6590dd7250

Uploaded
author miller-lab
date Wed, 12 Sep 2012 17:10:26 -0400
parents 2c498d40ecde
children
comparison
equal deleted inserted replaced
11:d4ec09e8079f 12:4b6590dd7250
1 #!/usr/bin/env python
2
3 import errno
4 import os
5 import subprocess
6 import sys
7
8 ################################################################################
9
10 def mkdir_p(path):
11 try:
12 os.makedirs(path)
13 except OSError, e:
14 if e.errno <> errno.EEXIST:
15 raise
16
17 def run_program(prog, args, stdout_file=None):
18 #print "args:", ' '.join(args)
19 p = subprocess.Popen(args, bufsize=-1, executable=prog, stdin=None, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
20 (stdoutdata, stderrdata) = p.communicate()
21 rc = p.returncode
22
23 if stdout_file is not None:
24 with open(stdout_file, 'w') as ofh:
25 print >> ofh, stdoutdata.rstrip('\r\n')
26
27 if rc != 0:
28 print >> sys.stderr, "FAILED: rc={0}: {1}".format(rc, ' '.join(args))
29 print >> sys.stderr, stderrdata
30 sys.exit(1)
31
32 ################################################################################
33
34 if len(sys.argv) != 11:
35 print "usage"
36 sys.exit(1)
37
38 input, dbkey, output, output_files_path, chrom_col, pos_col, score_col, shuffles, cutoff, report_snps = sys.argv[1:11]
39
40 prog = 'sweep'
41
42 args = [ prog ]
43 args.append(input)
44 args.append(chrom_col)
45 args.append(pos_col)
46 args.append(score_col)
47 args.append(cutoff)
48 args.append(shuffles)
49 args.append(report_snps)
50
51 run_program(None, args, stdout_file=output)
52
53 if report_snps == "0":
54 sys.exit(0)
55
56 ################################################################################
57
58 mkdir_p(output_files_path)
59
60 bedgraph_filename = 'bedgraph.txt'
61 links_filename = os.path.join(output_files_path, 'links.txt')
62
63 data = []
64 links_data = []
65
66 with open(output) as fh:
67 chrom = None
68 for line in fh:
69 line = line.rstrip('\r\n')
70 if not line:
71 continue
72 if line[0] != ' ':
73 # chrom line, add a link
74 chrom, interval_begin, interval_end, interval_value = line.split('\t')
75 links_data.append((chrom, int(interval_begin), int(interval_end)))
76 else:
77 # data line, add a bedgraph line
78 begin, value = line.split()
79 data.append((chrom, int(begin), value))
80
81 with open(bedgraph_filename, 'w') as ofh:
82 print >> ofh, 'track type=bedGraph'
83 for chrom, begin, value in sorted(data):
84 print >> ofh, chrom, begin, begin+1, value
85
86 with open(links_filename, 'w') as ofh:
87 for chrom, begin, end in sorted(links_data):
88 print >> ofh, chrom, begin, end
89
90 ################################################################################
91
92 chrom_sizes_filename = '{0}.chrom.sizes'.format(dbkey)
93
94 prog = 'fetchChromSizes'
95
96 args = [ prog ]
97 args.append(dbkey)
98
99 run_program(None, args, stdout_file=chrom_sizes_filename)
100
101 ################################################################################
102
103 prog = 'bedGraphToBigWig'
104
105 args = [ prog ]
106 args.append(bedgraph_filename)
107 args.append(chrom_sizes_filename)
108 args.append(output)
109
110 run_program(None, args)
111
112 ################################################################################
113
114 sys.exit(0)
115