annotate kmersvm/scripts/split_genome.py @ 7:fd740d515502 draft default tip

Uploaded revised kmer-SVM to include modules from kmer-visual.
author cafletezbrant
date Sun, 16 Jun 2013 18:06:14 -0400
parents 7fe1103032f7
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
1 import os
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
2 import os.path
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
3 import sys
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
4 import optparse
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
5 import math
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
6 import re
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
7 from libkmersvm import *
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
8
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
def split(bed_file,options):
    """Write sliding windows over every region listed in ``bed_file``.

    Each non-blank input line must contain at least three tab-separated
    fields: a sequence name, a start coordinate and an end coordinate
    (named ``length`` below; it is compared directly against window ends,
    so for a start of 0 it equals the sequence length).  Extra columns are
    ignored.

    For each region, windows of ``options.size`` are emitted every
    ``options.incr`` positions as ``name<TAB>start<TAB>end`` lines in
    ``options.output``.  The last window of a region is clipped to the
    region end, and no window is ever empty or extends past the region.

    Parameters:
        bed_file: path of the tab-separated input file.
        options: object with integer attributes ``size`` and ``incr`` and
            a string attribute ``output`` (path of the file to write).

    Raises:
        ValueError: if ``size`` or ``incr`` is not positive, if a line has
            fewer than three fields, or if a coordinate is not an integer.
    """
    size = options.size
    incr = options.incr
    # A non-positive step would never reach the region end, so the loop
    # below would write forever; reject it up front.
    if size <= 0 or incr <= 0:
        raise ValueError("size and incr must be positive integers "
                         "(got size=%r, incr=%r)" % (size, incr))

    # Context managers guarantee both files are flushed and closed, even if
    # a malformed line raises part-way through.  Text mode keeps ``line`` a
    # ``str`` so that splitting on "\t" works on Python 3 as well.
    with open(options.output, 'w') as split_f:
        with open(bed_file, 'r') as bed_in:
            for line in bed_in:
                if not line.strip():
                    continue  # tolerate blank / trailing empty lines

                fields = line.rstrip("\r\n").split("\t")
                if len(fields) < 3:
                    raise ValueError(
                        "expected at least 3 tab-separated fields, "
                        "got %d: %r" % (len(fields), line))
                name = fields[0]
                start = int(fields[1])
                length = int(fields[2])

                if start >= length:
                    continue  # empty region: nothing to emit

                # Clip the first window so regions shorter than ``size``
                # do not produce an interval past the region end.
                end = min(start + size, length)

                while True:
                    split_f.write("%s\t%d\t%d\n" % (name, start, end))
                    if end + incr >= length:
                        # Final window: advance once more and clip to the
                        # region end; skip it if that leaves nothing.
                        start += incr
                        end = length
                        if start < end:
                            split_f.write(
                                "%s\t%d\t%d\n" % (name, start, end))
                        break
                    start += incr
                    end += incr
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
33
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
34
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
def main(argv=sys.argv):
    """Command-line entry point: parse options and run ``split``.

    Parameters:
        argv: full argument vector, program name first.  Only
            ``argv[1:]`` is parsed, so callers may pass a custom list
            instead of relying on ``sys.argv``.

    Options:
        -s  window size (default 1000)
        -i  increment between consecutive window starts (default 500)
        -o  output file (default ``split_genome_output.bed``)

    Prints the help text and exits with status 0 when no input file is
    given.
    """
    usage = "usage: %prog <bed_file>"
    parser = optparse.OptionParser(usage=usage)

    parser.add_option("-s", dest="size", type="int",
                      default=1000, help="set chunk size")
    # NOTE: ``incr`` is used by split() as the step between window starts.
    parser.add_option("-i", dest="incr", type="int",
                      default=500, help="set overlap size")
    parser.add_option("-o", dest="output", default="split_genome_output.bed",
                      help="output BED file (default is split_genome_output.bed)")

    # Parse the vector we were handed rather than implicitly re-reading
    # sys.argv, so the ``argv`` parameter is actually honoured.
    (options, args) = parser.parse_args(argv[1:])
    if not args:
        parser.print_help()
        sys.exit(0)

    bed_file = args[0]

    split(bed_file, options)
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
50
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
51 bed_file = args[0]
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
52
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
53 split(bed_file, options)
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
54
7fe1103032f7 Uploaded
cafletezbrant
parents:
diff changeset
# Run the command-line entry point only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()