0
|
1 #!/usr/bin/python
|
|
2
|
|
3 import os
|
|
4 from optparse import OptionParser, OptionGroup
|
|
5 from bs_index.wg_build import *
|
|
6 from bs_index.rrbs_build import *
|
|
7 from bs_utils.utils import *
|
|
8
|
|
9
|
|
10 if __name__ == '__main__':
|
|
11
|
|
12 parser = OptionParser()
|
|
13
|
|
14 parser.add_option("-f", "--file", dest="filename", help="Input your reference genome file (fasta)", metavar="FILE")
|
|
15 parser.add_option("--aligner", dest="aligner", help="Aligner program to perform the analysis: " + ', '.join(supported_aligners) + " [Default: %default]", metavar="ALIGNER", default = BOWTIE2)
|
|
16 parser.add_option("-p", "--path", dest="aligner_path", help="Path to the aligner program. Defaults: " +' '*70+ '\t'.join(('%s: %s '+' '*70) % (al, aligner_path[al]) for al in sorted(supported_aligners)),
|
|
17 metavar="PATH")
|
|
18 parser.add_option("-d", "--db", type="string", dest="dbpath", help="Path to the reference genome library (generated in preprocessing genome) [Default: %default]", metavar="DBPATH", default = reference_genome_path)
|
|
19
|
|
20 parser.add_option("-v", "--version", action="store_true", dest="version", help="show version of BS-Seeker2", default=False)
|
|
21
|
|
22 # RRBS options
|
|
23 rrbs_opts = OptionGroup(parser, "Reduced Representation Bisulfite Sequencing Options",
|
|
24 "Use this options with conjuction of -r [--rrbs]")
|
|
25 rrbs_opts.add_option("-r", "--rrbs", action="store_true", dest="rrbs", help = 'Build index specially for Reduced Representation Bisulfite Sequencing experiments. Genome other than certain fragments will be masked. [Default: %default]', default = False)
|
|
26 rrbs_opts.add_option("-l", "--low",type= "int", dest="low_bound", help="lower bound of fragment length (excluding recognition sequence such as C-CGG) [Default: %default]", default = 40)
|
|
27 rrbs_opts.add_option("-u", "--up", type= "int", dest="up_bound", help="upper bound of fragment length (excluding recognition sequence such as C-CGG ends) [Default: %default]", default = 500)
|
|
28 rrbs_opts.add_option("-c", "--cut-site", type= "string", dest="cut_format", help="Cut sites of restriction enzyme. Ex: MspI(C-CGG), Mael:(C-TAG), double-enzyme MspI&Mael:(C-CGG,C-TAG). [Default: %default]", default = "C-CGG")
|
|
29 parser.add_option_group(rrbs_opts)
|
|
30
|
|
31
|
|
32 (options, args) = parser.parse_args()
|
|
33
|
|
34 # if no options were given by the user, print help and exit
|
|
35 if len(sys.argv) == 1:
|
|
36 print parser.print_help()
|
|
37 exit(0)
|
|
38
|
|
39 if options.version :
|
|
40 show_version()
|
|
41 exit (-1)
|
|
42 else :
|
|
43 show_version()
|
|
44
|
|
45 rrbs = options.rrbs
|
|
46
|
|
47 fasta_file=os.path.expanduser(options.filename)
|
|
48 if fasta_file is None:
|
|
49 error('Fasta file for the reference genome must be supported')
|
|
50
|
|
51 if not os.path.isfile(fasta_file):
|
|
52 error('%s cannot be found' % fasta_file)
|
|
53
|
|
54 if options.aligner not in supported_aligners:
|
|
55 error('-a option should be: %s' % ' ,'.join(supported_aligners)+'.')
|
|
56
|
|
57 builder_exec = os.path.join(options.aligner_path or aligner_path[options.aligner],
|
|
58 {BOWTIE : 'bowtie-build',
|
|
59 BOWTIE2 : 'bowtie2-build',
|
|
60 SOAP : '2bwt-builder',
|
|
61 RMAP : '' # do nothing
|
|
62 }[options.aligner])
|
|
63
|
|
64 build_command = builder_exec + { BOWTIE : ' -f %(fname)s.fa %(fname)s',
|
|
65 BOWTIE2 : ' -f %(fname)s.fa %(fname)s',
|
|
66 SOAP : ' %(fname)s.fa'
|
|
67 }[options.aligner]
|
|
68
|
|
69
|
|
70 print "Reference genome file: %s" % fasta_file
|
|
71 print "Reduced Representation Bisulfite Sequencing: %s" % rrbs
|
|
72 print "Builder path: %s" % builder_exec
|
|
73 #---------------------------------------------------------------
|
|
74
|
|
75 ref_path = options.dbpath
|
|
76
|
|
77 if os.path.exists(ref_path):
|
|
78 if not os.path.isdir(ref_path):
|
|
79 error("%s must be a directory. Please, delete it or change the -d option." % ref_path)
|
|
80 else:
|
|
81 os.mkdir(ref_path)
|
|
82
|
|
83 if rrbs: # RRBS preprocessing
|
|
84 rrbs_build(fasta_file, build_command, ref_path, options.low_bound, options.up_bound, options.aligner, options.cut_format)
|
|
85 else: # Whole genome preprocessing
|
|
86 wg_build(fasta_file, build_command, ref_path, options.aligner)
|
|
87
|