Mercurial > repos > weilong-guo > bs_seeker2
comparison BSseeker2/bs_seeker2-build.py @ 0:e6df770c0e58 draft
Initial upload
author | weilong-guo |
---|---|
date | Fri, 12 Jul 2013 18:47:28 -0400 |
parents | |
children | 8b26adf64adc |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:e6df770c0e58 |
---|---|
1 #!/usr/bin/python | |
2 | |
3 import os | |
4 from optparse import OptionParser, OptionGroup | |
5 from bs_index.wg_build import * | |
6 from bs_index.rrbs_build import * | |
7 from bs_utils.utils import * | |
8 | |
9 | |
10 if __name__ == '__main__': | |
11 | |
12 parser = OptionParser() | |
13 | |
14 parser.add_option("-f", "--file", dest="filename", help="Input your reference genome file (fasta)", metavar="FILE") | |
15 parser.add_option("--aligner", dest="aligner", help="Aligner program to perform the analysis: " + ', '.join(supported_aligners) + " [Default: %default]", metavar="ALIGNER", default = BOWTIE2) | |
16 parser.add_option("-p", "--path", dest="aligner_path", help="Path to the aligner program. Defaults: " +' '*70+ '\t'.join(('%s: %s '+' '*70) % (al, aligner_path[al]) for al in sorted(supported_aligners)), | |
17 metavar="PATH") | |
18 parser.add_option("-d", "--db", type="string", dest="dbpath", help="Path to the reference genome library (generated in preprocessing genome) [Default: %default]", metavar="DBPATH", default = reference_genome_path) | |
19 | |
20 parser.add_option("-v", "--version", action="store_true", dest="version", help="show version of BS-Seeker2", default=False) | |
21 | |
22 # RRBS options | |
23 rrbs_opts = OptionGroup(parser, "Reduced Representation Bisulfite Sequencing Options", | |
24 "Use this options with conjuction of -r [--rrbs]") | |
25 rrbs_opts.add_option("-r", "--rrbs", action="store_true", dest="rrbs", help = 'Build index specially for Reduced Representation Bisulfite Sequencing experiments. Genome other than certain fragments will be masked. [Default: %default]', default = False) | |
26 rrbs_opts.add_option("-l", "--low",type= "int", dest="low_bound", help="lower bound of fragment length (excluding recognition sequence such as C-CGG) [Default: %default]", default = 40) | |
27 rrbs_opts.add_option("-u", "--up", type= "int", dest="up_bound", help="upper bound of fragment length (excluding recognition sequence such as C-CGG ends) [Default: %default]", default = 500) | |
28 rrbs_opts.add_option("-c", "--cut-site", type= "string", dest="cut_format", help="Cut sites of restriction enzyme. Ex: MspI(C-CGG), Mael:(C-TAG), double-enzyme MspI&Mael:(C-CGG,C-TAG). [Default: %default]", default = "C-CGG") | |
29 parser.add_option_group(rrbs_opts) | |
30 | |
31 | |
32 (options, args) = parser.parse_args() | |
33 | |
34 # if no options were given by the user, print help and exit | |
35 if len(sys.argv) == 1: | |
36 print parser.print_help() | |
37 exit(0) | |
38 | |
39 if options.version : | |
40 show_version() | |
41 exit (-1) | |
42 else : | |
43 show_version() | |
44 | |
45 rrbs = options.rrbs | |
46 | |
47 fasta_file=os.path.expanduser(options.filename) | |
48 if fasta_file is None: | |
49 error('Fasta file for the reference genome must be supported') | |
50 | |
51 if not os.path.isfile(fasta_file): | |
52 error('%s cannot be found' % fasta_file) | |
53 | |
54 if options.aligner not in supported_aligners: | |
55 error('-a option should be: %s' % ' ,'.join(supported_aligners)+'.') | |
56 | |
57 builder_exec = os.path.join(options.aligner_path or aligner_path[options.aligner], | |
58 {BOWTIE : 'bowtie-build', | |
59 BOWTIE2 : 'bowtie2-build', | |
60 SOAP : '2bwt-builder', | |
61 RMAP : '' # do nothing | |
62 }[options.aligner]) | |
63 | |
64 build_command = builder_exec + { BOWTIE : ' -f %(fname)s.fa %(fname)s', | |
65 BOWTIE2 : ' -f %(fname)s.fa %(fname)s', | |
66 SOAP : ' %(fname)s.fa' | |
67 }[options.aligner] | |
68 | |
69 | |
70 print "Reference genome file: %s" % fasta_file | |
71 print "Reduced Representation Bisulfite Sequencing: %s" % rrbs | |
72 print "Builder path: %s" % builder_exec | |
73 #--------------------------------------------------------------- | |
74 | |
75 ref_path = options.dbpath | |
76 | |
77 if os.path.exists(ref_path): | |
78 if not os.path.isdir(ref_path): | |
79 error("%s must be a directory. Please, delete it or change the -d option." % ref_path) | |
80 else: | |
81 os.mkdir(ref_path) | |
82 | |
83 if rrbs: # RRBS preprocessing | |
84 rrbs_build(fasta_file, build_command, ref_path, options.low_bound, options.up_bound, options.aligner, options.cut_format) | |
85 else: # Whole genome preprocessing | |
86 wg_build(fasta_file, build_command, ref_path, options.aligner) | |
87 |