Mercurial > repos > weilong-guo > bs_seeker2
diff BSseeker2/bs_seeker2-build.py @ 0:e6df770c0e58 draft
Initial upload
author | weilong-guo |
---|---|
date | Fri, 12 Jul 2013 18:47:28 -0400 |
parents | |
children | 8b26adf64adc |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/BSseeker2/bs_seeker2-build.py Fri Jul 12 18:47:28 2013 -0400 @@ -0,0 +1,87 @@ +#!/usr/bin/python + +import os +from optparse import OptionParser, OptionGroup +from bs_index.wg_build import * +from bs_index.rrbs_build import * +from bs_utils.utils import * + + +if __name__ == '__main__': + + parser = OptionParser() + + parser.add_option("-f", "--file", dest="filename", help="Input your reference genome file (fasta)", metavar="FILE") + parser.add_option("--aligner", dest="aligner", help="Aligner program to perform the analysis: " + ', '.join(supported_aligners) + " [Default: %default]", metavar="ALIGNER", default = BOWTIE2) + parser.add_option("-p", "--path", dest="aligner_path", help="Path to the aligner program. Defaults: " +' '*70+ '\t'.join(('%s: %s '+' '*70) % (al, aligner_path[al]) for al in sorted(supported_aligners)), + metavar="PATH") + parser.add_option("-d", "--db", type="string", dest="dbpath", help="Path to the reference genome library (generated in preprocessing genome) [Default: %default]", metavar="DBPATH", default = reference_genome_path) + + parser.add_option("-v", "--version", action="store_true", dest="version", help="show version of BS-Seeker2", default=False) + + # RRBS options + rrbs_opts = OptionGroup(parser, "Reduced Representation Bisulfite Sequencing Options", + "Use this options with conjuction of -r [--rrbs]") + rrbs_opts.add_option("-r", "--rrbs", action="store_true", dest="rrbs", help = 'Build index specially for Reduced Representation Bisulfite Sequencing experiments. Genome other than certain fragments will be masked. [Default: %default]', default = False) + rrbs_opts.add_option("-l", "--low",type= "int", dest="low_bound", help="lower bound of fragment length (excluding recognition sequence such as C-CGG) [Default: %default]", default = 40) + rrbs_opts.add_option("-u", "--up", type= "int", dest="up_bound", help="upper bound of fragment length (excluding recognition sequence such as C-CGG ends) [Default: %default]", default = 500) + rrbs_opts.add_option("-c", "--cut-site", type= "string", dest="cut_format", help="Cut sites of restriction enzyme. Ex: MspI(C-CGG), Mael:(C-TAG), double-enzyme MspI&Mael:(C-CGG,C-TAG). [Default: %default]", default = "C-CGG") + parser.add_option_group(rrbs_opts) + + + (options, args) = parser.parse_args() + + # if no options were given by the user, print help and exit + if len(sys.argv) == 1: + print parser.print_help() + exit(0) + + if options.version : + show_version() + exit (-1) + else : + show_version() + + rrbs = options.rrbs + + fasta_file=os.path.expanduser(options.filename) + if fasta_file is None: + error('Fasta file for the reference genome must be supported') + + if not os.path.isfile(fasta_file): + error('%s cannot be found' % fasta_file) + + if options.aligner not in supported_aligners: + error('-a option should be: %s' % ' ,'.join(supported_aligners)+'.') + + builder_exec = os.path.join(options.aligner_path or aligner_path[options.aligner], + {BOWTIE : 'bowtie-build', + BOWTIE2 : 'bowtie2-build', + SOAP : '2bwt-builder', + RMAP : '' # do nothing + }[options.aligner]) + + build_command = builder_exec + { BOWTIE : ' -f %(fname)s.fa %(fname)s', + BOWTIE2 : ' -f %(fname)s.fa %(fname)s', + SOAP : ' %(fname)s.fa' + }[options.aligner] + + + print "Reference genome file: %s" % fasta_file + print "Reduced Representation Bisulfite Sequencing: %s" % rrbs + print "Builder path: %s" % builder_exec + #--------------------------------------------------------------- + + ref_path = options.dbpath + + if os.path.exists(ref_path): + if not os.path.isdir(ref_path): + error("%s must be a directory. Please, delete it or change the -d option." % ref_path) + else: + os.mkdir(ref_path) + + if rrbs: # RRBS preprocessing + rrbs_build(fasta_file, build_command, ref_path, options.low_bound, options.up_bound, options.aligner, options.cut_format) + else: # Whole genome preprocessing + wg_build(fasta_file, build_command, ref_path, options.aligner) +