Mercurial > repos > weilong-guo > bs_seeker2
comparison BSseeker2/bs_seeker2-align.py @ 1:8b26adf64adc draft default tip
V2.0.5
author | weilong-guo |
---|---|
date | Tue, 05 Nov 2013 01:55:39 -0500 |
parents | e6df770c0e58 |
children |
comparison
equal
deleted
inserted
replaced
0:e6df770c0e58 | 1:8b26adf64adc |
---|---|
1 #!/usr/bin/python | 1 #!/usr/bin/env python |
2 | 2 |
3 from optparse import OptionParser, OptionGroup | 3 from optparse import OptionParser, OptionGroup |
4 import re | 4 import re |
5 import tempfile | 5 import tempfile |
6 from bs_align import output | 6 from bs_align import output |
7 from bs_align.bs_pair_end import * | 7 from bs_align.bs_pair_end import * |
8 from bs_align.bs_single_end import * | 8 from bs_align.bs_single_end import * |
9 from bs_align.bs_rrbs import * | 9 from bs_align.bs_rrbs import * |
10 from bs_utils.utils import * | 10 #import re |
11 #from bs_utils.utils import * | |
11 | 12 |
12 | 13 |
13 if __name__ == '__main__': | 14 if __name__ == '__main__': |
14 | 15 |
15 parser = OptionParser() | 16 parser = OptionParser(usage="Usage: %prog {-i <single> | -1 <mate1> -2 <mate2>} -g <genome.fa> [options]") |
16 # option group 1 | 17 # option group 1 |
17 opt_group = OptionGroup(parser, "For single end reads") | 18 opt_group = OptionGroup(parser, "For single end reads") |
18 opt_group.add_option("-i", "--input", type="string", dest="infilename",help="Input your read file name (FORMAT: sequences, fastq, qseq,fasta)", metavar="INFILE") | 19 opt_group.add_option("-i", "--input", type="string", dest="infilename",help="Input read file (FORMAT: sequences, qseq, fasta, fastq). Ex: read.fa or read.fa.gz", metavar="INFILE") |
19 parser.add_option_group(opt_group) | 20 parser.add_option_group(opt_group) |
20 | 21 |
21 # option group 2 | 22 # option group 2 |
22 opt_group = OptionGroup(parser, "For pair end reads") | 23 opt_group = OptionGroup(parser, "For pair end reads") |
23 opt_group.add_option("-1", "--input_1", type="string", dest="infilename_1",help="Input your read file end 1 (FORMAT: sequences, qseq, fasta, fastq)", metavar="FILE") | 24 opt_group.add_option("-1", "--input_1", type="string", dest="infilename_1",help="Input read file, mate 1 (FORMAT: sequences, qseq, fasta, fastq)", metavar="FILE") |
24 opt_group.add_option("-2", "--input_2", type="string", dest="infilename_2",help="Input your read file end 2 (FORMAT: sequences, qseq, fasta, fastq)", metavar="FILE") | 25 opt_group.add_option("-2", "--input_2", type="string", dest="infilename_2",help="Input read file, mate 2 (FORMAT: sequences, qseq, fasta, fastq)", metavar="FILE") |
25 opt_group.add_option("--minins",type = "int",dest = "min_insert_size", help="The minimum insert size for valid paired-end alignments [Default: %default]", default = -1) | 26 opt_group.add_option("-I", "--minins",type = "int",dest = "min_insert_size", help="The minimum insert size for valid paired-end alignments [Default: %default]", default = 0) |
26 opt_group.add_option("--maxins",type = "int",dest = "max_insert_size", help="The maximum insert size for valid paired-end alignments [Default: %default]", default = 400) | 27 opt_group.add_option("-X", "--maxins",type = "int",dest = "max_insert_size", help="The maximum insert size for valid paired-end alignments [Default: %default]", default = 500) |
27 parser.add_option_group(opt_group) | 28 parser.add_option_group(opt_group) |
28 | 29 |
29 # option group 3 | 30 # option group 3 |
30 opt_group = OptionGroup(parser, "Reduced Representation Bisulfite Sequencing Options") | 31 opt_group = OptionGroup(parser, "Reduced Representation Bisulfite Sequencing Options") |
31 opt_group.add_option("-r", "--rrbs", action="store_true", dest="rrbs", default = False, help = 'Process reads from Reduced Representation Bisulfite Sequencing experiments') | 32 opt_group.add_option("-r", "--rrbs", action="store_true", dest="rrbs", default = False, help = 'Map reads to the Reduced Representation genome') |
32 opt_group.add_option("-c", "--cut-site", type="string",dest="cut_format", help="Cutting sites of restriction enzyme. Ex: MspI(C-CGG), Mael:(C-TAG), double-enzyme MspI&Mael:(C-CGG,C-TAG). [Default: %default]", metavar="pattern", default = "C-CGG") | 33 opt_group.add_option("-c", "--cut-site", type="string",dest="cut_format", help="Cutting sites of restriction enzyme. Ex: MspI(C-CGG), Mael:(C-TAG), double-enzyme MspI&Mael:(C-CGG,C-TAG). [Default: %default]", metavar="pattern", default = "C-CGG") |
33 opt_group.add_option("-L", "--low", type = "int", dest="rrbs_low_bound", help="lower bound of fragment length (excluding C-CGG ends) [Default: %default]", default = 40) | 34 opt_group.add_option("-L", "--low", type = "int", dest="rrbs_low_bound", help="Lower bound of fragment length (excluding C-CGG ends) [Default: %default]", default = 20) |
34 opt_group.add_option("-U", "--up", type = "int", dest="rrbs_up_bound", help="upper bound of fragment length (excluding C-CGG ends) [Default: %default]", default = 500) | 35 opt_group.add_option("-U", "--up", type = "int", dest="rrbs_up_bound", help="Upper bound of fragment length (excluding C-CGG ends) [Default: %default]", default = 500) |
35 parser.add_option_group(opt_group) | 36 parser.add_option_group(opt_group) |
36 | 37 |
37 # option group 4 | 38 # option group 4 |
38 opt_group = OptionGroup(parser, "General options") | 39 opt_group = OptionGroup(parser, "General options") |
39 opt_group.add_option("-t", "--tag", type="string", dest="taginfo",help="[Y]es for undirectional lib, [N]o for directional [Default: %default]", metavar="TAG", default = 'N') | 40 opt_group.add_option("-t", "--tag", type="string", dest="taginfo",help="[Y]es for undirectional lib, [N]o for directional [Default: %default]", metavar="TAG", default = 'N') |
40 opt_group.add_option("-s","--start_base",type = "int",dest = "cutnumber1", help="The first base of your read to be mapped [Default: %default]", default = 1) | 41 opt_group.add_option("-s","--start_base",type = "int",dest = "cutnumber1", help="The first cycle of the read to be mapped [Default: %default]", default = 1) |
41 opt_group.add_option("-e","--end_base",type = "int",dest = "cutnumber2", help="The last cycle number of your read to be mapped [Default: %default]", default = 200) | 42 opt_group.add_option("-e","--end_base",type = "int",dest = "cutnumber2", help="The last cycle of the read to be mapped [Default: %default]", default = 200) |
42 opt_group.add_option("-a", "--adapter", type="string", dest="adapter_file",help="Input text file of your adaptor sequences (to be trimed from the 3'end of the reads). Input 1 seq for dir. lib., 2 seqs for undir. lib. One line per sequence", metavar="FILE", default = '') | 43 opt_group.add_option("-a", "--adapter", type="string", dest="adapter_file",help="Input text file of your adaptor sequences (to be trimmed from the 3'end of the reads, ). " |
43 opt_group.add_option("--am",type = "int",dest = "adapter_mismatch", help="Number of mismatches allowed in adaptor [Default: %default]", default = 0) | 44 "Input one seq for dir. lib., twon seqs for undir. lib. One line per sequence. " |
44 opt_group.add_option("-g", "--genome", type="string", dest="genome",help="Name of the reference genome (the same as the reference genome file in the preprocessing step) [ex. chr21_hg18.fa]") | 45 "Only the first 10bp will be used", metavar="FILE", default = '') |
45 opt_group.add_option("-m", "--mismatches",type = "int", dest="int_no_mismatches",help="Number of mismatches in one read [Default: %default]", default = 4) | 46 opt_group.add_option("--am",type = "int",dest = "adapter_mismatch", help="Number of mismatches allowed in adapter [Default: %default]", default = 0) |
46 opt_group.add_option("--aligner", dest="aligner",help="Aligner program to perform the analisys: " + ', '.join(supported_aligners) + " [Default: %default]", metavar="ALIGNER", default = BOWTIE2) | 47 opt_group.add_option("-g", "--genome", type="string", dest="genome",help="Name of the reference genome (should be the same as \"-f\" in bs_seeker2-build.py ) [ex. chr21_hg18.fa]") |
47 opt_group.add_option("-p", "--path", dest="aligner_path", help="Path to the aligner program. Defaults: " +' '*70+ '\t'.join(('%s: %s '+' '*70) % (al, aligner_path[al]) for al in sorted(supported_aligners)), | 48 opt_group.add_option("-m", "--mismatches",type = "float", dest="no_mismatches",help="Number of mismatches in one read [Default: %default]", default = 4) |
49 opt_group.add_option("--aligner", dest="aligner",help="Aligner program for short reads mapping: " + ', '.join(supported_aligners) + " [Default: %default]", metavar="ALIGNER", default = BOWTIE) | |
50 opt_group.add_option("-p", "--path", dest="aligner_path", help="Path to the aligner program. Detected: " +' '*70+ '\t'.join(('%s: %s '+' '*70) % (al, aligner_path[al]) for al in sorted(supported_aligners)), | |
48 metavar="PATH" | 51 metavar="PATH" |
49 ) | 52 ) |
50 opt_group.add_option("-d", "--db", type="string", dest="dbpath",help="Path to the reference genome library (generated in preprocessing genome) [Default: %default]" , metavar="DBPATH", default = reference_genome_path) | 53 opt_group.add_option("-d", "--db", type="string", dest="dbpath",help="Path to the reference genome library (generated in preprocessing genome) [Default: %default]" , metavar="DBPATH", default = reference_genome_path) |
51 opt_group.add_option("-l", "--split_line",type = "int", dest="no_split",help="Number of lines per split (the read file will be split into small files for mapping. The result will be merged. [Default: %default]", default = 4000000) | 54 opt_group.add_option("-l", "--split_line",type = "int", dest="no_split",help="Number of lines per split (the read file will be split into small files for mapping. The result will be merged. [Default: %default]", default = 4000000) |
52 opt_group.add_option("-o", "--output", type="string", dest="outfilename",help="The name of output file [INFILE.bs(se|pe|rrbs)]", metavar="OUTFILE") | 55 opt_group.add_option("-o", "--output", type="string", dest="outfilename",help="The name of output file [INFILE.bs(se|pe|rrbs)]", metavar="OUTFILE") |
53 opt_group.add_option("-f", "--output-format", type="string", dest="output_format",help="Output format: "+', '.join(output.formats)+" [Default: %default]", metavar="FORMAT", default = output.BAM) | 56 opt_group.add_option("-f", "--output-format", type="string", dest="output_format",help="Output format: "+', '.join(output.formats)+" [Default: %default]", metavar="FORMAT", default = output.BAM) |
54 opt_group.add_option("--no-header", action="store_true", dest="no_SAM_header",help="Suppress SAM header lines [Default: %default]", default = False) | 57 opt_group.add_option("--no-header", action="store_true", dest="no_SAM_header",help="Suppress SAM header lines [Default: %default]", default = False) |
55 opt_group.add_option("--temp_dir", type="string", dest="temp_dir",help="The path to your temporary directory [Default: %default]", metavar="PATH", default = tempfile.gettempdir()) | 58 opt_group.add_option("--temp_dir", type="string", dest="temp_dir",help="The path to your temporary directory [Detected: %default]", metavar="PATH", default = tempfile.gettempdir()) |
56 opt_group.add_option("--XS",type = "string", dest="XS_filter",help="Filter definition for tag XS, format X,Y. X=0.8 and y=5 indicate that for one read, if #(mCH sites)/#(all CH sites)>0.8 and #(mCH sites)>5, then tag XS=1; or else tag XS=0. [Default: %default]", default = "0.5,5") # added by weilong | 59 opt_group.add_option("--XS",type = "string", dest="XS_filter",help="Filter definition for tag XS, format X,Y. X=0.8 and y=5 indicate that for one read, if #(mCH sites)/#(all CH sites)>0.8 and #(mCH sites)>5, then tag XS=1; or else tag XS=0. [Default: %default]", default = "0.5,5") # added by weilong |
57 opt_group.add_option("--multiple-hit", action="store_true", dest="Output_multiple_hit", default = False, help = 'Output reads with multiple hits to file\"Multiple_hit.fa\"') | 60 opt_group.add_option("--multiple-hit", action="store_true", dest="Output_multiple_hit", default = False, help = 'Output reads with multiple hits to file\"Multiple_hit.fa\"') |
58 | 61 |
59 opt_group.add_option("-v", "--version", action="store_true", dest="version",help="show version of BS-Seeker2", metavar="version", default = False) | 62 opt_group.add_option("-v", "--version", action="store_true", dest="version",help="show version of BS-Seeker2", metavar="version", default = False) |
60 | 63 |
94 (options, args) = parser.parse_args(args = bs_seeker_options) | 97 (options, args) = parser.parse_args(args = bs_seeker_options) |
95 | 98 |
96 | 99 |
97 # if no options were given by the user, print help and exit | 100 # if no options were given by the user, print help and exit |
98 if len(sys.argv) == 1: | 101 if len(sys.argv) == 1: |
99 print parser.print_help() | 102 parser.print_help() |
100 exit(0) | 103 exit(0) |
101 | 104 |
102 if options.version : | 105 if options.version : |
103 show_version() | 106 show_version() |
104 exit (-1) | 107 exit (-1) |
110 if options.infilename and (options.infilename_1 or options.infilename_2): | 113 if options.infilename and (options.infilename_1 or options.infilename_2): |
111 error('-i and [-1|-2] options are exclusive. You should use only one of them.') | 114 error('-i and [-1|-2] options are exclusive. You should use only one of them.') |
112 | 115 |
113 if not (options.infilename or (options.infilename_1 and options.infilename_2)): | 116 if not (options.infilename or (options.infilename_1 and options.infilename_2)): |
114 error('You should set either -i or -1 and -2 options.') | 117 error('You should set either -i or -1 and -2 options.') |
118 | |
119 # Calculate the length of read | |
120 if options.infilename : | |
121 read_file = options.infilename | |
122 elif options.infilename_1 : | |
123 read_file = options.infilename_1 | |
124 else : | |
125 error('You should at least specify -i or -1 options.') | |
126 | |
127 try : | |
128 if read_file.endswith(".gz") : # support input file ending with ".gz" | |
129 read_inf = gzip.open(read_file, "rb") | |
130 else : | |
131 read_inf=open(read_file,"r") | |
132 except IOError : | |
133 print "[Error] Cannot open input file : %s" % read_file | |
134 exit(-1) | |
135 oneline = read_inf.readline() | |
136 oneline = read_inf.readline() # get the second line | |
137 read_len = min(len(oneline), (options.cutnumber2-options.cutnumber1)) | |
138 read_inf.close() | |
139 # mismatch allowed: bowtie 1,build-in parameter '-m'; bowtie 2, post-filter paramter | |
140 # mismatch should no greater than the read length | |
141 no_mismatches = float(options.no_mismatches) | |
142 if (no_mismatches < 1) : | |
143 int_no_mismatches=int(no_mismatches * read_len) | |
144 else : | |
145 int_no_mismatches=int(no_mismatches) | |
146 | |
147 str_no_mismatches=str(options.no_mismatches) # pass to specific mode | |
148 | |
149 | |
115 # -t, directional / un-directional library | 150 # -t, directional / un-directional library |
116 asktag=str(options.taginfo).upper() | 151 asktag=str(options.taginfo).upper() |
117 if asktag not in 'YN': | 152 if asktag not in 'YN': |
118 error('-t option should be either Y or N, not %s' % asktag) | 153 error('-t option should be either Y or N, not %s' % asktag) |
119 # -a | 154 # -a |
120 if options.aligner not in supported_aligners: | 155 if options.aligner not in supported_aligners: |
121 error('-a option should be: %s' % ' ,'.join(supported_aligners)+'.') | 156 error('-a option should be: %s' % ' ,'.join(supported_aligners)+'.') |
122 # path for aligner | 157 # path for aligner |
123 aligner_exec = os.path.expanduser( os.path.join(options.aligner_path or aligner_path[options.aligner], options.aligner) ) | 158 aligner_exec = os.path.expanduser( os.path.join(options.aligner_path or aligner_path[options.aligner], options.aligner) ) |
124 # mismatch allowed: bowtie 1,build-in parameter '-m'; bowtie 2, post-filter paramter | 159 |
125 # mismatch should no greater than the read length | 160 |
126 int_no_mismatches=min(options.int_no_mismatches, options.cutnumber2-options.cutnumber1) | |
127 str_no_mismatches=str(int_no_mismatches) | |
128 # -g | 161 # -g |
129 if options.genome is None: | 162 if options.genome is None: |
130 error('-g is a required option') | 163 error('-g is a required option') |
131 genome = os.path.split(options.genome)[1] | 164 genome = os.path.split(options.genome)[1] |
132 genome_subdir = genome | 165 genome_subdir = genome |
147 'Please, specify the options \"--low\" and \"--up\" that you used at the index-building step.\n' | 180 'Please, specify the options \"--low\" and \"--up\" that you used at the index-building step.\n' |
148 'Possible choices are:\n' + '\n'.join([pr.split('_rrbs_')[-1].replace('_',', ') for pr in possible_refs])) | 181 'Possible choices are:\n' + '\n'.join([pr.split('_rrbs_')[-1].replace('_',', ') for pr in possible_refs])) |
149 | 182 |
150 db_path = os.path.expanduser(os.path.join(options.dbpath, genome_subdir + '_' + options.aligner)) | 183 db_path = os.path.expanduser(os.path.join(options.dbpath, genome_subdir + '_' + options.aligner)) |
151 | 184 |
185 | |
152 if not os.path.isdir(db_path): | 186 if not os.path.isdir(db_path): |
153 error('Index DIR \"' + genome_subdir + '..\" cannot be found in ' + options.dbpath +'.\n\tPlease run the bs_seeker2-build.py ' | 187 error('Index DIR \"' + genome_subdir + '..\" cannot be found in ' + options.dbpath +'.\n\tPlease run the bs_seeker2-build.py ' |
154 'to create it with the correct parameters for -g, -r, --low, --up and --aligner.') | 188 'to create it with the correct parameters for -g, -r, --low, --up and --aligner.') |
155 | |
156 # handle aligner options | |
157 # | |
158 | 189 |
159 # default aligner options | 190 # default aligner options |
160 aligner_options_defaults = { | 191 aligner_options_defaults = { |
161 BOWTIE : { '-e' : 40*int_no_mismatches, | 192 BOWTIE : { '-e' : 40*int_no_mismatches, |
162 '--nomaqround' : True, | 193 '--nomaqround' : True, |
163 '--norc' : True, | 194 '--norc' : True, |
164 '-k' : 2, | 195 #'-k' : 2, |
165 # -k=2; report two best hits, and filter by error rates | 196 # -k=2; report two best hits, and filter by error rates |
166 '--quiet' : True, | 197 '--quiet' : True, |
167 '--best' : True, | 198 '--best' : True, |
168 # '--suppress' : '2,5,6', | 199 # '--suppress' : '2,5,6', |
169 '--sam' : True, | 200 '--sam' : True, |
177 '-p' : 2, | 208 '-p' : 2, |
178 '--sam-nohead' : True, | 209 '--sam-nohead' : True, |
179 # run bowtie2 in local mode by default | 210 # run bowtie2 in local mode by default |
180 '--local' : '--end-to-end' not in aligner_options, | 211 '--local' : '--end-to-end' not in aligner_options, |
181 #'--mm' : True, | 212 #'--mm' : True, |
182 '-k' : 2 | 213 #'-k' : 2 |
183 }, | 214 }, |
184 SOAP : { '-v' : int_no_mismatches, | 215 SOAP : { '-v' : int_no_mismatches, |
185 '-p' : 2, | 216 '-p' : 2, |
186 '-r' : 2, | 217 '-r' : 2, |
187 '-M' : 4 | 218 '-M' : 4 |
236 if '--end-to-end' in aligner_options : | 267 if '--end-to-end' in aligner_options : |
237 aligner_title = aligner_title + "-e2e" | 268 aligner_title = aligner_title + "-e2e" |
238 else: | 269 else: |
239 aligner_title = aligner_title + "-local" | 270 aligner_title = aligner_title + "-local" |
240 | 271 |
272 if options.aligner == BOWTIE : | |
273 logm("Mode: Bowtie") | |
274 elif options.aligner == BOWTIE2 : | |
275 if '--end-to-end' not in aligner_options : | |
276 logm("Mode: Bowtie2, local alignment") | |
277 else : | |
278 logm("Mode: Bowtie2, end-to-end alignment") | |
279 | |
280 | |
241 tmp_path = tempfile.mkdtemp(prefix='bs_seeker2_%s_-%s-TMP-' % (os.path.split(outfilename)[1], aligner_title ), dir = options.temp_dir) | 281 tmp_path = tempfile.mkdtemp(prefix='bs_seeker2_%s_-%s-TMP-' % (os.path.split(outfilename)[1], aligner_title ), dir = options.temp_dir) |
242 | 282 |
243 | 283 |
244 (XS_x, XS_y) = options.XS_filter.split(",") | 284 (XS_x, XS_y) = options.XS_filter.split(",") |
245 XS_pct = float(XS_x) | 285 XS_pct = float(XS_x) |
246 XS_count = int(XS_y) | 286 XS_count = int(XS_y) |
247 logm('Filter for tag XS: #(mCH)/#(all CH)>%f and #(mCH)>%d' % (XS_pct, XS_count)) | 287 logm('Filter for tag XS: #(mCH)/#(all CH)>%.2f%% and #(mCH)>%d' % (XS_pct*100, XS_count)) |
248 | 288 |
249 | 289 |
250 logm('Temporary directory: %s' % tmp_path) | 290 logm('Temporary directory: %s' % tmp_path) |
251 logm('Reduced Representation Bisulfite Sequencing: %s' % str(options.rrbs)) | 291 logm('Reduced Representation Bisulfite Sequencing: %s' % str(options.rrbs)) |
252 if options.infilename is not None: | 292 if options.infilename is not None: |
253 logm('Single end') | 293 logm('Single end') |
254 | 294 |
255 aligner_command = aligner_exec + aligner_options_string() + \ | 295 aligner_command = aligner_exec + aligner_options_string() + \ |
256 { BOWTIE : ' %(reference_genome)s -f %(input_file)s %(output_file)s', | 296 { BOWTIE : ' -k 2 %(reference_genome)s -f %(input_file)s %(output_file)s', |
257 BOWTIE2 : ' -x %(reference_genome)s -f -U %(input_file)s -S %(output_file)s', | 297 BOWTIE2 : ' -k 2 -x %(reference_genome)s -f -U %(input_file)s -S %(output_file)s', |
258 SOAP : ' -D %(reference_genome)s.fa.index -o %(output_file)s -a %(input_file)s', | 298 SOAP : ' -D %(reference_genome)s.fa.index -o %(output_file)s -a %(input_file)s', |
259 RMAP : ' -c %(reference_genome)s.fa -o %(output_file)s %(input_file)s' | 299 RMAP : ' -c %(reference_genome)s.fa -o %(output_file)s %(input_file)s' |
260 }[options.aligner] | 300 }[options.aligner] |
261 logm ('Aligner command: %s' % aligner_command) | 301 logm ('Aligner command: %s' % aligner_command) |
262 # single end reads | 302 # single end reads |
263 if options.rrbs: # RRBS scan | 303 if options.rrbs: # RRBS scan |
264 bs_rrbs(options.infilename, | 304 bs_rrbs(options.infilename, |
265 asktag, | 305 asktag, |
266 # options.rrbs_taginfo, | |
267 options.adapter_file, | 306 options.adapter_file, |
268 options.cutnumber1, | 307 options.cutnumber1, |
269 options.cutnumber2, | 308 options.cutnumber2, |
270 options.no_split, | 309 options.no_split, |
271 str_no_mismatches, | 310 str_no_mismatches, |
297 options.Output_multiple_hit | 336 options.Output_multiple_hit |
298 ) | 337 ) |
299 else: | 338 else: |
300 logm('Pair end') | 339 logm('Pair end') |
301 # pair end specific default options | 340 # pair end specific default options |
302 aligner_options = dict({BOWTIE: {'--ff' : asktag == 'N', | 341 aligner_options = dict({BOWTIE: {'--fr' : True, |
303 '--fr' : asktag == 'Y', | |
304 '-X' : options.max_insert_size, | 342 '-X' : options.max_insert_size, |
305 '-I' : options.min_insert_size if options.min_insert_size > 0 else None | 343 '-I' : options.min_insert_size if options.min_insert_size > 0 else None, |
344 '-a' : True # "-k 2" in bowtie would not report the best two | |
306 }, | 345 }, |
307 BOWTIE2 : { | 346 BOWTIE2 : { |
308 '--ff' : asktag == 'N', | 347 '--fr' : True, |
309 '--fr' : asktag == 'Y', | |
310 '-X' : options.max_insert_size, | 348 '-X' : options.max_insert_size, |
311 '-I' : options.min_insert_size if options.min_insert_size > 0 else None, | 349 '-I' : options.min_insert_size if options.min_insert_size > 0 else None, |
312 '--no-discordant' : True, | 350 '--no-discordant' : True, |
313 '--no-mixed' : True | 351 '--no-mixed' : True, |
352 '-k' : 2 | |
314 }, | 353 }, |
315 SOAP: { | 354 SOAP: { |
316 '-x' : options.max_insert_size, | 355 '-x' : options.max_insert_size, |
317 '-m' : options.min_insert_size if options.min_insert_size > 0 else 100 | 356 '-m' : options.min_insert_size if options.min_insert_size > 0 else 100 |
318 }}[options.aligner], | 357 }}[options.aligner], |
325 SOAP : ' -D %(reference_genome)s.fa.index -o %(output_file)s -a %(input_file_1)s -b %(input_file_2)s -2 %(output_file)s.unpaired' #, | 364 SOAP : ' -D %(reference_genome)s.fa.index -o %(output_file)s -a %(input_file_1)s -b %(input_file_2)s -2 %(output_file)s.unpaired' #, |
326 # RMAP : # rmappe, also paste two inputs into one file. | 365 # RMAP : # rmappe, also paste two inputs into one file. |
327 }[options.aligner] | 366 }[options.aligner] |
328 | 367 |
329 logm('Aligner command: %s' % aligner_command) | 368 logm('Aligner command: %s' % aligner_command) |
369 | |
370 if '--end-to-end' not in aligner_options: | |
371 aligner_options_defaults[BOWTIE2].update({'-D' : 50}) | |
372 else: | |
373 aligner_options_defaults[BOWTIE2].update({'-D' : 50, '-L': 15, '--score-min': 'L,-0.6,-0.6' }) | |
330 | 374 |
331 bs_pair_end(options.infilename_1, | 375 bs_pair_end(options.infilename_1, |
332 options.infilename_2, | 376 options.infilename_2, |
333 asktag, | 377 asktag, |
334 options.adapter_file, | 378 options.adapter_file, |
340 db_path, | 384 db_path, |
341 tmp_path, | 385 tmp_path, |
342 outfile, | 386 outfile, |
343 XS_pct, | 387 XS_pct, |
344 XS_count, | 388 XS_count, |
389 options.adapter_mismatch, | |
345 options.Output_multiple_hit | 390 options.Output_multiple_hit |
346 ) | 391 ) |
347 | 392 |
348 outfile.close() | 393 outfile.close() |
349 | 394 |