Mercurial > repos > bgruening > bismark
comparison bismark_wrapper.py @ 3:91f07ff056ca draft
Uploaded
author | bgruening |
---|---|
date | Mon, 14 Apr 2014 16:43:14 -0400 |
parents | 82814a8a2395 |
children | 243e8f9fb75b |
comparison
equal
deleted
inserted
replaced
2:82814a8a2395 | 3:91f07ff056ca |
---|---|
41 | 41 |
42 parser.add_argument( '-1', '--mate1', dest='mate1', | 42 parser.add_argument( '-1', '--mate1', dest='mate1', |
43 help='The forward reads file in Sanger FASTQ or FASTA format.' ) | 43 help='The forward reads file in Sanger FASTQ or FASTA format.' ) |
44 parser.add_argument( '-2', '--mate2', dest='mate2', | 44 parser.add_argument( '-2', '--mate2', dest='mate2', |
45 help='The reverse reads file in Sanger FASTQ or FASTA format.' ) | 45 help='The reverse reads file in Sanger FASTQ or FASTA format.' ) |
46 parser.add_argument( '--sort-bam', dest='sort_bam', action="store_true" ) | |
46 | 47 |
47 parser.add_argument( '--output-unmapped-reads', dest='output_unmapped_reads', | 48 parser.add_argument( '--output-unmapped-reads', dest='output_unmapped_reads', |
48 help='Additional output file with unmapped reads (single-end).' ) | 49 help='Additional output file with unmapped reads (single-end).' ) |
49 parser.add_argument( '--output-unmapped-reads-l', dest='output_unmapped_reads_l', | 50 parser.add_argument( '--output-unmapped-reads-l', dest='output_unmapped_reads_l', |
50 help='File name for unmapped reads (left, paired-end).' ) | 51 help='File name for unmapped reads (left, paired-end).' ) |
174 TMP to some larger space. It's not recommended but it works. | 175 TMP to some larger space. It's not recommended but it works. |
175 """ | 176 """ |
176 #tmp_bismark_dir = tempfile.mkdtemp( dir='/data/0/galaxy_db/tmp/' ) | 177 #tmp_bismark_dir = tempfile.mkdtemp( dir='/data/0/galaxy_db/tmp/' ) |
177 tmp_bismark_dir = tempfile.mkdtemp() | 178 tmp_bismark_dir = tempfile.mkdtemp() |
178 output_dir = os.path.join( tmp_bismark_dir, 'results') | 179 output_dir = os.path.join( tmp_bismark_dir, 'results') |
179 cmd = 'bismark %(args)s --bam --temp_dir %(tmp_bismark_dir)s -o %(output_dir)s --quiet %(genome_folder)s %(reads)s' | 180 cmd = 'bismark %(args)s --bam --temp_dir %(tmp_bismark_dir)s --gzip -o %(output_dir)s --quiet %(genome_folder)s %(reads)s' |
180 | 181 |
181 if args.fasta: | 182 if args.fasta: |
182 # he query input files (specified as mate1,mate2 or singles) are FastA | 183 # the query input files (specified as mate1,mate2 or singles) are FastA |
183 cmd = '%s %s' % (cmd, '--fasta') | 184 cmd = '%s %s' % (cmd, '--fasta') |
184 elif args.fastq: | 185 elif args.fastq: |
185 cmd = '%s %s' % (cmd, '--fastq') | 186 cmd = '%s %s' % (cmd, '--fastq') |
186 | 187 |
187 if args.bismark_path: | 188 if args.bismark_path: |
221 # --seedlen | 222 # --seedlen |
222 additional_opts += ' -l %s ' % args.seed_len | 223 additional_opts += ' -l %s ' % args.seed_len |
223 | 224 |
224 # alignment options | 225 # alignment options |
225 if args.bowtie2: | 226 if args.bowtie2: |
226 additional_opts += ' -p %s --bowtie2 ' % args.num_threads | 227 additional_opts += ' -p %s --bowtie2 ' % (int(args.num_threads/2)) #divides by 2 here since bismark will spawn 2 (original top and original bottom) jobs with -p threads each |
227 if args.seed_mismatches: | 228 if args.seed_mismatches: |
228 additional_opts += ' -N %s ' % args.seed_mismatches | 229 additional_opts += ' -N %s ' % args.seed_mismatches |
229 if args.seed_len: | 230 if args.seed_len: |
230 additional_opts += ' -L %s ' % args.seed_len | 231 additional_opts += ' -L %s ' % args.seed_len |
231 if args.seed_extention_attempts: | 232 if args.seed_extention_attempts: |
325 | 326 |
326 tmp_res = tempfile.NamedTemporaryFile( dir= output_dir).name | 327 tmp_res = tempfile.NamedTemporaryFile( dir= output_dir).name |
327 | 328 |
328 bam_files = glob( os.path.join( output_dir, '*.bam') ) | 329 bam_files = glob( os.path.join( output_dir, '*.bam') ) |
329 if len( bam_files ) > 1: | 330 if len( bam_files ) > 1: |
330 cmd = 'samtools merge -@ %s -f - %s ' % ( args.num_threads, ' '.join( bam_files ) ) | 331 cmd = 'samtools merge -@ %s -f %s %s ' % ( args.num_threads, tmp_res, ' '.join( bam_files ) ) |
331 | 332 |
332 p1 = subprocess.Popen( args=shlex.split( cmd ), stdout=subprocess.PIPE ) | 333 proc = subprocess.Popen( args=shlex.split( cmd ), stdout=subprocess.PIPE ) |
333 proc = subprocess.Popen( ['samtools', 'sort', '-@', str(args.num_threads), '-', tmp_res], stdin=p1.stdout, stdout=tmp_stdout, stderr=tmp_stderr ) | 334 |
335 returncode = proc.wait() | |
336 tmp_stdout.close() | |
337 tmp_stderr.close() | |
338 if returncode != 0: | |
339 raise Exception, open( tmp_stderr.name ).read() | |
334 else: | 340 else: |
335 proc = subprocess.Popen( ['samtools', 'sort', '-@', str(args.num_threads), bam_files[0], tmp_res], stdout=tmp_stdout, stderr=tmp_stderr ) | 341 tmp_res = bam_files[0] |
336 | 342 |
337 returncode = proc.wait() | 343 bam_path = "%s" % tmp_res |
338 tmp_stdout.close() | 344 |
339 tmp_stderr.close() | |
340 if returncode != 0: | |
341 raise Exception, open( tmp_stderr.name ).read() | |
342 | |
343 bam_path = "%s.bam" % tmp_res | |
344 if os.path.exists( bam_path ): | 345 if os.path.exists( bam_path ): |
345 shutil.copy( bam_path, args.output ) | 346 if args.sort_bam: |
347 cmd = 'samtools sort -@ %s %s %s' % (args.num_threads, bam_path, args.output) | |
348 else: | |
349 shutil.copy( bam_path, args.output ) | |
346 else: | 350 else: |
347 stop_err( 'BAM file no found:\n' + str( bam_path ) ) | 351 stop_err( 'BAM file no found:\n' + str( bam_path ) ) |
352 | |
353 | |
348 | 354 |
349 # TODO: look for errors in program output. | 355 # TODO: look for errors in program output. |
350 except Exception, e: | 356 except Exception, e: |
351 stop_err( 'Error in merging bam files:\n' + str( e ) ) | 357 stop_err( 'Error in merging bam files:\n' + str( e ) ) |
352 | 358 |