comparison bismark_wrapper.py @ 3:91f07ff056ca draft

Uploaded
author bgruening
date Mon, 14 Apr 2014 16:43:14 -0400
parents 82814a8a2395
children 243e8f9fb75b
comparison
equal deleted inserted replaced
2:82814a8a2395 3:91f07ff056ca
41 41
42 parser.add_argument( '-1', '--mate1', dest='mate1', 42 parser.add_argument( '-1', '--mate1', dest='mate1',
43 help='The forward reads file in Sanger FASTQ or FASTA format.' ) 43 help='The forward reads file in Sanger FASTQ or FASTA format.' )
44 parser.add_argument( '-2', '--mate2', dest='mate2', 44 parser.add_argument( '-2', '--mate2', dest='mate2',
45 help='The reverse reads file in Sanger FASTQ or FASTA format.' ) 45 help='The reverse reads file in Sanger FASTQ or FASTA format.' )
46 parser.add_argument( '--sort-bam', dest='sort_bam', action="store_true" )
46 47
47 parser.add_argument( '--output-unmapped-reads', dest='output_unmapped_reads', 48 parser.add_argument( '--output-unmapped-reads', dest='output_unmapped_reads',
48 help='Additional output file with unmapped reads (single-end).' ) 49 help='Additional output file with unmapped reads (single-end).' )
49 parser.add_argument( '--output-unmapped-reads-l', dest='output_unmapped_reads_l', 50 parser.add_argument( '--output-unmapped-reads-l', dest='output_unmapped_reads_l',
50 help='File name for unmapped reads (left, paired-end).' ) 51 help='File name for unmapped reads (left, paired-end).' )
174 TMP to some larger space. It's not recommended but it works. 175 TMP to some larger space. It's not recommended but it works.
175 """ 176 """
176 #tmp_bismark_dir = tempfile.mkdtemp( dir='/data/0/galaxy_db/tmp/' ) 177 #tmp_bismark_dir = tempfile.mkdtemp( dir='/data/0/galaxy_db/tmp/' )
177 tmp_bismark_dir = tempfile.mkdtemp() 178 tmp_bismark_dir = tempfile.mkdtemp()
178 output_dir = os.path.join( tmp_bismark_dir, 'results') 179 output_dir = os.path.join( tmp_bismark_dir, 'results')
179 cmd = 'bismark %(args)s --bam --temp_dir %(tmp_bismark_dir)s -o %(output_dir)s --quiet %(genome_folder)s %(reads)s' 180 cmd = 'bismark %(args)s --bam --temp_dir %(tmp_bismark_dir)s --gzip -o %(output_dir)s --quiet %(genome_folder)s %(reads)s'
180 181
181 if args.fasta: 182 if args.fasta:
182 # he query input files (specified as mate1,mate2 or singles) are FastA 183 # the query input files (specified as mate1,mate2 or singles) are FastA
183 cmd = '%s %s' % (cmd, '--fasta') 184 cmd = '%s %s' % (cmd, '--fasta')
184 elif args.fastq: 185 elif args.fastq:
185 cmd = '%s %s' % (cmd, '--fastq') 186 cmd = '%s %s' % (cmd, '--fastq')
186 187
187 if args.bismark_path: 188 if args.bismark_path:
221 # --seedlen 222 # --seedlen
222 additional_opts += ' -l %s ' % args.seed_len 223 additional_opts += ' -l %s ' % args.seed_len
223 224
224 # alignment options 225 # alignment options
225 if args.bowtie2: 226 if args.bowtie2:
226 additional_opts += ' -p %s --bowtie2 ' % args.num_threads 227 additional_opts += ' -p %s --bowtie2 ' % (int(args.num_threads/2)) #divides by 2 here since bismark will spawn 2 (original top and original bottom) jobs with -p threads each
227 if args.seed_mismatches: 228 if args.seed_mismatches:
228 additional_opts += ' -N %s ' % args.seed_mismatches 229 additional_opts += ' -N %s ' % args.seed_mismatches
229 if args.seed_len: 230 if args.seed_len:
230 additional_opts += ' -L %s ' % args.seed_len 231 additional_opts += ' -L %s ' % args.seed_len
231 if args.seed_extention_attempts: 232 if args.seed_extention_attempts:
325 326
326 tmp_res = tempfile.NamedTemporaryFile( dir= output_dir).name 327 tmp_res = tempfile.NamedTemporaryFile( dir= output_dir).name
327 328
328 bam_files = glob( os.path.join( output_dir, '*.bam') ) 329 bam_files = glob( os.path.join( output_dir, '*.bam') )
329 if len( bam_files ) > 1: 330 if len( bam_files ) > 1:
330 cmd = 'samtools merge -@ %s -f - %s ' % ( args.num_threads, ' '.join( bam_files ) ) 331 cmd = 'samtools merge -@ %s -f %s %s ' % ( args.num_threads, tmp_res, ' '.join( bam_files ) )
331 332
332 p1 = subprocess.Popen( args=shlex.split( cmd ), stdout=subprocess.PIPE ) 333 proc = subprocess.Popen( args=shlex.split( cmd ), stdout=subprocess.PIPE )
333 proc = subprocess.Popen( ['samtools', 'sort', '-@', str(args.num_threads), '-', tmp_res], stdin=p1.stdout, stdout=tmp_stdout, stderr=tmp_stderr ) 334
335 returncode = proc.wait()
336 tmp_stdout.close()
337 tmp_stderr.close()
338 if returncode != 0:
339 raise Exception, open( tmp_stderr.name ).read()
334 else: 340 else:
335 proc = subprocess.Popen( ['samtools', 'sort', '-@', str(args.num_threads), bam_files[0], tmp_res], stdout=tmp_stdout, stderr=tmp_stderr ) 341 tmp_res = bam_files[0]
336 342
337 returncode = proc.wait() 343 bam_path = "%s" % tmp_res
338 tmp_stdout.close() 344
339 tmp_stderr.close()
340 if returncode != 0:
341 raise Exception, open( tmp_stderr.name ).read()
342
343 bam_path = "%s.bam" % tmp_res
344 if os.path.exists( bam_path ): 345 if os.path.exists( bam_path ):
345 shutil.copy( bam_path, args.output ) 346 if args.sort_bam:
347 cmd = 'samtools sort -@ %s %s %s' % (args.num_threads, bam_path, args.output)
348 else:
349 shutil.copy( bam_path, args.output )
346 else: 350 else:
347 stop_err( 'BAM file no found:\n' + str( bam_path ) ) 351 stop_err( 'BAM file no found:\n' + str( bam_path ) )
352
353
348 354
349 # TODO: look for errors in program output. 355 # TODO: look for errors in program output.
350 except Exception, e: 356 except Exception, e:
351 stop_err( 'Error in merging bam files:\n' + str( e ) ) 357 stop_err( 'Error in merging bam files:\n' + str( e ) )
352 358