Mercurial > repos > bgruening > bismark
diff bismark_wrapper.py @ 3:91f07ff056ca draft
Uploaded
author | bgruening |
---|---|
date | Mon, 14 Apr 2014 16:43:14 -0400 |
parents | 82814a8a2395 |
children | 243e8f9fb75b |
line wrap: on
line diff
--- a/bismark_wrapper.py Wed Aug 21 05:19:54 2013 -0400 +++ b/bismark_wrapper.py Mon Apr 14 16:43:14 2014 -0400 @@ -43,6 +43,7 @@ help='The forward reads file in Sanger FASTQ or FASTA format.' ) parser.add_argument( '-2', '--mate2', dest='mate2', help='The reverse reads file in Sanger FASTQ or FASTA format.' ) + parser.add_argument( '--sort-bam', dest='sort_bam', action="store_true" ) parser.add_argument( '--output-unmapped-reads', dest='output_unmapped_reads', help='Additional output file with unmapped reads (single-end).' ) @@ -176,10 +177,10 @@ #tmp_bismark_dir = tempfile.mkdtemp( dir='/data/0/galaxy_db/tmp/' ) tmp_bismark_dir = tempfile.mkdtemp() output_dir = os.path.join( tmp_bismark_dir, 'results') - cmd = 'bismark %(args)s --bam --temp_dir %(tmp_bismark_dir)s -o %(output_dir)s --quiet %(genome_folder)s %(reads)s' + cmd = 'bismark %(args)s --bam --temp_dir %(tmp_bismark_dir)s --gzip -o %(output_dir)s --quiet %(genome_folder)s %(reads)s' if args.fasta: - # he query input files (specified as mate1,mate2 or singles) are FastA + # the query input files (specified as mate1,mate2 or singles) are FastA cmd = '%s %s' % (cmd, '--fasta') elif args.fastq: cmd = '%s %s' % (cmd, '--fastq') @@ -223,7 +224,7 @@ # alignment options if args.bowtie2: - additional_opts += ' -p %s --bowtie2 ' % args.num_threads + additional_opts += ' -p %s --bowtie2 ' % (int(args.num_threads/2)) #divides by 2 here since bismark will spawn 2 (original top and original bottom) jobs with -p threads each if args.seed_mismatches: additional_opts += ' -N %s ' % args.seed_mismatches if args.seed_len: @@ -327,24 +328,29 @@ bam_files = glob( os.path.join( output_dir, '*.bam') ) if len( bam_files ) > 1: - cmd = 'samtools merge -@ %s -f - %s ' % ( args.num_threads, ' '.join( bam_files ) ) + cmd = 'samtools merge -@ %s -f %s %s ' % ( args.num_threads, tmp_res, ' '.join( bam_files ) ) - p1 = subprocess.Popen( args=shlex.split( cmd ), stdout=subprocess.PIPE ) - proc = subprocess.Popen( ['samtools', 'sort', '-@', str(args.num_threads), '-', tmp_res], stdin=p1.stdout, stdout=tmp_stdout, stderr=tmp_stderr ) - else: - proc = subprocess.Popen( ['samtools', 'sort', '-@', str(args.num_threads), bam_files[0], tmp_res], stdout=tmp_stdout, stderr=tmp_stderr ) + proc = subprocess.Popen( args=shlex.split( cmd ), stdout=subprocess.PIPE ) - returncode = proc.wait() - tmp_stdout.close() - tmp_stderr.close() - if returncode != 0: - raise Exception, open( tmp_stderr.name ).read() + returncode = proc.wait() + tmp_stdout.close() + tmp_stderr.close() + if returncode != 0: + raise Exception, open( tmp_stderr.name ).read() + else: + tmp_res = bam_files[0] - bam_path = "%s.bam" % tmp_res + bam_path = "%s" % tmp_res + if os.path.exists( bam_path ): - shutil.copy( bam_path, args.output ) + if args.sort_bam: + cmd = 'samtools sort -@ %s %s %s' % (args.num_threads, bam_path, args.output) + else: + shutil.copy( bam_path, args.output ) else: stop_err( 'BAM file no found:\n' + str( bam_path ) ) + + # TODO: look for errors in program output. except Exception, e: