Mercurial > repos > crs4 > sopra
diff sopra_wpc.py @ 0:988d5a82291a draft
Uploaded
author | crs4 |
---|---|
date | Thu, 24 Oct 2013 14:02:10 -0400 |
parents | |
children | 87ffe493b6c1 |
line wrap: on
line diff
# -*- coding: utf-8 -*-
"""
SOPRA with prebuilt contigs workflow runner
"""

import optparse
import os
import tempfile
import shutil
import subprocess
import sys


# Copyright (c) Twisted Matrix Laboratories.
def which(name, flags=os.X_OK):
    """Search PATH for executable files with the given name.

    Every directory listed in the PATH environment variable is probed for
    ``name`` and for ``name`` followed by each extension listed in the
    PATHEXT environment variable (if any).

    :param name: file name to look for
    :param flags: mode passed to ``os.access`` (default ``os.X_OK``)
    :return: list of matching paths, in PATH order; empty when PATH is unset
        or nothing matches
    """
    path = os.environ.get('PATH', None)
    if path is None:
        return []
    # Build a real list: the extensions are re-used for every PATH entry, and
    # a one-shot iterator (filter() on Python 3) would be exhausted after the
    # first directory.
    exts = [ext for ext in os.environ.get('PATHEXT', '').split(os.pathsep) if ext]
    result = []
    for directory in path.split(os.pathsep):
        candidate = os.path.join(directory, name)
        if os.access(candidate, flags):
            result.append(candidate)
        for ext in exts:
            candidate_ext = candidate + ext
            if os.access(candidate_ext, flags):
                result.append(candidate_ext)
    return result


def _parse_options():
    """Parse and validate the command line; return the optparse options object.

    Exits through ``parser.error`` when positional arguments are given, when a
    mandatory option is missing, or when fewer insert sizes than mate
    libraries are supplied.
    """
    parser = optparse.OptionParser(description='SOPRA with prebuilt contigs')
    parser.add_option('--contigs', action='append', dest='contigs', help='Contigs FASTA files, at least 1')
    parser.add_option('--mate', action='append', dest='mates', help='Paired-end Illumina libraries, at least 1 FASTA file')
    parser.add_option('-d', action='append', dest='insert_sizes', type='int', help='List of insert sizes for the corresponding mate pair libraries')
    parser.add_option('-v', dest='max_mismatches', type='int', help='Maximum number of mismatches when aligning reads on contigs with Bowtie')
    parser.add_option('-c', dest='c_option', type='int', help='If the number of times a read and its reverse complement appear in the library is equal to or more than this value, the pairing information from that read will be disregarded')
    parser.add_option('-w', dest='w_option', type='int', help='Minimum number of links between two contigs')
    parser.add_option('-L', dest='L_option', type='int', help='Minimum length of contigs to be used in scaffold assembly')
    parser.add_option('--h_option', dest='h_option', type='float', help='High coverage contigs (above mean coverage + h x std coverage) are not considered in the scaffold assembly mainly to exclude reads from repetitive regions')
    parser.add_option('--scaffolds', dest='scaffolds', help='scaffolds fasta file mandatory')
    parser.add_option('-l', '--logfile', dest='logfile', help='log file (default=stdout)')
    (options, args) = parser.parse_args()
    if len(args) > 0:
        parser.error('Wrong number of arguments')

    # Each of these values is formatted into a command line or a file name
    # further down; a None would either raise a TypeError half-way through
    # the pipeline or end up as the literal text "None" in a command.
    required = [
        ('--contigs', options.contigs),
        ('--mate', options.mates),
        ('-d', options.insert_sizes),
        ('-v', options.max_mismatches),
        ('-c', options.c_option),
        ('-w', options.w_option),
        ('-L', options.L_option),
        ('--h_option', options.h_option),
        ('--scaffolds', options.scaffolds),
    ]
    missing = [flag for flag, value in required if value is None]
    if missing:
        parser.error('Missing required option(s): %s' % ', '.join(missing))
    # One insert size is consumed per mate library; check now rather than
    # fail after the alignment steps have already run.
    if len(options.insert_sizes) < len(options.mates):
        parser.error('Expected one -d insert size per --mate library (got %d for %d libraries)' % (len(options.insert_sizes), len(options.mates)))
    return options


def _run_step(description, argv, log, **popen_kwargs):
    """Announce one pipeline command on stdout, then run it.

    :param description: short step name inserted in the announcement
    :param argv: command as a list of strings; executed without a shell so
        that paths containing spaces or shell metacharacters are passed
        through untouched
    :param log: file object receiving the command's stdout
    :param popen_kwargs: extra keyword arguments for ``subprocess.check_call``
    :raises subprocess.CalledProcessError: if the command exits non-zero
    """
    # Single-argument print() is valid on both Python 2 and Python 3.
    print("SOPRA with prebuilt contigs (%s) command to be executed:\n %s" % (description, " ".join(argv)))
    # Flush so the announcement is not emitted after the child's own output
    # when both share the same stdout.
    sys.stdout.flush()
    subprocess.check_call(argv, stdout=log, **popen_kwargs)


def __main__():
    """Run the SOPRA scaffolding pipeline on prebuilt contigs.

    Steps: prepare contigs and reads, build a Bowtie index, align every
    library, parse the SAM files, read the parsed SAM files, assemble the
    scaffolds, and finally move the scaffolds FASTA to the path given with
    ``--scaffolds``. All intermediate files live in a temporary directory
    that is removed on exit, whether or not the pipeline succeeded.
    """
    options = _parse_options()
    contigs = options.contigs  # a list of file paths
    mates = options.mates  # a list of file paths
    insert_sizes = options.insert_sizes  # a list of integers
    max_mismatches = options.max_mismatches
    c_option = options.c_option
    w_option = options.w_option
    L_option = options.L_option
    h_option = options.h_option
    scaffolds = options.scaffolds
    logfile = options.logfile

    s_scaf_candidates = which('s_scaf_v1.4.6.pl')
    if not s_scaf_candidates:
        sys.exit('s_scaf_v1.4.6.pl not found in PATH')
    # Take the first match, i.e. the one a PATH lookup would pick, which is
    # how all the other pipeline executables are resolved.
    s_scaf_path = s_scaf_candidates[0]

    print('Creating temp dir')
    wd = tempfile.mkdtemp()
    try:
        fake_mates = [os.path.join(wd, os.path.basename(mate) + '.fasta') for mate in mates]  # s_prep_contigAseq_v1.4.6.pl wants a mate file with extension [Ff][Aa][Ss][Tt][Aa] or [Ff][Aa]
        contigs_sopra = os.path.join(wd, 'contigs_sopra.fasta')  # s_prep_contigAseq_v1.4.6.pl always writes all the prepared contigs to this file
        bowtie_build = os.path.join(wd, 'bowtie_build')  # arbitrary basename for bowtie-build output files
        mate_sopras = [os.path.splitext(fake_mate)[0] + '_sopra.fasta' for fake_mate in fake_mates]  # s_prep_contigAseq_v1.4.6.pl writes the prepared paired reads to these files
        mysam_mates = [mate_sopra + '.sam' for mate_sopra in mate_sopras]  # arbitrary filenames for bowtie output in SAM format
        mysam_mates_parsed = [mysam_mate + '_parsed' for mysam_mate in mysam_mates]  # s_parse_sam_v1.4.6.pl writes its output to these files
        orientdistinfo = os.path.join(wd, 'orientdistinfo_c%d' % c_option)  # s_read_parsed_sam_v1.4.6.pl writes its output to this file
        scaffolds_file = os.path.join(wd, "scaffolds_h%s_L%d_w%d.fasta" % (h_option, L_option, w_option))  # s_scaf_v1.4.6.pl writes its output to this file

        for mate, fake_mate in zip(mates, fake_mates):
            print("Copying mate %s to %s" % (mate, fake_mate))
            shutil.copy2(mate, fake_mate)

        log = open(logfile, 'w') if logfile else sys.stdout
        try:
            cmd_step1 = ['s_prep_contigAseq_v1.4.6.pl', '-contig'] + contigs + ['-mate'] + fake_mates + ['-a', wd]
            _run_step('preparation', cmd_step1, log)

            cmd_step2 = ['bowtie-build', contigs_sopra, bowtie_build]
            _run_step('Bowtie building index', cmd_step2, log)

            for number, (mate_sopra, mysam_mate) in enumerate(zip(mate_sopras, mysam_mates), 1):
                cmd_step3 = ['bowtie', '-v', '%d' % max_mismatches, '-m', '1', '-f', '--sam', bowtie_build, mate_sopra, mysam_mate]
                # need to redirect stderr because bowtie writes some logging info there
                _run_step('Bowtie alignment of library %d' % number, cmd_step3, log, stderr=subprocess.STDOUT)

            cmd_step4 = ['s_parse_sam_v1.4.6.pl', '-sam'] + mysam_mates + ['-a', wd]
            _run_step('removing reads not mapped in a proper pair', cmd_step4, log)

            cmd_step5 = ['s_read_parsed_sam_v1.4.6.pl', '-c', '%d' % c_option, '-a', wd]
            for parsed, insert_size in zip(mysam_mates_parsed, insert_sizes):
                cmd_step5 += ['-parsed', parsed, '-d', '%d' % insert_size]
            _run_step('read parsed SAM', cmd_step5, log)

            # need to call with perl -X because: 1) otherwise some Perl warnings are written on stderr; 2) simply redirecting stderr would hide real errors since it always returns exit status 0
            cmd_step6 = ['perl', '-X', s_scaf_path, '-w', '%d' % w_option, '-L', '%d' % L_option, '-h', '%s' % h_option, '-o', orientdistinfo, '-a', wd]
            _run_step('scaffold assembly', cmd_step6, log)
        finally:
            # Only close what was opened here; never close the process stdout.
            if log is not sys.stdout:
                log.close()

        print('Moving result file %s to %s' % (scaffolds_file, scaffolds))
        shutil.move(scaffolds_file, scaffolds)
    finally:
        shutil.rmtree(wd)


if __name__ == "__main__":
    __main__()