Mercurial > repos > crs4 > sopra
comparison sopra_wpc.py @ 2:87ffe493b6c1 draft default tip
Use GALAXY_SLOTS for multithreading in Bowtie. Create symlinks instead of copying files. Specify in help that Bowtie is used to align the reads to the contigs. Add readme.rst .
| author | crs4 |
|---|---|
| date | Mon, 03 Mar 2014 11:28:41 -0500 |
| parents | 988d5a82291a |
| children |
comparison
equal
deleted
inserted
replaced
| 1:d180348fe9db | 2:87ffe493b6c1 |
|---|---|
| 30 return result | 30 return result |
| 31 | 31 |
| 32 | 32 |
| 33 def __main__(): | 33 def __main__(): |
| 34 parser = optparse.OptionParser(description='SOPRA with prebuilt contigs') | 34 parser = optparse.OptionParser(description='SOPRA with prebuilt contigs') |
| 35 parser.add_option('-p', dest='num_threads', type='int', help='Number of threads for Bowtie') | |
| 35 parser.add_option('--contigs', action='append', dest='contigs', help='Contigs FASTA files, at least 1') | 36 parser.add_option('--contigs', action='append', dest='contigs', help='Contigs FASTA files, at least 1') |
| 36 parser.add_option('--mate', action='append', dest='mates', help='Paired-end Illumina libraries, at least 1 FASTA file') | 37 parser.add_option('--mate', action='append', dest='mates', help='Paired-end Illumina libraries, at least 1 FASTA file') |
| 37 parser.add_option('-d', action='append', dest='insert_sizes', type='int', help='List of insert sizes for the corresponding mate pair libraries') | 38 parser.add_option('-d', action='append', dest='insert_sizes', type='int', help='List of insert sizes for the corresponding mate pair libraries') |
| 38 parser.add_option('-v', dest='max_mismatches', type='int', help='Maximum number of mismatches when aligning reads on contigs with Bowtie') | 39 parser.add_option('-v', dest='max_mismatches', type='int', help='Maximum number of mismatches when aligning reads on contigs with Bowtie') |
| 39 parser.add_option('-c', dest='c_option', type='int', help='If the number of times a read and its reverse complement appear in the library is equal to or more than this value, the pairing information from that read will be disregarded') | 40 parser.add_option('-c', dest='c_option', type='int', help='If the number of times a read and its reverse complement appear in the library is equal to or more than this value, the pairing information from that read will be disregarded') |
| 47 parser.error('Wrong number of arguments') | 48 parser.error('Wrong number of arguments') |
| 48 | 49 |
| 49 contigs = options.contigs # a list of file paths | 50 contigs = options.contigs # a list of file paths |
| 50 mates = options.mates # a list of file paths | 51 mates = options.mates # a list of file paths |
| 51 insert_sizes = options.insert_sizes # a list of integers | 52 insert_sizes = options.insert_sizes # a list of integers |
| 52 max_mismatches = options.max_mismatches | |
| 53 c_option = options.c_option | 53 c_option = options.c_option |
| 54 w_option = options.w_option | 54 w_option = options.w_option |
| 55 L_option = options.L_option | 55 L_option = options.L_option |
| 56 h_option = options.h_option | 56 h_option = options.h_option |
| 57 scaffolds = options.scaffolds | 57 scaffolds = options.scaffolds |
| 58 logfile = options.logfile | 58 logfile = options.logfile |
| 59 | 59 |
| 60 s_scaf_path = which('s_scaf_v1.4.6.pl').pop() | 60 s_scaf_path = which('s_scaf_v1.4.6.pl').pop() |
| 61 print 'Creating temp dir' | 61 print 'Creating temporary directory' |
| 62 wd = tempfile.mkdtemp() | 62 wd = tempfile.mkdtemp() |
| 63 try: | 63 try: |
| 64 fake_mates = [os.path.join(wd, os.path.basename(mate) + '.fasta') for mate in mates] # s_prep_contigAseq_v1.4.6.pl wants a mate file with extension [Ff][Aa][Ss][Tt][Aa] or [Ff][Aa] | 64 fake_mates = [os.path.join(wd, os.path.basename(mate) + '.fasta') for mate in mates] # s_prep_contigAseq_v1.4.6.pl wants a mate file with extension [Ff][Aa][Ss][Tt][Aa] or [Ff][Aa] |
| 65 contigs_sopra = os.path.join(wd, 'contigs_sopra.fasta') # s_prep_contigAseq_v1.4.6.pl always writes all the prepared contigs to this file | 65 contigs_sopra = os.path.join(wd, 'contigs_sopra.fasta') # s_prep_contigAseq_v1.4.6.pl always writes all the prepared contigs to this file |
| 66 bowtie_build = os.path.join(wd, 'bowtie_build') # arbitrary basename for bowtie-build output files | 66 bowtie_build = os.path.join(wd, 'bowtie_build') # arbitrary basename for bowtie-build output files |
| 67 mate_sopras = [os.path.splitext(fake_mate)[0] + '_sopra.fasta' for fake_mate in fake_mates] # s_prep_contigAseq_v1.4.6.pl writes the prepared paired reads to these files | 67 mate_sopras = [os.path.splitext(fake_mate)[0] + '_sopra.fasta' for fake_mate in fake_mates] # s_prep_contigAseq_v1.4.6.pl writes the prepared paired reads to these files |
| 68 mysam_mates = [mate_sopra + '.sam' for mate_sopra in mate_sopras] # arbitrary filenames for bowtie output in SAM format | 68 mysam_mates = [mate_sopra + '.sam' for mate_sopra in mate_sopras] # arbitrary filenames for bowtie output in SAM format |
| 69 mysam_mates_parsed = [mysam_mate + '_parsed' for mysam_mate in mysam_mates] # s_parse_sam_v1.4.6.pl writes its output to these files | 69 mysam_mates_parsed = [mysam_mate + '_parsed' for mysam_mate in mysam_mates] # s_parse_sam_v1.4.6.pl writes its output to these files |
| 70 orientdistinfo = os.path.join(wd, 'orientdistinfo_c%d' % c_option) # s_read_parsed_sam_v1.4.6.pl writes its output to this file | 70 orientdistinfo = os.path.join(wd, "orientdistinfo_c%d" % c_option) # s_read_parsed_sam_v1.4.6.pl writes its output to this file |
| 71 scaffolds_file = os.path.join(wd, "scaffolds_h%s_L%d_w%d.fasta" % (h_option, L_option, w_option)) # s_scaf_v1.4.6.pl writes its output to this file | 71 scaffolds_file = os.path.join(wd, "scaffolds_h%s_L%d_w%d.fasta" % (h_option, L_option, w_option)) # s_scaf_v1.4.6.pl writes its output to this file |
| 72 | 72 |
| 73 for i in range(len(mates)): | 73 for i in range(len(mates)): |
| 74 print "Copying mate %s to %s" % (mates[i], fake_mates[i]) | 74 print "Creating symbolic link %s pointing to %s" % (fake_mates[i], mates[i]) |
| 75 shutil.copy2(mates[i], fake_mates[i]) | 75 os.symlink(mates[i], fake_mates[i]) |
| 76 | 76 |
| 77 log = open(logfile, 'w') if logfile else sys.stdout | 77 log = open(logfile, 'w') if logfile else sys.stdout |
| 78 try: | 78 try: |
| 79 cmd_step1 = "s_prep_contigAseq_v1.4.6.pl -contig %s -mate %s -a %s" % (" ".join(contigs), " ".join(fake_mates), wd) | 79 cmd_step1 = "s_prep_contigAseq_v1.4.6.pl -contig %s -mate %s -a %s" % (" ".join(contigs), " ".join(fake_mates), wd) |
| 80 print "SOPRA with prebuilt contigs (preparation) command to be executed:\n %s" % cmd_step1 | 80 print "SOPRA with prebuilt contigs (preparation) command to be executed:\n %s" % cmd_step1 |
| 83 cmd_step2 = "bowtie-build %s %s" % (contigs_sopra, bowtie_build) | 83 cmd_step2 = "bowtie-build %s %s" % (contigs_sopra, bowtie_build) |
| 84 print "SOPRA with prebuilt contigs (Bowtie building index) command to be executed:\n %s" % cmd_step2 | 84 print "SOPRA with prebuilt contigs (Bowtie building index) command to be executed:\n %s" % cmd_step2 |
| 85 subprocess.check_call(args=cmd_step2, stdout=log, shell=True) | 85 subprocess.check_call(args=cmd_step2, stdout=log, shell=True) |
| 86 | 86 |
| 87 for i in range(len(mate_sopras)): | 87 for i in range(len(mate_sopras)): |
| 88 cmd_step3 = "bowtie -v %d -m 1 -f --sam %s %s %s" % (max_mismatches, bowtie_build, mate_sopras[i], mysam_mates[i]) | 88 cmd_step3 = "bowtie -p %d -v %d -m 1 -f --sam %s %s %s" % (options.num_threads, options.max_mismatches, bowtie_build, mate_sopras[i], mysam_mates[i]) |
| 89 print "SOPRA with prebuilt contigs (Bowtie alignment of library %d) command to be executed:\n %s" % (i+1, cmd_step3) | 89 print "SOPRA with prebuilt contigs (Bowtie alignment of library %d) command to be executed:\n %s" % (i+1, cmd_step3) |
| 90 subprocess.check_call(args=cmd_step3, stdout=log, stderr=subprocess.STDOUT, shell=True) # need to redirect stderr because bowtie writes some logging info there | 90 subprocess.check_call(args=cmd_step3, stdout=log, stderr=subprocess.STDOUT, shell=True) # need to redirect stderr because bowtie writes some logging info there |
| 91 | 91 |
| 92 cmd_step4 = "s_parse_sam_v1.4.6.pl -sam %s -a %s" % (' '.join(mysam_mates), wd) | 92 cmd_step4 = "s_parse_sam_v1.4.6.pl -sam %s -a %s" % (' '.join(mysam_mates), wd) |
| 93 print "SOPRA with prebuilt contigs (removing reads not mapped in a proper pair) command to be executed:\n %s" % cmd_step4 | 93 print "SOPRA with prebuilt contigs (removing reads not mapped in a proper pair) command to be executed:\n %s" % cmd_step4 |
| 104 subprocess.check_call(args=cmd_step6, stdout=log, shell=True) | 104 subprocess.check_call(args=cmd_step6, stdout=log, shell=True) |
| 105 finally: | 105 finally: |
| 106 if log != sys.stdout: | 106 if log != sys.stdout: |
| 107 log.close() | 107 log.close() |
| 108 | 108 |
| 109 print 'Moving result file %s to %s' % (scaffolds_file, scaffolds) | 109 print "Moving result file %s to %s" % (scaffolds_file, scaffolds) |
| 110 shutil.move(scaffolds_file, scaffolds) | 110 shutil.move(scaffolds_file, scaffolds) |
| 111 finally: | 111 finally: |
| 112 shutil.rmtree(wd) | 112 shutil.rmtree(wd) |
| 113 | 113 |
| 114 | 114 |
