changeset 2:87ffe493b6c1 draft default tip

Use GALAXY_SLOTS for multithreading in Bowtie. Create symlinks instead of copying files. Specify in help that Bowtie is used to align the reads to the contigs. Add readme.rst .
author crs4
date Mon, 03 Mar 2014 11:28:41 -0500
parents d180348fe9db
children
files COPYING readme.rst sopra_wpc.py sopra_wpc.xml
diffstat 4 files changed, 42 insertions(+), 11 deletions(-) [+]
line wrap: on
line diff
--- a/COPYING	Tue Oct 29 05:27:29 2013 -0400
+++ b/COPYING	Mon Mar 03 11:28:41 2014 -0500
@@ -1,4 +1,4 @@
-Copyright © 2013 CRS4 Srl. http://www.crs4.it/
+Copyright © 2013-2014 CRS4 Srl. http://www.crs4.it/
 Created by:
 Gianmauro Cuccuru <gianmauro.cuccuru@crs4.it>
 Nicola Soranzo <nicola.soranzo@crs4.it>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/readme.rst	Mon Mar 03 11:28:41 2014 -0500
@@ -0,0 +1,29 @@
+SOPRA wrapper
+=============
+
+Configuration
+-------------
+
+The sopra_wpc tool may be configured to use more than one CPU core by selecting an appropriate destination for this tool in the Galaxy job_conf.xml file (see https://wiki.galaxyproject.org/Admin/Config/Jobs and https://wiki.galaxyproject.org/Admin/Config/Performance/Cluster ).
+
+If you are using Galaxy release_2013.11.04 or later, this tool will automatically use the number of CPU cores allocated by the job runner according to the configuration of the destination selected for this tool.
+
+If instead you are using an older Galaxy release, you should also add a line
+
+  GALAXY_SLOTS=N; export GALAXY_SLOTS
+
+(where N is the number of CPU cores allocated by the job runner for this tool) to the file
+
+  <tool_dependencies_dir>/bowtie/1.0.0/crs4/sopra_wpc/<hash_string>/env.sh
+
+Version history
+---------------
+
+- Release 2: Use GALAXY_SLOTS for multithreading in Bowtie. Create symlinks instead of copying files. Specify in help that Bowtie is used to align the reads to the contigs. Add readme.rst .
+- Release 1: Depend on package_bowtie_1_0_0 .
+- Release 0: Initial release in the Tool Shed.
+
+Development
+-----------
+
+Development is hosted at https://bitbucket.org/crs4/orione-tools . Contributions and bug reports are very welcome!
--- a/sopra_wpc.py	Tue Oct 29 05:27:29 2013 -0400
+++ b/sopra_wpc.py	Mon Mar 03 11:28:41 2014 -0500
@@ -32,6 +32,7 @@
 
 def __main__():
     parser = optparse.OptionParser(description='SOPRA with prebuilt contigs')
+    parser.add_option('-p', dest='num_threads', type='int', help='Number of threads for Bowtie')
     parser.add_option('--contigs', action='append', dest='contigs', help='Contigs FASTA files, at least 1')
     parser.add_option('--mate', action='append', dest='mates', help='Paired-end Illumina libraries, at least 1 FASTA file')
     parser.add_option('-d', action='append', dest='insert_sizes', type='int', help='List of insert sizes for the corresponding mate pair libraries')
@@ -49,7 +50,6 @@
     contigs = options.contigs # a list of file paths
     mates = options.mates # a list of file paths
     insert_sizes = options.insert_sizes # a list of integers
-    max_mismatches = options.max_mismatches
     c_option = options.c_option
     w_option = options.w_option
     L_option = options.L_option
@@ -58,7 +58,7 @@
     logfile = options.logfile
 
     s_scaf_path = which('s_scaf_v1.4.6.pl').pop()
-    print 'Creating temp dir'
+    print 'Creating temporary directory'
     wd = tempfile.mkdtemp()
     try:
         fake_mates = [os.path.join(wd, os.path.basename(mate) + '.fasta') for mate in mates] # s_prep_contigAseq_v1.4.6.pl wants a mate file with extension [Ff][Aa][Ss][Tt][Aa] or [Ff][Aa]
@@ -67,12 +67,12 @@
         mate_sopras = [os.path.splitext(fake_mate)[0] + '_sopra.fasta' for fake_mate in fake_mates] # s_prep_contigAseq_v1.4.6.pl writes the prepared paired reads to these files
         mysam_mates = [mate_sopra + '.sam' for mate_sopra in mate_sopras] # arbitrary filenames for bowtie output in SAM format
         mysam_mates_parsed = [mysam_mate + '_parsed' for mysam_mate in mysam_mates] # s_parse_sam_v1.4.6.pl writes its output to these files
-        orientdistinfo = os.path.join(wd, 'orientdistinfo_c%d' % c_option) # s_read_parsed_sam_v1.4.6.pl writes its output to this file
+        orientdistinfo = os.path.join(wd, "orientdistinfo_c%d" % c_option) # s_read_parsed_sam_v1.4.6.pl writes its output to this file
         scaffolds_file = os.path.join(wd, "scaffolds_h%s_L%d_w%d.fasta" % (h_option, L_option, w_option)) # s_scaf_v1.4.6.pl writes its output to this file
 
         for i in range(len(mates)):
-            print "Copying mate %s to %s" % (mates[i], fake_mates[i])
-            shutil.copy2(mates[i], fake_mates[i])
+            print "Creating symbolic link %s pointing to %s" % (fake_mates[i], mates[i])
+            os.symlink(mates[i], fake_mates[i])
 
         log = open(logfile, 'w') if logfile else sys.stdout
         try:
@@ -85,7 +85,7 @@
             subprocess.check_call(args=cmd_step2, stdout=log, shell=True)
 
             for i in range(len(mate_sopras)):
-                cmd_step3 = "bowtie -v %d -m 1 -f --sam %s %s %s" % (max_mismatches, bowtie_build, mate_sopras[i], mysam_mates[i])
+                cmd_step3 = "bowtie -p %d -v %d -m 1 -f --sam %s %s %s" % (options.num_threads, options.max_mismatches, bowtie_build, mate_sopras[i], mysam_mates[i])
                 print "SOPRA with prebuilt contigs (Bowtie alignment of library %d) command to be executed:\n %s" % (i+1, cmd_step3)
                 subprocess.check_call(args=cmd_step3, stdout=log, stderr=subprocess.STDOUT, shell=True) # need to redirect stderr because bowtie writes some logging info there
 
@@ -106,7 +106,7 @@
             if log != sys.stdout:
                 log.close()
 
-        print 'Moving result file %s to %s' % (scaffolds_file, scaffolds)
+        print "Moving result file %s to %s" % (scaffolds_file, scaffolds)
         shutil.move(scaffolds_file, scaffolds)
     finally:
         shutil.rmtree(wd)
--- a/sopra_wpc.xml	Tue Oct 29 05:27:29 2013 -0400
+++ b/sopra_wpc.xml	Mon Mar 03 11:28:41 2014 -0500
@@ -1,11 +1,11 @@
 <tool id="sopra_wpc" name="SOPRA with prebuilt contigs" version="0.1">
-  <description>for Illumina workflow</description>
+  <description>for Illumina libraries</description>
   <requirements>
     <requirement type="package" version="1.4.6">sopra</requirement>
     <requirement type="package" version="1.0.0">bowtie</requirement>
   </requirements>
   <command interpreter="python">
-    sopra_wpc.py
+    sopra_wpc.py -p \${GALAXY_SLOTS:-1}
     #for $cr in $contigs_repeat
       --contigs ${cr.contigs_file}
     #end for
@@ -36,7 +36,9 @@
   <help>
 **What it does**
 
-SOPRA is an assembly tool for mate pair/paired-end data generated by high-throughput sequencing technologies, e.g. Illumina and SOLiD platforms.
+SOPRA is a scaffold assembly tool for paired-end/mate pair data generated by high-throughput sequencing technologies, e.g. Illumina and SOLiD platforms. This wrapper currently supports only Illumina paired-end data.
+
+Bowtie is used to align the reads to the contigs.
 
 The input paired-end FASTA file can be obtained with:
 FR reads -> *FASTQ interlacer on paired end reads* followed by *FASTQ to FASTA* converter