changeset 4:243e8f9fb75b draft

Uploaded
author bgruening
date Mon, 09 Feb 2015 18:24:41 -0500
parents 91f07ff056ca
children b100248c35b8
files bismark_bowtie2_wrapper.xml bismark_bowtie_wrapper.xml bismark_methylation_extractor.xml bismark_wrapper.py tool_dependencies.xml
diffstat 5 files changed, 118 insertions(+), 79 deletions(-) [+]
line wrap: on
line diff
--- a/bismark_bowtie2_wrapper.xml	Mon Apr 14 16:43:14 2014 -0400
+++ b/bismark_bowtie2_wrapper.xml	Mon Feb 09 18:24:41 2015 -0500
@@ -7,14 +7,20 @@
         <requirement type="package" version="0.1.19">samtools</requirement>
         <requirement type="package" version="2.1.0">bowtie2</requirement>
     </requirements>
-    <parallelism method="basic"></parallelism>
+    <stdio>
+        <exit_code range="1:" />
+        <exit_code range=":-1" />
+        <regex match="Error:" />
+        <regex match="Exception:" />
+    </stdio>
     <command interpreter="python">
+<![CDATA[
         bismark_wrapper.py
-        
+
         ## Change this to accommodate the number of threads you have available.
         --num-threads "\${GALAXY_SLOTS:-24}"
 
-        --bismark_path \$SCRIPT_PATH
+        ##--bismark_path \$SCRIPT_PATH
 
         --bowtie2
 
@@ -34,7 +40,6 @@
         ##  Input parameters
         ##
 
-
         #if $singlePaired.sPaired == "single":
             --single-paired $singlePaired.input_singles
 
@@ -58,14 +63,17 @@
             --mate1 #echo ','.join($mate1)
             --mate2 #echo ','.join($mate2)
 
-            #if $singlePaired.mate_list[0].input_mate1.ext == "fastqillumina":
-                --phred64-quals
-                --fastq
-            #elif $singlePaired.mate_list[0].input_mate1.ext == "fastqsanger":
-                --fastq
-            #elif $singlePaired.mate_list[0].input_mate1.ext == "fasta":
-                --fasta
-            #end if
+            #for $mate_pair in $singlePaired.mate_list:
+                #if $mate_pair.input_mate1.ext == "fastqillumina":
+                    --phred64-quals
+                    --fastq
+                #elif $mate_pair.input_mate1.ext == "fastqsanger":
+                    --fastq
+                #elif $mate_pair.input_mate1.ext == "fasta":
+                    --fasta
+                #end if
+                #break
+            #end for
 
             -I $singlePaired.minInsert
             -X $singlePaired.maxInsert
@@ -89,7 +97,7 @@
             --seed-extention-attempts $params.seed_extention_attempts
             ## default 2
             --max-reseed $params.max_reseed
-            
+
             ## default 70
             ##--maqerr $params.maqerr
 
@@ -140,6 +148,7 @@
         #end if
       #end if
 
+]]>
     </command>
     <inputs>
         <conditional name="refGenomeSource">
@@ -213,7 +222,7 @@
                 <param name="bismark_stdout" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Write the bismark output and summary information to an extra file" />
                 <param name="isReportOutput" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Offer all report files concatenated in one file" />
 
-                <!--end output options --> 
+                <!--end output options -->
             </when>  <!-- full -->
       </conditional>  <!-- params -->
       <!--
@@ -275,18 +284,22 @@
             </action>
           </when>
           <when value="paired">
-            <action type="format">
+            <!--action type="format">
               <option type="from_param" name="singlePaired.mate_list[0].input_mate1" param_attribute="ext" />
-            </action>
+            </action-->
           </when>
         </conditional>
       </actions>
     </data>
 
     <data format="fastq" name="output_suppressed_reads_r" label="${tool.name} on ${on_string}: suppressed reads (R)">
-      <filter>singlePaired['sPaired'] == "paired"</filter>
-      <filter>params['settingsType'] == "custom"</filter>
-      <filter>params['supressed_read_file'] is True</filter>
+      <filter>
+        ((
+            singlePaired['sPaired'] == "paired" and
+          params['settingsType'] == "custom" and
+          params['suppressed_read_file'] is True
+        ))
+      </filter>
       <actions>
         <conditional name="singlePaired.sPaired">
           <when value="single">
@@ -295,9 +308,9 @@
             </action>
           </when>
           <when value="paired">
-            <action type="format">
+            <!--action type="format">
               <option type="from_param" name="singlePaired.mate_list[0].input_mate1" param_attribute="ext" />
-            </action>
+            </action-->
           </when>
         </conditional>
       </actions>
@@ -319,18 +332,22 @@
             </action>
           </when>
           <when value="paired">
-            <action type="format">
+            <!--action type="format">
               <option type="from_param" name="singlePaired.mate_list[0].input_mate1" param_attribute="ext" />
-            </action>
+            </action-->
           </when>
         </conditional>
       </actions>
     </data>
 
     <data format="fastq" name="output_unmapped_reads_r" label="${tool.name} on ${on_string}: unmapped reads (R)">
-      <filter>singlePaired['sPaired'] == "paired"</filter>
-      <filter>params['settingsType'] == "custom"</filter>
-      <filter>params['unmapped_read_file'] is True</filter>
+      <filter>
+        ((
+            singlePaired['sPaired'] == "paired" and
+          params['settingsType'] == "custom" and
+          params['unmapped_read_file'] is True
+        ))
+      </filter>
       <actions>
         <conditional name="singlePaired.sPaired">
           <when value="single">
@@ -339,9 +356,9 @@
             </action>
           </when>
           <when value="paired">
-            <action type="format">
+            <!--action type="format">
               <option type="from_param" name="singlePaired.mate_list[0].input_mate1" param_attribute="ext" />
-            </action>
+            </action-->
           </when>
         </conditional>
       </actions>
@@ -352,6 +369,7 @@
     </tests>
 
     <help>
+<![CDATA[
 
 **What it does**
 
@@ -409,8 +427,8 @@
     Column  Description
   --------  --------------------------------------------------------
   1  QNAME  seq-ID
-  2  FLAG   this flag tries to take the strand a bisulfite read 
-            originated from into account 
+  2  FLAG   this flag tries to take the strand a bisulfite read
+            originated from into account
             (this is different from ordinary DNA alignment flags!)
   3  RNAME  chromosome
   4  POS    start position
@@ -422,12 +440,12 @@
   10 SEQ    query SEQuence on the same strand as the reference
   11 QUAL   Phred33 scale
   12 NM-tag edit distance to the reference)
-  13 XX-tag base-by-base mismatches to the reference. 
+  13 XX-tag base-by-base mismatches to the reference.
             This does not include indels.
   14 XM-tag methylation call string
   15 XR-tag read conversion state for the alignment
   16 XG-tag genome conversion state for the alignment
-  
+
 
 Each read of paired-end alignments is written out in a separate line in the above format.
 
@@ -484,7 +502,7 @@
   --phred64-quals        FASTQ qualities are ASCII chars equal to the Phred quality plus 64. Default: off.
 
   --solexa-quals         Convert FASTQ qualities from solexa-scaled (which can be negative) to phred-scaled
-                         (which can't). The formula for conversion is: 
+                         (which can't). The formula for conversion is:
                          phred-qual = 10 * log(1 + 10 ** (solexa-qual/10.0)) / log(10). Used with -q. This
                          is usually the right option for use with (unconverted) reads emitted by the GA
                          Pipeline versions prior to 1.3. Works only for Bowtie 1. Default: off.
@@ -496,7 +514,7 @@
 Alignment::
 
   -n/--seedmms INT       The maximum number of mismatches permitted in the "seed", i.e. the first L base pairs
-                         of the read (where L is set with -l/--seedlen). This may be 0, 1, 2 or 3 and the 
+                         of the read (where L is set with -l/--seedlen). This may be 0, 1, 2 or 3 and the
                          default is 1. This option is only available for Bowtie 1 (for Bowtie 2 see -N).
 
   -l/--seedlen           The "seed length"; i.e., the number of bases of the high quality end of the read to
@@ -634,11 +652,15 @@
                          Bowtie 2 searches for at most INT+1 distinct, valid alignments for each read. The search terminates when it
                          can't find more distinct valid alignments, or when it finds INT+1 distinct alignments, whichever
                          happens first. Only the best alignment is reported. Information from the other alignments is used to
-                         estimate mapping quality and to set SAM optional fields, such as AS:i and XS:i. Increasing -M makes 
+                         estimate mapping quality and to set SAM optional fields, such as AS:i and XS:i. Increasing -M makes
                          Bowtie 2 slower, but increases the likelihood that it will pick the correct alignment for a read that
                          aligns many places. For reads that have more than INT+1 distinct, valid alignments, Bowtie 2 does not
                          guarantee that the alignment reported is the best possible in terms of alignment score. -M is
                          always used and its default value is set to 10.
 
+]]>
   </help>
+  <citations>
+      <citation type="doi">10.1093/bioinformatics/btr167</citation>
+  </citations>
 </tool>
--- a/bismark_bowtie_wrapper.xml	Mon Apr 14 16:43:14 2014 -0400
+++ b/bismark_bowtie_wrapper.xml	Mon Feb 09 18:24:41 2015 -0500
@@ -7,11 +7,17 @@
         <requirement type="package" version="0.1.19">samtools</requirement>
         <requirement type="package" version="0.12.8">bowtie</requirement>
     </requirements>
-    <parallelism method="basic"></parallelism>
+    <stdio>
+        <exit_code range="1:" />
+        <exit_code range=":-1" />
+        <regex match="Error:" />
+        <regex match="Exception:" />
+    </stdio>
     <command interpreter="python">
+<![CDATA[
         bismark_wrapper.py
 
-        --bismark_path \$SCRIPT_PATH
+        ##--bismark_path \$SCRIPT_PATH
 
         ##
         ## Bismark Genome Preparation, if desired.
@@ -53,14 +59,17 @@
             --mate1 #echo ','.join($mate1)
             --mate2 #echo ','.join($mate2)
 
-            #if $singlePaired.mate_list[0].input_mate1.ext == "fastqillumina":
-                --phred64-quals
-                --fastq
-            #elif $singlePaired.mate_list[0].input_mate1.ext == "fastqsanger":
-                --fastq
-            #elif $singlePaired.mate_list[0].input_mate1.ext == "fasta":
-                --fasta
-            #end if
+            #for $mate_pair in $singlePaired.mate_list:
+                #if $mate_pair.input_mate1.ext == "fastqillumina":
+                    --phred64-quals
+                    --fastq
+                #elif $mate_pair.input_mate1.ext == "fastqsanger":
+                    --fastq
+                #elif $mate_pair.input_mate1.ext == "fasta":
+                    --fasta
+                #end if
+                #break
+            #end for
 
             -I $singlePaired.minInsert
             -X $singlePaired.maxInsert
@@ -123,6 +132,7 @@
         #end if
       #end if
 
+]]>
     </command>
     <inputs>
         <conditional name="refGenomeSource">
@@ -191,7 +201,7 @@
                 <!-- output Options -->
                 <param name="bismark_stdout" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Write the bismark output and summary information to an extra file" />
                 <param name="isReportOutput" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Offer all report files concatenated in one file" />
-                <!--end output options --> 
+                <!--end output options -->
             </when>  <!-- full -->
       </conditional>  <!-- params -->
       <!--
@@ -251,9 +261,9 @@
             </action>
           </when>
           <when value="paired">
-            <action type="format">
+            <!--action type="format">
               <option type="from_param" name="singlePaired.mate_list[0].input_mate1" param_attribute="ext" />
-            </action>
+            </action-->
           </when>
         </conditional>
       </actions>
@@ -271,9 +281,9 @@
             </action>
           </when>
           <when value="paired">
-            <action type="format">
+            <!--action type="format">
               <option type="from_param" name="singlePaired.mate_list[0].input_mate1" param_attribute="ext" />
-            </action>
+            </action-->
           </when>
         </conditional>
       </actions>
@@ -295,9 +305,9 @@
             </action>
           </when>
           <when value="paired">
-            <action type="format">
+            <!--action type="format">
               <option type="from_param" name="singlePaired.mate_list[0].input_mate1" param_attribute="ext" />
-            </action>
+            </action-->
           </when>
         </conditional>
       </actions>
@@ -314,21 +324,20 @@
             </action>
           </when>
           <when value="paired">
-            <action type="format">
+            <!--action type="format">
               <option type="from_param" name="singlePaired.mate_list[0].input_mate1" param_attribute="ext" />
-            </action>
+            </action-->
           </when>
         </conditional>
       </actions>
     </data>
-        
-        
     </outputs>
 
     <tests>
     </tests>
 
     <help>
+<![CDATA[
 
 **What it does**
 
@@ -386,8 +395,8 @@
     Column  Description
   --------  --------------------------------------------------------
   1  QNAME  seq-ID
-  2  FLAG   this flag tries to take the strand a bisulfite read 
-            originated from into account 
+  2  FLAG   this flag tries to take the strand a bisulfite read
+            originated from into account
             (this is different from ordinary DNA alignment flags!)
   3  RNAME  chromosome
   4  POS    start position
@@ -399,12 +408,12 @@
   10 SEQ    query SEQuence on the same strand as the reference
   11 QUAL   Phred33 scale
   12 NM-tag edit distance to the reference)
-  13 XX-tag base-by-base mismatches to the reference. 
+  13 XX-tag base-by-base mismatches to the reference.
             This does not include indels.
   14 XM-tag methylation call string
   15 XR-tag read conversion state for the alignment
   16 XG-tag genome conversion state for the alignment
-  
+
 
 Each read of paired-end alignments is written out in a separate line in the above format.
 
@@ -461,7 +470,7 @@
   --phred64-quals        FASTQ qualities are ASCII chars equal to the Phred quality plus 64. Default: off.
 
   --solexa-quals         Convert FASTQ qualities from solexa-scaled (which can be negative) to phred-scaled
-                         (which can't). The formula for conversion is: 
+                         (which can't). The formula for conversion is:
                          phred-qual = 10 * log(1 + 10 ** (solexa-qual/10.0)) / log(10). Used with -q. This
                          is usually the right option for use with (unconverted) reads emitted by the GA
                          Pipeline versions prior to 1.3. Works only for Bowtie 1. Default: off.
@@ -473,7 +482,7 @@
 Alignment::
 
   -n/--seedmms INT       The maximum number of mismatches permitted in the "seed", i.e. the first L base pairs
-                         of the read (where L is set with -l/--seedlen). This may be 0, 1, 2 or 3 and the 
+                         of the read (where L is set with -l/--seedlen). This may be 0, 1, 2 or 3 and the
                          default is 1. This option is only available for Bowtie 1 (for Bowtie 2 see -N).
 
   -l/--seedlen           The "seed length"; i.e., the number of bases of the high quality end of the read to
@@ -546,5 +555,9 @@
                          the specified folder does not exist, Bismark will attempt to create it first. The path to the
                          temporary folder can be either relative or absolute.
 
+]]>
   </help>
+  <citations>
+      <citation type="doi">10.1093/bioinformatics/btr167</citation>
+  </citations>
 </tool>
--- a/bismark_methylation_extractor.xml	Mon Apr 14 16:43:14 2014 -0400
+++ b/bismark_methylation_extractor.xml	Mon Feb 09 18:24:41 2015 -0500
@@ -9,11 +9,12 @@
     </requirements>
     <parallelism method="basic"></parallelism>
     <command interpreter="python">
+<![CDATA[
         bismark_methylation_extractor.py
 
         --infile $input
 
-        --bismark_path \$SCRIPT_PATH
+        #--bismark_path \$SCRIPT_PATH
 
         #if $singlePaired.sPaired == "single":
             --single-end
@@ -78,6 +79,7 @@
         ## end compress
         #end if
 
+]]>
     </command>
     <inputs>
         <!-- Input Parameters -->
@@ -187,11 +189,12 @@
     </tests>
 
     <help>
+<![CDATA[
 
 **What it does**
 
 The following is a brief description of all options to control the Bismark_
-methylation extractor. The script reads in a bisulfite read alignment results file 
+methylation extractor. The script reads in a bisulfite read alignment results file
 produced by the Bismark bisulfite mapper and extracts the methylation information
 for individual cytosines. This information is found in the methylation call field
 which can contain the following characters:
@@ -285,8 +288,8 @@
 
 Output::
 
-  --comprehensive          Specifying this option will merge all four possible strand-specific 
-                           methylation info into context-dependent output files. The default 
+  --comprehensive          Specifying this option will merge all four possible strand-specific
+                           methylation info into context-dependent output files. The default
                            contexts are:
                             - CpG context
                             - CHG context
@@ -301,5 +304,6 @@
                            this script.
 
 
+]]>
   </help>
 </tool>
--- a/bismark_wrapper.py	Mon Apr 14 16:43:14 2014 -0400
+++ b/bismark_wrapper.py	Mon Feb 09 18:24:41 2015 -0500
@@ -17,7 +17,6 @@
 
 def __main__():
 
-    print 'tempfile_location',tempfile.gettempdir()
     #Parse Command Line
     parser = argparse.ArgumentParser(description='Wrapper for the bismark bisulfite mapper.')
     parser.add_argument( '-p', '--num-threads', dest='num_threads',
@@ -258,9 +257,6 @@
 
     # Final bismark command:
     cmd = cmd % arguments
-    print 'bismark_cmd:', cmd
-    #sys.stderr.write( cmd )
-    #sys.exit(1)
     # Run
     try:
         tmp_out = tempfile.NamedTemporaryFile().name
@@ -321,8 +317,8 @@
         """
         #tmp_out = tempfile.NamedTemporaryFile( dir=output_dir ).name
         tmp_stdout = open( tmp_out, 'wab' )
-        tmp_err = tempfile.NamedTemporaryFile( dir=output_dir ).name
-        tmp_stderr = open( tmp_err, 'wb' )
+        #tmp_err = tempfile.NamedTemporaryFile( dir=output_dir ).name
+        tmp_stderr = open( tmp_err, 'wab' )
 
         tmp_res = tempfile.NamedTemporaryFile( dir= output_dir).name
 
@@ -338,18 +334,22 @@
             if returncode != 0:
                 raise Exception, open( tmp_stderr.name ).read()
         else:
-	    tmp_res = bam_files[0]
+            tmp_res = bam_files[0]
 
         bam_path = "%s" % tmp_res
 
         if os.path.exists( bam_path ):
-	    if args.sort_bam:
-                cmd = 'samtools sort -@ %s %s %s' % (args.num_threads, bam_path, args.output) 
-	    else:
-                shutil.copy( bam_path, args.output )
+            if args.sort_bam:
+                cmd = 'samtools sort -@ %s %s sorted_bam' % (args.num_threads, bam_path)
+                proc = subprocess.Popen( args=shlex.split( cmd ) )
+                returncode = proc.wait()
+                if returncode != 0:
+                    raise Exception("Error during '%s'" % cmd)
+                shutil.move( 'sorted_bam.bam', args.output )
+            else:
+                shutil.move( bam_path, args.output )
         else:
             stop_err( 'BAM file no found:\n' + str( bam_path ) )
-		
 
 
     # TODO: look for errors in program output.
--- a/tool_dependencies.xml	Mon Apr 14 16:43:14 2014 -0400
+++ b/tool_dependencies.xml	Mon Feb 09 18:24:41 2015 -0500
@@ -1,7 +1,7 @@
 <?xml version="1.0"?>
 <tool_dependency>
     <package name="samtools" version="0.1.19">
-        <repository changeset_revision="00e17a794a2e" name="package_samtools_0_1_19" owner="iuc" toolshed="http://toolshed.g2.bx.psu.edu" />
+        <repository changeset_revision="923adc89c666" name="package_samtools_0_1_19" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />
     </package>
     <set_environment version="1.0">
         <environment_variable action="set_to" name="SCRIPT_PATH">$REPOSITORY_INSTALL_DIR</environment_variable>