changeset 3:4130e95bd6c8 draft

Batch processing mode supported
author jetbrains
date Mon, 19 Nov 2018 08:24:04 -0500
parents 5b99943c4627
children 7936a3af3dd1
files span.xml span_wrapper.py
diffstat 2 files changed, 95 insertions(+), 132 deletions(-) [+]
line wrap: on
line diff
--- a/span.xml	Sun Nov 18 08:20:27 2018 -0500
+++ b/span.xml	Mon Nov 19 08:24:04 2018 -0500
@@ -9,41 +9,33 @@
         <exit_code range=":-1"/>
     </stdio>
     <command interpreter="python">
-#import re
-#set treatment_identifier = re.sub('[^\w\-\.]', '_', str($treatment_file.element_identifier))
-#set genome_identifier = re.sub('[^\w\-\.]', '_', str($genome_file.element_identifier))
-
-#if $control.control_selector
-    #set control_identifier = re.sub('[^\w\-\.]', '_', str($control.control_file.element_identifier))
-#end if
-
 #if str($action.action_selector) == "model"
-    #if $control.control_selector
-        span_wrapper.py model with_control
-            "${genome_identifier}" "${genome_file}"
-            "${treatment_identifier}" "${treatment_file}"
-            "${bin}" "${action.model_file}"
-            "${control_identifier}" "${control.control_file}"
+    #if str($control_file) != 'None':
+        span_wrapper.py model_with_control
+            "${genome_file.name}" "${genome_file}"
+            "${treatment_file.name}" "${treatment_file}"
+            "${control_file.name}" "${control_file}"
+            "${bin}"
     #else
-        span_wrapper.py model without_control
-            "${genome_identifier}" "${genome_file}"
-            "${treatment_identifier}" "${treatment_file}"
-            "${bin}" "${action.model_file}"
+        span_wrapper.py model_without_control
+            "${genome_file.name}" "${genome_file}"
+            "${treatment_file.name}" "${treatment_file}"
+            "${bin}"
     #end if
 #else
-    #if $control.control_selector
-        span_wrapper.py peaks with_control
-            "${genome_identifier}" "${genome_file}"
-            "${treatment_identifier}" "${treatment_file}"
-            "${bin}" "${action.model_file}"
-            "${control_identifier}" "${control.control_file}"
-            "${action.fdr}" "${action.gap}" "${action.peaks_file}"
+    #if str($control_file) != 'None':
+        span_wrapper.py peaks_with_control
+            "${genome_file.name}" "${genome_file}"
+            "${treatment_file.name}" "${treatment_file}"
+            "${control_file.name}" "${control_file}"
+            "${bin}"
+            "${action.fdr}" "${action.gap}"
     #else
-        span_wrapper.py peaks without_control
-            "${genome_identifier}" "${genome_file}"
-            "${treatment_identifier}" "${treatment_file}"
-            "${bin}" "${action.model_file}"
-            "${action.fdr}" "${action.gap}" "${action.peaks_file}"
+        span_wrapper.py peaks_without_control
+            "${genome_file.name}" "${genome_file}"
+            "${treatment_file.name}" "${treatment_file}"
+            "${bin}"
+            "${action.fdr}" "${action.gap}"
     #end if
 #end if
      </command>
@@ -51,33 +43,20 @@
         <param name="treatment_file" type="data" format="bam" label="Treatment BAM"
                description="Treatment BAM reads to process" argument="--treatment"
                help="Treatment BAM reads to process"/>
+
+        <param name="control_file" type="data" format="BAM" label="Control BAM" optional="True"
+               argument="--control" help="Control BAM reads to process"/>
+
         <param name="genome_file" type="data" format="chrom.sizes" label="Genome chrom.sizes"
                description="Genome build chrom.sizes file" argument="--chrom.sizes"
                help="Genome build chrom.sizes file"/>
 
-        <conditional name="control">
-            <param name="control_selector" type="boolean" label="Control available" value="false"/>
-            <when value="true">
-                <param name="control_file" type="data" format="bam" label="Control BAM"
-                       description="Control BAM reads to process" argument="--control"
-                       help="Control BAM reads to process"/>
-            </when>
-        </conditional>
-
         <conditional name="action">
             <param name="action_selector" type="select" label="Action">
                 <option value="model">Compute SPAN model</option>
                 <option value="peaks">Compute SPAN model and produce peaks file</option>
             </param>
-            <when value="model">
-                <param name="model_file" type="text" value="model.span" label="Model name"
-                       help="Trained model file in binary format, which can be visualized directly in JBR Genome Browser
-                       and used in integrated peak calling pipeline"/>
-            </when>
             <when value="peaks">
-                <param name="model_file" type="text" value="model.span" label="Model file name"
-                       help="Trained model file in binary format, which can be visualized directly in JBR Genome Browser
-                       and used in integrated peak calling pipeline"/>
                 <param name="fdr" size="5" type="float" value="0.0001" label="FDR" argument="--fdr"
                        help="Minimum FDR cutoff to call significant regions, default value is 1.0E-6.
                        SPAN reports p- and q- values for the null hypothesis that a given bin is not enriched with a histone modification.
@@ -87,7 +66,6 @@
                 <param name="gap" size="5" type="integer" value="5" label="GAP" argument="--gap"
                        help="Gap size to merge spatially close peaks. Useful for wide histone modifications.
                        Default value is 5, i.e. peaks separated by 5*BIN distance or less are merged."/>
-                <param name="peaks_file" type="text" value="result.peak" label="Peaks file name" argument="--peaks"/>
             </when>
         </conditional>
 
@@ -96,11 +74,13 @@
                Default value is 200bp, approximately the length of one nucleosome."/>
     </inputs>
     <outputs>
-        <data name="SPAN model file" format="span" from_work_dir="*.span" label="SPAN model file ${action.model_file} on ${on_string}"/>
-        <data name="SPAN peaks file" format="bed" from_work_dir="*.peak" label="SPAN peaks file ${action.peaks_file} on ${on_string}">
+        <data name="model.span" format="span" from_work_dir="*.span"
+              label="SPAN model on ${on_string} (${treatment_file.name}#if str($control_file) != 'None' then '_{}'.format($control_file.name) else '' #_${bin})"/>
+        <data name="result.peak" format="bed" from_work_dir="*.peak"
+              label="SPAN peaks on ${on_string} (${treatment_file.name}#if str($control_file) != 'None' then '_{}'.format($control_file.name) else '' #_${bin}_${action.fdr}_${action.gap})">
             <filter>action['action_selector'] == "peaks"</filter>
         </data>
-        <data name="SPAN log file" format="txt" from_work_dir="*.log" label="SPAN log file on ${on_string}"/>
+        <data name="span.log" format="txt" from_work_dir="*.log" label="SPAN logs on ${on_string}"/>
     </outputs>
     <help><![CDATA[
 .. class:: infomark
@@ -134,7 +114,7 @@
 
 **Outputs**
 
-This tool produces a SPAN binary model file and/or peaks in ENCODE broadPeak (BED 6+3) format.
+This tool produces a SPAN binary model file (which can be visualized in JBR Genome Browser and used in semi-supervised peak calling) and/or peaks in ENCODE broadPeak (BED 6+3) format.
 
 Peak file columns contain the following data:
 
--- a/span_wrapper.py	Sun Nov 18 08:20:27 2018 -0500
+++ b/span_wrapper.py	Mon Nov 19 08:24:04 2018 -0500
@@ -12,38 +12,36 @@
 print 'Using SPAN Peak Analyzer distributive file {0}'.format(SPAN_JAR)
 
 # #if str($action.action_selector) == "model"
-#     #if $control.control_selector
-#         span_wrapper.py model with_control
+#     #if str($control_file) != 'None':
+#         span_wrapper.py model_with_control
 #             "${genome_identifier}" "${genome_file}"
 #             "${treatment_identifier}" "${treatment_file}"
-#             "${bin}" "${action.model_file}"
-#             "${control_identifier}" "${control.control_file}"
+#             "${control_identifier}" "${control_file}"
+#             "${bin}"
+#
 #     #else
 #         span_wrapper.py model_without_control
 #             "${genome_identifier}" "${genome_file}"
 #             "${treatment_identifier}" "${treatment_file}"
-#             "${bin}" "${action.model_file}"
+#             "${bin}"
 #     #end if
 # #else
-#     #if $control.control_selector
-#         span_wrapper.py peaks with_control
+#     #if str($control_file) != 'None':
+#         span_wrapper.py peaks_with_control
 #             "${genome_identifier}" "${genome_file}"
 #             "${treatment_identifier}" "${treatment_file}"
-#             "${bin}" "${action.model_file}"
-#             "${control_identifier}" "${control.control_file}"
-#             "${fdr}" "${gap}" "${action.peaks_file}"
+#             "${control_identifier}" "${control_file}"
+#             "${bin}"
+#             "${action.fdr}" "${action.gap}"
 #     #else
-#         span_wrapper.py peaks with_control
+#         span_wrapper.py peaks_without_control
 #             "${genome_identifier}" "${genome_file}"
 #             "${treatment_identifier}" "${treatment_file}"
-#             "${bin}" "${action.model_file}"
-#             "${fdr}" "${gap}" "${action.peaks_file}"
+#             "${bin}"
+#             "${action.fdr}" "${action.gap}"
 #     #end if
 # #end if
-
-# See https://research.jetbrains.org/groups/biolabs/tools/span-peak-analyzer for command line options
 action = argv[0]
-control = argv[1]
 
 working_dir = os.path.abspath('.')
 print 'WORKING DIRECTORY: {}'.format(working_dir)
@@ -52,82 +50,67 @@
 def link(name, f):
     """ SPAN uses file extension to detect input type, so original names are necessary, instead of Galaxy .dat files"""
     result = os.path.join(working_dir, name)
-    os.symlink(f, result)
+    if not os.path.exists(result):
+        os.symlink(f, result)
     return result
 
 
-if action == 'model':
-    if control == 'with_control':
-        (chrom_sizes, chrom_sizes_file,
-         treatment, treatment_file,
-         bin, model_file,
-         control, control_file) = argv[2:]
-        cmd = 'java -jar {} analyze --chrom.sizes {} --treatment {} --control {} --bin {}'.format(
-            SPAN_JAR,
-            link(chrom_sizes, chrom_sizes_file),
-            link(treatment, treatment_file),
-            link(control, control_file),
-            bin
-        )
-    elif control == 'without_control':
-        (chrom_sizes, chrom_sizes_file,
-         treatment, treatment_file,
-         bin, model_file) = argv[2:]
-        cmd = 'java -jar {} analyze --chrom.sizes {} --treatment {} --bin {}'.format(
-            SPAN_JAR,
-            link(chrom_sizes, chrom_sizes_file),
-            link(treatment, treatment_file),
-            bin
-        )
-    else:
-        raise Exception("Unknown control option {}".format(control))
-
-elif action == "peaks":
-    if control == 'with_control':
-        (chrom_sizes, chrom_sizes_file,
-         treatment, treatment_file,
-         bin, model_file,
-         control, control_file,
-         fdr, gap, peaks_file) = argv[2:]
-        cmd = 'java -jar {} analyze --chrom.sizes {} --treatment {} --control {} --bin {} --fdr {} --gap {} --peaks {}'.format(
-            SPAN_JAR,
-            link(chrom_sizes, chrom_sizes_file),
-            link(treatment, treatment_file),
-            link(control, control_file),
-            bin, fdr, gap,
-            os.path.join(working_dir, peaks_file)
-        )
-    elif control == 'without_control':
-        (chrom_sizes, chrom_sizes_file,
-         treatment, treatment_file,
-         bin, model_file,
-         fdr, gap, peaks_file) = argv[2:]
-        cmd = 'java -jar {} analyze --chrom.sizes {} --treatment {} --bin {} --fdr {} --gap {} --peaks {}'.format(
-            SPAN_JAR,
-            link(chrom_sizes, chrom_sizes_file),
-            link(treatment, treatment_file),
-            bin, fdr, gap,
-            os.path.join(working_dir, peaks_file)
-        )
-    else:
-        raise Exception("Unknown control option {}".format(control))
+if action == 'model_with_control':
+    (chrom_sizes, chrom_sizes_file,
+     treatment, treatment_file,
+     control, control_file,
+     bin) = argv[1:]
+    cmd = 'java -jar {} analyze --chrom.sizes {} --treatment {} --control {} --bin {}'.format(
+        SPAN_JAR,
+        link(chrom_sizes, chrom_sizes_file),
+        link(treatment, treatment_file),
+        link(control, control_file),
+        bin)
+elif action == 'model_without_control':
+    (chrom_sizes, chrom_sizes_file,
+     treatment, treatment_file,
+     bin) = argv[1:]
+    cmd = 'java -jar {} analyze --chrom.sizes {} --treatment {} --bin {}'.format(
+        SPAN_JAR,
+        link(chrom_sizes, chrom_sizes_file),
+        link(treatment, treatment_file),
+        bin)
+elif action == "peaks_with_control":
+    (chrom_sizes, chrom_sizes_file,
+     treatment, treatment_file,
+     control, control_file,
+     bin,
+     fdr, gap) = argv[1:]
+    cmd = 'java -jar {} analyze --chrom.sizes {} --treatment {} --control {} ' \
+          '--bin {} --fdr {} --gap {} --peaks {}'.format(
+        SPAN_JAR,
+        link(chrom_sizes, chrom_sizes_file),
+        link(treatment, treatment_file),
+        link(control, control_file),
+        bin, fdr, gap,
+        os.path.join(working_dir, 'result.peak'))
+elif action == 'peaks_without_control':
+    (chrom_sizes, chrom_sizes_file,
+     treatment, treatment_file,
+     bin,
+     fdr, gap) = argv[1:]
+    cmd = 'java -jar {} analyze --chrom.sizes {} --treatment {} -' \
+          '-bin {} --fdr {} --gap {} --peaks {}'.format(
+        SPAN_JAR,
+        link(chrom_sizes, chrom_sizes_file),
+        link(treatment, treatment_file),
+        bin, fdr, gap,
+        os.path.join(working_dir, 'result.peak'))
 else:
     raise Exception("Unknown action command {}".format(action))
 
-
 print 'Launching SPAN: {}'.format(cmd)
-print 'Model file: {}'.format(model_file)
-try:
-    print 'Peaks file: {}'.format(peaks_file)
-except NameError:
-    pass
-
 subprocess.check_call(cmd, cwd=None, shell=True)
 
 # Move model to the working dir with given name
 fit_dir = os.path.join(working_dir, 'fit')
 model_original = os.path.join(fit_dir, os.listdir(fit_dir)[0])
-shutil.move(model_original, os.path.join(working_dir, model_file))
+shutil.move(model_original, os.path.join(working_dir, 'model.span'))
 
 # Move log file
 logs_dir = os.path.join(working_dir, 'logs')