Mercurial > repos > jetbrains > span
changeset 3:4130e95bd6c8 draft
Batch processing mode supported
author | jetbrains |
---|---|
date | Mon, 19 Nov 2018 08:24:04 -0500 |
parents | 5b99943c4627 |
children | 7936a3af3dd1 |
files | span.xml span_wrapper.py |
diffstat | 2 files changed, 95 insertions(+), 132 deletions(-) [+] |
line wrap: on
line diff
--- a/span.xml Sun Nov 18 08:20:27 2018 -0500 +++ b/span.xml Mon Nov 19 08:24:04 2018 -0500 @@ -9,41 +9,33 @@ <exit_code range=":-1"/> </stdio> <command interpreter="python"> -#import re -#set treatment_identifier = re.sub('[^\w\-\.]', '_', str($treatment_file.element_identifier)) -#set genome_identifier = re.sub('[^\w\-\.]', '_', str($genome_file.element_identifier)) - -#if $control.control_selector - #set control_identifier = re.sub('[^\w\-\.]', '_', str($control.control_file.element_identifier)) -#end if - #if str($action.action_selector) == "model" - #if $control.control_selector - span_wrapper.py model with_control - "${genome_identifier}" "${genome_file}" - "${treatment_identifier}" "${treatment_file}" - "${bin}" "${action.model_file}" - "${control_identifier}" "${control.control_file}" + #if str($control_file) != 'None': + span_wrapper.py model_with_control + "${genome_file.name}" "${genome_file}" + "${treatment_file.name}" "${treatment_file}" + "${control_file.name}" "${control_file}" + "${bin}" #else - span_wrapper.py model without_control - "${genome_identifier}" "${genome_file}" - "${treatment_identifier}" "${treatment_file}" - "${bin}" "${action.model_file}" + span_wrapper.py model_without_control + "${genome_file.name}" "${genome_file}" + "${treatment_file.name}" "${treatment_file}" + "${bin}" #end if #else - #if $control.control_selector - span_wrapper.py peaks with_control - "${genome_identifier}" "${genome_file}" - "${treatment_identifier}" "${treatment_file}" - "${bin}" "${action.model_file}" - "${control_identifier}" "${control.control_file}" - "${action.fdr}" "${action.gap}" "${action.peaks_file}" + #if str($control_file) != 'None': + span_wrapper.py peaks_with_control + "${genome_file.name}" "${genome_file}" + "${treatment_file.name}" "${treatment_file}" + "${control_file.name}" "${control_file}" + "${bin}" + "${action.fdr}" "${action.gap}" #else - span_wrapper.py peaks without_control - "${genome_identifier}" "${genome_file}" - "${treatment_identifier}" "${treatment_file}" - "${bin}" "${action.model_file}" - "${action.fdr}" "${action.gap}" "${action.peaks_file}" + span_wrapper.py peaks_without_control + "${genome_file.name}" "${genome_file}" + "${treatment_file.name}" "${treatment_file}" + "${bin}" + "${action.fdr}" "${action.gap}" #end if #end if </command> @@ -51,33 +43,20 @@ <param name="treatment_file" type="data" format="bam" label="Treatment BAM" description="Treatment BAM reads to process" argument="--treatment" help="Treatment BAM reads to process"/> + + <param name="control_file" type="data" format="BAM" label="Control BAM" optional="True" + argument="--control" help="Control BAM reads to process"/> + <param name="genome_file" type="data" format="chrom.sizes" label="Genome chrom.sizes" description="Genome build chrom.sizes file" argument="--chrom.sizes" help="Genome build chrom.sizes file"/> - <conditional name="control"> - <param name="control_selector" type="boolean" label="Control available" value="false"/> - <when value="true"> - <param name="control_file" type="data" format="bam" label="Control BAM" - description="Control BAM reads to process" argument="--control" - help="Control BAM reads to process"/> - </when> - </conditional> - <conditional name="action"> <param name="action_selector" type="select" label="Action"> <option value="model">Compute SPAN model</option> <option value="peaks">Compute SPAN model and produce peaks file</option> </param> - <when value="model"> - <param name="model_file" type="text" value="model.span" label="Model name" - help="Trained model file in binary format, which can be visualized directly in JBR Genome Browser - and used in integrated peak calling pipeline"/> - </when> <when value="peaks"> - <param name="model_file" type="text" value="model.span" label="Model file name" - help="Trained model file in binary format, which can be visualized directly in JBR Genome Browser - and used in integrated peak calling pipeline"/> <param name="fdr" size="5" type="float" value="0.0001" label="FDR" argument="--fdr" help="Minimum FDR cutoff to call significant regions, default value is 1.0E-6. SPAN reports p- and q- values for the null hypothesis that a given bin is not enriched with a histone modification. @@ -87,7 +66,6 @@ <param name="gap" size="5" type="integer" value="5" label="GAP" argument="--gap" help="Gap size to merge spatially close peaks. Useful for wide histone modifications. Default value is 5, i.e. peaks separated by 5*BIN distance or less are merged."/> - <param name="peaks_file" type="text" value="result.peak" label="Peaks file name" argument="--peaks"/> </when> </conditional> @@ -96,11 +74,13 @@ Default value is 200bp, approximately the length of one nucleosome."/> </inputs> <outputs> - <data name="SPAN model file" format="span" from_work_dir="*.span" label="SPAN model file ${action.model_file} on ${on_string}"/> - <data name="SPAN peaks file" format="bed" from_work_dir="*.peak" label="SPAN peaks file ${action.peaks_file} on ${on_string}"> + <data name="model.span" format="span" from_work_dir="*.span" + label="SPAN model on ${on_string} (${treatment_file.name}#if str($control_file) != 'None' then '_{}'.format($control_file.name) else '' #_${bin})"/> + <data name="result.peak" format="bed" from_work_dir="*.peak" + label="SPAN peaks on ${on_string} (${treatment_file.name}#if str($control_file) != 'None' then '_{}'.format($control_file.name) else '' #_${bin}_${action.fdr}_${action.gap})"> <filter>action['action_selector'] == "peaks"</filter> </data> - <data name="SPAN log file" format="txt" from_work_dir="*.log" label="SPAN log file on ${on_string}"/> + <data name="span.log" format="txt" from_work_dir="*.log" label="SPAN logs on ${on_string}"/> </outputs> <help><![CDATA[ .. class:: infomark @@ -134,7 +114,7 @@ **Outputs** -This tool produces a SPAN binary model file and/or peaks in ENCODE broadPeak (BED 6+3) format. +This tool produces a SPAN binary model file (can be visualized in JBR Genome Browser and used in semi-supervised peak calling) and/or peaks in ENCODE broadPeak (BED 6+3) format. Peak file columns contain the following data:
--- a/span_wrapper.py Sun Nov 18 08:20:27 2018 -0500 +++ b/span_wrapper.py Mon Nov 19 08:24:04 2018 -0500 @@ -12,38 +12,36 @@ print 'Using SPAN Peak Analyzer distributive file {0}'.format(SPAN_JAR) # #if str($action.action_selector) == "model" -# #if $control.control_selector -# span_wrapper.py model with_control +# #if str($control_file) != 'None': +# span_wrapper.py model_with_control # "${genome_identifier}" "${genome_file}" # "${treatment_identifier}" "${treatment_file}" -# "${bin}" "${action.model_file}" -# "${control_identifier}" "${control.control_file}" +# "${control_identifier}" "${control_file}" +# "${bin}" +# # #else # span_wrapper.py model without_control # "${genome_identifier}" "${genome_file}" # "${treatment_identifier}" "${treatment_file}" -# "${bin}" "${action.model_file}" +# "${bin}" # #end if # #else -# #if $control.control_selector -# span_wrapper.py peaks with_control +# #if str($control_file) != 'None': +# span_wrapper.py peaks_with_control # "${genome_identifier}" "${genome_file}" # "${treatment_identifier}" "${treatment_file}" -# "${bin}" "${action.model_file}" -# "${control_identifier}" "${control.control_file}" -# "${fdr}" "${gap}" "${action.peaks_file}" +# "${control_identifier}" "${control_file}" +# "${bin}" +# "${action.fdr}" "${action.gap}" # #else -# span_wrapper.py peaks with_control +# span_wrapper.py peaks_without_control # "${genome_identifier}" "${genome_file}" # "${treatment_identifier}" "${treatment_file}" -# "${bin}" "${action.model_file}" -# "${fdr}" "${gap}" "${action.peaks_file}" +# "${bin}" +# "${action.fdr}" "${action.gap}" # #end if # #end if - -# See https://research.jetbrains.org/groups/biolabs/tools/span-peak-analyzer for command line options action = argv[0] -control = argv[1] working_dir = os.path.abspath('.') print 'WORKING DIRECTORY: {}'.format(working_dir) @@ -52,82 +50,67 @@ def link(name, f): """ SPAN uses file extension to detect input type, so original names are necessary, instead of Galaxy .dat files""" result = os.path.join(working_dir, name) - os.symlink(f, result) + if not os.path.exists(result): + os.symlink(f, result) return result -if action == 'model': - if control == 'with_control': - (chrom_sizes, chrom_sizes_file, - treatment, treatment_file, - bin, model_file, - control, control_file) = argv[2:] - cmd = 'java -jar {} analyze --chrom.sizes {} --treatment {} --control {} --bin {}'.format( - SPAN_JAR, - link(chrom_sizes, chrom_sizes_file), - link(treatment, treatment_file), - link(control, control_file), - bin - ) - elif control == 'without_control': - (chrom_sizes, chrom_sizes_file, - treatment, treatment_file, - bin, model_file) = argv[2:] - cmd = 'java -jar {} analyze --chrom.sizes {} --treatment {} --bin {}'.format( - SPAN_JAR, - link(chrom_sizes, chrom_sizes_file), - link(treatment, treatment_file), - bin - ) - else: - raise Exception("Unknown control option {}".format(control)) - -elif action == "peaks": - if control == 'with_control': - (chrom_sizes, chrom_sizes_file, - treatment, treatment_file, - bin, model_file, - control, control_file, - fdr, gap, peaks_file) = argv[2:] - cmd = 'java -jar {} analyze --chrom.sizes {} --treatment {} --control {} --bin {} --fdr {} --gap {} --peaks {}'.format( - SPAN_JAR, - link(chrom_sizes, chrom_sizes_file), - link(treatment, treatment_file), - link(control, control_file), - bin, fdr, gap, - os.path.join(working_dir, peaks_file) - ) - elif control == 'without_control': - (chrom_sizes, chrom_sizes_file, - treatment, treatment_file, - bin, model_file, - fdr, gap, peaks_file) = argv[2:] - cmd = 'java -jar {} analyze --chrom.sizes {} --treatment {} --bin {} --fdr {} --gap {} --peaks {}'.format( - SPAN_JAR, - link(chrom_sizes, chrom_sizes_file), - link(treatment, treatment_file), - bin, fdr, gap, - os.path.join(working_dir, peaks_file) - ) - else: - raise Exception("Unknown control option {}".format(control)) +if action == 'model_with_control': + (chrom_sizes, chrom_sizes_file, + treatment, treatment_file, + control, control_file, + bin) = argv[1:] + cmd = 'java -jar {} analyze --chrom.sizes {} --treatment {} --control {} --bin {}'.format( + SPAN_JAR, + link(chrom_sizes, chrom_sizes_file), + link(treatment, treatment_file), + link(control, control_file), + bin) +elif action == 'model_without_control': + (chrom_sizes, chrom_sizes_file, + treatment, treatment_file, + bin) = argv[1:] + cmd = 'java -jar {} analyze --chrom.sizes {} --treatment {} --bin {}'.format( + SPAN_JAR, + link(chrom_sizes, chrom_sizes_file), + link(treatment, treatment_file), + bin) +elif action == "peaks_with_control": + (chrom_sizes, chrom_sizes_file, + treatment, treatment_file, + control, control_file, + bin, + fdr, gap) = argv[1:] + cmd = 'java -jar {} analyze --chrom.sizes {} --treatment {} --control {} ' \ + '--bin {} --fdr {} --gap {} --peaks {}'.format( + SPAN_JAR, + link(chrom_sizes, chrom_sizes_file), + link(treatment, treatment_file), + link(control, control_file), + bin, fdr, gap, + os.path.join(working_dir, 'result.peak')) +elif action == 'peaks_without_control': + (chrom_sizes, chrom_sizes_file, + treatment, treatment_file, + bin, + fdr, gap) = argv[1:] + cmd = 'java -jar {} analyze --chrom.sizes {} --treatment {} -' \ + '-bin {} --fdr {} --gap {} --peaks {}'.format( + SPAN_JAR, + link(chrom_sizes, chrom_sizes_file), + link(treatment, treatment_file), + bin, fdr, gap, + os.path.join(working_dir, 'result.peak')) else: raise Exception("Unknown action command {}".format(action)) - print 'Launching SPAN: {}'.format(cmd) -print 'Model file: {}'.format(model_file) -try: - print 'Peaks file: {}'.format(peaks_file) -except NameError: - pass - subprocess.check_call(cmd, cwd=None, shell=True) # Move model to the the working dir with given name fit_dir = os.path.join(working_dir, 'fit') model_original = os.path.join(fit_dir, os.listdir(fit_dir)[0]) -shutil.move(model_original, os.path.join(working_dir, model_file)) +shutil.move(model_original, os.path.join(working_dir, 'model.span')) # Move log file logs_dir = os.path.join(working_dir, 'logs')