Mercurial > repos > jetbrains > span
diff span.xml @ 2:5b99943c4627 draft
Span version https://github.com/JetBrains-Research/galaxy-applications/commit/cbbba255d66a4775cc35caf5cb85665396fdcd2a
author | jetbrains |
---|---|
date | Sun, 18 Nov 2018 08:20:27 -0500 |
parents | 1f0c4f0a9c3b |
children | 4130e95bd6c8 |
line wrap: on
line diff
--- a/span.xml Thu Nov 15 11:30:01 2018 -0500 +++ b/span.xml Sun Nov 18 08:20:27 2018 -0500 @@ -1,8 +1,7 @@ <tool id="span" name="SPAN" version="0.7.1.4272"> - <description>ChIP-Seq analysis</description> + <description>Semi-supervised Peak Analyzer for ChIP-Seq data</description> <requirements> <requirement type="package" version="0.7.1.4272">package_span_jar</requirement> - <!--<container type="docker">biolabs/span</container>--> </requirements> <stdio> <!-- Wrapper ensures anything other than zero is an error --> @@ -10,31 +9,58 @@ <exit_code range=":-1"/> </stdio> <command interpreter="python"> +#import re +#set treatment_identifier = re.sub('[^\w\-\.]', '_', str($treatment_file.element_identifier)) +#set genome_identifier = re.sub('[^\w\-\.]', '_', str($genome_file.element_identifier)) + +#if $control.control_selector + #set control_identifier = re.sub('[^\w\-\.]', '_', str($control.control_file.element_identifier)) +#end if + #if str($action.action_selector) == "model" #if $control.control_selector - span_wrapper.py model with_control "${genome}" "${treatment_file}" "${bin}" "${action.model_file}" "${control.control_file}" + span_wrapper.py model with_control + "${genome_identifier}" "${genome_file}" + "${treatment_identifier}" "${treatment_file}" + "${bin}" "${action.model_file}" + "${control_identifier}" "${control.control_file}" #else - span_wrapper.py model without_control "${genome}" "${treatment_file}" "${bin}" "${action.model_file}" + span_wrapper.py model without_control + "${genome_identifier}" "${genome_file}" + "${treatment_identifier}" "${treatment_file}" + "${bin}" "${action.model_file}" #end if #else #if $control.control_selector - span_wrapper.py peaks with_control "${genome}" "${treatment_file}" "${bin}" "${action.model_file}" "${control.control_file}" "${fdr}" "${gap}" "${action.peaks_file}" + span_wrapper.py peaks with_control + "${genome_identifier}" "${genome_file}" + "${treatment_identifier}" "${treatment_file}" + "${bin}" "${action.model_file}" + "${control_identifier}" "${control.control_file}" + "${action.fdr}" "${action.gap}" "${action.peaks_file}" #else - span_wrapper.py peaks without_control "${genome}" "${treatment_file}" "${bin}" "${action.model_file}" "${fdr}" "${gap}" "${action.peaks_file}" + span_wrapper.py peaks without_control + "${genome_identifier}" "${genome_file}" + "${treatment_identifier}" "${treatment_file}" + "${bin}" "${action.model_file}" + "${action.fdr}" "${action.gap}" "${action.peaks_file}" #end if #end if </command> <inputs> <param name="treatment_file" type="data" format="bam" label="Treatment BAM" - description="Treatment BAM reads to process"/> - <param name="genome" type="data" format="chrom.sizes" label="Genome chrom.sizes" - description="Genome build chrom.sizes file"/> + description="Treatment BAM reads to process" argument="--treatment" + help="Treatment BAM reads to process"/> + <param name="genome_file" type="data" format="chrom.sizes" label="Genome chrom.sizes" + description="Genome build chrom.sizes file" argument="--chrom.sizes" + help="Genome build chrom.sizes file"/> <conditional name="control"> <param name="control_selector" type="boolean" label="Control available" value="false"/> <when value="true"> <param name="control_file" type="data" format="bam" label="Control BAM" - description="Control BAM reads to process"/> + description="Control BAM reads to process" argument="--control" + help="Control BAM reads to process"/> </when> </conditional> @@ -44,26 +70,107 @@ <option value="peaks">Compute SPAN model and produce peaks file</option> </param> <when value="model"> - <param name="model_file" type="text" value="model.span" label="Model name"/> + <param name="model_file" type="text" value="model.span" label="Model name" + help="Trained model file in binary format, which can be visualized directly in JBR Genome Browser + and used in integrated peak calling pipeline"/> </when> <when value="peaks"> - <param name="model_file" type="text" value="model.span" label="Model file name"/> - <param name="fdr" size="5" type="float" value="0.0001" label="FDR"/> - <param name="gap" size="5" type="integer" value="5" label="GAP"/> - <param name="peaks_file" type="text" value="result.peak" label="Peaks file name"/> + <param name="model_file" type="text" value="model.span" label="Model file name" + help="Trained model file in binary format, which can be visualized directly in JBR Genome Browser + and used in integrated peak calling pipeline"/> + <param name="fdr" size="5" type="float" value="0.0001" label="FDR" argument="--fdr" + help="Minimum FDR cutoff to call significant regions, default value is 1.0E-6. + SPAN reports p- and q- values for the null hypothesis that a given bin is not enriched with a histone modification. + Peaks are formed from a list of truly (in the FDR sense) enriched bins for the analyzed biological condition by thresholding the + Q-value with a cutoff FDR and merging spatially close peaks using GAP option to broad ones. This is equivalent to controlling FDR. + q-values are are calculated from p-values using Benjamini-Hochberg procedure."/> + <param name="gap" size="5" type="integer" value="5" label="GAP" argument="--gap" + help="Gap size to merge spatially close peaks. Useful for wide histone modifications. + Default value is 5, i.e. peaks separated by 5*BIN distance or less are merged."/> + <param name="peaks_file" type="text" value="result.peak" label="Peaks file name" argument="--peaks"/> </when> </conditional> - <param name="bin" size="5" type="integer" value="200" label="Bin size"/> + <param name="bin" size="5" type="integer" value="200" label="Bin size" argument="--bin" + help="Peak analysis is performed on read coverage tiled into consequent bins, with size being configurable. + Default value is 200bp, approximately the length of one nucleosome."/> </inputs> <outputs> - <data name="${action.model_file}" format="span" label="SPAN model file"/> - <data name="${action.peaks_file}" format="bed" label="SPAN peaks file"> + <data name="SPAN model file" format="span" from_work_dir="*.span" label="SPAN model file ${action.model_file} on ${on_string}"/> + <data name="SPAN peaks file" format="bed" from_work_dir="*.peak" label="SPAN peaks file ${action.peaks_file} on ${on_string}"> <filter>action['action_selector'] == "peaks"</filter> </data> + <data name="SPAN log file" format="txt" from_work_dir="*.log" label="SPAN log file on ${on_string}"/> </outputs> - <help> - SPAN Semi-supervised Peak Analyzer is a tool for analyzing ChIP-seq data. - Details: http://artyomovlab.wustl.edu/aging/span.html - </help> + <help><![CDATA[ +.. class:: infomark + +**What it does** + +SPAN Semi-supervised Peak Analyzer is a tool for analyzing ChIP-seq data. + +----- + +**Inputs** + +*-t, --treatment <Path>* **Required.** ChIP-seq treatment file. bam, bed or .bed.gz file; If multiple files are given, treated as replicates. + +*--chrom.sizes, --cs <Path>* **Required.** Chromosome sizes path, can be downloaded at http://hgdownload.cse.ucsc.edu/goldenPath/<build>/bigZips/<build>.chrom.sizes + +*-c, --control <Path>* Control file. bam, bed or bed.gz file; Single control file or separate file per each treatment file required. + +*--fragment <Integer>* Fragment size, read length if not given + +*-b, --bin <Integer>* Bin size + +*-f, --fdr <Double>* Fdr value + +*-g, --gap <Integer>* Gap size to merge peaks + +*-p, --peaks <Path>* Path to result peaks file in ENCODE broadPeak (BED 6+3) format + + +----- + +**Outputs** + +This tool produces a SPAN binary model file and/or peaks in ENCODE broadPeak (BED 6+3) format. + +Peak file columns contain the following data: + +* **1st**: chromosome name +* **2nd**: start position of peak +* **3rd**: end position of peak +* **4th**: name of peak +* **5th**: integer score for display in genome browser (e.g. UCSC) +* **6th**: strand, either "." (=no strand) or "+" or "-" +* **7th**: fold-change +* **8th**: -log10pvalue +* **9th**: -log10qvalue + +----- + +**SPAN workflow** + +* Convert raw reads to tags using *FRAGMENT* parameter. +* Compute coverage for all genome tiled into bins of *BIN* base pairs. +* Fit 3-state hidden Markov model that classifies bins as ZERO states with no coverage, LOW states of non-specific binding, and HIGH states of the specific binding. +* Compute posterior HIGH state probability of each bin. +* Trained model is saved into *.span* binary format. +* Peaks are computed using trained model and *FDR* and *GAP* parameters. + +------ + +**Citation** + +If you use this tool in Galaxy, please cite XXX, et al. *In preparation.* + +----- + +**More Information** + +* Project home page: https://research.jetbrains.org/groups/biolabs/tools/span-peak-analyzer +* Study cases: https://artyomovlab.wustl.edu/aging + +]]></help> </tool>