<!--
view span.xml @ 11:191a97fe6da5 draft

https://github.com/JetBrains-Research/galaxy-applications/commit/08f5cc4f954c99934870f39a4ccbeebaf1620634
author jetbrains
date Wed, 16 Jan 2019 12:18:16 -0500
parents 932bcf6c9332
children 6c9ed05526da
line wrap: on
line source
-->

<tool id="span" name="SPAN" version="0.9.2.4618">
    <description>Semi-supervised Peak Analyzer for ChIP-Seq data</description>
    <requirements>
        <!-- SPAN jar dependency; its version is pinned to the same value as the tool's own version attribute. -->
        <requirement type="package" version="0.9.2.4618">package_span_jar</requirement>
    </requirements>
    <stdio>
        <!-- Any non-zero exit code is an error: the two ranges below cover
             every positive and every negative code, leaving only 0 as success. -->
        <exit_code range="1:"/>
        <exit_code range=":-1"/>
    </stdio>
    <!--
        Invokes span_wrapper.py (shipped next to this tool file) with positional arguments:
          1. memory limit, 2. thread count,
          3. mode: model_with_control | model_without_control |
                   peaks_with_control | peaks_without_control,
          4. genome chrom.sizes name and path,
          5. treatment BAM name and path,
          6. control BAM name and path (only when a control dataset is selected),
          7. bin size,
          8. FDR and GAP (only in peaks mode).
        The deprecated "interpreter" attribute is replaced by an explicit call through
        $__tool_directory__, and the four formerly duplicated branches are collapsed
        into a single invocation with the same argument order.
    -->
    <command><![CDATA[
#set $has_control = str($control_file) != 'None'
#if str($action.action_selector) == "model"
    #set $task = "model"
#else
    #set $task = "peaks"
#end if
python '$__tool_directory__/span_wrapper.py'
    "${advanced_options.memory}" "${advanced_options.threads}"
#if $has_control
    ${task}_with_control
#else
    ${task}_without_control
#end if
    "${genome_file.name}" "${genome_file}"
    "${treatment_file.name}" "${treatment_file}"
#if $has_control
    "${control_file.name}" "${control_file}"
#end if
    "${bin}"
#if $task == "peaks"
    "${action.fdr}" "${action.gap}"
#end if
    ]]></command>
    <!-- Tool form. Parameter names are referenced by the command template and by the output labels. -->
    <inputs>
        <param name="treatment_file" type="data" format="bam" label="Treatment BAM"
               argument="--treatment" help="Treatment BAM reads to process"/>

        <!-- Optional input: when left unset the command template selects a *_without_control mode. -->
        <param name="control_file" type="data" format="bam" label="Control BAM" optional="true"
               argument="--control" help="Control BAM reads to process"/>

        <param name="genome_file" type="data" format="chrom.sizes" label="Genome chrom.sizes"
               argument="--chrom.sizes" help="Genome build chrom.sizes file"/>

        <!-- Chooses between fitting the model only and fitting the model plus calling peaks. -->
        <conditional name="action">
            <param name="action_selector" type="select" label="Action">
                <option value="model">Compute SPAN model</option>
                <option value="peaks">Compute SPAN model and produce peaks file</option>
            </param>
            <!-- Model-only runs take no additional parameters. -->
            <when value="model"/>
            <when value="peaks">
                <param name="fdr" size="5" type="float" value="0.0001" min="0" max="1" label="FDR" argument="--fdr"
                       help="FDR cutoff to call significant regions, default value is 0.0001.
                       SPAN reports p- and q- values for the null hypothesis that a given bin is not enriched with a histone modification.
                       Peaks are formed from a list of truly (in the FDR sense) enriched bins for the analyzed biological condition by thresholding the
                       Q-value with a cutoff FDR and merging spatially close peaks using GAP option to broad ones. This is equivalent to controlling FDR.
                       q-values are calculated from p-values using Benjamini-Hochberg procedure."/>
                <param name="gap" size="3" type="integer" value="5" min="0" label="GAP" argument="--gap"
                       help="Gap size to merge spatially close peaks. Useful for wide histone modifications.
                       Default value is 5, i.e. peaks separated by 5*BIN distance or less are merged."/>
            </when>
        </conditional>

        <param name="bin" size="5" type="integer" value="200" min="50" label="Bin size" argument="--bin"
               help="Peak analysis is performed on read coverage tiled into consecutive bins, with size being configurable.
               Default value is 200bp, approximately the length of one nucleosome."/>

        <!-- Resource settings; both values are passed as the first two arguments of the command. -->
        <section name="advanced_options" title="Advanced Options">
            <param name="memory" size="6" type="integer" value="4096" min="1024"
                   label="Memory limit in megabytes" help="Default value is 4096 megabytes"/>
            <param name="threads" argument="--threads" size="2" type="integer" value="2" min="1"
                   label="Threads number"
                   help="Default value is 2 threads. SPAN utilizes both multithreading and specialized processor extensions like SSE2, AVX, etc."/>
        </section>
    </inputs>
    <!--
        Datasets produced by the tool. Labels are built from the treatment name, the
        control name (only when a control dataset is selected) and the bin size; the
        peaks label additionally carries the FDR and GAP values.
        NOTE(review): from_work_dir is given glob patterns here; confirm that the target
        Galaxy release resolves them as intended.
    -->
    <outputs>
        <!-- SPAN model file; declared without a filter, so it is listed for both actions. -->
        <data name="model.span" format="span" from_work_dir="*.span"
              label="SPAN model on ${on_string} (${treatment_file.name}#if str($control_file) != 'None' then '_{}'.format($control_file.name) else '' #_${bin})"/>
        <!-- Peaks file; only created when the "peaks" action is selected (see filter). -->
        <data name="result.peak" format="bed" from_work_dir="*.peak"
              label="SPAN peaks on ${on_string} (${treatment_file.name}#if str($control_file) != 'None' then '_{}'.format($control_file.name) else '' #_${bin}_${action.fdr}_${action.gap})">
            <filter>action['action_selector'] == "peaks"</filter>
        </data>
        <!-- Log file collected from the working directory. -->
        <data name="span.log" format="txt" from_work_dir="*.log" label="SPAN logs on ${on_string}"/>
    </outputs>
    <help><![CDATA[
.. class:: infomark

**What it does**

SPAN (Semi-supervised Peak Analyzer) is a tool for analyzing ChIP-seq data.

-----

**Inputs**

*-t, --treatment <Path>*        **Required.** ChIP-seq treatment file. bam, bed or bed.gz file; if multiple files are given, they are treated as replicates.

*--chrom.sizes, --cs <Path>*    **Required.** Chromosome sizes path, can be downloaded at http://hgdownload.cse.ucsc.edu/goldenPath/<build>/bigZips/<build>.chrom.sizes

*-c, --control <Path>*          Control file. bam, bed or bed.gz file; provide either a single control file or a separate control file for each treatment file.

*--fragment <Integer>*          Fragment size; the read length is used if not given

*-b, --bin <Integer>*           Bin size

*-f, --fdr <Double>*            FDR value

*-g, --gap <Integer>*           Gap size to merge peaks

*-p, --peaks <Path>*            Path to result peaks file in ENCODE broadPeak (BED 6+3) format


-----

**Outputs**

This tool produces a SPAN binary model file (can be visualized in JBR Genome Browser and used in semi-supervised peak calling) and/or peaks in ENCODE broadPeak (BED 6+3) format.

Peak file columns contain the following data:

* **1st**: chromosome name
* **2nd**: start position of peak
* **3rd**: end position of peak
* **4th**: name of peak
* **5th**: integer score for display in genome browser (e.g. UCSC)
* **6th**: strand, either "." (=no strand) or "+" or "-"
* **7th**: fold-change
* **8th**: -log10pvalue
* **9th**: -log10qvalue

-----

**SPAN workflow**

* Convert raw reads to tags using the *FRAGMENT* parameter.
* Compute coverage for the whole genome tiled into bins of *BIN* base pairs.
* Fit a 3-state hidden Markov model that classifies bins as ZERO states with no coverage, LOW states of non-specific binding, and HIGH states of specific binding.
* Compute the posterior HIGH state probability of each bin.
* The trained model is saved in the *.span* binary format.
* Peaks are computed using the trained model and the *FDR* and *GAP* parameters.

------

**Citation**

If you use this tool in Galaxy, please cite XXX, et al. *In preparation.*

-----

**More Information**

* Project home page: https://research.jetbrains.org/groups/biolabs/tools/span-peak-analyzer
* Study cases: https://artyomovlab.wustl.edu/aging

]]></help>
</tool>