changeset 0:4aa531981208 draft

Uploaded
author iuc
date Mon, 13 Mar 2017 13:25:01 -0400
parents
children 7e0a12282c7f
files .shed.yml README.rst macros.xml multigps.xml test-data/cntrl_hg19.scidx test-data/expt_hg19.scidx test-data/hg19_all_events_table1.tabular test-data/hg19_all_events_table2.tabular test-data/hg19_experiment_events1.tabular test-data/hg19_experiment_events2.tabular test-data/hg19_output_html1.html test-data/hg19_output_html2.html test-data/hg19_replicates_counts1.tabular test-data/hg19_replicates_counts2.tabular tool-data/all_fasta.loc.sample tool-data/tool_data_table_conf.xml.sample
diffstat 16 files changed, 828 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/.shed.yml	Mon Mar 13 13:25:01 2017 -0400
@@ -0,0 +1,13 @@
+name: multigps
+owner: iuc
+description: Analyzes collections of multi-condition ChIP-seq data.
+homepage_url: http://mahonylab.org/software/multigps/
+long_description: |
+  Contains a tool that runs MultiGPS, a framework for analyzing collections of multi-condition ChIP-seq
+  datasets and characterizing differential binding events between conditions.  MultiGPS encourages consistency
+  in the reported binding event locations across conditions and provides accurate estimation of ChIP enrichment
+  levels at each event.  MultiGPS loads all data to memory, so you will need a lot of available memory if you
+  are running analysis over many conditions or large datasets.
+type: unrestricted
+categories:
+- ChIP-seq
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/README.rst	Mon Mar 13 13:25:01 2017 -0400
@@ -0,0 +1,17 @@
+MultiGPS wrapper for Galaxy
+================================
+
+* http://mahonylab.org/software/multigps/
+
+MultiGPS performs significant EM optimization of binding events along the genome and across experimental
+conditions, and it integrates motif-finding via MEME.  The tool loads all data into memory, so the potential
+exists for time and memory intensive analyses if running over many conditions or large datasets.
+
+Setting the memory allocation in Galaxy for this tool is handled using the <env id="_JAVA_OPTIONS"> tag for
+a selected job runner in the job_conf.xml file.
+
+License
+-------
+
+MIT
+
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/macros.xml	Mon Mar 13 13:25:01 2017 -0400
@@ -0,0 +1,8 @@
+<?xml version='1.0' encoding='UTF-8'?>
+<macros>
+    <xml name="motif_finding_params">
+        <param name="memenmotifs" type="integer" min="0" value="3" label="Number of motifs MEME should find for each condition" />
+        <param name="mememinw" type="integer" min="0" value="6" label="Minimum motif width for MEME" />
+        <param name="mememaxw" type="integer" min="0" value="16" label="Maximum motif width for MEME" />
+    </xml>
+</macros>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/multigps.xml	Mon Mar 13 13:25:01 2017 -0400
@@ -0,0 +1,458 @@
+<tool id="multigps" name="MultiGPS" version="0.73.0">
+    <description>analyzes collections of multi-condition ChIP-seq data</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <requirements>
+        <requirement type="package" version="0.73">multigps</requirement>
+    </requirements>
+    <command detect_errors="aggressive">
+        <![CDATA[
+            #set output_dir = $output_html.files_path
+            mkdir -p $output_dir &&
+            multigps
+            ## General options
+            ## Append .txt extensions to events hrefs
+            ## in output dataset so files will render
+            ## in the browser.
+            --eventsaretxt
+            ## Do not run the parallel version of meme
+            ## since it is not yet available in conda.
+            --meme1proc
+            --expt '$expt'
+            --format $expt.ext
+            #if str($ctrl) != 'None':
+                --ctrl '$ctrl'
+            #end if
+            --threads \${GALAXY_SLOTS:-4}
+            --geninfo '$chromInfo'
+            ## Advanced options
+            #set aoc = $advanced_options_cond
+            #if str($aoc.advanced_options) == 'display':
+                #set bmsc = $aoc.binding_model_smoothing_cond
+                #set gmsc = $aoc.gauss_model_smoothing_cond
+                #set rbec = $aoc.report_binding_events_cond
+                #set rloc = $aoc.reads_limits_options_cond
+                #set sdc = $aoc.scale_data_cond
+                #set umc = $aoc.use_motif_cond
+                #if str($umc.use_motif) == 'yes':
+                    #set rgc = $umc.reference_genome_cond
+                    --seq
+                    #if str($rgc.reference_genome_source) == 'cached':
+		        '${rgc.reference_genome.fields.path}'
+                    #else:
+		        '${rgc.reference_genome}'
+                    #end if
+                #end if
+                ## Limits on how many reads
+                #if str($rloc.reads_limits) == 'yes':
+                    --fixedpb $rloc.fixedpb
+                    --poissongausspb $rloc.poissongausspb
+                    #if str($rloc.nonunique) == 'yes':
+                        --nonunique
+                    #end if
+                    --mappability $rloc.mappability
+                    #if str($rloc.nocache) == 'yes':
+                        --nocache
+                    #end if
+                #end if
+                ## Scaling data
+                #if str($sdc.scale_data) == 'yes':
+                    #if str($sdc.scaling) == 'no':
+                        --noscaling $sdc.scaling
+                    #end if
+                    #if str($sdc.medianscale) == 'yes':
+                        --medianscale $sdc.medianscale
+                    #end if
+                    #if str($sdc.regressionscale) == 'yes':
+                        --regressionscale $sdc.regressionscale
+                    #end if
+                    #if str($sdc.sesscale) == 'yes':
+                        --sesscale $sdc.sesscale
+                    #end if
+                    #if $sdc.fixedscaling > 0:
+                        ‒‒fixedscaling $sdc.fixedscaling
+                    #end if
+                    --scalewin $sdc.scalewin
+                    #if str($sdc.plotscaling) == 'yes':
+                        --plotscaling $sdc.plotscaling
+                    #end if
+                #end if
+                ## Running MultiGPS
+                #if str($aoc.readdistributionfile) != 'None':
+                    --d '$aoc.readdistributionfile'
+                #end if
+                --r $aoc.maxtrainingrounds
+                #if str($aoc.nomodelupdate) == 'no':
+                    --nomodelupdate
+                #end if
+                --minmodelupdateevents $aoc.minmodelupdateevents
+                #if str($bmsc.nomodelsmoothing) == 'no':
+                    --nomodelsmoothing
+                #else:
+                    --splinesmoothparam $bmsc.splinesmoothparam
+                #end if
+                #if str($gmsc.gaussmodelsmoothing) == 'yes':
+                    --gaussmodelsmoothing
+                    --gausssmoothparam $gmsc.gausssmoothparam
+                #end if
+                #if str($aoc.jointinmodel) == 'yes':
+                    --jointinmodel
+                #end if
+                #if str($aoc.fixedmodelrange) == 'yes':
+                    --fixedmodelrange
+                #end if
+                --prlogconf $aoc.prlogconf
+                #if $aoc.fixedalpha > 0:
+                    --fixedalpha $aoc.fixedalpha
+                #end if
+                --alphascale $aoc.alphascale
+                #if str($aoc.mlconfignotshared) == 'no':
+                    --mlconfignotshared
+                #end if
+                #if str($aoc.exclude) != 'None':
+                    --exclude '$aoc.exclude'
+                #end if
+                ## MultiGPS priors
+                #if str($umc.use_motif) == 'yes':
+                    #set mpc = $umc.multigps_priors_cond
+                    #if str($mpc.multigps_priors) == 'yes':
+                        #set bmc = $mpc.both_motifs_cond
+                        #if str($mpc.noposprior) == 'no':
+                            --noposprior
+                        #end if
+                        --probshared $mpc.probshared
+                        #if str($bmc.nomotifs) == 'yes':
+                            --memenmotifs $bmc.memenmotifs
+                            --mememinw $bmc.mememinw
+                            --mememaxw $bmc.mememaxw
+                        #else:
+                            #set mfoc = $bmc.nomotifprior_cond
+                            --nomotifs
+                            --nomotifprior $mfoc.nomotifprior
+                            #if str($mfoc.nomotifprior) == 'yes':
+                                --memenmotifs $mfoc.memenmotifs
+                                --mememinw $mfoc.mememinw
+                                --mememaxw $mfoc.mememaxw
+                            #end if
+                        #end if
+                    #end if
+                #end if
+                ## Reporting binding events
+                #if str($rbec.report_binding_events) == 'yes':
+                    --q $rbec.minqvalue
+                    --minfold $rbec.minfold
+                    #if str($rbec.nodifftests) == 'no':
+                        --nodifftests
+                    #end if
+                    --edgerod $rbec.edgerod
+                    --diffp $rbec.diffp
+                #end if
+            #end if
+            2>&1
+            --out '$output_html.files_path'
+            && cp $output_dir/*.events.txt '$experiment_events' || true
+            && mv $output_dir/*.html '$output_html' || true
+            && mv $output_dir/*.table.txt '$all_events_table' || true
+            && mv $output_dir/*.counts '$replicates_counts' || true
+        ]]>
+    </command>
+    <inputs>
+        <param name="expt" type="data" format="bam,bed,scidx" label="Run MultiGPS on">
+            <validator type="unspecified_build" />
+        </param>
+        <param name="ctrl" type="data" format="bam,bed,scidx" optional="True" label="Optional file containing reads from a control experiment" help="Must be same forat as the input above" />
+        <!-- Advanced options -->
+        <conditional name="advanced_options_cond">
+            <param name="advanced_options" type="select" label="Advanced options">
+                <option value="hide" selected="true">Hide</option>
+                <option value="display">Display</option>
+            </param>
+            <when value="display">
+                <!-- Limits on how many reads -->
+                <conditional name="reads_limits_options_cond">
+                    <param name="reads_limits" type="select" label="Set limits on how many reads can have their 5′ end at the same position in each replicate?" help="Default behavior is to estimate a global per-base limit from a Poisson distribution parameterized by the number of reads divided by the number of mappable bases in the genome. The per-base limit is set as the count corresponding to the 10^-7 probability level from the Poisson.">
+                        <option value="no" selected="True">No</option>
+                        <option value="yes">Yes</option>
+                    </param>
+                    <when value="no" />
+                    <when value="yes">
+                        <param name="fixedpb" type="integer" value="0" min="0" label="Fixed per-base limit" help="Zero value estimates from background model"/>
+                        <param name="poissongausspb" type="integer" value="0" min="0" label="Poisson threshold for filtering per base" help="Filter per base using the specified Poisson threshold parameterized by a local Gaussian sliding window" />
+                        <param name="nonunique" type="select" label="Use non-unique reads?">
+                            <option value="no" selected="True">No</option>
+                            <option value="yes">Yes</option>
+                        </param>
+                        <param name="mappability" type="float" value="0.0" min="0.8" label="Fraction of the genome that is mappable for these experiments" />
+                        <param name="nocache" type="select" label="Turn off caching of the entire set of experiments?" help="Run slower with less memory" >
+                            <option value="no" selected="True">No</option>
+                            <option value="yes">Yes</option>
+                        </param>
+                    </when>
+                </conditional>
+                <!-- Scaling data -->
+                <conditional name="scale_data_cond">
+                    <param  name="scale_data" type="select" label="Set data scaling parameters?" help="Default behavior is to scale signal to corresponding controls using regression on the set of signal/control ratios in 10Kbp windows.">
+                        <option value="no" selected="True">No</option>
+                        <option value="yes">Yes</option>
+                    </param>
+                    <when value="yes">
+                        <param  name="scaling" type="select" label="Use signal vs control scaling?">
+                            <option value="yes" selected="True">Yes</option>
+                            <option value="no">No</option>
+                        </param>
+                        <param  name="medianscale" type="select" label="Use the median signal/control ratio as the scaling factor?">
+                            <option value="no" selected="True">No</option>
+                            <option value="yes">Yes</option>
+                        </param>
+                        <param  name="regressionscale" type="select" label="Use scaling by regression on binned tag counts?">
+                            <option value="no" selected="True">No</option>
+                            <option value="yes">Yes</option>
+                        </param>
+                        <param  name="sesscale" type="select" label="Estimate scaling factor by SES?" help="SES: Diaz, et al. Stat Appl Genet Mol Biol. 2012">
+                            <option value="no" selected="True">No</option>
+                            <option value="yes">Yes</option>
+                        </param>
+                        <param name="fixedscaling" type="float" value="0.0" min="0.0" label="Multiply control counts by total tag count ratio and then by this factor" help="Set as 0 to skip" />
+                        <param name="scalewin" type="integer" min="0" value="500" label="Window size for estimating scaling ratios" help="The value is the number of base pairs.  Use something much smaller than the default if scaling via SES (e.g. 200)." />
+                        <param  name="plotscaling" type="select" label="Plot diagnostic information for the chosen scaling method?">
+                            <option value="no" selected="True">No</option>
+                            <option value="yes">Yes</option>
+                        </param>
+                    </when>
+                    <when value="no" />
+                </conditional>
+                <!-- Running MultiGPS -->
+                <param name="readdistributionfile" type="data" optional="True" format="tabular" label="Optional binding event read distribution file for initializing models" help="A default initial distribution appropriate for ChIP-seq data is used if this option is not specified." />
+                <param name="maxtrainingrounds" type="integer" value="3" min="0" label="Maximum number of training rounds for updating binding event read distributions" />
+                <param  name="nomodelupdate" type="select" label="Perform binding model updates?">
+                    <option value="yes" selected="True">Yes</option>
+                    <option value="no">No</option>
+                </param>
+                <param name="minmodelupdateevents" type="integer" value="500" min="0" label="Minimum number of events to support an update of the read distribution" />
+                <conditional name="binding_model_smoothing_cond">
+                    <param  name="nomodelsmoothing" type="select" label="Perform binding model smoothing?" help="Smoothing performed with a cubic spline.">
+                        <option value="yes" selected="True">Yes</option>
+                        <option value="no">No</option>
+                    </param>
+                    <when value="yes">
+                        <param name="splinesmoothparam" type="integer" value="30" min="0" label="Spline smoothing parameter" />
+                    </when>
+                    <when value="no" />
+                </conditional>
+                <conditional name="gauss_model_smoothing_cond">
+                    <param  name="gaussmodelsmoothing" type="select" label="Use Gaussian model smoothing?" help="Select No to smooth with a cubic spline.">
+                        <option value="no" selected="True">No</option>
+                        <option value="yes">Yes</option>
+                    </param>
+                    <when value="no" />
+                    <when value="yes">
+                        <param name="gausssmoothparam" type="integer" value="3" min="0" label="Smoothing factor" help="Gaussian smoothing standard deviation." />
+                    </when>
+                </conditional>
+                <param  name="jointinmodel" type="select" label="Allow joint events in model updates?">
+                    <option value="no" selected="True">No</option>
+                    <option value="yes">Yes</option>
+                </param>
+                <param  name="fixedmodelrange" type="select" label="Keep binding model range fixed to inital size?" help="Select No to vary automatically">
+                    <option value="no" selected="True">No</option>
+                    <option value="yes">Yes</option>
+                </param>
+                <param name="prlogconf" type="integer" value="-6" label="Poisson log threshold for potential region scanning" />
+                <param name="fixedalpha" type="integer" value="0" min="0" label="Impose this alpha" help="This is a sparse prior on binding events in the MultiGPS model. It can be interpreted as a minimum number of reads that each binding event must be responsible for in the model. A zero value will estimate the alpha automatically." />
+                <param name="alphascale" type="float" value="1.0" min="0" label="Alpha scaling factor" />
+                <param  name="mlconfignotshared" type="select" label="Share component configs in the ML step?" help="Mainly affects the quantification of binding levels for binding events that are not shared but are located at nearby locations across experiments.">
+                    <option value="yes" selected="True">Yes</option>
+                    <option value="no">No</option>
+                </param>
+                <param name="exclude" type="data" optional="True" format="txt" label="Optional file containing a set of regions to ignore during MultiGPS training" help="Ideally exclude the mitochondrial genome and other blacklisted regions that contain artifactual accumulations of reads in both ChIP-seq and control experiments." />
+                <!-- MultiGPS priors -->
+                <conditional name="use_motif_cond">
+                    <param name="use_motif" type="select" label="Perform motif-finding or use a motif-prior?">
+                        <option value="no" selected="True">No</option>
+                        <option value="yes">Yes</option>
+                    </param>
+                    <when value="yes">
+                        <!-- Specifying the genome -->
+                        <conditional name="reference_genome_cond">
+                            <param name="reference_genome_source" type="select" label="Choose the source for the reference genome">
+                                <option value="cached">locally cached</option>
+                                <option value="history">from history</option>
+                            </param>
+                            <when value="cached">
+                                <param name="reference_genome" type="select" label="Using reference genome">
+                                    <options from_data_table="all_fasta">
+                                        <filter type="data_meta" key="dbkey" ref="expt" column="1"/>
+                                    </options>
+                                    <validator type="no_options" message="A built-in reference genome is not available for the build associated with the selected input file"/>
+                                </param>
+                            </when>
+                            <when value="history">
+                                <param name="reference_genome" type="data" format="fasta" label="Using reference genome">
+                                    <options>
+                                        <filter type="data_meta" key="dbkey" ref="expt"/>
+                                    </options>
+                                    <validator type="no_options" message="The current history does not include a fasta dataset with the build associated with the selected input file"/>
+                                </param>
+                            </when>
+                        </conditional>
+                        <!-- MultiGPS priors options -->
+                        <conditional name="multigps_priors_cond">
+                            <param name="multigps_priors" type="select" label="Specify MultiGPS priors options?">
+                                <option value="no" selected="True">No</option>
+                                <option value="yes">Yes</option>
+                            </param>
+                            <when value="no" />
+                            <when value="yes">
+                                <param name="noposprior" type="select" label="Perform inter-experiment positional prior?">
+                                    <option value="yes" selected="True">Yes</option>
+                                    <option value="no">No</option>
+                                </param>
+                                <param name="probshared" type="float" value="0.9" min="0.0" label="Probability that events are shared across conditions" />
+                                <conditional name="both_motifs_cond">
+                                    <param name="nomotifs" type="select" label="Perform both motif-finding and motif priors?">
+                                        <option value="yes" selected="True">Yes</option>
+                                        <option value="no">No</option>
+                                    </param>
+                                    <when value="yes">
+                                        <expand macro="motif_finding_params" />
+                                    </when>
+                                    <when value="no">
+                                        <conditional name="nomotifprior_cond">
+                                            <param name="nomotifprior" type="select" label="Perform motif-finding only?" help="Selecting Yes turns off motif priors.">
+                                                <option value="no" selected="True">No</option>
+                                                <option value="yes">Yes</option>
+                                            </param>
+                                            <when value="no" />
+                                            <when value="yes">
+                                                <expand macro="motif_finding_params" />
+                                            </when>
+                                        </conditional>
+                                    </when>
+                                </conditional>
+                            </when>
+                        </conditional>
+                    </when>
+                    <when value="no" />
+                </conditional>
+                <!-- Reporting binding events -->
+                <conditional name="report_binding_events_cond">
+                    <param name="report_binding_events" type="select" label="Report binding events?">
+                        <option value="no" selected="True">No</option>
+                        <option value="yes">Yes</option>
+                    </param>
+                    <when value="no" />
+                    <when value="yes">
+                        <param name="minqvalue" type="float" min="0" value="0.001" label="Minimum Q-value (corrected p-value) of reported binding events" />
+                        <param name="minfold" type="float" min="0" value="1.5" label="Minimum event fold-change vs scaled control" />
+                        <param name="nodifftests" type="select" label="Run differential enrichment tests?">
+                            <option value="yes" selected="True">Yes</option>
+                            <option value="no">No</option>
+                        </param>
+                        <param name="edgerod" type="float" min="0" value="0.15" label="EdgeR over-dispersion parameter value" />
+                        <param name="diffp" type="float" min="0" value="0.01" label="Minimum p-value for reporting differential enrichment" />
+                    </when>
+                </conditional>
+            </when>
+            <when value="hide" />
+        </conditional>
+    </inputs>
+    <outputs>
+        <data name="replicates_counts" format="tabular" label="${tool.name} replicates counts on ${on_string}"/>
+        <data name="all_events_table" format="tabular" label="${tool.name} all events table on ${on_string}"/>
+        <data name="experiment_events" format="tabular" label="${tool.name} experiment events on ${on_string}"/>
+        <data name="output_html" format="html" label="${tool.name} on ${on_string}"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="expt" value="expt_hg19.scidx" ftype="scidx" dbkey="hg19" />
+            <param name="advanced_options_cond" value="hide" />
+            <output name="output_html" file="hg19_output_html1.html" ftype="html" compare="contains"/>
+            <output name="experiment_events" file="hg19_experiment_events1.tabular" ftype="tabular"/>
+            <output name="all_events_table" file="hg19_all_events_table1.tabular" ftype="tabular"/>
+            <output name="replicates_counts" file="hg19_replicates_counts1.tabular" ftype="tabular"/>
+        </test>
+        <test>
+            <param name="expt" value="expt_hg19.scidx" ftype="scidx" dbkey="hg19" />
+            <param name="ctrl" value="cntrl_hg19.scidx" ftype="scidx" dbkey="hg19" />
+            <param name="advanced_options_cond" value="display" />
+            <output name="output_html" file="hg19_output_html2.html" ftype="html" compare="contains"/>
+            <output name="experiment_events" file="hg19_experiment_events2.tabular" ftype="tabular"/>
+            <output name="all_events_table" file="hg19_all_events_table2.tabular" ftype="tabular"/>
+            <output name="replicates_counts" file="hg19_replicates_counts2.tabular" ftype="tabular"/>
+        </test>
+    </tests>
+    <help>
+
+**What it does**
+
+MultiGPS is a framework for analyzing collections of multi-condition ChIP-seq datasets and characterizing
+differential binding events between conditions.  MultiGPS encourages consistency in the reported binding
+event locations across conditions and provides accurate estimation of ChIP enrichment levels at each event.
+MultiGPS performs significant EM optimization of binding events along the genome and across experimental
+conditions, and it integrates motif-finding via MEME.  The tool loads all data into memory, so the potential
+exists for time and memory intensive analyses if running over many conditions or large datasets.
+
+-----
+
+**Options**
+
+* **Loading data:**
+
+ - **Optional file containing reads from a control experiment** - must be same format as input experiment
+ - **Fixed per-base limit** - Fixed per-base limit (default: estimated from background model).
+ - **Poisson threshold for filtering per base** - Look at neighboring positions to decide what the per-base limit should be.
+ - **Use non-unique reads** - Use non-unique reads.
+ - **Fraction of the genome that is mappable for these experiments** - Fraction of the genome that is mappable for these experiments
+ - **Turn off caching of the entire set of experiments?** - Flag to turn off caching of the entire set of experiments (i.e. run slower with less memory).
+
+* **Scaling control vs signal counts:**
+
+ - **Use signal vs control scaling?** - Flag to turn off auto estimation of signal vs control scaling factor
+ - **Use the median signal/control ratio as the scaling factor?** - Flag to use scaling by median ratio (default = scaling by NCIS).
+ - **Use scaling by regression on binned tag counts?** - Flag to use scaling by regression (default = scaling by NCIS).
+ - **Estimate scaling factor by SES?** - Specify whether to estimate scaling factor by SES.
+ - **Multiply control counts by total tag count ratio and then by this factor** - Multiply control counts by total tag count ratio and then by this factor (default: NCIS).
+ - **Window size for estimating scaling ratios** - Window size in base pairs for estimating scaling ratios
+ - **Plot diagnostic information for the chosen scaling method?** - Flag to plot diagnostic information for the chosen scaling method.
+
+* **Running MultiGPS:**
+
+ - **Optional binding event read distribution file** - Binding event read distribution file for initializing models. The true distribution of reads around binding events is estimated during MultiGPS training. A default initial distribution appropriate for ChIP-seq data is used if this option is not specified.
+ - **Maximum number of training rounds for updating binding event read distributions** - Maximum number of training rounds for updating binding event read distributions.
+ - **Perform binding model updates?** - Perform binding model updates?
+ - **Minimum number of events to support an update of the read distribution** - Minimum number of events to support an update of the read distribution
+ - **Perform binding model smoothing?** - Smooth with a cubic spline using a specified smoothing factor.
+ - **Spline smoothing parameter** - Smoothing parameter for smoothing cubic spline.
+ - **Perform Gaussian model smoothing?** - Select "Yes" to use Gaussian model smoothing using a specified smoothing factor if binding model smoothing is not performed.
+ - **Allow joint events in model updates?** - Specify whether to allow joint events in model updates.
+ - **Keep binding model range fixed to inital size?** - Flag to keep binding model range fixed to inital size (default: vary automatically)
+ - **Poisson log threshold for potential region scanning** - Poisson log threshold for potential region scanning.
+ - **Alpha scaling factor** - Alpha scaling factor. Increasing this parameter results in stricter binding event calls. 
+ - **Impose this alpha** - The alpha parameter is a sparse prior on binding events in the MultiGPS model. It can be interpreted as a minimum number of reads that each binding event must be responsible for in the model. Default: estimate alpha automatically.
+ - **Share component configs in the ML step?** - Flag to not share component configs in the ML step
+ - **Optional file containing a set of regions to ignore during MultiGPS training** - File containing a set of regions to ignore during MultiGPS training. It’s a good idea to exclude the mitochondrial genome and other ‘blacklisted’ regions that contain artifactual accumulations of reads in both ChIP-seq and control experiments. MultiGPS will waste time trying to model binding events in these regions, even though they will not typically appear significantly enriched over the control (and thus will not be reported to the user).
+
+* **MultiGPS priors:**
+
+ - **Perform inter-experiment positional prior?** - Flag to turn off inter-experiment positional prior (default=on).
+ - **Probability that events are shared across conditions** - Probability that events are shared across conditions.
+ - **Perform both motif-finding and motif priors?** - Flag to turn off motif-finding and motif priors.
+ - **Perform motif-finding only?** - Flag to turn off motif priors only.
+ - **Number of motifs MEME should find for each condition** - Number of motifs MEME should find for each condition.
+ - **Minimum motif width for MEME** - minw arg for MEME.
+ - **Maximum motif width for MEME** - maxw arg for MEME.
+
+* **Reporting binding events:**
+
+ - **Minimum Q-value (corrected p-value) of reported binding events** - Minimum Q-value (corrected p-value) of reported binding events.
+ - **Minimum event fold-change vs scaled control** - Minimum event fold-change vs scaled control.
+ - **Run differential enrichment tests?** - Choose whether to run differential enrichment tests.
+ - **EdgeR over-dispersion parameter value** - EdgeR over-dispersion parameter value.
+ - **Minimum p-value for reporting differential enrichment** - Minimum p-value for reporting differential enrichment.
+
+    </help>
+    <citations>
+        <citation type="doi">10.1371/journal.pcbi.1003501</citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/cntrl_hg19.scidx	Mon Mar 13 13:25:01 2017 -0400
@@ -0,0 +1,20 @@
+#2016-11-04 08:39:36.449;localbam.bam;READ1
+chrom	index	forward	reverse	value
+chr1	13038	1	0	1
+chr1	16604	0	1	1
+chr1	19215	1	0	1
+chr1	20151	0	1	1
+chr1	48908	1	0	1
+chr1	49489	1	0	1
+chr1	55105	1	0	1
+chr1	57499	0	1	1
+chr1	66636	1	0	1
+chr1	67393	0	1	1
+chr1	72875	0	1	1
+chr1	73954	0	1	1
+chr1	82146	1	0	1
+chr1	85089	0	1	1
+chr1	101405	1	0	1
+chr1	106851	0	1	1
+chr1	108358	0	1	1
+chr1	113877	0	1	1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/expt_hg19.scidx	Mon Mar 13 13:25:01 2017 -0400
@@ -0,0 +1,101 @@
+#2016-11-04 08:39:36.449;localbam.bam;READ1
+chrom	index	forward	reverse	value
+chr1	13038	1	0	1
+chr1	16604	0	1	1
+chr1	19215	1	0	1
+chr1	20151	0	1	1
+chr1	48908	1	0	1
+chr1	49489	1	0	1
+chr1	55105	1	0	1
+chr1	57499	0	1	1
+chr1	66636	1	0	1
+chr1	67393	0	1	1
+chr1	72875	0	1	1
+chr1	73954	0	1	1
+chr1	82146	1	0	1
+chr1	85089	0	1	1
+chr1	101405	1	0	1
+chr1	106851	0	1	1
+chr1	108358	0	1	1
+chr1	113877	0	1	1
+chr1	122833	1	0	1
+chr1	123101	0	1	1
+chr1	124863	1	0	1
+chr1	125216	0	1	1
+chr1	139373	0	1	1
+chr1	150551	1	0	1
+chr1	158113	1	0	1
+chr1	160681	1	0	1
+chr1	167381	1	0	1
+chr1	173325	0	1	1
+chr1	234394	1	0	1
+chr1	234434	0	1	1
+chr1	235111	1	0	1
+chr1	236086	0	1	1
+chr1	236159	1	0	1
+chr1	237302	0	1	1
+chr1	237648	1	0	1
+chr1	237735	1	0	1
+chr1	238867	0	1	1
+chr1	240339	1	0	1
+chr1	250472	1	0	1
+chr1	250560	0	1	1
+chr1	252828	1	0	1
+chr1	255896	1	0	1
+chr1	258157	1	0	1
+chr1	354230	0	1	1
+chr1	521482	0	1	1
+chr1	523810	0	1	1
+chr1	526070	0	1	1
+chr1	526303	1	0	1
+chr1	526406	1	0	1
+chr1	527103	0	1	1
+chr1	527586	1	0	1
+chr1	527590	1	0	1
+chr1	527599	1	0	1
+chr1	530330	1	0	1
+chr1	532712	0	1	1
+chr1	534863	0	1	1
+chr1	535107	1	0	1
+chr1	535662	1	0	1
+chr1	535998	1	0	1
+chr1	537647	0	1	1
+chr1	539496	1	0	1
+chr1	540024	0	1	1
+chr1	540628	1	0	1
+chr1	542424	1	0	1
+chr1	544024	0	1	1
+chr1	546672	1	0	1
+chr1	549581	0	1	1
+chr1	562709	1	0	1
+chr1	563545	0	1	1
+chr1	566222	0	1	1
+chr1	566375	0	1	1
+chr1	566736	1	0	1
+chr1	566863	1	0	1
+chr1	566974	1	0	1
+chr1	568430	0	1	1
+chr1	568570	1	0	1
+chr1	569236	0	1	1
+chr1	569310	0	1	1
+chr1	569499	1	0	1
+chr1	569640	1	0	1
+chr1	569752	1	0	1
+chr1	569823	1	0	1
+chr1	569890	1	0	1
+chr1	569902	1	0	1
+chr1	569912	1	0	1
+chr1	569914	1	0	1
+chr1	569917	1	0	1
+chr1	569931	0	1	1
+chr1	569942	0	1	1
+chr1	569954	0	1	1
+chr1	569960	0	1	1
+chr1	569961	0	1	1
+chr1	569963	0	1	1
+chr1	570032	0	1	1
+chr1	570109	1	0	1
+chr1	570164	0	1	1
+chr1	570169	1	0	1
+chr1	570177	0	1	1
+chr1	570226	0	1	1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/hg19_all_events_table1.tabular	Mon Mar 13 13:25:01 2017 -0400
@@ -0,0 +1,31 @@
+### MultiGPS output
+#Condition	Name	Index	TotalSigCount	SignalFraction
+#Condition	experiment	0	99.0	0.555
+#Replicate	ParentCond	Name	Index	SigCount	CtrlCount	SigCtrlScaling	SignalFraction
+#Replicate	experiment	experiment:rep1	0	99.0	0	1	0.56
+#
+#Point	experiment_Sig	experiment_Ctrl	experiment_log2Fold	experiment_log2P	ActiveConds
+chr1:569927	14.3	0.0	3.838	-6.470	1
+chr1:536042	1.0	0.0	0.014	-1.474	1
+chr1:535151	1.0	0.0	0.014	-1.413	1
+chr1:534819	1.0	0.0	0.014	-1.348	1
+chr1:539540	1.0	0.0	0.024	-1.281	1
+chr1:539980	1.0	0.0	0.024	-1.211	1
+chr1:125172	1.0	0.0	0.033	-1.137	1
+chr1:49533	1.0	0.0	0.036	-1.059	1
+chr1:67349	1.0	0.0	0.041	-0.976	1
+chr1:563501	1.0	0.0	0.043	-0.889	1
+chr1:20107	1.0	0.0	0.046	-0.796	1
+chr1:568610	1.8	0.0	0.853	-0.696	1
+chr1:237754	2.0	0.0	1.015	-0.626	1
+chr1:236192	2.0	0.0	0.973	-0.589	1
+chr1:569214	2.0	0.0	0.989	-0.474	1
+chr1:569652	2.0	0.0	1.016	-0.433	1
+chr1:566215	2.0	0.0	0.991	-0.348	1
+chr1:234414	2.0	0.0	1.020	-0.211	1
+chr1:122967	2.0	0.0	0.996	-0.211	1
+chr1:570152	4.6	0.0	2.207	-0.100	1
+chr1:250516	2.0	0.0	0.998	-0.059	1
+chr1:527636	3.0	0.0	1.608	0.000	1
+chr1:566984	2.9	0.0	1.560	0.000	1
+chr1:526421	2.0	0.0	1.021	0.000	1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/hg19_all_events_table2.tabular	Mon Mar 13 13:25:01 2017 -0400
@@ -0,0 +1,31 @@
+### MultiGPS output
+#Condition	Name	Index	TotalSigCount	SignalFraction
+#Condition	experiment	0	99.0	0.555
+#Replicate	ParentCond	Name	Index	SigCount	CtrlCount	SigCtrlScaling	SignalFraction
+#Replicate	experiment	experiment:rep1	0	99.0	18.0	4.00	0.555
+#
+#Point	experiment_Sig	experiment_Ctrl	experiment_log2Fold	experiment_log2P	ActiveConds
+chr1:569927	14.3	0.0	3.838	-6.470	1
+chr1:536042	1.0	0.0	0.014	-1.281	1
+chr1:535151	1.0	0.0	0.014	-1.211	1
+chr1:534819	1.0	0.0	0.014	-1.137	1
+chr1:539540	1.0	0.0	0.024	-1.059	1
+chr1:539980	1.0	0.0	0.024	-0.976	1
+chr1:125172	1.0	0.0	0.033	-0.889	1
+chr1:563501	1.0	0.0	0.043	-0.796	1
+chr1:568610	1.8	0.0	0.853	-0.696	1
+chr1:237754	2.0	0.0	1.015	-0.626	1
+chr1:236192	2.0	0.0	0.973	-0.589	1
+chr1:569214	2.0	0.0	0.989	-0.474	1
+chr1:569652	2.0	0.0	1.016	-0.433	1
+chr1:566215	2.0	0.0	0.991	-0.348	1
+chr1:234414	2.0	0.0	1.020	-0.211	1
+chr1:122967	2.0	0.0	0.996	-0.211	1
+chr1:570152	4.6	0.0	2.207	-0.100	1
+chr1:250516	2.0	0.0	0.998	-0.059	1
+chr1:49533	1.0	1.0	-2.000	-0.006	1
+chr1:527636	3.0	0.0	1.608	0.000	1
+chr1:566984	2.9	0.0	1.560	0.000	1
+chr1:526421	2.0	0.0	1.021	0.000	1
+chr1:20107	1.0	1.0	-2.000	0.000	1
+chr1:67349	1.0	1.0	-2.000	0.000	1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/hg19_experiment_events1.tabular	Mon Mar 13 13:25:01 2017 -0400
@@ -0,0 +1,7 @@
+### MultiGPS output
+#Condition	Name	Index	TotalSigCount	SignalFraction
+#Condition	experiment	0	99.0	0.555
+#Replicate	ParentCond	Name	Index	SigCount	CtrlCount	CtrlScaling	SignalFraction
+#Replicate	experiment	experiment:rep1	0	99.0	0	1	0.555
+#
+#Point	experiment_Sig	experiment_Ctrl	experiment_log2Fold	experiment_log2P	Sequence	MotifScore
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/hg19_experiment_events2.tabular	Mon Mar 13 13:25:01 2017 -0400
@@ -0,0 +1,7 @@
+### MultiGPS output
+#Condition	Name	Index	TotalSigCount	SignalFraction
+#Condition	experiment	0	99.0	0.555
+#Replicate	ParentCond	Name	Index	SigCount	CtrlCount	CtrlScaling	SignalFraction
+#Replicate	experiment	experiment:rep1	0	99.0	18.0	4.000	0.555
+#
+#Point	experiment_Sig	experiment_Ctrl	experiment_log2Fold	experiment_log2P	Sequence	MotifScore
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/hg19_output_html1.html	Mon Mar 13 13:25:01 2017 -0400
@@ -0,0 +1,30 @@
+<html>
+	<body>
+</p>
+	<h2>Binding events</h2>
+	<table>
+		<tr>		<th>Condition</th>
+		<th>Events</th>
+		<th>File</th>
+		</tr>
+		<tr>		<td>experiment</td>
+		<td>0</td>
+		</tr>
+	</table>
+	<h2>Input data</h2>
+	<table>
+		<tr>		<th>Replicate</th>
+		<th>ReadCount</th>
+		<th>CtrlScaling</th>
+		<th>SignalFraction</th>
+		<th>ReadDistributionModel</th>
+		</tr>
+		<tr>		<td>experiment rep1</td>
+		<td>99.0</td>
+		<td>NA</td>
+		<td>0.555</td>
+		</tr>
+	</table>
+	<h2>Miscellaneous files</h2>
+	</body>
+</html>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/hg19_output_html2.html	Mon Mar 13 13:25:01 2017 -0400
@@ -0,0 +1,30 @@
+<html>
+	<body>
+</p>
+	<h2>Binding events</h2>
+	<table>
+		<tr>		<th>Condition</th>
+		<th>Events</th>
+		<th>File</th>
+		</tr>
+		<tr>		<td>experiment</td>
+		<td>0</td>
+		</tr>
+	</table>
+	<h2>Input data</h2>
+	<table>
+		<tr>		<th>Replicate</th>
+		<th>ReadCount</th>
+		<th>CtrlScaling</th>
+		<th>SignalFraction</th>
+		<th>ReadDistributionModel</th>
+		</tr>
+		<tr>		<td>experiment rep1</td>
+		<td>99.0</td>
+		<td>4.000</td>
+		<td>0.555</td>
+		</tr>
+	</table>
+	<h2>Miscellaneous files</h2>
+	</body>
+</html>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/hg19_replicates_counts1.tabular	Mon Mar 13 13:25:01 2017 -0400
@@ -0,0 +1,25 @@
+Point	experiment:rep1
+chr1:569927	14
+chr1:536042	1
+chr1:535151	1
+chr1:534819	1
+chr1:539540	1
+chr1:539980	1
+chr1:125172	1
+chr1:49533	1
+chr1:67349	1
+chr1:563501	1
+chr1:20107	1
+chr1:568610	2
+chr1:237754	2
+chr1:236192	2
+chr1:569214	2
+chr1:569652	2
+chr1:566215	2
+chr1:234414	2
+chr1:122967	2
+chr1:570152	5
+chr1:250516	2
+chr1:527636	3
+chr1:566984	3
+chr1:526421	2
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/hg19_replicates_counts2.tabular	Mon Mar 13 13:25:01 2017 -0400
@@ -0,0 +1,25 @@
+Point	experiment:rep1
+chr1:569927	14
+chr1:536042	1
+chr1:535151	1
+chr1:534819	1
+chr1:539540	1
+chr1:539980	1
+chr1:125172	1
+chr1:563501	1
+chr1:568610	2
+chr1:237754	2
+chr1:236192	2
+chr1:569214	2
+chr1:569652	2
+chr1:566215	2
+chr1:234414	2
+chr1:122967	2
+chr1:570152	5
+chr1:250516	2
+chr1:49533	1
+chr1:527636	3
+chr1:566984	3
+chr1:526421	2
+chr1:20107	1
+chr1:67349	1
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/all_fasta.loc.sample	Mon Mar 13 13:25:01 2017 -0400
@@ -0,0 +1,18 @@
+#This file lists the locations and dbkeys of all the fasta files
+#under the "genome" directory (a directory that contains a directory
+#for each build). The script extract_fasta.py will generate the file
+#all_fasta.loc. This file has the format (white space characters are
+#TAB characters):
+#
+#<unique_build_id>  <dbkey> <display_name>  <file_path>
+#
+#So, all_fasta.loc could look something like this:
+#
+#apiMel3    apiMel3 Honeybee (Apis mellifera): apiMel3  /path/to/genome/apiMel3/apiMel3.fa
+#hg19canon  hg19    Human (Homo sapiens): hg19 Canonical    /path/to/genome/hg19/hg19canon.fa
+#hg19full   hg19    Human (Homo sapiens): hg19 Full /path/to/genome/hg19/hg19full.fa
+#
+#Your all_fasta.loc file should contain an entry for each individual
+#fasta file. So there will be multiple fasta files for each build,
+#such as with hg19 above.
+#
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool-data/tool_data_table_conf.xml.sample	Mon Mar 13 13:25:01 2017 -0400
@@ -0,0 +1,7 @@
+<tables>
+    <!-- Locations of all fasta files under genome directory -->
+    <table name="all_fasta" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/all_fasta.loc" />
+    </table>
+</tables>