diffbind: diffbind.xml comparison

comparison diffbind.xml @ 10:d7725c5596ab draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/diffbind commit f970dcbe9d0e4c3714b1db74c404ea34223cf8ed

author	iuc
date	Tue, 20 Mar 2018 04:51:25 -0400
parents	6171163112de
children	4c7ab9995f9e

comparison

equal deleted inserted replaced

-:6171163112de
+:d7725c5596ab
-<tool id="diffbind" name="DiffBind" version="2.6.5.0">
+<tool id="diffbind" name="DiffBind" version="2.6.6.0">
 <description> differential binding analysis of ChIP-Seq peak data</description>
 <requirements>
-<requirement type="package" version="2.6.5">bioconductor-diffbind</requirement>
+<requirement type="package" version="2.6.6">bioconductor-diffbind</requirement>
 <requirement type="package" version="1.20.0">r-getopt</requirement>
-<!--added rmysql requirement to remove: "Warning: namespace ‘RMySQL’ is not available"-->
-<requirement type="package" version="0.10.11">r-rmysql</requirement>
 </requirements>
 <stdio>
 <regex match="Execution halted"
 source="both"
 level="fatal"
 source="both"
 level="fatal"
 description="An undefined error occured, please check your intput carefully and contact your administrator." />
 </stdio>
 <version_command><![CDATA[
-echo $(R --version | grep version | grep -v GNU)", DiffBind version" $(R --vanilla --slave -e "library(DiffBind); cat(sessionInfo()\$otherPkgs\$DiffBind\$Version)" 2> /dev/null | grep -v -i "WARNING: ")," getopt version" $(R --vanilla --slave -e "library(getopt); cat(sessionInfo()\$otherPkgs\$getopt\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", rmysql version" $(R --vanilla --slave -e "library(rmysql); cat(sessionInfo()\$otherPkgs\$rmysql\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
+echo $(R --version | grep version | grep -v GNU)", DiffBind version" $(R --vanilla --slave -e "library(DiffBind); cat(sessionInfo()\$otherPkgs\$DiffBind\$Version)" 2> /dev/null | grep -v -i "WARNING: ")
 ]]></version_command>
 <command><![CDATA[
 ## seems that diffbind also needs file extensions to work properly
 #set $counter = 1
 #for $sample in $samples:
 #end for
 Rscript '$__tool_directory__/diffbind.R'
 -i $infile
 -o '$outfile'
+-t $th
+-f $out.format
 -p '$plots'
--f $format
--t $th
+#if $out.binding_matrix:
-#if $binding_affinity_matrix:
 -b
+#end if
+#if $out.rdata:
+-r
 #end if
 ]]>
 </command>
 <configfiles>
 <configfile name="infile"><![CDATA[
 #end if
 #set $counter = $counter + 1
 #end for]]></configfile>
 </configfiles>
 <inputs>
-<repeat name="samples" title="Samples" min="2">
+<repeat name="samples" title="Samples" min="4">
 <param name="sample_id" type="text" value="Sample ID" label="Specify a sample id" help="e.g. BT474.1-" />
 <param name="tissue" type="text" value="Tissue" label="Specify the tissue" help="e.g. BT474" />
 <param name="factor" type="text" value="Factor Name" label="Specify a factor name" help="e.g. ER" />
 <param name="condition" type="text" value="Condition" label="Specify the condition" help="e.g. Resistent" />
 <param name="replicate" type="integer" value="1" label="Specify the replicate number" help="e.g. 1" />
 <param name="peaks" type="data" format="bed" label="Peak file" help="Result of your Peak calling experiment."/>
 </repeat>
 <param name="th" type="float" value="1" min="0" max="1"
 label="FDR Threshold"
 help="Significance threshold; all sites with FDR less than or equal to this value will be included in the report. A value of 1 will include all binding sites in the report. Default: 1"/>
-<param name="pdf" type="boolean" truevalue="" falsevalue="" checked="true"
-label="Visualising the analysis results"
+<!-- Output Options -->
-help="output an additional PDF file" />
+<section name="out" expanded="false" title="Output Options">
 <param name="format" type="select" label="Output Format">
 <option value="bed">BED</option>
 <option value="gff">GFF</option>
 <option value="wig">WIG</option>
 </param>
-<param name="binding_affinity_matrix" type="boolean" truevalue="True" falsevalue="" checked="False" label="Output binding affinity matrix?" help="Output a table of the binding scores" />
+<param name="pdf" type="boolean" truevalue="True" falsevalue="" checked="False" label="Visualising the analysis results" help="output an additional PDF file" />
+<param name="binding_matrix" type="boolean" truevalue="True" falsevalue="" checked="False" label="Output binding affinity matrix?" help="Output a table of the binding scores" />
+<param name="rdata" type="boolean" truevalue="True" falsevalue="" checked="False" label="Output RData file?" help="Output all the data used by R to construct the plots and tables, can be loaded into R. Default: No">
+</param>
+</section>
 </inputs>
 <outputs>
-<data name="outfile" format="bed" label="Differential binding sites on ${on_string}">
+<data name="outfile" format="bed" label="${tool.name} on ${on_string}: Differentially bound sites">
 <change_format>
 <when input="format" value="wig" format="wig" />
 <when input="format" value="gff" format="gff" />
 </change_format>
 </data>
-<data name="plots" format="pdf" label="Differential binding sites on ${on_string}">
+<data name="plots" format="pdf" label="${tool.name} on ${on_string}: Plots">
-<filter>pdf == True</filter>
+<filter>out['pdf']</filter>
 </data>
-<data name="binding_matrix" format="tabular" from_work_dir="bmatrix.tab" label="Differential binding sites on ${on_string}">
+<data name="binding_matrix" format="tabular" from_work_dir="bmatrix.tab" label="${tool.name} on ${on_string}: Binding matrix">
-<filter>binding_affinity_matrix == True</filter>
+<filter>out['binding_matrix']</filter>
+</data>
+<data name="rdata" format="rdata" from_work_dir="DiffBind_analysis.RData" label="${tool.name} on ${on_string}: RData file">
+<filter>out['rdata']</filter>
 </data>
 </outputs>
 <tests>
-<test>
+<test expect_num_outputs="4">
 <repeat name="samples">
 <param name="sample_id" value="BT4741" />
 <param name="tissue" value="BT474" />
 <param name="factor" value="ER" />
 <param name="condition" value="Resistant" />
 <param name="replicate" value="2" />
 <param name="bamreads" ftype="bam" value="MCF7_ER_2.bam" />
 <param name="peaks" ftype="bed" value="MCF7_ER_2.bed.gz" />
 </repeat>
 <param name="pdf" value="True" />
-<param name="binding_affinity_matrix" value="True" />
+<param name="binding_matrix" value="True" />
+<param name="rdata" value="True" />
 <output name="outfile" value="out_diffbind.bed" />
+<output name="plots" value="out_plots.pdf" compare="sim_size" />
 <output name="binding_matrix" value="out_binding.matrix" />
+<output name="rdata" value="DiffBind_analysis.RData" compare="sim_size"/>
 </test>
 </tests>
 <help><![CDATA[
 .. class:: infomark
 between two sample groups. It includes functions to support the processing of peak sets,
 including overlapping and merging peak sets, counting sequencing reads overlapping intervals
 in peak sets, and identifying statistically significantly differentially bound sites based on
 evidence of binding affinity (measured by differences in read densities). To this end it uses
 statistical routines developed in an RNA-Seq context (primarily the Bioconductor packages
-edgeR and DESeq2 ). Additionally, the package builds on Rgraphics routines to provide a
+edgeR and DESeq2). Additionally, the package builds on Rgraphics routines to provide a
 set of standardized plots to aid in binding analysis.
 The `DiffBind User Guide`_ includes a brief overview of the processing flow, followed by four sections of
 examples: the first focusing on the core task of obtaining differentially bound sites based on
 affinity data, the second working through the main plotting routines, the third discussing the
 Note DiffBind requires a minimum of four samples (two groups with two replicates each).
 .. _DiffBind: https://bioconductor.org/packages/release/bioc/html/DiffBind.html
 .. _`Bioconductor package`: https://bioconductor.org/packages/release/bioc/html/DiffBind.html
 .. _`DiffBind User Guide`: https://bioconductor.org/packages/release/bioc/vignettes/DiffBind/inst/doc/DiffBind.pdf
+-----
 **Inputs**
 DiffBind works primarily with peaksets, which are sets of genomic intervals representing
 candidate protein binding sites. Each interval consists of a chromosome, a start and end
 be associated with each peakset (one for the ChIP data, and optionally another representing
 a control sample)
 **Sample Information**
-You have to specify your sample information in the tool form above.
+You have to specify your sample information in the tool form above, where Condition contains the groups you want to compare.
 Example:
 ============= ========== ========== ============= =============
 **SampleID** **Tissue** **Factor** **Condition** **Replicate**
 MCF7r2        MCF7       ER         Resistant     2
 ZR751         ZR75       ER         Responsive    1
 ZR752         ZR75       ER         Responsive    2
 ============= ========== ========== ============= =============
-Or provide a sample sheet tabular file such as below.
-Example:
-======== ======  ====== ========== ========== ========= ====================  ========= ===================== ================= ==========
-SampleID Tissue  Factor Condition  Treatment  Replicate bamReads              ControlID bamControl            Peaks             PeakCaller
-======== ======  ====== ========== ========== ========= ====================  ========= ===================== ================= ==========
-BT4741   BT474   ER     Resistant  Full-Media  1        Chr18_BT474_ER_1.bam  BT474c    Chr18_BT474_input.bam BT474_ER_1.bed.gz bed
-BT4742   BT474   ER     Resistant  Full-Media  2        Chr18_BT474_ER_2.bam  BT474c    Chr18_BT474_input.bam BT474_ER_2.bed.gz bed
-MCF71    MCF7    ER     Responsive Full-Media  1        Chr18_MCF7_ER_1.bam   MCF7c     Chr18_MCF7_input.bam  MCF7_ER_1.bed.gz  bed
-MCF72    MCF7    ER     Responsive Full-Media  2        Chr18_MCF7_ER_2.bam   MCF7c     Chr18_MCF7_input.bam  MCF7_ER_2.bed.gz  bed
-MCF73    MCF7    ER     Responsive Full-Media  3        Chr18_MCF7_ER_3.bam   MCF7c     Chr18_MCF7_input.bam  MCF7_ER_3.bed.gz  bed
-T47D1    T47D    ER     Responsive Full-Media  1        Chr18_T47D_ER_1.bam   T47Dc     Chr18_T47D_input.bam  T47D_ER_1.bed.gz  bed
-T47D2    T47D    ER     Responsive Full-Media  2        Chr18_T47D_ER_2.bam   T47Dc     Chr18_T47D_input.bam  T47D_ER_2.bed.gz  bed
-MCF7r1   MCF7    ER     Resistant  Full-Media  1        Chr18_TAMR_ER_1.bam   TAMRc     Chr18_TAMR_input.bam  TAMR_ER_1.bed.gz  bed
-MCF7r2   MCF7    ER     Resistant  Full-Media  2        Chr18_TAMR_ER_2.bam   TAMRc     Chr18_TAMR_input.bam  TAMR_ER_2.bed.gz  bed
-ZR751    ZR75    ER     Responsive Full-Media  1        Chr18_ZR75_ER_1.bam   ZR75c     Chr18_ZR75_input.bam  ZR75_ER_1.bed.gz  bed
-ZR752    ZR75    ER     Responsive Full-Media  2        Chr18_ZR75_ER_2.bam   ZR75c     Chr18_ZR75_input.bam  ZR75_ER_2.bed.gz  bed
-======== ======  ====== ========== ========== ========= ====================  ========= ===================== ================= ==========
 **Peak files**
 Result of your Peak calling experiment in bed format, one file for each sample is required.
 ======= ======= ======= =============== =======
 * BAM file which contains the mapped sequencing reads can be associated with each peakset
 * Control BAM files represent a control dataset and are optional, but have to be specified for all samples when used.
+-----
 **Outputs**
+This tool outputs
+* differentially bound sites in BED, WIG or GFF format
+Optionally, under **Output Options** you can choose to output
+* a correlation heatmap plot
+* a binding affinity matrix
+* an RData file
+**Differentially Bound Sites**
 As output format you can choose BED, GFF, WIG.
-Example:
+Example - BED format:
-======== ====== =======+
+=====  ======  ======  ===== ====  ====    ====    ====    =====   ========    ========
-seqnames ranges strand             Conc Conc_Resistant
+1      2       3       4     5     6       7       8       9       10          **11**
+=====  ======  ======  ===== ====  ====    ====    ====    =====   ========    ========
-2452     chr18 [64490686, 64491186] * | 6.36 1.39
+chr18  394600  396513  1914    *   7.15    7.89    5.55    2.35    7.06e-24    9.84e-21
-1291     chr18 [34597713, 34598213] * | 5.33 0.22
+chr18  111567  112005  439     *   5.71    3.63    6.53    -2.89   1.27e-08    8.88e-06
-976      chr18 [26860997, 26861497] * | 7.3 3.13
+chr18  346464  347342  879     *   5       3.24    5.77    -2.52   6.51e-06    0.00303
-2338     chr18 [60892900, 60893400] * | 7.13 1.84
+chr18  399014  400382  1369    *   7.62    8.05    7       1.04    1.04e-05    0.00364
-2077     chr18 [55569087, 55569587] * | 5.52 1.89
+chr18  371110  372102  993     *   4.63    5.36    3.07    2.3     8.1e-05     0.0226
+=====  ======  ======  ===== ====  ====    ====    ====    =====   ========    ========
-Conc_Responsive Fold p-value FDR
-<numeric> <numeric> <numeric> <numeric>
+Columns contain the following data:
-2452 7 -5.61 3.57e-10 1.02e-06
-1291 5.97 -5.75 1.1e-09 1.57e-06
+* **1st**: Chromosome name
-976 7.92 -4.79 1.1e-08 1.05e-05
+* **2nd**: Start position of site
-2338 7.77 -5.93 1.68e-08 1.17e-05
+* **3rd**: End position of site
-2077 6.13 -4.23 2.36e-08 1.17e-05
+* **4th**: Length of site
+* **5th**: Strand
-The value columns show the
+* **6th**: Mean read concentration over all the samples (the default calculation uses log2 normalized ChIP read counts with control read counts subtracted)
-Conc mean read concentration over all the samples (the default calculation uses log2 normalized ChIP read counts with control read counts subtracted)
+* **7th**: Mean concentration over the first (e.g. Resistant) group
-Conc_Resistant mean concentration over the first (Resistant) group
+* **8th**: Mean concentration over the second (e.g. Responsive) group
-Conc_Responsive mean concentration over second (Responsive) group
+* **9th**: Fold shows the difference in mean concentrations between the two groups (e.g. Resistant - Responsive), with a positive value indicating increased binding affinity in the first group and a negative value indicating increased binding affinity in the second group.
-Fold column shows the difference in mean concentrations between the two groups (Conc_Resistant - Conc_Responsive), with a positive value indicating increased binding affinity in the Resistant group and a negative value indicating increased binding affinity in the Responsive group.
+* **10th**: P-value confidence measure for identifying these sites as differentially bound
-p-value confidence measure for identifying these sites as differentially bound
+* **11th**: a multiple testing corrected FDR p-value
-FDR a multiple testing corrected FDR p-value
 **Binding Affinity Matrix**
 The final result of counting is a binding affinity matrix containing a (normalized) read count for each sample at every potential binding site. With this matrix, the samples can be re-clustered using affinity, rather than occupancy, data. The binding affinity matrix can be used for QC plotting as well as for subsequent
 MCF7r2 MCF7   ER     Resistant  Full-Media 2         counts 2845      0.13
 ZR751  ZR75   ER     Responsive Full-Media 1         counts 2845      0.32
 ZR752  ZR75   ER     Responsive Full-Media 2         counts 2845      0.22
 ====== ====== ====== ========== ========== ========= ====== ========= ====
+-----
 **More Information**
 Generally, processing data with DiffBind involves five phases:
 #. Counting reads
 #. Differential binding affinity analysis
 #. Plotting and reporting
-* **Reading in peaksets**:
+**Reading in peaksets**:
 The first step is to read in a set of peaksets and associated
-metadata. Peaksets are derived either from ChIP-Seq peak callers, such as MACS
+metadata. Peaksets are derived either from ChIP-Seq peak callers, such as **MACS2**, or using some other criterion (e.g. genomic windows, or all the promoter regions
-([1]), or using some other criterion (e.g. genomic windows, or all the promoter regions
+in a genome).  A single experiment can have more than
-in a genome). The easiest way to read in peaksets is using a comma-separated value
-(csv) sample sheet with one line for each peakset. (Spreadsheets in Excel® format, with
-a .xls or .xlsx suffix, are also accepted.) A single experiment can have more than
 one associated peakset; e.g. if multiple peak callers are used for comparison purposes
 each sample would have more than one line in the sample sheet. Once the peaksets
 are read in, a merging function finds all overlapping peaks and derives a single set of
 unique genomic intervals covering all the supplied peaks (a consensus peakset for the
 experiment).
-* **Occupancy analysis**:
+**Occupancy analysis**:
 Peaksets, especially those generated by peak callers, provide
 an insight into the potential occupancy of the protein being ChIPed for at specific
 genomic loci. After the peaksets have been loaded, it can be useful to perform some
 exploratory plotting to determine how these occupancy maps agree with each other,
 overlaps to be examined, as well as functions to determine how well similar samples
 cluster together. Beyond quality control, the product of an occupancy analysis may be
 a consensus peakset, representing an overall set of candidate binding sites to be used
 in further analysis.
-* **Counting reads**:
+**Counting reads**:
 Once a consensus peakset has been derived, DiffBind can use the
 supplied sequence read files to count how many reads overlap each interval for each
 unique sample. The peaks in the consensus peakset may be re-centered and trimmed
 based on calculating their summits (point of greatest read overlap) in order to provide
 containing a (normalized) read count for each sample at every potential binding site.
 With this matrix, the samples can be re-clustered using affinity, rather than occupancy,
 data. The binding affinity matrix is used for QC plotting as well as for subsequent
 differential analysis.
-* **Differential binding affinity analysis**:
+**Differential binding affinity analysis**:
 The core functionality of DiffBind is the
 differential binding affinity analysis, which enables binding sites to be identified that
 are statistically significantly differentially bound between sample groups. To accomplish
 this, first a contrast (or contrasts) is established, dividing the samples into groups to
 be compared. Next the core analysis routines are executed, by default using DESeq2.
 This will assign a p-value and FDR to each candidate binding site indicating confidence
 that they are differentially bound.
-* **Plotting and reporting**:
+**Plotting and reporting**:
 Once one or more contrasts have been run, DiffBind provides
 a number of functions for reporting and plotting the results. MA plots give an
 overview of the results of the analysis, while correlation heatmaps and PCA plots show
 how the groups cluster based on differentially bound sites. Boxplots show the distribution
 of reads within differentially bound sites corresponding to whether they gain or
 lose affinity between the two sample groups. A reporting mechanism enables differentially
 bound sites to be extracted for further processing, such as annotation, motif, and
-pathway analyses.
+pathway analyses. *Note that currently only the correlation plot is implemented in this Galaxy tool.*
+-----
 **References**
 DiffBind Authors:  Rory Stark, Gordon Brown (2011)
 Wrapper authors: Bjoern Gruening, Pavankumar Videm

Mercurial > repos > bgruening > diffbind

comparison diffbind.xml @ 10:d7725c5596ab draft