Mercurial > repos > bgruening > diffbind
changeset 12:fa56d93f7980 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/diffbind commit 11f68fe2b872f5abc5b660adb10336b0955fa0ee
author | iuc |
---|---|
date | Thu, 19 Apr 2018 17:15:53 -0400 |
parents | 4c7ab9995f9e |
children | 1de83981d43c |
files | diffbind.xml test-data/out_diffbind.bed test-data/out_diffbind.tab |
diffstat | 3 files changed, 82 insertions(+), 86 deletions(-) |
line wrap: on
line diff
--- a/diffbind.xml Sat Apr 07 15:45:41 2018 -0400 +++ b/diffbind.xml Thu Apr 19 17:15:53 2018 -0400 @@ -1,4 +1,4 @@ -<tool id="diffbind" name="DiffBind" version="2.6.6.1"> +<tool id="diffbind" name="DiffBind" version="2.6.6.2"> <description> differential binding analysis of ChIP-Seq peak data</description> <requirements> <requirement type="package" version="2.6.6">bioconductor-diffbind</requirement> @@ -17,7 +17,7 @@ <regex match="Error in" source="both" level="fatal" - description="An undefined error occured, please check your intput carefully and contact your administrator." /> + description="An undefined error occured, please check your input carefully and contact your administrator." /> </stdio> <version_command><![CDATA[ echo $(R --version | grep version | grep -v GNU)", DiffBind version" $(R --vanilla --slave -e "library(DiffBind); cat(sessionInfo()\$otherPkgs\$DiffBind\$Version)" 2> /dev/null | grep -v -i "WARNING: ")", rjson version" $(R --vanilla --slave -e "library(rjson); cat(sessionInfo()\$otherPkgs\$rjson\$Version)" 2> /dev/null | grep -v -i "WARNING: ") @@ -65,7 +65,7 @@ #end for $temp_factor.reverse() -$temp_factor_names.append([str($factorName), $temp_factor]) +$temp_factor_names.append(["Condition", $temp_factor]) Rscript '$__tool_directory__/diffbind.R' @@ -104,26 +104,30 @@ ]]> </command> <inputs> - <param name="factorName" type="text" label="Name" help="Name of experiment factor of interest (e.g. Condition). One factor must be entered and there must be two or more groups. NOTE: Please only use letters, numbers or underscores."> - <sanitizer> - <valid initial="string.letters,string.digits"><add value="_" /></valid> - </sanitizer> - </param> - <repeat name="rep_group" title="Group" min="2" default="2"> + <repeat name="rep_group" title="Group" min="2" max="2" default="2"> <param name="groupName" type="text" label="Name" - help="Name of group that the peak files belong to (e.g. Resistant or Responsive). 
NOTE: Please only use letters, numbers or underscores (case sensitive)."> - <sanitizer> - <valid initial="string.letters,string.digits"><add value="_" /></valid> - </sanitizer> + help="Name for the Group that the peak and BAM files belong to e.g. Resistant/Responsive (two Groups in total must be specified for DiffBind). NOTE: Please only use letters, numbers or underscores."> + <sanitizer> + <valid initial="string.letters,string.digits"><add value="_" /></valid> + </sanitizer> + <validator type="empty_field" /> </param> <param name="peaks" type="data" format="bed" multiple="true" label="Peak files" help="Result of your Peak calling experiment"/> - <param name="bamreads" type="data" format="bam" multiple="true" label="Read BAM file" help="Specify the Read BAM file used for Peak calling."/> - <param name="bamcontrol" type="data" format="bam" multiple="true" optional="True" label="Control BAM file" help="If specifying a control BAM file, all samples are required to specify one."/> + <param name="bamreads" type="data" format="bam" multiple="true" label="Read BAM file" help="Specify the Read BAM file used in the Peak calling."/> + <param name="bamcontrol" type="data" format="bam" multiple="true" optional="True" label="Control BAM file" help="If specifying a control BAM file, all samples are required to specify one, see Help section below."/> </repeat> - <param name="scorecol" type="integer" min="0" value="8" label="Score Column" help="Column in peak files that contains peak scores. Default: 8 (narrowPeak)"/> + <param name="scorecol" type="integer" min="0" value="8" label="Score Column" help="Column in peak files that contains peak scores. Default: 8 (narrowPeak)"> + <sanitizer> + <valid initial="string.digits"/> + </sanitizer> + </param> <param name="lowerbetter" type="boolean" truevalue="True" falsevalue="" checked="False" label="Lower score is better?" help="DiffBind by default assumes that a higher score indicates a better peak, for example narrowPeaks -log10pvalue. 
If this is not the case, for example if the score is a p-value or FDR, set this option to Yes. Default: No" /> - <param name="summits" type="integer" min="0" optional="True" label="Summits" help="Extend peaks Nbp up- and downstream of the summit. For punctate peaks it is advisable to extend (e.g. 250bp), see the DiffBind User Guide"/> + <param name="summits" type="integer" min="0" optional="True" label="Summits" help="Extend peaks Nbp up- and downstream of the summit. For punctate peaks it is advisable to extend (e.g. 250bp), see the DiffBind User Guide"> + <sanitizer> + <valid initial="string.digits"/> + </sanitizer> + </param> <param name="th" type="float" value="0.05" min="0" max="1" label="FDR Threshold" help="Significance threshold; all sites with FDR less than or equal to this value will be included in the output. A value of 1 will output all binding sites. Default: 0.05"/> <!-- Output Options --> @@ -142,12 +146,7 @@ </inputs> <outputs> - <data name="outfile" format="bed" label="${tool.name} on ${on_string}: Differentially bound sites"> - <change_format> - <when input="format" value="wig" format="wig" /> - <when input="format" value="gff" format="gff" /> - </change_format> - </data> + <data name="outfile" format="tabular" label="${tool.name} on ${on_string}: Differentially bound sites" /> <data name="plots" format="pdf" label="${tool.name} on ${on_string}: Plots"> <filter>out['pdf']</filter> </data> @@ -167,7 +166,6 @@ <tests> <test expect_num_outputs="6"> - <param name="factorName" value="Condition"/> <repeat name="rep_group"> <param name="groupName" value="Resistant"/> <param name="peaks" value="BT474_ER_1.bed.gz,BT474_ER_2.bed.gz"/> @@ -184,7 +182,7 @@ <param name="rdata" value="True" /> <param name="rscript" value="True"/> <param name="analysis_info" value="True"/> - <output name="outfile" value="out_diffbind.bed" /> + <output name="outfile" value="out_diffbind.tab" /> <output name="plots" value="out_plots.pdf" compare="sim_size" /> <output 
name="binding_matrix" value="out_binding.matrix" /> <output name="rdata" value="DiffBind_analysis.RData" compare="sim_size"/> @@ -235,18 +233,18 @@ candidate protein binding sites. Each interval consists of a chromosome, a start and end position, and usually a score of some type indicating confidence in, or strength of, the peak. Associated with each peakset are metadata relating to the experiment from which the peakset -was derived. Additionally, files containing mapped sequencing reads (generally .bam files) can +was derived. Additionally, files containing mapped sequencing reads (BAM files) need to be associated with each peakset (one for the ChIP data, and optionally another representing a control sample) -**Sample Information** +**Groups** -You have to specify your sample information in the tool form above, where Factor is the groups you want to compare (e.g Resistant and Responsive). +You have to specify the name of the Group and the peak and BAM files for the two Groups you want to compare (e.g Resistant and Responsive) in the tool form above. Example: ============= ============= - **SampleID** **Group** + **Sample** **Group** ------------- ------------- BT4741 Resistant BT4742 Resistant @@ -259,21 +257,21 @@ Result of your Peak calling experiment in bed format, one file for each sample is required. The peak caller, format and score column can be specified in the tool form above. The default settings expect narrowPeak bed format, which has the score in the 8th column (-log10pvalue), and can be output from MACS2. 
-Example (MACS.xls file in bed format): +Example: ======= ======= ======= =============== ============== 1 2 3 4 **5 (Score)** ======= ======= ======= =============== ============== - chr18 215562 216063 MACS_peak_16037 56.11 - chr18 311530 312105 MACS_peak_16038 222.49 - chr18 356656 357315 MACS_peak_16039 92.06 - chr18 371110 372092 MACS_peak_16040 123.86 - chr18 395116 396464 MACS_peak_16041 1545.39 - chr18 399014 400382 MACS_peak_16042 1835.19 - chr18 499134 500200 MACS_peak_16043 748.32 - chr18 503518 504552 MACS_peak_16044 818.30 - chr18 531672 532274 MACS_peak_16045 159.30 - chr18 568326 569282 MACS_peak_16046 601.11 + chr18 215562 216063 peak_16037 56.11 + chr18 311530 312105 peak_16038 222.49 + chr18 356656 357315 peak_16039 92.06 + chr18 371110 372092 peak_16040 123.86 + chr18 395116 396464 peak_16041 1545.39 + chr18 399014 400382 peak_16042 1835.19 + chr18 499134 500200 peak_16043 748.32 + chr18 503518 504552 peak_16044 818.30 + chr18 531672 532274 peak_16045 159.30 + chr18 568326 569282 peak_16046 601.11 ======= ======= ======= =============== ============== * BAM file which contains the mapped sequencing reads associated with each peakset, one file for each sample is required. @@ -285,7 +283,7 @@ This tool outputs - * differentially bound sites in BED, WIG or GFF format + * a table of differentially bound sites Optionally, under **Output Options** you can choose to output @@ -297,33 +295,31 @@ **Differentially Bound Sites** -As output format you can choose BED, GFF, WIG. 
- -Example - BED format: +Example: - ======== ====== ====== ===== ====== ===== =============== ============== ======= ======== ======== - seqnames start end width strand Conc Conc_Responsive Conc_Resistant Fold p.value **FDR** - ======== ====== ====== ===== ====== ===== =============== ============== ======= ======== ======== - chr18 394600 396513 1914 * 7.15 5.55 7.89 -2.35 7.06e-24 9.84e-21 - chr18 111567 112005 439 * 5.71 6.53 3.63 2.89 1.27e-08 8.88e-06 - chr18 346464 347342 879 * 5 5.77 3.24 2.52 6.51e-06 0.00303 - chr18 399014 400382 1369 * 7.62 7 8.05 -1.04 1.04e-05 0.00364 - chr18 371110 372102 993 * 4.63 3.07 5.36 -2.3 8.1e-05 0.0226 - ======== ====== ====== ===== ====== ===== =============== ============== ======= ======== ======== + ======== ====== ====== ===== ====== ===== =============== ============== ====== ======== ======== + seqnames start end width strand Conc Conc_Responsive Conc_Resistant Fold p.value **FDR** + ======== ====== ====== ===== ====== ===== =============== ============== ====== ======== ======== + chr18 394600 396513 1914 \* 7.15 5.55 7.89 -2.35 7.06e-24 9.84e-21 + chr18 111567 112005 439 \* 5.71 6.53 3.63 2.89 1.27e-08 8.88e-06 + chr18 346464 347342 879 \* 5 5.77 3.24 2.52 6.51e-06 0.00303 + chr18 399014 400382 1369 \* 7.62 7 8.05 -1.04 1.04e-05 0.00364 + chr18 371110 372102 993 \* 4.63 3.07 5.36 -2.3 8.1e-05 0.0226 + ======== ====== ====== ===== ====== ===== =============== ============== ====== ======== ======== - Columns contain the following data: +Columns contain the following data: -* **1st**: Chromosome name -* **2nd**: Start position of site -* **3rd**: End position of site -* **4th**: Length of site -* **5th**: Strand -* **6th**: Mean read concentration over all the samples (the default calculation uses log2 normalized ChIP read counts with control read counts subtracted) -* **7th**: Mean concentration over the first (e.g. Resistant) group -* **8th**: Mean concentration over second (e.g. 
Responsive) group -* **9th**: Fold shows the difference in mean concentrations between the two groups (e.g. Resistant - Responsive), with a positive value indicating increased binding affinity in the first group and a negative value indicating increased binding affinity in the second group. -* **10th**: P-value confidence measure for identifying these sites as differentially bound -* **11th**: a multiple testing corrected FDR p-value +* **seqnames**: Chromosome name +* **start**: Start position of site +* **end**: End position of site +* **width**: Length of site +* **strand**: Strand +* **Conc**: Mean read concentration over all the samples (the default calculation uses log2 normalized ChIP read counts with control read counts subtracted) +* **Responsive**: Mean concentration over the first (e.g. Responsive) group +* **Resistant**: Mean concentration over second (e.g. Resistant) group +* **Fold**: Fold shows the difference in mean concentrations between the two groups (e.g. Responsive - Resistant), with a positive value indicating increased binding affinity in the first group and a negative value indicating increased binding affinity in the second group. 
+* **p.value**: P-value confidence measure for identifying these sites as differentially bound +* **FDR**: a multiple testing corrected FDR p-value **Binding Affinity Matrix** @@ -333,20 +329,20 @@ Example: - ===== ====== ====== ================ ================ ================ ================ - CHR START END MCF7_ER_1.bed MCF7_ER_2.bed BT474_ER_1.bed BT474_ER_2.bed - ===== ====== ====== ================ ================ ================ ================ - chr18 111567 112005 137.615208000375 59.878372946728 29.4139375878664 19.9594576489093 - chr18 189223 189652 19.9594576489093 12.6059732519427 11.5554754809475 23.110950961895 - chr18 215232 216063 11.5554754809475 15.7574665649284 31.5149331298568 72.4843461986707 - chr18 311530 312172 17.8584621069189 11.5554754809475 54.6258840917518 43.0704086108043 - chr18 346464 347342 75.6358395116564 40.9694130688139 21.0099554199046 16.8079643359236 - chr18 356560 357362 11.5554754809475 14.7069687939332 57.7773774047375 53.5753863207566 - chr18 371110 372102 8.40398216796182 9.45447993895705 81.9388261376278 82.989323908623 - chr18 394600 396513 56.7268796337423 43.0704086108043 510.541916703681 438.05757050501 - chr18 399014 400382 156.524167878289 117.655750351465 558.864814169461 496.885445680743 - chr18 498906 500200 767.913870597511 278.381909313735 196.443083176108 181.736114382174 - ===== ====== ====== ================ ================ ================ ================ + ===== ====== ====== ========= ========= ========== ========== + CHR START END MCF7_ER_1 MCF7_ER_2 BT474_ER_1 BT474_ER_2 + ===== ====== ====== ========= ========= ========== ========== + chr18 111567 112005 137.6152 59.87837 29.41393 19.95945 + chr18 189223 189652 19.95945 12.60597 11.55547 23.11095 + chr18 215232 216063 11.55547 15.75746 31.51493 72.48434 + chr18 311530 312172 17.85846 11.55547 54.62588 43.07040 + chr18 346464 347342 75.63583 40.96941 21.00995 16.80796 + chr18 356560 357362 11.55547 14.70696 57.77737 53.57538 + chr18 371110 
372102 8.403982 9.454479 81.93882 82.98932 + chr18 394600 396513 56.72687 43.07040 510.5419 438.0575 + chr18 399014 400382 156.5241 117.6557 558.8648 496.8854 + chr18 498906 500200 767.9138 278.3819 196.4430 181.7361 + ===== ====== ====== ========= ========= ========== ========== ----- @@ -404,7 +400,7 @@ differential binding affinity analysis, which enables binding sites to be identified that are statistically significantly differentially bound between sample groups. To accomplish this, first a contrast (or contrasts) is established, dividing the samples into groups to -be compared. Next the core analysis routines are executed, by default using DESeq2 . +be compared. Next the core analysis routines are executed, by default using DESeq2. This will assign a p-value and FDR to each candidate binding site indicating confidence that they are differentially bound.
--- a/test-data/out_diffbind.bed Sat Apr 07 15:45:41 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,6 +0,0 @@ -seqnames start end width strand Conc Conc_Responsive Conc_Resistant Fold p.value FDR -chr18 394600 396513 1914 * 7.15 5.55 7.89 -2.35 7.06e-24 9.84e-21 -chr18 111567 112005 439 * 5.71 6.53 3.63 2.89 1.27e-08 8.88e-06 -chr18 346464 347342 879 * 5 5.77 3.24 2.52 6.51e-06 0.00303 -chr18 399014 400382 1369 * 7.62 7 8.05 -1.04 1.04e-05 0.00364 -chr18 371110 372102 993 * 4.63 3.07 5.36 -2.3 8.1e-05 0.0226
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/out_diffbind.tab Thu Apr 19 17:15:53 2018 -0400 @@ -0,0 +1,6 @@ +seqnames start end width strand Conc Conc_Responsive Conc_Resistant Fold p.value FDR +chr18 394600 396513 1914 * 7.15 5.55 7.89 -2.35 7.06e-24 9.84e-21 +chr18 111567 112005 439 * 5.71 6.53 3.63 2.89 1.27e-08 8.88e-06 +chr18 346464 347342 879 * 5 5.77 3.24 2.52 6.51e-06 0.00303 +chr18 399014 400382 1369 * 7.62 7 8.05 -1.04 1.04e-05 0.00364 +chr18 371110 372102 993 * 4.63 3.07 5.36 -2.3 8.1e-05 0.0226