Mercurial > repos > bgruening > deeptools_plot_correlation

--- a/deepTools_macros.xml	Tue Jan 24 04:57:11 2017 -0500
+++ b/deepTools_macros.xml	Fri Mar 31 09:27:20 2017 -0400
@@ -1,5 +1,17 @@
 <macros>

+    <token name="@THREADS@">--numberOfProcessors "\${GALAXY_SLOTS:-4}"</token>
+    <token name="@WRAPPER_VERSION@">2.5.0</token>
+    <xml name="requirements">
+        <requirements>
+            <requirement type="package" version="2.7.10">python</requirement>
+            <requirement type="package" version="@WRAPPER_VERSION@">deeptools</requirement>
+            <yield />
+        </requirements>
+        <expand macro="stdio" />
+        <version_command>@BINARY@ --version</version_command>
+    </xml>
+
     <xml name="advancedOpt_scaffold">
         <conditional name="advancedOpt">
             <param name="showAdvancedOpt" type="select" label="Show advanced options" >
@@ -97,18 +109,6 @@
         </param>
     </xml>

-    <token name="@THREADS@">--numberOfProcessors "\${GALAXY_SLOTS:-4}"</token>
-    <token name="@WRAPPER_VERSION@">2.4.2</token>
-    <xml name="requirements">
-        <requirements>
-            <requirement type="package" version="2.7.10">python</requirement>
-            <requirement type="package" version="2.4.2">deeptools</requirement>
-            <yield />
-        </requirements>
-        <expand macro="stdio" />
-        <version_command>@BINARY@ --version</version_command>
-    </xml>
-
     <xml name="smoothLength">
         <param argument="--smoothLength" type="integer" value="" optional="True" min="1"
             label="Smooth values using the following length (in bases)"
@@ -181,10 +181,10 @@
     <xml name="fragLength">
         <param argument="--minFragmentLength" type="integer" optional="True" value="0" min="0"
             label="Minimum fragment length for inclusion."
-            help="A value greater than 0 will filter out ALL single-end reads. This is primarily useful in things like ATACseq, where one would like to look specifically at mono- or di-nucleosome fragments." />
+            help="This is primarily useful in things like ATACseq, where one would like to look specifically at mono- or di-nucleosome fragments." />
         <param argument="--maxFragmentLength" type="integer" optional="True" value="0" min="0"
             label="Maximum fragment length for inclusion."
-            help="As above, but the maximum length. A value of 0 (the default) is equivalent to no maximum." />
+            help="A value of 0 (the default) is equivalent to no maximum." />
     </xml>

     <xml name="read_processing_options">
@@ -324,9 +324,7 @@

     <xml name="scaleFactor">
         <param argument="--scaleFactor" type="float" value="1" label="Scaling factor"
-            help="When used in combination with --normalizeTo1x or
-                --normalizeUsingRPKM, the computed scaling factor will
-                be multiplied by the given scale factor." />
+            help="The computed scaling factor will be multiplied by this (default 1)." />
     </xml>

     <xml name="scaleFactors">
@@ -441,19 +439,22 @@
 <![CDATA[
         #set files=[]
         #set labels=[]
+        #import re
         #if $multibam_conditional.orderMatters == "No":
             #for $counter, $bamfile in enumerate($multibam_conditional.bamfiles):
+                #set identifier = re.sub('[^\.\s\w\-]', '_', str($bamfile.element_identifier))
                 ln -s "${bamfile}" "./${counter}.bam" &&
                 ln -s "${bamfile.metadata.bam_index}" "./${counter}.bam.bai" &&
                 #silent $files.append('%s.bam' % $counter)
-                #silent $labels.append("'%s'" % ($bamfile.display_name))
+                #silent $labels.append("'%s'" % $identifier)
             #end for
         #else:
             #for $counter, $f in enumerate($multibam_conditional.multibam_repeats):
+                #set identifier = re.sub('[^\.\s\w\-]', '_', str($f.bamfiles.element_identifier))
                 ln -s "${f.bamfiles}" "./${counter}.bam" &&
                 ln -s "${f.bamfiles.metadata.bam_index}" "./${counter}.bam.bai" &&
                 #silent $files.append('%s.bam' % $counter)
-                #silent $labels.append("'%s'" % ($f.bamfiles.display_name))
+                #silent $labels.append("'%s'" % $identifier)
             #end for
         #end if
 ]]>
@@ -463,17 +464,20 @@
 <![CDATA[
         #set files=[]
         #set labels=[]
+        #import re
         #if $multibigwig_conditional.orderMatters == "No":
             #for $counter, $bigwig in enumerate($multibigwig_conditional.bigwigfiles):
+                #set identifier = re.sub('[^\.\s\w\-]', '_', str($bigwig.element_identifier))
                 ln -s "${bigwig}" "${counter}.bw" &&
                 #silent $files.append('%s.bw' % $counter)
-                #silent $labels.append("'%s'" % ($bigwig.display_name))
+                #silent $labels.append("'%s'" % $identifier)
             #end for
         #else:
             #for $counter, $f in enumerate($multibigwig_conditional.multibigwig_repeats):
+                #set identifier = re.sub('[^\.\s\w\-]', '_', str($f.bigwigfiles.element_identifier))
                 ln -s "${f.bigwigfiles}" "${counter}.bw" &&
                 #silent $files.append('%s.bw' % $counter)
-                #silent $labels.append("'%s'" % ($f.bigwigfiles.display_name))
+                #silent $labels.append("'%s'" % $identifier)
             #end for
         #end if
 ]]>
--- a/plotCorrelation.xml	Tue Jan 24 04:57:11 2017 -0500
+++ b/plotCorrelation.xml	Fri Mar 31 09:27:20 2017 -0400
@@ -1,142 +1,142 @@
-<tool id="deeptools_plot_correlation" name="plotCorrelation" version="@WRAPPER_VERSION@.0">
-    <description>Create a heatmap or scatterplot of correlation scores between different samples </description>
-    <macros>
-        <token name="@BINARY@">plotCorrelation</token>
-        <import>deepTools_macros.xml</import>
-    </macros>
-    <expand macro="requirements"/>
-    <command>
-<![CDATA[
-        @BINARY@
-            --corData "$corData"
-            --plotFile "$outFileName"
-            --corMethod "$corMethod"
-            --whatToPlot "$plotting_type.whatToPlot"
-            #if str($plotting_type.whatToPlot) == 'heatmap':
-                @HEATMAP_OPTIONS@
-            #else:
-                --plotTitle '$plotting_type.plotTitle'
-            #end if
-            $skipZeros
-            --plotFileFormat "$outFileFormat"
-            $removeOutliers
-            #if $outFileCorMatrix:
-                --outFileCorMatrix "$matrix"
-            #end if
-
-]]>
-    </command>
-    <inputs>
-        <param name="corData" format="deeptools_coverage_matrix" type="data" label="Matrix file from the multiBamSummary tool"/>
-        <expand macro="corMethod" />
-
-        <conditional name="plotting_type" >
-            <param argument="--whatToPlot" type="select" label="Plotting type">
-                <option value="heatmap" selected="True">Heatmap</option>
-                <option value="scatterplot">Scatterplot</option>
-            </param>
-            <when value="heatmap">
-                <expand macro="heatmap_options" />
-            </when>
-            <when value="scatterplot">
-                <expand macro="plotTitle" />
-            </when>
-        </conditional>
-
-        <expand macro="skipZeros" />
-
-        <expand macro="input_image_file_format" />
-
-        <param argument="--removeOutliers" type="boolean"
-            truevalue="--removeOutliers" falsevalue="" label="Remove regions with very large counts"
-            help="If set, bins with very large counts are removed. Bins
-                with abnormally high reads counts artificially
-                increase pearson correlation; that's why, by default,
-                plotCorrelation tries to remove outliers using the median
-                absolute deviation (MAD) method applying a threshold
-                of 200 to only consider extremely large deviations
-                from the median. ENCODE blacklist page (https://sites.
-                google.com/site/anshulkundaje/projects/blacklists)
-                contains useful information about regions with
-                unusually high counts."/>
-
-        <param name="outFileCorMatrix" type="boolean" label="Save the matrix of values underlying the heatmap"/>
-
-    </inputs>
-    <outputs>
-        <expand macro="output_image_file_format_not_nested" />
-        <data format="tabular" name="matrix" label="${tool.name} on ${on_string}: Correlation matrix">
-            <filter>outFileCorMatrix is True</filter>
-        </data>
-    </outputs>
-    <tests>
-        <test>
-            <param name="corData" value="multiBamSummary_result1.npz" ftype="deeptools_coverage_matrix" />
-            <param name="outFileFormat" value="png" />
-            <param name="outFileCorMatrix" value="True" />
-            <output name="matrix" file="plotCorrelation_result1.tabular" ftype="tabular" />
-            <output name="outFileName" file="plotCorrelation_result1.png" ftype="png" compare="sim_size" delta="300" />
-        </test>
-        <test>
-            <param name="corData" value="multiBamSummary_result1.npz" ftype="deeptools_coverage_matrix" />
-            <param name="outFileFormat" value="png" />
-            <param name="whatToPlot" value="scatterplot" />
-            <param name="removeOutliers" value="True" />
-            <param name="plotTitle" value="Test Plot" />
-            <output name="outFileName" file="plotCorrelation_result2.png" ftype="png" compare="sim_size" delta="300" />
-        </test>
-    </tests>
-    <help>
-<![CDATA[
-What it does
-------------
-
-This tools takes the default output of ``multiBamSummary`` or ``multiBigwigSummary``, and computes the pairwise correlation among samples.
-Results can be visualized as **scatterplots** or as a **heatmap** of correlation coefficients (see below for examples).
-
-Theoretical Background
-----------------------
-
-The result of the correlation computation is a **table of correlation coefficients** that indicates how "strong" the relationship between two samples is and it will consist of numbers between -1 and 1. (-1 indicates perfect anti-correlation, 1 perfect correlation.)
-
-We offer two different functions for the correlation computation: *Pearson* or *Spearman*.
-
-The *Pearson method* measures the **metric differences** between samples and is therefore influenced by outliers.
-The *Spearman method* is based on **rankings**.
-
-Output
-------
-
-The default output is a **diagnostic plot** -- either a scatterplot or a clustered heatmap displaying the values for each pair-wise correlation (see below for example plots).
-
-Optionally, you can also obtain a table of the pairwise correlation coefficients.
-
-.. image:: $PATH_TO_IMAGES/plotCorrelation_output.png
-    :width: 600
-    :height: 271
-
-Example plots
--------------
-
-The following is the output of ``plotCorrelation`` with our test ChIP-Seq datasets (to be found under "Shared Data" --> "Data Library").
-
-Average coverages were computed over 10 kb bins for chromosome X,
-from bigWig files using ``multiBigwigSummary``. This was then used with ``plotCorrelation`` to make a heatmap of Spearman correlation coefficients.
-
-.. image:: $PATH_TO_IMAGES/plotCorrelation_galaxy_bw_heatmap_output.png
-    :width: 600
-    :height: 518
-
-The scatterplot could look like this:
-
-.. image:: $PATH_TO_IMAGES/plotCorrelation_scatterplot_PearsonCorr_bigwigScores.png
-    :width: 600
-    :height: 600
-
------
-
-@REFERENCES@
-]]>
-    </help>
-    <expand macro="citations" />
-</tool>
+<tool id="deeptools_plot_correlation" name="plotCorrelation" version="@WRAPPER_VERSION@.0">
+    <description>Create a heatmap or scatterplot of correlation scores between different samples </description>
+    <macros>
+        <token name="@BINARY@">plotCorrelation</token>
+        <import>deepTools_macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <command>
+<![CDATA[
+        @BINARY@
+            --corData "$corData"
+            --plotFile "$outFileName"
+            --corMethod "$corMethod"
+            --whatToPlot "$plotting_type.whatToPlot"
+            #if str($plotting_type.whatToPlot) == 'heatmap':
+                @HEATMAP_OPTIONS@
+            #else:
+                --plotTitle '$plotting_type.plotTitle'
+            #end if
+            $skipZeros
+            --plotFileFormat "$outFileFormat"
+            $removeOutliers
+            #if $outFileCorMatrix:
+                --outFileCorMatrix "$matrix"
+            #end if
+
+]]>
+    </command>
+    <inputs>
+        <param name="corData" format="deeptools_coverage_matrix" type="data" label="Matrix file from the multiBamSummary tool"/>
+        <expand macro="corMethod" />
+
+        <conditional name="plotting_type" >
+            <param argument="--whatToPlot" type="select" label="Plotting type">
+                <option value="heatmap" selected="True">Heatmap</option>
+                <option value="scatterplot">Scatterplot</option>
+            </param>
+            <when value="heatmap">
+                <expand macro="heatmap_options" />
+            </when>
+            <when value="scatterplot">
+                <expand macro="plotTitle" />
+            </when>
+        </conditional>
+
+        <expand macro="skipZeros" />
+
+        <expand macro="input_image_file_format" />
+
+        <param argument="--removeOutliers" type="boolean"
+            truevalue="--removeOutliers" falsevalue="" label="Remove regions with very large counts"
+            help="If set, bins with very large counts are removed. Bins
+                with abnormally high reads counts artificially
+                increase pearson correlation; that's why, by default,
+                plotCorrelation tries to remove outliers using the median
+                absolute deviation (MAD) method applying a threshold
+                of 200 to only consider extremely large deviations
+                from the median. ENCODE blacklist page (https://sites.
+                google.com/site/anshulkundaje/projects/blacklists)
+                contains useful information about regions with
+                unusually high counts."/>
+
+        <param name="outFileCorMatrix" type="boolean" label="Save the matrix of values underlying the heatmap"/>
+
+    </inputs>
+    <outputs>
+        <expand macro="output_image_file_format_not_nested" />
+        <data format="tabular" name="matrix" label="${tool.name} on ${on_string}: Correlation matrix">
+            <filter>outFileCorMatrix is True</filter>
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <param name="corData" value="multiBamSummary_result1.npz" ftype="deeptools_coverage_matrix" />
+            <param name="outFileFormat" value="png" />
+            <param name="outFileCorMatrix" value="True" />
+            <output name="matrix" file="plotCorrelation_result1.tabular" ftype="tabular" />
+            <output name="outFileName" file="plotCorrelation_result1.png" ftype="png" compare="sim_size" delta="300" />
+        </test>
+        <test>
+            <param name="corData" value="multiBamSummary_result1.npz" ftype="deeptools_coverage_matrix" />
+            <param name="outFileFormat" value="png" />
+            <param name="whatToPlot" value="scatterplot" />
+            <param name="removeOutliers" value="True" />
+            <param name="plotTitle" value="Test Plot" />
+            <output name="outFileName" file="plotCorrelation_result2.png" ftype="png" compare="sim_size" delta="300" />
+        </test>
+    </tests>
+    <help>
+<![CDATA[
+What it does
+------------
+
+This tool takes the default output of ``multiBamSummary`` or ``multiBigwigSummary``, and computes the pairwise correlation among samples.
+Results can be visualized as **scatterplots** or as a **heatmap** of correlation coefficients (see below for examples).
+
+Theoretical Background
+----------------------
+
+The result of the correlation computation is a **table of correlation coefficients** that indicates how "strong" the relationship between two samples is and it will consist of numbers between -1 and 1. (-1 indicates perfect anti-correlation, 1 perfect correlation.)
+
+We offer two different functions for the correlation computation: *Pearson* or *Spearman*.
+
+The *Pearson method* measures the **metric differences** between samples and is therefore influenced by outliers.
+The *Spearman method* is based on **rankings**.
+
+Output
+------
+
+The default output is a **diagnostic plot** -- either a scatterplot or a clustered heatmap displaying the values for each pair-wise correlation (see below for example plots).
+
+Optionally, you can also obtain a table of the pairwise correlation coefficients.
+
+.. image:: $PATH_TO_IMAGES/plotCorrelation_output.png
+    :width: 600
+    :height: 271
+
+Example plots
+-------------
+
+The following is the output of ``plotCorrelation`` with our test ChIP-Seq datasets (to be found under "Shared Data" --> "Data Library").
+
+Average coverages were computed over 10 kb bins for chromosome X,
+from bigWig files using ``multiBigwigSummary``. This was then used with ``plotCorrelation`` to make a heatmap of Spearman correlation coefficients.
+
+.. image:: $PATH_TO_IMAGES/plotCorrelation_galaxy_bw_heatmap_output.png
+    :width: 600
+    :height: 518
+
+The scatterplot could look like this:
+
+.. image:: $PATH_TO_IMAGES/plotCorrelation_scatterplot_PearsonCorr_bigwigScores.png
+    :width: 600
+    :height: 600
+
+-----
+
+@REFERENCES@
+]]>
+    </help>
+    <expand macro="citations" />
+</tool>
Binary file test-data/bamCompare_result2.bw has changed
Binary file test-data/bamCoverage_result1.bw has changed
Binary file test-data/bamCoverage_result2.bw has changed
Binary file test-data/bamCoverage_result6.bw has changed
Binary file test-data/bamPEFragmentSize_histogram_result1.png has changed
Binary file test-data/bigwigCompare_result1.bw has changed
--- a/test-data/computeMatrixOperations.txt	Tue Jan 24 04:57:11 2017 -0500
+++ b/test-data/computeMatrixOperations.txt	Fri Mar 31 09:27:20 2017 -0400
@@ -1,4 +1,4 @@
 Groups:
 	genes
 Samples:
-	file_0
+	bamCoverage_result4_bw_0
Binary file test-data/computeMatrixOperations_result2.mat.gz has changed
Binary file test-data/computeMatrix_result1.gz has changed
Binary file test-data/computeMatrix_result2.gz has changed
Binary file test-data/computeMatrix_result3.gz has changed
Binary file test-data/correctGCBias_result1.bam has changed
Binary file test-data/heatmapper_result1.png has changed
Binary file test-data/heatmapper_result2.png has changed
Binary file test-data/multiBamSummary_result1.npz has changed
Binary file test-data/multiBamSummary_result2.npz has changed
Binary file test-data/multiBamSummary_result2b.npz has changed
Binary file test-data/multiBigwigSummary_result1.npz has changed
Binary file test-data/plotCorrelation_result1.png has changed
--- a/test-data/plotCorrelation_result1.tabular	Tue Jan 24 04:57:11 2017 -0500
+++ b/test-data/plotCorrelation_result1.tabular	Fri Mar 31 09:27:20 2017 -0400
@@ -1,3 +1,3 @@
-	'bowtie2-test1.bam'	'bowtie2-test1.bam'
-'bowtie2-test1.bam'	1.0000	1.0000
-'bowtie2-test1.bam'	1.0000	1.0000
+	'bowtie2 test1.bam'	'bowtie2 test1.bam'
+'bowtie2 test1.bam'	1.0000	1.0000
+'bowtie2 test1.bam'	1.0000	1.0000
Binary file test-data/plotCorrelation_result2.png has changed
Binary file test-data/plotCoverage_result1.png has changed
Binary file test-data/plotEnrichment_output.png has changed
--- a/test-data/plotFingerprint_quality_metrics.tabular	Tue Jan 24 04:57:11 2017 -0500
+++ b/test-data/plotFingerprint_quality_metrics.tabular	Fri Mar 31 09:27:20 2017 -0400
@@ -1,3 +1,3 @@
 Sample	AUC	Synthetic AUC	X-intercept	Synthetic X-intercept	Elbow Point	Synthetic Elbow Point	JS Distance	Synthetic JS Distance	% genome enriched	diff. enrichment	CHANCE divergence
-bowtie2 test1.bam	0.00493632029864	0.481650684758	0.984443061605	1.15310443503e-24	0.984940883634	0.523268829811	NA	0.269861238192	NA	NA	NA
-bowtie2 test1.bam	0.00493632029864	0.481650684758	0.984443061605	1.15310443503e-24	0.984940883634	0.523268829811	NA	0.269861238192	NA	NA	NA
+bowtie2 test1.bam	0.00493632029864	0.481650684758	0.984443061605	1.15310443503e-24	0.984940883634	0.523268829811	NA	0.269004498068	NA	NA	NA
+bowtie2 test1.bam	0.00493632029864	0.481650684758	0.984443061605	1.15310443503e-24	0.984940883634	0.523268829811	NA	0.269004498068	NA	NA	NA
Binary file test-data/plotFingerprint_result1.png has changed
Binary file test-data/plotFingerprint_result2.png has changed
Binary file test-data/profiler_result1.png has changed
Binary file test-data/profiler_result2.png has changed
--- a/test-data/profiler_result2.tabular	Tue Jan 24 04:57:11 2017 -0500
+++ b/test-data/profiler_result2.tabular	Fri Mar 31 09:27:20 2017 -0400
@@ -1,3 +1,3 @@
 bin labels		-0.0Kb	0.0Kb
 bins		1	2
-file_0	genes	2477942.34473	2610259.65234
+bamCoverage_result4_bw_0	genes	2477942.875	2610260.125
--- a/tool_dependencies.xml	Tue Jan 24 04:57:11 2017 -0500
+++ b/tool_dependencies.xml	Fri Mar 31 09:27:20 2017 -0400
@@ -1,7 +1,7 @@
 <?xml version="1.0"?>
 <tool_dependency>
     <package name="python" version="2.7.10">
-        <repository changeset_revision="0339c4a9b87b" name="package_python_2_7_10" owner="iuc" prior_installation_required="True" toolshed="https://toolshed.g2.bx.psu.edu" />
+        <repository changeset_revision="bd7165ea6526" name="package_python_2_7_10" owner="iuc" prior_installation_required="True" toolshed="https://toolshed.g2.bx.psu.edu" />
     </package>
     <package name="deeptools" version="2.4.2">
         <repository changeset_revision="efc55c226f11" name="package_python_2_7_deeptools_2_4_2" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />