Mercurial > repos > bgruening > deeptools_plot_pca
changeset 12:814bea57a4fe draft
planemo upload for repository https://github.com/fidelram/deepTools/tree/master/galaxy/wrapper/ commit 09975f870c75347fba5c6777c9f3b442bdeeb289
line wrap: on
line diff
--- a/deepTools_macros.xml Tue Jan 24 04:58:30 2017 -0500 +++ b/deepTools_macros.xml Fri Mar 31 09:28:03 2017 -0400 @@ -1,5 +1,17 @@ <macros> + <token name="@THREADS@">--numberOfProcessors "\${GALAXY_SLOTS:-4}"</token> + <token name="@WRAPPER_VERSION@">2.5.0</token> + <xml name="requirements"> + <requirements> + <requirement type="package" version="2.7.10">python</requirement> + <requirement type="package" version="@WRAPPER_VERSION@">deeptools</requirement> + <yield /> + </requirements> + <expand macro="stdio" /> + <version_command>@BINARY@ --version</version_command> + </xml> + <xml name="advancedOpt_scaffold"> <conditional name="advancedOpt"> <param name="showAdvancedOpt" type="select" label="Show advanced options" > @@ -97,18 +109,6 @@ </param> </xml> - <token name="@THREADS@">--numberOfProcessors "\${GALAXY_SLOTS:-4}"</token> - <token name="@WRAPPER_VERSION@">2.4.2</token> - <xml name="requirements"> - <requirements> - <requirement type="package" version="2.7.10">python</requirement> - <requirement type="package" version="2.4.2">deeptools</requirement> - <yield /> - </requirements> - <expand macro="stdio" /> - <version_command>@BINARY@ --version</version_command> - </xml> - <xml name="smoothLength"> <param argument="--smoothLength" type="integer" value="" optional="True" min="1" label="Smooth values using the following length (in bases)" @@ -181,10 +181,10 @@ <xml name="fragLength"> <param argument="--minFragmentLength" type="integer" optional="True" value="0" min="0" label="Minimum fragment length for inclusion." - help="A value greater than 0 will filter out ALL single-end reads. This is primarily useful in things like ATACseq, where one would like to look specifically at mono- or di-nucleosome fragments." /> + help="This is primarily useful in things like ATACseq, where one would like to look specifically at mono- or di-nucleosome fragments." /> <param argument="--maxFragmentLength" type="integer" optional="True" value="0" min="0" label="Maximum fragment length for inclusion." - help="As above, but the maximum length. A value of 0 (the default) is equivalent to no maximum." /> + help="A value of 0 (the default) is equivalent to no maximum." /> </xml> <xml name="read_processing_options"> @@ -324,9 +324,7 @@ <xml name="scaleFactor"> <param argument="--scaleFactor" type="float" value="1" label="Scaling factor" - help="When used in combination with --normalizeTo1x or - --normalizeUsingRPKM, the computed scaling factor will - be multiplied by the given scale factor." /> + help="The computed scaling factor will be multiplied by this (default 1)." /> </xml> <xml name="scaleFactors"> @@ -441,19 +439,22 @@ <![CDATA[ #set files=[] #set labels=[] + #import re #if $multibam_conditional.orderMatters == "No": #for $counter, $bamfile in enumerate($multibam_conditional.bamfiles): + #set identifier = re.sub('[^\.\s\w\-]', '_', str($bamfile.element_identifier)) ln -s "${bamfile}" "./${counter}.bam" && ln -s "${bamfile.metadata.bam_index}" "./${counter}.bam.bai" && #silent $files.append('%s.bam' % $counter) - #silent $labels.append("'%s'" % ($bamfile.display_name)) + #silent $labels.append("'%s'" % identifier) #end for #else: #for $counter, $f in enumerate($multibam_conditional.multibam_repeats): + #set identifier = re.sub('[^\.\s\w\-]', '_', str($f.bamfiles.element_identifier)) ln -s "${f.bamfiles}" "./${counter}.bam" && ln -s "${f.bamfiles.metadata.bam_index}" "./${counter}.bam.bai" && #silent $files.append('%s.bam' % $counter) - #silent $labels.append("'%s'" % ($f.bamfiles.display_name)) + #silent $labels.append("'%s'" % $identifier) #end for #end if ]]> @@ -463,17 +464,20 @@ <![CDATA[ #set files=[] #set labels=[] + #import re #if $multibigwig_conditional.orderMatters == "No": #for $counter, $bigwig in enumerate($multibigwig_conditional.bigwigfiles): + #set identifier = re.sub('[^\.\s\w\-]', '_', str($bigwig.element_identifier)) ln -s "${bigwig}" "${counter}.bw" && #silent $files.append('%s.bw' % $counter) - #silent $labels.append("'%s'" % ($bigwig.display_name)) + #silent $labels.append("'%s'" % $identifier) #end for #else: #for $counter, $f in enumerate($multibigwig_conditional.multibigwig_repeats): + #set identifier = re.sub('[^\.\s\w\-]', '_', str($f.bigwigfiles.element_identifier)) ln -s "${f.bigwigfiles}" "${counter}.bw" && #silent $files.append('%s.bw' % $counter) - #silent $labels.append("'%s'" % ($f.bigwigfiles.display_name)) + #silent $labels.append("'%s'" % $identifier) #end for #end if ]]>
--- a/plotPCA.xml Tue Jan 24 04:58:30 2017 -0500 +++ b/plotPCA.xml Fri Mar 31 09:28:03 2017 -0400 @@ -1,88 +1,90 @@ -<tool id="deeptools_plot_pca" name="plotPCA" version="@WRAPPER_VERSION@.0"> - <description>Generate principal component analysis (PCA) plots from multiBamSummary or multiBigwigSummary output</description> - <macros> - <token name="@BINARY@">plotPCA</token> - <import>deepTools_macros.xml</import> - </macros> - <expand macro="requirements"/> - <command> -<![CDATA[ - @BINARY@ - --corData "$corData" - --plotTitle "$plotTitle" - --plotFile "$outFileName" - --plotFileFormat "$outFileFormat" - #if $outFileNameData - --outFileNameData "$output_outFileNameData" - #end if -]]> - </command> - <inputs> - <param name="corData" format="deeptools_coverage_matrix" type="data" label="Matrix file from the multiBamSummary or multiBigwigSummary tools"/> - <expand macro="input_image_file_format" /> - <expand macro="plotTitle" /> - <param argument="--outFileNameData" type="boolean" label="Save the matrix of PCA and eigenvalues underlying the plot."/> - </inputs> - <outputs> - <expand macro="output_image_file_format_not_nested" /> - <data format="tabular" name="output_outFileNameData" label="${tool.name} on ${on_string}: PCA matrix"> - <filter>outFileNameData</filter> - </data> - </outputs> - <tests> - <test> - <param name="corData" value="multiBamSummary_result2.npz" ftype="deeptools_coverage_matrix" /> - <param name="plotTitle" value="Test Plot" /> - <param name="outFileFormat" value="png" /> - <output name="outFileName" file="plotPCA_result1.png" ftype="png" compare="sim_size" delta="4000" /> - </test> - <test> - <param name="corData" value="multiBamSummary_result2.npz" ftype="deeptools_coverage_matrix" /> - <param name="plotTitle" value="Test Plot" /> - <param name="outFileFormat" value="png" /> - <param name="outFileNameData" value="True" /> - <output name="outFileName" file="plotPCA_result2.png" ftype="png" compare="sim_size" delta="4000" /> - <output name="output_outFileNameData" file="plotPCA_result2.tabular" ftype="tabular" /> - </test> - </tests> - <help> -<![CDATA[ - -What it does ------------- - -This tool takes the **default output file** of ``multiBamSummary`` or ``multiBigwigSummary`` to perform a principal component analysis (PCA). - -Output ------- - -The result is a panel of two plots: - -1. The eigenvalues of the **top two principal components**. -2. The **Scree plot** for the top five principal components where the bars represent the amount of variability explained by the individual factors and the red line traces the amount of variability is explained by the individual components in a cumulative manner - -Example plot ------------- - -.. image:: $PATH_TO_IMAGES/plotPCA_annotated.png - :width: 600 - :height: 315 - ------ - -Theoretical Background ----------------------- - -Principal component analysis (PCA) can be used, for example, to determine whether **samples display greater variability** between experimental conditions than between replicates of the same treatment. PCA is also useful to identify unexpected patterns, such as those caused by batch effects or outliers. -Principal components represent the directions along which the variation in the data is maximal, so that the information (e.g., read coverage values) from thousands of regions can be represented by just a few dimensions. - -PCA is not necessarily meant to identify unknown groupings or clustering; it is up to the researcher to determine the experimental or technical reason underlying the principal components. - - ------ - -@REFERENCES@ -]]> - </help> - <expand macro="citations" /> -</tool> +<tool id="deeptools_plot_pca" name="plotPCA" version="@WRAPPER_VERSION@.0"> + <description>Generate principal component analysis (PCA) plots from multiBamSummary or multiBigwigSummary output</description> + <macros> + <token name="@BINARY@">plotPCA</token> + <import>deepTools_macros.xml</import> + </macros> + <expand macro="requirements"/> + <command> +<![CDATA[ + @BINARY@ + --corData "$corData" + --plotTitle "$plotTitle" + --plotFile "$outFileName" + --plotFileFormat "$outFileFormat" + $rowCenter + #if $outFileNameData + --outFileNameData "$output_outFileNameData" + #end if +]]> + </command> + <inputs> + <param name="corData" format="deeptools_coverage_matrix" type="data" label="Matrix file from the multiBamSummary or multiBigwigSummary tools"/> + <expand macro="input_image_file_format" /> + <expand macro="plotTitle" /> + <param argument="--outFileNameData" type="boolean" label="Save the matrix of PCA and eigenvalues underlying the plot."/> + <param argument="--rowCenter" type="boolean" label="Center Rows?" help="When specified, each row (bin, gene, etc.) in the matrix is centered at 0 before the PCA is computed. This is useful only if you have a strong bin/gene/etc. correlation and the resulting principal component has samples stacked vertically." truevalue="--rowCenter" falsevalue="" /> + </inputs> + <outputs> + <expand macro="output_image_file_format_not_nested" /> + <data format="tabular" name="output_outFileNameData" label="${tool.name} on ${on_string}: PCA matrix"> + <filter>outFileNameData</filter> + </data> + </outputs> + <tests> + <test> + <param name="corData" value="multiBamSummary_result2b.npz" ftype="deeptools_coverage_matrix" /> + <param name="plotTitle" value="Test Plot" /> + <param name="outFileFormat" value="png" /> + <output name="outFileName" file="plotPCA_result1.png" ftype="png" compare="sim_size" delta="4000" /> + </test> + <test> + <param name="corData" value="multiBamSummary_result2b.npz" ftype="deeptools_coverage_matrix" /> + <param name="plotTitle" value="Test Plot" /> + <param name="outFileFormat" value="png" /> + <param name="outFileNameData" value="True" /> + <output name="outFileName" file="plotPCA_result2.png" ftype="png" compare="sim_size" delta="4000" /> + <output name="output_outFileNameData" file="plotPCA_result2.tabular" ftype="tabular" /> + </test> + </tests> + <help> +<![CDATA[ + +What it does +------------ + +This tool takes the **default output file** of ``multiBamSummary`` or ``multiBigwigSummary`` to perform a principal component analysis (PCA). + +Output +------ + +The result is a panel of two plots: + +1. The eigenvalues of the **top two principal components**. +2. The **Scree plot** for the top five principal components where the bars represent the amount of variability explained by the individual factors and the red line traces the amount of variability is explained by the individual components in a cumulative manner + +Example plot +------------ + +.. image:: $PATH_TO_IMAGES/plotPCA_annotated.png + :width: 600 + :height: 315 + +----- + +Theoretical Background +---------------------- + +Principal component analysis (PCA) can be used, for example, to determine whether **samples display greater variability** between experimental conditions than between replicates of the same treatment. PCA is also useful to identify unexpected patterns, such as those caused by batch effects or outliers. +Principal components represent the directions along which the variation in the data is maximal, so that the information (e.g., read coverage values) from thousands of regions can be represented by just a few dimensions. + +PCA is not necessarily meant to identify unknown groupings or clustering; it is up to the researcher to determine the experimental or technical reason underlying the principal components. + + +----- + +@REFERENCES@ +]]> + </help> + <expand macro="citations" /> +</tool>
--- a/test-data/computeMatrixOperations.txt Tue Jan 24 04:58:30 2017 -0500 +++ b/test-data/computeMatrixOperations.txt Fri Mar 31 09:28:03 2017 -0400 @@ -1,4 +1,4 @@ Groups: genes Samples: - file_0 + bamCoverage_result4_bw_0
--- a/test-data/plotCorrelation_result1.tabular Tue Jan 24 04:58:30 2017 -0500 +++ b/test-data/plotCorrelation_result1.tabular Fri Mar 31 09:28:03 2017 -0400 @@ -1,3 +1,3 @@ - 'bowtie2-test1.bam' 'bowtie2-test1.bam' -'bowtie2-test1.bam' 1.0000 1.0000 -'bowtie2-test1.bam' 1.0000 1.0000 + 'bowtie2 test1.bam' 'bowtie2 test1.bam' +'bowtie2 test1.bam' 1.0000 1.0000 +'bowtie2 test1.bam' 1.0000 1.0000
--- a/test-data/plotFingerprint_quality_metrics.tabular Tue Jan 24 04:58:30 2017 -0500 +++ b/test-data/plotFingerprint_quality_metrics.tabular Fri Mar 31 09:28:03 2017 -0400 @@ -1,3 +1,3 @@ Sample AUC Synthetic AUC X-intercept Synthetic X-intercept Elbow Point Synthetic Elbow Point JS Distance Synthetic JS Distance % genome enriched diff. enrichment CHANCE divergence -bowtie2 test1.bam 0.00493632029864 0.481650684758 0.984443061605 1.15310443503e-24 0.984940883634 0.523268829811 NA 0.269861238192 NA NA NA -bowtie2 test1.bam 0.00493632029864 0.481650684758 0.984443061605 1.15310443503e-24 0.984940883634 0.523268829811 NA 0.269861238192 NA NA NA +bowtie2 test1.bam 0.00493632029864 0.481650684758 0.984443061605 1.15310443503e-24 0.984940883634 0.523268829811 NA 0.269004498068 NA NA NA +bowtie2 test1.bam 0.00493632029864 0.481650684758 0.984443061605 1.15310443503e-24 0.984940883634 0.523268829811 NA 0.269004498068 NA NA NA
--- a/test-data/profiler_result2.tabular Tue Jan 24 04:58:30 2017 -0500 +++ b/test-data/profiler_result2.tabular Fri Mar 31 09:28:03 2017 -0400 @@ -1,3 +1,3 @@ bin labels -0.0Kb 0.0Kb bins 1 2 -file_0 genes 2477942.34473 2610259.65234 +bamCoverage_result4_bw_0 genes 2477942.875 2610260.125
--- a/tool_dependencies.xml Tue Jan 24 04:58:30 2017 -0500 +++ b/tool_dependencies.xml Fri Mar 31 09:28:03 2017 -0400 @@ -1,7 +1,7 @@ <?xml version="1.0"?> <tool_dependency> <package name="python" version="2.7.10"> - <repository changeset_revision="0339c4a9b87b" name="package_python_2_7_10" owner="iuc" prior_installation_required="True" toolshed="https://toolshed.g2.bx.psu.edu" /> + <repository changeset_revision="bd7165ea6526" name="package_python_2_7_10" owner="iuc" prior_installation_required="True" toolshed="https://toolshed.g2.bx.psu.edu" /> </package> <package name="deeptools" version="2.4.2"> <repository changeset_revision="efc55c226f11" name="package_python_2_7_deeptools_2_4_2" owner="iuc" toolshed="https://toolshed.g2.bx.psu.edu" />