changeset 0:859e0611960c draft

"planemo upload for repository https://github.com/galaxyproject/iuc/tree/master/tools/jvarkit commit ee258cf884aa478b4ce4f978c4239f237c813701"
author iuc
date Thu, 11 Feb 2021 08:00:58 +0000
parents
children 364b8ee3a3e4
files jvarkit_wgscoverageplotter.xml test-data/fasta_indexes.loc test-data/reference.fasta test-data/reference.fasta.fai test-data/sars_cov2_trimmed_cut.bam tool_data_table_conf.xml.sample tool_data_table_conf.xml.test
diffstat 7 files changed, 288 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/jvarkit_wgscoverageplotter.xml	Thu Feb 11 08:00:58 2021 +0000
@@ -0,0 +1,232 @@
+<tool id="jvarkit_wgscoverageplotter" name="BAM Coverage Plotter" version="@TOOL_VERSION@+galaxy0">
+    <description>Plot read coverage across a genomic contig</description>
+    <macros>
+        <token name="@TOOL_VERSION@">20201223</token>
+        <!-- Galaxy's default text sanitizer rewrites characters such as [ ] { } | and \, which mangles regular expressions.
+             Allow every printable character except the single quote, which would end the quoting used in the command. -->
+        <xml name="regex_sanitizer">
+            <sanitizer invalid_char="">
+                <valid initial="string.printable">
+                    <remove value="&apos;" />
+                </valid>
+            </sanitizer>
+        </xml>
+    </macros>
+    <requirements>
+        <requirement type="package" version="@TOOL_VERSION@">jvarkit-wgscoverageplotter</requirement>
+        <requirement type="package" version="1.11">samtools</requirement>
+        <requirement type="package" version="7.0.10_62">imagemagick</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+        #import re
+        ## The BAM is linked under its history name; anything that is not a word character, '-' or '.' becomes '_' so the name is safe as a file name and inside single quotes
+        #set alignment_name = re.sub('[^\w\-\.]', '_', str($alignment_file.element_identifier))
+        #set include_regex = str($adv.include_contig_regex).strip()
+        #set exclude_regex = str($adv.skip_contig_regex).strip()
+        #if str($reference.source) == "history"
+            ln -s '$reference.hist_genome' reference.fasta &&
+            samtools faidx reference.fasta &&
+        #else
+            #set ref_path = str($reference.cached_genome.fields.path)
+            ln -s '$ref_path' reference.fasta &&
+            ln -s '${ref_path}.fai' reference.fasta.fai &&
+        #end if
+        ## wgscoverageplotter also needs a sequence dictionary for the reference; build reference.dict from the @HD/@SQ lines of the BAM header
+        samtools view -H '$alignment_file' | grep -E '^(@HD|@SQ)' > reference.dict &&
+        cat reference.dict &&
+        ln -s '$alignment_file' '$alignment_name' &&
+        ln -s '${alignment_file.metadata.bam_index}' '${alignment_name}.bai' &&
+        wgscoverageplotter.sh
+        --reference reference.fasta
+        --dimension '$dimension'
+        --output plot.svg
+        $adv.disable_paired_overlap
+        #if $include_regex
+            -I '$include_regex'
+        #end if
+        #if $exclude_regex
+            -X '$exclude_regex'
+        #end if
+        --mapq $adv.min_mapq
+        --max-depth $adv.max_depth
+        $adv.clip
+        --min-contig-length '$adv.min_contig_length'
+        --percentile $adv.percentile
+        $adv.points
+        #if str($adv.sample_filter.filter_by_sample) == "true"
+            --samples '$adv.sample_filter.samples'
+            --partition '$adv.sample_filter.partition'
+        #end if
+        '$alignment_name' &&
+        #if str($format) == "SVG"
+            mv plot.svg '$plot_output'
+        #else
+            convert plot.svg '$format:$plot_output'
+        #end if
+    ]]></command>
+    <inputs>
+        <conditional name="reference">
+            <param name="source" type="select" label="Will you select a reference genome from your history or use a built-in genome?">
+                <option value="cached">Use a built-in genome</option>
+                <option value="history" selected="true">Use a genome from my history</option>
+            </param>
+            <when value="cached">
+                <!-- NOTE: wgscoverageplotter requires the genome to be both indexed (with samtools faidx)
+                     and to have a dictionary (as from picard CreateSequenceDictionary). Since there is no way to specify
+                     both of these requirements, we take the indexed genome and build the dictionary from the BAM header -->
+                <param name="cached_genome" type="select" label="Reference genome" help="The FASTA reference genome that reads were mapped against.">
+                    <options from_data_table="fasta_indexes" />
+                </param>
+            </when>
+            <when value="history">
+                <param name="hist_genome" type="data" format="fasta" label="Reference genome" help="The FASTA reference genome that reads were mapped against." />
+            </when>
+        </conditional>
+        <param name="alignment_file" type="data" format="bam" label="BAM alignment" />
+        <param argument="--dimension" type="text" value="1000x500" label="Image dimensions" help="Width and height of the image, written as WIDTHxHEIGHT (for example 1000x500).">
+            <validator type="regex" message="Dimensions must be given as WIDTHxHEIGHT, for example 1000x500">^[0-9]+x[0-9]+$</validator>
+        </param>
+        <param name="format" type="select" label="Output format" help="Note that SVG might not display correctly on all Galaxy servers">
+            <option value="PNG" selected="true">PNG</option>
+            <option value="SVG">SVG</option>
+        </param>
+        <section name="adv" title="Advanced options" expanded="false">
+            <param argument="--disable-paired-overlap" name="disable_paired_overlap" type="boolean" truevalue="--disable-paired-overlap" falsevalue="" label="Count overlapping bases with mate for paired-end" />
+            <param argument="--include-contig-regex" name="include_contig_regex" type="text" label="Only keep chromosomes matching this regular expression.">
+                <expand macro="regex_sanitizer" />
+            </param>
+            <param argument="--mapq" name="min_mapq" type="integer" value="1" label="Minimum mapping quality" />
+            <param argument="--max-depth" name="max_depth" type="integer" value="-1" label="y-axis (depth) limit of the plot"
+                help="The special value '-1' will first compute the average depth and then set the max depth to 2*average" />
+            <param argument="--clip" type="boolean" truevalue="--clip" falsevalue="" label="Clip the plot at the y-axis limit" help="Default is to let the coverage plot exceed the y-axis limit" />
+            <param argument="--min-contig-length" name="min_contig_length" type="text" value="0" label="Skip chromosomes shorter than"
+                help="A distance specified as a positive integer.">
+                <validator type="regex" message="A positive integer optionally followed by the suffixes b,bp,k,kb,m,mb is required">^[0-9,]+(bp|b|kb|k|mb|m)?$</validator>
+            </param>
+            <param argument="--percentile" type="select" label="How to bin the coverage under one pixel">
+                <option value="median" selected="true">median</option>
+                <option value="average">average</option>
+                <option value="min">min</option>
+                <option value="max">max</option>
+            </param>
+            <param argument="--points" type="boolean" truevalue="--points" falsevalue="" label="Plot coverage using points instead of areas" />
+            <param argument="--skip-contig-regex" name="skip_contig_regex" type="text" label="Skip chromosomes matching this regular expression">
+                <expand macro="regex_sanitizer" />
+            </param>
+            <conditional name="sample_filter">
+                <param name="filter_by_sample" type="select" label="Filter by sample">
+                    <option value="true">Yes</option>
+                    <option value="false" selected="true">No</option>
+                </param>
+                <when value="true">
+                    <param argument="--samples" type="text" label="Limit to these read groups" help="See also --partition. Multiple read groups can be separated with commas.">
+                        <validator type="expression" message="If a filter is being used, this field cannot be blank">len(str(value).strip()) > 0</validator>
+                    </param>
+                    <param argument="--partition" type="select" label="How to interpret identifiers in --samples" help="Partitions are modelled on the GATK logic as described here: https://gatk.broadinstitute.org/hc/en-us/articles/360051307491-DepthOfCoverage-BETA-#--partition-type">
+                        <option value="sample" selected="true">sample</option>
+                        <option value="readgroup">readgroup</option>
+                        <option value="library">library</option>
+                        <option value="platform">platform</option>
+                        <option value="sample_by_platform">sample_by_platform</option>
+                        <option value="sample_by_platform_by_center">sample_by_platform_by_center</option>
+                        <option value="any">any</option>
+                    </param>
+                </when>
+                <when value="false" />
+            </conditional>
+        </section>
+    </inputs>
+    <outputs>
+        <data format="png" name="plot_output">
+            <change_format>
+                <when input="format" value="SVG" format="svg" />
+            </change_format>
+        </data>
+    </outputs>
+    <tests>
+        <test>
+            <conditional name="reference">
+                <param name="source" value="history" />
+                <param name="hist_genome" ftype="fasta" value="reference.fasta" />
+            </conditional>
+            <param name="alignment_file" ftype="bam" value="sars_cov2_trimmed_cut.bam" />
+            <output name="plot_output">
+                <assert_contents>
+                    <has_size value="26303" delta="15000" />
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <conditional name="reference">
+                <param name="source" value="history" />
+                <param name="hist_genome" ftype="fasta" value="reference.fasta" />
+            </conditional>
+            <param name="alignment_file" ftype="bam" value="sars_cov2_trimmed_cut.bam" />
+            <param name="format" value="SVG" />
+            <output name="plot_output">
+                <assert_contents>
+                    <has_size value="7805" delta="2000"/>
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <conditional name="reference">
+                <param name="source" value="history" />
+                <param name="hist_genome" ftype="fasta" value="reference.fasta" />
+            </conditional>
+            <param name="alignment_file" ftype="bam" value="sars_cov2_trimmed_cut.bam" />
+            <section name="adv">
+                <param name="max_depth" value="50" />
+            </section>
+            <output name="plot_output">
+                <assert_contents>
+                    <has_size value="25410" delta="15000" />
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <conditional name="reference">
+                <param name="source" value="cached" />
+                <param name="cached_genome" value="sars-cov-2-fragment" />
+            </conditional>
+            <param name="alignment_file" ftype="bam" value="sars_cov2_trimmed_cut.bam" />
+            <output name="plot_output">
+                <assert_contents>
+                    <has_size value="26303" delta="15000" />
+                </assert_contents>
+            </output>
+        </test>
+        <test>
+            <!-- a regular expression with characters ([, ], \, $) that the default text sanitizer would rewrite -->
+            <conditional name="reference">
+                <param name="source" value="history" />
+                <param name="hist_genome" ftype="fasta" value="reference.fasta" />
+            </conditional>
+            <param name="alignment_file" ftype="bam" value="sars_cov2_trimmed_cut.bam" />
+            <section name="adv">
+                <param name="include_contig_regex" value="^MN[0-9]+\.[0-9]$" />
+            </section>
+            <assert_command>
+                <has_text text="-I '^MN[0-9]+\.[0-9]$'" />
+            </assert_command>
+            <output name="plot_output">
+                <assert_contents>
+                    <has_size value="26303" delta="15000" />
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help><![CDATA[
+
+    **WGSCoveragePlotter from the jvarkit toolkit**
+
+    WGSCoveragePlotter_ is a tool to plot the coverage of aligned reads across a genomic contig. It takes
+    as input a BAM file and a genomic contig in FASTA format and produces an image in either PNG or SVG format
+    showing the depth of read coverage across the contig.
+
+    .. _WGSCoveragePlotter: http://lindenb.github.io/jvarkit/WGSCoveragePlotter.html
+    ]]></help>
+    <citations>
+        <citation type="doi">10.6084/m9.figshare.1425030</citation>
+    </citations>
+</tool>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/fasta_indexes.loc	Thu Feb 11 08:00:58 2021 +0000
@@ -0,0 +1,1 @@
+sars-cov-2-fragment	SARS-CoV-2	SARS-CoV-2	${__HERE__}/reference.fasta
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/reference.fasta	Thu Feb 11 08:00:58 2021 +0000
@@ -0,0 +1,35 @@
+>MN908947.3
+ATTAAAGGTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCT
+GTTCTCTAAACGAACTTTAAAATCTGTGTGGCTGTCACTCGGCTGCATGCTTAGTGCACT
+CACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAGTAACTCGTCTATC
+TTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTT
+CGTCCGGGTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAAC
+ACACGTCCAACTCAGTTTGCCTGTTTTACAGGTTCGCGACGTGCTCGTACGTGGCTTTGG
+AGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAGATGGCACTTGTGG
+CTTAGTAGAAGTTGAAAAAGGCGTTTTGCCTCAACTTGAACAGCCCTATGTGTTCATCAA
+ACGTTCGGATGCTCGAACTGCACCTCATGGTCATGTTATGGTTGAGCTGGTAGCAGAACT
+CGAAGGCATTCAGTACGGTCGTAGTGGTGAGACACTTGGTGTCCTTGTCCCTCATGTGGG
+CGAAATACCAGTGGCTTACCGCAAGGTTCTTCTTCGTAAGAACGGTAATAAAGGAGCTGG
+TGGCCATAGTTACGGCGCCGATCTAAAGTCATTTGACTTAGGCGACGAGCTTGGCACTGA
+TCCTTATGAAGATTTTCAAGAAAACTGGAACACTAAACATAGCAGTGGTGTTACCCGTGA
+ACTCATGCGTGAGCTTAACGGAGGGGCATACACTCGCTATGTCGATAACAACTTCTGTGG
+CCCTGATGGCTACCCTCTTGAGTGCATTAAAGACCTTCTAGCACGTGCTGGTAAAGCTTC
+ATGCACTTTGTCCGAACAACTGGACTTTATTGACACTAAGAGGGGTGTATACTGCTGCCG
+TGAACATGAGCATGAAATTGCTTGGTACACGGAACGTTCTGAAAAGAGCTATGAATTGCA
+GACACCTTTTGAAATTAAATTGGCAAAGAAATTTGACACCTTCAATGGGGAATGTCCAAA
+TTTTGTATTTCCCTTAAATTCCATAATCAAGACTATTCAACCAAGGGTTGAAAAGAAAAA
+GCTTGATGGCTTTATGGGTAGAATTCGATCTGTCTATCCAGTTGCGTCACCAAATGAATG
+CAACCAAATGTGCCTTTCAACTCTCATGAAGTGTGATCATTGTGGTGAAACTTCATGGCA
+GACGGGCGATTTTGTTAAAGCCACTTGCGAATTTTGTGGCACTGAGAATTTGACTAAAGA
+AGGTGCCACTACTTGTGGTTACTTACCCCAAAATGCTGTTGTTAAAATTTATTGTCCAGC
+ATGTCACAATTCAGAAGTAGGACCTGAGCATAGTCTTGCCGAATACCATAATGAATCTGG
+CTTGAAAACCATTCTTCGTAAGGGTGGTCGCACTATTGCCTTTGGAGGCTGTGTGTTCTC
+TTATGTTGGTTGCCATAACAAGTGTGCCTATTGGGTTCCACGTGCTAGCGCTAACATAGG
+TTGTAACCATACAGGTGTTGTTGGAGAAGGTTCCGAAGGTCTTAATGACAACCTTCTTGA
+AATACTCCAAAAAGAGAAAGTCAACATCAATATTGTTGGTGACTTTAAACTTAATGAAGA
+GATCGCCATTATTTTGGCATCTTTTTCTGCTTCCACAAGTGCTTTTGTGGAAACTGTGAA
+AGGTTTGGATTATAAAGCATTCAAACAAATTGTTGAATCCTGTGGTAATTTTAAAGTTAC
+AAAAGGAAAAGCTAAAAAAGGTGCCTGGAATATTGGTGAACAGAAATCAATACTGAGTCC
+TCTTTATGCATTTGCATCAGAGGCTGCTCGTGTTGTACGATCAATTTTCTCCCGCACTCT
+TGAAACTGCTCAAAATTCTGTGCGTGTTTTACAGAAGGCCGCTATAACAATACTAGATGG
+AATTTCACAGTATTCACTGA
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/reference.fasta.fai	Thu Feb 11 08:00:58 2021 +0000
@@ -0,0 +1,1 @@
+MN908947.3	2000	12	60	61
Binary file test-data/sars_cov2_trimmed_cut.bam has changed
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.sample	Thu Feb 11 08:00:58 2021 +0000
@@ -0,0 +1,12 @@
+<tables>
+    <!-- Locations of all FASTA files under the genome directory -->
+    <table name="all_fasta" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/all_fasta.loc" />
+    </table>
+    <!-- Locations of samtools-indexed genomes -->
+    <table name="fasta_indexes" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="tool-data/fasta_indexes.loc" />
+    </table>
+</tables>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/tool_data_table_conf.xml.test	Thu Feb 11 08:00:58 2021 +0000
@@ -0,0 +1,7 @@
+<tables>
+    <!-- Locations of FASTA index files for testing -->
+    <table name="fasta_indexes" comment_char="#">
+        <columns>value, dbkey, name, path</columns>
+        <file path="${__HERE__}/test-data/fasta_indexes.loc" />
+    </table>
+</tables>