Mercurial > repos > iuc > jvarkit_wgscoverageplotter
changeset 0:859e0611960c draft
"planemo upload for repository https://github.com/galaxyproject/iuc/tree/master/tools/jvarkit commit ee258cf884aa478b4ce4f978c4239f237c813701"
author | iuc |
---|---|
date | Thu, 11 Feb 2021 08:00:58 +0000 |
parents | |
children | 364b8ee3a3e4 |
files | jvarkit_wgscoverageplotter.xml test-data/fasta_indexes.loc test-data/reference.fasta test-data/reference.fasta.fai test-data/sars_cov2_trimmed_cut.bam tool_data_table_conf.xml.sample tool_data_table_conf.xml.test |
diffstat | 7 files changed, 288 insertions(+), 0 deletions(-) [+] |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/jvarkit_wgscoverageplotter.xml Thu Feb 11 08:00:58 2021 +0000 @@ -0,0 +1,232 @@ +<tool id="jvarkit_wgscoverageplotter" name="BAM Coverage Plotter" version="@TOOL_VERSION@+galaxy0"> + <description>Plot read coverage across a genomic contig</description> + <macros> + <token name="@TOOL_VERSION@">20201223</token> + </macros> + <requirements> + <requirement type="package" version="@TOOL_VERSION@">jvarkit-wgscoverageplotter</requirement> + <requirement type="package" version="1.11">samtools</requirement> + <requirement type="package" version="7.0.10_62">imagemagick</requirement> + </requirements> + <command detect_errors="exit_code"><![CDATA[ + #set include_regex = str($adv.include_contig_regex).strip() + #set exclude_regex = str($adv.skip_contig_regex).strip() + #set alignment_name = str($alignment_file.element_identifier) + #if str($reference.source) == "history" + ln -s '$reference.hist_genome' reference.fasta && + samtools faidx reference.fasta && + #else + #set ref_path = str($reference.cached_genome.fields.path) + #set ref_index_path = str($reference.cached_genome.fields.path) + '.fai' + ln -s '$ref_path' reference.fasta && + ln -s '$ref_index_path' reference.fasta.fai && + #end if + samtools view -H '$alignment_file' |grep -E '^(@HD|@SQ)' >reference.dict && + cat reference.dict && + ln -s '$alignment_file' '$alignment_name' && + ln -s '${alignment_file.metadata.bam_index}' '${alignment_name}.bai' && + wgscoverageplotter.sh + --reference reference.fasta + --dimension '$dimension' + --output plot.svg + $adv.disable_paired_overlap + #if $include_regex + -I '$include_regex' + #end if + #if $exclude_regex + -X '$exclude_regex' + #end if + --mapq $adv.min_mapq + --max-depth $adv.max_depth + $adv.clip + --min-contig-length '$adv.min_contig_length' + --percentile $adv.percentile + $adv.points + #if str($adv.sample_filter.filter_by_sample) == "true" + --samples '$adv.sample_filter.samples' + --partition '$adv.sample_filter.partition' + #end if + '$alignment_name' && + #if str($format) == "SVG" + mv plot.svg '$plot_output' + #else + convert plot.svg '$format:$plot_output' + #end if + ]]> + </command> + <inputs> + <conditional name="reference"> + <param name="source" type="select" + label="Will you select a reference genome from your history or use a built-in genome?"> + <option value="cached">Use a built-in genome</option> + <option value="history" selected="true">Use a genome from my history</option> + </param> + <when value="cached"> + <!-- NOTE: wgscoverageplotter requires the genome to be both indexed (with samtools index) + and also have a dictionary (from picard CreateSequenceDictionary). since there is no + way to specify both of these requirements, we take the indexed genome and build a dictionary --> + <param name="cached_genome" type="select" + label="Reference genome" + help="The FASTA reference genome that reads were mapped against."> + <options from_data_table="fasta_indexes" /> + </param> + </when> + <when value="history"> + <param name="hist_genome" type="data" format="fasta" + label="Reference genome" + help="The FASTA reference genome that reads were mapped against."/> + </when> + </conditional> + <param name="alignment_file" type="data" format="bam" label="BAM alignment" /> + <param argument="--dimension" value="1000x500" type="text" label="Image dimensions" /> + <param name="format" type="select" label="Output format" help="Note that SVG might not display correctly on all Galaxy servers"> + <option value="PNG" selected="true">PNG</option> + <option value="SVG">SVG</option> + </param> + <section name="adv" title="Advanced options" expanded="false"> + <param argument="--disable-paired-overlap" + name="disable_paired_overlap" + type="boolean" truevalue="--disable-paired-overlap" + falsevalue="" label="Count overlapping bases with mate for paired-end" /> + <param argument="--include-contig-regex" name="include_contig_regex" type="text" label="Only keep chromosomes matching this regular expression." /> + <param argument="--mapq" name="min_mapq" type="integer" value="1" label="Minimum mapping quality" /> + <param argument="--max-depth" + name="max_depth" + type="integer" + label="y-axis (depth) limit of the plot" + value="-1" + help="The special value '-1' will first compute the average depth and the set the max depth to 2*average" /> + <param argument="--clip" + type="boolean" + label="Clip the plot at the y-axis limit" + help="Default is to let the coverage plot exceed the y-axis limit" + truevalue="--clip" + falsevalue="" /> + <param argument="--min-contig-length" + name="min_contig_length" + type="text" + label="Skip chromosomes shorter than" + value="0" + help="A distance specified as a positive integer."> + <validator type="regex" message="A positive integer optionally followed by the suffixes b,bp,k,kb,m,mb is required">[0-9,]+(bp|b|kb|k|mb|m)?$</validator> + </param> + <param argument="--percentile" type="select" label="How to bin the coverage under one pixel"> + <option value="median" selected="true">median</option> + <option value="average">average</option> + <option value="min">min</option> + <option value="max">max</option> + </param> + <param argument="--points" + type="boolean" + label="Plot coverage using points instead of areas" + truevalue="--points" + falsevalue="" + /> + <param argument="--skip-contig-regex" + name="skip_contig_regex" + type="text" + label="Skip chromosomes matching this regular expression" /> + <conditional name="sample_filter"> + <param name="filter_by_sample" type="select" label="Filter by sample"> + <option value="true">Yes</option> + <option value="false" selected="true">No</option> + </param> + <when value="true"> + <param argument="--samples" + type="text" + label="Limit to these read groups" + help="See also --partition. Multiple read groups can be separated with commas."> + <validator type="expression" message="If a filter is being used, this field cannot be blank">len(str(value).strip()) > 0</validator> + </param> + <param argument="--partition" type="select" label="How to interpret identifiers in --samples" help="Partitions are modelled on the GATK logic as described here: https://gatk.broadinstitute.org/hc/en-us/articles/360051307491-DepthOfCoverage-BETA-#--partition-type"> + <option value="sample" selected="true">sample</option> + <option value="readgroup">readgroup</option> + <option value="library">library</option> + <option value="platform">platform</option> + <option value="sample_by_platform">sample_by_platform</option> + <option value="sample_by_platform_by_center">sample_by_platform_by_center</option> + <option value="any">any</option> + </param> + </when> + <when value="false"> + </when> + </conditional> + </section> + </inputs> + <outputs> + <data format="png" name="plot_output"> + <change_format> + <when input="format" value="SVG" format="svg" /> + </change_format> + </data> + </outputs> + <tests> + <test> + <conditional name="reference"> + <param name="source" value="history" /> + <param name="hist_genome" ftype="fasta" value="reference.fasta" /> + </conditional> + <param name="alignment_file" ftype="bam" value="sars_cov2_trimmed_cut.bam" /> + <output name="plot_output"> + <assert_contents> + <has_size value="26303" delta="15000" /> + </assert_contents> + </output> + </test> + <test> + <conditional name="reference"> + <param name="source" value="history" /> + <param name="hist_genome" ftype="fasta" value="reference.fasta" /> + </conditional> + <param name="alignment_file" ftype="bam" value="sars_cov2_trimmed_cut.bam" /> + <param name="format" value="SVG" /> + <output name="plot_output"> + <assert_contents> + <has_size value="7805" delta="2000"/> + </assert_contents> + </output> + </test> + <test> + <conditional name="reference"> + <param name="source" value="history" /> + <param name="hist_genome" ftype="fasta" value="reference.fasta" /> + </conditional> + <param name="alignment_file" ftype="bam" value="sars_cov2_trimmed_cut.bam" /> + <section name="adv"> + <param name="max_depth" value="50" /> + </section> + <output name="plot_output"> + <assert_contents> + <has_size value="25410" delta="15000" /> + </assert_contents> + </output> + </test> + <test> + <conditional name="reference"> + <param name="source" value="cached" /> + <param name="cached_genome" value="sars-cov-2-fragment" /> + </conditional> + <param name="alignment_file" ftype="bam" value="sars_cov2_trimmed_cut.bam" /> + <output name="plot_output"> + <assert_contents> + <has_size value="26303" delta="15000" /> + </assert_contents> + </output> + </test> + + </tests> + <help><![CDATA[ + + **WGSCoveragePlotter from the jvarkit toolkit** + + WGSCoveragePlotter_ is a tool to plot the coverage of aligned reads across a genomic contig. It takes + as input a BAM file and a genomic contig in FASTA format and produces an image in either PNG or SVG format + showing the depth of read coverage across the contig. + + .. _WGSCoveragePlotter: http://lindenb.github.io/jvarkit/WGSCoveragePlotter.html + ]]> + </help> + <citations> + <citation type="doi">10.6084/m9.figshare.1425030</citation> + </citations> +</tool>
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/fasta_indexes.loc Thu Feb 11 08:00:58 2021 +0000 @@ -0,0 +1,1 @@ +sars-cov-2-fragment SARS-CoV-2 SARS-CoV-2 ${__HERE__}/reference.fasta
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/reference.fasta Thu Feb 11 08:00:58 2021 +0000 @@ -0,0 +1,35 @@ +>MN908947.3 +ATTAAAGGTTTATACCTTCCCAGGTAACAAACCAACCAACTTTCGATCTCTTGTAGATCT +GTTCTCTAAACGAACTTTAAAATCTGTGTGGCTGTCACTCGGCTGCATGCTTAGTGCACT +CACGCAGTATAATTAATAACTAATTACTGTCGTTGACAGGACACGAGTAACTCGTCTATC +TTCTGCAGGCTGCTTACGGTTTCGTCCGTGTTGCAGCCGATCATCAGCACATCTAGGTTT +CGTCCGGGTGTGACCGAAAGGTAAGATGGAGAGCCTTGTCCCTGGTTTCAACGAGAAAAC +ACACGTCCAACTCAGTTTGCCTGTTTTACAGGTTCGCGACGTGCTCGTACGTGGCTTTGG +AGACTCCGTGGAGGAGGTCTTATCAGAGGCACGTCAACATCTTAAAGATGGCACTTGTGG +CTTAGTAGAAGTTGAAAAAGGCGTTTTGCCTCAACTTGAACAGCCCTATGTGTTCATCAA +ACGTTCGGATGCTCGAACTGCACCTCATGGTCATGTTATGGTTGAGCTGGTAGCAGAACT +CGAAGGCATTCAGTACGGTCGTAGTGGTGAGACACTTGGTGTCCTTGTCCCTCATGTGGG +CGAAATACCAGTGGCTTACCGCAAGGTTCTTCTTCGTAAGAACGGTAATAAAGGAGCTGG +TGGCCATAGTTACGGCGCCGATCTAAAGTCATTTGACTTAGGCGACGAGCTTGGCACTGA +TCCTTATGAAGATTTTCAAGAAAACTGGAACACTAAACATAGCAGTGGTGTTACCCGTGA +ACTCATGCGTGAGCTTAACGGAGGGGCATACACTCGCTATGTCGATAACAACTTCTGTGG +CCCTGATGGCTACCCTCTTGAGTGCATTAAAGACCTTCTAGCACGTGCTGGTAAAGCTTC +ATGCACTTTGTCCGAACAACTGGACTTTATTGACACTAAGAGGGGTGTATACTGCTGCCG +TGAACATGAGCATGAAATTGCTTGGTACACGGAACGTTCTGAAAAGAGCTATGAATTGCA +GACACCTTTTGAAATTAAATTGGCAAAGAAATTTGACACCTTCAATGGGGAATGTCCAAA +TTTTGTATTTCCCTTAAATTCCATAATCAAGACTATTCAACCAAGGGTTGAAAAGAAAAA +GCTTGATGGCTTTATGGGTAGAATTCGATCTGTCTATCCAGTTGCGTCACCAAATGAATG +CAACCAAATGTGCCTTTCAACTCTCATGAAGTGTGATCATTGTGGTGAAACTTCATGGCA +GACGGGCGATTTTGTTAAAGCCACTTGCGAATTTTGTGGCACTGAGAATTTGACTAAAGA +AGGTGCCACTACTTGTGGTTACTTACCCCAAAATGCTGTTGTTAAAATTTATTGTCCAGC +ATGTCACAATTCAGAAGTAGGACCTGAGCATAGTCTTGCCGAATACCATAATGAATCTGG +CTTGAAAACCATTCTTCGTAAGGGTGGTCGCACTATTGCCTTTGGAGGCTGTGTGTTCTC +TTATGTTGGTTGCCATAACAAGTGTGCCTATTGGGTTCCACGTGCTAGCGCTAACATAGG +TTGTAACCATACAGGTGTTGTTGGAGAAGGTTCCGAAGGTCTTAATGACAACCTTCTTGA +AATACTCCAAAAAGAGAAAGTCAACATCAATATTGTTGGTGACTTTAAACTTAATGAAGA +GATCGCCATTATTTTGGCATCTTTTTCTGCTTCCACAAGTGCTTTTGTGGAAACTGTGAA +AGGTTTGGATTATAAAGCATTCAAACAAATTGTTGAATCCTGTGGTAATTTTAAAGTTAC +AAAAGGAAAAGCTAAAAAAGGTGCCTGGAATATTGGTGAACAGAAATCAATACTGAGTCC +TCTTTATGCATTTGCATCAGAGGCTGCTCGTGTTGTACGATCAATTTTCTCCCGCACTCT +TGAAACTGCTCAAAATTCTGTGCGTGTTTTACAGAAGGCCGCTATAACAATACTAGATGG +AATTTCACAGTATTCACTGA
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/reference.fasta.fai Thu Feb 11 08:00:58 2021 +0000 @@ -0,0 +1,1 @@ +MN908947.3 2000 12 60 61
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.sample Thu Feb 11 08:00:58 2021 +0000 @@ -0,0 +1,12 @@ +<tables> + <!-- Locations of all fasta files under genome directory --> + <table name="all_fasta" comment_char="#"> + <columns>value, dbkey, name, path</columns> + <file path="tool-data/all_fasta.loc" /> + </table> + <!-- Locations of samtools indexed genomes --> + <table name="fasta_indexes" comment_char="#"> + <columns>value, dbkey, name, path</columns> + <file path="tool-data/fasta_indexes.loc" /> + </table> +</tables> \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tool_data_table_conf.xml.test Thu Feb 11 08:00:58 2021 +0000 @@ -0,0 +1,7 @@ +<tables> + <!-- Locations of FASTA index ffiles for testing --> + <table name="fasta_indexes" comment_char="#"> + <columns>value, dbkey, name, path</columns> + <file path="${__HERE__}/test-data/fasta_indexes.loc" /> + </table> +</tables>