Mercurial > repos > iuc > tasmanian_mismatch
changeset 3:98f2b5dfdaa9 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tasmanian_mismatch commit bf266268c9f9f2906a45d0f39e3bcb17fc05d849
author | iuc |
---|---|
date | Fri, 27 Jun 2025 17:08:52 +0000 |
parents | d7cbbb12b152 |
children | |
files | Tasmanian.xml |
diffstat | 1 files changed, 87 insertions(+), 63 deletions(-) [+] |
line wrap: on
line diff
--- a/Tasmanian.xml Fri Jun 27 10:07:26 2025 +0000 +++ b/Tasmanian.xml Fri Jun 27 17:08:52 2025 +0000 @@ -1,42 +1,48 @@ -<tool id="tasmanian_mismatch" name="Analysis of artifacts with Tasmanian" version="@TOOL_VERSION@" profile="20.05"> +<tool id="tasmanian_mismatch" name="Analysis of artifacts with Tasmanian" version="@TOOL_VERSION@+galaxy1" profile="20.05"> <description>Quantify, visualize and summarize mismatches in deep sequencing data</description> <macros> - <token name="@TOOL_VERSION@">1.0.9</token> - <token name="@SAMTOOLS_VERSION@">1.13</token> + <token name="@TOOL_VERSION@">1.0.9</token> </macros> + <edam_topics> + <edam_topic>topic_3168</edam_topic> <!-- Sequencing --> + <edam_topic>topic_0080</edam_topic> <!-- Sequence analysis --> + </edam_topics> + <edam_operations> + <edam_operation>operation_3218</edam_operation> <!-- Sequencing quality control --> + <edam_operation>operation_0564</edam_operation> <!-- Sequence visualisation --> + <edam_operation>operation_3197</edam_operation> <!-- Genetic variation analysis --> + </edam_operations> <requirements> - <requirement type="package" version="@TOOL_VERSION@">tasmanian-mismatch</requirement> + <requirement type="package" version="@TOOL_VERSION@">tasmanian-mismatch</requirement> <requirement type="package" version="1.22">samtools</requirement> </requirements> - <command detect_errors="exit_code"> - <![CDATA[ - - #set $reference_fasta_filename = "localref.fa" + <command detect_errors="exit_code"><![CDATA[ + #set $reference_fasta_filename = "localref.fa" - #if str( $reference_source.reference_source_selector ) == "history": + #if str( $reference_source.reference_source_selector ) == "history": ln -s '${reference_source.ref_file}' '${reference_fasta_filename}' && - #else: + #else: #set $reference_fasta_filename = str( $reference_source.ref_file.fields.path ) - #end if + #end if + + samtools view '${bam_input}' | - samtools view '${bam_input}' | - - #if $bed_filename - run_intersections -b '$bed_filename' | - #end if - - run_tasmanian + #if $bed_filename + run_intersections -b '$bed_filename' | + #end if + + run_tasmanian -q '${basequality}' -s '${softclips}' -m '${mapquality}' -c '${confidence}' -r '${reference_fasta_filename}' > '${output_table}' - ]]></command> <inputs> - <!-- Bam alignment file --> - <param type="data" name="bam_input" label="Bam/Sam alignemnt file" format="bam" help="Specify BAM/SAM dataset. If not using a bed file, this file MUST BE SORTED"/> - <!-- reference genome upload --> + <!-- BAM alignment file --> + <param type="data" name="bam_input" label="BAM/SAM alignment file" format="bam" help="Specify BAM/SAM dataset. If not using a bed file, this file MUST BE SORTED" /> + + <!-- Reference genome upload --> <conditional name="reference_source"> <param name="reference_source_selector" type="select" label="Reference genome" help="You can select a reference genome from your history or use a built-in index (Local cache)"> <option value="cached">Local cache</option> @@ -55,60 +61,78 @@ </when> </conditional> - <!-- bed file --> - <param name="bed_filename" type="data" format="bed" optional="true" label="Select a bed file" help="The bed file should contain at least: "chrN", "start" and "stop", and is tab separated."/> + <!-- BED file --> + <param name="bed_filename" type="data" format="bed" optional="true" label="Select a bed file" help="The bed file should contain at least: 'chrN', 'start' and 'stop', and is tab separated." /> <!-- Additional parameters --> <param name="confidence" label="Boundary" type="integer" value="20" min="0" max="100" - help="Number of bases in boundary region, from 0 to length of the read (read help below). Default=20"/> + help="Number of bases in boundary region, from 0 to length of the read (read help below). Default=20" /> <param name="softclips" label="Choose an action with softclips" type="select" display="radio" - help="How sofclips whould be treated. Values include 0,1 or 2 (read the help below). Default=0"> - <option value="1">Never use softcliped bases</option> - <option value="2">Always use softcliped bases</option> - <option value="0" selected="True">Automatic desicion (Default)</option> + help="How softclips should be treated. Values include 0,1 or 2 (read the help below). Default=0"> + <option value="1">Never use softclipped bases</option> + <option value="2">Always use softclipped bases</option> + <option value="0" selected="true">Automatic decision (Default)</option> </param> - <param name="mapquality" label="Map quality" type="integer" min="0" max="70" value="20" help="Exclude reads with lower mapQ than this number. Default=20"/> - <param name="basequality" label="Base quality" type="integer" min="0" max="70" value="20" help="Exclude bases with lower Base quality than this number. Default=20"/> - <param name="keepHTML_conditional" type="select" label="keep HTML output file?"> + <param name="mapquality" label="Map quality" type="integer" min="0" max="70" value="20" help="Exclude reads with lower mapQ than this number. Default=20" /> + <param name="basequality" label="Base quality" type="integer" min="0" max="70" value="20" help="Exclude bases with lower Base quality than this number. Default=20" /> + <param name="keepHTML_conditional" type="select" label="Keep HTML output file?"> <option value="yes">Yes</option> <option value="no">No</option> </param> </inputs> <outputs> - <data name="output_table" format="txt" /> - <data format="html" name="html_file" from_work_dir="Tasmanian_artifact_report.html" label="tasmanian-mismatch results table"> + <data name="output_table" format="txt" label="${tool.name} on ${on_string}: mismatch table" /> + <data format="html" name="html_file" from_work_dir="Tasmanian_artifact_report.html" label="${tool.name} on ${on_string}: results table"> <filter>keepHTML_conditional == "yes"</filter> </data> </outputs> <tests> - <!-- test when reference from history with bed--> - <test> - <param name="bam_input" value="test2.bam" ftype="bam"/> - <param name="reference_source_selector" value="history"/> - <param name="ref_file" value="small_region.fa"/> - <param name="bed_filename" value="test2.bed" ftype="bed"/> - <output name="output_table" file="test2-bed.output" lines_diff="4"/> + <!-- Test when reference from history with bed --> + <test expect_num_outputs="1"> + <param name="bam_input" value="test2.bam" ftype="bam" /> + <param name="reference_source|reference_source_selector" value="history" /> + <param name="reference_source|ref_file" value="small_region.fa" /> + <param name="bed_filename" value="test2.bed" ftype="bed" /> + <param name="keepHTML_conditional" value="no" /> + <output name="output_table" file="test2-bed.output" lines_diff="4" /> + </test> + + <!-- Test when reference from history without bed --> + <test expect_num_outputs="1"> + <param name="bam_input" value="test2.bam" ftype="bam" /> + <param name="reference_source|reference_source_selector" value="history" /> + <param name="reference_source|ref_file" value="small_region.fa" /> + <param name="keepHTML_conditional" value="no" /> + <output name="output_table" file="test2-nobed.output" lines_diff="4" /> </test> - <!-- test when reference from history without bed--> - <test> - <param name="bam_input" value="test2.bam" ftype="bam"/> - <param name="reference_source_selector" value="history"/> - <param name="ref_file" value="small_region.fa"/> - <output name="output_table" file="test2-nobed.output" lines_diff="4"/> + + <!-- Test when reference from cached --> + <test expect_num_outputs="1"> + <param name="bam_input" value="test2.bam" ftype="bam" dbkey="hg38" /> + <param name="reference_source|reference_source_selector" value="cached" /> + <param name="reference_source|ref_file" value="hg38" /> + <param name="keepHTML_conditional" value="no" /> + <output name="output_table" file="test2-nobed.output" lines_diff="4" /> </test> - <!-- test when reference from cached--> - <test> - <param name="bam_input" value="test2.bam" ftype="bam" dbkey="hg38"/> - <param name="reference_source_selector" value="cached"/> - <param name="ref_file" value="hg38"/> - <output name="output_table" file="test2-nobed.output" lines_diff="4"/> + + <!-- Test HTML output --> + <test expect_num_outputs="2"> + <param name="bam_input" value="test2.bam" ftype="bam" /> + <param name="reference_source|reference_source_selector" value="history" /> + <param name="reference_source|ref_file" value="small_region.fa" /> + <param name="keepHTML_conditional" value="yes" /> + <output name="output_table" file="test2-nobed.output" lines_diff="4" /> + <output name="html_file" ftype="html"> + <assert_contents> + <has_text text="Tasmanian" /> + </assert_contents> + </output> </test> </tests> - <help> - <![CDATA[ + <help><![CDATA[ **What it does** @@ -123,9 +147,9 @@ **What is special** -By providing a bed file, tasmanian-mismatch will count mismatches from all regions depicted in the figure below, +By providing a BED file, tasmanian-mismatch will count mismatches from all regions depicted in the figure below, and will report them separately. Also, a parameter defined as *"confidence"* allows including reads with >= -bases in the boundary region in a separate group. This is useful when the bed refers to repeat regions. Since these +bases in the boundary region in a separate group. This is useful when the BED refers to repeat regions. Since these regions might not have been correctly placed in the assembly or are not the same in different individuals, we can include this *confidence* repeat regions where we have high confidence on the reference genome to which we mapped the reads. @@ -138,25 +162,25 @@ provides different ways to deal with this: The *softclips* field allows for 3 different ways at treating softclips: + 0) Exclude these region if there is less than 2/3 identity with the reference genome 1) Exclude all softclipped bases 2) Include all softclipped bases .. class:: warningmark -BAM/SAM file must be **sorted** if not using a bed file. +BAM/SAM file must be **sorted** if not using a BED file. - ]]> - </help> + ]]></help> <citations> <citation type="bibtex"> @misc{githubtasmanian, - author = {Langhorst B., Others, Erijman A.}, + author = {Langhorst, B. and Erijman, A.}, year = {2020}, - title = {TBD}, + title = {Tasmanian-mismatch: A tool for analyzing positional mismatches in sequencing data}, publisher = {GitHub}, journal = {GitHub repository}, - url = {https://github.com/nebiolabs/tasmanian-mismatch}, + url = {https://github.com/nebiolabs/tasmanian-mismatch} } </citation> </citations>