view Tasmanian.xml @ 3:98f2b5dfdaa9 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/tasmanian_mismatch commit bf266268c9f9f2906a45d0f39e3bcb17fc05d849
author iuc
date Fri, 27 Jun 2025 17:08:52 +0000
parents d7cbbb12b152
children
line wrap: on
line source

<tool id="tasmanian_mismatch" name="Analysis of artifacts with Tasmanian" version="@TOOL_VERSION@+galaxy1" profile="20.05">
  <!-- One-line summary displayed beside the tool name -->
  <description>Quantify, visualize and summarize mismatches in deep sequencing data</description>
  <!-- @TOOL_VERSION@ is expanded in the tool version attribute and in the tasmanian-mismatch requirement -->
  <macros>
    <token name="@TOOL_VERSION@">1.0.9</token>
  </macros>
  <!-- EDAM ontology annotations -->
  <edam_topics>
    <!-- Sequencing -->
    <edam_topic>topic_3168</edam_topic>
    <!-- Sequence analysis -->
    <edam_topic>topic_0080</edam_topic>
  </edam_topics>
  <edam_operations>
    <!-- Sequencing quality control -->
    <edam_operation>operation_3218</edam_operation>
    <!-- Sequence visualisation -->
    <edam_operation>operation_0564</edam_operation>
    <!-- Genetic variation analysis -->
    <edam_operation>operation_3197</edam_operation>
  </edam_operations>
  <!-- Packages needed at runtime: the Tasmanian scripts and samtools (used to stream the BAM as SAM text) -->
  <requirements>
    <requirement version="@TOOL_VERSION@" type="package">tasmanian-mismatch</requirement>
    <requirement version="1.22" type="package">samtools</requirement>
  </requirements>
  <!--
    Command template (Cheetah).
    Pipeline: samtools view streams the alignments as text, run_intersections
    is inserted only when a BED dataset was supplied, and run_tasmanian writes
    the mismatch table to the output_table dataset via stdout.
    The reference is either symlinked from the history into the working
    directory or taken from the path column of the selected data table entry.
  -->
  <command detect_errors="exit_code"><![CDATA[
    ## Propagate a failure of ANY pipeline stage to the job exit code.
    ## Without pipefail only the exit status of the last command (run_tasmanian)
    ## is seen, so a failing samtools/run_intersections would go unnoticed.
    set -o pipefail &&

    #set $reference_fasta_filename = "localref.fa"

    #if str( $reference_source.reference_source_selector ) == "history":
        ## Give the history FASTA a stable local name inside the job directory
        ln -s '${reference_source.ref_file}' '${reference_fasta_filename}' &&
    #else:
        ## Cached reference: use the file path stored in the data table entry
        #set $reference_fasta_filename = str( $reference_source.ref_file.fields.path )
    #end if

    samtools view '${bam_input}' |

    ## Optional stage, present only when the user selected a BED dataset
    #if $bed_filename
        run_intersections -b '${bed_filename}' |
    #end if

    run_tasmanian
        -q '${basequality}'
        -s '${softclips}'
        -m '${mapquality}'
        -c '${confidence}'
        -r '${reference_fasta_filename}' > '${output_table}'
  ]]></command>
  <inputs>
    <!-- Alignment dataset to analyse -->
    <param name="bam_input" type="data" format="bam"
        label="BAM/SAM alignment file"
        help="Specify BAM/SAM dataset. If not using a bed file, this file MUST BE SORTED" />

    <!-- Reference genome: an entry of the all_fasta data table, or a FASTA dataset from the history -->
    <conditional name="reference_source">
      <param name="reference_source_selector" type="select"
          label="Reference genome"
          help="You can select a reference genome from your history or use a built-in index (Local cache)">
        <option value="cached">Local cache</option>
        <option value="history">History</option>
      </param>
      <when value="cached">
        <param name="ref_file" type="select" label="Select the reference genome from the list">
          <options from_data_table="all_fasta">
            <filter type="sort_by" column="2" />
            <validator type="no_options" message="No indexes are available" />
          </options>
        </param>
      </when>
      <when value="history">
        <param name="ref_file" type="data" format="fasta"
            label="Use reference genome from history"
            help="You can first upload a FASTA sequence to the history" />
      </when>
    </conditional>

    <!-- Optional BED dataset; when present the command adds the run_intersections stage -->
    <param name="bed_filename" type="data" format="bed" optional="true"
        label="Select a bed file"
        help="The bed file should contain at least: 'chrN', 'start' and 'stop', and is tab separated." />

    <!-- Numeric thresholds and softclip policy handed to run_tasmanian (-c, -s, -m, -q) -->
    <param name="confidence" type="integer" value="20" min="0" max="100"
        label="Boundary"
        help="Number of bases in boundary region, from 0 to length of the read (read help below). Default=20" />
    <param name="softclips" type="select" display="radio"
        label="Choose an action with softclips"
        help="How softclips should be treated. Values include 0,1 or 2 (read the help below). Default=0">
      <option value="1">Never use softclipped bases</option>
      <option value="2">Always use softclipped bases</option>
      <option value="0" selected="true">Automatic decision (Default)</option>
    </param>
    <param name="mapquality" type="integer" value="20" min="0" max="70"
        label="Map quality"
        help="Exclude reads with lower mapQ than this number. Default=20" />
    <param name="basequality" type="integer" value="20" min="0" max="70"
        label="Base quality"
        help="Exclude bases with lower Base quality than this number. Default=20" />

    <!-- Drives the filter on the HTML output dataset -->
    <param name="keepHTML_conditional" type="select" label="Keep HTML output file?">
      <option value="yes">Yes</option>
      <option value="no">No</option>
    </param>
  </inputs>

  <outputs>
    <!-- Mismatch table; the command redirects run_tasmanian's stdout into this dataset -->
    <data name="output_table" format="txt" label="${tool.name} on ${on_string}: mismatch table" />
    <!-- HTML report picked up from the job working directory, created only when the user chose to keep it -->
    <data name="html_file" format="html"
        from_work_dir="Tasmanian_artifact_report.html"
        label="${tool.name} on ${on_string}: results table">
      <filter>keepHTML_conditional == "yes"</filter>
    </data>
  </outputs>

  <tests>
    <!-- 1. History reference, BED file supplied (exercises the run_intersections stage) -->
    <test expect_num_outputs="1">
      <param name="bam_input" ftype="bam" value="test2.bam" />
      <conditional name="reference_source">
        <param name="reference_source_selector" value="history" />
        <param name="ref_file" value="small_region.fa" />
      </conditional>
      <param name="bed_filename" ftype="bed" value="test2.bed" />
      <param name="keepHTML_conditional" value="no" />
      <output name="output_table" file="test2-bed.output" lines_diff="4" />
    </test>

    <!-- 2. History reference, no BED file -->
    <test expect_num_outputs="1">
      <param name="bam_input" ftype="bam" value="test2.bam" />
      <conditional name="reference_source">
        <param name="reference_source_selector" value="history" />
        <param name="ref_file" value="small_region.fa" />
      </conditional>
      <param name="keepHTML_conditional" value="no" />
      <output name="output_table" file="test2-nobed.output" lines_diff="4" />
    </test>

    <!-- 3. Cached reference selected from the all_fasta data table, no BED file -->
    <test expect_num_outputs="1">
      <param name="bam_input" ftype="bam" dbkey="hg38" value="test2.bam" />
      <conditional name="reference_source">
        <param name="reference_source_selector" value="cached" />
        <param name="ref_file" value="hg38" />
      </conditional>
      <param name="keepHTML_conditional" value="no" />
      <output name="output_table" file="test2-nobed.output" lines_diff="4" />
    </test>

    <!-- 4. HTML report kept: two outputs are expected and the report must mention the tool name -->
    <test expect_num_outputs="2">
      <param name="bam_input" ftype="bam" value="test2.bam" />
      <conditional name="reference_source">
        <param name="reference_source_selector" value="history" />
        <param name="ref_file" value="small_region.fa" />
      </conditional>
      <param name="keepHTML_conditional" value="yes" />
      <output name="output_table" file="test2-nobed.output" lines_diff="4" />
      <output name="html_file" ftype="html">
        <assert_contents>
          <has_text text="Tasmanian" />
        </assert_contents>
      </output>
    </test>
  </tests>

  <help><![CDATA[

**What it does**

This tool counts the number/proportion of mismatches per position along the read, 
for each read (see figure below).

.. image:: ${static_path}/images/snapshot_good.jpg
    :height: 350                                                            
    :width: 650   

-----

**What is special**

By providing a BED file, tasmanian-mismatch will count mismatches from all regions depicted in the figure below, 
and will report them separately. Also, a parameter defined as *"confidence"* allows reads with at least *confidence*
bases in the boundary region to be reported in a separate group. This is useful when the BED refers to repeat regions. Since these 
regions might not have been correctly placed in the assembly, or may differ between individuals, we can 
use this *confidence* group to include reads from repeat regions where we have high confidence in the reference genome to which we mapped the reads.

.. image:: ${static_path}/images/intersections_tasmanian.jpg                
    :height: 150                                                            
    :width: 650 

Softclips are critical in FFPE (Formalin-fixed paraffin-embedded) experiments as mismatches tend to accumulate at the ends of the reads. Most often, softclips 
are all accepted during the analysis and many real mismatches are indirectly excluded from the analysis. Hence, this tool 
provides different ways to deal with this: 

The *softclips* field allows for 3 different ways of treating softclips:

0) Exclude these regions if there is less than 2/3 identity with the reference genome  
1) Exclude all softclipped bases  
2) Include all softclipped bases  

.. class:: warningmark   

BAM/SAM file must be **sorted** if not using a BED file.
  
  ]]></help>
  <citations>
    <!-- BibTeX entry pointing at the upstream GitHub repository of tasmanian-mismatch -->
    <citation type="bibtex">
      @misc{githubtasmanian,
        author = {Langhorst, B. and Erijman, A.},
        year = {2020},
        title = {Tasmanian-mismatch: A tool for analyzing positional mismatches in sequencing data},
        publisher = {GitHub},
        journal = {GitHub repository},
        url = {https://github.com/nebiolabs/tasmanian-mismatch}
      }
    </citation>
  </citations>
</tool>