Mercurial > repos > mheinzl > variant_analyzer2

<?xml version="1.0" encoding="UTF-8"?>
<tool id="read2mut" name="Call specific mutations in reads:" version="3.1.0" profile="19.01">
    <description>Looks for reads with a mutation at known positions and calculates frequencies and stats.</description>
    <macros>
        <import>va_macros.xml</import>
    </macros>
    <expand macro="requirements">
        <requirement type="package" version="1.1.0">xlsxwriter</requirement>
    </expand>
    <!-- Links the BAM input and its index (taken from the dataset's bam_index metadata)
         into the working directory as bam_input.bam and bam_input.bam.bai, then runs
         read2mut.py from the tool directory with the remaining inputs and output paths.
         $chimera_correction and $refalttiers expand either to their command-line flag or
         to an empty string, as defined by truevalue/falsevalue in the inputs section.
         NOTE(review): the symlinks presumably exist so the index sits next to the BAM
         under the conventional .bai name; confirm against read2mut.py. -->
    <command><![CDATA[
        ln -s '$file2' bam_input.bam &&
        ln -s '${file2.metadata.bam_index}' bam_input.bam.bai &&
        python '$__tool_directory__/read2mut.py'
        --mutFile '$file1'
        --bamFile bam_input.bam
        --inputJson '$file3'
        --sscsJson '$file4'
        --thresh '$thresh'
        --phred '$phred'
        --trim '$trim'
        $chimera_correction
        $refalttiers
        --softclipping_dist '$softclipping_dist'
        --reads_threshold '$reads_threshold'
        --outputFile '$output_xlsx'
        --outputFile_csv '$outputFile_csv'
        --outputFile2 '$output_xlsx2'
        --outputFile3 '$output_xlsx3'
    ]]>
    </command>
    <inputs>
        <!-- Input datasets: DCS variant calls, raw-read alignments and the two JSON stats files. -->
        <param name="file1" type="data" format="vcf" label="DCS Mutation File" optional="false" help="VCF file with DCS mutations. See the Help section below for a detailed explanation."/>
        <param name="file2" type="data" format="bam" label="BAM File of raw reads" optional="false" help="BAM file with aligned raw reads of selected tags."/>
        <param name="file3" type="data" format="json" label="JSON File with DCS tag stats" optional="false" help="JSON file generated by DCS mutations to tags/reads"/>
        <param name="file4" type="data" format="json" label="JSON File with SSCS tag stats" optional="false" help="JSON file generated by DCS mutations to SSCS stats."/>
        <!-- Numeric thresholds and flags; each one is forwarded to read2mut.py by the command template.
             The defaults quoted in the help texts must match the value attributes. -->
        <param name="thresh" type="integer" label="Tag count threshold" value="0" help="Integer threshold for displaying mutations. Only mutations occurring in DCS of less than thresh tags are displayed. Default of 0 displays all."/>
        <param name="phred" type="integer" label="Phred quality score threshold" min="0" max="41" value="20" help="Integer threshold for Phred quality score. Only reads higher than this threshold are considered. Default = 20."/>
        <param name="trim" type="integer" label="Trimming threshold" value="10" help="Integer threshold for assigning mutations at start and end of reads to lower tier. Default 10."/>
        <param name="chimera_correction" type="boolean" label="Apply chimera correction?" truevalue="--chimera_correction" falsevalue="" checked="False" help="Count chimeric variants (not for the reference allele) and correct the variant frequencies."/>
        <param name="softclipping_dist" type="integer" label="Distance between artifact and softclipping of the reads" min="1" value="15" help="Count a mutation as an artifact if it lies within this distance of the softclipped part of the reads. Default = 15."/>
        <param name="reads_threshold" type="float" label="Minimum percentage of softclipped reads in a family" min="0.0" max="1.0" value="1.0" help="Float number which specifies the minimum percentage of softclipped reads in a family to be considered in the softclipping tiers. Default: 1.0, means all reads of a family have to be softclipped."/>
        <param name="refalttiers" type="boolean" label="Extract tiers also for reference allele" truevalue="--refalttiers" falsevalue="" checked="False" help="Extracts tier information for the alternate and reference allele. Note that this will increase the running time of the tool. Otherwise only the tier information for the alternate allele is extracted."/>
    </inputs>
    <outputs>
        <!-- Three XLSX workbooks plus one CSV file; each name below is the variable the
             command template passes to read2mut.py as an output path. -->
        <data name="output_xlsx"
              format="xlsx"
              label="${tool.name} on ${on_string}: XLSX summary"/>
        <data name="outputFile_csv"
              format="csv"
              label="${tool.name} on ${on_string}: CSV summary"/>
        <data name="output_xlsx2"
              format="xlsx"
              label="${tool.name} on ${on_string}: XLSX allele frequencies"/>
        <data name="output_xlsx3"
              format="xlsx"
              label="${tool.name} on ${on_string}: XLSX tiers"/>
    </outputs>
    <tests>
        <!-- Single end-to-end test: runs the tool on the named test datasets with the same
             numeric values that the inputs section declares as defaults, and compares all
             four outputs against reference files. -->
        <test>
            <param name="file1" value="FreeBayes_test.vcf"/>
            <param name="file2" value="Interesting_Reads_test.trim.bam"/>
            <param name="file3" value="tag_count_dict_test.json"/>
            <param name="file4" value="SSCS_counts_test.json"/>
            <param name="thresh" value="0"/>
            <param name="phred" value="20"/>
            <param name="trim" value="10"/>
            <!-- NOTE(review): the two boolean params are listed without a value; presumably
                 they stay at their unchecked default. TODO confirm against Galaxy's test parser. -->
            <param name="chimera_correction"/>
            <param name="softclipping_dist" value="15"/>
            <param name="reads_threshold" value="1.0"/>
            <param name="refalttiers"/>
            <output name="output_xlsx" file="Variant_Analyzer_summary_test.xlsx" decompress="true"/>
            <output name="outputFile_csv" file="Variant_Analyzer_summary_test.csv" decompress="true"/>
            <output name="output_xlsx2" file="Variant_Analyzer_allele_frequencies_test.xlsx" decompress="true"/>
            <output name="output_xlsx3" file="Variant_Analyzer_tiers_test.xlsx" decompress="true"/>
        </test>
    </tests>
    <help> <![CDATA[
**What it does**

Takes a VCF file with mutations, a BAM file of aligned raw reads, and JSON files
created by the tools **DCS mutations to tags/reads** and **DCS mutations to SSCS stats**
as input and calculates frequencies and stats for DCS mutations based on information
from the raw reads.

**Input**

**Dataset 1:** VCF file with duplex consensus sequence (DCS) mutations. E.g.
generated by the `FreeBayes <https://arxiv.org/abs/1207.3907>`_ or `LoFreq <https://academic.oup.com/nar/article/40/22/11189/1152727>`_ variant caller.

**Dataset 2:** BAM file of aligned raw reads. This file can be obtained by the
tool `Map with BWA-MEM <https://arxiv.org/abs/1303.3997>`_.

**Dataset 3:** JSON file generated by the **DCS mutations to tags/reads** tool
containing dictionaries of the tags of reads containing mutations
in the DCS.

**Dataset 4:** JSON file generated by the **DCS mutations to SSCS stats** tool
containing stats of tags that carry a mutation (and optionally the reference allele) in the SSCS at the same position a mutation
is called in the DCS.

**Output**

The output is three XLSX files containing frequencies and stats for DCS mutations based
on information from the raw reads, and a CSV file containing the summary information without color-coding. In addition, a tier-based
classification is provided based on the amount of support for a true variant call.


    ]]>
    </help>
    <expand macro="citation" />
</tool>
author	mheinzl
date	Wed, 24 Aug 2022 09:47:08 +0000
parents	e46d5e377760
children	97bd9c7a1b44