Mercurial > repos > mheinzl > variant_analyzer2
view read2mut.xml @ 17:f06067bedfc5 draft
planemo upload for repository https://github.com/Single-Molecule-Genetics/VariantAnalyzerGalaxy/tree/master/tools/variant_analyzer commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
author | mheinzl |
---|---|
date | Mon, 22 Feb 2021 14:40:07 +0000 |
parents | 30aec05d04d3 |
children | 858ca8b7ad43 |
line wrap: on
line source
<?xml version="1.0" encoding="UTF-8"?>
<!--
    Galaxy tool wrapper for read2mut.py.
    Inputs: a VCF of DCS mutations, a BAM of aligned raw reads and two JSON
    stats files. Outputs: three XLSX reports and one CSV summary.
-->
<tool id="read2mut" name="Call specific mutations in reads:" version="2.0.3" profile="17.01">
    <description>Looks for reads with mutation at known positions and calculates frequencies and stats.</description>
    <macros>
        <import>va_macros.xml</import>
    </macros>
    <!-- Extends the shared "requirements" macro with the XLSX writer package. -->
    <expand macro="requirements">
        <requirement type="package" version="1.1.0">xlsxwriter</requirement>
    </expand>
    <!--
        The BAM file and its index are symlinked under fixed names in the job
        directory before the script is called.
        NOTE(review): presumably so the index sits next to the BAM under the
        conventional .bam.bai name; confirm against read2mut.py.
        $chimera_correction expands to the chimera correction flag or to an
        empty string (see the boolean parameter below).
    -->
    <command><![CDATA[
        ln -s '$file2' bam_input.bam &&
        ln -s '${file2.metadata.bam_index}' bam_input.bam.bai &&
        python '$__tool_directory__/read2mut.py'
        --mutFile '$file1'
        --bamFile bam_input.bam
        --inputJson '$file3'
        --sscsJson '$file4'
        --thresh '$thresh'
        --phred '$phred'
        --trim '$trim'
        $chimera_correction
        --outputFile '$output_xlsx'
        --outputFile_csv '$outputFile_csv'
        --outputFile2 '$output_xlsx2'
        --outputFile3 '$output_xlsx3'
    ]]>
    </command>
    <inputs>
        <param name="file1" type="data" format="vcf" label="DCS Mutation File" optional="false" help="VCF file with DCS mutations. See Help section below for a detailed explanation."/>
        <param name="file2" type="data" format="bam" label="BAM File of raw reads" optional="false" help="BAM file with aligned raw reads of selected tags."/>
        <param name="file3" type="data" format="json" label="JSON File with DCS tag stats" optional="false" help="JSON file generated by DCS mutations to tags/reads"/>
        <param name="file4" type="data" format="json" label="JSON File with SSCS tag stats" optional="false" help="JSON file generated by DCS mutations to SSCS stats."/>
        <param name="thresh" type="integer" label="Tag count threshold" value="0" help="Integer threshold for displaying mutations. Only mutations occurring in DCS of less than thresh tags are displayed. Default of 0 displays all."/>
        <param name="phred" type="integer" label="Phred quality score threshold" min="0" max="41" value="20" help="Integer threshold for Phred quality score. Only reads higher than this threshold are considered. Default = 20."/>
        <param name="trim" type="integer" label="Trimming threshold" value="10" help="Integer threshold for assigning mutations at start and end of reads to lower tier. Default 10."/>
        <param name="chimera_correction" type="boolean" label="Apply chimera correction?" truevalue="--chimera_correction" falsevalue="" checked="false" help="Count chimeric variants and correct the variant frequencies."/>
    </inputs>
    <outputs>
        <data name="output_xlsx" format="xlsx" label="${tool.name} on ${on_string}: XLSX summary"/>
        <data name="outputFile_csv" format="csv" label="${tool.name} on ${on_string}: CSV summary"/>
        <data name="output_xlsx2" format="xlsx" label="${tool.name} on ${on_string}: XLSX allele frequencies"/>
        <data name="output_xlsx3" format="xlsx" label="${tool.name} on ${on_string}: XLSX tiers"/>
    </outputs>
    <tests>
        <!-- Every test param must match a declared input; chimera correction stays at its default (off). -->
        <test>
            <param name="file1" value="FreeBayes_test.vcf"/>
            <param name="file2" value="Interesting_Reads_test.trim.bam"/>
            <param name="file3" value="tag_count_dict_test.json"/>
            <param name="file4" value="SSCS_counts_test.json"/>
            <param name="thresh" value="0"/>
            <param name="phred" value="20"/>
            <param name="trim" value="10"/>
            <output name="output_xlsx" file="Variant_Analyzer_summary_test.xlsx" decompress="true" lines_diff="10"/>
            <output name="outputFile_csv" file="Variant_Analyzer_summary_test.csv" decompress="true" lines_diff="10"/>
            <output name="output_xlsx2" file="Variant_Analyzer_allele_frequencies_test.xlsx" decompress="true" lines_diff="10"/>
            <output name="output_xlsx3" file="Variant_Analyzer_tiers_test.xlsx" decompress="true" lines_diff="10"/>
        </test>
    </tests>
    <!-- The help body is reStructuredText; it is kept at column 0 so it is not rendered as a block quote. -->
    <help> <![CDATA[
**What it does**

Takes a VCF file with mutations, a BAM file of aligned raw reads, and JSON files created by the tools **DCS mutations to tags/reads** and **DCS mutations to SSCS stats** as input and calculates frequencies and stats for DCS mutations based on information from the raw reads.

**Input**

**Dataset 1:** VCF file with duplex consensus sequence (DCS) mutations. E.g. generated by the `FreeBayes variant caller <https://arxiv.org/abs/1207.3907>`_.

**Dataset 2:** BAM file of aligned raw reads. This file can be obtained by the tool `Map with BWA-MEM <https://arxiv.org/abs/1303.3997>`_.

**Dataset 3:** JSON file generated by the **DCS mutations to tags/reads** tool containing dictionaries of the tags of reads containing mutations in the DCS.

**Dataset 4:** JSON file generated by the **DCS mutations to SSCS stats** tool containing stats of tags that carry a mutation in the SSCS at the same position a mutation is called in the DCS.

**Output**

The output consists of three XLSX files (summary, allele frequencies and tiers) and one CSV summary file containing frequencies and stats for DCS mutations based on information from the raw reads. In addition to that, a tier-based classification is provided based on the amount of support for a true variant call.
    ]]>
    </help>
    <expand macro="citation" />
</tool>