diff read2mut.xml @ 6:11a2a34f8a2b draft

planemo upload for repository https://github.com/gpovysil/VariantAnalyzerGalaxy/tree/master/tools/variant_analyzer commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
author mheinzl
date Mon, 18 Jan 2021 09:49:15 +0000
parents 386438cd4c3b
children 84a1a3f70407
line wrap: on
line diff
--- a/read2mut.xml	Tue Oct 27 12:46:55 2020 +0000
+++ b/read2mut.xml	Mon Jan 18 09:49:15 2021 +0000
@@ -1,12 +1,16 @@
 <?xml version="1.0" encoding="UTF-8"?>
-<tool id="read2mut" name="Call specific mutations in reads:" version="1.0.5" profile="19.01">
+<tool id="read2mut" name="Call specific mutations in reads:" version="2.1.0" profile="19.01">
     <description>Looks for reads with mutation at known positions and calculates frequencies and stats.</description>
     <macros>
         <import>va_macros.xml</import>
     </macros>
-    <expand macro="requirements">
+    <requirements>
+        <requirement type="package" version="2.7">python</requirement>
+        <requirement type="package" version="1.4.0">matplotlib</requirement>
+        <requirement type="package" version="0.15">pysam</requirement>
         <requirement type="package" version="1.1.0">xlsxwriter</requirement>
-    </expand>
+        <requirement type="package" version="0.11.6">cyvcf2</requirement>
+    </requirements>
     <command><![CDATA[
         ln -s '$file2' bam_input.bam &&
         ln -s '${file2.metadata.bam_index}' bam_input.bam.bai &&
@@ -19,11 +23,15 @@
         --phred '$phred'
         --trim '$trim'
         $chimera_correction
+        --softclipping_dist '$softclipping_dist'
+        --reads_threshold '$reads_threshold'
         --outputFile '$output_xlsx'
+        --outputFile2 '$output_xlsx2'
+        --outputFile3 '$output_xlsx3'
     ]]>
     </command>
     <inputs>
-        <param name="file1" type="data" format="tabular" label="DCS Mutation File" optional="false" help="TABULAR file with DCS mutations. See Help section below for a detailed explanation."/>
+        <param name="file1" type="data" format="vcf" label="DCS Mutation File" optional="false" help="VCF file with DCS mutations. See Help section below for a detailed explanation."/>
         <param name="file2" type="data" format="bam" label="BAM File of raw reads" optional="false" help="BAM file with aligned raw reads of selected tags."/>
         <param name="file3" type="data" format="json" label="JSON File with DCS tag stats" optional="false" help="JSON file generated by DCS mutations to tags/reads"/>
         <param name="file4" type="data" format="json" label="JSON File with SSCS tag stats" optional="false" help="JSON file generated by DCS mutations to SSCS stats."/>
@@ -31,35 +39,43 @@
         <param name="phred" type="integer" label="Phred quality score threshold" min="0" max="41" value="20" help="Integer threshold for Phred quality score. Only reads higher than this threshold are considered. Default = 20."/>
         <param name="trim" type="integer" label="Trimming threshold" value="10" help="Integer threshold for assigning mutations at start and end of reads to lower tier. Default 10."/>
         <param name="chimera_correction" type="boolean" label="Apply chimera correction?" truevalue="--chimera_correction" falsevalue="" checked="False" help="Count chimeric variants and correct the variant frequencies."/>
+        <param name="softclipping_dist" type="integer" label="Distance between artifact and softclipping of the reads" min="1" value="15" help="Count mutation as an artifact if mutation lies within this parameter away from the softclipping part of the reads. Default = 15"/>
+        <param name="reads_threshold" type="float" label="Minimum percentage of softclipped reads in a family" min="0.0" max="1.0" value="1.0" help="Float number which specifies the minimum percentage of softclipped reads in a family to be considered in the softclipping tiers. Default: 1.0, meaning all reads of a family have to be softclipped."/>
     </inputs>
     <outputs>
-        <data name="output_xlsx" format="xlsx" label="${tool.name} on ${on_string}: XLSX"/>
+        <data name="output_xlsx" format="xlsx" label="${tool.name} on ${on_string}: XLSX summary"/>
+        <data name="output_xlsx2" format="xlsx" label="${tool.name} on ${on_string}: XLSX allele frequencies"/>
+        <data name="output_xlsx3" format="xlsx" label="${tool.name} on ${on_string}: XLSX tiers"/>
     </outputs>
     <tests>
         <test>
-            <param name="file1" value="DCS_Mutations_test_data_VA.tabular"/>
-            <param name="file2" value="Interesting_Reads_test_data_VA.trim.bam"/>
-            <param name="file3" value="tag_count_dict_test_data_VA.json"/>
-            <param name="file4" value="SSCS_counts_test_data_VA.json"/>
+            <param name="file1" value="FreeBayes_test.vcf"/>
+            <param name="file2" value="Interesting_Reads_test.trim.bam"/>
+            <param name="file3" value="tag_count_dict_test.json"/>
+            <param name="file4" value="SSCS_counts_test.json"/>
             <param name="thresh" value="0"/>
             <param name="phred" value="20"/>
             <param name="trim" value="10"/>
-            <param name="chimera_correction" value="False"/>
-            <output name="output_xlsx" file="mutant_reads_summary_short_trim_test_data_VA.xlsx" decompress="true" lines_diff="10"/>
+            <param name="chimera_correction"/>
+            <param name="softclipping_dist" value="15"/>
+            <param name="reads_threshold" value="1.0"/>
+            <output name="output_xlsx" file="Variant_Analyzer_summary_test.xlsx" decompress="true" lines_diff="10"/>
+            <output name="output_xlsx2" file="Variant_Analyzer_allele_frequencies_test.xlsx" decompress="true" lines_diff="10"/>
+            <output name="output_xlsx3" file="Variant_Analyzer_tiers_test.xlsx" decompress="true" lines_diff="10"/>
         </test>
     </tests>
     <help> <![CDATA[
 **What it does**
 
-Takes a tabular file with mutations, a BAM file of aligned raw reads, and JSON files 
+Takes a VCF file with mutations, a BAM file of aligned raw reads, and JSON files 
 created by the tools **DCS mutations to tags/reads** and **DCS mutations to SSCS stats** 
 as input and calculates frequencies and stats for DCS mutations based on information 
 from the raw reads.
 
 **Input** 
 
-**Dataset 1:** Tabular file with duplex consesus sequence (DCS) mutations as 
-generated by the **Variant Annotator** tool.
+**Dataset 1:** VCF file with duplex consensus sequence (DCS) mutations, e.g. 
+generated by the `FreeBayes variant caller <https://arxiv.org/abs/1207.3907>`_.
 
 **Dataset 2:** BAM file of aligned raw reads. This file can be obtained by the 
 tool `Map with BWA-MEM <https://arxiv.org/abs/1303.3997>`_.
@@ -74,7 +90,7 @@
 
 **Output**
 
-The output is an XLSX file containing frequencies stats for DCS mutations based 
+The output consists of three XLSX files containing frequencies and stats for DCS mutations based 
 on information from the raw reads. In addition to that a tier based 
 classification is provided based on the amout of support for a true variant call.