view virheat.xml @ 3:b5411b5e8259 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/virheat commit 8f495733edea9287f6392a893c9429fd16b1cb1d
author iuc
date Mon, 26 Aug 2024 18:10:25 +0000
parents 927e55d1aa97
children
line wrap: on
line source

<tool id="virheat" name="VirHEAT: visualize allele frequencies"
      version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@"
      profile="@PROFILE@">
    <!-- Short description; it reads as a continuation of the tool's name attribute. -->
    <description>
        from VCFs as a heatmap and map mutations to respective genes
    </description>
    <!-- Shared definitions are imported from macros.xml. The macros expanded below
         (biotools, requirements, version) are not defined in this file; presumably
         they come from that import. -->
    <macros>
        <import>macros.xml</import>
    </macros>
    <expand macro="biotools"/>
    <expand macro="requirements"/>
    <expand macro="version"/>
    <command detect_errors="exit_code"><![CDATA[
## File name of the plot that virheat writes into the output directory. The
## "outfile" output collects it through its from_work_dir pattern, so a separate
## local name is used here instead of shadowing that output variable.
#set $plot_name = 'tmp_output.' + str($advanced.output_type)
#set $output_dir = 'output_dir/'

mkdir -p ./output_dir &&
## vcfs_to_dir.sh is the configfile of this tool; it links every input VCF into vcfs_dir/.
bash vcfs_to_dir.sh &&
virheat
    'vcfs_dir/'
    '$output_dir'
    --name '$plot_name'
    -r '$ref_id'
    ## Genome annotation: either a plain length or a GFF3 file plus optional annotation types.
    #if $genome_data.choice == 'length'
        -l $genome_data.genome_length
    #else
        -g '$genome_data.gff3_file'
        #if $genome_data.annotations
            -a
            #for $an in $genome_data.annotations
                '${an.ann}'
            #end for
        #end if
    #end if
    ## The threshold flag is only emitted for a non-empty, non-zero value.
    #if $varfreq
        -t $varfreq
    #end if
    $delete
    -n $delete_n
    #if $zoom.choice == 'yes'
        -z $zoom.zoom_start $zoom.zoom_stop
    #end if
    $sort
    --min-cov $min_cov
    ## One -s group per score table: file, position column, score column, label.
    #for $sc in $scores
        -s '${sc.scores_file}' '${sc.pos_col}' '${sc.score_col}'
        #if $sc.score_name
            '${sc.score_name}'
        #else
            ## Fall back to the dataset name as label. Dataset names are user-controlled,
            ## so single quotes are replaced to keep the name inside the shell quoting
            ## (same treatment as the sample names in vcfs_to_dir.sh).
            #set $score_label = $sc.scores_file.element_identifier.replace("'", '_')
            '$score_label'
        #end if
    #end for
    ]]></command>
    <configfiles>
        <!-- Shell script invoked by the command section: symlinks each input VCF
             into vcfs_dir/ under a sanitized name that ends in .vcf. -->
        <configfile filename="vcfs_to_dir.sh"><![CDATA[
mkdir -p 'vcfs_dir/' &&
#for $vcf in $sinputs
## The element identifier becomes the link name; single quotes and slashes are replaced by underscores.
#set $link_name = $vcf.element_identifier.replace("'", '_').replace('/', '_')
#if not $link_name.endswith('.vcf')
#set $link_name = $link_name + '.vcf'
#end if
ln -s '$vcf' 'vcfs_dir/${link_name}' &&
#end for
echo "VCF files are placed in vcfs_dir/."
    ]]></configfile>
    </configfiles>
    <inputs>
        <!-- Multi-select dataset input: accepts several individual VCF datasets as well
             as a dataset collection, which is what the help text promises and what the
             tests supply (comma-separated dataset files). -->
        <param name="sinputs" type="data" format="vcf" multiple="true"
               label="Variant lists data"
               help="Select at least two datasets (or a dataset collection) with variant lists. Datasets are expected to represent individual samples and dataset names will be used as sample identifiers."/>
        <!-- Source of the genome annotation: a plain genome length (-l) or a GFF3 file (-g). -->
        <conditional name="genome_data">
            <param name="choice" type="select" label="Data for genomic annotation">
                <option value="length">length of the genome</option>
                <option value="gff3">gff3 file</option>
            </param>
            <when value="length">
                <param name="genome_length"
                       argument="-l"
                       type="integer"
                       min="0"
                       value="0"
                       label="Genome length"/>
            </when>
            <when value="gff3">
                <param name="gff3_file"
                       argument="-g"
                       type="data"
                       format="gff3"
                       label="GFF3 file"
                       help="GFF3 file contains annotations for the genome and allows to display genes harboring a mutation. Ensure that the file follows the GFF3 specification."/>
                <!-- Every entry of this repeat is appended as one value after -a in the command section. -->
                <repeat name="annotations"
                        title="Annotations"
                        help="Add annotations from the GFF3 file to display. If no annotations are added, 'gene' will be used by default.">
                    <param name="ann" type="text" value="gene" optional="false" label="Annotation name"/>
                </repeat>
            </when>
        </conditional>
        <!-- Reference selection and variant filters. -->
        <param name="ref_id"
               argument="-r"
               type="text"
               optional="false"
               label="Reference identifier"
               help="Specify the ref id and generate plots for vcf with multiple refs, e.g. segmented viruses."/>
        <param name="varfreq"
               argument="-t"
               type="float"
               min="0"
               max="1"
               value="0"
               optional="true"
               label="Variant Frequency Threshold"
               help="Only plot variants with an intrasample frequency above this threshold in at least one sample."/>
        <param argument="--delete"
               type="boolean"
               truevalue="--delete"
               falsevalue="--no-delete"
               checked="true"
               label="Delete mutations that are present in all samples and their maximum frequency divergence is smaller than 0.5"/>
        <param name="delete_n"
               argument="-n"
               type="integer"
               min="0"
               value="0"
               label="Do not show mutations that occur n times or less"/>
        <!-- Optional restriction of the plot to a genomic window; passed as -z start stop. -->
        <conditional name="zoom">
            <param name="choice" type="select" label="Restrict the plot to a specific genomic region">
                <option value="yes">Yes</option>
                <option value="no">No</option>
            </param>
            <when value="yes">
                <param name="zoom_start" type="integer" value="0" label="Start"/>
                <param name="zoom_stop" type="integer" value="0" label="End"/>
            </when>
            <when value="no"/>
        </conditional>
        <param argument="--sort"
               type="boolean"
               truevalue="--sort"
               falsevalue="--no-sort"
               checked="false"
               label="Sort sample names alphanumerically"/>
        <param argument="--min-cov"
               type="integer"
               min="1"
               max="1000"
               value="20"
               label="Display mutations covered at least x time"/>
        <!-- Each entry contributes one -s group (file, position column, score column, label) to the command line. -->
        <repeat name="scores"
                title="Scores"
                help="Include visualizations of additional scores (e.g., MAVE scores for binding affinity, expression level, antibody escape, etc.) mapped to mutations on the heatmap.">
            <param name="scores_file"
                   type="data"
                   format="csv"
                   optional="false"
                   label="CSV file"
                   help="CSV file containing scores"/>
            <!-- Both column selectors are populated from the column_names metadata of the chosen CSV dataset. -->
            <param name="pos_col"
                   type="select"
                   label="Mutation position column"
                   help="The name of the column in the CSV file that contains the mutation positions in classic notation (e.g., T430Y)">
                <options>
                    <filter type="data_meta" ref="scores_file" key="column_names"/>
                </options>
            </param>
            <param name="score_col"
                   type="select"
                   label="Scores column"
                   help="The name of the column in the CSV file that contains the scores">
                <options>
                    <filter type="data_meta" ref="scores_file" key="column_names"/>
                </options>
            </param>
            <param name="score_name"
                   type="text"
                   optional="true"
                   label="Score name"
                   help="A descriptive name for the score to be used as labels in the plot. If not specified, the CSV file name will be used as the label instead"/>
        </repeat>
        <!-- The selected value becomes the extension of the plot file name and drives the output datatype. -->
        <section name="advanced" title="Image Properties" expanded="true">
            <param name="output_type" type="select" label="Plot output format">
                <option value="pdf" selected="true">PDF</option>
                <option value="png">PNG</option>
                <option value="svg">SVG</option>
                <option value="tiff">TIFF</option>
                <option value="bmp">BMP</option>
                <option value="jpeg">JPEG</option>
            </param>
        </section>
    </inputs>
    <outputs>
        <!-- Single plot output collected from output_dir/; the datatype defaults to pdf
             and is switched according to the selected output format. -->
        <data name="outfile"
              format="pdf"
              from_work_dir="output_dir/tmp_output.*"
              label="VirHEAT: Variant-Frequency Plot on ${on_string}: ${advanced.output_type}">
            <change_format>
                <when input="advanced.output_type" value="png" format="png"/>
                <when input="advanced.output_type" value="svg" format="svg"/>
                <when input="advanced.output_type" value="tiff" format="tiff"/>
                <when input="advanced.output_type" value="bmp" format="bmp"/>
                <!-- The jpeg option maps to the jpg datatype. -->
                <when input="advanced.output_type" value="jpeg" format="jpg"/>
            </change_format>
        </data>
    </outputs>
    <tests>
        <test expect_num_outputs="1">
            <!-- test1: GFF3 annotation, zoomed region, default PDF output -->
            <param name="sinputs" value="day_0.vcf,day_2.vcf,day_4.vcf,day_18.vcf,day_26.vcf,day_32.vcf,day_47.vcf,day_55.vcf,day_60.vcf,day_76.vcf,day_83.vcf,day_85.vcf,day_91.vcf,day_96.vcf,day_100.vcf,day_103.vcf,day_111.vcf"/>
            <conditional name="zoom">
                <param name="choice" value="yes"/>
                <param name="zoom_start" value="22599"/>
                <param name="zoom_stop" value="23278"/>
            </conditional>
            <conditional name="genome_data">
                <param name="choice" value="gff3"/>
                <param name="gff3_file" value="SARS-CoV-2.gff3"/>
            </conditional>
            <output name="outfile" ftype="pdf">
                <assert_contents>
                    <has_text text="day"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="1">
            <!-- test2: genome length, zoomed region, SVG output -->
            <param name="sinputs" value="day_76.vcf,day_83.vcf,day_91.vcf,day_103.vcf" ftype="vcf" />
            <conditional name="genome_data">
                <param name="choice" value="length"/>
                <param name="genome_length" value="29903"/>
            </conditional>
            <conditional name="zoom">
                <param name="choice" value="yes"/>
                <param name="zoom_start" value="22599"/>
                <param name="zoom_stop" value="23278"/>
            </conditional>
            <!-- Select SVG explicitly; without this the tool produces the default PDF
                 and the ftype check below cannot match. -->
            <section name="advanced">
                <param name="output_type" value="svg"/>
            </section>
            <output name="outfile" ftype="svg">
                <assert_contents>
                    <has_text text="day"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="1">
            <!-- test3: GFF3 annotation, frequency threshold, delete, delete-n, no zoom, PNG output -->
            <param name="sinputs" value="day_76.vcf,day_83.vcf,day_91.vcf,day_103.vcf" ftype="vcf" />
            <param name="varfreq" value="0.1"/>
            <param name="delete" value="true"/>
            <param name="delete_n" value="1"/>
            <conditional name="genome_data">
                <param name="choice" value="gff3"/>
                <param name="gff3_file" value="SARS-CoV-2.gff3"/>
            </conditional>
            <conditional name="zoom">
                <param name="choice" value="no"/>
            </conditional>
            <!-- Select PNG explicitly; without this the tool produces the default PDF
                 and the ftype check below cannot match. -->
            <section name="advanced">
                <param name="output_type" value="png"/>
            </section>
            <output name="outfile" ftype="png">
                <!-- NOTE(review): a text search in a binary PNG may be fragile; consider
                     a size-based assertion once the expected size is known. -->
                <assert_contents>
                    <has_text text="day"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="1">
            <!-- test4: genome length, threshold, no delete, sort, min-cov, zoomed region, TIFF output -->
            <param name="sinputs" value="day_0.vcf,day_2.vcf,day_4.vcf,day_18.vcf,day_26.vcf,day_32.vcf,day_47.vcf,day_55.vcf,day_60.vcf,day_76.vcf,day_83.vcf,day_85.vcf,day_91.vcf,day_96.vcf,day_100.vcf,day_103.vcf,day_111.vcf"/>
            <param name="varfreq" value="0.1"/>
            <param name="delete" value="false"/>
            <param name="delete_n" value="0"/>
            <param name="sort" value="true"/>
            <param name="min_cov" value="25"/>
            <conditional name="zoom">
                <param name="choice" value="yes"/>
                <param name="zoom_start" value="22599"/>
                <param name="zoom_stop" value="23278"/>
            </conditional>
            <section name="advanced">
                <param name="output_type" value="tiff"/>
            </section>
            <conditional name="genome_data">
                <param name="choice" value="length"/>
                <param name="genome_length" value="29903"/>
            </conditional>
            <!-- Check the datatype as well, so the change_format rule for tiff is exercised. -->
            <output name="outfile" ftype="tiff">
                <!-- NOTE(review): a text search in a binary TIFF may be fragile; consider
                     a size-based assertion once the expected size is known. -->
                <assert_contents>
                    <has_text text="day"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="1">
            <!-- test5: GFF3 annotation with two annotation types (gene, CDS), zoomed region, default PDF output -->
            <param name="sinputs" value="day_0.vcf,day_2.vcf,day_4.vcf,day_18.vcf,day_26.vcf,day_32.vcf,day_47.vcf,day_55.vcf,day_60.vcf,day_76.vcf,day_83.vcf,day_85.vcf,day_91.vcf,day_96.vcf,day_100.vcf,day_103.vcf,day_111.vcf"/>
            <conditional name="zoom">
                <param name="choice" value="yes"/>
                <param name="zoom_start" value="22599"/>
                <param name="zoom_stop" value="23278"/>
            </conditional>
            <conditional name="genome_data">
                <param name="choice" value="gff3"/>
                <param name="gff3_file" value="SARS-CoV-2.gff3"/>
                <repeat name="annotations">
                    <param name="ann" value="gene"/>
                </repeat>
                <repeat name="annotations">
                    <param name="ann" value="CDS"/>
                </repeat>
            </conditional>
            <output name="outfile" ftype="pdf">
                <assert_contents>
                    <has_text text="day"/>
                </assert_contents>
            </output>
        </test>
        <test expect_num_outputs="1">
            <!-- test6: two score tables (binding, expression) on top of a GFF3-annotated, zoomed plot; default PDF output -->
            <param name="sinputs" value="day_0.vcf,day_2.vcf,day_4.vcf,day_18.vcf,day_26.vcf,day_32.vcf,day_47.vcf,day_55.vcf,day_60.vcf,day_76.vcf,day_83.vcf,day_85.vcf,day_91.vcf,day_96.vcf,day_100.vcf,day_103.vcf,day_111.vcf"/>
            <conditional name="zoom">
                <param name="choice" value="yes"/>
                <param name="zoom_start" value="22599"/>
                <param name="zoom_stop" value="23278"/>
            </conditional>
            <conditional name="genome_data">
                <param name="choice" value="gff3"/>
                <param name="gff3_file" value="SARS-CoV-2.gff3"/>
            </conditional>
            <!-- First score table: binding -->
            <repeat name="scores">
                <param name="scores_file" value="MaveBindRBD.csv"/>
                <param name="pos_col" value="AA"/>
                <param name="score_col" value="score"/>
                <param name="score_name" value="binding"/>
            </repeat>
            <!-- Second score table: expression -->
            <repeat name="scores">
                <param name="scores_file" value="MaveExpRBD.csv"/>
                <param name="pos_col" value="AA"/>
                <param name="score_col" value="score"/>
                <param name="score_name" value="expression"/>
            </repeat>
            <output name="outfile" ftype="pdf">
                <assert_contents>
                    <has_text text="day"/>
                </assert_contents>
            </output>
        </test>
    </tests>
    <!-- The help text consists solely of the @HELP_HEADER@ token, which is not defined in this file. -->
    <help><![CDATA[
@HELP_HEADER@
    ]]></help>
    <!-- Citations are supplied by the "citations" macro, which is not defined in this file. -->
    <expand macro="citations"/>
</tool>