view viralverify.xml @ 0:ddbdb0763437 draft default tip

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/main/tools/viralverify commit daf56661e044ace256c1fd635b494d8998af3201
author iuc
date Sun, 06 Jul 2025 18:23:42 +0000
parents
children
line wrap: on
line source

<tool id="viralverify" name="viralverify" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="@PROFILE@">
   <description>Classify contigs as viral, non-viral or uncertain, based on gene content</description>
   <macros>
      <!-- Version of the wrapped package; reused in the tool version string and in the requirement below. -->
      <token name="@TOOL_VERSION@">1.1</token>
      <!-- Suffix appended after "+galaxy" in the tool version string. -->
      <token name="@VERSION_SUFFIX@">0</token>
      <!-- Value substituted into the profile attribute of the tool element. -->
      <token name="@PROFILE@">23.2</token>
   </macros>
   <!-- Package dependency of the tool; its version follows the @TOOL_VERSION@ token. -->
   <requirements>
      <requirement type="package" version="@TOOL_VERSION@">viralverify</requirement>
   </requirements>
   <!-- Command template: links the input FASTA to the fixed name 'contigs.fasta', then runs
        viralverify on it with the working directory as output directory (-o .). The outputs
        section collects files named contigs_* from that directory. The thread count is taken
        from GALAXY_SLOTS and falls back to 1. A non-zero exit code marks the job as failed. -->
   <command detect_errors="exit_code"><![CDATA[
ln -s '$input_fasta' 'contigs.fasta' &&
viralverify
   -f 'contigs.fasta'
   -o .
   --hmm '$hmm'
   ## Optional BLAST step: not tested yet, so it stays disabled.
   ##  #if '$locdb'
   ##     --db $locdb
   ##  #end if
   ##  #if '$histdb'
   ##     --db '$histdb'
   ##  #end if
   --thr $thr
   -t \${GALAXY_SLOTS:-1}
   $plasmid_detect
   ]]></command>
   <inputs>
      <!-- Mandatory inputs -->
      <!-- Contigs to classify (FASTA) -->
      <param name="input_fasta"
             type="data"
             format="fasta"
             label="Fasta file containing contigs to predict"/>
      <!-- HMM profile handed to viralverify through its hmm option -->
      <param argument="--hmm"
             type="data"
             format="hmm3"
             label="HMM profile"/>

      <!-- Optional inputs -->
      <!-- Draft of the BLAST database option (- -db). It is disabled because it could not be
           tested with a blastdbn file yet. -->
      <!-- <conditional name="do_run_blast"> 
         <param name="runblast" type="select" label="Run BLAST on input contigs?" >
            <option value="locdb" selected="true">With locally installed BLAST database </option>
            <option value="histdb" selected="false">With BLAST database from your history </option>
            <option value="no_blast">No </option>
         </param>
            <when value="locdb">
               <param name="database" type="select" multiple="true" optional="false" label="Nucleotide BLAST database">
                  <options from_data_table="blastdb" />
               </param>
               <param name="histdb" type="hidden" value="" />
               <param name="subject" type="hidden" value="" />
            </when>
            <when value="histdb">
               <param name="database" type="hidden" value="" />
               <param name="histdb" type="data" format="blastdbn" label="Nucleotide BLAST database" />
               <param name="subject" type="hidden" value="" />
            </when>
            <when value="no_blast"/>
      </conditional> -->

      <!-- Plasmid detection switch: adds -p to the command line when checked, nothing otherwise -->
      <param name="plasmid_detect"
             type="boolean"
             truevalue="-p"
             falsevalue=""
             checked="false"
             label="Enable plasmid prediction?"/>
      <!-- Sensitivity threshold, default 7 -->
      <param argument="--thr"
             type="integer"
             value="7"
             label="Sensitivity threshold"
             help="Minimal absolute score to classify sequence."/>
      <!-- Selection of the datasets to create; each option value is checked by a filter in the outputs section -->
      <param name="outputs"
             type="select"
             multiple="true"
             label="Output files to generate"
             help="If plasmid prediction is not enabled, plasmid outputs won't be generated.">
         <option value="viral_pred" selected="true">Predicted viral contigs (FASTA)</option>
         <option value="Unc_viral_pred" selected="true">Uncertain viral contigs (FASTA)</option>
         <option value="plasmid_pred" selected="false">Predicted plasmid contigs(FASTA) - plasmid prediction must be checked</option>
         <option value="Unc_plasmid_pred" selected="false">Uncertain plasmid contigs (FASTA)</option>
         <option value="chromosomal_pred" selected="true">Predicted chromosomal contigs(FASTA) - plasmid prediction must be checked</option>
         <option value="result_table" selected="true">Table with contig predictions</option>
         <option value="prodigal_report" selected="false">Prodigal report</option>
         <option value="prodigal_genbank" selected="false">Prodigal GenBank file</option>
         <option value="prodigal_proteins_circ" selected="false">Prodigal proteins (circular)</option>
         <option value="prodigal_proteins" selected="false">Prodigal proteins (linear)</option>
         <option value="hmmsearch_report" selected="false">HMM search report</option>
         <option value="hmmsearch_domtblout" selected="false">hmmsearch output</option>
      </param>
   </inputs>
   <outputs>
      <!-- Every dataset below is created only when its key is selected in the "outputs"
           parameter. The two plasmid datasets additionally require plasmid_detect to be set.
           All files are picked up from the job working directory via from_work_dir. -->

      <!-- VIRAL PREDICTIONS -->
      <data name="predicted_viral" format="fasta" label="${tool.name} on ${on_string}: Predicted viral contigs" from_work_dir="Prediction_results_fasta/contigs_virus.fasta">
         <filter>outputs and 'viral_pred' in outputs</filter>
      </data>
      <data name="uncertain_viral" format="fasta" label="${tool.name} on ${on_string}: Uncertain viral contigs" from_work_dir="Prediction_results_fasta/contigs_virus_uncertain.fasta">
         <filter>outputs and 'Unc_viral_pred' in outputs</filter>
      </data>
      <!-- PLASMID PREDICTIONS -->
      <data name="predicted_plasmid" format="fasta" label="${tool.name} on ${on_string}: Predicted plasmid contigs" from_work_dir="Prediction_results_fasta/contigs_plasmid.fasta">
         <filter>outputs and 'plasmid_pred' in outputs</filter>
         <filter>plasmid_detect</filter>
      </data>
      <data name="uncertain_plasmid" format="fasta" label="${tool.name} on ${on_string}: Uncertain plasmid contigs" from_work_dir="Prediction_results_fasta/contigs_plasmid_uncertain.fasta">
         <filter>outputs and 'Unc_plasmid_pred' in outputs</filter>
         <filter>plasmid_detect</filter>
      </data>
      <!-- CHROMOSOME PREDICTIONS -->
      <!-- NOTE(review): from_work_dir points at contigs_uncertain_plasmid.fasta, which neither
           mentions "chromosom" nor follows the contigs_<class>[_uncertain].fasta pattern of the
           other files. Confirm the real file name written by viralVerify. Also, the matching
           option label in the inputs says plasmid prediction must be checked, but no
           plasmid_detect filter is applied here. TODO confirm which one is intended. -->
      <data name="chromosomal" format="fasta" label="${tool.name} on ${on_string}: Predicted chromosomal contigs" from_work_dir="Prediction_results_fasta/contigs_uncertain_plasmid.fasta">
         <filter>outputs and 'chromosomal_pred' in outputs</filter>
      </data>
      <!-- TABLE ALL PREDICTIONS + FEATURES -->
      <data name="result_table" format="csv" label="${tool.name} on ${on_string}: Table with contig predictions" from_work_dir="contigs_result_table.csv">
         <filter>outputs and 'result_table' in outputs</filter>
      </data>
      <!-- PRODIGAL OUTPUTS -->
      <data name="prodigal_report" format="txt" label="${tool.name} on ${on_string}: Prodigal report" from_work_dir="contigs_prodigal.log">
         <filter>outputs and 'prodigal_report' in outputs</filter>
      </data>
      <!-- The file carries a .fa extension but is declared as genbank here. -->
      <data name="prodigal_genbank" format="genbank" label="${tool.name} on ${on_string}: Prodigal genbank file" from_work_dir="contigs_genes.fa">
         <filter>outputs and 'prodigal_genbank' in outputs</filter>
      </data>
      <data name="prodigal_proteins_circ" format="fasta" label="${tool.name} on ${on_string}: Prodigal proteins (circular)" from_work_dir="contigs_proteins_circ.fa">
         <filter>outputs and 'prodigal_proteins_circ' in outputs</filter>
      </data>
      <data name="prodigal_proteins" format="fasta" label="${tool.name} on ${on_string}: Prodigal proteins (linear)" from_work_dir="contigs_proteins.fa">
         <filter>outputs and 'prodigal_proteins' in outputs</filter>
      </data>
      <!-- HMMSEARCH OUTPUTS -->
      <data name="hmmsearch_report" format="txt" label="${tool.name} on ${on_string}: HMM search report" from_work_dir="contigs_out_pfam.txt">
         <filter>outputs and 'hmmsearch_report' in outputs</filter>
      </data>
      <data name="hmmsearch_domtblout" format="txt" label="${tool.name} on ${on_string}: hmmsearch output" from_work_dir="contigs_domtblout">
         <filter>outputs and 'hmmsearch_domtblout' in outputs</filter>
      </data>
   </outputs>
   <tests>
      <!-- Single test: plasmid prediction enabled and all 12 output keys requested, so all 12
           datasets are expected. -->
      <test expect_num_outputs="12">
         <param name="input_fasta" value="test.fa"/>
         <param name="hmm" value="test.hmm"/>
         <param name="plasmid_detect" value="true"/>
         <param name="thr" value="7"/>
         <param name="outputs" value="viral_pred,Unc_viral_pred,chromosomal_pred,plasmid_pred,Unc_plasmid_pred,result_table,prodigal_report,prodigal_genbank,prodigal_proteins_circ,prodigal_proteins,hmmsearch_report,hmmsearch_domtblout"/>
         <!-- Belongs to the disabled BLAST conditional in the inputs section. -->
         <!-- <conditional name="do_run_blast">
            <param name="runblast" value="no_blast" />
         </conditional> -->
         <!-- These five sequence headers are expected in the uncertain viral FASTA. -->
         <output name="uncertain_viral">
            <assert_contents>
               <has_text text=">NC_045512.2 Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1, complete genome" />
               <has_text text=">NC_001416.1 Enterobacteria phage lambda, complete genome" />
               <has_text text=">NZ_CP021331.1 Maritalea myrionectae strain HL2708#5 plasmid pHL2708X3, complete sequence" />
               <has_text text=">CP045262.1 Yersinia pestis strain SCPM-O-B-5942 (I-2638) plasmid pPCP, complete sequence" />
               <has_text text=">NC_000913.3:1-30000 Escherichia coli str. K-12 substr. MG1655, complete genome" />
            </assert_contents>
         </output>
         <!-- The other FASTA outputs must not contain the SARS-CoV-2 record. These checks also
              pass on an empty file, so they do not prove that a file was collected. -->
         <output name="predicted_viral">
            <assert_contents>
               <not_has_text text=">NC_045512.2 Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1, complete genome" />
            </assert_contents>
         </output>
         <output name="predicted_plasmid">
            <assert_contents>
               <not_has_text text=">NC_045512.2 Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1, complete genome" />
            </assert_contents>
         </output>
         <output name="uncertain_plasmid">
            <assert_contents>
               <not_has_text text=">NC_045512.2 Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1, complete genome" />
            </assert_contents>
         </output>
         <output name="chromosomal">
            <assert_contents>
               <not_has_text text=">NC_045512.2 Severe acute respiratory syndrome coronavirus 2 isolate Wuhan-Hu-1, complete genome" />
            </assert_contents>
         </output>
         <!-- Header row of the prediction table. -->
         <output name="result_table">
            <assert_contents>
               <has_text text="Contig name,Prediction,Length,Circular,Score,Pfam hits"/>
            </assert_contents>
         </output>
         <output name="hmmsearch_domtblout">
            <assert_contents>
               <has_text text="# Program:"/>
            </assert_contents>
         </output>
         <!-- Disabled: the hmmsearch_report output comes back empty, while all other outputs
              correspond to the expected results. Cause not yet identified. -->
         <!-- <output name="hmmsearch_report">
            <assert_contents>
               <has_text text="# hmmsearch :: search profile(s) against a sequence database"/>
            </assert_contents>
         </output> -->
         <output name="prodigal_genbank">
            <assert_contents>
               <has_text text="NC_045512.2 Severe acute respiratory syndrome"/>
            </assert_contents>
         </output>
         <output name="prodigal_report">
            <assert_contents>
               <has_text text="PRODIGAL v2.6.3 [February, 2016]"/>
            </assert_contents>
         </output>
         <output name="prodigal_proteins_circ">
            <assert_contents>
               <has_text text=">NC_045512.2_1"/>
            </assert_contents>
         </output>
         <output name="prodigal_proteins">
            <assert_contents>
               <has_text text=">NC_045512.2_1"/>
            </assert_contents>
         </output>
      </test>
   </tests>
   <help>
<![CDATA[
viralVerify classifies contigs (output of metaviralSPAdes or other assemblers) as viral, non-viral or uncertain, based on gene content. Also for non-viral contigs it can optionally provide plasmid/non-plasmid classification.

viralVerify predicts genes in the contig using Prodigal in the metagenomic mode, runs hmmsearch on the predicted proteins and classifies the contig as viral or non-viral by applying the Naive Bayes classifier (NBC). For the set of predicted HMMs, viralVerify uses a trained NBC to classify this set as viral or chromosomal.

To improve results in the case of metagenomes with possible host contamination, we recommend that users filter out reads that align to the host genome prior to assembly. Since viralVerify is based on gene classification, it can be used on contigs of any length, and short viruses can be detected as long as they contain a recognizable virus-specific gene.

More information about this tool is available from the `viralVerify package <https://github.com/ablab/viralVerify>`_ and the `viralVerify bioconda package <https://anaconda.org/bioconda/viralverify>`_.
]]>
   </help>
   <citations>
      <!-- BibTeX entry for the upstream repository. Author names are separated by "and",
           as BibTeX requires; comma-separated names are not parsed as separate authors. -->
      <citation type="bibtex">
@misc{viralverify2023,
   author = {Mikhail Raiko and Dmitry Antipov and Sam Nooij},
   title = {ViralVerify: Classify contigs as viral or non-viral based on gene content},
   year = {2021},
   howpublished = {\url{https://github.com/ablab/viralVerify/}}
}
      </citation>
   </citations>
</tool>