changeset 2:ea2cccb9f73e draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/VirHunter commit c3685ed6a70b47012b62b95a2a3db062bd3b7475
author iuc
date Thu, 05 Jan 2023 14:27:54 +0000
parents 9b12bc1b1e2c
children 302332b914ef
files macros.xml predict.py tool-data/virhunter.loc.sample virhunter.xml
diffstat 4 files changed, 19 insertions(+), 20 deletions(-) [+]
line wrap: on
line diff
--- a/macros.xml	Wed Nov 30 17:31:52 2022 +0000
+++ b/macros.xml	Thu Jan 05 14:27:54 2023 +0000
@@ -1,6 +1,6 @@
 <macros>
     <token name="@TOOL_VERSION@">1.0.0</token>
-    <token name="@VERSION_SUFFIX@">1</token>
+    <token name="@VERSION_SUFFIX@">2</token>
     <xml name="requirements">
         <requirements>
             <requirement type="package" version="1.23.3">numpy</requirement>
--- a/predict.py	Wed Nov 30 17:31:52 2022 +0000
+++ b/predict.py	Thu Jan 05 14:27:54 2023 +0000
@@ -104,7 +104,7 @@
         df.groupby(["id", "length", 'RF_decision'], sort=False).size().unstack(fill_value=0)
     )
     df = df.reset_index()
-    df = df.reindex(['length', 'id', 'virus', 'plant', 'bacteria'], axis=1)
+    df = df.reindex(['length', 'id', 'virus', 'plant', 'bacteria'], axis=1).fillna(value=0)
     conditions = [
         (df['virus'] > df['plant']) & (df['virus'] > df['bacteria']),
         (df['plant'] > df['virus']) & (df['plant'] > df['bacteria']),
--- a/tool-data/virhunter.loc.sample	Wed Nov 30 17:31:52 2022 +0000
+++ b/tool-data/virhunter.loc.sample	Thu Jan 05 14:27:54 2023 +0000
@@ -1,29 +1,27 @@
 #This is a sample file distributed with Galaxy that enables tools
-#to use a directory of Samtools indexed sequences data files.  You will need
-#to create these data files and then create a fasta_indexes.loc file
+#to use a directory of virhunter hdf5 model files.  You will need
+#to create these data files and then create a virhunter.loc file
 #similar to this one (store it in this directory) that points to
-#the directories in which those files are stored. The fasta_indexes.loc
+#the directories in which those files are stored. The virhunter.loc
 #file has this format (white space characters are TAB characters):
 #
-# <unique_build_id>	<dbkey>	<display_name>	<file_base_path>
+# <value>	<name>	<path>
 #
-#So, for example, if you had hg19 Canonical indexed stored in
+#So, for example, if you had peach hdf5 model files stored in
 #
-# /depot/data2/galaxy/hg19/sam/,
+# /tool-data/weights/peach/,
 #
-#then the fasta_indexes.loc entry would look like this:
+#then the virhunter.loc entry would look like this:
 #
-#hg19canon	hg19	Human (Homo sapiens): hg19 Canonical	/depot/data2/galaxy/hg19/sam/hg19canon.fa
+#peach	peach	/data/databases/path/weights/peach
 #
-#and your /depot/data2/galaxy/hg19/sam/ directory
-#would contain hg19canon.fa and hg19canon.fa.fai files.
+#and your /tool-data/weights/peach/ directory
+#would contain model_5_500.h5, model_7_500.h5, model_10_500.h5 and model_5_1000.h5, model_7_1000.h5, model_10_1000.h5 files.
 #
-#Your fasta_indexes.loc file should include an entry per line for
+#Your virhunter.loc file should include an entry per line for
 #each index set you have stored.  The file in the path does actually
 #exist, but it should never be directly used. Instead, the name serves
 #as a prefix for the index file.  For example:
 #
-#hg18canon	hg18	Human (Homo sapiens): hg18 Canonical	/depot/data2/galaxy/hg18/sam/hg18canon.fa
-#hg18full	hg18	Human (Homo sapiens): hg18 Full	/depot/data2/galaxy/hg18/sam/hg18full.fa
-#hg19canon	hg19	Human (Homo sapiens): hg19 Canonical	/depot/data2/galaxy/hg19/sam/hg19canon.fa
-#hg19full	hg19	Human (Homo sapiens): hg19 Full	/depot/data2/galaxy/hg19/sam/hg19full.fa
\ No newline at end of file
+#peach	peach	/data/databases/path/weights/peach
+#grapevine	grapevine	/data/databases/path/weights/grapevine
--- a/virhunter.xml	Wed Nov 30 17:31:52 2022 +0000
+++ b/virhunter.xml	Thu Jan 05 14:27:54 2023 +0000
@@ -1,6 +1,6 @@
 <tool id="virhunter" name="virhunter" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.05">
     <description>
-        Deep learning method to identify viruses in sequencing datasets..
+        Deep learning method to identify viruses in sequencing datasets
     </description>
     <macros>
         <import>macros.xml</import>
@@ -24,7 +24,7 @@
 
     ]]></command>
     <inputs>
-        <param name="fasta_file" type="data" format="fasta" label="DNA FASTA file(s)"/>
+        <param name="fasta_file" type="data" format="fasta" label="DNA FASTA file"/>
         <param name="weights" type="select" label="Select a reference model" help="If your model of interest is not listed, contact the Galaxy team">
             <options from_data_table="virhunter_models">
                 <validator type="no_options" message="No models are available for the selected input dataset" />
@@ -49,7 +49,8 @@
 
     <help>
     <![CDATA[
-    VirHunter is a deep learning method that uses Convolutional Neural Networks (CNNs) and a Random Forest Classifier to identify viruses in sequening datasets. More precisely, VirHunter classifies previously assembled contigs as viral, host and bacterial (contamination).
+    VirHunter is a tool that uses deep learning to identify viruses in plant virome sequencing datasets.
+    In particular, VirHunter classifies previously assembled contigs into virus, host and bacteria classes.
  ]]></help>
     <expand macro="citations" />
 </tool>
\ No newline at end of file