changeset 12:808f6cdf2e9f draft

Uploaded
author jose_duarte
date Fri, 26 Nov 2021 12:07:54 +0000
parents 6ef0c9b14d26
children f51906b109cd
files PhageDPO.xml
diffstat 1 files changed, 68 insertions(+), 0 deletions(-) [+]
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/PhageDPO.xml	Fri Nov 26 12:07:54 2021 +0000
@@ -0,0 +1,68 @@
+<tool id="PhageDPO" name="PhageDPO" version="0.1.0" python_template_version="3.5">
+    <description>
+Phage Depolymerase Finder
+    </description>
+    <requirements>
+        <requirement type="package" version="1.78">biopython</requirement>
+        <requirement type="package" version="0.24.1">scikit-learn</requirement>
+        <requirement type="package" version="1.21.2">numpy</requirement>
+        <requirement type="package" version="1.2.3">pandas</requirement>
+        <requirement type="package" version="1.0.0a2">propy3</requirement>
+    </requirements>
+    <command detect_errors="exit_code"><![CDATA[
+        python '${__tool_directory__}/DPOGALAXY.py' '${adv.model}' '${input1}'
+    ]]></command>
+    <inputs>
+        <param name="input1" type="data" format="fasta" label="Fasta file"/>
+        <!-- Optional settings, collapsed by default; the command reads the choice as $adv.model. -->
+        <section name="adv" title="Advanced Options" expanded="false">
+            <param name="model" type="select" label="Model">
+                <option value="SVM4311" selected="true">SVM4311</option>
+                <option value="ANN4311">ANN4311</option>
+                <option value="RF5748">RF5748</option>
+            </param>
+        </section>
+    </inputs>
+    <outputs>
+        <data name="output1" format="html"
+              from_work_dir="output.html" label="DPO Prediction"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="adv|model" value="SVM4311"/> <!-- "model" is declared inside the "adv" section, so it is addressed by its qualified name -->
+            <param name="input1" value="fasta_file.fasta"/>
+            <output name="output1" file="output.html"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+	
+========
+PhageDPO
+========
+
+Predicts the existence of phage polysaccharide depolymerases.
+
+PhageDPO is a Python script that predicts the existence of depolymerases (DPOs) using supervised machine learning models. Two different datasets were used to develop three models: the SVM and ANN models were built using a dataset with 45 features and 4311 examples (1437 positives and 2874 negatives), and the RF model was created using a dataset with 54 features and 5748 examples (1437 positives and 4311 negatives).
+
+**Inputs:**
+
+* Fasta file: a file in FASTA format containing the ORFs or the CDSs.
+
+**Advanced options:**
+	
+* Model: selection of the model to run: the SVM model (default), ANN model or the RF model. The SVM and ANN models focus on true positive detection. On the other hand, the RF model uses more negative data ensuring that all DPOs are identified, although with some misclassifications.	
+	
+**Outputs:**
+
+The tool outputs an HTML file containing the name of each sequence and its percentage of positive prediction for DPO.
+
+**Requirements:**
+	
+* Biopython
+* scikit-learn
+* NumPy
+* pandas
+* propy3
+
+    ]]></help>
+</tool>
\ No newline at end of file