annotate phage_host_prediction/run_galaxy.xml @ 2:3e1e8be4e65c draft default tip

Uploaded
author pedro_araujo
date Fri, 02 Apr 2021 10:11:13 +0000
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
2
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
1 <tool id="run_galaxy" name="PhageHostPrediction" version="0.1.0" python_template_version="3.5">
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
2 <description>prediction of phage-bacteria interactions</description>
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
3 <!-- Python packages the tool declares as dependencies. BLAST and InterProScan, which the help text lists as needing a local installation, are not declared here. --> <requirements>
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
4 <requirement type="package">biopython</requirement>
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
5 <requirement type="package">scikit-learn</requirement>
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
6 <requirement type="package">numpy</requirement>
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
7 <requirement type="package">pandas</requirement>
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
8 <requirement type="package">scikit-bio</requirement>
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
9 </requirements>
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
10 <!-- Runs the wrapper script with python3 (the interpreter attribute is deprecated). Every argument is single-quoted so that a value containing spaces, or an empty value, stays exactly one positional argument. --> <command detect_errors="exit_code"><![CDATA[
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
11 python3 '$__tool_directory__/run_galaxy.py'
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
12 '$input_phage.phage_input_type' '$input_phage.phages'
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
13
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
14 '$input_bact.bact_input_type' '$input_bact.bacts'
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
15
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
16 '$adv.run_interpro' '$adv.ml_model'
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
17
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
18 ]]></command>
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
19 <inputs>
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
20 <!-- Phage source: either NCBI IDs typed as comma-separated text, or a FASTA dataset for a single organism. --> <conditional name="input_phage">
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
21 <param name="phage_input_type" type="select" label="Phage input:">
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
22 <option value="ID" selected="true">NCBI IDs (comma separated)</option>
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
23 <option value="seq_file">Sequence fasta file (only one organism)</option>
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
24 </param>
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
25 <when value="ID">
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
26 <param name="phages" type="text" label="Phage IDs"/>
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
27 </when>
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
28 <when value="seq_file">
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
29 <param name="phages" type="data" format="fasta" label="Phage fasta file"/>
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
30 </when>
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
31 </conditional>
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
32
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
33 <!-- Bacteria source: either NCBI IDs typed as comma-separated text, or a FASTA dataset for a single organism. --> <conditional name="input_bact">
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
34 <param name="bact_input_type" type="select" label="Bacteria input:">
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
35 <option value="ID" selected="true">NCBI IDs (comma separated)</option>
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
36 <option value="seq_file">Sequence fasta file (only one organism)</option>
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
37 </param>
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
38 <when value="ID">
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
39 <param name="bacts" type="text" label="Bacteria IDs"/>
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
40 </when>
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
41 <when value="seq_file">
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
42 <param name="bacts" type="data" format="fasta" label="Bacteria fasta file"/>
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
43 </when>
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
44 </conditional>
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
45
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
46 <!-- Optional settings passed to the script on the command line: the InterPro search toggle and the classifier choice. The section heading comes from "title"; the misspelled, unsupported "lable" attribute was removed. --> <section name="adv" title="Advanced Options" expanded="false">
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
47 <param type="boolean" name="run_interpro" label='Perform interpro search' checked="false" truevalue="True" falsevalue="False" />
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
48 <param type="select" name="ml_model" label="Machine learning model">
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
49 <option value="RandomForests" selected="true">Random Forests</option>
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
50 <option value="SVM">SVM</option>
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
51 </param>
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
52 </section>
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
53 </inputs>
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
54 <!-- Single result dataset: output.tsv from the job working directory, registered as a tabular file. --> <outputs>
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
55 <data name="output1" from_work_dir="output.tsv" format="tabular"/>
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
56 </outputs>
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
57 <help>
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
58
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
59 PhageHostPrediction
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
60 ===================
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
61
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
62 Predict interactions between phages and bacterial strains.
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
63
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
64 PhageHostPrediction is a Python script that predicts phage-host interactions for *E. coli*, *K. pneumoniae* and *A. baumannii* phages, using supervised machine learning models. The models were built from a dataset containing 252 features and 23 987 entries with balanced outputs of 'Yes' and 'No'. The positive interaction cases are described in the file "NCBI_Phage_Bacteria_Data.csv", contained within this tool, while the negative cases were randomly assigned by pairing phages with bacteria of different species.
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
65
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
66 The prediction relies on the complete host proteome and on phage tail proteins, which are inferred within the tool. This inference is made with a locally created database of phage protein functions, available in the file "phagesProteins.json". Unknown proteins are predicted against this database. To help with this prediction, InterProScan can optionally be used.
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
67
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
68 **Inputs:**
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
69
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
70 * phage/bacteria genome format: ID vs fasta;
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
71 * ID: must be a GenBank ID, with the proteome described;
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
72 * fasta file: must contain the whole proteome of the organism;
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
73 * machine learning model: random forests have better predictive power, while SVM can be slightly faster to run;
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
74 * InterPro search: should predict tail proteins with higher confidence, but significantly increases the run time.
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
75
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
76 **Outputs:**
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
77 This tool outputs a tabular file in which the phage-host pairs are listed in the first column and the prediction results in the second.
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
78
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
79 **Requirements:**
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
80
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
81 * Biopython
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
82 * Scikit-learn
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
83 * Numpy
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
84 * Pandas
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
85 * Scikit-bio
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
86 * BLAST_ - must be installed locally and available globally as an environment variable
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
87 * InterProScan_ (optional) - must be installed locally and available globally as an environment variable
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
88
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
89 .. _BLAST: https://ftp.ncbi.nlm.nih.gov/blast/executables/blast+/LATEST/
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
90 .. _InterProScan: http://www.ebi.ac.uk/interpro/download/
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
91
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
92 </help>
3e1e8be4e65c Uploaded
pedro_araujo
parents:
diff changeset
93 </tool>