comparison PDAUG_Word_Vector_Model/PDAUG_Word_Vector_Model.xml @ 0:3ce435b8d648 draft

"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
author jay
date Wed, 28 Oct 2020 02:21:16 +0000
parents
children c6a1b09d8846
comparison
equal deleted inserted replaced
-1:000000000000 0:3ce435b8d648
1 <tool id="pdaug_word_vector_model" name="PDAUG Word Vector Model" python_template_version="3.7" version="0.1.0">
2 <description>Generates the skip-gram model</description>
3 <requirements>
4 <requirement type="package" version="1.0.3">pandas</requirement>
5 <requirement type="package" version="1.76">biopython</requirement>
6 <requirement type="package" version="3.5">nltk</requirement>
7 <requirement type="package" version="3.8.0">gensim</requirement>
8 <requirement type="package" version="0.23.1">scikit-learn</requirement>
9 <requirement type="package" version="1.18.4">numpy</requirement>
10 </requirements>
11 <stdio>
12 <exit_code range="1" level="fatal" />
13 </stdio>
14 <command detect_errors="exit_code"><![CDATA[
15
16 python '$__tool_directory__/PDAUG_Word_Vector_Model.py' -I '$input' -M '$meanCount' -W '$window' -O '$OutFile'
17
18 ]]></command>
19
20 <inputs><!-- Values are passed to PDAUG_Word_Vector_Model.py as -I, -M and -W respectively. -->
21 <param name="input" type="data" format="fasta" label="Input fasta file" argument="--Input" help="Input fasta file with peptides"/>
22 <param name="meanCount" type="integer" value="0" label="Min Count" argument="--min_count" help="Ignores all words with total frequency lower than this"/><!-- name stays "meanCount": the command template and the test reference it -->
23 <param name="window" type="integer" value="5" label="window" argument="--window" help="Maximum distance between the current and predicted word within a sentence"/>
24 </inputs>
25
26 <outputs>
27 <data name='OutFile' format='txt' label="${tool.name} on $on_string - (text)" />
28 </outputs>
29
30 <tests>
31 <test>
32 <param name="input" value="test.fasta"/>
33 <param name="meanCount" value="0"/>
34 <param name="window" value="5"/>
35 <output name="OutFile" value="model.txt" lines_diff="2268" />
36 </test>
37 </tests>
38 <help><![CDATA[
39 .. class:: infomark
40
41 **What it does**
42
43 This tool generates a skip-gram model: a neural network whose inputs and outputs are one-hot vectors derived from training data made up of input-word/output-word pairs.
44
45 -----
46
47 **Inputs**
48 * **--Input** Fasta file with protein sequences.
49 * **--min_count** Ignores all words with total frequency lower than this
50 * **--window** Maximum distance between the current and predicted word within a sentence, accepts integer value.
51
52 -----
53
54 **Outputs**
55 * **--OutFile** Returns the model as a text file ("model.txt").
56
57 ]]></help>
58
59 <citations>
60
61 <citation type="bibtex">
62 @misc{PDAUGGITHUB,
63 author = {Joshi, Jayadev and Blankenberg, Daniel},
64 year = {2020},
65 title ={PDAUG - a Galaxy based toolset for peptide library analysis, visualization, and machine learning modeling},
66 publisher = {GitHub},
67 journal = {GitHub repository},
68 url =
69 {https://github.com/jaidevjoshi83/pdaug.git},
70 }
71 </citation>
72
73 <citation type="bibtex">
74 @inproceedings{rehurek_lrec,
75 title = {{Software Framework for Topic Modelling with Large Corpora}},
76 author = {Radim {\v R}eh{\r u}{\v r}ek and Petr Sojka},
77 booktitle = {{Proceedings of the LREC 2010 Workshop on New
78 Challenges for NLP Frameworks}},
79 pages = {45--50},
80 year = 2010,
81 month = May,
82 day = 22,
83 publisher = {ELRA},
84 address = {Valletta, Malta},
85 url={http://is.muni.cz/publication/884893/en},
86 language={English}
87 }
88 </citation>
89
90 <citation type="bibtex">
91 @article{Md_Nafiz,
92 title= {Identifying antimicrobial peptides using word embedding with deep recurrent neural networks},
93 volume={35},
94 DOI={10.1093/bioinformatics/bty937},
95 number={12},
96 year={2018},
97 pages={2009--2016},
98 journal={Bioinformatics},
99 author={Hamid, Md-Nafiz and Friedberg, Iddo}
100 }
101
102 </citation>
103 </citations>
104 </tool>
105
106
107