pdaug_word_vector_model: PDAUG_Word_Vector_Model/PDAUG_Word_Vector

comparison PDAUG_Word_Vector_Model/PDAUG_Word_Vector_Model.xml @ 0:3ce435b8d648 draft

"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"

author	jay
date	Wed, 28 Oct 2020 02:21:16 +0000
parents
children	c6a1b09d8846

comparison

equal deleted inserted replaced

--1:000000000000
+:3ce435b8d648
+<tool id="pdaug_word_vector_model" name="PDAUG Word Vector Model" python_template_version="3.7" version="0.1.0">
+    <!-- Galaxy wrapper around PDAUG_Word_Vector_Model.py: builds a skip-gram word-vector model from a FASTA file of peptides. -->
+    <description>Generates the skip-gram model</description>
+    <requirements>
+        <requirement type="package" version="1.0.3">pandas</requirement>
+        <requirement type="package" version="1.76">biopython</requirement>
+        <requirement type="package" version="3.5">nltk</requirement>
+        <requirement type="package" version="3.8.0">gensim</requirement>
+        <requirement type="package" version="0.23.1">scikit-learn</requirement>
+        <requirement type="package" version="1.18.4">numpy</requirement>
+    </requirements>
+    <!-- detect_errors="exit_code" fails the job on any non-zero exit status, so no separate stdio rules are needed. -->
+    <command detect_errors="exit_code"><![CDATA[
+python '$__tool_directory__/PDAUG_Word_Vector_Model.py' -I '$input' -M '$meanCount' -W '$window' -O '$OutFile'
+]]></command>
+    <inputs>
+        <param name="input" type="data" format="fasta" label="Input fasta file" argument="--Input" help="Input fasta file with peptides"/>
+        <!-- The parameter name "meanCount" is kept so existing references to it keep working; its value is the minimum word frequency passed to the script via -M. -->
+        <param name="meanCount" type="integer" value="0" label="Min Count" argument="--min_count" help="Ignores all words with total frequency lower than this"/>
+        <param name="window" type="integer" value="5" label="window" argument="--window" help="Maximum distance between the current and predicted word within a sentence"/>
+    </inputs>
+    <outputs>
+        <data name="OutFile" format="txt" label="${tool.name} on $on_string - (text)"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="input" value="test.fasta"/>
+            <param name="meanCount" value="0"/>
+            <param name="window" value="5"/>
+            <!-- NOTE(review): lines_diff="2268" presumably tolerates run-to-run variation in the trained vectors; confirm, since a tolerance this large may let almost any output pass. -->
+            <output name="OutFile" file="model.txt" lines_diff="2268"/>
+        </test>
+    </tests>
+    <help><![CDATA[
+.. class:: infomark
+
+**What it does**
+
+This tool calculates the skip-gram model, which is a neural network where the inputs and outputs of the network are one-hot vectors calculated based on training data that contains input words and output words.
+
+-----
+
+**Inputs**
+
+* **--Input** Fasta file with protein sequences.
+* **--min_count** Ignores all words with total frequency lower than this.
+* **--window** Maximum distance between the current and predicted word within a sentence, accepts integer value.
+
+-----
+
+**Outputs**
+
+* **--OutFile** Returns the "model.txt" model file.
+]]></help>
+    <citations>
+        <citation type="bibtex">
+            @misc{PDAUGGITHUB,
+            author = {Joshi, Jayadev and Blankenberg, Daniel},
+            year = {2020},
+            title = {PDAUG - a Galaxy based toolset for peptide library analysis, visualization, and machine learning modeling},
+            publisher = {GitHub},
+            journal = {GitHub repository},
+            url = {https://github.com/jaidevjoshi83/pdaug.git},
+            }
+        </citation>
+        <citation type="bibtex">
+            @inproceedings{rehurek_lrec,
+            title = {{Software Framework for Topic Modelling with Large Corpora}},
+            author = {Radim {\v R}eh{\r u}{\v r}ek and Petr Sojka},
+            booktitle = {{Proceedings of the LREC 2010 Workshop on New
+            Challenges for NLP Frameworks}},
+            pages = {45--50},
+            year = 2010,
+            month = May,
+            day = 22,
+            publisher = {ELRA},
+            address = {Valletta, Malta},
+            url = {http://is.muni.cz/publication/884893/en},
+            language = {English}
+            }
+        </citation>
+        <citation type="bibtex">
+            @article{Md_Nafiz,
+            title = {Identifying antimicrobial peptides using word embedding with deep recurrent neural networks},
+            author = {Hamid, Md-Nafiz and Friedberg, Iddo},
+            journal = {Bioinformatics},
+            volume = {35},
+            number = {12},
+            pages = {2009--2016},
+            year = {2019},
+            doi = {10.1093/bioinformatics/bty937}
+            }
+        </citation>
+    </citations>
+</tool>

Mercurial > repos > jay > pdaug_word_vector_model

comparison PDAUG_Word_Vector_Model/PDAUG_Word_Vector_Model.xml @ 0:3ce435b8d648 draft