Mercurial > repos > jay > pdaug_word_vector_model
diff PDAUG_Word_Vector_Model/PDAUG_Word_Vector_Model.xml @ 0:3ce435b8d648 draft
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit a9bd83f6a1afa6338cb6e4358b63ebff5bed155e"
author | jay |
---|---|
date | Wed, 28 Oct 2020 02:21:16 +0000 |
parents | |
children | c6a1b09d8846 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/PDAUG_Word_Vector_Model/PDAUG_Word_Vector_Model.xml Wed Oct 28 02:21:16 2020 +0000 @@ -0,0 +1,107 @@ +<tool id="pdaug_word_vector_model" name="PDAUG Word Vector Model" python_template_version="3.7" version="0.1.0"> + <description>Generates the skip-gram model</description> + <requirements> + <requirement type="package" version="1.0.3">pandas</requirement> + <requirement type="package" version="1.76">biopython</requirement> + <requirement type="package" version="3.5">nltk</requirement> + <requirement type="package" version="3.8.0">gensim</requirement> + <requirement type="package" version="0.23.1">scikit-learn</requirement> + <requirement type="package" version="1.18.4">numpy</requirement> + </requirements> + <stdio> + <exit_code range="1" level="fatal" /> + </stdio> + <command detect_errors="exit_code"><![CDATA[ + + python '$__tool_directory__/PDAUG_Word_Vector_Model.py' -I '$input' -M '$meanCount' -W '$window' -O '$OutFile' + + ]]></command> + + <inputs> + <param name="input" type="data" label="Input fasta file" format="fasta" argument= "--Input" help="Input fasta file with peptides"/> + <param name="meanCount" type="integer" label="Mean Count" value="0" format="fasta" argument= "--min_count" help="Ignores a all words with total frequency lower than this"/> + <param name="window" type="integer" label="window" value="5" argument="--window" help="Maximum distance between the current and predicted word within a sentence"/> + </inputs> + + <outputs> + <data name='OutFile' format='txt' label="${tool.name} on $on_string - (text)" /> + </outputs> + + <tests> + <test> + <param name="input" value="test.fasta"/> + <param name="meanCount" value="0"/> + <param name="window" value="5"/> + <output name="OutFile" value="model.txt" lines_diff="2268" /> + </test> + </tests> + <help><![CDATA[ +.. class:: infomark + +**What it does** + +This tool calculates the skip-gram model which is a neural network where the inputs and outputs of the network are one-hot vectors calculated based on training data that contains input word and output word. + +----- + +**Inputs** + * **--Input** Fasta file with protein sequences. + * **--min_count** Ignores all words with total frequency lower than this + * **--window** Maximum distance between the current and predicted word within a sentence, accepts integer value. + +----- + +**Outputs** + * **--OutFile** Return "model.txt" model file. + +]]></help> + +<citations> + + <citation type="bibtex"> + @misc{PDAUGGITHUB, + author = {Joshi, Jayadev and Blankenberg, Daniel}, + year = {2020}, + title ={PDAUG - a Galaxy based toolset for peptide library analysis, visualization, and machine learning modeling}, + publisher = {GitHub}, + journal = {GitHub repository}, + url = + {https://github.com/jaidevjoshi83/pdaug.git}, + } + </citation> + + <citation type="bibtex"> + @inproceedings{rehurek_lrec, + title = {{Software Framework for Topic Modelling with Large Corpora}}, + author = {Radim {\v R}eh{\r u}{\v r}ek and Petr Sojka}, + booktitle = {{Proceedings of the LREC 2010 Workshop on New + Challenges for NLP Frameworks}}, + pages = {45--50}, + year = 2010, + month = May, + day = 22, + publisher = {ELRA}, + address = {Valletta, Malta}, + url={http://is.muni.cz/publication/884893/en}, + language={English} + } + </citation> + + <citation type="bibtex"> + @article{Md_Nafiz, + title= {Identifying antimicrobial peptides using word embedding with deep recurrent neural networks}, + volume={35}, + DOI={https://doi.org/10.1093/bioinformatics/bty937}, + issue={12}, + year={2018}, + pages={2009-2016}, + journal={Europe PMC}, + author={Hamid, Md-Nafiz and Friedberg, Iddo} + } + + </citation> +</citations> +</tool> + + +