Mercurial > repos > jay > pdaug_peptide_ngrams

diff PDAUG_Word_Vector_Model/PDAUG_Word_Vector_Model.py @ 5:d7e684975db3 draft
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit ac4353ca5c0ac9ce60df9f4bf160ed08b99fbee3"
author: jay
date: Thu, 28 Jan 2021 04:05:16 +0000
parents: 7557b48b2872
--- a/PDAUG_Word_Vector_Model/PDAUG_Word_Vector_Model.py	Tue Jan 12 19:17:03 2021 +0000
+++ b/PDAUG_Word_Vector_Model/PDAUG_Word_Vector_Model.py	Thu Jan 28 04:05:16 2021 +0000
@@ -11,6 +11,7 @@
 parser.add_argument("-M", "--min_count", required=False, default=0, help="Path to target tsv file")
 parser.add_argument("-W", "--window", required=False, default=5, help="Path to target tsv file")
 parser.add_argument("-O", "--OutFile", required=False, default='model.txt', help="Path to target tsv file")
+parser.add_argument("-S", "--SG", required=False, default='skip-gram', choices=['skip-gram', 'CBOW'], help="Training algorithm: 'skip-gram' (default) or 'CBOW'")
 
 args = parser.parse_args()
 
@@ -30,9 +31,14 @@
 #min_count = 0
 size = 200
 #window = 5
-sg = 1
+
+print (args.SG)
+# Translate the CLI choice into the `sg` flag passed to Word2Vec (1 = skip-gram, 0 = CBOW); fail early on anything else so SG is never left unbound.
+if args.SG not in ('skip-gram', 'CBOW'):
+    parser.error("--SG must be 'skip-gram' or 'CBOW', got %r" % args.SG)
+SG = 1 if args.SG == 'skip-gram' else 0
 
 sentences = ProteinSeq() 
-model = gensim.models.Word2Vec(sentences, min_count=int(args.min_count), size=size, window=int(args.window), sg = sg, workers = 10)
+model = gensim.models.Word2Vec(sentences, min_count=int(args.min_count), size=size, window=int(args.window), sg = SG, workers = 10)
 model.wv.save_word2vec_format(args.OutFile, binary=False)