Mercurial > repos > jay > pdaug_merge_dataframes
diff PDAUG_Word_Vector_Model/PDAUG_Word_Vector_Model.py @ 5:10c7d4807de6 draft
"planemo upload for repository https://github.com/jaidevjoshi83/pdaug commit ac4353ca5c0ac9ce60df9f4bf160ed08b99fbee3"
author | jay |
---|---|
date | Thu, 28 Jan 2021 03:48:52 +0000 |
parents | 5bb52d4bf172 |
children |
line wrap: on
line diff
--- a/PDAUG_Word_Vector_Model/PDAUG_Word_Vector_Model.py Tue Jan 12 18:50:55 2021 +0000
+++ b/PDAUG_Word_Vector_Model/PDAUG_Word_Vector_Model.py Thu Jan 28 03:48:52 2021 +0000
@@ -11,6 +11,7 @@
 parser.add_argument("-M", "--min_count", required=False, default=0, help="Path to target tsv file")
 parser.add_argument("-W", "--window", required=False, default=5, help="Path to target tsv file")
 parser.add_argument("-O", "--OutFile", required=False, default='model.txt', help="Path to target tsv file")
+parser.add_argument("-S", "--SG", required=False, default='skip-gram', help="Training algorithm: 1 for skip-gram; otherwise CBOW")
 
 args = parser.parse_args()
 
@@ -30,9 +31,14 @@
 #min_count = 0
 size = 200
 #window = 5
-sg = 1
+
+print (args.SG)
+if args.SG == 'skip-gram':
+    SG = 1
+elif args.SG == 'CBOW':
+    SG = 0
 
 sentences = ProteinSeq()
-model = gensim.models.Word2Vec(sentences, min_count=int(args.min_count), size=size, window=int(args.window), sg = sg, workers = 10)
+model = gensim.models.Word2Vec(sentences, min_count=int(args.min_count), size=size, window=int(args.window), sg = SG, workers = 10)
 
 model.wv.save_word2vec_format(args.OutFile, binary=False)