annotate main.py @ 5:4f7e6612906b draft

"planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
author bgruening
date Fri, 06 May 2022 09:05:18 +0000
parents afec8c595124
children e94dc7945639
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
1 """
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
2 Predict next tools in the Galaxy workflows
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
3 using machine learning (recurrent neural network)
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
4 """
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
5
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
6 import argparse
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
7 import time
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
8
5
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
9 import extract_workflow_connections
0
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
10 import keras.callbacks as callbacks
5
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
11 import numpy as np
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
12 import optimise_hyperparameters
0
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
13 import prepare_data
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
14 import utils
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
15
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
16
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
class PredictTool:
    """Find the best network configuration and train it on workflow data."""

    def __init__(self, num_cpus):
        """Init method.

        ``num_cpus`` is accepted so existing callers keep working; this
        class does not read or store it.
        """

    def find_train_best_network(
        self,
        network_config,
        reverse_dictionary,
        train_data,
        train_labels,
        test_data,
        test_labels,
        n_epochs,
        class_weights,
        usage_pred,
        standard_connections,
        tool_freq,
        tool_tr_samples,
    ):
        """
        Define recurrent neural network and train sequential data

        Runs the hyperparameter optimisation first, then trains the model it
        returns on batches drawn from ``utils.balanced_sample_generator``,
        validating against ``(test_data, test_labels)`` after every epoch.

        Arguments are forwarded to the helpers that consume them:
        ``network_config``, ``class_weights`` and ``tool_tr_samples`` go to
        the hyperparameter optimisation; ``usage_pred`` and
        ``standard_connections`` go to ``PredictCallback``; ``tool_freq`` is
        passed to ``utils.get_lowest_tools``; ``n_epochs`` is the number of
        training epochs requested from Keras.

        Returns a dict with the keys ``validation_loss`` and ``train_loss``
        (numpy arrays built from the Keras history), ``precision``,
        ``usage_weights``, ``published_precision``, ``lowest_pub_precision``
        and ``lowest_norm_precision`` (read from the prediction callback),
        ``model`` (the trained model) and ``best_parameters``.
        """
        # get tools with lowest representation
        lowest_tool_ids = utils.get_lowest_tools(tool_freq)

        print("Start hyperparameter optimisation...")
        hyper_opt = optimise_hyperparameters.HyperparameterOptimisation()
        best_params, best_model = hyper_opt.train_model(
            network_config,
            reverse_dictionary,
            train_data,
            train_labels,
            test_data,
            test_labels,
            tool_tr_samples,
            class_weights,
        )

        # define callbacks
        early_stopping = callbacks.EarlyStopping(
            monitor="loss",
            mode="min",
            verbose=1,
            min_delta=1e-1,
            restore_best_weights=True,
        )
        predict_callback_test = PredictCallback(
            test_data,
            test_labels,
            reverse_dictionary,
            n_epochs,
            usage_pred,
            standard_connections,
            lowest_tool_ids,
        )

        callbacks_list = [predict_callback_test, early_stopping]
        batch_size = int(best_params["batch_size"])

        # Integer division yields 0 when there are fewer training samples
        # than the tuned batch size, which would make every epoch run no
        # training steps. Always run at least one step per epoch.
        steps_per_epoch = max(1, len(train_data) // batch_size)

        print("Start training on the best model...")
        trained_model = best_model.fit_generator(
            utils.balanced_sample_generator(
                train_data,
                train_labels,
                batch_size,
                tool_tr_samples,
                reverse_dictionary,
            ),
            steps_per_epoch=steps_per_epoch,
            epochs=n_epochs,
            callbacks=callbacks_list,
            validation_data=(test_data, test_labels),
            verbose=2,
            shuffle=True,
        )

        history = trained_model.history
        train_performance = {
            "validation_loss": np.array(history["val_loss"]),
            "precision": predict_callback_test.precision,
            "usage_weights": predict_callback_test.usage_weights,
            "published_precision": predict_callback_test.published_precision,
            "lowest_pub_precision": predict_callback_test.lowest_pub_precision,
            "lowest_norm_precision": predict_callback_test.lowest_norm_precision,
            "train_loss": np.array(history["loss"]),
            # fit_generator trains best_model in place, so this is the
            # trained network.
            "model": best_model,
            "best_parameters": best_params,
        }
        return train_performance
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
111
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
112
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
113 class PredictCallback(callbacks.Callback):
5
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
114 def __init__(
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
115 self,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
116 test_data,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
117 test_labels,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
118 reverse_data_dictionary,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
119 n_epochs,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
120 usg_scores,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
121 standard_connections,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
122 lowest_tool_ids,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
123 ):
0
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
124 self.test_data = test_data
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
125 self.test_labels = test_labels
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
126 self.reverse_data_dictionary = reverse_data_dictionary
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
127 self.precision = list()
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
128 self.usage_weights = list()
3
5b3c08710e47 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit c635df659fe1835679438589ded43136b0e515c6"
bgruening
parents: 2
diff changeset
129 self.published_precision = list()
0
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
130 self.n_epochs = n_epochs
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
131 self.pred_usage_scores = usg_scores
3
5b3c08710e47 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit c635df659fe1835679438589ded43136b0e515c6"
bgruening
parents: 2
diff changeset
132 self.standard_connections = standard_connections
5b3c08710e47 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit c635df659fe1835679438589ded43136b0e515c6"
bgruening
parents: 2
diff changeset
133 self.lowest_tool_ids = lowest_tool_ids
5b3c08710e47 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit c635df659fe1835679438589ded43136b0e515c6"
bgruening
parents: 2
diff changeset
134 self.lowest_pub_precision = list()
5b3c08710e47 "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit c635df659fe1835679438589ded43136b0e515c6"
bgruening
parents: 2
diff changeset
135 self.lowest_norm_precision = list()
0
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
136
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
def on_epoch_end(self, epoch, logs=None):
    """
    Evaluate the model on the test data at the end of an epoch.

    Calls utils.verify_model with the stored test data, test labels,
    reverse data dictionary, predicted usage scores, standard connections
    and lowest tool ids, appends the returned metrics to the per-epoch
    history lists kept on this object and prints them.

    Nothing is computed, stored or printed when the test data is empty.

    epoch: index of the finished epoch; it is reported as epoch + 1.
    logs: not used by this method. The default is None rather than a
        dict literal so that no mutable object is shared between calls.
    """
    # len() is used deliberately instead of truthiness: test_data may be
    # an array-like object whose truth value is ambiguous.
    if len(self.test_data) == 0:
        return

    (
        usage_weights,
        precision,
        precision_pub,
        low_pub_prec,
        low_norm_prec,
        low_num,
    ) = utils.verify_model(
        self.model,
        self.test_data,
        self.test_labels,
        self.reverse_data_dictionary,
        self.pred_usage_scores,
        self.standard_connections,
        self.lowest_tool_ids,
    )

    # keep the per-epoch history of every metric (low_num is only printed)
    self.precision.append(precision)
    self.usage_weights.append(usage_weights)
    self.published_precision.append(precision_pub)
    self.lowest_pub_precision.append(low_pub_prec)
    self.lowest_norm_precision.append(low_norm_prec)

    epoch_number = epoch + 1
    report = (
        ("usage weights", usage_weights),
        ("normal precision", precision),
        ("published precision", precision_pub),
        ("lowest published precision", low_pub_prec),
        ("lowest normal precision", low_norm_prec),
        ("number of test samples with lowest tool ids", low_num),
    )
    for label, value in report:
        print("Epoch %d %s: %s" % (epoch_number, label, value))
0
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
172
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
173
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
174 if __name__ == "__main__":
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
175 start_time = time.time()
2
76251d1ccdcc "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents: 1
diff changeset
176
0
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
177 arg_parser = argparse.ArgumentParser()
5
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
178 arg_parser.add_argument(
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
179 "-wf", "--workflow_file", required=True, help="workflows tabular file"
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
180 )
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
181 arg_parser.add_argument(
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
182 "-tu", "--tool_usage_file", required=True, help="tool usage file"
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
183 )
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
184 arg_parser.add_argument(
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
185 "-om", "--output_model", required=True, help="trained model file"
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
186 )
0
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
187 # data parameters
5
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
188 arg_parser.add_argument(
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
189 "-cd",
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
190 "--cutoff_date",
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
191 required=True,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
192 help="earliest date for taking tool usage",
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
193 )
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
194 arg_parser.add_argument(
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
195 "-pl",
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
196 "--maximum_path_length",
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
197 required=True,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
198 help="maximum length of tool path",
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
199 )
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
200 arg_parser.add_argument(
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
201 "-ep",
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
202 "--n_epochs",
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
203 required=True,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
204 help="number of iterations to run to create model",
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
205 )
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
206 arg_parser.add_argument(
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
207 "-oe",
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
208 "--optimize_n_epochs",
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
209 required=True,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
210 help="number of iterations to run to find best model parameters",
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
211 )
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
212 arg_parser.add_argument(
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
213 "-me",
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
214 "--max_evals",
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
215 required=True,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
216 help="maximum number of configuration evaluations",
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
217 )
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
218 arg_parser.add_argument(
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
219 "-ts",
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
220 "--test_share",
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
221 required=True,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
222 help="share of data to be used for testing",
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
223 )
0
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
224 # neural network parameters
5
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
225 arg_parser.add_argument(
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
226 "-bs",
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
227 "--batch_size",
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
228 required=True,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
229 help="size of the tranining batch i.e. the number of samples per batch",
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
230 )
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
231 arg_parser.add_argument(
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
232 "-ut", "--units", required=True, help="number of hidden recurrent units"
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
233 )
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
234 arg_parser.add_argument(
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
235 "-es",
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
236 "--embedding_size",
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
237 required=True,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
238 help="size of the fixed vector learned for each tool",
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
239 )
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
240 arg_parser.add_argument(
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
241 "-dt", "--dropout", required=True, help="percentage of neurons to be dropped"
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
242 )
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
243 arg_parser.add_argument(
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
244 "-sd",
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
245 "--spatial_dropout",
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
246 required=True,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
247 help="1d dropout used for embedding layer",
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
248 )
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
249 arg_parser.add_argument(
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
250 "-rd",
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
251 "--recurrent_dropout",
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
252 required=True,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
253 help="dropout for the recurrent layers",
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
254 )
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
255 arg_parser.add_argument(
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
256 "-lr", "--learning_rate", required=True, help="learning rate"
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
257 )
2
76251d1ccdcc "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents: 1
diff changeset
258
0
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
259 # get argument values
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
260 args = vars(arg_parser.parse_args())
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
261 tool_usage_path = args["tool_usage_file"]
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
262 workflows_path = args["workflow_file"]
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
263 cutoff_date = args["cutoff_date"]
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
264 maximum_path_length = int(args["maximum_path_length"])
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
265 trained_model_path = args["output_model"]
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
266 n_epochs = int(args["n_epochs"])
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
267 optimize_n_epochs = int(args["optimize_n_epochs"])
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
268 max_evals = int(args["max_evals"])
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
269 test_share = float(args["test_share"])
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
270 batch_size = args["batch_size"]
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
271 units = args["units"]
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
272 embedding_size = args["embedding_size"]
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
273 dropout = args["dropout"]
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
274 spatial_dropout = args["spatial_dropout"]
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
275 recurrent_dropout = args["recurrent_dropout"]
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
276 learning_rate = args["learning_rate"]
2
76251d1ccdcc "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents: 1
diff changeset
277 num_cpus = 16
0
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
278
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
279 config = {
5
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
280 "cutoff_date": cutoff_date,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
281 "maximum_path_length": maximum_path_length,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
282 "n_epochs": n_epochs,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
283 "optimize_n_epochs": optimize_n_epochs,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
284 "max_evals": max_evals,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
285 "test_share": test_share,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
286 "batch_size": batch_size,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
287 "units": units,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
288 "embedding_size": embedding_size,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
289 "dropout": dropout,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
290 "spatial_dropout": spatial_dropout,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
291 "recurrent_dropout": recurrent_dropout,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
292 "learning_rate": learning_rate,
0
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
293 }
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
294
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
295 # Extract and process workflows
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
296 connections = extract_workflow_connections.ExtractWorkflowConnections()
5
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
297 (
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
298 workflow_paths,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
299 compatible_next_tools,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
300 standard_connections,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
301 ) = connections.read_tabular_file(workflows_path)
0
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
302 # Process the paths from workflows
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
303 print("Dividing data...")
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
304 data = prepare_data.PrepareData(maximum_path_length, test_share)
5
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
305 (
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
306 train_data,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
307 train_labels,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
308 test_data,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
309 test_labels,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
310 data_dictionary,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
311 reverse_dictionary,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
312 class_weights,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
313 usage_pred,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
314 train_tool_freq,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
315 tool_tr_samples,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
316 ) = data.get_data_labels_matrices(
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
317 workflow_paths,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
318 tool_usage_path,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
319 cutoff_date,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
320 compatible_next_tools,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
321 standard_connections,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
322 )
0
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
323 # find the best model and start training
2
76251d1ccdcc "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 6fa2a0294d615c9f267b766337dca0b2d3637219"
bgruening
parents: 1
diff changeset
324 predict_tool = PredictTool(num_cpus)
0
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
325 # start training with weighted classes
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
326 print("Training with weighted classes and samples ...")
5
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
327 results_weighted = predict_tool.find_train_best_network(
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
328 config,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
329 reverse_dictionary,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
330 train_data,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
331 train_labels,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
332 test_data,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
333 test_labels,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
334 n_epochs,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
335 class_weights,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
336 usage_pred,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
337 standard_connections,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
338 train_tool_freq,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
339 tool_tr_samples,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
340 )
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
341 utils.save_model(
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
342 results_weighted,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
343 data_dictionary,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
344 compatible_next_tools,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
345 trained_model_path,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
346 class_weights,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
347 standard_connections,
4f7e6612906b "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 5eebc0cb44e71f581d548b7e842002705dd155eb"
bgruening
parents: 4
diff changeset
348 )
0
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
349 end_time = time.time()
9bf25dbe00ad "planemo upload for repository https://github.com/bgruening/galaxytools/tree/recommendation_training/tools/tool_recommendation_model commit 7fac577189d01cedd01118a77fc2baaefe7d5cad"
bgruening
parents:
diff changeset
350 print("Program finished in %s seconds" % str(end_time - start_time))