Mercurial > repos > bgruening > run_jupyter_job
annotate main.py @ 0:f4619200cb0a draft
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/jupyter_job commit f945b1bff5008ba01da31c7de64e5326579394d6"
author | bgruening |
---|---|
date | Sat, 11 Dec 2021 17:56:38 +0000 |
parents | |
children |
rev | line source |
---|---|
0
f4619200cb0a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/jupyter_job commit f945b1bff5008ba01da31c7de64e5326579394d6"
bgruening
parents:
diff
changeset
|
1 import argparse |
f4619200cb0a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/jupyter_job commit f945b1bff5008ba01da31c7de64e5326579394d6"
bgruening
parents:
diff
changeset
|
2 import os |
f4619200cb0a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/jupyter_job commit f945b1bff5008ba01da31c7de64e5326579394d6"
bgruening
parents:
diff
changeset
|
3 import subprocess |
f4619200cb0a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/jupyter_job commit f945b1bff5008ba01da31c7de64e5326579394d6"
bgruening
parents:
diff
changeset
|
4 import warnings |
f4619200cb0a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/jupyter_job commit f945b1bff5008ba01da31c7de64e5326579394d6"
bgruening
parents:
diff
changeset
|
5 from zipfile import ZipFile |
f4619200cb0a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/jupyter_job commit f945b1bff5008ba01da31c7de64e5326579394d6"
bgruening
parents:
diff
changeset
|
6 |
f4619200cb0a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/jupyter_job commit f945b1bff5008ba01da31c7de64e5326579394d6"
bgruening
parents:
diff
changeset
|
7 import h5py |
f4619200cb0a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/jupyter_job commit f945b1bff5008ba01da31c7de64e5326579394d6"
bgruening
parents:
diff
changeset
|
8 import yaml |
f4619200cb0a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/jupyter_job commit f945b1bff5008ba01da31c7de64e5326579394d6"
bgruening
parents:
diff
changeset
|
9 from skl2onnx import convert_sklearn |
f4619200cb0a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/jupyter_job commit f945b1bff5008ba01da31c7de64e5326579394d6"
bgruening
parents:
diff
changeset
|
10 from skl2onnx.common.data_types import FloatTensorType |
f4619200cb0a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/jupyter_job commit f945b1bff5008ba01da31c7de64e5326579394d6"
bgruening
parents:
diff
changeset
|
11 |
f4619200cb0a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/jupyter_job commit f945b1bff5008ba01da31c7de64e5326579394d6"
bgruening
parents:
diff
changeset
|
12 |
f4619200cb0a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/jupyter_job commit f945b1bff5008ba01da31c7de64e5326579394d6"
bgruening
parents:
diff
changeset
|
# Suppress every Python warning for the lifetime of this process.
warnings.filterwarnings("ignore")
# Ask TensorFlow's native layer for minimal logging; set at module import,
# ahead of the function-local TensorFlow import further down.
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "3"

# Module-path fragments identifying scikit-learn objects.
# NOTE(review): presumably matched against str(obj.__class__) like TF_MODELS;
# the consuming code is not fully visible here.
SKLEARN_MODELS = [
    "sklearn.ensemble",
    "sklearn.tree",
    "sklearn.linear_model",
    "sklearn.svm",
    "sklearn.neighbors",
    "sklearn.preprocessing",
    "sklearn.cluster",
]

# Class-path fragments identifying TensorFlow / Keras models and layers.
# check_vars() tests each entry as a substring of str(obj.__class__).
TF_MODELS = [
    "tensorflow.python.keras.engine.training.Model",
    "tensorflow.python.keras.engine.sequential.Sequential",
    "tensorflow.python.keras.engine.functional.Functional",
    "tensorflow.python.keras.layers",
    "keras.engine.functional.Functional",
    "keras.engine.sequential.Sequential",
    "keras.engine.training.Model",
    "keras.layers",
]

# Type names treated as array-like values.
ARRAYS = [
    "numpy.ndarray",
    "list",
]

# Type names treated as tabular values.
DATAFRAME = [
    "pandas.core.frame.DataFrame",
]

# Type names treated as scalar values.
SCALAR_TYPES = [
    "int",
    "float",
    "str",
]
f4619200cb0a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/jupyter_job commit f945b1bff5008ba01da31c7de64e5326579394d6"
bgruening
parents:
diff
changeset
|
51 |
f4619200cb0a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/jupyter_job commit f945b1bff5008ba01da31c7de64e5326579394d6"
bgruening
parents:
diff
changeset
|
52 |
f4619200cb0a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/jupyter_job commit f945b1bff5008ba01da31c7de64e5326579394d6"
bgruening
parents:
diff
changeset
|
def find_replace_paths(script_file, updated_data_dict):
    """Replace every known old path inside a script's text with its new path.

    Args:
        script_file: The script's source text (a string, despite the name).
        updated_data_dict: Mapping of old path string -> new path string.

    Returns:
        The script text with each occurrence of a key replaced by its value.

    All substitutions happen in one pass over the original text, so a
    replacement value is never re-scanned and rewritten by another key.
    Longer keys are tried first so that a key which is a substring of another
    key (e.g. "a.csv" inside "aa.csv") cannot clobber the longer match.
    Empty keys are ignored: replacing "" would insert the value between every
    character of the script.
    """
    import re

    mapping = {old: new for old, new in updated_data_dict.items() if old}
    if not mapping:
        return script_file
    # Longest-first alternation makes the regex prefer the most specific path.
    longest_first = sorted(mapping, key=len, reverse=True)
    pattern = re.compile("|".join(re.escape(old) for old in longest_first))
    # A callable replacement keeps backslashes in the new path literal.
    return pattern.sub(lambda match: mapping[match.group(0)], script_file)
f4619200cb0a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/jupyter_job commit f945b1bff5008ba01da31c7de64e5326579394d6"
bgruening
parents:
diff
changeset
|
58 |
f4619200cb0a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/jupyter_job commit f945b1bff5008ba01da31c7de64e5326579394d6"
bgruening
parents:
diff
changeset
|
59 |
f4619200cb0a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/jupyter_job commit f945b1bff5008ba01da31c7de64e5326579394d6"
bgruening
parents:
diff
changeset
|
def update_ml_files_paths(old_file_paths, new_file_paths):
    """Pair comma-separated old paths with comma-separated new paths.

    Args:
        old_file_paths: Comma-separated string of original paths, or None.
        new_file_paths: Comma-separated string of replacement paths, or None.

    Returns:
        A dict mapping the i-th old path to the i-th new path. An empty dict
        is returned when either argument is None or the empty string.

    Raises:
        IndexError: If there are fewer new paths than old paths.
    """
    for paths in (old_file_paths, new_file_paths):
        if paths is None or paths == "":
            return {}
    originals = old_file_paths.split(",")
    replacements = new_file_paths.split(",")
    # Index lookup (not zip) so a too-short replacement list still raises.
    return {
        original: replacements[position]
        for position, original in enumerate(originals)
    }
f4619200cb0a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/jupyter_job commit f945b1bff5008ba01da31c7de64e5326579394d6"
bgruening
parents:
diff
changeset
|
69 |
f4619200cb0a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/jupyter_job commit f945b1bff5008ba01da31c7de64e5326579394d6"
bgruening
parents:
diff
changeset
|
70 |
f4619200cb0a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/jupyter_job commit f945b1bff5008ba01da31c7de64e5326579394d6"
bgruening
parents:
diff
changeset
|
def read_loaded_file(new_paths_dict, p_loaded_file, a_file, w_dir, z_file):
    """Execute a script with remapped paths, then save and archive its results.

    Args:
        new_paths_dict: Mapping of old path -> new path applied to the script
            text before execution.
        p_loaded_file: Value parsed with ``yaml.safe_load``; the parsed result
            is used as the path of the script to read.
        a_file: Output file path forwarded to ``check_vars``.
        w_dir: Working directory forwarded to ``check_vars`` and ``zip_files``.
        z_file: Path of the zip archive written by ``zip_files``.

    The script's top-level names end up in a fresh globals dict, which is then
    handed to ``check_vars`` for inspection.
    """
    global_vars = dict()
    input_file = yaml.safe_load(p_loaded_file)
    # Context manager so the script's file handle is closed deterministically.
    with open(input_file, "r") as script_handle:
        code_string = script_handle.read()
    re_code_string = find_replace_paths(code_string, new_paths_dict)
    compiled_code = compile(re_code_string, input_file, 'exec')
    # NOTE(review): this runs arbitrary code from the supplied script with the
    # privileges of this process; only safe when the script is trusted or the
    # process is sandboxed.
    exec(compiled_code, global_vars)
    check_vars(w_dir, global_vars, a_file)
    zip_files(w_dir, z_file)
f4619200cb0a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/jupyter_job commit f945b1bff5008ba01da31c7de64e5326579394d6"
bgruening
parents:
diff
changeset
|
80 |
f4619200cb0a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/jupyter_job commit f945b1bff5008ba01da31c7de64e5326579394d6"
bgruening
parents:
diff
changeset
|
81 |
f4619200cb0a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/jupyter_job commit f945b1bff5008ba01da31c7de64e5326579394d6"
bgruening
parents:
diff
changeset
|
def zip_files(w_dir, z_file):
    """Write the top-level entries of a directory into a new zip archive.

    Args:
        w_dir: Directory whose entries are archived.
        z_file: Path of the zip archive to create (overwritten if it exists).

    Entries are stored under their bare names, as before. Each entry is now
    resolved relative to ``w_dir`` rather than the current working directory,
    so the function also works when the two differ. If the archive itself
    lives inside ``w_dir`` it is skipped instead of being zipped into itself.

    NOTE(review): ``ZipFile.write`` does not recurse, so sub-directories are
    stored as directory entries without their contents. Confirm whether
    nested model output is expected in the archive.
    """
    archive_abs = os.path.abspath(z_file)
    with ZipFile(z_file, 'w') as zip_file:
        for entry_name in os.listdir(w_dir):
            entry_path = os.path.join(w_dir, entry_name)
            if os.path.abspath(entry_path) == archive_abs:
                continue
            zip_file.write(entry_path, arcname=entry_name)
f4619200cb0a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/jupyter_job commit f945b1bff5008ba01da31c7de64e5326579394d6"
bgruening
parents:
diff
changeset
|
86 |
f4619200cb0a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/jupyter_job commit f945b1bff5008ba01da31c7de64e5326579394d6"
bgruening
parents:
diff
changeset
|
87 |
f4619200cb0a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/jupyter_job commit f945b1bff5008ba01da31c7de64e5326579394d6"
bgruening
parents:
diff
changeset
|
def create_model_path(curr_path, key):
    """Ensure the ``model_outputs`` directory exists and build an ONNX path.

    Args:
        curr_path: Base directory under which ``model_outputs`` is created.
        key: Identifier embedded in the file name.

    Returns:
        The string ``<curr_path>/model_outputs/onnx_model_<key>.onnx``.

    Raises:
        FileExistsError: If ``<curr_path>/model_outputs`` exists but is not a
            directory.
    """
    onnx_dir = curr_path + "/model_outputs"
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(onnx_dir, exist_ok=True)
    return onnx_dir + "/" + "onnx_model_{}.onnx".format(key)
f4619200cb0a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/jupyter_job commit f945b1bff5008ba01da31c7de64e5326579394d6"
bgruening
parents:
diff
changeset
|
94 |
f4619200cb0a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/jupyter_job commit f945b1bff5008ba01da31c7de64e5326579394d6"
bgruening
parents:
diff
changeset
|
95 |
f4619200cb0a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/jupyter_job commit f945b1bff5008ba01da31c7de64e5326579394d6"
bgruening
parents:
diff
changeset
|
def save_sklearn_model(w_dir, key, obj):
    """Convert a scikit-learn object to ONNX and write it under ``w_dir``.

    Args:
        w_dir: Working directory; the file goes to the path returned by
            ``create_model_path(w_dir, key)``.
        key: Identifier used in the output file name.
        obj: The scikit-learn object passed to ``convert_sklearn``.

    The ONNX input is declared as a float tensor with a free batch dimension.
    Its width is taken from the estimator's ``n_features_in_`` when that
    attribute is available, so models trained on any number of features
    convert with a matching signature. The previous fixed width of 4 is kept
    as the fallback when the attribute is missing.
    """
    n_features = int(getattr(obj, "n_features_in_", 4))
    initial_type = [('float_input', FloatTensorType([None, n_features]))]
    onx = convert_sklearn(obj, initial_types=initial_type)
    sk_model_path = create_model_path(w_dir, key)
    with open(sk_model_path, "wb") as f:
        f.write(onx.SerializeToString())
f4619200cb0a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/jupyter_job commit f945b1bff5008ba01da31c7de64e5326579394d6"
bgruening
parents:
diff
changeset
|
102 |
f4619200cb0a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/jupyter_job commit f945b1bff5008ba01da31c7de64e5326579394d6"
bgruening
parents:
diff
changeset
|
103 |
f4619200cb0a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/jupyter_job commit f945b1bff5008ba01da31c7de64e5326579394d6"
bgruening
parents:
diff
changeset
|
def save_tf_model(w_dir, key, obj):
    """Save a TensorFlow/Keras object as a SavedModel and convert it to ONNX.

    Args:
        w_dir: Working directory that receives ``tf_model_<key>`` and the
            ONNX file path returned by ``create_model_path(w_dir, key)``.
        key: Identifier used in both output names.
        obj: The object passed to ``tf.saved_model.save``.

    Raises:
        subprocess.CalledProcessError: If the ``tf2onnx.convert`` process
            exits with a non-zero status.
    """
    # Imported here so TensorFlow is only loaded when a model is saved.
    import tensorflow as tf
    tf_file_key = "tf_model_{}".format(key)
    tf_model_path = "{}/{}".format(w_dir, tf_file_key)
    os.makedirs(tf_model_path, exist_ok=True)
    # save model as tf model
    tf.saved_model.save(obj, tf_model_path)
    # save model as ONNX
    tf_onnx_model_p = create_model_path(w_dir, key)
    # OPSET level defines a level of tensorflow operations supported by ONNX
    convert_command = [
        "python", "-m", "tf2onnx.convert",
        "--saved-model", tf_model_path,
        "--output", tf_onnx_model_p,
        "--opset", "15",
    ]
    # convert tf/keras model to ONNX and save it to output file.
    # An argument list (no shell) keeps paths containing spaces or shell
    # metacharacters intact and avoids shell interpretation entirely.
    subprocess.run(convert_command, check=True)
f4619200cb0a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/jupyter_job commit f945b1bff5008ba01da31c7de64e5326579394d6"
bgruening
parents:
diff
changeset
|
118 |
f4619200cb0a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/jupyter_job commit f945b1bff5008ba01da31c7de64e5326579394d6"
bgruening
parents:
diff
changeset
|
119 |
f4619200cb0a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/jupyter_job commit f945b1bff5008ba01da31c7de64e5326579394d6"
bgruening
parents:
diff
changeset
|
def save_primitives(payload, a_file):
    """Write each payload entry as a dataset in a new HDF5 file.

    Args:
        payload: Mapping of dataset name -> value to store.
        a_file: Path of the HDF5 file to create (opened in "w" mode).

    Storage is best-effort per entry: if a value cannot be stored, the error
    is printed and the remaining entries are still processed. The file is
    opened as a context manager so it is closed even if an unexpected error
    escapes the loop.
    """
    with h5py.File(a_file, "w") as hf_file:
        for key in payload:
            try:
                hf_file.create_dataset(key, data=payload[key])
            except Exception as e:
                # Deliberately broad: one unstorable value must not abort
                # the whole export.
                print(e)
                continue
f4619200cb0a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/jupyter_job commit f945b1bff5008ba01da31c7de64e5326579394d6"
bgruening
parents:
diff
changeset
|
129 |
f4619200cb0a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/jupyter_job commit f945b1bff5008ba01da31c7de64e5326579394d6"
bgruening
parents:
diff
changeset
|
130 |
f4619200cb0a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/jupyter_job commit f945b1bff5008ba01da31c7de64e5326579394d6"
bgruening
parents:
diff
changeset
|
def save_dataframe(payload, a_file):
    """Store every object in ``payload`` in ``a_file`` via its ``to_hdf``.

    Args:
        payload: Mapping of name -> object exposing ``to_hdf(path, key=...)``.
        a_file: Path handed to each ``to_hdf`` call.

    Each object is written under its own mapping key.
    """
    for name, frame in payload.items():
        frame.to_hdf(a_file, key=name)
f4619200cb0a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/jupyter_job commit f945b1bff5008ba01da31c7de64e5326579394d6"
bgruening
parents:
diff
changeset
|
134 |
f4619200cb0a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/jupyter_job commit f945b1bff5008ba01da31c7de64e5326579394d6"
bgruening
parents:
diff
changeset
|
135 |
f4619200cb0a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/jupyter_job commit f945b1bff5008ba01da31c7de64e5326579394d6"
bgruening
parents:
diff
changeset
|
def _class_matches(markers, class_name):
    """Return True if any of the ``markers`` occurs as a substring of ``class_name``."""
    return any(marker in class_name for marker in markers)


def check_vars(w_dir, var_dict, a_file):
    """Save the supported objects found in ``var_dict``.

    Every value is classified by searching ``str(obj.__class__)`` for the
    marker strings of each category. The first matching category wins, in
    this order: TF model, scikit-learn model, array/list, dataframe, scalar.
    Objects matching no category are ignored.

    Models are saved immediately, one call per object. Arrays, lists and
    scalars are collected and handed to ``save_primitives``; dataframes are
    collected and handed to ``save_dataframe``. Both savers are called even
    when their payload is empty.

    Args:
        w_dir: passed through to ``save_tf_model`` / ``save_sklearn_model``.
        var_dict: mapping of variable name to object; ``None`` makes the
            call a no-op.
        a_file: passed through to ``save_primitives`` and ``save_dataframe``.
    """
    if var_dict is None:
        return

    primitive_payload = dict()
    dataframe_payload = dict()
    for key in var_dict:
        obj = var_dict[key]
        obj_class = str(obj.__class__)
        # save tf model
        if _class_matches(TF_MODELS, obj_class):
            save_tf_model(w_dir, key, obj)
        # save scikit-learn model
        elif _class_matches(SKLEARN_MODELS, obj_class):
            save_sklearn_model(w_dir, key, obj)
        # arrays/lists and scalars share the primitive payload; each key is
        # visited exactly once, so no membership check is needed before storing
        elif _class_matches(ARRAYS, obj_class):
            primitive_payload[key] = obj
        elif _class_matches(DATAFRAME, obj_class):
            dataframe_payload[key] = obj
        elif _class_matches(SCALAR_TYPES, obj_class):
            primitive_payload[key] = obj
    save_primitives(primitive_payload, a_file)
    save_dataframe(dataframe_payload, a_file)
f4619200cb0a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/jupyter_job commit f945b1bff5008ba01da31c7de64e5326579394d6"
bgruening
parents:
diff
changeset
|
161 |
f4619200cb0a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/jupyter_job commit f945b1bff5008ba01da31c7de64e5326579394d6"
bgruening
parents:
diff
changeset
|
162 |
f4619200cb0a
"planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/jupyter_job commit f945b1bff5008ba01da31c7de64e5326579394d6"
bgruening
parents:
diff
changeset
|
if __name__ == "__main__":

    # command-line interface: every option is mandatory
    arg_parser = argparse.ArgumentParser()
    for short_opt, long_opt in (
        ("-mlp", "--ml_paths"),
        ("-ldf", "--loaded_file"),
        ("-wd", "--working_dir"),
        ("-oz", "--output_zip"),
        ("-oa", "--output_array"),
        ("-mlf", "--ml_h5_files"),
    ):
        arg_parser.add_argument(short_opt, long_opt, required=True, help="")

    # get argument values
    args = vars(arg_parser.parse_args())
    ml_paths = args["ml_paths"]
    ml_h5_files = args["ml_h5_files"]
    loaded_file = args["loaded_file"]
    working_dir = args["working_dir"]
    array_output_file = args["output_array"]
    zip_output_file = args["output_zip"]

    # resolve the ML file paths first, then process the loaded file
    new_paths_dict = update_ml_files_paths(ml_paths, ml_h5_files)
    read_loaded_file(
        new_paths_dict,
        loaded_file,
        array_output_file,
        working_dir,
        zip_output_file,
    )