comparison pycaret_predict.py @ 3:ccd798db5abb draft default tip

planemo upload for repository https://github.com/goeckslab/Galaxy-Pycaret commit cf47efb521b91a9cb44ae5c5ade860627f9b9030
author goeckslab
date Tue, 03 Jun 2025 19:31:06 +0000
parents 1f20fe57fdee
children
comparison
equal deleted inserted replaced
2:0314dad38aaa 3:ccd798db5abb
1 import argparse 1 import argparse
2 import logging 2 import logging
3 import tempfile 3 import tempfile
4 4
5 import h5py 5 import h5py
6
7 import joblib 6 import joblib
8
9 import pandas as pd 7 import pandas as pd
10
11 from pycaret.classification import ClassificationExperiment 8 from pycaret.classification import ClassificationExperiment
12 from pycaret.regression import RegressionExperiment 9 from pycaret.regression import RegressionExperiment
13
14 from sklearn.metrics import average_precision_score 10 from sklearn.metrics import average_precision_score
15
16 from utils import encode_image_to_base64, get_html_closing, get_html_template 11 from utils import encode_image_to_base64, get_html_closing, get_html_template
17 12
18 LOG = logging.getLogger(__name__) 13 LOG = logging.getLogger(__name__)
19 14
20 15
47 data = pd.read_csv(data_path, engine='python', sep=None) 42 data = pd.read_csv(data_path, engine='python', sep=None)
48 if self.target: 43 if self.target:
49 exp = ClassificationExperiment() 44 exp = ClassificationExperiment()
50 names = data.columns.to_list() 45 names = data.columns.to_list()
51 LOG.error(f"Column names: {names}") 46 LOG.error(f"Column names: {names}")
52 target_index = int(self.target)-1 47 target_index = int(self.target) - 1
53 target_name = names[target_index] 48 target_name = names[target_index]
54 exp.setup(data, target=target_name, test_data=data, index=False) 49 exp.setup(data, target=target_name, test_data=data, index=False)
55 exp.add_metric(id='PR-AUC-Weighted', 50 exp.add_metric(id='PR-AUC-Weighted',
56 name='PR-AUC-Weighted', 51 name='PR-AUC-Weighted',
57 target='pred_proba', 52 target='pred_proba',
71 save=True, 66 save=True,
72 plot_kwargs={ 67 plot_kwargs={
73 'micro': False, 68 'micro': False,
74 'macro': False, 69 'macro': False,
75 'per_class': False, 70 'per_class': False,
76 'binary': True 71 'binary': True})
77 })
78 plot_paths[plot_name] = plot_path 72 plot_paths[plot_name] = plot_path
79 continue 73 continue
80 74
81 plot_path = exp.plot_model(self.model, 75 plot_path = exp.plot_model(self.model,
82 plot=plot_name, save=True) 76 plot=plot_name, save=True)
99 metrics = None 93 metrics = None
100 plot_paths = {} 94 plot_paths = {}
101 data = pd.read_csv(data_path, engine='python', sep=None) 95 data = pd.read_csv(data_path, engine='python', sep=None)
102 if self.target: 96 if self.target:
103 names = data.columns.to_list() 97 names = data.columns.to_list()
104 target_index = int(self.target)-1 98 target_index = int(self.target) - 1
105 target_name = names[target_index] 99 target_name = names[target_index]
106 exp = RegressionExperiment() 100 exp = RegressionExperiment()
107 exp.setup(data, target=target_name, test_data=data, index=False) 101 exp.setup(data, target=target_name, test_data=data, index=False)
108 predictions = exp.predict_model(self.model) 102 predictions = exp.predict_model(self.model)
109 metrics = exp.pull() 103 metrics = exp.pull()