diff main.py @ 5:49b4ee0d0965 draft default tip

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/tabpfn commit cefdfdc13838de5108e13f54ecd69babb44009a1
author bgruening
date Wed, 26 Mar 2025 16:32:51 +0000
parents e7b4afedc471
children
line wrap: on
line diff
--- a/main.py	Tue Feb 11 10:14:12 2025 +0000
+++ b/main.py	Wed Mar 26 16:32:51 2025 +0000
@@ -57,7 +57,9 @@
         plt.plot(
             recall, precision, linestyle="--", color="black", label="Micro-average"
         )
-        plt.title("Precision-Recall Curve (Multiclass Classification)")
+        plt.title(
+            "Precision-Recall Curve (Multiclass Classification)"
+        )
     plt.xlabel("Recall")
     plt.ylabel("Precision")
     plt.legend(loc="lower left")
@@ -85,21 +87,25 @@
     # prepare train data
     tr_features, tr_labels = separate_features_labels(args["train_data"])
     # prepare test data
-    if args["testhaslabels"] == "haslabels":
+    if args["testhaslabels"] == "true":
         te_features, te_labels = separate_features_labels(args["test_data"])
     else:
         te_features = pd.read_csv(args["test_data"], sep="\t")
         te_labels = []
     s_time = time.time()
     if args["selected_task"] == "Classification":
-        classifier = TabPFNClassifier()
+        classifier = TabPFNClassifier(random_state=42)
         classifier.fit(tr_features, tr_labels)
         y_eval = classifier.predict(te_features)
         pred_probas_test = classifier.predict_proba(te_features)
         if len(te_labels) > 0:
             classification_plot(te_labels, pred_probas_test)
+        te_features["predicted_labels"] = y_eval
+        te_features.to_csv(
+            "output_predicted_data", sep="\t", index=None
+        )
     else:
-        regressor = TabPFNRegressor()
+        regressor = TabPFNRegressor(random_state=42)
         regressor.fit(tr_features, tr_labels)
         y_eval = regressor.predict(te_features)
         if len(te_labels) > 0:
@@ -112,14 +118,14 @@
                 "True values",
                 "Predicted values",
             )
+    te_features["predicted_labels"] = y_eval
+    te_features.to_csv(
+        "output_predicted_data", sep="\t", index=None
+    )
     e_time = time.time()
     print(
-        "Time taken by TabPFN for training and prediction: {} seconds".format(
-            e_time - s_time
-        )
+        f"Time taken by TabPFN for training and prediction: {e_time - s_time} seconds"
     )
-    te_features["predicted_labels"] = y_eval
-    te_features.to_csv("output_predicted_data", sep="\t", index=None)
 
 
 if __name__ == "__main__":