comparison tabpfn.xml @ 5:49b4ee0d0965 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/tabpfn commit cefdfdc13838de5108e13f54ecd69babb44009a1
author bgruening
date Wed, 26 Mar 2025 16:32:51 +0000
parents e7b4afedc471
children 1d77df05cbfc
comparison
equal deleted inserted replaced
4:e7b4afedc471 5:49b4ee0d0965
1 <tool id="tabpfn" name="Tabular data prediction using TabPFN" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="23.0"> 1 <tool id="tabpfn" name="Tabular data prediction using TabPFN" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="23.0">
2 <description>with PyTorch</description> 2 <description>with PyTorch</description>
3 <macros> 3 <macros>
4 <token name="@TOOL_VERSION@">2.0.3</token> 4 <token name="@TOOL_VERSION@">2.0.3</token>
5 <token name="@VERSION_SUFFIX@">1.1</token> 5 <token name="@VERSION_SUFFIX@">1.2</token>
6 </macros> 6 </macros>
7 <creator> 7 <creator>
8 <organization name="European Galaxy Team" url="https://galaxyproject.org/eu/" /> 8 <organization name="European Galaxy Team" url="https://galaxyproject.org/eu/"/>
9 <person givenName="Anup" familyName="Kumar" email="kumara@informatik.uni-freiburg.de" /> 9 <person givenName="Anup" familyName="Kumar" email="kumara@informatik.uni-freiburg.de"/>
10 <person givenName="Frank" familyName="Hutter" email="fh@cs.uni-freiburg.de" /> 10 <person givenName="Frank" familyName="Hutter" email="fh@cs.uni-freiburg.de"/>
11 </creator> 11 </creator>
12 <requirements> 12 <requirements>
13 <requirement type="package" version="@TOOL_VERSION@">tabpfn</requirement> 13 <requirement type="package" version="@TOOL_VERSION@">tabpfn</requirement>
14 <requirement type="package" version="2.2.2">pandas</requirement> 14 <requirement type="package" version="2.2.2">pandas</requirement>
15 <requirement type="package" version="3.9.2">matplotlib</requirement> 15 <requirement type="package" version="3.9.2">matplotlib</requirement>
16 </requirements> 16 </requirements>
17 <version_command>echo "@VERSION@"</version_command> 17 <version_command>echo "@VERSION@"</version_command>
18 <command detect_errors="aggressive"> 18 <command detect_errors="aggressive">
19 <![CDATA[ 19 <![CDATA[
20 python '$__tool_directory__/main.py' 20 python '$__tool_directory__/main.py'
21 --selected_task '$selected_task' 21 --selected_task '$selected_task'
22 --train_data '$train_data' 22 --train_data '$train_data'
23 --testhaslabels '$testhaslabels' 23 --testhaslabels '$testhaslabels'
24 --test_data '$test_data' 24 --test_data '$test_data'
25 ]]> 25 ]]>
26 </command> 26 </command>
27 <inputs> 27 <inputs>
28 <param name="selected_task" type="select" label="Select a machine learning task"> 28 <param name="selected_task" type="select" label="Select a machine learning task">
29 <option value="Classification" selected="true"></option> 29 <option value="Classification" selected="true"/>
30 <option value="Regression" selected="false"></option> 30 <option value="Regression" selected="false"/>
31 </param> 31 </param>
32 <param name="train_data" type="data" format="tabular" label="Train data" help="Please provide training data for training model. It should contain labels/class/target in the last column" /> 32 <param name="train_data" type="data" format="tabular" label="Train data" help="Please provide training data for training model. It should contain labels/class/target in the last column"/>
33 <param name="test_data" type="data" format="tabular" label="Test data" help="Please provide test data for evaluating model. It may or may not contain labels/class/target in the last column" /> 33 <param name="test_data" type="data" format="tabular" label="Test data" help="Please provide test data for evaluating model. It may or may not contain labels/class/target in the last column"/>
34 <param name="testhaslabels" type="boolean" truevalue="haslabels" falsevalue="" checked="false" label="Does test data contain labels?" help="Set this parameter when test data contains labels" /> 34 <param name="testhaslabels" type="boolean" truevalue="true" falsevalue="false" checked="false" label="Does test data contain labels?" help="Set this parameter when test data contains labels"/>
35 </inputs> 35 </inputs>
36 <outputs> 36 <outputs>
37 <data format="tabular" name="output_predicted_data" from_work_dir="output_predicted_data" label="Predicted data"></data> 37 <data format="tabular" name="output_predicted_data" from_work_dir="output_predicted_data" label="Predicted data"/>
38 <data format="png" name="output_plot" from_work_dir="output_plot.png" label="Prediction plot on test data"> 38 <data format="png" name="output_plot" from_work_dir="output_plot.png" label="Prediction plot on test data">
39 <filter>testhaslabels is True</filter> 39 <filter>testhaslabels is True</filter>
40 </data> 40 </data>
41 </outputs> 41 </outputs>
42 <tests> 42 <tests>
43 <test expect_num_outputs="1"> 43 <test expect_num_outputs="1">
44 <param name="selected_task" value="Classification" /> 44 <param name="selected_task" value="Classification"/>
45 <param name="train_data" value="classification_local_train_rows.tabular" ftype="tabular" /> 45 <param name="train_data" value="classification_local_train_rows.tabular" ftype="tabular"/>
46 <param name="test_data" value="classification_local_test_rows.tabular" ftype="tabular" /> 46 <param name="test_data" value="classification_local_test_rows.tabular" ftype="tabular"/>
47 <param name="testhaslabels" value="" /> 47 <param name="testhaslabels" value="false"/>
48 <output name="output_predicted_data"> 48 <output name="output_predicted_data">
49 <assert_contents> 49 <assert_contents>
50 <has_n_columns n="42" /> 50 <has_n_columns n="42"/>
51 <has_n_lines n="3" /> 51 <has_n_lines n="3"/>
52 </assert_contents>
53 </output>
54 </test>
55 <test expect_num_outputs="2">
56 <param name="selected_task" value="Classification" />
57 <param name="train_data" value="classification_local_train_rows.tabular" ftype="tabular" />
58 <param name="test_data" value="classification_local_test_rows_labels.tabular" ftype="tabular" />
59 <param name="testhaslabels" value="haslabels" />
60 <output name="output_plot" file="prc_binary.png" compare="sim_size" />
61 </test>
62 <test expect_num_outputs="2">
63 <param name="selected_task" value="Classification" />
64 <param name="train_data" value="train_data_multiclass.tabular" ftype="tabular" />
65 <param name="test_data" value="test_data_multiclass_labels.tabular" ftype="tabular" />
66 <param name="testhaslabels" value="haslabels" />
67 <output name="output_plot" file="prc_multiclass.png" compare="sim_size" />
68 </test>
69 <test expect_num_outputs="1">
70 <param name="selected_task" value="Classification" />
71 <param name="train_data" value="train_data_multiclass.tabular" ftype="tabular" />
72 <param name="test_data" value="test_data_multiclass_nolabels.tabular" ftype="tabular" />
73 <param name="testhaslabels" value="" />
74 <output name="output_predicted_data">
75 <assert_contents>
76 <has_n_columns n="11" />
77 <has_n_lines n="502" />
78 </assert_contents> 52 </assert_contents>
79 </output> 53 </output>
80 </test> 54 </test>
81 <test expect_num_outputs="2"> 55 <test expect_num_outputs="2">
82 <param name="selected_task" value="Regression" /> 56 <param name="selected_task" value="Classification"/>
83 <param name="train_data" value="regression_local_train_rows.tabular" ftype="tabular" /> 57 <param name="train_data" value="classification_local_train_rows.tabular" ftype="tabular"/>
84 <param name="test_data" value="regression_local_test_rows_labels.tabular" ftype="tabular" /> 58 <param name="test_data" value="classification_local_test_rows_labels.tabular" ftype="tabular"/>
85 <param name="testhaslabels" value="haslabels" /> 59 <param name="testhaslabels" value="true"/>
86 <output name="output_plot" file="r2_curve.png" compare="sim_size" /> 60 <output name="output_plot" file="prc_binary.png" compare="sim_size"/>
87 </test> 61 </test>
88 <test expect_num_outputs="1"> 62 <test expect_num_outputs="2">
89 <param name="selected_task" value="Regression" /> 63 <param name="selected_task" value="Classification"/>
90 <param name="train_data" value="regression_local_train_rows.tabular" ftype="tabular" /> 64 <param name="train_data" value="train_data_multiclass.tabular" ftype="tabular"/>
91 <param name="test_data" value="regression_local_test_rows.tabular" ftype="tabular" /> 65 <param name="test_data" value="test_data_multiclass_labels.tabular" ftype="tabular"/>
92 <param name="testhaslabels" value="" /> 66 <param name="testhaslabels" value="true"/>
93 <output name="output_predicted_data"> 67 <output name="output_plot" file="prc_multiclass.png" compare="sim_size"/>
94 <assert_contents> 68 </test>
95 <has_n_columns n="14" /> 69 <test expect_num_outputs="1">
96 <has_n_lines n="105" /> 70 <param name="selected_task" value="Classification"/>
71 <param name="train_data" value="train_data_multiclass.tabular" ftype="tabular"/>
72 <param name="test_data" value="test_data_multiclass_nolabels.tabular" ftype="tabular"/>
73 <param name="testhaslabels" value="false"/>
74 <output name="output_predicted_data">
75 <assert_contents>
76 <has_n_columns n="11"/>
77 <has_n_lines n="502"/>
97 </assert_contents> 78 </assert_contents>
98 </output> 79 </output>
99 </test> 80 </test>
81 <test expect_num_outputs="2">
82 <param name="selected_task" value="Regression"/>
83 <param name="train_data" value="regression_local_train_rows.tabular" ftype="tabular"/>
84 <param name="test_data" value="regression_local_test_rows_labels.tabular" ftype="tabular"/>
85 <param name="testhaslabels" value="true"/>
86 <output name="output_plot" file="r2_curve.png" compare="sim_size"/>
87 </test>
88 <test expect_num_outputs="1">
89 <param name="selected_task" value="Regression"/>
90 <param name="train_data" value="regression_local_train_rows.tabular" ftype="tabular"/>
91 <param name="test_data" value="regression_local_test_rows.tabular" ftype="tabular"/>
92 <param name="testhaslabels" value="false"/>
93 <output name="output_predicted_data">
94 <assert_contents>
95 <has_n_columns n="14"/>
96 <has_n_lines n="105"/>
97 </assert_contents>
98 </output>
99 </test>
100 </tests> 100 </tests>
101 <help> 101 <help>
102 <![CDATA[ 102 <![CDATA[
103 **What it does** 103 **What it does**
104 104
105 Classification and Regression on tabular data by TabPFN 105 Classification and Regression on tabular data by TabPFN. The use of GPU is recommended while training TabPFN to optimize runtime. Currently, TabPFN supports up to 10,000 samples (rows) and 500 features (columns) in a tabular dataset.
106 106
107 **Input files** 107 **Input files**
108 - Training data: the training data should contain features and the last column should be the class labels. It should be in tabular format. 108 - Training data: the training data should contain features and the last column should be the class labels. It should be in tabular format.
109 - Test data: the test data should also contain the same features as the training data and the last column should be the class labels if labels are available. It should be in tabular format. It is not required for the test data to have labels. 109 - Test data: the test data should also contain the same features as the training data and the last column should be the class labels if labels are available. It should be in tabular format. It is not required for the test data to have labels.
110 110
111 **Output files** 111 **Output files**
112 - Predicted data along with predicted labels. 112 - Predicted data along with predicted labels.
113 - Prediction plot (when test data has labels available). 113 - Prediction plot (when test data has labels available).
114
115 **License**
116 - TabPFN is available under an open-source license (https://github.com/PriorLabs/TabPFN?tab=License-1-ov-file) that combines Apache with a Llama-like attribution clause. It requires you to prominently display "Built with TabPFN" when you use a pipeline including it in production.
114 ]]> 117 ]]>
115 </help> 118 </help>
116 <citations> 119 <citations>
117 <citation type="doi">10.1038/s41586-024-08328-6</citation> 120 <citation type="doi">10.1038/s41586-024-08328-6</citation>
118 </citations> 121 </citations>