view tabpfn.xml @ 4:e7b4afedc471 draft

planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/tabpfn commit f514c9038f1aac1ef0ca40a9f4866b1ad0fc7747
author bgruening
date Tue, 11 Feb 2025 10:14:12 +0000
parents c081e5e1d7ce
children 49b4ee0d0965
line wrap: on
line source

<tool id="tabpfn" name="Tabular data prediction using TabPFN" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="23.0">
    <description>with PyTorch</description>
    <!-- Tokens substituted elsewhere in this file: TOOL_VERSION is the wrapped tabpfn
         package version (also used for the tabpfn requirement), and VERSION_SUFFIX is
         appended after "+galaxy" in the tool's version attribute. -->
    <macros>
        <token name="@TOOL_VERSION@">2.0.3</token>
        <token name="@VERSION_SUFFIX@">1.1</token>
    </macros>
    <!-- Attribution metadata: the organization and the people credited for this tool. -->
    <creator>
        <organization name="European Galaxy Team" url="https://galaxyproject.org/eu/" />
        <person givenName="Anup" familyName="Kumar" email="kumara@informatik.uni-freiburg.de" />
        <person givenName="Frank" familyName="Hutter" email="fh@cs.uni-freiburg.de" />
    </creator>
    <!-- Package dependencies. tabpfn follows the TOOL_VERSION token so that the tool
         version and the installed package version cannot drift apart; pandas and
         matplotlib are pinned explicitly. -->
    <requirements>
        <requirement type="package" version="@TOOL_VERSION@">tabpfn</requirement>
        <requirement type="package" version="2.2.2">pandas</requirement>
        <requirement type="package" version="3.9.2">matplotlib</requirement>
    </requirements>
    <!-- Reports the wrapped tabpfn version. The token used here must be one that is
         defined in the macros section; an undefined token is not substituted and
         would be echoed literally. -->
    <version_command>echo "@TOOL_VERSION@"</version_command>
    <!-- Runs main.py from the tool directory, passing the selected task, the training
         and test datasets, and the value of the testhaslabels checkbox ("haslabels"
         when checked, an empty string otherwise). Every value is single-quoted.
         Error detection is set to "aggressive". -->
    <command detect_errors="aggressive">
    <![CDATA[
            python '$__tool_directory__/main.py'
            --selected_task '$selected_task'
            --train_data '$train_data'
            --testhaslabels '$testhaslabels'
            --test_data '$test_data'
    ]]>
    </command>
    <!-- User-facing parameters. Each one is forwarded to main.py by the command
         section under an argument of the same name. -->
    <inputs>
        <!-- Task type. Each option carries explicit display text equal to its value,
             so the label does not depend on the fallback used for empty options. -->
        <param name="selected_task" type="select" label="Select a machine learning task">
            <option value="Classification" selected="true">Classification</option>
            <option value="Regression" selected="false">Regression</option>
        </param>
        <!-- Tabular datasets; labels/targets are expected in the last column. -->
        <param name="train_data" type="data" format="tabular" label="Train data" help="Please provide training data for training model. It should contain labels/class/target in the last column" />
        <param name="test_data" type="data" format="tabular" label="Test data" help="Please provide test data for evaluating model. It may or may not contain labels/class/target in the last column" />
        <!-- Checkbox that also controls whether the plot output is created
             (see the filter on the output_plot dataset). -->
        <param name="testhaslabels" type="boolean" truevalue="haslabels" falsevalue="" checked="false" label="Does test data contain labels?" help="Set this parameter when test data contains labels" />
    </inputs>
    <!-- Datasets collected from the job working directory after main.py finishes. -->
    <outputs>
        <!-- Always created: tabular predictions read from "output_predicted_data". -->
        <data name="output_predicted_data" format="tabular" from_work_dir="output_predicted_data" label="Predicted data" />
        <!-- Created only when the testhaslabels checkbox is set: PNG plot read from
             "output_plot.png". -->
        <data name="output_plot" format="png" from_work_dir="output_plot.png" label="Prediction plot on test data">
            <filter>testhaslabels is True</filter>
        </data>
    </outputs>
    <!-- Functional tests. Runs without test labels expect a single output and check
         the shape of the prediction table; runs with test labels expect two outputs
         and compare the plot to a reference image by size. -->
    <tests>
        <!-- Classification, test data without labels. -->
        <test expect_num_outputs="1">
            <param name="selected_task" value="Classification" />
            <param name="train_data" value="classification_local_train_rows.tabular" ftype="tabular" />
            <param name="test_data" value="classification_local_test_rows.tabular" ftype="tabular" />
            <param name="testhaslabels" value="" />
            <output name="output_predicted_data">
                <assert_contents>
                    <has_n_columns n="42" />
                    <has_n_lines n="3" />
                </assert_contents>
            </output>
        </test>
        <!-- Classification, test data with labels. -->
        <test expect_num_outputs="2">
            <param name="selected_task" value="Classification" />
            <param name="train_data" value="classification_local_train_rows.tabular" ftype="tabular" />
            <param name="test_data" value="classification_local_test_rows_labels.tabular" ftype="tabular" />
            <param name="testhaslabels" value="haslabels" />
            <output name="output_plot" file="prc_binary.png" compare="sim_size" />
        </test>
        <!-- Classification on the multiclass dataset, test data with labels. -->
        <test expect_num_outputs="2">
            <param name="selected_task" value="Classification" />
            <param name="train_data" value="train_data_multiclass.tabular" ftype="tabular" />
            <param name="test_data" value="test_data_multiclass_labels.tabular" ftype="tabular" />
            <param name="testhaslabels" value="haslabels" />
            <output name="output_plot" file="prc_multiclass.png" compare="sim_size" />
        </test>
        <!-- Classification on the multiclass dataset, test data without labels. -->
        <test expect_num_outputs="1">
            <param name="selected_task" value="Classification" />
            <param name="train_data" value="train_data_multiclass.tabular" ftype="tabular" />
            <param name="test_data" value="test_data_multiclass_nolabels.tabular" ftype="tabular" />
            <param name="testhaslabels" value="" />
            <output name="output_predicted_data">
                <assert_contents>
                    <has_n_columns n="11" />
                    <has_n_lines n="502" />
                </assert_contents>
            </output>
        </test>
        <!-- Regression, test data with labels. -->
        <test expect_num_outputs="2">
            <param name="selected_task" value="Regression" />
            <param name="train_data" value="regression_local_train_rows.tabular" ftype="tabular" />
            <param name="test_data" value="regression_local_test_rows_labels.tabular" ftype="tabular" />
            <param name="testhaslabels" value="haslabels" />
            <output name="output_plot" file="r2_curve.png" compare="sim_size" />
        </test>
        <!-- Regression, test data without labels. -->
        <test expect_num_outputs="1">
            <param name="selected_task" value="Regression" />
            <param name="train_data" value="regression_local_train_rows.tabular" ftype="tabular" />
            <param name="test_data" value="regression_local_test_rows.tabular" ftype="tabular" />
            <param name="testhaslabels" value="" />
            <output name="output_predicted_data">
                <assert_contents>
                    <has_n_columns n="14" />
                    <has_n_lines n="105" />
                </assert_contents>
            </output>
        </test>
    </tests>
    <help>
        <![CDATA[
            **What it does**

            Classification and regression on tabular data using TabPFN.

            **Input files**
                - Training data: the training data should contain features and the last column should be the class labels. It should be in tabular format.
                - Test data: the test data should also contain the same features as the training data and the last column should be the class labels if labels are available. It should be in tabular format. It is not required for the test data to have labels.

            **Output files**
                - Predicted data along with predicted labels.
                - Prediction plot (when test data has labels available).
        ]]>
    </help>
    <!-- Publication (referenced by DOI) to cite when using this tool. -->
    <citations>
        <citation type="doi">10.1038/s41586-024-08328-6</citation>
    </citations>
</tool>