Mercurial > repos > tduigou > icfree_learner
comparison learner.xml @ 0:da588cac4813 draft
planemo upload for repository https://github.com/brsynth/icfree-ml commit 48497f3422d15940998cf709ea74e4b1460fb76c
| author | tduigou |
|---|---|
| date | Wed, 05 Feb 2025 14:04:54 +0000 |
| parents | |
| children | 8e8569c19fa7 |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:da588cac4813 |
|---|---|
| 1 <tool id="icfree_learner" name="iCFree learner" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" license="MIT"> | |
| 2 <description>Active learning and model training</description> | |
| 3 <macros> | |
| 4 <import>macros.xml</import> | |
| 5 </macros> | |
| 6 <expand macro="requirements"/> | |
| 7 <command detect_errors="exit_code"><![CDATA[ | |
| 8 ## Booleans render as 'true'/'false' (never ''), so flags test for 'true'; -mindepth 2 skips files already at the top of indir, which mv would refuse to move onto themselves | |
| 9 cp '$input_param_tsv' param.tsv && | |
| 10 unzip '$input_folder_zip' -d indir && | |
| 11 find indir -mindepth 2 -type f -exec mv {} indir/ \; && | |
| 12 python -m icfree.learner | |
| 13 --data_folder indir | |
| 14 --parameter_file param.tsv | |
| 15 --output_folder 'outdir' | |
| 16 #if str($adv.name_list) != '' | |
| 17 --name_list '$adv.name_list' | |
| 18 #end if | |
| 19 #if str($adv.test) == 'true' | |
| 20 --test | |
| 21 #end if | |
| 22 --nb_rep '$adv.nb_rep' | |
| 23 #if str($adv.flatten) == 'true' | |
| 24 --flatten | |
| 25 #end if | |
| 26 #if str($adv.seed_cond.seed_param) == 'not_random' | |
| 27 --seed '$adv.seed_cond.seed' | |
| 28 #end if | |
| 29 --nb_new_data_predict '$adv.nb_new_data_predict' | |
| 30 --nb_new_data '$adv.nb_new_data' | |
| 31 --parameter_step '$adv.parameter_step' | |
| 32 --n_group '$adv.n_group' | |
| 33 --km '$adv.km' | |
| 34 --ks '$adv.ks' | |
| 35 --save_plot | |
| 36 --verbose && ls 'outdir' | |
| 37 ]]></command> | |
| 38 <inputs> | |
| 39 <param name="input_folder_zip" type="data" format="zip" label="Zip folder containing the data files" help="Zip folder containing the data file"/> | |
| 40 <param name="input_param_tsv" type="data" format="tabular" label="Parameter values for the experiments" help="Parameter values for the experiment"/> | |
| 41 <section name="adv" title="Advanced Options" expanded="false"> | |
| 42 <param name="name_list" type="text" value="" label="Labels of the feature list" help="A comma-separated string of column names or identifiers, converted to a list of strings representing columns that contain labels (y). This separates y columns from the rest (X features). (Default: Yield1,Yield2,Yield3,Yield4,Yield5)" /> | |
| 43 <param name="test" type="boolean" label="Validate the model" help="A flag for validating the model; not required to run inside the active learning loop. If not set, skip the validating step" checked="false" /> | |
| 44 <param name="nb_rep" type="integer" value="100" label="Number of test repetitions for validating the model behavior" help="The number of test repetitions for validating the model behavior. 80% of data is randomly separated for training, and 20% is used for testing." /> | |
| 45 <param name="flatten" type="boolean" label="Flatten feature data" help="A flag to indicate whether to flatten Y data. If set, treats each repetition in the same experiment independently; multiple same X values with different y outputs are modeled. Else, calculates the average of y across repetitions and only model with y average." checked="false" /> | |
| 46 <param name="nb_new_data_predict" type="integer" value="1000" label="Number of new data points generated" help="The number of new data points sampled from all possible cases." /> | |
| 47 <param name="nb_new_data" type="integer" value="50" label="Number of new data points used" help="The number of new data points selected from the generated ones. These are the data points labeled after active learning loops. `nb_new_data_predict` must be greater than `nb_new_data` to be meaningful." /> | |
| 48 <param name="parameter_step" type="integer" value="10" label="Step size used to decrement the maximum predefined concentration sequentially" help="The step size used to decrement the maximum predefined concentration sequentially. For example, if the maximum concentration is `max`, the sequence of concentrations is calculated as: `max - 1 * parameter_step`, `max - 2 * parameter_step`, `max - 3 * parameter_step`, and so on. Each concentration is a candidate for experimental testing. Smaller steps result in more possible combinations to sample." /> | |
| 49 <param name="n_group" type="integer" value="15" label="Number of clusters" help="Parameter for the cluster margin algorithm, specifying the number of groups into which generated data will be clustered." /> | |
| 50 <param name="km" type="integer" value="50" label="Number of data points for the first selection" help="Parameter for the cluster margin algorithm, specifying the number of data points for the first selection. Ensure `nb_new_data_predict > km > ks`." /> | |
| 51 <param name="ks" type="integer" value="20" label="Number of data points for the second selection" help="Parameter for the cluster margin algorithm, specifying the number of data points for the second selection. This is also similar to `nb_new_data`." /> | |
| 52 <!-- Seed: a fixed value makes runs reproducible; "random" omits the seed option from the command line --> | |
| 53 <conditional name="seed_cond"> | |
| 54 <param name="seed_param" type="select" label="Seed" help="Choose a fixed seed or leave it random"> | |
| 55 <option value="not_random" selected="true">fixed</option> | |
| 56 <option value="random">random</option> | |
| 57 </param> | |
| 58 <when value="random"/> | |
| 59 <when value="not_random"> | |
| 60 <param name="seed" type="text" value="85" label="Seed value" help="Only integer allowed"> | |
| 61 <validator type="empty_field" message="Not empty, select random"/> | |
| 62 <validator type="regex" message="Only integer allowed">^(?:\d+)$</validator> | |
| 63 </param> | |
| 64 </when> | |
| 65 </conditional> | |
| 66 </section> | |
| 67 </inputs> | |
| 68 <outputs> | |
| 69 <collection name="output_csv" type="list" label="${tool.name} - Data"> | |
| 70 <discover_datasets pattern="(?P&lt;name&gt;.*)\.csv" format="csv" directory="outdir" /> | |
| 71 </collection> | |
| 72 <collection name="output_png" type="list" label="${tool.name} - Plot"> | |
| 73 <discover_datasets pattern="(?P&lt;name&gt;.*)\.png" format="png" directory="outdir" /> | |
| 74 </collection> | |
| 75 </outputs> | |
| 76 <tests> | |
| 77 <test> | |
| 78 <!-- Runs the tool with every advanced option left at its default (fixed seed 85) and checks one CSV plus four PNG plots. Author's reference CLI call, long options written with a single dash (presumably because a double dash is not allowed inside an XML comment): python -m icfree.learner -data_folder learner -parameter_file learner.input.param.tsv -output_folder tmp -save_plot -verbose -seed 85 --> | |
| 79 <param name="input_folder_zip" value="learner.input.folder.zip" /> | |
| 80 <param name="input_param_tsv" value="learner.input.param.tsv" /> | |
| 81 <output_collection name="output_csv" type="list" count="1"> | |
| 82 <element name="next_sampling_ei50" ftype="csv" > | |
| 83 <assert_contents> | |
| 84 <has_n_lines n="51" /> | |
| 85 </assert_contents> | |
| 86 </element> | |
| 87 </output_collection> | |
| 88 <output_collection name="output_png" type="list" count="4"> | |
| 89 <element name="EI selected"> | |
| 90 <assert_contents> | |
| 91 <has_size value="77k" delta="1k"/> | |
| 92 </assert_contents> | |
| 93 </element> | |
| 94 <element name="EI"> | |
| 95 <assert_contents> | |
| 96 <has_size value="36k" delta="1k"/> | |
| 97 </assert_contents> | |
| 98 </element> | |
| 99 <element name="Train_Test"> | |
| 100 <assert_contents> | |
| 101 <has_size value="64k" delta="1k"/> | |
| 102 </assert_contents> | |
| 103 </element> | |
| 104 <element name="Yield evolution through each active learning query"> | |
| 105 <assert_contents> | |
| 106 <has_size value="19k" delta="1k"/> | |
| 107 </assert_contents> | |
| 108 </element> | |
| 109 </output_collection> | |
| 110 </test> | |
| 111 <test> | |
| 112 <!-- Overrides the advanced options with small values (two label columns, flatten enabled) and compares the CSV against learner.output.data.2.csv. Author's reference CLI call, long options written with a single dash (presumably because a double dash is not allowed inside an XML comment): python -m icfree.learner -data_folder learner -parameter_file learner.input.param.tsv -output_folder tmp2 -name_list "Yield1,Yield2" -nb_rep 5 -flatten -seed 85 -nb_new_data_predict 20 -nb_new_data 2 -parameter_step 2 -n_group 3 -km 5 -ks 2 -save_plot -verbose --> | |
| 113 <param name="input_folder_zip" value="learner.input.folder.zip" /> | |
| 114 <param name="input_param_tsv" value="learner.input.param.tsv" /> | |
| 115 <param name="name_list" value="Yield1,Yield2" /> | |
| 116 <param name="nb_rep" value="5" /> | |
| 117 <param name="flatten" value="true" /> | |
| 118 <param name="nb_new_data_predict" value="20" /> | |
| 119 <param name="nb_new_data" value="2" /> | |
| 120 <param name="parameter_step" value="2" /> | |
| 121 <param name="n_group" value="3" /> | |
| 122 <param name="km" value="5" /> | |
| 123 <param name="ks" value="2" /> | |
| 124 <output_collection name="output_csv" type="list" count="1"> | |
| 125 <element name="next_sampling_ei5" file="learner.output.data.2.csv" ftype="csv" > | |
| 126 <assert_contents> | |
| 127 <has_n_lines n="6" /> | |
| 128 </assert_contents> | |
| 129 </element> | |
| 130 </output_collection> | |
| 131 <output_collection name="output_png" type="list" count="4"> | |
| 132 <element name="EI selected"> | |
| 133 <assert_contents> | |
| 134 <has_size value="24k" delta="1k"/> | |
| 135 </assert_contents> | |
| 136 </element> | |
| 137 <element name="EI"> | |
| 138 <assert_contents> | |
| 139 <has_size value="36k" delta="1k"/> | |
| 140 </assert_contents> | |
| 141 </element> | |
| 142 <element name="Train_Test"> | |
| 143 <assert_contents> | |
| 144 <has_size value="60k" delta="1k"/> | |
| 145 </assert_contents> | |
| 146 </element> | |
| 147 <element name="Yield evolution through each active learning query"> | |
| 148 <assert_contents> | |
| 149 <has_size value="19k" delta="1k"/> | |
| 150 </assert_contents> | |
| 151 </element> | |
| 152 </output_collection> | |
| 153 </test> | |
| 154 </tests> | |
| 155 <help><![CDATA[ | |
| 156 Learner | |
| 157 ======= | |
| 158 Active learning and model training | |
| 159 | |
| 160 ]]></help> | |
| 161 <expand macro="creator"/> | |
| 162 <expand macro="citation"/> | |
| 163 </tool> |
