Mercurial > repos > bgruening > model_prediction
diff model_prediction.xml @ 0:db511406350a draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
author | bgruening |
---|---|
date | Fri, 09 Aug 2019 07:11:11 -0400 |
parents | |
children | af7ed4d45619 |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/model_prediction.xml Fri Aug 09 07:11:11 2019 -0400
@@ -0,0 +1,135 @@
+<tool id="model_prediction" name="Model Prediction" version="@VERSION@">
+    <!-- Runs model_prediction.py with the chosen estimator and input data; the result is collected as the tabular output outfile_predict. -->
+    <description>predicts on new data using a prefitted model</description>
+    <!-- Shared macro files; the expanded macros and the @VERSION@ token are presumably defined there (not visible in this file). -->
+    <macros>
+        <import>main_macros.xml</import>
+        <import>keras_macros.xml</import>
+    </macros>
+    <expand macro="python_requirements"/>
+    <expand macro="macro_stdio"/>
+    <version_command>echo "@VERSION@"</version_command>
+    <command>
+        <![CDATA[
+        python '$__tool_directory__/model_prediction.py'
+            --inputs '$inputs'
+            --infile_estimator '$infile_estimator'
+            --outfile_predict '$outfile_predict'
+            --infile_weights '$infile_weights'
+            #if $input_options.selected_input == 'seq_fasta'
+            --fasta_path '$input_options.fasta_path'
+            #elif $input_options.selected_input == 'variant_effect'
+            --ref_seq '$input_options.ref_genome_file'
+            --vcf_path '$input_options.vcf_file'
+            #else
+            --infile1 '$input_options.infile1'
+            #end if
+        ]]>
+    </command>
+    <!-- The tool parameters are written to a config file whose path is handed to the script through its inputs option. -->
+    <configfiles>
+        <inputs name="inputs" />
+    </configfiles>
+    <inputs>
+        <param name="infile_estimator" type="data" format="zip" label="Choose the dataset containing pipeline/estimator object"/>
+        <!-- NOTE(review): this param is optional, yet its flag is always emitted in the command; confirm the script tolerates an unset value. -->
+        <param name="infile_weights" type="data" format="h5" optional="true" label="Choose the dataset containing weights for the estimator above" help="Optional. For deep learning only."/>
+        <param argument="method" type="select" label="Select invocation method">
+            <option value="predict" selected="true">predict</option>
+            <option value="predict_proba">predict_proba</option>
+        </param>
+        <!-- The selected input type decides which file options the command template passes (fasta, reference plus vcf, or infile1). -->
+        <conditional name="input_options">
+            <param name="selected_input" type="select" label="Select input data type for prediction">
+                <option value="tabular" selected="true">tabular data</option>
+                <option value="sparse">sparse matrix</option>
+                <option value="seq_fasta">sequences in a fasta file</option>
+                <option value="variant_effect">reference genome and variant call file</option>
+            </param>
+            <when value="tabular">
+                <param name="infile1" type="data" format="tabular" label="Training samples dataset:"/>
+                <param name="header1" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Does the dataset contain header:" />
+                <conditional name="column_selector_options_1">
+                    <expand macro="samples_column_selector_options" multiple="true"/>
+                </conditional>
+            </when>
+            <when value="sparse">
+                <param name="infile1" type="data" format="txt" label="Select a sparse matrix" help=""/>
+            </when>
+            <when value="seq_fasta">
+                <param name="fasta_path" type="data" format="fasta" label="Dataset containing fasta genomic/protein sequences" help="Sequences will be one-hot encoded to arrays."/>
+                <param name="seq_type" type="select" label="Sequence type">
+                    <option value="FastaDNABatchGenerator">DNA</option>
+                    <option value="FastaRNABatchGenerator">RNA</option>
+                    <option value="FastaProteinBatchGenerator">Protein</option>
+                </param>
+            </when>
+            <when value="variant_effect">
+                <param name="ref_genome_file" type="data" format="fasta" label="Dataset containing reference genomic sequence" help="fasta"/>
+                <param name="blacklist_regions" type="select" label="blacklist regions" help="A pre-loaded list of blacklisted intervals. Refer to `selene` for details.">
+                    <option value="none" selected="true">None</option>
+                    <option value="hg38">hg38</option>
+                    <option value="hg19">hg19</option>
+                </param>
+                <param name="vcf_file" type="data" format="vcf" label="Dataset containing sequence variations" help="vcf"/>
+                <param name="seq_length" type="integer" value="1000" label="Encoding sequence length" help="A stretch of sequence surrounding the variation position on the reference genome."/>
+                <param name="output_reference" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Predict the reference sequence?" help="If False, predict on the variant sequence."/>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data format="tabular" name="outfile_predict"/>
+    </outputs>
+    <tests>
+        <!-- Zipped estimator on tabular input; the output is compared against a stored expected file. -->
+        <test>
+            <param name="infile_estimator" value="best_estimator_.zip" ftype="zip"/>
+            <param name="method" value="predict"/>
+            <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
+            <param name="header1" value="true" />
+            <param name="col1" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17"/>
+            <output name="outfile_predict" file="model_pred01.tabular"/>
+        </test>
+        <!-- Estimator plus separate h5 weights; the output is checked by content assertions instead of a full-file comparison. -->
+        <test>
+            <param name="infile_estimator" value="keras_model04" ftype="zip"/>
+            <param name="infile_weights" value="train_test_eval_weights02.h5" ftype="h5"/>
+            <param name="method" value="predict"/>
+            <param name="infile1" value="regression_X.tabular" ftype="tabular"/>
+            <param name="header1" value="true" />
+            <param name="col1" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17"/>
+            <output name="outfile_predict" >
+                <assert_contents>
+                    <has_n_columns n="1"/>
+                    <has_text text="66.936"/>
+                    <has_text text="59.94"/>
+                    <has_text text="66.19"/>
+                    <has_text text="56.82"/>
+                    <has_text text="74.907"/>
+                </assert_contents>
+            </output>
+        </test>
+    </tests>
+    <help>
+        <![CDATA[
+**What it does**
+
+Given a fitted estimator and new data sets, this tool outputs the prediction results on the data sets via invoking the estimator's `predict` or `predict_proba` method.
+
+For estimator, this tool supports fitted sklearn estimators (pickled) and trained deep learning models (model skeleton + weights). It predicts on four different dataset inputs:
+
+- tabular
+
+- sparse
+
+- bio-sequences in a fasta file
+
+- reference genome and variant call file
+
+        ]]>
+    </help>
+    <expand macro="sklearn_citation">
+        <expand macro="keras_citation"/>
+        <expand macro="selene_citation"/>
+    </expand>
+</tool>