Mercurial > repos > bgruening > model_prediction
comparison model_prediction.xml @ 0:db511406350a draft
planemo upload for repository https://github.com/bgruening/galaxytools/tree/master/tools/sklearn commit 60f0fbc0eafd7c11bc60fb6c77f2937782efd8a9-dirty
author | bgruening |
---|---|
date | Fri, 09 Aug 2019 07:11:11 -0400 |
parents | |
children | af7ed4d45619 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:db511406350a |
---|---|
1 <tool id="model_prediction" name="Model Prediction" version="@VERSION@"> | |
2 <!-- One-line summary of the tool. --> <description>predicts on new data using a pre-fitted model</description> | |
3 <macros> | |
4 <import>main_macros.xml</import> | |
5 <import>keras_macros.xml</import> | |
6 </macros> | |
7 <expand macro="python_requirements"/> | |
8 <expand macro="macro_stdio"/> | |
9 <version_command>echo "@VERSION@"</version_command> | |
10 <!-- Cheetah template for the command line. It always passes the inputs config file, the estimator, the output path and the (optional) weights dataset, then adds the data arguments that match input_options.selected_input: fasta_path for seq_fasta, ref_seq plus vcf_path for variant_effect, and infile1 for every other choice (tabular, sparse). --> <command> | |
11 <![CDATA[ | |
12 python '$__tool_directory__/model_prediction.py' | |
13 --inputs '$inputs' | |
14 --infile_estimator '$infile_estimator' | |
15 --outfile_predict '$outfile_predict' | |
16 --infile_weights '$infile_weights' | |
17 #if $input_options.selected_input == 'seq_fasta' | |
18 --fasta_path '$input_options.fasta_path' | |
19 #elif $input_options.selected_input == 'variant_effect' | |
20 --ref_seq '$input_options.ref_genome_file' | |
21 --vcf_path '$input_options.vcf_file' | |
22 #else | |
23 --infile1 '$input_options.infile1' | |
24 #end if | |
25 ]]> | |
26 </command> | |
27 <!-- Config file that the command template references as $inputs. --> <configfiles> | |
28 <inputs name="inputs"/> | |
29 </configfiles> | |
30 <!-- Tool form: fitted estimator, optional weights, prediction method, and a conditional that selects the kind of data to predict on. --> <inputs> | |
31 <param name="infile_estimator" type="data" format="zip" label="Choose the dataset containing pipeline/estimator object"/> | |
32 <param name="infile_weights" type="data" format="h5" optional="true" label="Choose the dataset containing weights for the estimator above" help="Optional. For deep learning only."/> | |
33 <param argument="method" type="select" label="Select invocation method"> | |
34 <option value="predict" selected="true">predict</option> | |
35 <option value="predict_proba">predict_proba</option> | |
36 </param> | |
37 <!-- The branch selected here decides which data arguments the command template emits. --> <conditional name="input_options"> | |
38 <param name="selected_input" type="select" label="Select input data type for prediction"> | |
39 <option value="tabular" selected="true">tabular data</option> | |
40 <option value="sparse">sparse matrix</option> | |
41 <option value="seq_fasta">sequences in a fasta file</option> | |
42 <option value="variant_effect">reference genome and variant call file</option> | |
43 </param> | |
44 <when value="tabular"> | |
45 <!-- NOTE(review): the label says "Training" although this tool only predicts; confirm the intended wording. --> <param name="infile1" type="data" format="tabular" label="Training samples dataset:"/> | |
46 <param name="header1" type="boolean" optional="true" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Does the dataset contain header:" /> | |
47 <conditional name="column_selector_options_1"> | |
48 <expand macro="samples_column_selector_options" multiple="true"/> | |
49 </conditional> | |
50 </when> | |
51 <when value="sparse"> | |
52 <param name="infile1" type="data" format="txt" label="Select a sparse matrix" help=""/> | |
53 </when> | |
54 <when value="seq_fasta"> | |
55 <param name="fasta_path" type="data" format="fasta" label="Dataset containing fasta genomic/protein sequences" help="Sequences will be one-hot encoded to arrays."/> | |
56 <param name="seq_type" type="select" label="Sequence type"> | |
57 <option value="FastaDNABatchGenerator">DNA</option> | |
58 <option value="FastaRNABatchGenerator">RNA</option> | |
59 <option value="FastaProteinBatchGenerator">Protein</option> | |
60 </param> | |
61 </when> | |
62 <when value="variant_effect"> | |
63 <param name="ref_genome_file" type="data" format="fasta" label="Dataset containing reference genomic sequence" help="fasta"/> | |
64 <param name="blacklist_regions" type="select" label="blacklist regions" help="A pre-loaded list of blacklisted intervals. Refer to `selene` for details."> | |
65 <option value="none" selected="true">None</option> | |
66 <option value="hg38">hg38</option> | |
67 <option value="hg19">hg19</option> | |
68 </param> | |
69 <param name="vcf_file" type="data" format="vcf" label="Dataset containing sequence variations" help="vcf"/> | |
70 <param name="seq_length" type="integer" value="1000" label="Encoding sequence length" help="A stretch of sequence surrounding the variation position on the reference genome."/> | |
71 <param name="output_reference" type="boolean" truevalue="booltrue" falsevalue="boolfalse" checked="false" label="Predict the reference sequence?" help="If False, predict on the variant sequence."/> | |
72 </when> | |
73 </conditional> | |
74 </inputs> | |
75 <!-- Single tabular output; the command template passes its path as the outfile_predict argument. --> <outputs> | |
76 <data name="outfile_predict" format="tabular"/> | |
77 </outputs> | |
78 <!-- Functional tests. Neither test sets selected_input, so both exercise the default "tabular" branch. --> <tests> | |
79 <!-- Estimator without weights: output is compared with the stored file model_pred01.tabular. --> <test> | |
80 <param name="infile_estimator" ftype="zip" value="best_estimator_.zip"/> | |
81 <param name="method" value="predict"/> | |
82 <param name="infile1" ftype="tabular" value="regression_X.tabular"/> | |
83 <param name="header1" value="true"/> | |
84 <param name="col1" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17"/> | |
85 <output name="outfile_predict" file="model_pred01.tabular"/> | |
86 </test> | |
87 <!-- Estimator plus an h5 weights dataset: output is checked with content assertions instead of a stored file. --> <test> | |
88 <param name="infile_estimator" ftype="zip" value="keras_model04"/> | |
89 <param name="infile_weights" ftype="h5" value="train_test_eval_weights02.h5"/> | |
90 <param name="method" value="predict"/> | |
91 <param name="infile1" ftype="tabular" value="regression_X.tabular"/> | |
92 <param name="header1" value="true"/> | |
93 <param name="col1" value="1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17"/> | |
94 <output name="outfile_predict"> | |
95 <assert_contents> | |
96 <has_n_columns n="1"/> | |
97 <has_text text="66.936"/> | |
98 <has_text text="59.94"/> | |
99 <has_text text="66.19"/> | |
100 <has_text text="56.82"/> | |
101 <has_text text="74.907"/> | |
102 </assert_contents> | |
103 </output> | |
104 </test> | |
105 </tests> | |
106 <help> | |
107 <![CDATA[ | |
108 **What it does** | |
109 | |
110 Given a fitted estimator and new data sets, this tool outputs the prediction results on the data sets by invoking the estimator's `predict` or `predict_proba` method. | |
111 | |
112 For the estimator, this tool supports fitted sklearn estimators (pickled) and trained deep learning models (model skeleton + weights). It predicts on four different kinds of dataset input: | |
113 | |
114 - tabular | |
115 | |
116 - sparse | |
117 | |
118 - bio-sequences in a fasta file | |
119 | |
120 - reference genome and variant call file | |
121 | |
122 ]]> | |
123 </help> | |
124 <!-- Citations are supplied by imported macros. NOTE(review): the nested keras/selene expands are presumably inserted where the sklearn_citation macro yields; confirm in main_macros.xml. --> <expand macro="sklearn_citation"> | |
125 <expand macro="keras_citation"/> | |
126 <expand macro="selene_citation"/> | |
127 </expand> | |
128 </tool> |