Mercurial > repos > pieterlukasse > prims_metabolomics

diff create_model.xml @ 0:9d5f4f5f764b
Initial commit to toolshed
author: pieter.lukasse@wur.nl
date: Thu, 16 Jan 2014 13:10:00 +0100
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/create_model.xml	Thu Jan 16 13:10:00 2014 +0100
@@ -0,0 +1,78 @@
+<tool id="create_poly_model" name="RIQC-Create Regression Model" version="1.0.2">
+  <description>Generate coefficients to enable the regression from one GC-column
+               to another GC-column</description>
+  <!-- Arguments are positional (no flags): keep this order in sync with Rscripts/ridb-regression.R.
+       Values are single-quoted so that paths containing spaces reach the script as one argument. -->
+  <command interpreter="Rscript">Rscripts/ridb-regression.R
+               '$ridb'
+               '$out_model' '$out_log'
+               '$min_residuals'
+               '$range_mod'
+               '$pvalue' '$rsquared'
+               '$method'
+               '$plot'
+               #if $plot
+                   '$model_graphics'
+               #end if
+  </command>
+  <inputs>
+    <!-- Each param name below is referenced as $name in the command template above; do not rename one without the other. -->
+    <param format="tabular" name="ridb" type="select" label="Retention Index (RI) and GC columns Library file"
+           help="Select the RI library file of which all GC columns and their RI values
+                 will be used to create a model"
+           dynamic_options='get_directory_files("tool-data/shared/PRIMS-metabolomics/RI_DB_libraries")'/>
+    <param name="method" type="select" label="Select regression method"
+           help="Method to use for calculating the model">
+      <option value="poly" selected="true">Polynomial (3rd degree)</option>
+      <option value="linear">Linear</option>
+    </param>
+    <param name="min_residuals" type="integer" value="10" optional="false"
+           label="Minimum number of residuals" help="The minimum number of residuals
+                 (datapoints) that both columns should have in common when calculating
+                 the model" />
+    <param name="range_mod" type="integer" value="0" optional="false"
+           label="Range modifier" help="Moves the range of the usable RI space by the
+                 given percentage. Set to 0 to use the full range of available data." />
+    <param name="pvalue" type="float" value="0.05" optional="false" min="0" max="1"
+           label="Pvalue to filter on" help="Set the upper limit for the pvalue (calculated
+                 by performing an ANOVA analysis on the created model). All models with higher
+                 pvalues are discarded." />
+    <param name="rsquared" type="float" value="0.95" optional="false" min="0" max="1"
+           label="R-squared to filter on" help="Set the lower limit for the R-squared,
+                 all models with lower values are discarded." />
+    <param name="plot" type="boolean" label="Create a separate plot for each model"
+           help="This will create a ZIP file in the history containing PDF plots" />
+  </inputs>
+  <code file="match_library.py" />  <!-- presumably supplies get_directory_files(), which the "ridb" dynamic_options expression calls; confirm in match_library.py -->
+  <outputs>  <!-- model_graphics is only produced when the "plot" checkbox is ticked (see its filter) -->
+    <data name="model_graphics" format="zip" label="Model Graphics of ${on_string}">
+      <filter>(plot)</filter>
+    </data>
+    <data name="out_log" format="tabular" label="Regression logfile of ${on_string}"/>
+    <data name="out_model" format="tabular" label="Regression model of ${on_string}"/>
+  </outputs>
+  <help>
+Calculates regression models for a permutation of all GC columns contained in the selected
+RI database file. The method used for creating the model is either based on a 3rd degree 
+polynomial or a standard linear model.
+
+The *Minimum number of residuals* option only allows a regression when the two columns it is
+based on have at least that number of datapoints (compounds) in common.
+
+Filtering is possible by setting an upper limit for the *p-value* and / or a lower limit for
+the *R squared* value. The produced logfile will state how many models have been discarded due
+to this filtering. The output model file also includes the p-value and R squared value for
+each created model.
+
+Graphical output of the models is available by selecting the plot option which shows the
+data points used for the model as well as the fit itself and the range of data that will
+be usable. 
+
+.. class:: infomark
+
+**Notes**
+
+The output file produced by this tool is required as input for the CasLookup tool when
+selecting to apply regression when finding hits in the RIDB.
+  </help>
+</tool>
author	pieter.lukasse@wur.nl
date	Thu, 16 Jan 2014 13:10:00 +0100
parents
children