view GCMS/create_model.xml @ 24:2ab40f9266e4 draft

An update to the Galaxy core no longer permits the way this code tries to write files. This patch fixes that.
author linda-bakker
date Tue, 20 Feb 2018 07:59:47 -0500
parents 5a753524e525
children
line wrap: on
line source

<tool id="create_poly_model" name="RIQC-Create Regression Model" version="1.0.2">
  <!-- One-line summary of the tool. Kept on a single line so that no line break or
       indentation whitespace ends up inside the description text. -->
  <description>Generate coefficients to enable the regression from one GC-column to another GC-column</description>
  <!--
    Runs Rscripts/ridb-regression.R through Rscript with positional arguments, in this order:
      ridb, out_model, out_log, min_residuals, range_mod, pvalue, rsquared, method, plot
    followed by model_graphics, which is appended only when the plot option is enabled.
    Every substitution is single-quoted so that a value containing spaces or shell
    metacharacters reaches the script as exactly one argument.
  -->
  <command interpreter="Rscript">Rscripts/ridb-regression.R
               '$ridb'
               '$out_model'
               '$out_log'
               '$min_residuals'
               '$range_mod'
               '$pvalue'
               '$rsquared'
               '$method'
               '$plot'
               #if $plot
                   '$model_graphics'
               #end if
  </command>
  <inputs>
    <!-- RI library to build the models from. The option list is produced at runtime by
         get_directory_files(), presumably provided by this tool's code hook file (TODO confirm).
         NOTE(review): "format" normally applies to data parameters; it looks unused on a
         select parameter, confirm before removing. -->
    <param format="tabular" name="ridb" type="select" label="Retention Index (RI) and GC columns Library file"
           help="Select the RI library file of which all GC columns and their RI values
                 will be used to create a model"
           dynamic_options='get_directory_files("tool-data/shared/PRIMS-metabolomics/RI_DB_libraries")'/>

    <!-- Regression method handed to the script: "poly" (preselected) or "linear". -->
    <param name="method" type="select" label="Select regression method"
           help="Method to use for calculating the model" >
           <option value="poly" selected="True">Polynomial (3rd degree)</option>
           <option value="linear">Linear</option>
    </param>
    <!-- Required integer, default 10. -->
    <param name="min_residuals" type="integer" value="10" optional="False"
           label="Minimum number of residuals" help="The minimum number of residuals
                 (datapoints) that both columns should have in common when calculating
                 the model" />
    <!-- Required integer, default 0 (use the full range of available data). -->
    <param name="range_mod" type="integer" value="0" optional="False"
           label="Range modifier" help="Moves the range of the usable RI space by the
                  given percentage. Set to 0 to use the full range of available data." />
    <!-- Filter thresholds; both values are restricted to the interval [0, 1]. -->
    <param name="pvalue" type="float" value="0.05" optional="False" min="0" max="1"
           label="Pvalue to filter on" help="Set the upper limit for the pvalue (calculated
                  by performing an ANOVA analysis on the created model). All models with higher
                  pvalues are discarded." />
    <param name="rsquared" type="float" value="0.95" optional="False" min="0" max="1"
           label="R-squared to filter on" help="Set the lower limit for the R-squared,
                  all models with lower values are discarded." />
    <!-- When enabled, the model_graphics output (ZIP of PDF plots) is added to the history. -->
    <param name="plot" type="boolean" label="Create a separate plot for each model"
           help="This will create a ZIP file in the history containing PDF plots" />
  </inputs>
  <!-- Code hook: loads ../match_library.py. Presumably this is where get_directory_files(),
       referenced by the dynamic_options of the "ridb" parameter, is defined (TODO confirm). -->
  <code file="../match_library.py" />
  <outputs>
    <!-- ZIP archive with the per-model plots; only produced when the "plot" option is enabled. -->
    <data name="model_graphics" format="zip" label="Model Graphics of ${on_string}">
      <filter>(plot)</filter>
    </data>
    <!-- Tabular log file of the regression run. -->
    <data name="out_log" format="tabular" label="Regression logfile of ${on_string}"/>
    <!-- Tabular file containing the regression models. -->
    <data name="out_model" format="tabular" label="Regression model of ${on_string}"/>
  </outputs>
  <help>
Calculates regression models for a permutation of all GC columns contained in the selected
RI database file. The method used for creating the model is either based on a 3rd degree 
polynomial or a standard linear model.

The *Minimum number of residuals* option only allows a regression when the columns it is based
on have at least that number of datapoints in common.

Filtering is possible by setting an upper limit for the *p-value* and / or a lower limit for
the *R squared* value. The produced logfile will state how many models have been discarded due
to this filtering. The output model file also includes the p-value and R squared value for
each created model.

Graphical output of the models is available by selecting the plot option which shows the
data points used for the model as well as the fit itself and the range of data that will
be usable. 

.. class:: infomark

**Notes**

The output file produced by this tool is required as input for the CasLookup tool when
selecting to apply regression when finding hits in the RIDB.
  </help>
</tool>