diff run_order_regression.xml @ 1:2e7d47c0b027 draft

"planemo upload for repository https://malex@toolshed.g2.bx.psu.edu/repos/malex/secimtools"
author malex
date Mon, 08 Mar 2021 22:04:06 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/run_order_regression.xml	Mon Mar 08 22:04:06 2021 +0000
@@ -0,0 +1,93 @@
+<tool id="secimtools_run_order_regression" name="Run Order Regression (ROR)" version="@WRAPPER_VERSION@">
+    <description>using the order samples were run.</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements" />
+    <command detect_errors="exit_code"><![CDATA[
+run_order_regression.py
+--input $input
+--design $design
+--ID $uniqID
+--group $group
+--order $order
+--fig $order_plots
+--table $order_summary
+--flags $flags
+    ]]></command>
+    <inputs>
+        <param name="input" type="data" format="tabular" label="Wide Dataset" help="Input your tab-separated wide format dataset. If file is not tab separated see TIP below."/>
+        <param name="design" type="data" format="tabular" label="Design File" help="Input your design file(tab-separated). Note you need a 'sampleID' column. If not tab separated see TIP below."/>
+        <param name="uniqID" type="text" size="30" value="" label="Unique Feature ID" help="Name of the column in your wide dataset that has unique identifiers."/>
+        <param name="group" type="text" size="30" value="" label="Group/Treatment" help="Name of the column in your design file that contains group classifications."/>
+        <param name="order" type="text" size="30" value="" label="Run Order ID" help="The name of the column in your design file that contains the order the samples were run."/>
+    </inputs>
+    <outputs>
+        <data name="order_plots" format="pdf" label="${tool.name} on ${on_string}: Plots" />
+        <data name="order_summary" format="tabular" label="${tool.name} on ${on_string}: Summary"/>
+        <data name="flags"  format="tabular" label="${tool.name} on ${on_string}: Flags"/>
+    </outputs>
+    <tests>
+        <test>
+            <param name="input"  value="ST000006_data.tsv"/>
+            <param name="design" value="ST000006_design.tsv"/>
+            <param name="uniqID" value="Retention_Index" />
+            <param name="group"  value="White_wine_type_and_source" />
+            <param name="order"  value="run_Order_fake_variable" />
+            <output name="order_plots"        file="ST000006_run_order_regression_figure.pdf" compare="sim_size" delta="10000" />
+            <output name="order_summary" file="ST000006_run_order_regression_table.tsv" />
+            <output name="flags"         file="ST000006_run_order_regression_flags.tsv" />
+        </test>
+    </tests>
+    <help><![CDATA[
+
+@TIP_AND_WARNING@
+
+**Tool Description**
+
+**NOTE:** The tool is intended to evaluate the impact of sample run order on feature (row) values.  Not applicable in the absence of known run order.
+
+It uses linear regression to identify features where the regression slope is not zero for nominal levels of significance.
+
+The tool fits a simple linear regression by feature (row) using values for each feature as a response and sample run order as a linear predictor.
+The goal is to identify a linear trend  that changes over time and determine whether the trends are statistically significant.
+The tool generates flags if the slope is statistically significant for two different levels of statistical significance ( alpha = 0.05 and alpha = 0.01).
+
+NOTE: Groups with one element are excluded from the analysis.
+
+--------------------------------------------------------------------------------
+
+**Input**
+
+    - Two input datasets are required.
+
+@WIDE@
+
+**NOTE:** The sample IDs must match the sample IDs in the Design File (below). Extra columns will automatically be ignored.
+
+@METADATA@
+
+@UNIQID@
+
+@GROUP@
+
+    - **NOTE:** Groups with one element will be excluded.
+
+@RUNORDER@
+
+
+-----------------------------------------------------------------------------------
+
+**Output**
+
+This tool outputs three different files:
+
+(1) a TSV file of regression summaries including the values of the regression slope, corresponding p-value and r-squared value.
+
+(2) a TSV file with the  corresponding flags for two levels of statistical significance (alpha = 0.05 and alpha = 0.01).
+
+(3) and a PDF file with fitted regression plots for each feature. The values of the feature are displayed on the plot together with the regression line, bands, slopes, and corresponding p and r-squared values. The values are colored according to group classification.
+
+    ]]></help>
+    <expand macro="citations"/>
+</tool>