Mercurial > repos > iuc > metagenomeseq_normalization

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/metagenomeseq_normalization.xml	Wed Apr 12 17:19:48 2017 -0400
@@ -0,0 +1,144 @@
+<tool id="metagenomeseq_normalizaton" name="metagenomeSeq Normalization" version="1.16.0-0.0.1">
+    <description>Cumulative sum scaling</description>
+    <requirements>
+        <requirement type="package" version="1.16.0">bioconductor-metagenomeseq</requirement>
+        <requirement type="package" version="1.2.0">bioconductor-biomformat</requirement>
+    </requirements>
+    <version_command><![CDATA[Rscript -e 'suppressMessages(library("metagenomeSeq"));cat(toString(packageVersion("metagenomeSeq")))']]></version_command>
+    <command detect_errors="exit_code"><![CDATA[
+        ## Keep a copy of the generated R script when that output was requested, then run the script.
+        #if "output_r_script" in str($include_outputs).split(","):
+            cp '${metagenomeseq_normalization_script}' '${output_r_script}' &&
+        #end if
+        Rscript '${metagenomeseq_normalization_script}'
+    ]]></command>
+    <configfiles>
+         <configfile name="metagenomeseq_normalization_script"><![CDATA[#!/usr/bin/env Rscript
+#set $include_files = str( $include_outputs ).split( "," )
+library(metagenomeSeq)
+library(biomformat)
+## Load the input table into an MRexperiment; only BIOM1 input is handled so far.
+#if $input_abundance_file.is_of_type( 'biom1' ):
+inputMRe = biom2MRexperiment( biomformat::read_biom( "${input_abundance_file}" ) )
+#else:
+##inputMRe = newMRexperiment( counts, phenoData = NULL, featureData = NULL, libSize = NULL, normFactors = NULL)
+#raise Exception( "Not yet implemented" )
+#end if
+## Choose the scaling percentile p: estimated by cumNormStat / cumNormStatFast, or the user-supplied raw value.
+#if str( $percentile_type.percentile_type_selector ) == 'cumNormStat':
+p = cumNormStat( inputMRe,  qFlag = ${percentile_type.qFlag}, rel = ${percentile_type.rel} )
+#elif str( $percentile_type.percentile_type_selector ) == 'cumNormStatFast':
+p = cumNormStatFast( inputMRe, rel = ${percentile_type.rel} )
+#else:
+p = ${percentile_type.raw_value}
+#end if
+## Normalize with cumNorm using the percentile chosen above.
+inputMRe = cumNorm(inputMRe, p = p )
+## Write only the outputs named in include_outputs. First: the normalized count matrix as a table.
+#if "output_tabular" in $include_files:
+mat = MRcounts(inputMRe, norm = TRUE, log = ${log2}, sl = ${sl})
+exportMat(mat, file = "${output_tabular_dataset}")
+#end if
+## BIOM export is dormant: its select option and output dataset are commented out elsewhere in this tool.
+#if "output_biom" in $include_files:
+biomformat::write_biom( MRexperiment2biom(inputMRe, id = NULL, norm = TRUE, log = ${log2}, sl = ${sl}, qiimeVersion = TRUE), "${output_biom_dataset}" )
+#end if
+## Statistics file produced by exportStats for the same percentile p.
+#if "output_stats" in $include_files:
+exportStats(inputMRe, p = p, file = "${output_stats}")
+#end if
+    ]]>
+         </configfile>
+    </configfiles>
+<inputs>
+    <param name="input_abundance_file" type="data" format="biom1" label="Input Matrix" help="BIOM format"/>
+    <!-- How the percentile handed to cumNorm is obtained: estimated (cumNormStat / cumNormStatFast) or entered manually (raw). -->
+    <conditional name="percentile_type">
+        <param name="percentile_type_selector" type="select" label="Percentile for which to scale data">
+            <option value="cumNormStat" selected="True">cumNormStat</option>
+            <option value="cumNormStatFast" selected="False">cumNormStatFast</option>
+            <option value="raw" selected="False">Raw</option>
+        </param>
+        <when value="cumNormStat">
+            <param name="qFlag" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="True" label="qFlag" help="Flag to either calculate the proper percentile using R's step-wise quantile function or approximate function."/>
+            <param name="rel" type="float" value="0.1" label="rel" help="Cutoff for the relative difference from one median difference from the reference to the next"/>
+        </when>
+        <when value="cumNormStatFast">
+            <param name="rel" type="float" value="0.1" label="rel" help="Cutoff for the relative difference from one median difference from the reference to the next"/>
+        </when>
+        <when value="raw">
+            <param name="raw_value" type="float" value=".80" min="0" max="1" label="Percentile" help="Manual percentile, given as a fraction between 0 and 1"/>
+        </when>
+    </conditional>
+    <!-- Options forwarded as log= and sl= to MRcounts / MRexperiment2biom in the generated script; sl must be positive. -->
+    <param name="log2" type="boolean" truevalue="TRUE" falsevalue="FALSE" checked="False" label="Log2 transform scaling" help=""/>
+    <param name="sl" type="integer" value="1000" min="1" label="The value to scale by." help="(default=1000)"/>
+    <!-- Which datasets to produce; these values are matched by the command, the R script template and the output filters. -->
+    <param name="include_outputs" type="select" multiple="True" label="Datasets to create">
+        <option value="output_tabular" selected="True">Normalized Tabular Matrix</option>
+        <!-- <option value="output_biom" selected="True">Normalized BIOM</option> -->
+        <option value="output_stats" selected="True">Simple Statistics</option>
+        <option value="output_r_script" selected="False">R script</option>
+    </param>
+</inputs>
+<outputs> <!-- Each filter first checks include_outputs is non-empty, so an empty selection (None) creates no dataset instead of erroring. -->
+    <!-- <data format="biom1" name="output_biom_dataset" label="${tool.name} on ${on_string} (BIOM1)">
+         <filter>include_outputs and "output_biom" in include_outputs</filter>
+    </data> -->
+    <data format="tabular" name="output_tabular_dataset" label="${tool.name} on ${on_string} (Tabular)">
+         <filter>include_outputs and "output_tabular" in include_outputs</filter>
+    </data>
+    <data format="tabular" name="output_stats" label="${tool.name} on ${on_string} (Simple Statistics)">
+        <filter>include_outputs and "output_stats" in include_outputs</filter>
+    </data>
+    <data format="txt" name="output_r_script" label="${tool.name} on ${on_string} (Rscript)">
+        <filter>include_outputs and "output_r_script" in include_outputs</filter>
+    </data>
+</outputs>
+<tests>
+    <test> <!-- BIOM1 input, cumNormStat percentile, no log2, sl=1000; include_outputs is not set, so its default selection applies -->
+        <param name="input_abundance_file" value="input_1.biom1" ftype="biom1"/>
+        <conditional name="percentile_type">
+            <param name="percentile_type_selector" value="cumNormStat"/>
+            <param name="qFlag" value="TRUE"/>
+            <param name="rel" value=".1"/>
+        </conditional>
+        <param name="log2" value="FALSE"/>
+        <param name="sl" value="1000"/>
+        <output name="output_tabular_dataset"> <!-- only checks that the column id SAMPLE_2 from the fixture appears in the table -->
+            <assert_contents>
+                <has_text_matching expression="SAMPLE_2" />
+            </assert_contents>
+        </output>
+    </test>
+</tests>
+    <help><![CDATA[
+metagenomeSeq Cumulative sum scaling
+====================================
+
+Info
+----
+
+::
+
+  Cumulative sum scaling based upon percentile selection. You can manually specify the percentile or calculate it using cumNormStat or cumNormStatFast.
+
+Inputs
+------
+
+::
+
+  Requires a BIOM formatted file for input.
+
+Outputs
+-------
+
+::
+
+  Creates a normalized and scaled output abundance matrix in Tabular format. It can additionally create simple statistics and export the generated R script.
+
+    ]]></help>
+<citations>
+    <citation type="doi">10.1038/nmeth.2658</citation>
+</citations>
+</tool>
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/input_1.biom1	Wed Apr 12 17:19:48 2017 -0400
@@ -0,0 +1,1 @@
+{"id": "None","format": "Biological Observation Matrix 1.0.0","format_url": "http://biom-format.org","matrix_type": "sparse","generated_by": "BIOM-Format 2.1.5","date": "2016-05-26T16:43:45.614267","type": "OTU table","matrix_element_type": "float","shape": [19, 2],"data": [[1,0,160.0],[1,1,242.0],[6,0,1.0],[6,1,1.0],[7,0,3.0],[7,1,4.0],[12,0,13.0],[12,1,36.0],[14,0,1.0],[14,1,5.0],[15,0,1.0],[16,0,1.0],[16,1,3.0]],"rows": [{"id": "2", "metadata": {"taxonomy": ["d__Archaea"]}},{"id": "3", "metadata": {"taxonomy": ["d__Bacteria"]}},{"id": "4", "metadata": {"taxonomy": ["d__Archaea", "p__Crenarchaeota"]}},{"id": "5", "metadata": {"taxonomy": ["d__Archaea", "p__Euryarchaeota"]}},{"id": "8", "metadata": {"taxonomy": ["d__Bacteria", "p__AC1"]}},{"id": "9", "metadata": {"taxonomy": ["d__Bacteria", "p__AD3"]}},{"id": "10", "metadata": {"taxonomy": ["d__Bacteria", "p__Acidobacteria"]}},{"id": "11", "metadata": {"taxonomy": ["d__Bacteria", "p__Actinobacteria"]}},{"id": "12", "metadata": {"taxonomy": ["d__Bacteria", "p__AncK6"]}},{"id": "14", "metadata": {"taxonomy": ["d__Bacteria", "p__Armatimonadetes"]}},{"id": "15", "metadata": {"taxonomy": ["d__Bacteria", "p__BHI80-139"]}},{"id": "16", "metadata": {"taxonomy": ["d__Bacteria", "p__BRC1"]}},{"id": "17", "metadata": {"taxonomy": ["d__Bacteria", "p__Bacteroidetes"]}},{"id": "18", "metadata": {"taxonomy": ["d__Bacteria", "p__CD12"]}},{"id": "22", "metadata": {"taxonomy": ["d__Bacteria", "p__Chlorobi"]}},{"id": "23", "metadata": {"taxonomy": ["d__Bacteria", "p__Chloroflexi"]}},{"id": "25", "metadata": {"taxonomy": ["d__Bacteria", "p__Cyanobacteria"]}},{"id": "28", "metadata": {"taxonomy": ["d__Bacteria", "p__EM19"]}},{"id": "29", "metadata": {"taxonomy": ["d__Bacteria", "p__EM3"]}}],"columns": [{"id": "SAMPLE_1", "metadata": null},{"id": "SAMPLE_2", "metadata": null}]}
\ No newline at end of file