diff manipulate_eset.xml @ 0:22232092be53 draft default tip

"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/music/ commit d007ae51743e621dc47524f681501e72ef3a2910"
author bgruening
date Mon, 02 May 2022 09:59:18 +0000
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/manipulate_eset.xml	Mon May 02 09:59:18 2022 +0000
@@ -0,0 +1,302 @@
+<tool id="music_manipulate_eset" name="Manipulate Expression Set Object" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@"
+      profile="21.09" license="GPL-3.0-or-later" >
+    <description>Manipulate ExpressionSet objects by a variety of attributes</description>
+    <macros>
+        <import>macros.xml</import>
+    </macros>
+    <expand macro="requirements"/>
+    <command detect_errors="exit_code"><![CDATA[
+cat '$conf' >> /dev/stderr &&
+Rscript --vanilla '$conf'
+
+]]></command>
+    <configfiles>
+        <configfile name="conf" >
+suppressWarnings(suppressPackageStartupMessages(library(xbioc)))
+suppressWarnings(suppressPackageStartupMessages(library(MuSiC)))
+
+vec_ranges = function(vstr) {
+    ## convert '3:1,22,12:15' to '3,2,1,22,12,13,14,15'
+    unlist(sapply(unlist(strsplit(vstr, split=",")),
+    function(x) {
+        tmp = as.integer(unlist(strsplit(x, split=":")))
+        if (length(tmp) > 1) {
+            seq(tmp[[1]], tmp[[2]])
+        } else {
+            tmp[[1]]
+        }
+    }, USE.NAMES=FALSE))
+}
+
+null_str_vec = function(gstr){
+   tokens = unlist(as.vector(strsplit(gstr, split=",")))
+   if (length(tokens) == 0){
+      return(NULL)
+   }
+   if (length(tokens) == 1){
+      return(tokens[[1]])
+   }
+   return(tokens)
+}
+
+get_subs = function(values, by_method){
+   if (by_method == "subsample") {
+      sample(as.integer(values))
+   } else if (by_method == "labels") {
+      null_str_vec(values)
+   } else if (by_method == "range_and_index") {
+      vec_ranges(values)
+   } else {
+      NA  ## equivalent to ALL
+   }
+}
+
+rds_eset = readRDS('$rds_eset')
+#if str($combine_eset.do) == "Yes":
+new_eset = combine(rds_eset,
+    #for $e, $egroup in enumerate($combine_eset.eset_group):
+    readRDS('$egroup.eset')
+        #if $e != len($combine_eset.eset_group)-1
+    ,
+        #end if
+    #end for
+)
+#else
+new_eset = rds_eset
+#end if
+sub_eset = new_eset
+#if str($subset_eset.do) == "Yes":
+    #if str($subset_eset.subset_yes.by) == "phenotype":
+        #for $s, $sgroup in enumerate($subset_eset.subset_yes.pheno_samples)
+sub_eset = sub_eset[ ,sub_eset[['$sgroup.column']] %in% c(null_str_vec('$sgroup.values'))]
+        #end for
+        #for $g, $ggroup in enumerate($subset_eset.subset_yes.pheno_genes)
+sub_eset = sub_eset[sub_eset[['$ggroup.column']] %in% c(null_str_vec('$ggroup.values')), ]
+        #end for
+    #else
+genes = get_subs('$subset_eset.subset_yes.genes', '$subset_eset.subset_yes.by')
+samples = get_subs('$subset_eset.subset_yes.samples', '$subset_eset.subset_yes.by')
+sub_eset = sub_eset[genes, samples]
+    #end if
+#end if
+
+## print data to stdout
+print(sub_eset)
+saveRDS(sub_eset, file= '$out_eset')
+
+        </configfile>
+    </configfiles>
+    <inputs>
+        <param name="rds_eset" label="Expression Set Dataset" type="data" format="@RDATATYPE@" />
+        <conditional name="combine_eset" >
+            <param name="do" type="select" label="Concatenate other Expression Set objects?"
+                   help="Phenotype data must match between objects, and objects will be concatenated in the order given below." >
+                <option value="No" selected="true" >No</option>
+                <option value="Yes" >Yes</option>
+            </param>
+            <when value="No" ></when>
+            <when value="Yes" >
+                <repeat name="eset_group" title="Additional Dataset" min="1" >
+                    <param name="eset" label="Expression Set Dataset" type="data" format="@RDATATYPE@" />
+                </repeat>
+            </when>
+        </conditional>
+        <conditional name="subset_eset" >
+            <param name="do" type="select" label="Subset the dataset?"
+                   help="If multiple objects are concatenated as in the above section, the resulting object will be subsetted." >
+                <option value="No" selected="true" >No</option>
+                <option value="Yes" >Yes</option>
+            </param>
+            <when value="No" ></when>
+            <when value="Yes" >
+                <conditional name="subset_yes" >
+                    <param name="by" type="select" label="By"
+                           help="e.g. random subsampling, index ranges and indices, specific labels, phenotype conditions" >
+                        <option value="subsample" selected="true" >Random Subsample</option>
+                        <option value="labels" >Specific Labels</option>
+                        <option value="range_and_index" >Index Ranges and Specific Indices</option>
+                        <option value="phenotype" >Filter Samples and Genes by Phenotype Values</option>
+                    </param>
+                    <when value="subsample" >
+                        <param name="samples" type="integer" label="Select N Samples"
+                               value="" optional="true" help="e.g. '10' will select 10 random samples." />
+                        <param name="genes" type="integer" label="Select N Genes"
+                               value="" optional="true" help="e.g. '123' will select 123 random genes." />
+                    </when>
+                    <when value="labels" >
+                        <param name="samples" type="text" label="List of Sample Labels, comma-delimited"
+                               value="" optional="true" help="e.g. 'Control1,ALPOL56,SampleX' would select just those 3 samples." />
+                        <param name="genes" type="text" label="List of Gene Labels , comma-delimited"
+                               value="" optional="true" help="e.g. 'GeneA,GeneX,Gene123' would select just those 3 genes." />
+                    </when>
+                    <when value="range_and_index" >
+                        <param name="samples" type="text"
+                               label="List of Sample Indexes, ranges are colon-delimited, seperated by commas."
+                               value="" optional="true" help="e.g. '5:3,57:60,27' would yield '5,4,3,57,58,59,60,27' " />
+                        <param name="genes" type="text"
+                               label="List of Gene Indexes, ranges are colon-delimited, seperated by commas."
+                               value="" optional="true" help="e.g. '15:18,26,27,3:1' would yield '15,16,17,18,26,27,3,2,1' " />
+                    </when>
+                    <when value="phenotype" >
+                        <repeat name="pheno_samples" title="Filter Samples by Condition" min="0" >
+                            <param name="column" type="text" value="" label="Name of phenotype column"
+                                   help="e.g. 'gender' or 'control' etc"/>
+                            <param name="values" type="text" value="" label="List of values in this column to filter for, comma-delimited"
+                                   help="e.g. 'female,unknown' selects only samples with values in the above phenotype column of 'female' and 'unknown'."/>
+                        </repeat>
+                        <repeat name="pheno_genes" title="Filter Genes by Condition" min="0" >
+                            <param name="column" type="text" value="" label="Name of phenotype column"
+                                   help="e.g. 'housekeeping' or 'marker' etc"/>
+                            <param name="values" type="text" value="" label="List of values in this column to filter for, comma-delimited"
+                                   help="e.g. '' selects only samples with values in the above phenotype column of 'female' and 'unknown'."/>
+                        </repeat>
+                    </when>
+                </conditional>
+            </when>
+        </conditional>
+    </inputs>
+    <outputs>
+        <data name="out_eset" format="@RDATATYPE@" label="${tool.name} on ${on_string}: ExpressionSet Object" />
+    </outputs>
+    <tests>
+        <test expect_num_outputs="1" >
+            <!-- No operation, do nothing -->
+            <param name="rds_eset" value="Control_Bulk.rds" />
+            <output name="out_eset" value="Control_Bulk.rds" compare="sim_size" />
+        </test>
+        <test expect_num_outputs="1" >
+            <!-- No concat, subset by ranges -->
+            <param name="rds_eset" value="Control_Bulk.rds" />
+            <conditional name="combine_eset" >
+                <param name="do" value="No" />
+            </conditional>
+            <conditional name="subset_eset" >
+                <param name="do" value="Yes" />
+                <conditional name="subset_yes" >
+                    <param name="by" value="range_and_index" />
+                    <param name="samples" value="2:3" />
+                    <param name="genes" value="100:20,22,1:5" />
+                </conditional>
+            </conditional>
+            <assert_stdout>
+                <has_text text="assayData: 87 features, 2 samples" />
+            </assert_stdout>
+        </test>
+        <test expect_num_outputs="1" >
+            <!-- Concat and subset by ranges -->
+            <param name="rds_eset" value="Control_Bulk.rds" />
+            <conditional name="combine_eset" >
+                <param name="do" value="Yes" />
+                <repeat name="eset_group" >
+                    <param name="eset" value="APOL1_Bulk.rds" />
+                </repeat>
+                <repeat name="eset_group" >
+                    <param name="eset" value="Control_Bulk.rds" />
+                </repeat>
+            </conditional>
+            <conditional name="subset_eset" >
+                <param name="do" value="Yes" />
+                <conditional name="subset_yes" >
+                    <param name="by" value="range_and_index" />
+                    <param name="samples" value="5:7,1" />
+                    <param name="genes" value="100:20,22,1:3" />
+                </conditional>
+            </conditional>
+            <assert_stdout>
+                <has_text text="assayData: 85 features, 4 samples" />
+            </assert_stdout>
+        </test>
+        <test expect_num_outputs="1" >
+            <!-- Concat and subset by labels -->
+            <param name="rds_eset" value="Control_Bulk.rds" />
+            <conditional name="combine_eset" >
+                <param name="do" value="Yes" />
+                <repeat name="eset_group" >
+                    <param name="eset" value="APOL1_Bulk.rds" />
+                </repeat>
+            </conditional>
+            <conditional name="subset_eset" >
+                <param name="do" value="Yes" />
+                <conditional name="subset_yes" >
+                    <param name="by" value="labels" />
+                    <param name="samples" value="control.NA.27,control.NA.39" />
+                    <param name="genes" value="Nqo1,Card14,Scube2,Nup214" />
+                </conditional>
+            </conditional>
+            <assert_stdout>
+                <has_text text="assayData: 4 features, 2 samples" />
+                <has_text text="sampleNames: control.NA.27 control.NA.39" />
+            </assert_stdout>
+        </test>
+        <test expect_num_outputs="1" >
+            <!-- Concat and subset by filtering phenotype data -->
+            <param name="rds_eset" value="Control_Bulk.rds" />
+            <conditional name="combine_eset" >
+                <param name="do" value="Yes" />
+                <repeat name="eset_group" >
+                    <param name="eset" value="APOL1_Bulk.rds" />
+                </repeat>
+                <repeat name="eset_group" >
+                    <param name="eset" value="Control_Bulk.rds" />
+                </repeat>
+            </conditional>
+            <conditional name="subset_eset" >
+                <param name="do" value="Yes" />
+                <conditional name="subset_yes" >
+                    <param name="by" value="phenotype" />
+                    <repeat name="pheno_samples" >
+                        <param name="column" value="Control" />
+                        <param name="values" value="control" />
+                    </repeat>
+                    <repeat name="pheno_samples" >
+                        <param name="column" value="sampleID" />
+                        <param name="values" value="3" />
+                    </repeat>
+                </conditional>
+            </conditional>
+            <assert_stdout>
+                <has_text text="assayData: 19033 features, 1 samples" />
+                <has_text text="sampleNames: control.NA.39" />
+            </assert_stdout>
+        </test>
+        <test expect_num_outputs="1" >
+            <!-- Concat and random subsample -->
+            <param name="rds_eset" value="Control_Bulk.rds" />
+            <conditional name="combine_eset" >
+                <param name="do" value="Yes" />
+                <repeat name="eset_group" >
+                    <param name="eset" value="APOL1_Bulk.rds" />
+                </repeat>
+                <repeat name="eset_group" >
+                    <param name="eset" value="Control_Bulk.rds" />
+                </repeat>
+            </conditional>
+            <conditional name="subset_eset" >
+                <param name="do" value="Yes" />
+                <conditional name="subset_yes" >
+                    <param name="by" value="subsample" />
+                    <param name="samples" value="3" />
+                    <param name="genes" value="25" />
+                </conditional>
+            </conditional>
+            <assert_stdout>
+                <has_text text="assayData: 25 features, 3 samples" />
+            </assert_stdout>
+        </test>
+    </tests>
+    <help><![CDATA[
+Manipulate an ExpressionSet object by concatenation and or subsetting.
+
+For more options and information, consult `the manual <http://www.bioconductor.org/packages/release/bioc/vignettes/Biobase/inst/doc/ExpressionSetIntroduction.pdf>`_ and the `rdocumentation <https://www.rdocumentation.org/packages/Biobase/versions/2.32.0/topics/ExpressionSet>`_
+.
+    ]]></help>
+    <citations>
+        <citation type="bibtex">
+            @misc{falcon2007introduction,
+            title={An introduction to bioconductor’s expressionset class},
+            author={Falcon, Seth and Morgan, Martin and Gentleman, Robert},
+            year={2007}
+            }
+        </citation>
+    </citations>
+</tool>