Mercurial > repos > iuc > ruvseq

--- a/get_deseq_dataset.R	Fri Jul 23 22:37:45 2021 +0000
+++ b/get_deseq_dataset.R	Fri Apr 21 14:09:17 2023 +0000
@@ -14,7 +14,7 @@
     }
   }

-  if (!use_txi & has_header) {
+  if (!use_txi && has_header) {
       countfiles <- lapply(as.character(sample_table$filename), read.delim, row.names = 1)
       tbl <- do.call("cbind", countfiles)
       colnames(tbl) <- rownames(sample_table) # take sample ids from header
@@ -35,7 +35,7 @@
         colData = subset(sample_table, select = -filename),
         design = design_formula
       )
-  } else if (!use_txi & !has_header) {
+  } else if (!use_txi && !has_header) {

     # construct the object from HTSeq files
     dds <- DESeqDataSetFromHTSeqCount(
--- a/ruvseq.R	Fri Jul 23 22:37:45 2021 +0000
+++ b/ruvseq.R	Fri Apr 21 14:09:17 2023 +0000
@@ -1,7 +1,8 @@
 # setup R error handling to go to stderr
 library("getopt")
-options(show.error.messages = F, error = function() {
-  cat(geterrmessage(), file = stderr()); q("no", 1, F)
+options(show.error.messages = FALSE, error = function() {
+  cat(geterrmessage(), file = stderr())
+  q("no", 1, FALSE)
 })
 options(stringAsFactors = FALSE, useFancyQuotes = FALSE)

@@ -17,7 +18,8 @@
     "plots", "p", 1, "character",
     "header", "H", 0, "logical",
     "txtype", "y", 1, "character",
-    "tx2gene", "x", 1, "character"), # a space-sep tx-to-gene map or GTF file (auto detect .gtf/.GTF)
+    "tx2gene", "x", 1, "character", # a space-sep tx-to-gene map or GTF file (auto detect .gtf/.GTF)
+    "ruv_ncounts", "ruv_ncounts", 0, "logical"),
     byrow = TRUE, ncol = 4)

   opt <- getopt(spec)
@@ -155,6 +157,7 @@
 min_k <- opt$min_k
 max_k <- opt$max_k
 min_c <- opt$min_mean_count
+ruv_ncounts <- ifelse(is.null(opt$ruv_ncounts), FALSE, TRUE)
 sample_json <- fromJSON(opt$sample_json)
 sample_paths <- sample_json$path
 sample_names <- sample_json$label
@@ -183,8 +186,14 @@
     df <- data.frame(identifier = rownames(unwanted_variation))
     df <- cbind(df, unwanted_variation)
     colnames(df)[2] <- "condition"
-    write.table(df, file = paste0("batch_effects_", name, ".tabular"),  sep = "\t", quote = F, row.names = F)
+    write.table(df, file = paste0("uv_batch_effects_", name, ".tabular"),  sep = "\t", quote = FALSE, row.names = FALSE)
+    if (ruv_ncounts) {
+      ruvnorm_counts <- normCounts(set)
+      ruvnorm_df <- data.frame(geneID = rownames(ruvnorm_counts), ruvnorm_counts)
+      write.table(ruvnorm_df, file = paste0("ruv_norm_counts_", name, ".tabular"),  sep = "\t", quote = FALSE, row.names = FALSE)
+    }
   }
+
 }

 # close the plot device
--- a/ruvseq.xml	Fri Jul 23 22:37:45 2021 +0000
+++ b/ruvseq.xml	Fri Apr 21 14:09:17 2023 +0000
@@ -1,8 +1,12 @@
 <tool id="ruvseq" name="Remove Unwanted Variation" version="@TOOL_VERSION@+galaxy@WRAPPER_VERSION@">
     <description>from RNA-seq data</description>
+    <xrefs>
+        <xref type="bio.tools">ruvseq</xref>
+        <xref type="bioconductor">ruvseq</xref>
+    </xrefs>
     <macros>
         <token name="@TOOL_VERSION@">1.26.0</token>
-        <token name="@WRAPPER_VERSION@">0</token>
+        <token name="@WRAPPER_VERSION@">1</token>
     </macros>
     <requirements>
         <requirement type="package" version="@TOOL_VERSION@">bioconductor-ruvseq</requirement>
@@ -60,6 +64,10 @@
         --tx2gene mapping.txt
     #end if
 #end if
+
+#if $ruv_ncounts == 1:
+    --ruv_ncounts
+#end if
 ]]></command>
     <configfiles>
         <configfile name="sampleTable">
@@ -117,10 +125,17 @@
         <param name="pdf" type="boolean" truevalue="1" falsevalue="0" checked="true"
             label="Visualising the analysis results"
             help="output an additional PDF files" />
+        <param name="ruv_ncounts" type="boolean" truevalue="1" falsevalue="0" checked="false"
+            label="Output RUVSeq normalized count tables"
+            help="If this option is set to Yes, the tool will generate RUVSeq normalized count files. Default: No" />
     </inputs>
     <outputs>
         <collection name="unwanted_variation" type="list" label="RUVSeq covariate files on ${on_string}">
-            <discover_datasets pattern="(?P&lt;designation&gt;.+)\.tabular" format="tabular" directory="." visible="false"/>
+            <discover_datasets pattern="uv_(?P&lt;designation&gt;.+)\.tabular" format="tabular" directory="." visible="false"/>
+        </collection>
+        <collection name="ruv_normcounts" type="list" label="RUVSeq normalized counts on ${on_string}">
+            <filter>ruv_ncounts == True</filter>
+            <discover_datasets pattern="ruv_(?P&lt;designation&gt;.+)\.tabular" format="tabular" directory="." visible="false"/>
         </collection>
         <data format="pdf" name="plots" label="RUVSeq diagonstic plots on ${on_string}">
             <filter>pdf == True</filter>
@@ -232,6 +247,73 @@
                 </element>
             </output_collection>
         </test>
+        <!-- Ensure normalized count files are generated -->
+        <test>
+            <repeat name="rep_factorLevel">
+                <param name="factorLevel" value="Treated"/>
+                <param name="countsFile" value="GSM461179_treat_single.counts,GSM461180_treat_paired.counts,GSM461181_treat_paired.counts"/>
+            </repeat>
+            <repeat name="rep_factorLevel">
+                <param name="factorLevel" value="Untreated"/>
+                <param name="countsFile" value="GSM461176_untreat_single.counts,GSM461177_untreat_paired.counts,GSM461178_untreat_paired.counts,GSM461182_untreat_single.counts"/>
+            </repeat>
+            <param name="pdf" value="true"/>
+            <param name="ruv_ncounts" value="true"/>
+            <output name="plots" file="ruvseq_diag.pdf" ftype="pdf" compare="sim_size"/>
+            <output_collection name="ruv_normcounts" type="list">
+                <element name="norm_counts_control_method_k1">
+                    <assert_contents>
+                        <has_text_matching expression="geneID\tGSM461179_treat_single.counts\tGSM461180_treat_paired.counts\t.+"/>
+                    </assert_contents>
+                </element>
+                <element name="norm_counts_replicate_method_k1">
+                    <assert_contents>
+                        <has_text_matching expression="geneID\tGSM461179_treat_single.counts\tGSM461180_treat_paired.counts\t.+"/>
+                    </assert_contents>
+                </element>
+                <element name="norm_counts_residual_method_k1">
+                    <assert_contents>
+                        <has_text_matching expression="geneID\tGSM461179_treat_single.counts\tGSM461180_treat_paired.counts\t.+"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
+        <!-- Ensure normalized counts are generated with Sailfish files -->
+        <test>
+            <repeat name="rep_factorLevel">
+                <param name="factorLevel" value="Treated"/>
+                <param name="countsFile" value="sailfish/sailfish_quant.sf1.tab,sailfish/sailfish_quant.sf2.tab,sailfish/sailfish_quant.sf3.tab"/>
+            </repeat>
+            <repeat name="rep_factorLevel">
+                <param name="factorLevel" value="Untreated"/>
+                    <param name="countsFile" value="sailfish/sailfish_quant.sf4.tab,sailfish/sailfish_quant.sf5.tab,sailfish/sailfish_quant.sf6.tab"/>
+            </repeat>
+            <param name="pdf" value="true"/>
+            <param name="tximport_selector" value="tximport"/>
+            <param name="txtype" value="sailfish"/>
+            <param name="mapping_format_selector" value="tabular"/>
+            <param name="tabular_file" value="tx2gene.tab"/>
+            <param name="min_mean_count" value="0"/>
+            <param name="ruv_ncounts" value="true"/>
+            <output name="plots" file="ruvseq_diag_sailfish.pdf" ftype="pdf" compare="sim_size"/>
+            <output_collection name="ruv_normcounts" type="list">
+                <element name="norm_counts_control_method_k1">
+                    <assert_contents>
+                        <has_text_matching expression="geneID\tsailfish_quant.sf1.tab\tsailfish_quant.sf2.tab\t.+"/>
+                    </assert_contents>
+                </element>
+                <element name="norm_counts_replicate_method_k1">
+                    <assert_contents>
+                        <has_text_matching expression="geneID\tsailfish_quant.sf1.tab\tsailfish_quant.sf2.tab\t.+"/>
+                    </assert_contents>
+                </element>
+                <element name="norm_counts_residual_method_k1">
+                    <assert_contents>
+                        <has_text_matching expression="geneID\tsailfish_quant.sf1.tab\tsailfish_quant.sf2.tab\t.+"/>
+                    </assert_contents>
+                </element>
+            </output_collection>
+        </test>
     </tests>
     <help><![CDATA[
 .. class:: infomark
@@ -306,6 +388,9 @@

 RUVSeq_ generates a tabular file for each method and each k of variation as well as a summary PDF.

+RUVSeq can also generate normalized count tables. However, *these counts should be used only for exploration. It is important that subsequent differential expression (DE) analysis be done on the original counts, as removing the unwanted factors from the counts can also remove part of a factor of interest*.
+
+
 .. _RUVSeq: http://master.bioconductor.org/packages/release/bioc/html/RUVSeq.html
 .. _tximport: https://bioconductor.org/packages/devel/bioc/vignettes/tximport/inst/doc/tximport.html
     ]]></help>