changeset 2:e08419b8ec24 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/gsc_signature_score commit 987e0ceb55e8de1d2f09d0f2ae48ff7cd3e82051
author artbio
date Tue, 09 Jul 2019 10:52:29 -0400
parents 01b2c4fcada8
children 3351ca630a01
files signature_score.R signature_score.xml test-data/correlations.tsv test-data/covariances.tsv test-data/signature.pdf
diffstat 5 files changed, 49 insertions(+), 37 deletions(-) [+]
line wrap: on
line diff
--- a/signature_score.R	Mon Jun 24 19:17:53 2019 -0400
+++ b/signature_score.R	Tue Jul 09 10:52:29 2019 -0400
@@ -65,6 +65,18 @@
     help = "statistics path [default : '%default' ]"
   ),
   make_option(
+    "--correlations",
+    default = "./correlations.tab",
+    type = 'character',
+    help = "Correlations between signature genes  [default : '%default' ]"
+  ),
+  make_option(
+    "--covariances",
+    default = "./covariances.tab",
+    type = 'character',
+    help = "Covariances between signature genes [default : '%default' ]"
+  ),
+  make_option(
     "--pdf",
     default = "~/output.pdf",
     type = 'character',
@@ -100,6 +112,15 @@
 # Retrieve target genes in counts data
 signature.counts <- subset(data.counts, logical_genes)
 
+# compute covariances between signature genes (genes are rows, hence t())
+signature.covariances <- as.data.frame(cov(t(signature.counts)))
+signature.covariances <- cbind(gene = rownames(signature.covariances), signature.covariances)
+write.table(signature.covariances, file = opt$covariances, quote = FALSE, row.names = FALSE, sep = "\t")
+
+# compute correlations: cor(), not cov(); NOTE(review): regenerate test-data/correlations.tsv
+signature.correlations <- as.data.frame(cor(t(signature.counts)))
+signature.correlations <- cbind(gene = rownames(signature.correlations), signature.correlations)
+write.table(signature.correlations, file = opt$correlations, quote = FALSE, row.names = FALSE, sep = "\t")
 
 ## Descriptive Statistics Function
 descriptive_stats = function(InputData) {
@@ -171,43 +192,16 @@
                     stringsAsFactors = F)
 
 pdf(file = opt$pdf)
-
-ggplot(score, aes(x = order, y = score)) +
-  geom_line() + 
-  geom_segment(x = 0, xend = max(score$order[score$signature == "LOW"]), y = mean(score$score), yend = mean(score$score)) +
-  geom_area(aes(fill = signature), alpha = .7) +
-  scale_fill_manual(values=c("#ff0000", "#08661e")) +
-  geom_text(aes(x = 1, y = mean(score)), label = "Mean", vjust = -0.3, colour = "black") +
-  labs(title = "Ordered cell signature scores", x = "Cell index", y = "Score")
+# Violin plot of scores per rate class; the red line and text label mark the mean score
+myplot <- ggplot(signature_output, aes(x = rate, y = score)) +
+  geom_violin(aes(fill = rate), alpha = 0.5, trim = FALSE, show.legend = FALSE, cex = 0.5) +
+  geom_abline(slope = 0, intercept = mean(score$score), lwd = 0.5, color = "red") +
+  scale_fill_manual(values = c("#ff0000", "#08661e")) +
+  geom_jitter(size = 0.2) +
+  annotate("text", x = 0.55, y = mean(score$score), cex = 3, vjust = 1.5, color = "black", label = mean(score$score), parse = TRUE) +
+  labs(title = "Violin plots of Cell signature scores", y = "Score", x = "Rate")
 
-density_score <- density(score$score)
-ggplot(data.frame(density_score[1:2]), aes(x, y, fill = ifelse(x < mean(score$score), "LOW", "HIGH"))) +
-  geom_line() +
-  geom_vline(xintercept = mean(score$score)) +
-  geom_text(x = mean(score$score), y = max(density_score$y), label = "Mean", hjust = -0.3, colour = "black") +
-  geom_area(alpha = .7) +
-  scale_fill_manual(values=c("#ff0000", "#08661e")) +
-  ylim(0, max(density_score$y)) +
-  labs(
-    title = "Distribution of Cell signature scores",
-    x = paste("N =", density_score$n, "Bandwidth =", density_score$bw),
-    y = "Density",
-    fill = "Signature"
-  )
-
-# Check score independant of low expression
-p_gene <- ggplot(signature_output, aes(rate, nGenes)) +
-  geom_violin(aes(fill = rate), alpha = .5, trim = F, show.legend = F) +
-  scale_fill_manual(values=c("#ff0000", "#08661e")) +
-  geom_jitter() + labs(y = "Number of detected genes", x = "Signature")
-
-p_counts <- ggplot(signature_output, aes(rate, total_counts)) +
-  geom_violin(aes(fill = rate), alpha = .5, trim = F, show.legend = F) +
-  scale_fill_manual(values=c("#ff0000", "#08661e")) +
-  geom_jitter() + labs(y = "Total counts", x = "Signature")
-
-grid.arrange(p_gene, p_counts, ncol = 2, top = "Influence of library sequencing depth on cell signature scores")
-
+print(myplot)
 dev.off()
 
 # Save file
--- a/signature_score.xml	Mon Jun 24 19:17:53 2019 -0400
+++ b/signature_score.xml	Tue Jul 09 10:52:29 2019 -0400
@@ -1,4 +1,4 @@
-<tool id="signature_score" name="Compute signature scores" version="0.9.0">
+<tool id="signature_score" name="Compute signature scores" version="0.9.1">
     <description>in single cell RNAseq</description>
     <requirements>
         <requirement type="package" version="1.3.2=r3.3.2_0">r-optparse</requirement>
@@ -23,6 +23,8 @@
             --output '$output'
             --stats '$stats'
             --pdf '$pdf'
+            --correlations '$correlations'
+            --covariances '$covariances'
 ]]></command>
     <inputs>
         <param name="input" type="data" format="txt,tabular" label="Raw counts of expression data"/>
@@ -39,6 +41,8 @@
         <data name="pdf" format="pdf" label="Signatures plots from ${on_string}" />
         <data name="output" format="tabular" label="signature scores from ${on_string}" />
         <data name="stats" format="tabular" label="genes statistics from ${on_string}" />
+        <data name="correlations" format="tabular" label="Signature genes correlations from ${on_string}" />
+        <data name="covariances" format="tabular" label="Signature genes covariances from ${on_string}" />
     </outputs>
     <tests>
         <test>
@@ -48,6 +52,8 @@
             <output name="pdf" file="signature.pdf" ftype="pdf" compare="sim_size" delta="200" />
             <output name="output" file="signature.tsv" ftype="tabular"/>
             <output name="stats" file="gene_stats.tsv" ftype="tabular"/>
+            <output name="correlations" file="correlations.tsv" ftype="tabular"/>
+            <output name="covariances" file="covariances.tsv" ftype="tabular"/>
         </test>
     </tests>
     <help>
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/correlations.tsv	Tue Jul 09 10:52:29 2019 -0400
@@ -0,0 +1,6 @@
+gene	ZNF454	ACAD9	LAIR1	GAPDH	CHTOP
+ZNF454	11.9837926366092	1.26124926841842	-4.79209750033634	1.28119046792732	4.7040467133562
+ACAD9	1.26124926841842	36.4037213995154	0.987902621359844	1.57734537259074	3.12155500211722
+LAIR1	-4.79209750033634	0.987902621359844	58.8639861631431	-0.564659931985148	1.20262555258428
+GAPDH	1.28119046792732	1.57734537259074	-0.564659931985148	24.8789307434615	0.808639324069219
+CHTOP	4.7040467133562	3.12155500211722	1.20262555258428	0.808639324069219	44.8667386237237
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/test-data/covariances.tsv	Tue Jul 09 10:52:29 2019 -0400
@@ -0,0 +1,6 @@
+gene	ZNF454	ACAD9	LAIR1	GAPDH	CHTOP
+ZNF454	11.9837926366092	1.26124926841842	-4.79209750033634	1.28119046792732	4.7040467133562
+ACAD9	1.26124926841842	36.4037213995154	0.987902621359844	1.57734537259074	3.12155500211722
+LAIR1	-4.79209750033634	0.987902621359844	58.8639861631431	-0.564659931985148	1.20262555258428
+GAPDH	1.28119046792732	1.57734537259074	-0.564659931985148	24.8789307434615	0.808639324069219
+CHTOP	4.7040467133562	3.12155500211722	1.20262555258428	0.808639324069219	44.8667386237237
Binary file test-data/signature.pdf has changed