changeset 16:8ce951313688 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit fdd0811efc61c31f88ff17096fbe8ee8cfacd766-dirty
author mvdbeek
date Thu, 25 Feb 2016 08:49:20 -0500
parents e3b1dd2f7f70
children 1b03f6232900
files get_length_and_gc_content.r goseq.r
diffstat 2 files changed, 19 insertions(+), 2 deletions(-) [+]
line wrap: on
line diff
--- a/get_length_and_gc_content.r	Thu Feb 25 07:24:31 2016 -0500
+++ b/get_length_and_gc_content.r	Thu Feb 25 08:49:20 2016 -0500
@@ -44,4 +44,6 @@
 output <- setDT(data.frame(output), keep.rownames = TRUE)[]
 colnames(output) <- c("#gene_id", "length", "GC")
 
-write.table(output, file=output_file, row.names=FALSE, quote=FALSE, sep="\t")
\ No newline at end of file
+write.table(output, file=output_file, row.names=FALSE, quote=FALSE, sep="\t")
+
+sessionInfo()
\ No newline at end of file
--- a/goseq.r	Thu Feb 25 07:24:31 2016 -0500
+++ b/goseq.r	Thu Feb 25 08:49:20 2016 -0500
@@ -13,6 +13,7 @@
     make_option(c("-c", "--cutoff"), type="double",dest="p_adj_cutoff",
                 help="Genes with p.adjust below cutoff are considered not differentially expressed and serve as control genes"),
     make_option(c("-r", "--repcnt"), type="integer", default=100, help="Number of repeats for sampling"),
+    make_option(c("-lf", "--length_file"), default=FALSE, help = "Path to "),
     make_option(c("-g", "--genome"), type="character", help = "Genome [used for looking up correct gene length]"),
     make_option(c("-i", "--gene_id"), type="character", help="Gene ID of gene column in DGE file")
   )
@@ -23,6 +24,7 @@
 dge_file = args$dge_file
 p_adj_column = args$p_adj_colum
 p_adj_cutoff = args$p_adj_cutoff
+length_file = args$length_file
 genome = args$genome
 gene_id = args$gene_id
 wallenius_tab = args$wallenius_tab
@@ -38,10 +40,21 @@
 genes = as.integer(dge_table[,p_adj_column]<p_adj_cutoff)
 names(genes) = dge_table[,1] # Assuming first row contains gene names
 
+# Get gene lengths
+
+if (length_file) {
+  length_table = read.delim(length_file, header=TRUE, sep="\t", check.names=FALSE, row.names=TRUE)
+  gene_lengths = length_table[names(genes),]$length
+  } else {
+  gene_lengths = getlength(names(genes),genome, gene_id)
+  }
+
 # Estimate PWF
 
+print(gene_lengths)
+
 pdf(length_bias_plot)
-pwf=nullp(genes, genome , gene_id)
+pwf=nullp(genes, genome, gene_id, gene_lengths)
 dev.off()
 # Null dstribution wallenius
 GO.wall=goseq(pwf, genome, gene_id)
@@ -64,6 +77,8 @@
 write.table(GO.samp, sampling_tab, sep="\t", row.names = FALSE, quote = FALSE)
 write.table(GO.nobias, nobias_tab, sep="\t", row.names = FALSE, quote = FALSE)
 
+sessionInfo()
+
 # Use the following to get a list of supported genomes / gene ids
 
 # write.table(supportedGenomes(), "available_genomes.tab", row.names = FALSE, quote=FALSE)