Mercurial > repos > mvdbeek > r_goseq_1_22_0
changeset 16:8ce951313688 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/goseq_1_22_0 commit fdd0811efc61c31f88ff17096fbe8ee8cfacd766-dirty
author | mvdbeek |
---|---|
date | Thu, 25 Feb 2016 08:49:20 -0500 |
parents | e3b1dd2f7f70 |
children | 1b03f6232900 |
files | get_length_and_gc_content.r goseq.r |
diffstat | 2 files changed, 19 insertions(+), 2 deletions(-) [+] |
line wrap: on
line diff
--- a/get_length_and_gc_content.r Thu Feb 25 07:24:31 2016 -0500 +++ b/get_length_and_gc_content.r Thu Feb 25 08:49:20 2016 -0500 @@ -44,4 +44,6 @@ output <- setDT(data.frame(output), keep.rownames = TRUE)[] colnames(output) <- c("#gene_id", "length", "GC") -write.table(output, file=output_file, row.names=FALSE, quote=FALSE, sep="\t") \ No newline at end of file +write.table(output, file=output_file, row.names=FALSE, quote=FALSE, sep="\t") + +sessionInfo() \ No newline at end of file
--- a/goseq.r Thu Feb 25 07:24:31 2016 -0500 +++ b/goseq.r Thu Feb 25 08:49:20 2016 -0500 @@ -13,6 +13,7 @@ make_option(c("-c", "--cutoff"), type="double",dest="p_adj_cutoff", help="Genes with p.adjust below cutoff are considered not differentially expressed and serve as control genes"), make_option(c("-r", "--repcnt"), type="integer", default=100, help="Number of repeats for sampling"), + make_option(c("-lf", "--length_file"), default=FALSE, help = "Path to "), make_option(c("-g", "--genome"), type="character", help = "Genome [used for looking up correct gene length]"), make_option(c("-i", "--gene_id"), type="character", help="Gene ID of gene column in DGE file") ) @@ -23,6 +24,7 @@ dge_file = args$dge_file p_adj_column = args$p_adj_colum p_adj_cutoff = args$p_adj_cutoff +length_file = args$length_file genome = args$genome gene_id = args$gene_id wallenius_tab = args$wallenius_tab @@ -38,10 +40,21 @@ genes = as.integer(dge_table[,p_adj_column]<p_adj_cutoff) names(genes) = dge_table[,1] # Assuming first row contains gene names +# Get gene lengths + +if (length_file) { + length_table = read.delim(length_file, header=TRUE, sep="\t", check.names=FALSE, row.names=TRUE) + gene_lengths = length_table[names(genes),]$length + } else { + gene_lengths = getlength(names(genes),genome, gene_id) + } + # Estimate PWF +print(gene_lengths) + pdf(length_bias_plot) -pwf=nullp(genes, genome , gene_id) +pwf=nullp(genes, genome, gene_id, gene_lengths) dev.off() # Null dstribution wallenius GO.wall=goseq(pwf, genome, gene_id) @@ -64,6 +77,8 @@ write.table(GO.samp, sampling_tab, sep="\t", row.names = FALSE, quote = FALSE) write.table(GO.nobias, nobias_tab, sep="\t", row.names = FALSE, quote = FALSE) +sessionInfo() + # Use the following to get a list of supported genomes / gene ids # write.table(supportedGenomes(), "available_genomes.tab", row.names = FALSE, quote=FALSE)