Mercurial > repos > theo.collard > ballgown_wrapper
changeset 16:4290f0f3d908 draft
Uploaded
author | theo.collard |
---|---|
date | Tue, 03 Oct 2017 09:25:35 -0400 |
parents | a225c3f40675 |
children | 05977e96375b |
files | ballgown.R |
diffstat | 1 files changed, 79 insertions(+), 73 deletions(-) [+] |
line wrap: on
line diff
--- a/ballgown.R Tue Oct 03 09:25:22 2017 -0400 +++ b/ballgown.R Tue Oct 03 09:25:35 2017 -0400 @@ -1,73 +1,79 @@ -#!/usr/bin/Rscript - -# Enabling commands line arguments. Using optparse which allows to use options. -# ---------------------------------------------------------------------------------------- - -suppressMessages(library(optparse, warn.conflicts = FALSE)) -opt_list=list( -make_option(c("-d", "--directory"), type="character", default=NULL, help="directory containing the samples", metavar="character"), -make_option(c("-p", "--phendat"), type="character", default=NULL, help="phenotype data(must be a .csv file)", metavar="character"), -make_option(c("-t","--outputtranscript"), type="character", default="output_transcript.csv", help="output_transcript.csv: contains the transcripts of the expirements", metavar="character"), -make_option(c("-g","--outputgenes"), type="character", default="output_genes.csv", help="output_genes.csv: contains the genes of the expirements", metavar="character"), -make_option(c("-e","--texpression"), type="double", default="0.5", help="transcripts expression filter", metavar="character"), -make_option(c("--bgout"), type="character", default="", help="save the ballgown object created in the process", metavar="character") -) -opt_parser=OptionParser(option_list=opt_list) -opt=parse_args(opt_parser) - -# Loading required libraries. suppressMessages() remove all noisy attachement messages -# ---------------------------------------------------------------------------------------- - -suppressMessages(library(ballgown, warn.conflicts = FALSE)) -suppressMessages(library(genefilter, warn.conflicts = FALSE)) -suppressMessages(library(dplyr, warn.conflicts = FALSE)) - -# Setup for the tool with some bases variables. -# ---------------------------------------------------------------------------------------- - - -filtstr = opt$texpression -pdat = 2 -phendata = read.csv(opt$phendat) -setwd(opt$dir) - -# Checking if the pdata file has the right samples names. -# ---------------------------------------------------------------------------------------- - -if (all(phendata$ids == list.files(".")) != TRUE) -{ - stop("Your phenotype data table does not match the samples names. ") -} - -# Creation of the ballgown object based on data -# ---------------------------------------------------------------------------------------- -bgi = ballgown(dataDir= "." , samplePattern="", pData = phendata, verbose = FALSE) - -# Filter the genes with an expression superior to the input filter -# ---------------------------------------------------------------------------------------- -bgi_filt= subset(bgi, paste("rowVars(texpr(bgi)) >",filtstr), genomesubset = TRUE) - -# Creating the variables containing the transcripts and the genes and sorting them through the arrange() command. -# Checking if there's one or more adjust variables in the phenotype data file -# ---------------------------------------------------------------------------------------- - -if (ncol(pData(bgi))<=3) { - results_transcripts=stattest(bgi_filt,feature = "transcript", covariate = colnames(pData(bgi))[pdat], adjustvars = colnames(pData(bgi)[pdat+1]), getFC = TRUE, meas = "FPKM") - results_genes=stattest(bgi_filt,feature = "gene", covariate = colnames(pData(bgi))[pdat], adjustvars = colnames(pData(bgi)[pdat+1]), getFC = TRUE, meas = "FPKM") -} else { - results_transcripts=stattest(bgi_filt,feature = "transcript", covariate = colnames(pData(bgi))[pdat], adjustvars = c(colnames(pData(bgi)[pdat+1:ncol(pData(bgi))])), getFC = TRUE, meas = "FPKM") - results_genes=stattest(bgi_filt,feature = "gene", covariate = colnames(pData(bgi))[pdat], adjustvars = c(colnames(pData(bgi)[pdat+1:ncol(pData(bgi))])), getFC = TRUE, meas = "FPKM") -} - -results_transcripts = data.frame(geneNames=ballgown::geneNames(bgi_filt), geneIDs=ballgown::geneIDs(bgi_filt), results_transcripts) -results_transcripts = arrange(results_transcripts,pval) -results_genes = arrange(results_genes,pval) - -# Main output of the wrapper, two .csv files containing the genes and transcripts with their qvalue and pvalue -#This part also output the data of the ballgown object created in the process and save it in a R data file -# ---------------------------------------------------------------------------------------- -write.csv(results_transcripts, opt$outputtranscript, row.names = FALSE) -write.csv(results_genes, opt$outputgenes, row.names = FALSE) -if (opt$bgout != ""){ - save(bgi, file=opt$bgout) -} +#!/usr/bin/env Rscript + +suppressMessages(library(optparse, warn.conflicts = FALSE)) +opt_list=list( +make_option(c("-d", "--directory"), type="character", default=NULL, help="directory containing the samples", metavar="character"), +make_option(c("-p", "--phendat"), type="character", default=NULL, help="phenotype data(must be a .csv file)", metavar="character"), +make_option(c("-t","--outputtranscript"), type="character", default="output_transcript.csv", help="output_transcript.csv: contains the transcripts of the experiments", metavar="character"), +make_option(c("-g","--outputgenes"), type="character", default="output_genes.csv", help="output_genes.csv: contains the genes of the experiments", metavar="character"), +make_option(c("-e","--texpression"), type="double", default="0.5", help="transcripts expression filter", metavar="character"), +make_option(c("--bgout"), type="character", default="", help="save the ballgown object created in the process", metavar="character"), +make_option(c("-f","--format"), type="character", default="tsv", help="Create csv or tsv files as output", metavar="character"), +make_option(c("-T","--tsvoutputtranscript"), type="character", default="output_transcript.tsv", help="output_transcript.tsv: contains the transcripts of the experiments", metavar="character"), +make_option(c("-G","--tsvoutputgenes"), type="character", default="output_genes.tsv", help="output_genes.tsv: contains the genes of the experiments", metavar="character") +) +opt_parser=OptionParser(option_list=opt_list) +opt=parse_args(opt_parser) + +# Loading required libraries. suppressMessages() remove all noisy attachement messages +# ---------------------------------------------------------------------------------------- + +suppressMessages(library(ballgown, warn.conflicts = FALSE)) +suppressMessages(library(genefilter, warn.conflicts = FALSE)) +suppressMessages(library(dplyr, warn.conflicts = FALSE)) + +# Setup for the tool with some bases variables. +# ---------------------------------------------------------------------------------------- + + +filtstr = opt$texpression +pdat = 2 +phendata = read.csv(opt$phendat) + + +# Checking if the pdata file has the right samples names. +# ---------------------------------------------------------------------------------------- + +if (all(phendata$ids == list.files(opt$directory)) != TRUE) +{ + stop("Your phenotype data table does not match the samples names. ") +} + +# Creation of the ballgown object based on data +# ---------------------------------------------------------------------------------------- +bgi = ballgown(dataDir= opt$directory , samplePattern="", pData = phendata, verbose = FALSE) + +# Filter the genes with an expression superior to the input filter +# ---------------------------------------------------------------------------------------- +bgi_filt= subset(bgi, paste("rowVars(texpr(bgi)) >",filtstr), genomesubset = TRUE) + +# Creating the variables containing the transcripts and the genes and sorting them through the arrange() command. +# Checking if there's one or more adjust variables in the phenotype data file +# ---------------------------------------------------------------------------------------- + +if (ncol(pData(bgi))<=3) { + results_transcripts=stattest(bgi_filt, feature = "transcript", covariate = colnames(pData(bgi))[pdat], adjustvars = colnames(pData(bgi)[pdat+1]), getFC = TRUE, meas = "FPKM") + results_genes=stattest(bgi_filt, feature = "gene", covariate = colnames(pData(bgi))[pdat], adjustvars = colnames(pData(bgi)[pdat+1]), getFC = TRUE, meas = "FPKM") +} else { + results_transcripts=stattest(bgi_filt, feature = "transcript", covariate = colnames(pData(bgi))[pdat], adjustvars = c(colnames(pData(bgi)[pdat+1:ncol(pData(bgi))])), getFC = TRUE, meas = "FPKM") + results_genes=stattest(bgi_filt, feature = "gene", covariate = colnames(pData(bgi))[pdat], adjustvars = c(colnames(pData(bgi)[pdat+1:ncol(pData(bgi))])), getFC = TRUE, meas = "FPKM") +} + +results_transcripts = data.frame(geneNames=ballgown::geneNames(bgi_filt), geneIDs=ballgown::geneIDs(bgi_filt), results_transcripts) +results_transcripts = arrange(results_transcripts, pval) +results_genes = arrange(results_genes, pval) + +# Main output of the wrapper, two .csv files containing the genes and transcripts with their qvalue and pvalue +#This part also output the data of the ballgown object created in the process and save it in a R data file +# ---------------------------------------------------------------------------------------- +if (opt$format == "tsv"){ + write.table(results_transcripts, file=opt$tsvoutputtranscript, quote=FALSE, sep='\t', col.names = NA) + write.table(results_genes, file=opt$tsvoutputgenes, quote=FALSE, sep='\t', col.names = NA) +} else { + write.csv(results_transcripts, opt$outputtranscript, row.names = FALSE) + write.csv(results_genes, opt$outputgenes, row.names = FALSE) +} + +if (opt$bgout != ""){ + save(bgi, file=opt$bgout) +}