annotate ballgown/ballgown.R @ 3:896cdffe06ff draft

first upload
author theo.collard
date Wed, 26 Apr 2017 08:42:01 -0400
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
3
896cdffe06ff first upload
theo.collard
parents:
diff changeset
1 #!/usr/bin/Rscript
896cdffe06ff first upload
theo.collard
parents:
diff changeset
2
896cdffe06ff first upload
theo.collard
parents:
diff changeset
# Command-line interface. optparse is used so that every input is a named option.
# ----------------------------------------------------------------------------------------

suppressMessages(library(optparse, warn.conflicts = FALSE))

# One make_option() per accepted flag:
#   -d / --directory         directory holding the sample folders (no default)
#   -p / --phendat           phenotype table as a .csv file (no default)
#   -t / --outputtranscript  path of the transcript-level result table
#   -g / --outputgenes       path of the gene-level result table
#   -e / --texpression       numeric threshold used by the expression filter
#   --bgout                  optional path where the ballgown object is saved
opt_list <- list(
  make_option(c("-d", "--directory"), type = "character", default = NULL,
              help = "directory containing the samples", metavar = "character"),
  make_option(c("-p", "--phendat"), type = "character", default = NULL,
              help = "phenotype data(must be a .csv file)", metavar = "character"),
  make_option(c("-t", "--outputtranscript"), type = "character",
              default = "output_transcript.csv",
              help = "output_transcript.csv: contains the transcripts of the expirements",
              metavar = "character"),
  make_option(c("-g", "--outputgenes"), type = "character",
              default = "output_genes.csv",
              help = "output_genes.csv: contains the genes of the expirements",
              metavar = "character"),
  # The option is declared as a double, so its default is given as a number
  # (it used to be the string "0.5") and the metavar advertises the real type.
  make_option(c("-e", "--texpression"), type = "double", default = 0.5,
              help = "transcripts expression filter", metavar = "double"),
  make_option(c("--bgout"), type = "character", default = "",
              help = "save the ballgown object created in the process",
              metavar = "character")
)
opt_parser <- OptionParser(option_list = opt_list)
opt <- parse_args(opt_parser)
896cdffe06ff first upload
theo.collard
parents:
diff changeset
17
896cdffe06ff first upload
theo.collard
parents:
diff changeset
18 # Loading required libraries. suppressMessages() removes all noisy attachment messages
896cdffe06ff first upload
theo.collard
parents:
diff changeset
19 # ----------------------------------------------------------------------------------------
896cdffe06ff first upload
theo.collard
parents:
diff changeset
20
896cdffe06ff first upload
theo.collard
parents:
diff changeset
21 suppressMessages(library(ballgown, warn.conflicts = FALSE))
896cdffe06ff first upload
theo.collard
parents:
diff changeset
22 suppressMessages(library(genefilter, warn.conflicts = FALSE))
896cdffe06ff first upload
theo.collard
parents:
diff changeset
23 suppressMessages(library(dplyr, warn.conflicts = FALSE))
896cdffe06ff first upload
theo.collard
parents:
diff changeset
24
896cdffe06ff first upload
theo.collard
parents:
diff changeset
# Setup for the tool: validate the required options, read the phenotype table and
# move into the samples directory.
# ----------------------------------------------------------------------------------------

# --directory and --phendat both default to NULL. Fail early with a readable message
# instead of letting read.csv() / setwd() abort on a NULL argument.
if (is.null(opt$directory) || is.null(opt$phendat)) {
  stop("Both --directory and --phendat must be provided.")
}

# Threshold supplied with -e / --texpression.
filtstr <- opt$texpression
# Position, in the phenotype table, of the column used as the covariate of interest.
pdat <- 2
# Read before changing directory so that a relative --phendat path still resolves
# against the directory the script was started from.
phendata <- read.csv(opt$phendat)
# Use the full option name: `opt$dir` only worked through `$` partial matching.
# The rest of the script addresses the samples as ".", so it relies on this setwd().
setwd(opt$directory)
896cdffe06ff first upload
theo.collard
parents:
diff changeset
33
896cdffe06ff first upload
theo.collard
parents:
diff changeset
# Check that the `ids` column of the phenotype table lists exactly the entries of the
# samples directory, in the same order.
# ----------------------------------------------------------------------------------------

sample_names <- list.files(".")
pheno_ids <- phendata$ids

# A missing `ids` column yields NULL, and all(NULL == x) is TRUE, so the previous
# test passed silently in that case. Reject it explicitly.
if (is.null(pheno_ids)) {
  stop("Your phenotype data table has no 'ids' column. ")
}

# identical() compares length, order and content in one go: no silent recycling when
# the two vectors differ in length, and no NA reaching the `if` condition.
if (!identical(as.character(pheno_ids), sample_names)) {
  stop("Your phenotype data table does not match the samples names. ")
}
896cdffe06ff first upload
theo.collard
parents:
diff changeset
41
896cdffe06ff first upload
theo.collard
parents:
diff changeset
# Build the ballgown object from the current directory (the samples directory), using
# the phenotype table as pData. The empty samplePattern does not restrict the entries.
# ----------------------------------------------------------------------------------------
bgi <- ballgown(dataDir = ".", samplePattern = "", pData = phendata, verbose = FALSE)

# Keep only the transcripts whose row variance of expression (rowVars over texpr)
# is above the input filter value.
# ----------------------------------------------------------------------------------------
variance_filter <- paste("rowVars(texpr(bgi)) >", filtstr)
bgi_filt <- subset(bgi, variance_filter, genomesubset = TRUE)
896cdffe06ff first upload
theo.collard
parents:
diff changeset
49
896cdffe06ff first upload
theo.collard
parents:
diff changeset
# Run the statistical tests for transcripts and for genes.
# The covariate of interest is phenotype column number `pdat`; every column after it
# is passed as an adjustment variable, and none is passed when the table ends at the
# covariate column.
# ----------------------------------------------------------------------------------------

pheno_cols <- colnames(pData(bgi))
covariate_name <- pheno_cols[pdat]

# `pdat+1:ncol(x)` parses as `pdat + (1:ncol(x))` and therefore indexed past the last
# column. Dropping the first `pdat` names gives the intended "all columns after the
# covariate" and is simply empty when there are none.
adjust_names <- pheno_cols[-seq_len(pdat)]
if (length(adjust_names) == 0) {
  # stattest() takes NULL to mean "no adjustment variables".
  adjust_names <- NULL
}

results_transcripts <- stattest(bgi_filt, feature = "transcript",
                                covariate = covariate_name, adjustvars = adjust_names,
                                getFC = TRUE, meas = "FPKM")
results_genes <- stattest(bgi_filt, feature = "gene",
                          covariate = covariate_name, adjustvars = adjust_names,
                          getFC = TRUE, meas = "FPKM")
896cdffe06ff first upload
theo.collard
parents:
diff changeset
61
896cdffe06ff first upload
theo.collard
parents:
diff changeset
# Prepend the gene names and gene IDs of the filtered object to the transcript-level
# results, then sort both result tables on their `pval` column with arrange().
gene_names <- ballgown::geneNames(bgi_filt)
gene_ids <- ballgown::geneIDs(bgi_filt)
results_transcripts <- data.frame(geneNames = gene_names, geneIDs = gene_ids,
                                  results_transcripts)
results_transcripts <- results_transcripts %>% arrange(pval)
results_genes <- results_genes %>% arrange(pval)
896cdffe06ff first upload
theo.collard
parents:
diff changeset
65
896cdffe06ff first upload
theo.collard
parents:
diff changeset
# Main output of the wrapper: two .csv files holding the transcript-level and the
# gene-level result tables.
# When --bgout is not empty, the ballgown object `bgi` (the unfiltered one) is also
# saved to that path as an R data file.
# ----------------------------------------------------------------------------------------
write.csv(results_transcripts, file = opt$outputtranscript, row.names = FALSE)
write.csv(results_genes, file = opt$outputgenes, row.names = FALSE)
if (nzchar(opt$bgout)) {
  save(bgi, file = opt$bgout)
}