3
|
1 #!/usr/bin/Rscript
|
|
2
|
|
3 # Enabling commands line arguments. Using optparse which allows to use options.
|
|
4 # ----------------------------------------------------------------------------------------
|
|
5
|
|
6 suppressMessages(library(optparse, warn.conflicts = FALSE))
|
|
7 opt_list=list(
|
|
8 make_option(c("-d", "--directory"), type="character", default=NULL, help="directory containing the samples", metavar="character"),
|
|
9 make_option(c("-p", "--phendat"), type="character", default=NULL, help="phenotype data(must be a .csv file)", metavar="character"),
|
|
10 make_option(c("-t","--outputtranscript"), type="character", default="output_transcript.csv", help="output_transcript.csv: contains the transcripts of the expirements", metavar="character"),
|
|
11 make_option(c("-g","--outputgenes"), type="character", default="output_genes.csv", help="output_genes.csv: contains the genes of the expirements", metavar="character"),
|
|
12 make_option(c("-e","--texpression"), type="double", default="0.5", help="transcripts expression filter", metavar="character"),
|
|
13 make_option(c("--bgout"), type="character", default="", help="save the ballgown object created in the process", metavar="character")
|
|
14 )
|
|
15 opt_parser=OptionParser(option_list=opt_list)
|
|
16 opt=parse_args(opt_parser)
|
|
17
|
|
18 # Loading required libraries. suppressMessages() remove all noisy attachement messages
|
|
19 # ----------------------------------------------------------------------------------------
|
|
20
|
|
21 suppressMessages(library(ballgown, warn.conflicts = FALSE))
|
|
22 suppressMessages(library(genefilter, warn.conflicts = FALSE))
|
|
23 suppressMessages(library(dplyr, warn.conflicts = FALSE))
|
|
24
|
|
25 # Setup for the tool with some bases variables.
|
|
26 # ----------------------------------------------------------------------------------------
|
|
27
|
|
28
|
|
29 filtstr = opt$texpression
|
|
30 pdat = 2
|
|
31 phendata = read.csv(opt$phendat)
|
|
32 setwd(opt$dir)
|
|
33
|
|
34 # Checking if the pdata file has the right samples names.
|
|
35 # ----------------------------------------------------------------------------------------
|
|
36
|
|
37 if (all(phendata$ids == list.files(".")) != TRUE)
|
|
38 {
|
|
39 stop("Your phenotype data table does not match the samples names. ")
|
|
40 }
|
|
41
|
|
42 # Creation of the ballgown object based on data
|
|
43 # ----------------------------------------------------------------------------------------
|
|
44 bgi = ballgown(dataDir= "." , samplePattern="", pData = phendata, verbose = FALSE)
|
|
45
|
|
46 # Filter the genes with an expression superior to the input filter
|
|
47 # ----------------------------------------------------------------------------------------
|
|
48 bgi_filt= subset(bgi, paste("rowVars(texpr(bgi)) >",filtstr), genomesubset = TRUE)
|
|
49
|
|
50 # Creating the variables containing the transcripts and the genes and sorting them through the arrange() command.
|
|
51 # Checking if there's one or more adjust variables in the phenotype data file
|
|
52 # ----------------------------------------------------------------------------------------
|
|
53
|
|
54 if (ncol(pData(bgi))<=3) {
|
|
55 results_transcripts=stattest(bgi_filt,feature = "transcript", covariate = colnames(pData(bgi))[pdat], adjustvars = colnames(pData(bgi)[pdat+1]), getFC = TRUE, meas = "FPKM")
|
|
56 results_genes=stattest(bgi_filt,feature = "gene", covariate = colnames(pData(bgi))[pdat], adjustvars = colnames(pData(bgi)[pdat+1]), getFC = TRUE, meas = "FPKM")
|
|
57 } else {
|
|
58 results_transcripts=stattest(bgi_filt,feature = "transcript", covariate = colnames(pData(bgi))[pdat], adjustvars = c(colnames(pData(bgi)[pdat+1:ncol(pData(bgi))])), getFC = TRUE, meas = "FPKM")
|
|
59 results_genes=stattest(bgi_filt,feature = "gene", covariate = colnames(pData(bgi))[pdat], adjustvars = c(colnames(pData(bgi)[pdat+1:ncol(pData(bgi))])), getFC = TRUE, meas = "FPKM")
|
|
60 }
|
|
61
|
|
62 results_transcripts = data.frame(geneNames=ballgown::geneNames(bgi_filt), geneIDs=ballgown::geneIDs(bgi_filt), results_transcripts)
|
|
63 results_transcripts = arrange(results_transcripts,pval)
|
|
64 results_genes = arrange(results_genes,pval)
|
|
65
|
|
66 # Main output of the wrapper, two .csv files containing the genes and transcripts with their qvalue and pvalue
|
|
67 #This part also output the data of the ballgown object created in the process and save it in a R data file
|
|
68 # ----------------------------------------------------------------------------------------
|
|
69 write.csv(results_transcripts, opt$outputtranscript, row.names = FALSE)
|
|
70 write.csv(results_genes, opt$outputgenes, row.names = FALSE)
|
|
71 if (opt$bgout != ""){
|
|
72 save(bgi, file=opt$bgout)
|
|
73 }
|