comparison ballgown.R @ 16:4290f0f3d908 draft

Uploaded
author theo.collard
date Tue, 03 Oct 2017 09:25:35 -0400
parents 940701e8bc59
children
comparison
equal deleted inserted replaced
15:a225c3f40675 16:4290f0f3d908
1 #!/usr/bin/Rscript 1 #!/usr/bin/env Rscript
2
3 # Enabling commands line arguments. Using optparse which allows to use options.
4 # ----------------------------------------------------------------------------------------
5 2
6 suppressMessages(library(optparse, warn.conflicts = FALSE)) 3 suppressMessages(library(optparse, warn.conflicts = FALSE))
7 opt_list=list( 4 opt_list=list(
8 make_option(c("-d", "--directory"), type="character", default=NULL, help="directory containing the samples", metavar="character"), 5 make_option(c("-d", "--directory"), type="character", default=NULL, help="directory containing the samples", metavar="character"),
9 make_option(c("-p", "--phendat"), type="character", default=NULL, help="phenotype data(must be a .csv file)", metavar="character"), 6 make_option(c("-p", "--phendat"), type="character", default=NULL, help="phenotype data(must be a .csv file)", metavar="character"),
10 make_option(c("-t","--outputtranscript"), type="character", default="output_transcript.csv", help="output_transcript.csv: contains the transcripts of the expirements", metavar="character"), 7 make_option(c("-t","--outputtranscript"), type="character", default="output_transcript.csv", help="output_transcript.csv: contains the transcripts of the experiments", metavar="character"),
11 make_option(c("-g","--outputgenes"), type="character", default="output_genes.csv", help="output_genes.csv: contains the genes of the expirements", metavar="character"), 8 make_option(c("-g","--outputgenes"), type="character", default="output_genes.csv", help="output_genes.csv: contains the genes of the experiments", metavar="character"),
12 make_option(c("-e","--texpression"), type="double", default="0.5", help="transcripts expression filter", metavar="character"), 9 make_option(c("-e","--texpression"), type="double", default="0.5", help="transcripts expression filter", metavar="character"),
13 make_option(c("--bgout"), type="character", default="", help="save the ballgown object created in the process", metavar="character") 10 make_option(c("--bgout"), type="character", default="", help="save the ballgown object created in the process", metavar="character"),
11 make_option(c("-f","--format"), type="character", default="tsv", help="Create csv or tsv files as output", metavar="character"),
12 make_option(c("-T","--tsvoutputtranscript"), type="character", default="output_transcript.tsv", help="output_transcript.tsv: contains the transcripts of the experiments", metavar="character"),
13 make_option(c("-G","--tsvoutputgenes"), type="character", default="output_genes.tsv", help="output_genes.tsv: contains the genes of the experiments", metavar="character")
14 ) 14 )
15 opt_parser=OptionParser(option_list=opt_list) 15 opt_parser=OptionParser(option_list=opt_list)
16 opt=parse_args(opt_parser) 16 opt=parse_args(opt_parser)
17 17
18 # Loading required libraries. suppressMessages() remove all noisy attachement messages 18 # Loading required libraries. suppressMessages() remove all noisy attachement messages
27 27
28 28
29 filtstr = opt$texpression 29 filtstr = opt$texpression
30 pdat = 2 30 pdat = 2
31 phendata = read.csv(opt$phendat) 31 phendata = read.csv(opt$phendat)
32 setwd(opt$dir) 32
33 33
34 # Checking if the pdata file has the right samples names. 34 # Checking if the pdata file has the right samples names.
35 # ---------------------------------------------------------------------------------------- 35 # ----------------------------------------------------------------------------------------
36 36
37 if (all(phendata$ids == list.files(".")) != TRUE) 37 if (all(phendata$ids == list.files(opt$directory)) != TRUE)
38 { 38 {
39 stop("Your phenotype data table does not match the samples names. ") 39 stop("Your phenotype data table does not match the samples names. ")
40 } 40 }
41 41
42 # Creation of the ballgown object based on data 42 # Creation of the ballgown object based on data
43 # ---------------------------------------------------------------------------------------- 43 # ----------------------------------------------------------------------------------------
44 bgi = ballgown(dataDir= "." , samplePattern="", pData = phendata, verbose = FALSE) 44 bgi = ballgown(dataDir= opt$directory , samplePattern="", pData = phendata, verbose = FALSE)
45 45
46 # Filter the genes with an expression superior to the input filter 46 # Filter the genes with an expression superior to the input filter
47 # ---------------------------------------------------------------------------------------- 47 # ----------------------------------------------------------------------------------------
48 bgi_filt= subset(bgi, paste("rowVars(texpr(bgi)) >",filtstr), genomesubset = TRUE) 48 bgi_filt= subset(bgi, paste("rowVars(texpr(bgi)) >",filtstr), genomesubset = TRUE)
49 49
50 # Creating the variables containing the transcripts and the genes and sorting them through the arrange() command. 50 # Creating the variables containing the transcripts and the genes and sorting them through the arrange() command.
51 # Checking if there's one or more adjust variables in the phenotype data file 51 # Checking if there's one or more adjust variables in the phenotype data file
52 # ---------------------------------------------------------------------------------------- 52 # ----------------------------------------------------------------------------------------
53 53
54 if (ncol(pData(bgi))<=3) { 54 if (ncol(pData(bgi))<=3) {
55 results_transcripts=stattest(bgi_filt,feature = "transcript", covariate = colnames(pData(bgi))[pdat], adjustvars = colnames(pData(bgi)[pdat+1]), getFC = TRUE, meas = "FPKM") 55 results_transcripts=stattest(bgi_filt, feature = "transcript", covariate = colnames(pData(bgi))[pdat], adjustvars = colnames(pData(bgi)[pdat+1]), getFC = TRUE, meas = "FPKM")
56 results_genes=stattest(bgi_filt,feature = "gene", covariate = colnames(pData(bgi))[pdat], adjustvars = colnames(pData(bgi)[pdat+1]), getFC = TRUE, meas = "FPKM") 56 results_genes=stattest(bgi_filt, feature = "gene", covariate = colnames(pData(bgi))[pdat], adjustvars = colnames(pData(bgi)[pdat+1]), getFC = TRUE, meas = "FPKM")
57 } else { 57 } else {
58 results_transcripts=stattest(bgi_filt,feature = "transcript", covariate = colnames(pData(bgi))[pdat], adjustvars = c(colnames(pData(bgi)[pdat+1:ncol(pData(bgi))])), getFC = TRUE, meas = "FPKM") 58 results_transcripts=stattest(bgi_filt, feature = "transcript", covariate = colnames(pData(bgi))[pdat], adjustvars = c(colnames(pData(bgi)[pdat+1:ncol(pData(bgi))])), getFC = TRUE, meas = "FPKM")
59 results_genes=stattest(bgi_filt,feature = "gene", covariate = colnames(pData(bgi))[pdat], adjustvars = c(colnames(pData(bgi)[pdat+1:ncol(pData(bgi))])), getFC = TRUE, meas = "FPKM") 59 results_genes=stattest(bgi_filt, feature = "gene", covariate = colnames(pData(bgi))[pdat], adjustvars = c(colnames(pData(bgi)[pdat+1:ncol(pData(bgi))])), getFC = TRUE, meas = "FPKM")
60 } 60 }
61 61
62 results_transcripts = data.frame(geneNames=ballgown::geneNames(bgi_filt), geneIDs=ballgown::geneIDs(bgi_filt), results_transcripts) 62 results_transcripts = data.frame(geneNames=ballgown::geneNames(bgi_filt), geneIDs=ballgown::geneIDs(bgi_filt), results_transcripts)
63 results_transcripts = arrange(results_transcripts,pval) 63 results_transcripts = arrange(results_transcripts, pval)
64 results_genes = arrange(results_genes,pval) 64 results_genes = arrange(results_genes, pval)
65 65
66 # Main output of the wrapper, two .csv files containing the genes and transcripts with their qvalue and pvalue 66 # Main output of the wrapper, two .csv files containing the genes and transcripts with their qvalue and pvalue
67 #This part also output the data of the ballgown object created in the process and save it in a R data file 67 #This part also output the data of the ballgown object created in the process and save it in a R data file
68 # ---------------------------------------------------------------------------------------- 68 # ----------------------------------------------------------------------------------------
69 write.csv(results_transcripts, opt$outputtranscript, row.names = FALSE) 69 if (opt$format == "tsv"){
70 write.csv(results_genes, opt$outputgenes, row.names = FALSE) 70 write.table(results_transcripts, file=opt$tsvoutputtranscript, quote=FALSE, sep='\t', col.names = NA)
71 write.table(results_genes, file=opt$tsvoutputgenes, quote=FALSE, sep='\t', col.names = NA)
72 } else {
73 write.csv(results_transcripts, opt$outputtranscript, row.names = FALSE)
74 write.csv(results_genes, opt$outputgenes, row.names = FALSE)
75 }
76
71 if (opt$bgout != ""){ 77 if (opt$bgout != ""){
72 save(bgi, file=opt$bgout) 78 save(bgi, file=opt$bgout)
73 } 79 }