annotate ballgown.R @ 17:05977e96375b draft default tip

Uploaded
author theo.collard
date Tue, 03 Oct 2017 09:25:51 -0400
parents 4290f0f3d908
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
16
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
1 #!/usr/bin/env Rscript
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
2
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
3 suppressMessages(library(optparse, warn.conflicts = FALSE))
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
4 opt_list=list(
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
5 make_option(c("-d", "--directory"), type="character", default=NULL, help="directory containing the samples", metavar="character"),
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
6 make_option(c("-p", "--phendat"), type="character", default=NULL, help="phenotype data(must be a .csv file)", metavar="character"),
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
7 make_option(c("-t","--outputtranscript"), type="character", default="output_transcript.csv", help="output_transcript.csv: contains the transcripts of the experiments", metavar="character"),
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
8 make_option(c("-g","--outputgenes"), type="character", default="output_genes.csv", help="output_genes.csv: contains the genes of the experiments", metavar="character"),
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
9 make_option(c("-e","--texpression"), type="double", default="0.5", help="transcripts expression filter", metavar="character"),
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
10 make_option(c("--bgout"), type="character", default="", help="save the ballgown object created in the process", metavar="character"),
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
11 make_option(c("-f","--format"), type="character", default="tsv", help="Create csv or tsv files as output", metavar="character"),
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
12 make_option(c("-T","--tsvoutputtranscript"), type="character", default="output_transcript.tsv", help="output_transcript.tsv: contains the transcripts of the experiments", metavar="character"),
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
13 make_option(c("-G","--tsvoutputgenes"), type="character", default="output_genes.tsv", help="output_genes.tsv: contains the genes of the experiments", metavar="character")
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
14 )
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
15 opt_parser=OptionParser(option_list=opt_list)
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
16 opt=parse_args(opt_parser)
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
17
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
18 # Loading required libraries. suppressMessages() remove all noisy attachement messages
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
19 # ----------------------------------------------------------------------------------------
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
20
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
21 suppressMessages(library(ballgown, warn.conflicts = FALSE))
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
22 suppressMessages(library(genefilter, warn.conflicts = FALSE))
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
23 suppressMessages(library(dplyr, warn.conflicts = FALSE))
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
24
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
25 # Setup for the tool with some bases variables.
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
26 # ----------------------------------------------------------------------------------------
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
27
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
28
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
29 filtstr = opt$texpression
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
30 pdat = 2
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
31 phendata = read.csv(opt$phendat)
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
32
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
33
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
34 # Checking if the pdata file has the right samples names.
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
35 # ----------------------------------------------------------------------------------------
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
36
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
37 if (all(phendata$ids == list.files(opt$directory)) != TRUE)
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
38 {
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
39 stop("Your phenotype data table does not match the samples names. ")
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
40 }
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
41
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
42 # Creation of the ballgown object based on data
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
43 # ----------------------------------------------------------------------------------------
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
44 bgi = ballgown(dataDir= opt$directory , samplePattern="", pData = phendata, verbose = FALSE)
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
45
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
46 # Filter the genes with an expression superior to the input filter
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
47 # ----------------------------------------------------------------------------------------
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
48 bgi_filt= subset(bgi, paste("rowVars(texpr(bgi)) >",filtstr), genomesubset = TRUE)
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
49
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
50 # Creating the variables containing the transcripts and the genes and sorting them through the arrange() command.
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
51 # Checking if there's one or more adjust variables in the phenotype data file
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
52 # ----------------------------------------------------------------------------------------
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
53
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
54 if (ncol(pData(bgi))<=3) {
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
55 results_transcripts=stattest(bgi_filt, feature = "transcript", covariate = colnames(pData(bgi))[pdat], adjustvars = colnames(pData(bgi)[pdat+1]), getFC = TRUE, meas = "FPKM")
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
56 results_genes=stattest(bgi_filt, feature = "gene", covariate = colnames(pData(bgi))[pdat], adjustvars = colnames(pData(bgi)[pdat+1]), getFC = TRUE, meas = "FPKM")
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
57 } else {
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
58 results_transcripts=stattest(bgi_filt, feature = "transcript", covariate = colnames(pData(bgi))[pdat], adjustvars = c(colnames(pData(bgi)[pdat+1:ncol(pData(bgi))])), getFC = TRUE, meas = "FPKM")
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
59 results_genes=stattest(bgi_filt, feature = "gene", covariate = colnames(pData(bgi))[pdat], adjustvars = c(colnames(pData(bgi)[pdat+1:ncol(pData(bgi))])), getFC = TRUE, meas = "FPKM")
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
60 }
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
61
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
62 results_transcripts = data.frame(geneNames=ballgown::geneNames(bgi_filt), geneIDs=ballgown::geneIDs(bgi_filt), results_transcripts)
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
63 results_transcripts = arrange(results_transcripts, pval)
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
64 results_genes = arrange(results_genes, pval)
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
65
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
66 # Main output of the wrapper, two .csv files containing the genes and transcripts with their qvalue and pvalue
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
67 #This part also output the data of the ballgown object created in the process and save it in a R data file
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
68 # ----------------------------------------------------------------------------------------
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
69 if (opt$format == "tsv"){
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
70 write.table(results_transcripts, file=opt$tsvoutputtranscript, quote=FALSE, sep='\t', col.names = NA)
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
71 write.table(results_genes, file=opt$tsvoutputgenes, quote=FALSE, sep='\t', col.names = NA)
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
72 } else {
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
73 write.csv(results_transcripts, opt$outputtranscript, row.names = FALSE)
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
74 write.csv(results_genes, opt$outputgenes, row.names = FALSE)
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
75 }
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
76
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
77 if (opt$bgout != ""){
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
78 save(bgi, file=opt$bgout)
4290f0f3d908 Uploaded
theo.collard
parents: 0
diff changeset
79 }