Mercurial > repos > theo.collard > ballgown_wrapper
comparison ballgown.R @ 16:4290f0f3d908 draft
Uploaded
author | theo.collard |
---|---|
date | Tue, 03 Oct 2017 09:25:35 -0400 |
parents | 940701e8bc59 |
children |
comparison
equal
deleted
inserted
replaced
15:a225c3f40675 | 16:4290f0f3d908 |
---|---|
1 #!/usr/bin/Rscript | 1 #!/usr/bin/env Rscript |
2 | |
3 # Enabling commands line arguments. Using optparse which allows to use options. | |
4 # ---------------------------------------------------------------------------------------- | |
5 | 2 |
6 suppressMessages(library(optparse, warn.conflicts = FALSE)) | 3 suppressMessages(library(optparse, warn.conflicts = FALSE)) |
7 opt_list=list( | 4 opt_list=list( |
8 make_option(c("-d", "--directory"), type="character", default=NULL, help="directory containing the samples", metavar="character"), | 5 make_option(c("-d", "--directory"), type="character", default=NULL, help="directory containing the samples", metavar="character"), |
9 make_option(c("-p", "--phendat"), type="character", default=NULL, help="phenotype data(must be a .csv file)", metavar="character"), | 6 make_option(c("-p", "--phendat"), type="character", default=NULL, help="phenotype data(must be a .csv file)", metavar="character"), |
10 make_option(c("-t","--outputtranscript"), type="character", default="output_transcript.csv", help="output_transcript.csv: contains the transcripts of the expirements", metavar="character"), | 7 make_option(c("-t","--outputtranscript"), type="character", default="output_transcript.csv", help="output_transcript.csv: contains the transcripts of the experiments", metavar="character"), |
11 make_option(c("-g","--outputgenes"), type="character", default="output_genes.csv", help="output_genes.csv: contains the genes of the expirements", metavar="character"), | 8 make_option(c("-g","--outputgenes"), type="character", default="output_genes.csv", help="output_genes.csv: contains the genes of the experiments", metavar="character"), |
12 make_option(c("-e","--texpression"), type="double", default="0.5", help="transcripts expression filter", metavar="character"), | 9 make_option(c("-e","--texpression"), type="double", default="0.5", help="transcripts expression filter", metavar="character"), |
13 make_option(c("--bgout"), type="character", default="", help="save the ballgown object created in the process", metavar="character") | 10 make_option(c("--bgout"), type="character", default="", help="save the ballgown object created in the process", metavar="character"), |
11 make_option(c("-f","--format"), type="character", default="tsv", help="Create csv or tsv files as output", metavar="character"), | |
12 make_option(c("-T","--tsvoutputtranscript"), type="character", default="output_transcript.tsv", help="output_transcript.tsv: contains the transcripts of the experiments", metavar="character"), | |
13 make_option(c("-G","--tsvoutputgenes"), type="character", default="output_genes.tsv", help="output_genes.tsv: contains the genes of the experiments", metavar="character") | |
14 ) | 14 ) |
15 opt_parser=OptionParser(option_list=opt_list) | 15 opt_parser=OptionParser(option_list=opt_list) |
16 opt=parse_args(opt_parser) | 16 opt=parse_args(opt_parser) |
17 | 17 |
18 # Loading required libraries. suppressMessages() remove all noisy attachement messages | 18 # Loading required libraries. suppressMessages() remove all noisy attachement messages |
27 | 27 |
28 | 28 |
29 filtstr = opt$texpression | 29 filtstr = opt$texpression |
30 pdat = 2 | 30 pdat = 2 |
31 phendata = read.csv(opt$phendat) | 31 phendata = read.csv(opt$phendat) |
32 setwd(opt$dir) | 32 |
33 | 33 |
34 # Checking if the pdata file has the right samples names. | 34 # Checking if the pdata file has the right samples names. |
35 # ---------------------------------------------------------------------------------------- | 35 # ---------------------------------------------------------------------------------------- |
36 | 36 |
37 if (all(phendata$ids == list.files(".")) != TRUE) | 37 if (all(phendata$ids == list.files(opt$directory)) != TRUE) |
38 { | 38 { |
39 stop("Your phenotype data table does not match the samples names. ") | 39 stop("Your phenotype data table does not match the samples names. ") |
40 } | 40 } |
41 | 41 |
42 # Creation of the ballgown object based on data | 42 # Creation of the ballgown object based on data |
43 # ---------------------------------------------------------------------------------------- | 43 # ---------------------------------------------------------------------------------------- |
44 bgi = ballgown(dataDir= "." , samplePattern="", pData = phendata, verbose = FALSE) | 44 bgi = ballgown(dataDir= opt$directory , samplePattern="", pData = phendata, verbose = FALSE) |
45 | 45 |
46 # Filter the genes with an expression superior to the input filter | 46 # Filter the genes with an expression superior to the input filter |
47 # ---------------------------------------------------------------------------------------- | 47 # ---------------------------------------------------------------------------------------- |
48 bgi_filt= subset(bgi, paste("rowVars(texpr(bgi)) >",filtstr), genomesubset = TRUE) | 48 bgi_filt= subset(bgi, paste("rowVars(texpr(bgi)) >",filtstr), genomesubset = TRUE) |
49 | 49 |
50 # Creating the variables containing the transcripts and the genes and sorting them through the arrange() command. | 50 # Creating the variables containing the transcripts and the genes and sorting them through the arrange() command. |
51 # Checking if there's one or more adjust variables in the phenotype data file | 51 # Checking if there's one or more adjust variables in the phenotype data file |
52 # ---------------------------------------------------------------------------------------- | 52 # ---------------------------------------------------------------------------------------- |
53 | 53 |
54 if (ncol(pData(bgi))<=3) { | 54 if (ncol(pData(bgi))<=3) { |
55 results_transcripts=stattest(bgi_filt,feature = "transcript", covariate = colnames(pData(bgi))[pdat], adjustvars = colnames(pData(bgi)[pdat+1]), getFC = TRUE, meas = "FPKM") | 55 results_transcripts=stattest(bgi_filt, feature = "transcript", covariate = colnames(pData(bgi))[pdat], adjustvars = colnames(pData(bgi)[pdat+1]), getFC = TRUE, meas = "FPKM") |
56 results_genes=stattest(bgi_filt,feature = "gene", covariate = colnames(pData(bgi))[pdat], adjustvars = colnames(pData(bgi)[pdat+1]), getFC = TRUE, meas = "FPKM") | 56 results_genes=stattest(bgi_filt, feature = "gene", covariate = colnames(pData(bgi))[pdat], adjustvars = colnames(pData(bgi)[pdat+1]), getFC = TRUE, meas = "FPKM") |
57 } else { | 57 } else { |
58 results_transcripts=stattest(bgi_filt,feature = "transcript", covariate = colnames(pData(bgi))[pdat], adjustvars = c(colnames(pData(bgi)[pdat+1:ncol(pData(bgi))])), getFC = TRUE, meas = "FPKM") | 58 results_transcripts=stattest(bgi_filt, feature = "transcript", covariate = colnames(pData(bgi))[pdat], adjustvars = c(colnames(pData(bgi)[pdat+1:ncol(pData(bgi))])), getFC = TRUE, meas = "FPKM") |
59 results_genes=stattest(bgi_filt,feature = "gene", covariate = colnames(pData(bgi))[pdat], adjustvars = c(colnames(pData(bgi)[pdat+1:ncol(pData(bgi))])), getFC = TRUE, meas = "FPKM") | 59 results_genes=stattest(bgi_filt, feature = "gene", covariate = colnames(pData(bgi))[pdat], adjustvars = c(colnames(pData(bgi)[pdat+1:ncol(pData(bgi))])), getFC = TRUE, meas = "FPKM") |
60 } | 60 } |
61 | 61 |
62 results_transcripts = data.frame(geneNames=ballgown::geneNames(bgi_filt), geneIDs=ballgown::geneIDs(bgi_filt), results_transcripts) | 62 results_transcripts = data.frame(geneNames=ballgown::geneNames(bgi_filt), geneIDs=ballgown::geneIDs(bgi_filt), results_transcripts) |
63 results_transcripts = arrange(results_transcripts,pval) | 63 results_transcripts = arrange(results_transcripts, pval) |
64 results_genes = arrange(results_genes,pval) | 64 results_genes = arrange(results_genes, pval) |
65 | 65 |
66 # Main output of the wrapper, two .csv files containing the genes and transcripts with their qvalue and pvalue | 66 # Main output of the wrapper, two .csv files containing the genes and transcripts with their qvalue and pvalue |
67 #This part also output the data of the ballgown object created in the process and save it in a R data file | 67 #This part also output the data of the ballgown object created in the process and save it in a R data file |
68 # ---------------------------------------------------------------------------------------- | 68 # ---------------------------------------------------------------------------------------- |
69 write.csv(results_transcripts, opt$outputtranscript, row.names = FALSE) | 69 if (opt$format == "tsv"){ |
70 write.csv(results_genes, opt$outputgenes, row.names = FALSE) | 70 write.table(results_transcripts, file=opt$tsvoutputtranscript, quote=FALSE, sep='\t', col.names = NA) |
71 write.table(results_genes, file=opt$tsvoutputgenes, quote=FALSE, sep='\t', col.names = NA) | |
72 } else { | |
73 write.csv(results_transcripts, opt$outputtranscript, row.names = FALSE) | |
74 write.csv(results_genes, opt$outputgenes, row.names = FALSE) | |
75 } | |
76 | |
71 if (opt$bgout != ""){ | 77 if (opt$bgout != ""){ |
72 save(bgi, file=opt$bgout) | 78 save(bgi, file=opt$bgout) |
73 } | 79 } |