comparison SMART/DiffExpAnal/DESeqTools/anadiffGenes2conds.R @ 18:94ab73e8a190

Uploaded
author m-zytnicki
date Mon, 29 Apr 2013 03:20:15 -0400
parents
children
comparison
equal deleted inserted replaced
17:b0e8584489e6 18:94ab73e8a190
1 # Analyse differentielle de donnees d expression par gene
2 # avec DESeq
3 # 2 conditions
4
5 args <- commandArgs()
6 #print(args[1])
7 #print(args[2])
8 #print(args[3])
9 #print(args[4])
10 #print(args[5])
11 #print(args[6])
12 #output file names
13 #print(args[7]) # HTML file name
14 #print(args[8]) # HTML file all images directory
15 #print(args[9]) # complete xls file name
16 #print(args[10])# UP xls file name
17 #print(args[11]) #Down xls file name
18 #print(args[12]) #the executable scipt (for getting the path)
19
20 library(R2HTML)
21 library(R.utils)
22
23 #run example:
24 projectName <- "DESeqAnalysis"
25 analysisVersion <- "V1" # fitType=local, sharingMode=fit-only, method=blind
26 rawDir <- "raw"
27 targetFile <- args[4]
28 header <- as.integer(args[5]) #si on a header ou pas, si on a, header=1, sinon header=0
29 withOutReplicates <- as.integer(args[6])
30
31 #get the directory to write the results
32 tab <- splitByPattern(args[7], pattern="/")
33 res_dir <- ""
34 for (e in tab[1:length(tab)-1]) { res_dir <- paste(res_dir, e, sep="")}
35 #get the html output file name
36 OUT_HTMLname <- args[7]
37 #get the images directory to write to
38 OUT_imgDir <- args[8]
39 #if the directory dosen't existe, we should create it first
40
41 alpha <- 0.05
42 adjMethod <- "BH"
43 outfile <- T
44 runningScriptTab <- splitByPattern(args[12], pattern="/")
45 RfuncDir <- ""
46 for (r in runningScriptTab[1:length(runningScriptTab)-1]) { RfuncDir <- paste(RfuncDir, r, sep="")} #find the path of executable script
47 RfuncDir <- paste(RfuncDir, "DESeqTools/", sep="") #define the function files path
48 # Dossier contenant les fonctions
49 print(RfuncDir)
50 source( paste(RfuncDir, "RNAseqFunctions.R", sep="/") )
51
52 # Chargement des packages et des fonctions
53 library(DESeq)
54 RNAseqFunctions(RfuncDir)
55 # Chargement du target file
56 target <- loadTargetFile( targetFile, header )
57 # Chargement des donnees, construction d'une table de comptages par gene
58 #have changed
59 rawCounts <- loadCountData( target, header )
60 conds <- unique(target$group)
61 cond1 <- as.character(conds[1])
62 cond2 <- as.character(conds[!conds == conds[1]])
63 rawCounts <- HTseqClean( rawCounts )
64
65 # Transformation en matrice de comptages
66 counts <- raw2counts( rawCounts )[[1]]
67
68 # Nombre de reads par echantillon
69 OUT_barplotTCName <- paste(OUT_imgDir, "barplotTC.png", sep="/")
70 barplotTC( counts, target$group, OUT_barplotTCName, out=outfile )
71
72 # Proportion comptages nuls
73 OUT_barplotNulName <- paste(OUT_imgDir, "barplotNul.png", sep="/")
74 barplotNul( counts, target$group, OUT_barplotNulName, out=outfile )
75
76 # Suppression comptages nuls
77 counts <- removeNul( counts )[[1]]
78
79 # Density plot
80 OUT_densityPlotName <- paste(OUT_imgDir, "densityPlot.png", sep="/")
81 densityPlot( counts, target$group, OUT_densityPlotName, out=outfile )
82
83 # Boxplot
84 OUT_boxplotCountsName <- paste(OUT_imgDir, "boxplotCounts.png", sep="/")
85 boxplotCounts( counts, target$group, type = c("raw", "norm"), OUT_boxplotCountsName, out=outfile )
86 # Sequence majoritaire
87 OUT_majSequenceName <- paste(OUT_imgDir, "majSequence.png", sep="/")
88 majSequence( counts, target$group, OUT_majSequenceName, out=outfile )
89
90 # ScatterPlot between two samples
91 OUT_scatterPlot <- paste(OUT_imgDir, "scatterPlot.png", sep="/")
92 pairwiseScatterPlots(counts, target, OUT_scatterPlot, out=outfile, pdffile=FALSE)
93
94 # SERE coefficient calculation (Poisson hypothesis for replicates techiques), to know if the variability between the réplicates or the conditons is hight or not.
95 coef <- pairwiseSERE(counts)
96 print(coef)
97 coef
98 # Creation structure de donnees cds, !! we use newCountDataset because that we have first column not numeric, and DESeq dosen't take non numeric values.
99 cds <- newCountDataSet( counts, target$group )
100
101 # Diagnostic for clustering of non-normalized samples
102 OUT_clusterPlot_before <- paste(OUT_imgDir, "clusteringOfSamplesBefore.png", sep="/")
103 clusterPlot(cds, OUT_clusterPlot_before, out=outfile)
104
105
106 # Normalisation (calcul des lib size factors )
107 cds <- estimateSizeFactors( cds )
108
109 # Estimation de la dispersion
110 # parametres:
111 # method: how samples are pooled to estimate dispersion. If no replicates use "blind"
112 # sharingMode: how variance estimate is computed with respect to the fitted line.
113 # "Maximum" is the most conservative (max between fit and estimation), "fit-only" keeps the estimated value
114 # fitType: refers to the model. "Local" is the published model, "parametric" is glm-based (may not converge), now we use "parametric" as default value.
115 #in this case, without replicates
116 if(withOutReplicates!=0){
117 cds <- estimateDispersions( cds, sharingMode="fit-only", method="blind")
118 } else if(withOutReplicates==0){
119 #cds <- estimateDispersions( cds, sharingMode="fit-only", fitType="local")}
120 cds <- estimateDispersions( cds)}
121 # Analyse differentielle, ajustement BH par defaut
122 res <- nbinomTest( cds, cond1, cond2)
123
124 # Diagnostic for clustering of normalized samples
125 OUT_clusterPlot <- paste(OUT_imgDir, "clusteringOfSamples.png", sep="/")
126 clusterPlot(cds, OUT_clusterPlot, out=outfile)
127
128 # Control plot of dispersion estimates
129 OUT_plotDispEstimatesName <- paste(OUT_imgDir, "disperssionEstimates.png", sep="/")
130 plotDispEstimates( cds, OUT_plotDispEstimatesName, out=outfile )
131
132 # Distribution of raw p-values
133 OUT_histoRawpName <- paste(OUT_imgDir, "histoRawPvalue.png", sep="/")
134 histoRawp( res, OUT_histoRawpName, out=outfile )
135
136 # MAplot showing DE genes
137 OUT_MAplotDEName <- paste(OUT_imgDir, "MAplotDE.png", sep="/")
138 MAplotDE( res, alpha, OUT_MAplotDEName, out=outfile )
139
140 # export complete data
141 OUT_completeName <- args[9]
142 complete <- exportComplete( counts, res, target, adjMethod, cond1, cond2, OUT_completeName, out=outfile )
143
144 # export significant genes
145 OUT_upName <- args[10]
146 OUT_downName <- args[11]
147 diff <- exportDiff( complete, alpha, adjMethod, OUT_upName, OUT_downName, out=outfile )
148
149 # write all images results into an HTML file
150 prefixHTMLname <- tab[length(tab)]
151 #HTMLCSS(file.path(res_dir), filename=prefixHTMLname, CSSfile="R2HTML")
152 HTMLInitFile(file.path(res_dir), filename=prefixHTMLname, BackGroundColor="white")
153 HTML.title("<center>Differential Expression DESeq analysis.", HR=1)
154 HTML.title("<center>BarplotTC: number of RNA-seq reads per sample.", HR=2)
155 HTMLInsertGraph("barplotTC.png")
156
157 HTML.title("<center>BarplotNul: number of RNA-seq reads that the count is 0 (nul).", HR=2)
158 HTMLInsertGraph("barplotNul.png")
159
160 HTML.title("<center>DensityPlot: density of each sample.", HR=2)
161 HTMLInsertGraph("densityPlot.png")
162
163 HTML.title("<center>Boxplot: number of RNA-seq reads distribution per sample.", HR=2)
164 HTMLInsertGraph("boxplotCounts.png")
165
166 HTML.title("<center>MajorSequence: the proportion of reads associated with the most expressed sequence.", HR=2)
167 HTMLInsertGraph("majSequence.png")
168
169 HTML.title("<center>ScatterPlot: Scatter plot of samples.", HR=2)
170 HTMLInsertGraph("scatterPlot.png")
171
172 HTML.title("<center>Clustering Of No-Normalized Samples: Representing the no-normalized samples in Diagnostic.", HR=2)
173 HTMLInsertGraph("clusteringOfSamplesBefore.png")
174
175 HTML.title("<center>Clustering Of Normalized Samples: Representing the normalized samples in Diagnostic.", HR=2)
176 HTMLInsertGraph("clusteringOfSamples.png")
177
178 HTML.title("<center>DispersionEstimates: representing dispersion estimates vs mean expression.", HR=2)
179 HTMLInsertGraph("disperssionEstimates.png")
180
181 HTML.title("<center>HistoRawPValue: histogram of raw p-value.", HR=2)
182 HTMLInsertGraph("histoRawPvalue.png")
183
184 HTML.title("<center>MAplotDE: the differentially expressed genes (red point).", HR=2)
185 HTMLInsertGraph("MAplotDE.png")
186 HTMLEndFile()
187 absoluPrefixHTMLname <- paste(res_dir, prefixHTMLname, sep="")
188 outName <- paste(absoluPrefixHTMLname, ".html", sep="")
189 # change name is to be adapted into Galaxy
190 file.rename(outName, OUT_HTMLname)
191