Mercurial > repos > yufei-luo > differential_expression_analysis_pipeline_for_rnaseq_data
comparison deseq/differential_expression_analysis_pipeline_for_rnaseq_data-a03838a6eb54/DiffExpAnal/DESeqTools/anadiffGenes2conds.R @ 10:6e573fd3c41b draft
Uploaded
author | yufei-luo |
---|---|
date | Mon, 13 May 2013 10:06:30 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
9:a03838a6eb54 | 10:6e573fd3c41b |
---|---|
1 # Marie-Anges Dillies, Yufei Luo | |
2 # Analyse differentielle de donnees d expression par gene | |
3 # avec DESeq | |
4 # 2 conditions | |
5 | |
6 args <- commandArgs() | |
7 #print(args[1]) | |
8 #print(args[2]) | |
9 #print(args[3]) | |
10 #print(args[4]) | |
11 #print(args[5]) | |
12 #print(args[6]) | |
13 #output file names | |
14 #print(args[7]) # HTML file name | |
15 #print(args[8]) # HTML file all images directory | |
16 #print(args[9]) # complete xls file name | |
17 #print(args[10])# UP xls file name | |
18 #print(args[11]) #Down xls file name | |
19 #print(args[12]) #the executable scipt (for getting the path) | |
20 | |
21 library(R2HTML) | |
22 library(R.utils) | |
23 | |
24 #run example: | |
25 projectName <- "DESeqAnalysis" | |
26 analysisVersion <- "V1" # fitType=local, sharingMode=fit-only, method=blind | |
27 rawDir <- "raw" | |
28 targetFile <- args[4] | |
29 header <- as.integer(args[5]) #si on a header ou pas, si on a, header=1, sinon header=0 | |
30 withOutReplicates <- as.integer(args[6]) | |
31 | |
32 #get the directory to write the results | |
33 tab <- splitByPattern(args[7], pattern="/") | |
34 res_dir <- "" | |
35 for (e in tab[1:length(tab)-1]) { res_dir <- paste(res_dir, e, sep="")} | |
36 #get the html output file name | |
37 OUT_HTMLname <- args[7] | |
38 #get the images directory to write to | |
39 OUT_imgDir <- args[8] | |
40 #if the directory dosen't existe, we should create it first | |
41 | |
42 alpha <- 0.05 | |
43 adjMethod <- "BH" | |
44 outfile <- T | |
45 runningScriptTab <- splitByPattern(args[12], pattern="/") | |
46 RfuncDir <- "" | |
47 for (r in runningScriptTab[1:length(runningScriptTab)-1]) { RfuncDir <- paste(RfuncDir, r, sep="")} #find the path of executable script | |
48 RfuncDir <- paste(RfuncDir, "DESeqTools/", sep="") #define the function files path | |
49 # Dossier contenant les fonctions | |
50 print(RfuncDir) | |
51 source( paste(RfuncDir, "RNAseqFunctions.R", sep="/") ) | |
52 | |
53 # Chargement des packages et des fonctions | |
54 library(DESeq) | |
55 RNAseqFunctions(RfuncDir) | |
56 # Chargement du target file | |
57 target <- loadTargetFile( targetFile, header ) | |
58 # Chargement des donnees, construction d'une table de comptages par gene | |
59 #have changed | |
60 rawCounts <- loadCountData( target, header ) | |
61 conds <- unique(target$group) | |
62 cond1 <- as.character(conds[1]) | |
63 cond2 <- as.character(conds[!conds == conds[1]]) | |
64 rawCounts <- HTseqClean( rawCounts ) | |
65 | |
66 # Transformation en matrice de comptages | |
67 counts <- raw2counts( rawCounts )[[1]] | |
68 | |
69 # Nombre de reads par echantillon | |
70 OUT_barplotTCName <- paste(OUT_imgDir, "barplotTC.png", sep="/") | |
71 barplotTC( counts, target$group, OUT_barplotTCName, out=outfile ) | |
72 | |
73 # Proportion comptages nuls | |
74 OUT_barplotNulName <- paste(OUT_imgDir, "barplotNul.png", sep="/") | |
75 barplotNul( counts, target$group, OUT_barplotNulName, out=outfile ) | |
76 | |
77 # Suppression comptages nuls | |
78 counts <- removeNul( counts )[[1]] | |
79 | |
80 # Density plot | |
81 OUT_densityPlotName <- paste(OUT_imgDir, "densityPlot.png", sep="/") | |
82 densityPlot( counts, target$group, OUT_densityPlotName, out=outfile ) | |
83 | |
84 # Boxplot | |
85 OUT_boxplotCountsName <- paste(OUT_imgDir, "boxplotCounts.png", sep="/") | |
86 boxplotCounts( counts, target$group, type = c("raw", "norm"), OUT_boxplotCountsName, out=outfile ) | |
87 # Sequence majoritaire | |
88 OUT_majSequenceName <- paste(OUT_imgDir, "majSequence.png", sep="/") | |
89 majSequence( counts, target$group, OUT_majSequenceName, out=outfile ) | |
90 | |
91 # ScatterPlot between two samples | |
92 OUT_scatterPlot <- paste(OUT_imgDir, "scatterPlot.png", sep="/") | |
93 pairwiseScatterPlots(counts, target, OUT_scatterPlot, out=outfile, pdffile=FALSE) | |
94 | |
95 # SERE coefficient calculation (Poisson hypothesis for replicates techiques), to know if the variability between the réplicates or the conditons is hight or not. | |
96 coef <- pairwiseSERE(counts) | |
97 print(coef) | |
98 coef | |
99 # Creation structure de donnees cds, !! we use newCountDataset because that we have first column not numeric, and DESeq dosen't take non numeric values. | |
100 cds <- newCountDataSet( counts, target$group ) | |
101 | |
102 # Diagnostic for clustering of non-normalized samples | |
103 OUT_clusterPlot_before <- paste(OUT_imgDir, "clusteringOfSamplesBefore.png", sep="/") | |
104 clusterPlot(cds, OUT_clusterPlot_before, out=outfile) | |
105 | |
106 | |
107 # Normalisation (calcul des lib size factors ) | |
108 cds <- estimateSizeFactors( cds ) | |
109 | |
110 # Estimation de la dispersion | |
111 # parametres: | |
112 # method: how samples are pooled to estimate dispersion (we use "pooled" | |
113 # as default parameter. If no replicates, we use "blind". | |
114 # sharingMode: how variance estimate is computed with respect to the fitted line. | |
115 #"Maximum" is the most conservative (max between fit andestimation), | |
116 #"fit-only" keeps the estimated value, we use "Maximum" as default parameter. | |
117 # fitType: refers to the model. "Local" is the published model, | |
118 #"parametric" is glm-based (may not converge), now we use "parametric" as default value. | |
119 | |
120 if(withOutReplicates!=0){ | |
121 cds <- estimateDispersions( cds, method="blind")#in this case, without replicates | |
122 | |
123 } else if(withOutReplicates==0){ | |
124 cds <- estimateDispersions( cds, method="pooled", sharingMode="maximum", fitType="parametric") } | |
125 | |
126 # Analyse differentielle, ajustement BH par defaut | |
127 res <- nbinomTest( cds, cond1, cond2) | |
128 | |
129 # Diagnostic for clustering of normalized samples | |
130 OUT_clusterPlot <- paste(OUT_imgDir, "clusteringOfSamples.png", sep="/") | |
131 clusterPlot(cds, OUT_clusterPlot, out=outfile) | |
132 | |
133 # Control plot of dispersion estimates | |
134 OUT_plotDispEstimatesName <- paste(OUT_imgDir, "disperssionEstimates.png", sep="/") | |
135 plotDispEstimates( cds, OUT_plotDispEstimatesName, out=outfile ) | |
136 | |
137 # Distribution of raw p-values | |
138 OUT_histoRawpName <- paste(OUT_imgDir, "histoRawPvalue.png", sep="/") | |
139 histoRawp( res, OUT_histoRawpName, out=outfile ) | |
140 | |
141 # MAplot showing DE genes | |
142 OUT_MAplotDEName <- paste(OUT_imgDir, "MAplotDE.png", sep="/") | |
143 MAplotDE( res, alpha, OUT_MAplotDEName, out=outfile ) | |
144 | |
145 # export complete data | |
146 OUT_completeName <- args[9] | |
147 complete <- exportComplete( counts, res, target, adjMethod, cond1, cond2, OUT_completeName, out=outfile ) | |
148 | |
149 # export significant genes | |
150 OUT_upName <- args[10] | |
151 OUT_downName <- args[11] | |
152 diff <- exportDiff( complete, alpha, adjMethod, OUT_upName, OUT_downName, out=outfile ) | |
153 | |
154 # write all images results into an HTML file | |
155 prefixHTMLname <- tab[length(tab)] | |
156 #HTMLCSS(file.path(res_dir), filename=prefixHTMLname, CSSfile="R2HTML") | |
157 HTMLInitFile(file.path(res_dir), filename=prefixHTMLname, BackGroundColor="white") | |
158 HTML.title("<center>Differential Expression DESeq analysis.", HR=1) | |
159 HTML.title("<center>BarplotTC: number of RNA-seq reads per sample.", HR=2) | |
160 HTMLInsertGraph("barplotTC.png") | |
161 | |
162 HTML.title("<center>BarplotNul: number of RNA-seq reads that the count is 0 (nul).", HR=2) | |
163 HTMLInsertGraph("barplotNul.png") | |
164 | |
165 HTML.title("<center>DensityPlot: density of each sample.", HR=2) | |
166 HTMLInsertGraph("densityPlot.png") | |
167 | |
168 HTML.title("<center>Boxplot: number of RNA-seq reads distribution per sample.", HR=2) | |
169 HTMLInsertGraph("boxplotCounts.png") | |
170 | |
171 HTML.title("<center>MajorSequence: the proportion of reads associated with the most expressed sequence.", HR=2) | |
172 HTMLInsertGraph("majSequence.png") | |
173 | |
174 HTML.title("<center>ScatterPlot: Scatter plot of samples.", HR=2) | |
175 HTMLInsertGraph("scatterPlot.png") | |
176 | |
177 HTML.title("<center>Clustering Of No-Normalized Samples: Representing the no-normalized samples in Diagnostic.", HR=2) | |
178 HTMLInsertGraph("clusteringOfSamplesBefore.png") | |
179 | |
180 HTML.title("<center>Clustering Of Normalized Samples: Representing the normalized samples in Diagnostic.", HR=2) | |
181 HTMLInsertGraph("clusteringOfSamples.png") | |
182 | |
183 HTML.title("<center>DispersionEstimates: representing dispersion estimates vs mean expression.", HR=2) | |
184 HTMLInsertGraph("disperssionEstimates.png") | |
185 | |
186 HTML.title("<center>HistoRawPValue: histogram of raw p-value.", HR=2) | |
187 HTMLInsertGraph("histoRawPvalue.png") | |
188 | |
189 HTML.title("<center>MAplotDE: the differentially expressed genes (red point).", HR=2) | |
190 HTMLInsertGraph("MAplotDE.png") | |
191 HTMLEndFile() | |
192 absoluPrefixHTMLname <- paste(res_dir, prefixHTMLname, sep="") | |
193 outName <- paste(absoluPrefixHTMLname, ".html", sep="") | |
194 # change name is to be adapted into Galaxy | |
195 file.rename(outName, OUT_HTMLname) | |
196 |