Mercurial > repos > yufei-luo > s_mart
comparison SMART/DiffExpAnal/DESeqTools/anadiffGenes2conds.R @ 18:94ab73e8a190
Uploaded
author | m-zytnicki |
---|---|
date | Mon, 29 Apr 2013 03:20:15 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
17:b0e8584489e6 | 18:94ab73e8a190 |
---|---|
1 # Analyse differentielle de donnees d expression par gene | |
2 # avec DESeq | |
3 # 2 conditions | |
4 | |
5 args <- commandArgs() | |
6 #print(args[1]) | |
7 #print(args[2]) | |
8 #print(args[3]) | |
9 #print(args[4]) | |
10 #print(args[5]) | |
11 #print(args[6]) | |
12 #output file names | |
13 #print(args[7]) # HTML file name | |
14 #print(args[8]) # HTML file all images directory | |
15 #print(args[9]) # complete xls file name | |
16 #print(args[10])# UP xls file name | |
17 #print(args[11]) #Down xls file name | |
18 #print(args[12]) #the executable scipt (for getting the path) | |
19 | |
20 library(R2HTML) | |
21 library(R.utils) | |
22 | |
23 #run example: | |
24 projectName <- "DESeqAnalysis" | |
25 analysisVersion <- "V1" # fitType=local, sharingMode=fit-only, method=blind | |
26 rawDir <- "raw" | |
27 targetFile <- args[4] | |
28 header <- as.integer(args[5]) #si on a header ou pas, si on a, header=1, sinon header=0 | |
29 withOutReplicates <- as.integer(args[6]) | |
30 | |
31 #get the directory to write the results | |
32 tab <- splitByPattern(args[7], pattern="/") | |
33 res_dir <- "" | |
34 for (e in tab[1:length(tab)-1]) { res_dir <- paste(res_dir, e, sep="")} | |
35 #get the html output file name | |
36 OUT_HTMLname <- args[7] | |
37 #get the images directory to write to | |
38 OUT_imgDir <- args[8] | |
39 #if the directory dosen't existe, we should create it first | |
40 | |
41 alpha <- 0.05 | |
42 adjMethod <- "BH" | |
43 outfile <- T | |
44 runningScriptTab <- splitByPattern(args[12], pattern="/") | |
45 RfuncDir <- "" | |
46 for (r in runningScriptTab[1:length(runningScriptTab)-1]) { RfuncDir <- paste(RfuncDir, r, sep="")} #find the path of executable script | |
47 RfuncDir <- paste(RfuncDir, "DESeqTools/", sep="") #define the function files path | |
48 # Dossier contenant les fonctions | |
49 print(RfuncDir) | |
50 source( paste(RfuncDir, "RNAseqFunctions.R", sep="/") ) | |
51 | |
52 # Chargement des packages et des fonctions | |
53 library(DESeq) | |
54 RNAseqFunctions(RfuncDir) | |
55 # Chargement du target file | |
56 target <- loadTargetFile( targetFile, header ) | |
57 # Chargement des donnees, construction d'une table de comptages par gene | |
58 #have changed | |
59 rawCounts <- loadCountData( target, header ) | |
60 conds <- unique(target$group) | |
61 cond1 <- as.character(conds[1]) | |
62 cond2 <- as.character(conds[!conds == conds[1]]) | |
63 rawCounts <- HTseqClean( rawCounts ) | |
64 | |
65 # Transformation en matrice de comptages | |
66 counts <- raw2counts( rawCounts )[[1]] | |
67 | |
68 # Nombre de reads par echantillon | |
69 OUT_barplotTCName <- paste(OUT_imgDir, "barplotTC.png", sep="/") | |
70 barplotTC( counts, target$group, OUT_barplotTCName, out=outfile ) | |
71 | |
72 # Proportion comptages nuls | |
73 OUT_barplotNulName <- paste(OUT_imgDir, "barplotNul.png", sep="/") | |
74 barplotNul( counts, target$group, OUT_barplotNulName, out=outfile ) | |
75 | |
76 # Suppression comptages nuls | |
77 counts <- removeNul( counts )[[1]] | |
78 | |
79 # Density plot | |
80 OUT_densityPlotName <- paste(OUT_imgDir, "densityPlot.png", sep="/") | |
81 densityPlot( counts, target$group, OUT_densityPlotName, out=outfile ) | |
82 | |
83 # Boxplot | |
84 OUT_boxplotCountsName <- paste(OUT_imgDir, "boxplotCounts.png", sep="/") | |
85 boxplotCounts( counts, target$group, type = c("raw", "norm"), OUT_boxplotCountsName, out=outfile ) | |
86 # Sequence majoritaire | |
87 OUT_majSequenceName <- paste(OUT_imgDir, "majSequence.png", sep="/") | |
88 majSequence( counts, target$group, OUT_majSequenceName, out=outfile ) | |
89 | |
90 # ScatterPlot between two samples | |
91 OUT_scatterPlot <- paste(OUT_imgDir, "scatterPlot.png", sep="/") | |
92 pairwiseScatterPlots(counts, target, OUT_scatterPlot, out=outfile, pdffile=FALSE) | |
93 | |
94 # SERE coefficient calculation (Poisson hypothesis for replicates techiques), to know if the variability between the réplicates or the conditons is hight or not. | |
95 coef <- pairwiseSERE(counts) | |
96 print(coef) | |
97 coef | |
98 # Creation structure de donnees cds, !! we use newCountDataset because that we have first column not numeric, and DESeq dosen't take non numeric values. | |
99 cds <- newCountDataSet( counts, target$group ) | |
100 | |
101 # Diagnostic for clustering of non-normalized samples | |
102 OUT_clusterPlot_before <- paste(OUT_imgDir, "clusteringOfSamplesBefore.png", sep="/") | |
103 clusterPlot(cds, OUT_clusterPlot_before, out=outfile) | |
104 | |
105 | |
106 # Normalisation (calcul des lib size factors ) | |
107 cds <- estimateSizeFactors( cds ) | |
108 | |
109 # Estimation de la dispersion | |
110 # parametres: | |
111 # method: how samples are pooled to estimate dispersion. If no replicates use "blind" | |
112 # sharingMode: how variance estimate is computed with respect to the fitted line. | |
113 # "Maximum" is the most conservative (max between fit and estimation), "fit-only" keeps the estimated value | |
114 # fitType: refers to the model. "Local" is the published model, "parametric" is glm-based (may not converge), now we use "parametric" as default value. | |
115 #in this case, without replicates | |
116 if(withOutReplicates!=0){ | |
117 cds <- estimateDispersions( cds, sharingMode="fit-only", method="blind") | |
118 } else if(withOutReplicates==0){ | |
119 #cds <- estimateDispersions( cds, sharingMode="fit-only", fitType="local")} | |
120 cds <- estimateDispersions( cds)} | |
121 # Analyse differentielle, ajustement BH par defaut | |
122 res <- nbinomTest( cds, cond1, cond2) | |
123 | |
124 # Diagnostic for clustering of normalized samples | |
125 OUT_clusterPlot <- paste(OUT_imgDir, "clusteringOfSamples.png", sep="/") | |
126 clusterPlot(cds, OUT_clusterPlot, out=outfile) | |
127 | |
128 # Control plot of dispersion estimates | |
129 OUT_plotDispEstimatesName <- paste(OUT_imgDir, "disperssionEstimates.png", sep="/") | |
130 plotDispEstimates( cds, OUT_plotDispEstimatesName, out=outfile ) | |
131 | |
132 # Distribution of raw p-values | |
133 OUT_histoRawpName <- paste(OUT_imgDir, "histoRawPvalue.png", sep="/") | |
134 histoRawp( res, OUT_histoRawpName, out=outfile ) | |
135 | |
136 # MAplot showing DE genes | |
137 OUT_MAplotDEName <- paste(OUT_imgDir, "MAplotDE.png", sep="/") | |
138 MAplotDE( res, alpha, OUT_MAplotDEName, out=outfile ) | |
139 | |
140 # export complete data | |
141 OUT_completeName <- args[9] | |
142 complete <- exportComplete( counts, res, target, adjMethod, cond1, cond2, OUT_completeName, out=outfile ) | |
143 | |
144 # export significant genes | |
145 OUT_upName <- args[10] | |
146 OUT_downName <- args[11] | |
147 diff <- exportDiff( complete, alpha, adjMethod, OUT_upName, OUT_downName, out=outfile ) | |
148 | |
149 # write all images results into an HTML file | |
150 prefixHTMLname <- tab[length(tab)] | |
151 #HTMLCSS(file.path(res_dir), filename=prefixHTMLname, CSSfile="R2HTML") | |
152 HTMLInitFile(file.path(res_dir), filename=prefixHTMLname, BackGroundColor="white") | |
153 HTML.title("<center>Differential Expression DESeq analysis.", HR=1) | |
154 HTML.title("<center>BarplotTC: number of RNA-seq reads per sample.", HR=2) | |
155 HTMLInsertGraph("barplotTC.png") | |
156 | |
157 HTML.title("<center>BarplotNul: number of RNA-seq reads that the count is 0 (nul).", HR=2) | |
158 HTMLInsertGraph("barplotNul.png") | |
159 | |
160 HTML.title("<center>DensityPlot: density of each sample.", HR=2) | |
161 HTMLInsertGraph("densityPlot.png") | |
162 | |
163 HTML.title("<center>Boxplot: number of RNA-seq reads distribution per sample.", HR=2) | |
164 HTMLInsertGraph("boxplotCounts.png") | |
165 | |
166 HTML.title("<center>MajorSequence: the proportion of reads associated with the most expressed sequence.", HR=2) | |
167 HTMLInsertGraph("majSequence.png") | |
168 | |
169 HTML.title("<center>ScatterPlot: Scatter plot of samples.", HR=2) | |
170 HTMLInsertGraph("scatterPlot.png") | |
171 | |
172 HTML.title("<center>Clustering Of No-Normalized Samples: Representing the no-normalized samples in Diagnostic.", HR=2) | |
173 HTMLInsertGraph("clusteringOfSamplesBefore.png") | |
174 | |
175 HTML.title("<center>Clustering Of Normalized Samples: Representing the normalized samples in Diagnostic.", HR=2) | |
176 HTMLInsertGraph("clusteringOfSamples.png") | |
177 | |
178 HTML.title("<center>DispersionEstimates: representing dispersion estimates vs mean expression.", HR=2) | |
179 HTMLInsertGraph("disperssionEstimates.png") | |
180 | |
181 HTML.title("<center>HistoRawPValue: histogram of raw p-value.", HR=2) | |
182 HTMLInsertGraph("histoRawPvalue.png") | |
183 | |
184 HTML.title("<center>MAplotDE: the differentially expressed genes (red point).", HR=2) | |
185 HTMLInsertGraph("MAplotDE.png") | |
186 HTMLEndFile() | |
187 absoluPrefixHTMLname <- paste(res_dir, prefixHTMLname, sep="") | |
188 outName <- paste(absoluPrefixHTMLname, ".html", sep="") | |
189 # change name is to be adapted into Galaxy | |
190 file.rename(outName, OUT_HTMLname) | |
191 |