Mercurial > repos > mnhn65mo > vigiechiro
comparison BilanEnrichiPF.R @ 0:0e3db3a308c0 draft default tip
Uploaded
author | mnhn65mo |
---|---|
date | Mon, 06 Aug 2018 09:13:29 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:0e3db3a308c0 |
---|---|
1 library(data.table) | |
2 library(DT) | |
3 library(htmlwidgets) | |
4 | |
5 f2p <- function(x) #get date-time data from recording file names | |
6 { | |
7 if (is.data.frame((x)[1])) {pretemps <- vector(length = nrow(x))} | |
8 op <- options(digits.secs = 3) | |
9 pretemps <- paste(substr(x, nchar(x) - 18, nchar(x)-4), ".", substr(x, nchar(x) - 2, nchar(x)), sep = "") | |
10 strptime(pretemps, "%Y%m%d_%H%M%OS",tz="UTC")-7200 | |
11 } | |
12 | |
13 args <- commandArgs(trailingOnly = TRUE) | |
14 #print(args) | |
15 EchelleErreur=c("","POSSIBLE","PROBABLE","SUR") | |
16 EchelleNumErreur=c(99,50,10,1) | |
17 | |
18 #for test | |
19 #inputest=list.files("C:/Users/Yves Bas/Documents/GitHub/65MO_Galaxy-E/raw_scripts/Vigie-Chiro/output_IdValid_input_BilanEnrichi/",pattern="IdC2.csv",full.names=T) | |
20 #for (i in 1:length(inputest)) | |
21 #{ | |
22 # args=c(inputest[i],"refPF.csv","SpeciesList.csv") | |
23 | |
24 | |
25 IdC2=fread(args[1]) | |
26 refPF=fread(args[2]) | |
27 GroupList=fread(args[3]) | |
28 | |
29 if(substr(IdC2$`nom du fichier`[1],2,2)!="a") | |
30 { | |
31 print("Protocole non conforme, ce script doit etre lance pour un protocole Point Fixe") | |
32 }else{ | |
33 | |
34 | |
35 #compute error risk by species (minimum error among files) | |
36 #to be replaced by glm outputs if I'll have time | |
37 RisqueErreurT=aggregate(IdC2$IdProb,by=list(IdC2$IdExtrap),FUN=function(x) ceiling((1-max(x-0.0001))*100)) | |
38 barplot(RisqueErreurT$x,names.arg=RisqueErreurT$Group.1,las=2) | |
39 #compute error risk accoring to observer/validator (a little dirty because it relies on alphabetical order of confidence classes: POSSIBLE < PROBABLE < SUR) | |
40 RisqueErreurOV0=match(IdC2$ConfV,EchelleErreur) | |
41 RisqueErreurOV=aggregate(RisqueErreurOV0,by=list(IdC2$IdExtrap) | |
42 ,FUN=max) | |
43 RisqueErreurOV2=EchelleNumErreur[RisqueErreurOV$x] | |
44 #compute minimum error risk between man and machine | |
45 RisqueErreur=pmin(RisqueErreurT$x,RisqueErreurOV2) | |
46 | |
47 #compute number of files validated per species | |
48 FichValid=aggregate(IdC2$IdV,by=list(IdC2$IdExtrap,IdC2$'nom du fichier') | |
49 ,FUN=function(x) sum(x!="")) | |
50 NbValid2=aggregate(FichValid$x,by=list(FichValid$Group.1),FUN=function(x) sum(x>0)) | |
51 | |
52 DiffC50=vector() # to store the median of confidence difference between unvalidated records and validated ones | |
53 DiffT50=vector() # to store the median of time difference between unvalidated records and validated ones | |
54 for (j in 1:nlevels(as.factor(IdC2$IdExtrap))) | |
55 { | |
56 IdSp=subset(IdC2 | |
57 ,IdC2$IdExtrap==levels(as.factor(IdC2$IdExtrap))[j]) | |
58 IdSp=IdSp[order(IdSp$IdProb),] | |
59 IdSpV=subset(IdSp,IdSp$IdV!="") | |
60 if(nrow(IdSpV)>0) | |
61 { | |
62 cuts <- c(-Inf, IdSpV$IdProb[-1]-diff(IdSpV$IdProb)/2, Inf) | |
63 CorrC=findInterval(IdSp$IdProb, cuts) | |
64 CorrC2=IdSpV$IdProb[CorrC] | |
65 DiffC=abs(IdSp$IdProb-CorrC2) | |
66 DiffC50=c(DiffC50,median(DiffC)) | |
67 | |
68 IdSp=IdSp[order(IdSp$TimeNum),] | |
69 IdSpV=subset(IdSp,IdSp$IdV!="") | |
70 cuts <- c(-Inf, IdSpV$TimeNum[-1]-diff(IdSpV$TimeNum)/2, Inf) | |
71 CorrT=findInterval(IdSp$TimeNum, cuts) | |
72 CorrT2=IdSpV$TimeNum[CorrT] | |
73 DiffT=abs(IdSp$TimeNum-CorrT2) | |
74 DiffT50=c(DiffT50,median(DiffT)) | |
75 }else{ | |
76 DiffC50=c(DiffC50,Inf) | |
77 DiffT50=c(DiffT50,Inf) | |
78 } | |
79 } | |
80 #compute an index of validation effort per species | |
81 EffortV=1/DiffC50/DiffT50 | |
82 EffortClass=(EffortV>0.0005)+(EffortV>0.005)+RisqueErreurOV$x | |
83 cbind(RisqueErreurOV,EffortV,DiffC50,DiffT50) | |
84 barplot(EffortClass-1,names.arg=NbValid2$Group.1,las=2) | |
85 ClassEffortV=c("-","FAIBLE","SUFFISANT","SUFFISANT","FORT","FORT") | |
86 EffortClassMot=ClassEffortV[EffortClass] | |
87 | |
88 | |
89 #get date-night | |
90 pourDateNuit=IdC2$TimeNum-12*3600 #bricolage-decalage de 12 heures pour ramener a la date du debut de nuit | |
91 DateNuit=as.Date.POSIXct(pourDateNuit) # date of the beginning of the night | |
92 DateJour=as.Date.POSIXct(IdC2$TimeNum) # date (UTC+0) | |
93 IdC2$DateNuit=DateNuit | |
94 IdC2$DateJour=DateJour | |
95 NbNuit=as.numeric(max(IdC2$DateNuit)-min(IdC2$DateNuit))+1 | |
96 | |
97 #compare activity / reference frame | |
98 ActMoy=aggregate(IdC2$`nom du fichier` | |
99 ,by=list(IdC2$IdExtrap),FUN=function(x) length(x)/NbNuit) | |
100 ListSpref=match(ActMoy$Group.1,refPF$Espece) | |
101 Subref=refPF[ListSpref] | |
102 QualifAct=vector() | |
103 for (k in 1:nrow(ActMoy)) | |
104 { | |
105 if(is.na(Subref$Q25[k])) | |
106 { | |
107 QualifAct=c(QualifAct,NA) | |
108 }else{ | |
109 cuts=cbind(-Inf,as.numeric(Subref$Q25[k]),as.numeric(Subref$Q75[k]) | |
110 ,as.numeric(Subref$Q98[k]),Inf) | |
111 | |
112 QualifAct=c(QualifAct,findInterval(ActMoy$x[k],cuts,left.open=T)) | |
113 } | |
114 } | |
115 ClassAct=c("FAIBLE","MODEREE","FORTE","TRES FORTE") | |
116 QualifActMot=ClassAct[QualifAct] | |
117 | |
118 #compute activity by nights (to be completed) | |
119 #ActNuit=aggregate(IdC2$`nom du fichier`,by=list(IdC2$DateNuit,IdC2$IdExtrap),FUN=length) | |
120 | |
121 #organize the csv summary | |
122 SummPart0=cbind(Esp=levels(as.factor(IdC2$IdExtrap)) | |
123 ,RisqueErreur,NbValid=NbValid2$x,EffortValid=EffortClassMot | |
124 ,Contacts_Nuit=round(ActMoy$x),Niveau_Activite=QualifActMot) | |
125 | |
126 | |
127 InfoSp=c("GroupFR","NomFR","Scientific name","Esp") | |
128 GroupShort=GroupList[,InfoSp,with=FALSE] | |
129 #GroupShort=GroupList[,..InfoSp] | |
130 SummPart=merge(GroupShort,SummPart0,by="Esp") | |
131 IndexGroupe=c("Autre","Sauterelle","Chauve-souris") | |
132 SummPart$IndexSumm=match(SummPart$GroupFR,IndexGroupe) | |
133 SummPart=SummPart[with(SummPart | |
134 ,order(IndexSumm,as.numeric(Contacts_Nuit),decreasing=T)),] | |
135 colnames(SummPart)=c("Code","Groupe","Nom francais","Nom scientifique" | |
136 ,"Risque d'erreur (%)","Nb Validations" | |
137 ,"Effort de validation","Nb de Contacts par Nuit" | |
138 ,"Niveau d'Activite","TriGroupe") | |
139 | |
140 #to do: extend colors to other columns to improve readability | |
141 SummHTML=datatable(SummPart, rownames = FALSE) %>% | |
142 formatStyle(columns = c("Code","Groupe","Nom francais","Nom scientifique","Risque d'erreur (%)"),valueColumns="Risque d'erreur (%)", | |
143 background = styleInterval(c(1, 10, 50), c("white", "khaki", "orange", "orangered"))) %>% | |
144 formatStyle(columns = "Effort de validation", | |
145 background = styleEqual(c("-","FAIBLE","SUFFISANT","FORT"), c("white", "cyan", "royalblue", "darkblue"))) %>% | |
146 formatStyle(columns = c("Nb de Contacts par Nuit","Niveau d'Activite"),valueColumns="Niveau d'Activite", | |
147 background = styleEqual(c("FAIBLE","MODEREE","FORTE","TRES FORTE"), c("palegoldenrod", "greenyellow", "limegreen", "darkgreen"))) | |
148 | |
149 saveWidget(SummHTML,"output-summary.html") | |
150 write.table(SummPart,"output-summary.tabular",sep="\t",row.names=F) | |
151 | |
152 #compute number of files validated per night/hour | |
153 IdC2$Heure=sapply(IdC2$`nom du fichier`,FUN=function(x) substr(x,nchar(x)-9,nchar(x)-8)) | |
154 | |
155 ActNuit=aggregate(IdC2$`nom du fichier`,by=list(IdC2$IdExtrap,IdC2$Session),FUN=length) | |
156 ListSpref=match(ActNuit$Group.1,refPF$Espece) | |
157 Subref=refPF[ListSpref] | |
158 | |
159 | |
160 QualifActN=vector() | |
161 for (k in 1:nrow(ActNuit)) | |
162 { | |
163 if(is.na(Subref$Q25[k])) | |
164 { | |
165 QualifActN=c(QualifActN,NA) | |
166 }else{ | |
167 cuts=cbind(-Inf,as.numeric(Subref$Q25[k]),as.numeric(Subref$Q75[k]) | |
168 ,as.numeric(Subref$Q98[k]),Inf) | |
169 | |
170 QualifActN=c(QualifActN,findInterval(ActNuit$x[k],cuts,left.open=T)) | |
171 } | |
172 } | |
173 ActNuit$QualifActN=QualifActN | |
174 | |
175 ActNuitT=dcast(data=ActNuit,formula=Group.1~Group.2 | |
176 ,value.var="x") | |
177 ActNuitT[is.na(ActNuitT)]=0 | |
178 RefNuitT=dcast(data=ActNuit,formula=Group.1~Group.2 | |
179 ,value.var="QualifActN") | |
180 ARNuit=merge(ActNuitT,RefNuitT,by="Group.1") | |
181 | |
182 SummPartshort=cbind(SummPart[,c(1:5)],TriGroupe=SummPart[,TriGroupe]) | |
183 SummPartN=merge(SummPartshort,ARNuit,by.x="Code",by.y="Group.1") | |
184 SummPartN=SummPartN[order(TriGroupe,decreasing=T),] | |
185 | |
186 test=grepl(".x",colnames(SummPartN)) | |
187 colnames(SummPartN)=mapply(FUN=function(x,y) if(y){substr(x,1,2)}else{x} | |
188 ,colnames(SummPartN),test) | |
189 ListNuit=subset(colnames(SummPartN),test) | |
190 ListRef=subset(colnames(SummPartN),grepl(".y",colnames(SummPartN))) | |
191 testHide=match(ListRef,colnames(SummPartN))-1 | |
192 #to do: extend colors to other columns to improve readability | |
193 SummHTMLN=datatable(SummPartN, rownames = FALSE,options = list( | |
194 columnDefs = list(list(targets = testHide,visible = FALSE)))) %>% | |
195 formatStyle(columns = c("Code","Groupe","Nom francais","Nom scientifique","Risque d'erreur (%)"),valueColumns="Risque d'erreur (%)", | |
196 background = styleInterval(c(1, 10, 50), c("white", "khaki", "orange", "orangered"))) %>% | |
197 formatStyle(columns = ListNuit,valueColumns=ListRef, | |
198 background = styleEqual(c(1,2,3,4), c("palegoldenrod", "greenyellow", "limegreen", "darkgreen"))) | |
199 | |
200 saveWidget(SummHTMLN,"output-nightly.html") | |
201 write.table(SummPartN,"output-nightly.tabular",sep="\t",row.names=F) | |
202 | |
203 | |
204 #summary by hour | |
205 ActMoyH=dcast(data=IdC2,formula=IdExtrap~Heure | |
206 ,fun.aggregate=length) | |
207 ActMoyHA=aggregate(IdC2$`nom du fichier` | |
208 ,by=list(IdC2$IdExtrap,IdC2$Heure) | |
209 ,FUN=length) | |
210 | |
211 test=(as.numeric(colnames(ActMoyH))>12) | |
212 ColDebut=subset(colnames(ActMoyH),test) | |
213 ColFin=subset(colnames(ActMoyH),test==F) | |
214 ListH=c(ColDebut,ColFin) | |
215 neworder=c("IdExtrap",ColDebut,ColFin) | |
216 ActMoyH=ActMoyH[,..neworder] | |
217 | |
218 SummPartH=merge(SummPartshort,ActMoyH,by.x="Code",by.y="IdExtrap") | |
219 SummPartH=SummPartH[order(TriGroupe,decreasing=T),] | |
220 | |
221 | |
222 brks <- quantile(ActMoyHA$x, probs = seq(.05, .95, .05), na.rm = TRUE)-1 | |
223 clrs <- round(seq(255, 40, length.out = length(brks) + 1), 0) %>% | |
224 {paste0("rgb(255,", ., ",", ., ")")} | |
225 | |
226 | |
227 SummHTMLH=datatable(SummPartH, rownames = FALSE) %>% | |
228 formatStyle(columns = c("Code","Groupe","Nom francais","Nom scientifique","Risque d'erreur (%)"),valueColumns="Risque d'erreur (%)", | |
229 background = styleInterval(c(1, 10, 50), c("white", "khaki", "orange", "orangered"))) %>% | |
230 formatStyle(columns=ListH, backgroundColor = styleInterval(brks, clrs)) | |
231 | |
232 saveWidget(SummHTMLH,"output-hourly.html") | |
233 write.table(SummPartH,"output-hourly.tabular",sep="\t",row.names=F) | |
234 | |
235 | |
236 #saveWidget(SummHTML,paste0(substr(args[1],1,nchar(args[1])-9),"-summary.html")) | |
237 #write.table(SummPart,paste0(substr(args[1],1,nchar(args[1])-9),"-summary.csv"),sep="\t",row.names=F) | |
238 #saveWidget(SummHTMLN,paste0(substr(args[1],1,nchar(args[1])-9),"-nightly.html")) | |
239 #write.table(SummPartN,paste0(substr(args[1],1,nchar(args[1])-9),"-nightly.csv"),sep="\t",row.names=F) | |
240 #saveWidget(SummHTMLH,paste0(substr(args[1],1,nchar(args[1])-9),"-hourly.html")) | |
241 #write.table(SummPartH,paste0(substr(args[1],1,nchar(args[1])-9),"-hourly.csv"),sep="\t",row.names=F) | |
242 | |
243 | |
244 } | |
245 | |
246 | |
247 | |
248 | |
249 |