0
|
1 library(data.table)
|
|
2 library(DT)
|
|
3 library(htmlwidgets)
|
|
4
|
|
5 f2p <- function(x) #get date-time data from recording file names
|
|
6 {
|
|
7 if (is.data.frame((x)[1])) {pretemps <- vector(length = nrow(x))}
|
|
8 op <- options(digits.secs = 3)
|
|
9 pretemps <- paste(substr(x, nchar(x) - 18, nchar(x)-4), ".", substr(x, nchar(x) - 2, nchar(x)), sep = "")
|
|
10 strptime(pretemps, "%Y%m%d_%H%M%OS",tz="UTC")-7200
|
|
11 }
|
|
12
|
|
13 args <- commandArgs(trailingOnly = TRUE)
|
|
14 #print(args)
|
|
15 EchelleErreur=c("","POSSIBLE","PROBABLE","SUR")
|
|
16 EchelleNumErreur=c(99,50,10,1)
|
|
17
|
|
18 #for test
|
|
19 #inputest=list.files("C:/Users/Yves Bas/Documents/GitHub/65MO_Galaxy-E/raw_scripts/Vigie-Chiro/output_IdValid_input_BilanEnrichi/",pattern="IdC2.csv",full.names=T)
|
|
20 #for (i in 1:length(inputest))
|
|
21 #{
|
|
22 # args=c(inputest[i],"refPF.csv","SpeciesList.csv")
|
|
23
|
|
24
|
|
25 IdC2=fread(args[1])
|
|
26 refPF=fread(args[2])
|
|
27 GroupList=fread(args[3])
|
|
28
|
|
29 if(substr(IdC2$`nom du fichier`[1],2,2)!="a")
|
|
30 {
|
|
31 print("Protocole non conforme, ce script doit etre lance pour un protocole Point Fixe")
|
|
32 }else{
|
|
33
|
|
34
|
|
35 #compute error risk by species (minimum error among files)
|
|
36 #to be replaced by glm outputs if I'll have time
|
|
37 RisqueErreurT=aggregate(IdC2$IdProb,by=list(IdC2$IdExtrap),FUN=function(x) ceiling((1-max(x-0.0001))*100))
|
|
38 barplot(RisqueErreurT$x,names.arg=RisqueErreurT$Group.1,las=2)
|
|
39 #compute error risk accoring to observer/validator (a little dirty because it relies on alphabetical order of confidence classes: POSSIBLE < PROBABLE < SUR)
|
|
40 RisqueErreurOV0=match(IdC2$ConfV,EchelleErreur)
|
|
41 RisqueErreurOV=aggregate(RisqueErreurOV0,by=list(IdC2$IdExtrap)
|
|
42 ,FUN=max)
|
|
43 RisqueErreurOV2=EchelleNumErreur[RisqueErreurOV$x]
|
|
44 #compute minimum error risk between man and machine
|
|
45 RisqueErreur=pmin(RisqueErreurT$x,RisqueErreurOV2)
|
|
46
|
|
47 #compute number of files validated per species
|
|
48 FichValid=aggregate(IdC2$IdV,by=list(IdC2$IdExtrap,IdC2$'nom du fichier')
|
|
49 ,FUN=function(x) sum(x!=""))
|
|
50 NbValid2=aggregate(FichValid$x,by=list(FichValid$Group.1),FUN=function(x) sum(x>0))
|
|
51
|
|
52 DiffC50=vector() # to store the median of confidence difference between unvalidated records and validated ones
|
|
53 DiffT50=vector() # to store the median of time difference between unvalidated records and validated ones
|
|
54 for (j in 1:nlevels(as.factor(IdC2$IdExtrap)))
|
|
55 {
|
|
56 IdSp=subset(IdC2
|
|
57 ,IdC2$IdExtrap==levels(as.factor(IdC2$IdExtrap))[j])
|
|
58 IdSp=IdSp[order(IdSp$IdProb),]
|
|
59 IdSpV=subset(IdSp,IdSp$IdV!="")
|
|
60 if(nrow(IdSpV)>0)
|
|
61 {
|
|
62 cuts <- c(-Inf, IdSpV$IdProb[-1]-diff(IdSpV$IdProb)/2, Inf)
|
|
63 CorrC=findInterval(IdSp$IdProb, cuts)
|
|
64 CorrC2=IdSpV$IdProb[CorrC]
|
|
65 DiffC=abs(IdSp$IdProb-CorrC2)
|
|
66 DiffC50=c(DiffC50,median(DiffC))
|
|
67
|
|
68 IdSp=IdSp[order(IdSp$TimeNum),]
|
|
69 IdSpV=subset(IdSp,IdSp$IdV!="")
|
|
70 cuts <- c(-Inf, IdSpV$TimeNum[-1]-diff(IdSpV$TimeNum)/2, Inf)
|
|
71 CorrT=findInterval(IdSp$TimeNum, cuts)
|
|
72 CorrT2=IdSpV$TimeNum[CorrT]
|
|
73 DiffT=abs(IdSp$TimeNum-CorrT2)
|
|
74 DiffT50=c(DiffT50,median(DiffT))
|
|
75 }else{
|
|
76 DiffC50=c(DiffC50,Inf)
|
|
77 DiffT50=c(DiffT50,Inf)
|
|
78 }
|
|
79 }
|
|
80 #compute an index of validation effort per species
|
|
81 EffortV=1/DiffC50/DiffT50
|
|
82 EffortClass=(EffortV>0.0005)+(EffortV>0.005)+RisqueErreurOV$x
|
|
83 cbind(RisqueErreurOV,EffortV,DiffC50,DiffT50)
|
|
84 barplot(EffortClass-1,names.arg=NbValid2$Group.1,las=2)
|
|
85 ClassEffortV=c("-","FAIBLE","SUFFISANT","SUFFISANT","FORT","FORT")
|
|
86 EffortClassMot=ClassEffortV[EffortClass]
|
|
87
|
|
88
|
|
89 #get date-night
|
|
90 pourDateNuit=IdC2$TimeNum-12*3600 #bricolage-decalage de 12 heures pour ramener a la date du debut de nuit
|
|
91 DateNuit=as.Date.POSIXct(pourDateNuit) # date of the beginning of the night
|
|
92 DateJour=as.Date.POSIXct(IdC2$TimeNum) # date (UTC+0)
|
|
93 IdC2$DateNuit=DateNuit
|
|
94 IdC2$DateJour=DateJour
|
|
95 NbNuit=as.numeric(max(IdC2$DateNuit)-min(IdC2$DateNuit))+1
|
|
96
|
|
97 #compare activity / reference frame
|
|
98 ActMoy=aggregate(IdC2$`nom du fichier`
|
|
99 ,by=list(IdC2$IdExtrap),FUN=function(x) length(x)/NbNuit)
|
|
100 ListSpref=match(ActMoy$Group.1,refPF$Espece)
|
|
101 Subref=refPF[ListSpref]
|
|
102 QualifAct=vector()
|
|
103 for (k in 1:nrow(ActMoy))
|
|
104 {
|
|
105 if(is.na(Subref$Q25[k]))
|
|
106 {
|
|
107 QualifAct=c(QualifAct,NA)
|
|
108 }else{
|
|
109 cuts=cbind(-Inf,as.numeric(Subref$Q25[k]),as.numeric(Subref$Q75[k])
|
|
110 ,as.numeric(Subref$Q98[k]),Inf)
|
|
111
|
|
112 QualifAct=c(QualifAct,findInterval(ActMoy$x[k],cuts,left.open=T))
|
|
113 }
|
|
114 }
|
|
115 ClassAct=c("FAIBLE","MODEREE","FORTE","TRES FORTE")
|
|
116 QualifActMot=ClassAct[QualifAct]
|
|
117
|
|
118 #compute activity by nights (to be completed)
|
|
119 #ActNuit=aggregate(IdC2$`nom du fichier`,by=list(IdC2$DateNuit,IdC2$IdExtrap),FUN=length)
|
|
120
|
|
121 #organize the csv summary
|
|
122 SummPart0=cbind(Esp=levels(as.factor(IdC2$IdExtrap))
|
|
123 ,RisqueErreur,NbValid=NbValid2$x,EffortValid=EffortClassMot
|
|
124 ,Contacts_Nuit=round(ActMoy$x),Niveau_Activite=QualifActMot)
|
|
125
|
|
126
|
|
127 InfoSp=c("GroupFR","NomFR","Scientific name","Esp")
|
|
128 GroupShort=GroupList[,InfoSp,with=FALSE]
|
|
129 #GroupShort=GroupList[,..InfoSp]
|
|
130 SummPart=merge(GroupShort,SummPart0,by="Esp")
|
|
131 IndexGroupe=c("Autre","Sauterelle","Chauve-souris")
|
|
132 SummPart$IndexSumm=match(SummPart$GroupFR,IndexGroupe)
|
|
133 SummPart=SummPart[with(SummPart
|
|
134 ,order(IndexSumm,as.numeric(Contacts_Nuit),decreasing=T)),]
|
|
135 colnames(SummPart)=c("Code","Groupe","Nom francais","Nom scientifique"
|
|
136 ,"Risque d'erreur (%)","Nb Validations"
|
|
137 ,"Effort de validation","Nb de Contacts par Nuit"
|
|
138 ,"Niveau d'Activite","TriGroupe")
|
|
139
|
|
140 #to do: extend colors to other columns to improve readability
|
|
141 SummHTML=datatable(SummPart, rownames = FALSE) %>%
|
|
142 formatStyle(columns = c("Code","Groupe","Nom francais","Nom scientifique","Risque d'erreur (%)"),valueColumns="Risque d'erreur (%)",
|
|
143 background = styleInterval(c(1, 10, 50), c("white", "khaki", "orange", "orangered"))) %>%
|
|
144 formatStyle(columns = "Effort de validation",
|
|
145 background = styleEqual(c("-","FAIBLE","SUFFISANT","FORT"), c("white", "cyan", "royalblue", "darkblue"))) %>%
|
|
146 formatStyle(columns = c("Nb de Contacts par Nuit","Niveau d'Activite"),valueColumns="Niveau d'Activite",
|
|
147 background = styleEqual(c("FAIBLE","MODEREE","FORTE","TRES FORTE"), c("palegoldenrod", "greenyellow", "limegreen", "darkgreen")))
|
|
148
|
|
149 saveWidget(SummHTML,"output-summary.html")
|
|
150 write.table(SummPart,"output-summary.tabular",sep="\t",row.names=F)
|
|
151
|
|
152 #compute number of files validated per night/hour
|
|
153 IdC2$Heure=sapply(IdC2$`nom du fichier`,FUN=function(x) substr(x,nchar(x)-9,nchar(x)-8))
|
|
154
|
|
155 ActNuit=aggregate(IdC2$`nom du fichier`,by=list(IdC2$IdExtrap,IdC2$Session),FUN=length)
|
|
156 ListSpref=match(ActNuit$Group.1,refPF$Espece)
|
|
157 Subref=refPF[ListSpref]
|
|
158
|
|
159
|
|
160 QualifActN=vector()
|
|
161 for (k in 1:nrow(ActNuit))
|
|
162 {
|
|
163 if(is.na(Subref$Q25[k]))
|
|
164 {
|
|
165 QualifActN=c(QualifActN,NA)
|
|
166 }else{
|
|
167 cuts=cbind(-Inf,as.numeric(Subref$Q25[k]),as.numeric(Subref$Q75[k])
|
|
168 ,as.numeric(Subref$Q98[k]),Inf)
|
|
169
|
|
170 QualifActN=c(QualifActN,findInterval(ActNuit$x[k],cuts,left.open=T))
|
|
171 }
|
|
172 }
|
|
173 ActNuit$QualifActN=QualifActN
|
|
174
|
|
175 ActNuitT=dcast(data=ActNuit,formula=Group.1~Group.2
|
|
176 ,value.var="x")
|
|
177 ActNuitT[is.na(ActNuitT)]=0
|
|
178 RefNuitT=dcast(data=ActNuit,formula=Group.1~Group.2
|
|
179 ,value.var="QualifActN")
|
|
180 ARNuit=merge(ActNuitT,RefNuitT,by="Group.1")
|
|
181
|
|
182 SummPartshort=cbind(SummPart[,c(1:5)],TriGroupe=SummPart[,TriGroupe])
|
|
183 SummPartN=merge(SummPartshort,ARNuit,by.x="Code",by.y="Group.1")
|
|
184 SummPartN=SummPartN[order(TriGroupe,decreasing=T),]
|
|
185
|
|
186 test=grepl(".x",colnames(SummPartN))
|
|
187 colnames(SummPartN)=mapply(FUN=function(x,y) if(y){substr(x,1,2)}else{x}
|
|
188 ,colnames(SummPartN),test)
|
|
189 ListNuit=subset(colnames(SummPartN),test)
|
|
190 ListRef=subset(colnames(SummPartN),grepl(".y",colnames(SummPartN)))
|
|
191 testHide=match(ListRef,colnames(SummPartN))-1
|
|
192 #to do: extend colors to other columns to improve readability
|
|
193 SummHTMLN=datatable(SummPartN, rownames = FALSE,options = list(
|
|
194 columnDefs = list(list(targets = testHide,visible = FALSE)))) %>%
|
|
195 formatStyle(columns = c("Code","Groupe","Nom francais","Nom scientifique","Risque d'erreur (%)"),valueColumns="Risque d'erreur (%)",
|
|
196 background = styleInterval(c(1, 10, 50), c("white", "khaki", "orange", "orangered"))) %>%
|
|
197 formatStyle(columns = ListNuit,valueColumns=ListRef,
|
|
198 background = styleEqual(c(1,2,3,4), c("palegoldenrod", "greenyellow", "limegreen", "darkgreen")))
|
|
199
|
|
200 saveWidget(SummHTMLN,"output-nightly.html")
|
|
201 write.table(SummPartN,"output-nightly.tabular",sep="\t",row.names=F)
|
|
202
|
|
203
|
|
204 #summary by hour
|
|
205 ActMoyH=dcast(data=IdC2,formula=IdExtrap~Heure
|
|
206 ,fun.aggregate=length)
|
|
207 ActMoyHA=aggregate(IdC2$`nom du fichier`
|
|
208 ,by=list(IdC2$IdExtrap,IdC2$Heure)
|
|
209 ,FUN=length)
|
|
210
|
|
211 test=(as.numeric(colnames(ActMoyH))>12)
|
|
212 ColDebut=subset(colnames(ActMoyH),test)
|
|
213 ColFin=subset(colnames(ActMoyH),test==F)
|
|
214 ListH=c(ColDebut,ColFin)
|
|
215 neworder=c("IdExtrap",ColDebut,ColFin)
|
|
216 ActMoyH=ActMoyH[,..neworder]
|
|
217
|
|
218 SummPartH=merge(SummPartshort,ActMoyH,by.x="Code",by.y="IdExtrap")
|
|
219 SummPartH=SummPartH[order(TriGroupe,decreasing=T),]
|
|
220
|
|
221
|
|
222 brks <- quantile(ActMoyHA$x, probs = seq(.05, .95, .05), na.rm = TRUE)-1
|
|
223 clrs <- round(seq(255, 40, length.out = length(brks) + 1), 0) %>%
|
|
224 {paste0("rgb(255,", ., ",", ., ")")}
|
|
225
|
|
226
|
|
227 SummHTMLH=datatable(SummPartH, rownames = FALSE) %>%
|
|
228 formatStyle(columns = c("Code","Groupe","Nom francais","Nom scientifique","Risque d'erreur (%)"),valueColumns="Risque d'erreur (%)",
|
|
229 background = styleInterval(c(1, 10, 50), c("white", "khaki", "orange", "orangered"))) %>%
|
|
230 formatStyle(columns=ListH, backgroundColor = styleInterval(brks, clrs))
|
|
231
|
|
232 saveWidget(SummHTMLH,"output-hourly.html")
|
|
233 write.table(SummPartH,"output-hourly.tabular",sep="\t",row.names=F)
|
|
234
|
|
235
|
|
236 #saveWidget(SummHTML,paste0(substr(args[1],1,nchar(args[1])-9),"-summary.html"))
|
|
237 #write.table(SummPart,paste0(substr(args[1],1,nchar(args[1])-9),"-summary.csv"),sep="\t",row.names=F)
|
|
238 #saveWidget(SummHTMLN,paste0(substr(args[1],1,nchar(args[1])-9),"-nightly.html"))
|
|
239 #write.table(SummPartN,paste0(substr(args[1],1,nchar(args[1])-9),"-nightly.csv"),sep="\t",row.names=F)
|
|
240 #saveWidget(SummHTMLH,paste0(substr(args[1],1,nchar(args[1])-9),"-hourly.html"))
|
|
241 #write.table(SummPartH,paste0(substr(args[1],1,nchar(args[1])-9),"-hourly.csv"),sep="\t",row.names=F)
|
|
242
|
|
243
|
|
244 }
|
|
245
|
|
246
|
|
247
|
|
248
|
|
249
|