Mercurial > repos > melpetera > batchcorrection
comparison BC/batch_correction_3Llauncher.R @ 4:23314e1192d4 draft default tip
Uploaded
| author | melpetera |
|---|---|
| date | Thu, 14 Jan 2021 09:56:58 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 3:73892ef177e3 | 4:23314e1192d4 |
|---|---|
| 1 ############################################################################################################### | |
| 2 # batch_correction_3Llauncher # | |
| 3 # # | |
| 4 # Authors: Jean-Francois MARTIN / Melanie Petera # | |
| 5 # Starting date: 04-08-2020 # | |
| 6 # Based on batch_correction_wrapper.R version 2.91 # | |
| 7 # Version 1: 02-10-2020 # | |
| 8 # - split of tool-linked code and script-linked one # | |
| 9 # - handling of sample tags' parameters # | |
| 10 # - accepting samples beyond pools and samples # | |
| 11 # - dealing with special characters in IDs and column names # | |
| 12 # - adding a min.norm argument to the function # | |
| 13 # # | |
| 14 # Input files: dataMatrix.txt, sampleMetadata.txt, variableMetadata.txt (BC only) # | |
| 15 # Output files: graph.pdf, corrected table (BC only), diagnostic table (DBC only), variableMetadata (BC only) # | |
| 16 # # | |
| 17 ############################################################################################################### | |
| 18 | |
| 19 meth3L <- function(idsample,iddata,sample_type_col_name,injection_order_col_name,batch_col_name,sample_type_tags, | |
| 20 factbio,analyse,metaion,detail,method,outlog,span,valnull, | |
| 21 rdata_output,dataMatrix_out,variableMetadata_out,out_graph_pdf,out_preNormSummary,min.norm){ | |
| 22 | |
| 23 ## Import function | |
| 24 tab.import <- function(tested.file,tabtype){ | |
| 25 tab.res <- tryCatch(read.table(tested.file,header=TRUE,sep='\t',check.names=FALSE,comment.char = ''), error=conditionMessage) | |
| 26 if(length(tab.res)==1){ | |
| 27 stop(paste("Could not import the",tabtype,"file. There may be issues in your table integrity.\nCorresponding R error message:\n",tab.res)) | |
| 28 }else{ | |
| 29 tab.comp <- tryCatch(read.table(tested.file,header=TRUE,sep='\t',check.names=FALSE,comment.char = '',quote=""), error=conditionMessage) | |
| 30 if((length(tab.comp)!=1)&&(dim(tab.res)!=dim(tab.comp))){ # wrong original import due to quotes inside a column name | |
| 31 return(tab.comp) | |
| 32 }else{ return(tab.res) } | |
| 33 } | |
| 34 } | |
| 35 | |
| 36 ## Reading of input files | |
| 37 idsample=tab.import(idsample,"sampleMetadata") | |
| 38 iddata=tab.import(iddata,"dataMatrix") | |
| 39 | |
| 40 ### Table match check | |
| 41 table.check <- match2(iddata,idsample,"sample") | |
| 42 if(length(table.check)>1){check.err(table.check)} | |
| 43 | |
| 44 ### StockID | |
| 45 samp.id <- stockID(iddata,idsample,"sample") | |
| 46 iddata<-samp.id$dataMatrix ; idsample<-samp.id$Metadata ; samp.id<-samp.id$id.match | |
| 47 | |
| 48 ### Checking mandatory variables | |
| 49 mand.check <- "" | |
| 50 for(mandcol in c(sample_type_col_name, injection_order_col_name, batch_col_name)){ | |
| 51 if(!(mandcol%in%colnames(idsample))){ | |
| 52 mand.check <- c(mand.check,"\nError: no '",mandcol,"' column in sample metadata.\n", | |
| 53 "Note: column names are case-sensitive.\n") | |
| 54 } | |
| 55 } | |
| 56 if(length(mand.check)>1){ | |
| 57 mand.check <- c(mand.check,"\nFor more information, see the help section or:", | |
| 58 "\n http://workflow4metabolomics.org/sites/", | |
| 59 "workflow4metabolomics.org/files/files/w4e-2016-data_processing.pdf\n") | |
| 60 check.err(mand.check) | |
| 61 } | |
| 62 | |
| 63 if(analyse == "batch_correction") { | |
| 64 ## Reading of Metadata Ions file | |
| 65 metaion=read.table(metaion,header=T,sep='\t',check.names=FALSE,comment.char = '') | |
| 66 ## Table match check | |
| 67 table.check <- c(table.check,match2(iddata,metaion,"variable")) | |
| 68 ## StockID | |
| 69 var.id <- stockID(iddata,metaion,"variable") | |
| 70 iddata<-var.id$dataMatrix ; metaion<-var.id$Metadata ; var.id<-var.id$id.match | |
| 71 } | |
| 72 | |
| 73 ### Formating | |
| 74 idsample[[1]]=make.names(idsample[[1]]) | |
| 75 dimnames(iddata)[[1]]=iddata[[1]] | |
| 76 | |
| 77 ### Transposition of ions data | |
| 78 idTdata=t(iddata[,2:dim(iddata)[2]]) | |
| 79 idTdata=data.frame(dimnames(idTdata)[[1]],idTdata) | |
| 80 | |
| 81 ### Merge of 2 files (ok even if the two dataframe are not sorted on the same key) | |
| 82 ids=merge(idsample, idTdata, by.x=1, by.y=1) | |
| 83 | |
| 84 ids[[batch_col_name]]=as.factor(ids[[batch_col_name]]) | |
| 85 nbid=dim(idsample)[2] | |
| 86 | |
| 87 ### Checking the number of sample and pool | |
| 88 | |
| 89 # least 2 samples | |
| 90 if(length(which(ids[[sample_type_col_name]] %in% sample_type_tags$sample))<2){ | |
| 91 table.check <- c(table.check,"\nError: less than 2 samples specified in sample metadata.", | |
| 92 "\nMake sure this is not due to errors in your ",sample_type_col_name," coding.\n") | |
| 93 } | |
| 94 | |
| 95 # least 2 pools per batch for all batchs | |
| 96 B <- rep(0,length(levels(ids[[batch_col_name]]))) | |
| 97 for(nbB in 1:length(levels(ids[[batch_col_name]]))){ | |
| 98 B[nbB]<-length(which(ids[which(ids[[batch_col_name]]==(levels(ids[[batch_col_name]])[nbB])),,drop=FALSE][[sample_type_col_name]] %in% sample_type_tags$pool)) | |
| 99 } | |
| 100 if(length(which(B>1))==0){ | |
| 101 table.check <- c(table.check,"\nError: less than 2 pools specified in at least one batch in sample metadata.", | |
| 102 "\nMake sure this is not due to errors in your ",sample_type_col_name," coding.\n") | |
| 103 } | |
| 104 | |
| 105 ### Checking the unicity of samples and variables | |
| 106 uni.check <- function(tested.tab,tabtype,err.obj){ | |
| 107 unicity <- duplicated(tested.tab[,1]) | |
| 108 if(sum(unicity)>0){ | |
| 109 #Sending back an explicit error | |
| 110 duptable <- t(t(table(tested.tab[,1][unicity])+1)) | |
| 111 err.obj <- c(err.obj,paste0("\n-------\nError: your '",tabtype,"' IDs contain duplicates:\n"), | |
| 112 paste(rownames(duptable),duptable,sep=": ",collapse="\n"), | |
| 113 "\nSince identifiers are meant to be unique, please check your data.\n-------\n") | |
| 114 } | |
| 115 return(err.obj) | |
| 116 } | |
| 117 table.check <- uni.check(ids,"sample",table.check) | |
| 118 if(analyse == "batch_correction"){table.check <- uni.check(metaion,"variable",table.check)} | |
| 119 | |
| 120 ## error check | |
| 121 check.err(table.check) | |
| 122 | |
| 123 | |
| 124 ### BC/DBC-specific processing | |
| 125 | |
| 126 # Gathering mandatory information in a single object | |
| 127 sm.meta <- list(batch=batch_col_name, injectionOrder=injection_order_col_name, sampleType=sample_type_col_name, sampleTag=sample_type_tags) | |
| 128 | |
| 129 if(analyse == "batch_correction") { | |
| 130 ## Launch | |
| 131 res = norm_QCpool(ids,nbid,outlog,factbio,metaion,detail,FALSE,FALSE,method,span,valnull,sm.meta,min.norm) | |
| 132 ## Get back original IDs | |
| 133 var.id <- reproduceID(res[[1]],res[[2]],"variable",var.id) | |
| 134 res[[1]] <- var.id$dataMatrix ; res[[2]] <- var.id$Metadata | |
| 135 samp.id <- reproduceID(res[[1]],res[[3]],"sample",samp.id) | |
| 136 res[[1]] <- samp.id$dataMatrix ; res[[3]] <- samp.id$Metadata | |
| 137 ## Save files | |
| 138 save(res, file=rdata_output) | |
| 139 write.table(res[[1]], file=dataMatrix_out, sep = '\t', row.names=FALSE, quote=FALSE) | |
| 140 write.table(res[[2]], file=variableMetadata_out, sep = '\t', row.names=FALSE, quote=FALSE) | |
| 141 }else{ | |
| 142 ## Launch | |
| 143 plotsituation(ids,nbid,out_graph_pdf,out_preNormSummary,factbio,span,sm.meta) | |
| 144 } | |
| 145 | |
| 146 }#end of meth3L |
