comparison batch_correction_wrapper.R @ 0:71d83d8920bf draft

planemo upload for repository https://github.com/workflow4metabolomics/batchcorrection.git commit de79117e6ab856420b87efca3675c7963688f975
author melpetera
date Tue, 09 Aug 2016 06:47:41 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:71d83d8920bf
1 #!/usr/bin/Rscript --vanilla --slave --no-site-file
2
3 ################################################################################################
4 # batch_correction_wrapper #
5 # #
6 # Author: Marion LANDI / Jean-Francois MARTIN / Melanie Petera #
7 # User: Galaxy #
8 # Original data: -- #
9 # Starting date: 22-07-2014 #
10 # Version 1: 22-07-2014 #
11 # Version 2: 08-12-2014 #
12 # Version 2.1: 09-01-2015 modification in Error message of sample matching #
13 # Version 2.2: 16-03-2015 inclusion of miniTools' functions for special characters #
14 # #
15 # #
16 # Input files: dataMatrix.txt ; sampleMetadata.txt ; variableMetadata.txt (for DBC) #
17 # Output files: graph_output.pdf ; corrected table ; diagnostic table #
18 # #
19 ################################################################################################
20
21
# The 'batch' package supplies parseCommandArgs().
library(batch)
# Command-line arguments, interpreted and returned as an R list of objects.
args <- parseCommandArgs(evaluate = FALSE)
24
# Source one or more R files located relative to the running script.
#
# Args:
#   ...: character file paths, each relative to the directory of the script
#        given to R through its "--file=" command-line argument.
#
# Returns:
#   NULL, invisibly. The function is called for its side effect: every
#   listed file is passed to source().
source_local <- function(...) {
  argv <- commandArgs(trailingOnly = FALSE)
  # Anchor the pattern so only a genuine "--file=<script>" argument matches
  # (not a user argument that merely contains that text), and keep the first
  # hit so base_dir is always a single directory.
  file_arg <- grep("^--file=", argv, value = TRUE)
  base_dir <- if (length(file_arg) > 0) {
    dirname(sub("^--file=", "", file_arg[1]))
  } else {
    # No script path available (e.g. interactive session): resolve the files
    # against the current working directory instead of the filesystem root.
    "."
  }
  # Looping over the elements themselves keeps a call without any file a
  # no-op; a 1:length() index would run over c(1, 0) in that case.
  for (rel_path in list(...)) {
    source(paste(base_dir, rel_path, sep = "/"))
  }
  invisible(NULL)
}
# Load the helper functions stored alongside this wrapper.
source_local(
  "Normalisation_QCpool.r",
  "easyrlibrary-lib/RcheckLibrary.R",
  "easyrlibrary-lib/miniTools.R"
)


## Input tables: tab-separated, with a header row, column names kept as given
idsample <- read.table(args$sampleMetadata, header = TRUE, sep = "\t", check.names = FALSE)
iddata <- read.table(args$dataMatrix, header = TRUE, sep = "\t", check.names = FALSE)

### Table match check
# match2() comes from the sourced helper files; its result is accumulated in
# table.check and handed to check.err() further down.
table.check <- match2(iddata, idsample, "sample")

### StockID
# stockID() (sourced helper) returns a list; its dataMatrix and Metadata
# elements replace the input tables, and id.match is kept in samp.id for the
# later call to reproduceID().
samp.id <- stockID(iddata, idsample, "sample")
iddata <- samp.id$dataMatrix
idsample <- samp.id$Metadata
samp.id <- samp.id$id.match
44
### Checking mandatory variables
# Every column listed here must exist, with this exact spelling, in the sample
# metadata. One block of message fragments is appended per missing column.
mand.check <- ""
for (mandcol in c("sampleType", "injectionOrder", "batch")) {
  if (!(mandcol %in% colnames(idsample))) {
    mand.check <- c(
      mand.check,
      "\nError: no '", mandcol, "' column in sample metadata.\n",
      "Note: table must include this exact column name (it is case-sensitive).\n"
    )
  }
}
# mand.check starts with one "" element, so a length above 1 means that at
# least one column is missing.
if (length(mand.check) > 1) {
  # Both objects are vectors of message fragments: concatenate them with c(),
  # as done for table.check elsewhere in this script. An element-wise paste()
  # would recycle one vector against the other and scramble the message.
  check.err(c(table.check, mand.check))
}
54
### Formatting
# Make the identifiers in the first sample-metadata column syntactically valid
# R names.
idsample[[1]] <- make.names(idsample[[1]])
# Use the first column of the data matrix as its row names.
dimnames(iddata)[[1]] <- iddata[[1]]

### Transposition of ions data
# Drop the identifier column, transpose, then put the resulting row names back
# as the first column so that it can serve as the merge key.
idTdata <- t(iddata[, 2:ncol(iddata)])
idTdata <- data.frame(dimnames(idTdata)[[1]], idTdata)

### Merge of the 2 tables on their first column
# (works even when the two data frames are not sorted on the same key)
id <- merge(idsample, idTdata, by.x = 1, by.y = 1)

id$batch <- as.factor(id$batch)
# Keep only the rows whose sampleType is 'pool' or 'sample'.
ids <- id[id$sampleType == "pool" | id$sampleType == "sample", ]
# Number of columns of the sample metadata table.
nbid <- ncol(idsample)
69
### Checking the number of samples and pools

# At least 2 rows of type 'sample' are required.
if (sum(ids$sampleType == "sample", na.rm = TRUE) < 2) {
  table.check <- c(
    table.check,
    "\nError: less than 2 samples specified in sample metadata.",
    "\nMake sure this is not due to errors in sampleType coding.\n"
  )
}

# Number of pools in each batch: B[k] holds the count for the k-th batch level.
B <- rep(0, length(levels(ids$batch)))
# seq_along() visits every batch level; iterating over the bare length would
# only visit the last index and leave all the other counts at 0.
for (nbB in seq_along(levels(ids$batch))) {
  in.batch <- which(ids$batch == levels(ids$batch)[nbB])
  B[nbB] <- sum(ids$sampleType[in.batch] == "pool", na.rm = TRUE)
}
# NOTE(review): this raises the error only when NO batch has 2 pools or more.
# If every batch is required to have at least 2 pools, the condition should be
# any(B < 2) instead -- confirm the intended rule before changing it.
if (!any(B > 1)) {
  table.check <- c(
    table.check,
    "\nError: less than 2 pools specified in each batch in sample metadata.",
    "\nMake sure this is not due to errors in sampleType coding.\n"
  )
}
87
### Factor of interest
factbio <- args$ref_factor


if (args$analyse == "batch_correction") {
  ## Variable metadata table (same format as the other input tables)
  metaion <- read.table(args$variableMetadata, header = TRUE, sep = "\t", check.names = FALSE)
  ## Add the data matrix / variable metadata match check to the pending
  ## messages, then hand all of them to check.err()
  table.check <- c(table.check, match2(iddata, metaion, "variable"))
  check.err(table.check)

  ## Settings taken from the command line
  detail <- args$detail
  method <- args$method

  ## Output destinations
  outfic <- args$variable_for_simca
  outlog <- args$graph_output

  ## Run the correction, save the raw result object, then write both tables
  res <- norm_QCpool(
    ids, nbid, outfic, outlog, factbio, metaion, detail,
    FALSE, FALSE, method, args$span
  )
  save(res, file = args$rdata_output)
  write.table(
    reproduceID(res[[1]], res[[3]], "sample", samp.id)$dataMatrix,
    file = args$dataMatrix_out, sep = "\t", row.names = FALSE, quote = FALSE
  )
  write.table(
    res[[2]],
    file = args$variableMetadata_out, sep = "\t", row.names = FALSE, quote = FALSE
  )
} else {
  ## Any other analysis value: hand the pending messages to check.err()
  check.err(table.check)

  ## Output destinations
  out_graph_pdf <- args$out_graph_pdf
  out_preNormSummary <- args$out_preNormSummary

  ## Run plotsituation() with the two output destinations
  plotsituation(ids, nbid, out_graph_pdf, out_preNormSummary, factbio, args$span)
}

# The argument list is not needed any more.
rm(args)