###############################################################################
# In summary, this script calculates the robust z-score for one or more plates
# of data.
#
# Args:
#   input.data.frame contains one or more columns of plate data. The first column
#   must contain the well annotation, i.e. A01, A02 etc. Each subsequent column
#   represents a plate of data where the column name is the plate name.
#
#   plate.conf.data.frame is a file of the following format:
#   well, type (syntax must be either "poscontr" or "negcontr" or "empty"), name (name of control)
#
# Returns:
#   robust z-score normalized values per plate.
#
# Author: kate gould
###############################################################################

use strict;
use warnings;
use IO::Handle;
use File::Spec;
use File::Temp qw/ tempfile tempdir /;

# Scratch directory holding the generated R script and R's console log.
# CLEANUP => 0 deliberately keeps it after exit so the log can be inspected
# when the R run fails.
my $tdir = tempdir( CLEANUP => 0 );

# check to make sure having correct input and output files
my $usage = "usage: robustZScoreNormalization.pl [TABULAR.in] [TABULAR.in] [TABULAR.out] \n";
die $usage unless @ARGV == 3;

# Get the input arguments. They are made absolute here because the script
# changes into the temp directory before R runs; relative paths would
# otherwise no longer resolve from inside R.
my ($plateData, $plateConfig, $zNorm) = map { File::Spec->rel2abs($_) } @ARGV;

# Fail early if an input is unreadable or the output cannot be created.
# The handles are only used as access checks, so they are closed right away
# instead of being held open while R writes to the same output file.
for my $input ($plateData, $plateConfig) {
    open(my $in_fh, "<", $input) or die("Could not open file $input: $!\n");
    close($in_fh);
}
open(my $out_fh, ">", $zNorm) or die("Could not open file $zNorm: $!\n");
close($out_fh) or die("Could not close file $zNorm: $!\n");

# Render a Perl string as a double-quoted R string literal, escaping the
# characters (backslash and double quote) that would otherwise end or
# corrupt the literal.
sub _r_string {
    my ($text) = @_;
    $text =~ s/([\\"])/\\$1/g;
    return qq{"$text"};
}

my $r_plate_data   = _r_string($plateData);
my $r_plate_config = _r_string($plateConfig);
my $r_z_norm       = _r_string($zNorm);

# Construct the R script that performs the robust z-score normalisation and
# save it in the temp directory.
chdir $tdir or die "Cannot change directory to $tdir: $!\n";
my $r_script = "robustZScoreNormalization.r";

open(my $r_fh, ">", $r_script) or die "Cannot open $r_script: $!\n\n";
print {$r_fh} <<"END_R_SCRIPT";

#options(show.error.messages = FALSE);
header <- read.table($r_plate_data, head=F, sep="\\t", comment="", nrows=1);
input.data.frame <- read.table($r_plate_data, head=F, sep="\\t", skip=1);
plate.conf.data.frame <- read.table($r_plate_config, head=T, sep="\\t", comment="");

# assumed second column is type ie negcontr or poscontr and third column is name of control
# every well listed in the plate configuration is left out of the sample population
excluded.wells <- unique(plate.conf.data.frame[,1])

robust.z.score.normalise <- function(x, y) return ((x-median(y))/mad(y))
robust.z.score.normalisation.data.frame <- data.frame(well=input.data.frame[,1], stringsAsFactors=FALSE)

# seq_len(...)[-1] yields an empty sequence when only the well column exists
for (i in seq_len(ncol(input.data.frame))[-1]){
  replicate.sample.values <- input.data.frame[((!(input.data.frame[,1] %in% excluded.wells))&(!(is.na(input.data.frame[,i])))),][,i]
  robust.z.score.normalisation.data.frame <- cbind(robust.z.score.normalisation.data.frame, round(sapply(input.data.frame[,i], FUN=robust.z.score.normalise, replicate.sample.values), 2))
}
names(robust.z.score.normalisation.data.frame) <- names(input.data.frame)
write.table(header, file=$r_z_norm, quote=F, sep="\\t", row.names=F, col.names=F);
write.table(robust.z.score.normalisation.data.frame, file=$r_z_norm, quote=F, sep="\\t", row.names=F, col.names=F, append=T);
#eof
END_R_SCRIPT

close($r_fh) or die "Cannot close $r_script: $!\n";

# Run R on the generated script; its console output goes to a log next to
# the script. The exit status is checked so a failed normalisation is not
# silently reported as success with an empty output file.
my $status = system("R --no-restore --no-save --no-readline < $r_script > $r_script.out");
if ($status == -1) {
    die "Could not run R: $!\n";
}
elsif ($status != 0) {
    die "R exited abnormally (wait status $status); see $tdir/$r_script.out\n";
}