Mercurial > repos > pmac > robustzscorenormalization
comparison robustZScoreNormalization.pl @ 0:7ecbc3ec2f55 draft default tip
Uploaded
author | pmac |
---|---|
date | Wed, 01 Jun 2016 03:59:51 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:7ecbc3ec2f55 |
---|---|
###############################################################################
# In summary this script calculates the robust z score for one or more plates
# of data. The calculation itself is delegated to R: this script validates its
# arguments, generates an R script in a temporary directory and runs it.
#
# Usage:
#   robustZScoreNormalization.pl [TABULAR.in] [TABULAR.in] [TABULAR.out]
#
# Args:
# input.data.frame (1st argument) contains one or more columns of plate data.
# The first column must contain the well annotation i.e. A01, A02 etc. Each
# subsequent column represents a plate of data where the column name is the
# plate name. The first line of the file is treated as a header and is copied
# to the output unchanged.
#
# plate.conf.data.frame (2nd argument) is a file of the following format:
# well, type (syntax must be either "poscontr" or "negcontr" or "empty"), name (name of control)
# Every well listed in its first column is left out of the population used to
# estimate the median and MAD of a plate, whatever its type.
#
# Returns:
# robust z score normalized values per plate, written to the 3rd argument as a
# tab separated table: (value - median(samples)) / mad(samples), rounded to two
# decimal places, where "samples" are the non-NA values of the plate in wells
# that are not listed in the plate configuration.
#
# Exit status is non-zero when an argument is unusable or when R fails.
#
# Author: kate gould
###############################################################################

use strict;
use warnings;
use Cwd qw/ getcwd /;
use File::Spec;
use File::Temp qw/ tempfile tempdir /;
use IO::Handle;

# Escape a string so that it can be placed between double quotes in R source.
# Only backslash and double quote are special inside an R string literal for
# our purposes (file paths); both are prefixed with a backslash.
sub escape_for_r_string {
    my ($text) = @_;
    $text =~ s/([\\"])/\\$1/g;
    return $text;
}

# check to make sure having correct input and output files
my $usage = "usage: robustZScoreNormalization.pl [TABULAR.in] [TABULAR.in] [TABULAR.out] \n";
die $usage unless @ARGV == 3;

# get the input arguments
# They are made absolute straight away: R is run from inside a temporary
# directory, so a relative path would otherwise point at the wrong place.
my ( $plateData, $plateConfig, $zNorm ) = map { File::Spec->rel2abs($_) } @ARGV;

# Fail early, with the system error, if an input cannot be read or the output
# cannot be created. The handles are closed again immediately because it is R,
# not this script, that reads and writes the files.
for my $input ( $plateData, $plateConfig ) {
    open( my $in_fh, "<", $input ) or die("Could not open file $input: $! \n");
    close($in_fh);
}
open( my $out_fh, ">", $zNorm ) or die("Could not open file $zNorm: $! \n");
close($out_fh) or die("Could not close file $zNorm: $! \n");

# Work inside a private temporary directory. It is removed automatically at
# exit unless R fails, in which case it is kept so the R log can be inspected.
my $start_dir = getcwd();
my $tdir      = tempdir( CLEANUP => 1 );
chdir $tdir or die "Cannot change to temporary directory $tdir: $! \n";

# names of the generated R script and of the file capturing R's standard output
my $r_script = "robustZScoreNormalization.r";
my $r_log    = "$r_script.out";

# paths as they must appear inside R string literals
my $r_plate_data   = escape_for_r_string($plateData);
my $r_plate_config = escape_for_r_string($plateConfig);
my $r_z_norm       = escape_for_r_string($zNorm);

# R script implementing the robust z score normalisation.
# NOTE: this is an interpolating heredoc, so "\\t" below reaches R as "\t".
open( my $r_fh, ">", $r_script ) or die "Cannot open $r_script: $! \n\n";
print {$r_fh} <<"END_R";

#options(show.error.messages = FALSE);
header <- read.table("$r_plate_data", header=FALSE, sep="\\t", comment.char="", nrows=1);
input.data.frame <- read.table("$r_plate_data", header=FALSE, sep="\\t", skip=1);
plate.conf.data.frame <- read.table("$r_plate_config", header=TRUE, sep="\\t", comment.char="");

# every well listed in the first column of the plate configuration is excluded
# from the population used to estimate the plate median and MAD
excluded.wells <- unique(plate.conf.data.frame[,1])

# x: values to normalise; y: reference population supplying median and MAD
robust.z.score.normalise <- function(x, y) return ((x-median(y))/mad(y))
robust.z.score.normalisation.data.frame <- data.frame(well=input.data.frame[,1], stringsAsFactors=FALSE)

# one pass per plate column; guarded because 2:1 would count downwards when
# the input only has the well column
if (ncol(input.data.frame) >= 2) {
  for (i in 2:ncol(input.data.frame)) {
    replicate.sample.values <- input.data.frame[((!(input.data.frame[,1] %in% excluded.wells))&(!(is.na(input.data.frame[,i])))),][,i]
    robust.z.score.normalisation.data.frame <- cbind(robust.z.score.normalisation.data.frame, round(robust.z.score.normalise(input.data.frame[,i], replicate.sample.values), 2))
  }
}
names(robust.z.score.normalisation.data.frame) <- names(input.data.frame)
write.table(header, file="$r_z_norm", quote=FALSE, sep="\\t", row.names=FALSE, col.names=FALSE);
write.table(robust.z.score.normalisation.data.frame, file="$r_z_norm", quote=FALSE, sep="\\t", row.names=FALSE, col.names=FALSE, append=TRUE);
#eof
END_R
close($r_fh) or die "Cannot close $r_script: $! \n\n";

# Run R on the generated script. Both file names are fixed literals, so going
# through the shell for the redirections is safe.
my $status = system("R --no-restore --no-save --no-readline < $r_script > $r_log");
my $wait_status = $?;
my $start_error = $!;

# Leave the temporary directory again so that it can be removed at exit.
if ( defined $start_dir ) {
    chdir $start_dir or warn "Cannot change back to $start_dir: $! \n";
}

if ( $status != 0 ) {
    # keep the temporary directory: it holds the R script and the session log
    $File::Temp::KEEP_ALL = 1;
    my $reason
        = $wait_status == -1  ? "could not be started: $start_error"
        : $wait_status & 127  ? sprintf( "died with signal %d", $wait_status & 127 )
        :                       sprintf( "exited with status %d", $wait_status >> 8 );
    die "R $reason; see " . File::Spec->catfile( $tdir, $r_log ) . " for the R session log \n";
}