view calculateZPrimeFactor.pl @ 5:d7efb5d5352a draft default tip

Uploaded
author jason-ellul
date Wed, 01 Jun 2016 02:40:16 -0400
parents
children
line wrap: on
line source

###############################################################################
# In summary this script calculates the z prime factor for one or more plates
# of data.
#
# Args:
#   input.data.frame (read from the first file argument) contains one or more
#   columns of plate data. The first column must contain the well annotation,
#   i.e. A01, A02 etc. Each subsequent column represents a plate of data where
#   the column name is the plate name.
#
#   plate.conf.data.frame (read from the second file argument) is a
#   tab-separated file with a header row and the following columns:
#	  well, type (must be either "poscontr" or "negcontr"), name (name of control)
#
# Returns:
#    z'factor for each plate for each combination of each type of positive vs negative control.
#
# Author: gouldkate
###############################################################################

use strict;
use warnings;
use File::Spec;
use File::Temp qw/ tempfile tempdir /;
use IO::Handle;
# Scratch directory that will hold the generated R script and R's console
# output. CLEANUP => 0 leaves it on disk after the run.
my $tdir = tempdir( CLEANUP => 0 );

# check to make sure having correct input and output files
my $usage = "usage: calculateZPrimeFactor.pl [TABULAR.in] [TABULAR.in] [TABULAR.out] \n";
die $usage unless @ARGV == 3;

# Get the input arguments: plate data table, plate configuration table and
# the output report path. They are made absolute here because the script
# later changes into the temporary directory before R reads and writes them;
# relative paths given on the command line would no longer resolve there.
my $plateData    = File::Spec->rel2abs( $ARGV[0] );
my $plateConfig  = File::Spec->rel2abs( $ARGV[1] );
my $zPrimeFactor = File::Spec->rel2abs( $ARGV[2] );

# Open the files up front so that a missing input or an unwritable output is
# reported before any work is done. Opening the output with ">" creates or
# truncates it; the handles themselves are never read from or written to.
open( INPUT1,  "<", $plateData )    or die("Could not open file $plateData \n");
open( INPUT2,  "<", $plateConfig )  or die("Could not open file $plateConfig \n");
open( OUTPUT1, ">", $zPrimeFactor ) or die("Could not open file $zPrimeFactor \n");

# Name of the generated R script file (relative to the temp directory).
my $r_script;

# Construct the R script that calculates the Z' factor for every plate and
# every negative/positive control pair, and save it in the temp directory.
# The script file name is relative, so the chdir must succeed.
chdir $tdir or die "Cannot change to directory $tdir: $!\n";
$r_script = "calculateZPrimeFactor.r";

# The file paths are embedded in double-quoted R string literals below, so
# escape backslashes and double quotes to keep the generated R source valid.
my ( $r_plate_data, $r_plate_config, $r_z_prime_factor ) =
    map { ( my $path = $_ ) =~ s/(["\\])/\\$1/g; $path }
    ( $plateData, $plateConfig, $zPrimeFactor );

open( my $r_fh, ">", $r_script ) or die "Cannot open $r_script \n\n";
print {$r_fh} "
	#options(show.error.messages = FALSE);
	input.data.frame <- read.table(\"$r_plate_data\", head=T, sep=\"\\t\", comment=\"\");
	plate.conf.data.frame <- read.table(\"$r_plate_config\", head=T, sep=\"\\t\", comment=\"\");
	
	# assumed second column is type ie negcontr or poscontr and third column is name of control
	negative.controls.list <- unique(plate.conf.data.frame[which (plate.conf.data.frame[,2] == \"negcontr\"),][,3])
	positive.controls.list <- unique(plate.conf.data.frame[which (plate.conf.data.frame[,2] == \"poscontr\"),][,3])
  
	z.prime.factor.report.data.frame <- data.frame()
  
  
	for (negative.control in negative.controls.list){
		for (positive.control in positive.controls.list){
			# plate columns are 2..ncol; seq_len()[-1] is empty when only the well column exists
			for (i in seq_len(ncol(input.data.frame))[-1]){
        
				negative.control.wells <- plate.conf.data.frame[which (plate.conf.data.frame[3] == negative.control),][,1]
				positive.control.wells <- plate.conf.data.frame[which (plate.conf.data.frame[3] == positive.control),][,1]

				control.duo <- paste(negative.control, positive.control, sep=\"/\")
				
				negative.control.values <- input.data.frame[((input.data.frame[,1] %in% negative.control.wells)&(!(is.na(input.data.frame[,i])))),][,i]
				positive.control.values <- input.data.frame[((input.data.frame[,1] %in% positive.control.wells)&(!(is.na(input.data.frame[,i])))),][,i]

				if ((length(negative.control.values)==0)|(length(positive.control.values)==0)){
					z.prime.factor <- NA
				} else {
					if ((sum(negative.control.values)) < (sum(positive.control.values))){
						low.value.controls <- negative.control.values
						high.value.controls <- positive.control.values
					} else {
						low.value.controls <- positive.control.values
						high.value.controls <- negative.control.values
					}
					z.prime.factor <- round(1 - ((3 * (sd(high.value.controls, na.rm = TRUE) + sd(low.value.controls, na.rm = TRUE))/(abs(mean(high.value.controls, na.rm = TRUE) - mean(low.value.controls, na.rm = TRUE))))), 2)
				}
				
				plate.name <- colnames(input.data.frame)[i]
				z.prime.factor.current.row.data.frame <- data.frame(plate=plate.name, control.duo=control.duo, z.prime.factor=z.prime.factor, stringsAsFactors=FALSE)
				z.prime.factor.report.data.frame <-rbind(z.prime.factor.report.data.frame, z.prime.factor.current.row.data.frame)
			}
		}
	}
	z.prime.factor.report.data.frame <- z.prime.factor.report.data.frame[order(z.prime.factor.report.data.frame\$control.duo),]
	write.table(z.prime.factor.report.data.frame, file=\"$r_z_prime_factor\", quote=F, sep=\"\\t\", row.names=F);
	#eof\n";

# Buffered write errors only surface at close, so check it.
close($r_fh) or die "Cannot close $r_script: $!\n";

# Run the generated script through R, redirecting R's console output to a
# ".out" file next to the script. system() returns the wait status: -1 means
# the command could not be started, any other non-zero value means it failed.
my $r_status = system("R --no-restore --no-save --no-readline < $r_script > $r_script.out");

#close the input and output files
close(OUTPUT1);
close(INPUT1);
close(INPUT2);

# Fail loudly instead of leaving an empty or partial report behind silently.
if ( $r_status == -1 ) {
    die "Could not run R on $r_script: $!\n";
}
elsif ( $r_status != 0 ) {
    die "R failed (wait status $r_status) while running $r_script; see $r_script.out for details\n";
}