#setwd("/Users/nanettecoetzer/Documents/Bioinformatics/MAIZE_project/eQTL_pipeline/April_2012_pipeline_scripts/July_2012/output_July")

# Estimate a permutation threshold for the fullest bin.
#
# Each permutation places `eqtl` items uniformly at random (with replacement)
# into `cm` bins and records the count of the fullest bin. The 95th percentile
# of those `p` maxima is returned.
#
# Args:
#   p:    number of permutations to run (must be >= 1).
#   cm:   number of bins to sample from (must be >= 1).
#   eqtl: number of items placed into the bins in each permutation.
#
# Returns:
#   The result of quantile(maxima, 0.95): a length-one numeric named "95%".
#
# Random numbers are drawn in the same order as a plain
# sample(1:cm, eqtl, replace = TRUE) loop, so seeded runs are reproducible.
perm_threshold <- function(p, cm, eqtl) {
	# 1:p would silently iterate over c(1, 0) when p is 0, so reject
	# degenerate inputs instead of returning a meaningless threshold.
	if (p < 1 || cm < 1) {
		stop("'p' and 'cm' must both be at least 1", call. = FALSE)
	}

	max_counts <- vapply(seq_len(p), function(j) {
		hits <- sample.int(cm, eqtl, replace = TRUE)
		# tabulate() counts every bin in one pass, including empty ones.
		max(tabulate(hits, nbins = cm))
	}, integer(1))

	quantile(max_counts, 0.95)
}

# Classify each row of `m` by a chi-squared goodness-of-fit test.
#
# For every row, columns 2 and 3 are treated as a pair of observed counts and
# compared against the proportions c(tot_eqtls / total, tot_genes / total).
# Column 4 is an exclusion flag: only rows where it equals 0 are tested.
#
# Args:
#   m:         matrix with the observed counts in columns 2 and 3 and the
#              exclusion flag in column 4 (other columns are not read).
#   tot_eqtls: overall count matching column 2.
#   tot_genes: overall count matching column 3.
#   total:     denominator for the null proportions
#              (presumably tot_eqtls + tot_genes; the two proportions must
#              sum to 1 for chisq.test to accept them).
#
# Returns:
#   A list with
#     ans: numeric vector, one entry per row of `m`:
#            1  tested, p < 0.0001, column-2 share above tot_eqtls / total
#           -1  tested, p < 0.0001, column-2 share not above it
#            0  excluded by the flag, or not significant
#     exp: nrow(m) x 2 matrix of expected counts under the null proportions.
#          NOTE(review): this element previously held base R's exp()
#          function because no local `exp` was ever assigned; expected
#          counts are presumably what was intended - confirm.
chisq_test <- function(m, tot_eqtls, tot_genes, total) {
	null_p <- c(tot_eqtls / total, tot_genes / total)
	n_rows <- nrow(m)

	ans <- numeric(n_rows)
	expected <- matrix(NA_real_, nrow = n_rows, ncol = 2,
		dimnames = list(NULL, c("eqtl", "genes")))

	for (i in seq_len(n_rows)) {
		counts <- m[i, 2:3]
		expected[i, ] <- sum(counts) * null_p

		# Check the flag before testing: chisq.test() stops with an error when
		# both counts are zero, and such rows are excluded anyway.
		if (m[i, 4] != 0) {
			next
		}

		test <- chisq.test(counts, p = null_p)
		if (test$p.value < 0.0001) {
			eqtl_share <- counts[1] / sum(counts)
			ans[i] <- if (eqtl_share > null_p[1]) 1 else -1
		}
	}

	list(ans = ans, exp = expected)
}

# ---- Permutation thresholds ----
# Run-wide totals are read from the second column of frequency_summary.txt.
# NOTE(review): which row holds which quantity is taken from the variable
# names only - confirm against the script that writes this file.
summary <- read.table("frequency_summary.txt", sep = "\t")
num.cm <- summary[5, 2]
num.perm <- summary[10, 2]
num.genes <- summary[4, 2]

num.all.eqtl <- summary[1, 2]
num.cis.eqtl <- summary[2, 2]
num.trans.eqtl <- summary[3, 2]

# The three calls consume the random number stream in this order
# (all, cis, trans); keep the order if seeded runs must be comparable.
threshold.all <- perm_threshold(
	p = num.perm, cm = num.cm, eqtl = num.all.eqtl
)
threshold.cis <- perm_threshold(
	p = num.perm, cm = num.cm, eqtl = num.cis.eqtl
)
threshold.trans <- perm_threshold(
	p = num.perm, cm = num.cm, eqtl = num.trans.eqtl
)


# ---- Chi-squared test inputs ----
d <- read.table("sliding_frequency.txt", header = TRUE, sep = "\t")

# Combined gene + eQTL totals for each eQTL class.
total.all <- num.genes + num.all.eqtl
total.cis <- num.genes + num.cis.eqtl
total.trans <- num.genes + num.trans.eqtl

# Cut-off on the per-window total (genes + eQTL) chosen so that the expected
# value of each cell (eQTL count and gene count) is > 5: the smaller expected
# cell is window_total * min(num.genes, num.eqtl) / total.
t.all <- 5 * total.all / min(num.genes, num.all.eqtl)
t.cis <- 5 * total.cis / min(num.genes, num.cis.eqtl)
t.trans <- 5 * total.trans / min(num.genes, num.trans.eqtl)

# Null proportions of genes and eQTL within each combined total.
genes_div_total.all <- num.genes / total.all
eqtl_div_total.all <- num.all.eqtl / total.all

genes_div_total.cis <- num.genes / total.cis
eqtl_div_total.cis <- num.cis.eqtl / total.cis

genes_div_total.trans <- num.genes / total.trans
eqtl_div_total.trans <- num.trans.eqtl / total.trans

# One value per row, in the order: threshold, cut-off, gene share, eQTL share,
# repeated for all / cis / trans.
write.table(
	c(
		threshold.all, t.all, genes_div_total.all, eqtl_div_total.all,
		threshold.cis, t.cis, genes_div_total.cis, eqtl_div_total.cis,
		threshold.trans, t.trans, genes_div_total.trans, eqtl_div_total.trans
	),
	file = "threshold_chisq_summary.txt"
)


# ---- Per-window tables ----
# Columns: id, eQTL count, gene count, below-cut-off flag, cM, chr.
# The cbind() arguments are deliberately left as expressions rather than bare
# variable names so the matrices carry no column names and the headers of the
# written files stay as they are.
s.all <- cbind(
	d$sliding.id,
	d$sliding.all.eQTL,
	d$sliding.genes,
	(d$sliding.genes + d$sliding.all.eQTL) < t.all,
	d$sliding.cM,
	d$chr
)
s.cis <- cbind(
	d$sliding.id,
	d$sliding.cis.eQTL,
	d$sliding.genes,
	(d$sliding.genes + d$sliding.cis.eQTL) < t.cis,
	d$sliding.cM,
	d$chr
)
s.trans <- cbind(
	d$sliding.id,
	d$sliding.trans.eQTL,
	d$sliding.genes,
	(d$sliding.genes + d$sliding.trans.eQTL) < t.trans,
	d$sliding.cM,
	d$chr
)

# ---- Run the tests and write one file per eQTL class ----
a.all <- chisq_test(
	m = s.all, tot_eqtls = num.all.eqtl,
	tot_genes = num.genes, total = total.all
)
a.cis <- chisq_test(
	m = s.cis, tot_eqtls = num.cis.eqtl,
	tot_genes = num.genes, total = total.cis
)
a.trans <- chisq_test(
	m = s.trans, tot_eqtls = num.trans.eqtl,
	tot_genes = num.genes, total = total.trans
)

write.table(cbind(s.all, a.all$ans), file = "chisq_out_all.txt")
write.table(cbind(s.cis, a.cis$ans), file = "chisq_out_cis.txt")
write.table(cbind(s.trans, a.trans$ans), file = "chisq_out_trans.txt")

