Mercurial > repos > xuebing > sharplabtool
diff tools/taxonomy/poisson2test.py @ 0:9071e359b9a3
Uploaded
author | xuebing |
---|---|
date | Fri, 09 Mar 2012 19:37:19 -0500 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/tools/taxonomy/poisson2test.py Fri Mar 09 19:37:19 2012 -0500 @@ -0,0 +1,124 @@ +#!/usr/local/bin/python + +import sys +from math import * +from rpy import * + + +if ((len(sys.argv)-1) != 6): + print 'too few parameters' + print 'usage: inputfile, col1, col2, d-value(not 0), p-val correction method(0 or 1)' + sys.exit() + +try: + lines_arr = open(sys.argv[1]).readlines() +except IOError: + print'cannot open',sys.argv[1] + sys.exit() + +try: + i = int(sys.argv[2]) #first column to compare + j = int(sys.argv[3]) #second colum to compare + d = float(sys.argv[4]) #correction factor + k = int(sys.argv[5]) #p-val correction method + outfile = open(sys.argv[6],'w') # output data + + if (i>j): + print 'column order not correct col1 < col2' + print 'usage: inputfile, col1, col2, d-value, p-val correction method' + sys.exit() + + try: + a = 1 / d + assert k in [0,1] + except ZeroDivisionError: + print 'd cannot be 0' + print 'usage: inputfile, col1, col2, d-value, p-val correction method' + sys.exit() + except: + print ' p-val correction should be 0 or 1 (0 = "bonferroni", 1 = "fdr")' + print 'usage: inputfile, col1, col2, d-value, p-val correction method' + sys.exit() +except ValueError: + print 'parameters are not integers' + print 'usage: inputfile, col1, col2, d-value, p-val correction method' + sys.exit() + + +fsize = len(lines_arr) + +z1 = [] +z2 = [] +pz1 = [] +pz2 = [] +field = [] + +if d<1: # Z score calculation + for line in lines_arr: + line.strip() + field = line.split('\t') + + x = int(field[j-1]) #input column 2 + y = int(field[i-1]) #input column 1 + if y>x: + z1.append(float((y - ((1/d)*x))/sqrt((1/d)*(x + y)))) + z2.append(float((2*(sqrt(y+(3/8))-sqrt((1/d)*(x+(3/8)))))/sqrt(1+(1/d)))) + else: + tmp_var1 = x + x = y + y = tmp_var1 + z1.append(float((y - (d*x))/sqrt(d*(x + y)))) + z2.append(float((2*(sqrt(y+(3/8))-sqrt(d*(x+(3/8)))))/sqrt(1+d))) + +else: #d>1 Z score calculation + for line in lines_arr: + line.strip() + field = line.split('\t') + x = int(field[i-1]) #input column 1 + y = int(field[j-1]) #input column 2 + + if y>x: + z1.append(float((y - (d*x))/sqrt(d*(x + y)))) + z2.append(float((2*(sqrt(y+(3/8))-sqrt(d*(x+(3/8)))))/sqrt(1+d))) + else: + tmp_var2 = x + x = y + y = tmp_var2 + z1.append(float((y - ((1/d)*x))/sqrt((1/d)*(x + y)))) + z2.append(float((2*(sqrt(y+(3/8))-sqrt((1/d)*(x+(3/8)))))/sqrt(1+(1/d)))) + + + + + +# P-value caluculation for z1 and z2 +for p in z1: + + pz1.append(float(r.pnorm(-abs(float(p))))) + +for q in z2: + + pz2.append(float(r.pnorm(-abs(float(q))))) + +# P-value correction for pz1 and pz2 + +if k == 0: + corrz1 = r.p_adjust(pz1,"bonferroni",fsize) + corrz2 = r.p_adjust(pz2,"bonferroni",fsize) + + +else: + + corrz1 = r.p_adjust(pz1,"fdr",fsize) + corrz2 = r.p_adjust(pz2,"fdr",fsize) + + +#printing all columns +for n in range(fsize): + print >> outfile, "%s\t%4.3f\t%4.3f\t%8.6f\t%8.6f\t%8.6f\t%8.6f" %(lines_arr[n].strip(),z1[n],z2[n],pz1[n],pz2[n],corrz1[n],corrz2[n]) + + + + + +