0
|
1 #!/usr/local/bin/python
|
|
2
|
|
3 import sys
|
|
4 from math import *
|
|
5 from rpy import *
|
|
6
|
|
7
|
|
8 if ((len(sys.argv)-1) != 6):
|
|
9 print 'too few parameters'
|
|
10 print 'usage: inputfile, col1, col2, d-value(not 0), p-val correction method(0 or 1)'
|
|
11 sys.exit()
|
|
12
|
|
13 try:
|
|
14 lines_arr = open(sys.argv[1]).readlines()
|
|
15 except IOError:
|
|
16 print'cannot open',sys.argv[1]
|
|
17 sys.exit()
|
|
18
|
|
19 try:
|
|
20 i = int(sys.argv[2]) #first column to compare
|
|
21 j = int(sys.argv[3]) #second colum to compare
|
|
22 d = float(sys.argv[4]) #correction factor
|
|
23 k = int(sys.argv[5]) #p-val correction method
|
|
24 outfile = open(sys.argv[6],'w') # output data
|
|
25
|
|
26 if (i>j):
|
|
27 print 'column order not correct col1 < col2'
|
|
28 print 'usage: inputfile, col1, col2, d-value, p-val correction method'
|
|
29 sys.exit()
|
|
30
|
|
31 try:
|
|
32 a = 1 / d
|
|
33 assert k in [0,1]
|
|
34 except ZeroDivisionError:
|
|
35 print 'd cannot be 0'
|
|
36 print 'usage: inputfile, col1, col2, d-value, p-val correction method'
|
|
37 sys.exit()
|
|
38 except:
|
|
39 print ' p-val correction should be 0 or 1 (0 = "bonferroni", 1 = "fdr")'
|
|
40 print 'usage: inputfile, col1, col2, d-value, p-val correction method'
|
|
41 sys.exit()
|
|
42 except ValueError:
|
|
43 print 'parameters are not integers'
|
|
44 print 'usage: inputfile, col1, col2, d-value, p-val correction method'
|
|
45 sys.exit()
|
|
46
|
|
47
|
|
48 fsize = len(lines_arr)
|
|
49
|
|
50 z1 = []
|
|
51 z2 = []
|
|
52 pz1 = []
|
|
53 pz2 = []
|
|
54 field = []
|
|
55
|
|
56 if d<1: # Z score calculation
|
|
57 for line in lines_arr:
|
|
58 line.strip()
|
|
59 field = line.split('\t')
|
|
60
|
|
61 x = int(field[j-1]) #input column 2
|
|
62 y = int(field[i-1]) #input column 1
|
|
63 if y>x:
|
|
64 z1.append(float((y - ((1/d)*x))/sqrt((1/d)*(x + y))))
|
|
65 z2.append(float((2*(sqrt(y+(3/8))-sqrt((1/d)*(x+(3/8)))))/sqrt(1+(1/d))))
|
|
66 else:
|
|
67 tmp_var1 = x
|
|
68 x = y
|
|
69 y = tmp_var1
|
|
70 z1.append(float((y - (d*x))/sqrt(d*(x + y))))
|
|
71 z2.append(float((2*(sqrt(y+(3/8))-sqrt(d*(x+(3/8)))))/sqrt(1+d)))
|
|
72
|
|
73 else: #d>1 Z score calculation
|
|
74 for line in lines_arr:
|
|
75 line.strip()
|
|
76 field = line.split('\t')
|
|
77 x = int(field[i-1]) #input column 1
|
|
78 y = int(field[j-1]) #input column 2
|
|
79
|
|
80 if y>x:
|
|
81 z1.append(float((y - (d*x))/sqrt(d*(x + y))))
|
|
82 z2.append(float((2*(sqrt(y+(3/8))-sqrt(d*(x+(3/8)))))/sqrt(1+d)))
|
|
83 else:
|
|
84 tmp_var2 = x
|
|
85 x = y
|
|
86 y = tmp_var2
|
|
87 z1.append(float((y - ((1/d)*x))/sqrt((1/d)*(x + y))))
|
|
88 z2.append(float((2*(sqrt(y+(3/8))-sqrt((1/d)*(x+(3/8)))))/sqrt(1+(1/d))))
|
|
89
|
|
90
|
|
91
|
|
92
|
|
93
|
|
94 # P-value caluculation for z1 and z2
|
|
95 for p in z1:
|
|
96
|
|
97 pz1.append(float(r.pnorm(-abs(float(p)))))
|
|
98
|
|
99 for q in z2:
|
|
100
|
|
101 pz2.append(float(r.pnorm(-abs(float(q)))))
|
|
102
|
|
103 # P-value correction for pz1 and pz2
|
|
104
|
|
105 if k == 0:
|
|
106 corrz1 = r.p_adjust(pz1,"bonferroni",fsize)
|
|
107 corrz2 = r.p_adjust(pz2,"bonferroni",fsize)
|
|
108
|
|
109
|
|
110 else:
|
|
111
|
|
112 corrz1 = r.p_adjust(pz1,"fdr",fsize)
|
|
113 corrz2 = r.p_adjust(pz2,"fdr",fsize)
|
|
114
|
|
115
|
|
116 #printing all columns
|
|
117 for n in range(fsize):
|
|
118 print >> outfile, "%s\t%4.3f\t%4.3f\t%8.6f\t%8.6f\t%8.6f\t%8.6f" %(lines_arr[n].strip(),z1[n],z2[n],pz1[n],pz2[n],corrz1[n],corrz2[n])
|
|
119
|
|
120
|
|
121
|
|
122
|
|
123
|
|
124
|