comparison reactivity_cal/react_cal.py @ 58:971ef91f6b68 draft

Uploaded
author tyty
date Tue, 18 Nov 2014 01:01:38 -0500
parents
children
comparison
equal deleted inserted replaced
57:31d688722e7a 58:971ef91f6b68
1 #!/usr/bin/env python
2 # -*- coding: utf-8 -*-
3 import sys
4 from Bio import SeqIO
5 import math
6 from parse_dis_react import *
7 from react_norm_function import *
8 import os
9 import random
10 import string
11
12
13 dist_file1 = sys.argv[1] #plus library
14 dist_file2 = sys.argv[2] #minus library
15 seq_file = sys.argv[3] #Reference library(genome/cDNA)
16 nt_spec = sys.argv[4] #only show reactivity for AC or ATCG
17 flag_in = sys.argv[5] # perform 2-8% normalization (1) or not (0)
18 threshold = sys.argv[6] #Threshold to cap the reactivities
19 output_file = sys.argv[7]
20
21
22 distri_p = parse_dist(dist_file1)
23 distri_m = parse_dist(dist_file2)
24 threshold = float(threshold)
25
26
27 ospath = os.path.realpath(sys.argv[0])
28 ost = ospath.split('/')
29 syspath = ""
30 for i in range(len(ost)-1):
31 syspath = syspath+ost[i].strip()
32 syspath = syspath+'/'
33
34 rs = ''.join(random.sample(string.ascii_letters + string.digits, 8))
35 os.system("mkdir "+syspath+"output_"+rs)
36
37 syspathrs = syspath+"output_"+rs+'/'
38
39 h = file(syspathrs+"react.txt",'w')
40 flag_in = int(flag_in)
41
42 seqs = SeqIO.parse(open(seq_file),'fasta');
43 nt_s = set()
44 for i in range(len(nt_spec)):
45 nt_s.add(nt_spec[i])
46
47 flag = 0
48 trans = []
49 distri_p = distri_p[1]
50 distri_m = distri_m[1]
51
52 #thres = int(threshold)
53
54
55 transcripts = {}
56 for seq in seqs:
57 n = seq.id
58 trans.append(n)
59 transcripts[n] = seq.seq.tostring()
60
61
62 #print(distri_p)
63
64
65 for i in range(0, len(trans)):
66 h.write(trans[i])
67 h.write('\n')
68 for j in range(len(distri_p[trans[i]])):
69 distri_p[trans[i]][j] = math.log((int(distri_p[trans[i]][j])+1),math.e)
70 for j in range(len(distri_m[trans[i]])):
71 distri_m[trans[i]][j] = math.log((int(distri_m[trans[i]][j])+1),math.e)
72 s_p = sum(distri_p[trans[i]])
73 s_m = sum(distri_m[trans[i]])
74 length = len(distri_p[trans[i]])
75 if s_p!= 0 and s_m!= 0:
76 r = []
77 for j in range(0, len(distri_p[trans[i]])):
78 f_p = (float(distri_p[trans[i]][j]))/float(s_p)*length
79 f_m = (float(distri_m[trans[i]][j]))/float(s_m)*length
80 raw_react = f_p-f_m
81 r.append(max(0, raw_react))
82
83 if s_p!= 0 and s_m!= 0:
84 for k in range(1,(len(r)-1)):
85 if transcripts[trans[i]][k-1] in nt_s:
86 h.write(str(r[k]))
87 h.write('\t')
88 else:
89 h.write('NA')
90 h.write('\t')
91 k = k+1
92 if transcripts[trans[i]][k-1] in nt_s:
93 h.write(str(r[k]))
94 h.write('\n')
95 else:
96 h.write('NA')
97 h.write('\n')
98
99
100 h.close()
101
102 if flag_in:
103 react_norm((syspathrs+"react.txt"),output_file, threshold)
104 else:
105 h_o = file(output_file, 'w')
106 f_i = open(syspathrs+"react.txt")
107 for aline in f_i.readlines():
108 h_o.write(aline.strip())
109 h_o.write('\n')
110 os.system("rm -f "+syspathrs+"react.txt")
111
112 os.system("rm -r "+syspathrs)
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145