annotate BSseeker2/bs_align/bs_rrbs.py @ 1:8b26adf64adc draft default tip

V2.0.5
author weilong-guo
date Tue, 05 Nov 2013 01:55:39 -0500
parents e6df770c0e58
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
1 import fileinput, random, math, os.path
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
2 from bs_index.rrbs_build import FWD_MAPPABLE_REGIONS, REV_MAPPABLE_REGIONS
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
3 from bs_utils.utils import *
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
4
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
5 from bs_align.bs_single_end import extract_mapping
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
6 from bs_align_utils import *
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
7
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
8 def my_mappable_region(chr_regions, mapped_location, FR): # start_position (first C), end_position (last G), serial, sequence
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
9 #print len(chr_regions)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
10 out_serial = 0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
11 out_start = -1
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
12 out_end = -1
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
13 #print "mapped_location:", mapped_location
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
14 if FR == "+FW" or FR == "-RC":
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
15 my_location = str(mapped_location)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
16 if my_location in chr_regions:
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
17 my_lst = chr_regions[my_location]
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
18 out_start = int(my_location)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
19 out_end = my_lst[0]
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
20 out_serial = my_lst[1]
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
21 #else :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
22 # print "[For debug]: +FW location %s cannot be found" % my_location
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
23 elif FR == "-FW" or FR == "+RC":
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
24 my_location = str(mapped_location)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
25 if my_location in chr_regions:
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
26 my_lst = chr_regions[my_location]
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
27 out_end = int(my_location)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
28 out_start = my_lst[0]
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
29 out_serial = my_lst[1]
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
30 #else :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
31 # print "[For debug]: -FW location %s cannot be found" % my_location
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
32
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
33 return out_serial, out_start, out_end
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
34
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
35
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
36 #----------------------------------------------------------------
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
37
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
38 def bs_rrbs(main_read_file, asktag, adapter_file, cut_s, cut_e, no_small_lines, max_mismatch_no,
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
39 aligner_command, db_path, tmp_path, outfile, XS_pct, XS_count, adapter_mismatch, cut_format="C-CGG",
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
40 show_multiple_hit=False):
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
41 #----------------------------------------------------------------
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
42 # For double enzyme: cut_format="C-CGG,A-CTG"; ApekI:"G^CWGC"
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
43 #cut_context = re.sub("-", "", cut_format)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
44 # Ex. cut_format="C-CGG,AT-CG,G-CWGC"
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
45
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
46 cut_format_lst = EnumerateIUPAC(cut_format.upper().split(",")) # ['G-CAGC', 'AT-CG', 'C-CGG', 'G-CTGC']
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
47 cut_context = [i.replace("-","") for i in cut_format_lst] # ['GCAGC', 'ATCG', 'CCGG', 'GCTGC']
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
48 cut5_context = [re.match( r'(.*)\-(.*)', i).group(1) for i in cut_format_lst] # ['G', 'AT', 'C', 'G']
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
49 cut3_context = [re.match( r'(.*)\-(.*)', i).group(2) for i in cut_format_lst] # ['CAGC', 'CG', 'CGG', 'CTGC']
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
50 cut_len = [len(i) for i in cut_context] # [5, 4, 4, 5]
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
51 min_cut5_len = min([len(i) for i in cut5_context])
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
52 #print cut_format_lst
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
53 #print cut_format
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
54 #print cut5_context
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
55
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
56 cut_tag_lst = Enumerate_C_to_CT(cut_format_lst) # ['G-TTGC', 'AT-TG', 'G-CAGT', 'T-CGG', 'G-TAGC', 'C-TGG', 'G-CAGC', 'G-CTGC', 'AT-CG', 'T-TGG', 'G-TTGT', 'G-TAGT', 'C-CGG', 'G-CTGT']
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
57 cut5_tag_lst = [re.match(r'(.*)\-(.*)', i).group(1) for i in cut_tag_lst]
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
58 cut3_tag_lst = [re.match(r'(.*)\-(.*)', i).group(2) for i in cut_tag_lst]
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
59 check_pattern = [ i[-2:]+"_"+j for i,j in zip(cut5_tag_lst, cut3_tag_lst) ]
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
60
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
61 #print "======="
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
62 #print cut_tag_lst
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
63 #print cut3_tag_lst
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
64 #print cut5_tag_lst
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
65 #print check_pattern
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
66
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
67 # set region[gx,gy] for checking_genome_context
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
68 gx = [ 0 if j>2 else 2-j for j in [len(i) for i in cut5_tag_lst] ] # [XC-CGG]
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
69 gy = [ 3+len(i) for i in cut3_tag_lst ]
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
70
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
71
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
72 #----------------------------------------------------------------
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
73
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
74 # helper method to join fname with tmp_path
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
75 tmp_d = lambda fname: os.path.join(tmp_path, fname)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
76 db_d = lambda fname: os.path.join(db_path, fname)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
77
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
78 MAX_TRY = 500 # For finding the serial_no
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
79 whole_adapter_seq = ""
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
80 #----------------------------------------------------------------
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
81 adapter_seq=""
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
82 if adapter_file:
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
83 try :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
84 adapter_inf = open(adapter_file,"r")
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
85 whole_adapter_seq = adapter_inf.readline().strip()
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
86 adapter_seq = whole_adapter_seq[0:10] # only use first 10bp of adapter
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
87 adapter_inf.close()
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
88 except IOError:
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
89 print "[Error] Cannot find adapter file : %s !" % adapter_file
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
90 exit(-1)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
91
1
weilong-guo
parents: 0
diff changeset
92 logm("Read filename: %s" % main_read_file)
weilong-guo
parents: 0
diff changeset
93 logm("The first base (for mapping): %d"% cut_s )
weilong-guo
parents: 0
diff changeset
94 logm("The last base (for mapping): %d"% cut_e )
weilong-guo
parents: 0
diff changeset
95
weilong-guo
parents: 0
diff changeset
96 logm("Path for short reads aligner: %s"% aligner_command + '\n')
weilong-guo
parents: 0
diff changeset
97 logm("Reference genome library path: %s"% db_path )
weilong-guo
parents: 0
diff changeset
98
weilong-guo
parents: 0
diff changeset
99 if asktag == "Y" :
weilong-guo
parents: 0
diff changeset
100 logm("Un-directional library" )
weilong-guo
parents: 0
diff changeset
101 else :
weilong-guo
parents: 0
diff changeset
102 logm("Directional library")
weilong-guo
parents: 0
diff changeset
103
weilong-guo
parents: 0
diff changeset
104 logm("Number of mismatches allowed: %s"% max_mismatch_no )
weilong-guo
parents: 0
diff changeset
105
weilong-guo
parents: 0
diff changeset
106 if adapter_file !="":
weilong-guo
parents: 0
diff changeset
107 logm("Adapter seq: %s" % whole_adapter_seq)
weilong-guo
parents: 0
diff changeset
108 logm("-------------------------------- " )
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
109
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
110 #----------------------------------------------------------------
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
111 all_raw_reads=0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
112 all_tagged=0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
113 all_tagged_trimmed=0
1
weilong-guo
parents: 0
diff changeset
114 all_base_before_trim=0
weilong-guo
parents: 0
diff changeset
115 all_base_after_trim=0
weilong-guo
parents: 0
diff changeset
116 all_base_mapped=0
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
117 all_mapped=0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
118 all_mapped_passed=0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
119 n_cut_tag_lst={}
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
120 #print cut3_tag_lst
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
121 for x in cut3_tag_lst:
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
122 n_cut_tag_lst[x]=0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
123
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
124 mC_lst=[0,0,0]
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
125 uC_lst=[0,0,0]
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
126
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
127 no_my_files=0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
128
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
129 num_mapped_FW_C2T = 0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
130 num_mapped_RC_C2T = 0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
131 num_mapped_FW_G2A = 0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
132 num_mapped_RC_G2A = 0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
133
1
weilong-guo
parents: 0
diff changeset
134 # Count of nucleotides, which should be cut before the adapters
weilong-guo
parents: 0
diff changeset
135 Extra_base_cut_5end_adapter = max([ abs(len(i)-len(j)) for i,j in zip(cut5_context, cut3_context)])
weilong-guo
parents: 0
diff changeset
136
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
137 #===============================================
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
138 # directional sequencing
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
139 #===============================================
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
140
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
141 if asktag=="N" :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
142 #----------------------------------------------------------------
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
143 logm("== Start mapping ==")
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
144
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
145 input_fname = os.path.split(main_read_file)[1]
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
146 for read_file in isplit_file(main_read_file, tmp_d(input_fname)+'-s-', no_small_lines):
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
147
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
148 logm("Processing read file: %s" % read_file)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
149 original_bs_reads = {}
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
150 no_my_files+=1
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
151 random_id = ".tmp-"+str(random.randint(1000000,9999999))
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
152 outfile2=tmp_d('Trimmed_C2T.fa'+random_id)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
153
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
154 outf2=open(outfile2,'w')
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
155
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
156 #--- Checking input format ------------------------------------------
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
157 try :
1
weilong-guo
parents: 0
diff changeset
158 if read_file.endswith(".gz") : # support input file ending with ".gz"
weilong-guo
parents: 0
diff changeset
159 read_inf = gzip.open(read_file, "rb")
weilong-guo
parents: 0
diff changeset
160 else :
weilong-guo
parents: 0
diff changeset
161 read_inf=open(read_file,"r")
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
162 except IOError:
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
163 print "[Error] Cannot open input file : %s" % read_file
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
164 exit(-1)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
165
1
weilong-guo
parents: 0
diff changeset
166 oneline = read_inf.readline()
weilong-guo
parents: 0
diff changeset
167 l = oneline.split()
weilong-guo
parents: 0
diff changeset
168 input_format = ""
weilong-guo
parents: 0
diff changeset
169 if oneline[0]=="@":
weilong-guo
parents: 0
diff changeset
170 input_format = "fastq"
weilong-guo
parents: 0
diff changeset
171 elif len(l)==1 and oneline[0]!=">" :
weilong-guo
parents: 0
diff changeset
172 input_format = "seq"
weilong-guo
parents: 0
diff changeset
173 elif len(l)==11:
weilong-guo
parents: 0
diff changeset
174 input_format = "qseq"
weilong-guo
parents: 0
diff changeset
175 elif oneline[0]==">" :
weilong-guo
parents: 0
diff changeset
176 input_format = "fasta"
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
177 read_inf.close()
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
178
1
weilong-guo
parents: 0
diff changeset
179
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
180 #----------------------------------------------------------------
1
weilong-guo
parents: 0
diff changeset
181 read_id = ""
weilong-guo
parents: 0
diff changeset
182 seq = ""
weilong-guo
parents: 0
diff changeset
183 seq_ready = 0
weilong-guo
parents: 0
diff changeset
184 line_no = 0
weilong-guo
parents: 0
diff changeset
185 for line in fileinput.input(read_file, openhook=fileinput.hook_compressed):
weilong-guo
parents: 0
diff changeset
186 l = line.split()
weilong-guo
parents: 0
diff changeset
187 line_no += 1
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
188 if input_format=="seq":
1
weilong-guo
parents: 0
diff changeset
189 all_raw_reads += 1
weilong-guo
parents: 0
diff changeset
190 read_id = str(all_raw_reads)
weilong-guo
parents: 0
diff changeset
191 read_id = read_id.zfill(12)
weilong-guo
parents: 0
diff changeset
192 seq = l[0]
weilong-guo
parents: 0
diff changeset
193 seq_ready = "Y"
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
194 elif input_format=="fastq":
1
weilong-guo
parents: 0
diff changeset
195 l_fastq = math.fmod(line_no, 4)
weilong-guo
parents: 0
diff changeset
196 if l_fastq == 1 :
weilong-guo
parents: 0
diff changeset
197 all_raw_reads += 1
weilong-guo
parents: 0
diff changeset
198 read_id = l[0][1:]
weilong-guo
parents: 0
diff changeset
199 seq_ready = "N"
weilong-guo
parents: 0
diff changeset
200 elif l_fastq == 2 :
weilong-guo
parents: 0
diff changeset
201 seq = l[0]
weilong-guo
parents: 0
diff changeset
202 seq_ready = "Y"
weilong-guo
parents: 0
diff changeset
203 else :
weilong-guo
parents: 0
diff changeset
204 seq = ""
weilong-guo
parents: 0
diff changeset
205 seq_ready = "N"
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
206 elif input_format=="qseq":
1
weilong-guo
parents: 0
diff changeset
207 all_raw_reads += 1
weilong-guo
parents: 0
diff changeset
208 read_id = str(all_raw_reads)
weilong-guo
parents: 0
diff changeset
209 read_id = read_id.zfill(12)
weilong-guo
parents: 0
diff changeset
210 seq = l[8]
weilong-guo
parents: 0
diff changeset
211 seq_ready = "Y"
weilong-guo
parents: 0
diff changeset
212 elif input_format=="fasta" :
weilong-guo
parents: 0
diff changeset
213 l_fasta = math.fmod(line_no,2)
weilong-guo
parents: 0
diff changeset
214 if l_fasta==1:
weilong-guo
parents: 0
diff changeset
215 all_raw_reads += 1
weilong-guo
parents: 0
diff changeset
216 read_id = l[0][1:]
weilong-guo
parents: 0
diff changeset
217 seq = ""
weilong-guo
parents: 0
diff changeset
218 seq_ready = "N"
weilong-guo
parents: 0
diff changeset
219 elif l_fasta==0 :
weilong-guo
parents: 0
diff changeset
220 seq = l[0]
weilong-guo
parents: 0
diff changeset
221 seq_ready = "Y"
weilong-guo
parents: 0
diff changeset
222
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
223 #---------------------------------------------------------------
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
224 if seq_ready=="Y":
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
225 # Normalize the characters
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
226 seq=seq.upper().replace(".","N")
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
227
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
228 read_tag = [ m for m,n in [ (i, len(i)) for i in uniq(cut3_tag_lst)] if seq[0:n] == m ]
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
229 if len(read_tag) > 0 :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
230 all_tagged += 1
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
231 for i in read_tag :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
232 n_cut_tag_lst[i] += 1
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
233
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
234 seq = seq[(cut_s-1):cut_e] # cut_s start from 1 cycle by default
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
235
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
236 #-- Trimming adapter sequence ---
1
weilong-guo
parents: 0
diff changeset
237
weilong-guo
parents: 0
diff changeset
238 all_base_before_trim += len(seq)
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
239 if adapter_seq != "" :
1
weilong-guo
parents: 0
diff changeset
240 new_read = RemoveAdapter(seq, adapter_seq, adapter_mismatch, Extra_base_cut_5end_adapter)
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
241 if len(new_read) < len(seq) :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
242 all_tagged_trimmed += 1
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
243 seq = new_read
1
weilong-guo
parents: 0
diff changeset
244 all_base_after_trim += len(seq)
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
245 if len(seq) <= 4 :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
246 seq = "N" * (cut_e - cut_s)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
247
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
248 # all reads will be considered, regardless of tags
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
249 #--------- trimmed_raw_BS_read and qscore ------------------
1
weilong-guo
parents: 0
diff changeset
250 original_bs_reads[read_id] = seq
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
251 #--------- FW_C2T ------------------
1
weilong-guo
parents: 0
diff changeset
252 outf2.write('>%s\n%s\n'%(read_id, seq.replace('C', 'T')))
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
253 fileinput.close()
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
254
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
255 outf2.close()
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
256
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
257 delete_files(read_file)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
258 logm("Processing input is done")
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
259 #--------------------------------------------------------------------------------
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
260
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
261 # mapping
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
262 #--------------------------------------------------------------------------------
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
263 WC2T=tmp_d("W_C2T_m"+max_mismatch_no+".mapping"+random_id)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
264 CC2T=tmp_d("C_C2T_m"+max_mismatch_no+".mapping"+random_id)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
265
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
266 run_in_parallel([ aligner_command % {'reference_genome' : os.path.join(db_path,'W_C2T'),
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
267 'input_file' : outfile2,
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
268 'output_file' : WC2T},
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
269 aligner_command % {'reference_genome' : os.path.join(db_path,'C_C2T'),
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
270 'input_file' : outfile2,
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
271 'output_file' : CC2T} ])
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
272
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
273 logm("Aligning reads is done")
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
274
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
275 delete_files(outfile2)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
276
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
277 #--------------------------------------------------------------------------------
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
278 # Post processing
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
279 #--------------------------------------------------------------------------------
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
280
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
281 FW_C2T_U,FW_C2T_R=extract_mapping(WC2T)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
282 RC_C2T_U,RC_C2T_R=extract_mapping(CC2T)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
283 logm("Extracting alignments is done")
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
284
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
285 #----------------------------------------------------------------
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
286 # get uniq-hit reads
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
287 #----------------------------------------------------------------
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
288 Union_set=set(FW_C2T_U.iterkeys()) | set(RC_C2T_U.iterkeys())
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
289
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
290 Unique_FW_C2T=set() # +
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
291 Unique_RC_C2T=set() # -
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
292 Multiple_hits=set()
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
293
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
294
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
295 for x in Union_set:
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
296 _list=[]
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
297 for dx in [FW_C2T_U, RC_C2T_U]:
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
298 mis_lst=dx.get(x,[99])
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
299 mis=int(mis_lst[0])
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
300 _list.append(mis)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
301 for dx in [FW_C2T_R, RC_C2T_R]:
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
302 mis=dx.get(x,99)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
303 _list.append(mis)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
304 mini=min(_list)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
305 if _list.count(mini)==1:
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
306 mini_index=_list.index(mini)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
307 if mini_index==0:
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
308 Unique_FW_C2T.add(x)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
309 elif mini_index==1:
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
310 Unique_RC_C2T.add(x)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
311 else :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
312 Multiple_hits.add(x)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
313 else :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
314 Multiple_hits.add(x)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
315 # write reads rejected by Multiple Hits to file
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
316 if show_multiple_hit :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
317 outf_MH=open("Multiple_hit.fa",'w')
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
318 for i in Multiple_hits :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
319 outf_MH.write(">%s\n" % i)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
320 outf_MH.write("%s\n" % original_bs_reads[i])
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
321 outf_MH.close()
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
322
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
323 del Union_set
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
324 del FW_C2T_R
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
325 del RC_C2T_R
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
326
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
327 FW_uniq_lst=[[FW_C2T_U[u][1],u] for u in Unique_FW_C2T]
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
328 RC_uniq_lst=[[RC_C2T_U[u][1],u] for u in Unique_RC_C2T]
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
329 FW_uniq_lst.sort()
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
330 RC_uniq_lst.sort()
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
331 FW_uniq_lst=[x[1] for x in FW_uniq_lst]
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
332 RC_uniq_lst=[x[1] for x in RC_uniq_lst]
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
333
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
334 del Unique_FW_C2T
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
335 del Unique_RC_C2T
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
336
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
337 #----------------------------------------------------------------
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
338 # Post-filtering reads
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
339
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
340 # ---- FW ----
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
341 FW_regions = dict()
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
342 gseq = dict()
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
343 chr_length = dict()
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
344 for header in FW_uniq_lst :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
345 _, mapped_chr, mapped_location, cigar = FW_C2T_U[header]
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
346 original_BS = original_bs_reads[header]
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
347 if mapped_chr not in FW_regions :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
348 FW_regions[mapped_chr] = deserialize(db_d(FWD_MAPPABLE_REGIONS(mapped_chr)))
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
349 if mapped_chr not in gseq :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
350 gseq[mapped_chr] = deserialize(db_d(mapped_chr))
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
351 chr_length[mapped_chr] = len(gseq[mapped_chr])
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
352
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
353 r_start, r_end, g_len = get_read_start_end_and_genome_length(cigar)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
354 all_mapped+=1
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
355 FR = "+FW"
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
356 mapped_strand = "+"
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
357 origin_genome, next2bp, output_genome = get_genomic_sequence(gseq[mapped_chr],
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
358 mapped_location,
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
359 mapped_location + g_len,
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
360 mapped_strand)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
361 checking_genome_context = [output_genome[i:j] == k for i,j,k in zip(gx,gy,check_pattern) ]
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
362 r_aln, g_aln = cigar_to_alignment(cigar, original_BS, origin_genome)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
363
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
364 if len(r_aln) == len(g_aln) :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
365 my_region_serial, my_region_start, my_region_end = [-1, 0, 0]
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
366 if True in checking_genome_context :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
367 try_pos = [mapped_location - len(i) for i,j in zip(cut5_tag_lst, checking_genome_context) if j][0]
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
368 my_region_serial, my_region_start, my_region_end = my_mappable_region(FW_regions[mapped_chr],
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
369 try_pos, FR)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
370 if my_region_serial == 0 : # still be 0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
371 # for some cases, read has no tags; searching the upstream sequence for tags
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
372 # print "[For debug]: FW read has no tags"
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
373 try_count = 0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
374 try_pos = mapped_location - min_cut5_len + 1
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
375 while my_region_serial == 0 and try_count < MAX_TRY :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
376 my_region_serial, my_region_start, my_region_end = my_mappable_region(FW_regions[mapped_chr],
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
377 try_pos, FR)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
378 try_pos -= 1
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
379 try_count += 1
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
380
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
381 #if my_region_serial == 0 :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
382 # print "[For debug]: chr=", mapped_chr
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
383 # print "[For debug]: +FW read still can not find fragment serial"
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
384 # Tip: sometimes "my_region_serial" is still 0 ...
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
385
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
386
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
387 N_mismatch = N_MIS(r_aln, g_aln)
1
weilong-guo
parents: 0
diff changeset
388 # if N_mismatch <= int(max_mismatch_no) :
weilong-guo
parents: 0
diff changeset
389 mm_no=float(max_mismatch_no)
weilong-guo
parents: 0
diff changeset
390 if (mm_no>=1 and N_mismatch<=mm_no) or (mm_no<1 and N_mismatch<=(mm_no*len(r_aln)) ):
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
391 all_mapped_passed += 1
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
392 methy = methy_seq(r_aln, g_aln + next2bp)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
393 mC_lst, uC_lst = mcounts(methy, mC_lst, uC_lst)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
394 #---XS FILTER----------------
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
395 XS = 0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
396 nCH = methy.count('y') + methy.count('z')
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
397 nmCH = methy.count('Y') + methy.count('Z')
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
398 if( (nmCH>XS_count) and nmCH/float(nCH+nmCH)>XS_pct ) :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
399 XS = 1
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
400 num_mapped_FW_C2T += 1
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
401 outfile.store(header, N_mismatch, FR, mapped_chr, mapped_strand,
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
402 mapped_location, cigar, original_BS, methy, XS,
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
403 output_genome = output_genome,
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
404 rrbs = True,
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
405 my_region_serial = my_region_serial,
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
406 my_region_start = my_region_start,
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
407 my_region_end = my_region_end)
1
weilong-guo
parents: 0
diff changeset
408 all_base_mapped += len(original_BS)
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
409 else :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
410 print "[For debug]: reads not in same lengths"
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
411
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
412 #print "start RC"
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
413 # ---- RC ----
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
414 RC_regions = dict()
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
415 for header in RC_uniq_lst :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
416 _, mapped_chr, mapped_location, cigar = RC_C2T_U[header]
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
417 original_BS = original_bs_reads[header]
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
418 if mapped_chr not in RC_regions :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
419 RC_regions[mapped_chr] = deserialize(db_d(REV_MAPPABLE_REGIONS(mapped_chr)))
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
420 if mapped_chr not in gseq :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
421 gseq[mapped_chr] = deserialize(db_d(mapped_chr))
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
422 chr_length[mapped_chr] = len(gseq[mapped_chr])
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
423
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
424 r_start, r_end, g_len = get_read_start_end_and_genome_length(cigar)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
425 mapped_location = chr_length[mapped_chr] - mapped_location - g_len
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
426 all_mapped+=1
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
427 FR = "-FW"
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
428 mapped_strand = "-"
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
429 origin_genome, next2bp, output_genome = get_genomic_sequence(gseq[mapped_chr],
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
430 mapped_location,
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
431 mapped_location + g_len,
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
432 mapped_strand)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
433 #checking_genome_context = (output_genome[gx:gy] == check_pattern)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
434 checking_genome_context = [output_genome[i:j] == k for i,j,k in zip(gx,gy,check_pattern) ]
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
435 r_aln, g_aln = cigar_to_alignment(cigar, original_BS, origin_genome)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
436
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
437 if len(r_aln) == len(g_aln) : # and checking_genome_context:
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
438 my_region_serial, my_region_start, my_region_end = [-1, 0, 0]
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
439 if True in checking_genome_context :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
440 try_pos = [mapped_location + g_len - 1 + len(i) for i,j in zip(cut5_tag_lst, checking_genome_context) if j][0]
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
441 my_region_serial, my_region_start, my_region_end = my_mappable_region(RC_regions[mapped_chr],
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
442 try_pos , FR)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
443 if my_region_serial == 0 : # still be 0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
444 # for some cases, read has no tags; searching the upstream sequence for tags
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
445 #print "[For debug]: RC Read has no tags"
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
446 try_count = 0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
447 try_pos = mapped_location + g_len + min_cut5_len - 2
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
448 while my_region_serial == 0 and try_count < MAX_TRY :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
449 my_region_serial, my_region_start, my_region_end = my_mappable_region(RC_regions[mapped_chr],
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
450 try_pos, FR)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
451 try_pos += 1
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
452 try_count += 1
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
453
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
454 #if my_region_serial == 0 :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
455 # print "[For debug]: chr=", mapped_chr
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
456 # print "[For debug]: -FW read still cannot find fragment serial"
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
457
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
458
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
459 N_mismatch = N_MIS(r_aln, g_aln)
1
weilong-guo
parents: 0
diff changeset
460 # if N_mismatch <= int(max_mismatch_no) :
weilong-guo
parents: 0
diff changeset
461 mm_no=float(max_mismatch_no)
weilong-guo
parents: 0
diff changeset
462 if (mm_no>=1 and N_mismatch<=mm_no) or (mm_no<1 and N_mismatch<=(mm_no*len(r_aln)) ):
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
463 all_mapped_passed += 1
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
464 methy = methy_seq(r_aln, g_aln + next2bp)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
465 mC_lst, uC_lst = mcounts(methy, mC_lst, uC_lst)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
466 #---XS FILTER----------------
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
467 XS = 0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
468 nCH = methy.count('y') + methy.count('z')
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
469 nmCH = methy.count('Y') + methy.count('Z')
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
470 if( (nmCH>XS_count) and nmCH/float(nCH+nmCH)>XS_pct ) :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
471 XS = 1
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
472 num_mapped_RC_C2T += 1
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
473 outfile.store(header, N_mismatch, FR, mapped_chr, mapped_strand,
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
474 mapped_location, cigar, original_BS, methy, XS,
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
475 output_genome = output_genome,
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
476 rrbs = True,
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
477 my_region_serial = my_region_serial,
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
478 my_region_start = my_region_start,
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
479 my_region_end = my_region_end)
1
weilong-guo
parents: 0
diff changeset
480 all_base_mapped += len(original_BS)
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
481 else :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
482 print "[For debug]: reads not in same lengths"
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
483
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
484
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
485 # Finished both FW and RC
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
486 logm("Done: %s (%d) \n" % (read_file, no_my_files))
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
487 print "--> %s (%d) "%(read_file, no_my_files)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
488 del original_bs_reads
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
489 delete_files(WC2T, CC2T)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
490
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
491 # End of directional library
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
492
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
493
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
494 # ====================================================
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
495 # un-directional library
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
496 # ====================================================
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
497
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
498 elif asktag=="Y" :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
499 #----------------------------------------------------------------
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
500 logm("== Start mapping ==")
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
501
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
502 input_fname = os.path.split(main_read_file)[1]
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
503 for read_file in isplit_file(main_read_file, tmp_d(input_fname)+'-s-', no_small_lines):
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
504
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
505 logm("Processing read file: %s" % read_file)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
506 original_bs_reads = {}
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
507 no_my_files+=1
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
508 random_id = ".tmp-"+str(random.randint(1000000,9999999))
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
509 outfile2=tmp_d('Trimmed_C2T.fa'+random_id)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
510 outfile3=tmp_d('Trimmed_G2A.fa'+random_id)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
511
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
512 outf2=open(outfile2,'w')
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
513 outf3=open(outfile3,'w')
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
514
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
515 #--- Checking input format ------------------------------------------
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
516 try :
1
weilong-guo
parents: 0
diff changeset
517 if read_file.endswith(".gz") : # support input file ending with ".gz"
weilong-guo
parents: 0
diff changeset
518 read_inf = gzip.open(read_file, "rb")
weilong-guo
parents: 0
diff changeset
519 else :
weilong-guo
parents: 0
diff changeset
520 read_inf=open(read_file,"r")
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
521 except IOError:
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
522 print "[Error] Cannot open input file : %s" % read_file
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
523 exit(-1)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
524
1
weilong-guo
parents: 0
diff changeset
525 oneline = read_inf.readline()
weilong-guo
parents: 0
diff changeset
526 l = oneline.split()
weilong-guo
parents: 0
diff changeset
527 input_format = ""
weilong-guo
parents: 0
diff changeset
528 if oneline[0]=="@":
weilong-guo
parents: 0
diff changeset
529 input_format = "fastq"
weilong-guo
parents: 0
diff changeset
530 elif len(l)==1 and oneline[0]!=">" :
weilong-guo
parents: 0
diff changeset
531 input_format = "seq"
weilong-guo
parents: 0
diff changeset
532 elif len(l)==11:
weilong-guo
parents: 0
diff changeset
533 input_format = "qseq"
weilong-guo
parents: 0
diff changeset
534 elif oneline[0]==">" :
weilong-guo
parents: 0
diff changeset
535 input_format = "fasta"
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
536 read_inf.close()
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
537
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
538 #----------------------------------------------------------------
1
weilong-guo
parents: 0
diff changeset
539 read_id = ""
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
540 seq = ""
1
weilong-guo
parents: 0
diff changeset
541 seq_ready = 0
weilong-guo
parents: 0
diff changeset
542 line_no = 0
weilong-guo
parents: 0
diff changeset
543 for line in fileinput.input(read_file, openhook=fileinput.hook_compressed):
weilong-guo
parents: 0
diff changeset
544 l = line.split()
weilong-guo
parents: 0
diff changeset
545 line_no += 1
weilong-guo
parents: 0
diff changeset
546 if input_format=="seq":
weilong-guo
parents: 0
diff changeset
547 all_raw_reads += 1
weilong-guo
parents: 0
diff changeset
548 read_id = str(all_raw_reads)
weilong-guo
parents: 0
diff changeset
549 read_id = read_id.zfill(12)
weilong-guo
parents: 0
diff changeset
550 seq = l[0]
weilong-guo
parents: 0
diff changeset
551 seq_ready = "Y"
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
552 elif input_format=="fastq":
1
weilong-guo
parents: 0
diff changeset
553 l_fastq = math.fmod(line_no, 4)
weilong-guo
parents: 0
diff changeset
554 if l_fastq == 1 :
weilong-guo
parents: 0
diff changeset
555 all_raw_reads += 1
weilong-guo
parents: 0
diff changeset
556 read_id = l[0][1:]
weilong-guo
parents: 0
diff changeset
557 seq_ready = "N"
weilong-guo
parents: 0
diff changeset
558 elif l_fastq == 2 :
weilong-guo
parents: 0
diff changeset
559 seq = l[0]
weilong-guo
parents: 0
diff changeset
560 seq_ready = "Y"
weilong-guo
parents: 0
diff changeset
561 else :
weilong-guo
parents: 0
diff changeset
562 seq = ""
weilong-guo
parents: 0
diff changeset
563 seq_ready = "N"
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
564 elif input_format=="qseq":
1
weilong-guo
parents: 0
diff changeset
565 all_raw_reads += 1
weilong-guo
parents: 0
diff changeset
566 read_id = str(all_raw_reads)
weilong-guo
parents: 0
diff changeset
567 read_id = read_id.zfill(12)
weilong-guo
parents: 0
diff changeset
568 seq = l[8]
weilong-guo
parents: 0
diff changeset
569 seq_ready = "Y"
weilong-guo
parents: 0
diff changeset
570 elif input_format=="fasta" :
weilong-guo
parents: 0
diff changeset
571 l_fasta = math.fmod(line_no,2)
weilong-guo
parents: 0
diff changeset
572 if l_fasta==1:
weilong-guo
parents: 0
diff changeset
573 all_raw_reads += 1
weilong-guo
parents: 0
diff changeset
574 read_id = l[0][1:]
weilong-guo
parents: 0
diff changeset
575 seq = ""
weilong-guo
parents: 0
diff changeset
576 seq_ready = "N"
weilong-guo
parents: 0
diff changeset
577 elif l_fasta==0 :
weilong-guo
parents: 0
diff changeset
578 seq = l[0]
weilong-guo
parents: 0
diff changeset
579 seq_ready = "Y"
weilong-guo
parents: 0
diff changeset
580
weilong-guo
parents: 0
diff changeset
581 #---------------------------------------------------------------
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
582 if seq_ready=="Y":
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
583 # Normalize the characters
1
weilong-guo
parents: 0
diff changeset
584 seq = seq.upper().replace(".","N")
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
585
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
586 read_tag = [ m for m,n in [ (i, len(i)) for i in uniq(cut3_tag_lst)] if seq[0:n] == m ]
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
587 if len(read_tag) > 0 :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
588 all_tagged += 1
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
589 for i in read_tag :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
590 n_cut_tag_lst[i] += 1
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
591
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
592 seq = seq[(cut_s-1):cut_e] # cut_s start from 1 cycle by default
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
593
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
594 #-- Trimming adapter sequence ---
1
weilong-guo
parents: 0
diff changeset
595 all_base_before_trim += len(seq)
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
596 if adapter_seq != "" :
1
weilong-guo
parents: 0
diff changeset
597 new_read = RemoveAdapter(seq, adapter_seq, adapter_mismatch, Extra_base_cut_5end_adapter)
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
598 if len(new_read) < len(seq) :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
599 all_tagged_trimmed += 1
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
600 seq = new_read
1
weilong-guo
parents: 0
diff changeset
601 all_base_after_trim += len(seq)
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
602 if len(seq) <= 4 :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
603 seq = "N" * (cut_e - cut_s)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
604
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
605 # all reads will be considered, regardless of tags
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
606 #--------- trimmed_raw_BS_read and qscore ------------------
1
weilong-guo
parents: 0
diff changeset
607 original_bs_reads[read_id] = seq
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
608 #--------- FW_C2T ------------------
1
weilong-guo
parents: 0
diff changeset
609 outf2.write('>%s\n%s\n'%(read_id, seq.replace('C', 'T')))
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
610 #--------- RC_G2A ------------------
1
weilong-guo
parents: 0
diff changeset
611 outf3.write('>%s\n%s\n' % (read_id, seq.replace("G","A")))
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
612 fileinput.close()
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
613
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
614 outf2.close()
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
615
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
616 delete_files(read_file)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
617 logm("Processing input is done")
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
618 #--------------------------------------------------------------------------------
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
619
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
620 # mapping
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
621 #--------------------------------------------------------------------------------
1
weilong-guo
parents: 0
diff changeset
622 WC2T = tmp_d("W_C2T_m"+max_mismatch_no+".mapping"+random_id)
weilong-guo
parents: 0
diff changeset
623 CC2T = tmp_d("C_C2T_m"+max_mismatch_no+".mapping"+random_id)
weilong-guo
parents: 0
diff changeset
624 WG2A = tmp_d("W_G2A_m"+max_mismatch_no+".mapping"+random_id)
weilong-guo
parents: 0
diff changeset
625 CG2A = tmp_d("C_G2A_m"+max_mismatch_no+".mapping"+random_id)
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
626
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
627 run_in_parallel([ aligner_command % {'reference_genome' : os.path.join(db_path,'W_C2T'),
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
628 'input_file' : outfile2,
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
629 'output_file' : WC2T},
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
630 aligner_command % {'reference_genome' : os.path.join(db_path,'C_C2T'),
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
631 'input_file' : outfile2,
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
632 'output_file' : CC2T},
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
633 aligner_command % {'reference_genome' : os.path.join(db_path,'W_G2A'),
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
634 'input_file' : outfile3,
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
635 'output_file' : WG2A},
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
636 aligner_command % {'reference_genome' : os.path.join(db_path,'C_G2A'),
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
637 'input_file' : outfile3,
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
638 'output_file' : CG2A} ])
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
639
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
640 logm("Aligning reads is done")
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
641
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
642 delete_files(outfile2)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
643
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
644 #--------------------------------------------------------------------------------
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
645 # Post processing
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
646 #--------------------------------------------------------------------------------
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
647
1
weilong-guo
parents: 0
diff changeset
648 FW_C2T_U,FW_C2T_R = extract_mapping(WC2T)
weilong-guo
parents: 0
diff changeset
649 RC_G2A_U,RC_G2A_R = extract_mapping(CG2A)
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
650
1
weilong-guo
parents: 0
diff changeset
651 FW_G2A_U,FW_G2A_R = extract_mapping(WG2A)
weilong-guo
parents: 0
diff changeset
652 RC_C2T_U,RC_C2T_R = extract_mapping(CC2T)
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
653
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
654 logm("Extracting alignments is done")
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
655
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
656 #----------------------------------------------------------------
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
657 # get unique-hit reads
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
658 #----------------------------------------------------------------
1
weilong-guo
parents: 0
diff changeset
659 Union_set = set(FW_C2T_U.iterkeys()) | set(RC_G2A_U.iterkeys()) | set(FW_G2A_U.iterkeys()) | set(RC_C2T_U.iterkeys())
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
660
1
weilong-guo
parents: 0
diff changeset
661 Unique_FW_C2T = set() # +
weilong-guo
parents: 0
diff changeset
662 Unique_RC_G2A = set() # +
weilong-guo
parents: 0
diff changeset
663 Unique_FW_G2A = set() # -
weilong-guo
parents: 0
diff changeset
664 Unique_RC_C2T = set() # -
weilong-guo
parents: 0
diff changeset
665 Multiple_hits = set()
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
666
1
weilong-guo
parents: 0
diff changeset
667 for x in Union_set :
weilong-guo
parents: 0
diff changeset
668 _list = []
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
669 for dx in [FW_C2T_U, RC_G2A_U, FW_G2A_U, RC_C2T_U]:
1
weilong-guo
parents: 0
diff changeset
670 mis_lst = dx.get(x,[99])
weilong-guo
parents: 0
diff changeset
671 mis = int(mis_lst[0])
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
672 _list.append(mis)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
673 for dx in [FW_C2T_R, RC_G2A_R, FW_G2A_R, RC_C2T_R]:
1
weilong-guo
parents: 0
diff changeset
674 mis = dx.get(x,99)
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
675 _list.append(mis)
1
weilong-guo
parents: 0
diff changeset
676 mini = min(_list)
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
677 if _list.count(mini) == 1:
1
weilong-guo
parents: 0
diff changeset
678 mini_index = _list.index(mini)
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
679 if mini_index == 0:
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
680 Unique_FW_C2T.add(x)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
681 elif mini_index == 1:
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
682 Unique_RC_G2A.add(x)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
683 elif mini_index == 2:
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
684 Unique_FW_G2A.add(x)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
685 elif mini_index == 3:
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
686 Unique_RC_C2T.add(x)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
687 else :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
688 Multiple_hits.add(x)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
689 else :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
690 Multiple_hits.add(x)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
691 # write reads rejected by Multiple Hits to file
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
692 if show_multiple_hit :
1
weilong-guo
parents: 0
diff changeset
693 outf_MH = open("Multiple_hit.fa",'w')
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
694 for i in Multiple_hits :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
695 outf_MH.write(">%s\n" % i)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
696 outf_MH.write("%s\n" % original_bs_reads[i])
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
697 outf_MH.close()
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
698
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
699 del Union_set
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
700 del FW_C2T_R
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
701 del FW_G2A_R
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
702 del RC_C2T_R
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
703 del RC_G2A_R
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
704
1
weilong-guo
parents: 0
diff changeset
705 FW_C2T_uniq_lst = [[FW_C2T_U[u][1],u] for u in Unique_FW_C2T]
weilong-guo
parents: 0
diff changeset
706 FW_G2A_uniq_lst = [[FW_G2A_U[u][1],u] for u in Unique_FW_G2A]
weilong-guo
parents: 0
diff changeset
707 RC_C2T_uniq_lst = [[RC_C2T_U[u][1],u] for u in Unique_RC_C2T]
weilong-guo
parents: 0
diff changeset
708 RC_G2A_uniq_lst = [[RC_G2A_U[u][1],u] for u in Unique_RC_G2A]
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
709 FW_C2T_uniq_lst.sort()
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
710 RC_C2T_uniq_lst.sort()
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
711 FW_G2A_uniq_lst.sort()
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
712 RC_G2A_uniq_lst.sort()
1
weilong-guo
parents: 0
diff changeset
713 FW_C2T_uniq_lst = [x[1] for x in FW_C2T_uniq_lst]
weilong-guo
parents: 0
diff changeset
714 RC_C2T_uniq_lst = [x[1] for x in RC_C2T_uniq_lst]
weilong-guo
parents: 0
diff changeset
715 FW_G2A_uniq_lst = [x[1] for x in FW_G2A_uniq_lst]
weilong-guo
parents: 0
diff changeset
716 RC_G2A_uniq_lst = [x[1] for x in RC_G2A_uniq_lst]
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
717
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
718 del Unique_FW_C2T
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
719 del Unique_FW_G2A
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
720 del Unique_RC_C2T
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
721 del Unique_RC_G2A
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
722
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
723
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
724 #----------------------------------------------------------------
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
725 # Post-filtering reads
1
weilong-guo
parents: 0
diff changeset
726 # ---- FW_C2T ---- un-directional
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
727 FW_regions = dict()
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
728 gseq = dict()
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
729 chr_length = dict()
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
730 for header in FW_C2T_uniq_lst :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
731 _, mapped_chr, mapped_location, cigar = FW_C2T_U[header]
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
732 original_BS = original_bs_reads[header]
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
733 if mapped_chr not in FW_regions :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
734 FW_regions[mapped_chr] = deserialize(db_d(FWD_MAPPABLE_REGIONS(mapped_chr)))
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
735 if mapped_chr not in gseq :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
736 gseq[mapped_chr] = deserialize(db_d(mapped_chr))
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
737 chr_length[mapped_chr] = len(gseq[mapped_chr])
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
738
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
739 r_start, r_end, g_len = get_read_start_end_and_genome_length(cigar)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
740 all_mapped+=1
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
741 FR = "+FW"
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
742 mapped_strand = "+"
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
743 origin_genome, next2bp, output_genome = get_genomic_sequence(gseq[mapped_chr],
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
744 mapped_location,
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
745 mapped_location + g_len,
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
746 mapped_strand)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
747 checking_genome_context = [output_genome[i:j] == k for i,j,k in zip(gx,gy,check_pattern) ]
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
748 r_aln, g_aln = cigar_to_alignment(cigar, original_BS, origin_genome)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
749
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
750 if len(r_aln) == len(g_aln) :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
751 my_region_serial, my_region_start, my_region_end = [-1, 0, 0]
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
752 if True in checking_genome_context :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
753 try_pos = [mapped_location - len(i) for i,j in zip(cut5_tag_lst, checking_genome_context) if j][0]
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
754 my_region_serial, my_region_start, my_region_end = my_mappable_region(FW_regions[mapped_chr],
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
755 try_pos, FR)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
756 if my_region_serial == 0 : # still be 0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
757 # for some cases, read has no tags; searching the upstream sequence for tags
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
758 # print "[For debug]: FW read has no tags"
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
759 try_count = 0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
760 try_pos = mapped_location - min_cut5_len + 1
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
761 while my_region_serial == 0 and try_count < MAX_TRY :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
762 my_region_serial, my_region_start, my_region_end = my_mappable_region(FW_regions[mapped_chr],
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
763 try_pos, FR)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
764 try_pos -= 1
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
765 try_count += 1
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
766
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
767 N_mismatch = N_MIS(r_aln, g_aln)
1
weilong-guo
parents: 0
diff changeset
768 # if N_mismatch <= int(max_mismatch_no) :
weilong-guo
parents: 0
diff changeset
769 mm_no=float(max_mismatch_no)
weilong-guo
parents: 0
diff changeset
770 if (mm_no>=1 and N_mismatch<=mm_no) or (mm_no<1 and N_mismatch<=(mm_no*len(r_aln)) ):
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
771 all_mapped_passed += 1
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
772 methy = methy_seq(r_aln, g_aln + next2bp)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
773 mC_lst, uC_lst = mcounts(methy, mC_lst, uC_lst)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
774 #---XS FILTER----------------
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
775 XS = 0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
776 nCH = methy.count('y') + methy.count('z')
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
777 nmCH = methy.count('Y') + methy.count('Z')
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
778 if( (nmCH>XS_count) and nmCH/float(nCH+nmCH)>XS_pct ) :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
779 XS = 1
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
780 num_mapped_FW_C2T += 1
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
781 outfile.store(header, N_mismatch, FR, mapped_chr, mapped_strand,
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
782 mapped_location, cigar, original_BS, methy, XS,
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
783 output_genome = output_genome,
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
784 rrbs = True,
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
785 my_region_serial = my_region_serial,
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
786 my_region_start = my_region_start,
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
787 my_region_end = my_region_end)
1
weilong-guo
parents: 0
diff changeset
788 all_base_mapped += len(original_BS)
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
789 else :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
790 print "[For debug]: reads not in same lengths"
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
791
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
792
1
weilong-guo
parents: 0
diff changeset
793 # ---- RC_C2T ---- un-directional
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
794 RC_regions = dict()
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
795 for header in RC_C2T_uniq_lst :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
796 _, mapped_chr, mapped_location, cigar = RC_C2T_U[header]
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
797 original_BS = original_bs_reads[header]
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
798 if mapped_chr not in RC_regions :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
799 RC_regions[mapped_chr] = deserialize(db_d(REV_MAPPABLE_REGIONS(mapped_chr)))
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
800 if mapped_chr not in gseq :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
801 gseq[mapped_chr] = deserialize(db_d(mapped_chr))
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
802 chr_length[mapped_chr] = len(gseq[mapped_chr])
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
803
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
804 r_start, r_end, g_len = get_read_start_end_and_genome_length(cigar)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
805 mapped_location = chr_length[mapped_chr] - mapped_location - g_len
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
806 all_mapped+=1
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
807 FR = "-FW"
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
808 mapped_strand = "-"
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
809 origin_genome, next2bp, output_genome = get_genomic_sequence(gseq[mapped_chr],
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
810 mapped_location,
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
811 mapped_location + g_len,
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
812 mapped_strand)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
813 checking_genome_context = [output_genome[i:j] == k for i,j,k in zip(gx,gy,check_pattern) ]
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
814 r_aln, g_aln = cigar_to_alignment(cigar, original_BS, origin_genome)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
815
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
816 if len(r_aln) == len(g_aln) : # and checking_genome_context:
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
817 my_region_serial, my_region_start, my_region_end = [-1, 0, 0]
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
818 if True in checking_genome_context :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
819 try_pos = [mapped_location + g_len - 1 + len(i) for i,j in zip(cut5_tag_lst, checking_genome_context) if j][0]
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
820 my_region_serial, my_region_start, my_region_end = my_mappable_region(RC_regions[mapped_chr],
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
821 try_pos , FR)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
822 if my_region_serial == 0 : # still be 0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
823 # for some cases, read has no tags; searching the upstream sequence for tags
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
824 #print "[For debug]: RC Read has no tags"
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
825 try_count = 0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
826 try_pos = mapped_location + g_len + min_cut5_len - 2
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
827 while my_region_serial == 0 and try_count < MAX_TRY :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
828 my_region_serial, my_region_start, my_region_end = my_mappable_region(RC_regions[mapped_chr],
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
829 try_pos, FR)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
830 try_pos += 1
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
831 try_count += 1
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
832
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
833 N_mismatch = N_MIS(r_aln, g_aln)
1
weilong-guo
parents: 0
diff changeset
834 # if N_mismatch <= int(max_mismatch_no) :
weilong-guo
parents: 0
diff changeset
835 mm_no=float(max_mismatch_no)
weilong-guo
parents: 0
diff changeset
836 if (mm_no>=1 and N_mismatch<=mm_no) or (mm_no<1 and N_mismatch<=(mm_no*len(r_aln)) ):
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
837 all_mapped_passed += 1
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
838 methy = methy_seq(r_aln, g_aln + next2bp)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
839 mC_lst, uC_lst = mcounts(methy, mC_lst, uC_lst)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
840 #---XS FILTER----------------
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
841 XS = 0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
842 nCH = methy.count('y') + methy.count('z')
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
843 nmCH = methy.count('Y') + methy.count('Z')
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
844 if( (nmCH>XS_count) and nmCH/float(nCH+nmCH)>XS_pct ) :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
845 XS = 1
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
846 num_mapped_RC_C2T += 1
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
847 outfile.store(header, N_mismatch, FR, mapped_chr, mapped_strand,
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
848 mapped_location, cigar, original_BS, methy, XS,
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
849 output_genome = output_genome,
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
850 rrbs = True,
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
851 my_region_serial = my_region_serial,
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
852 my_region_start = my_region_start,
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
853 my_region_end = my_region_end)
1
weilong-guo
parents: 0
diff changeset
854 all_base_mapped += len(original_BS)
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
855
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
856 else :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
857 print "[For debug]: reads not in same lengths"
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
858
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
859
1
weilong-guo
parents: 0
diff changeset
860 # ---- FW_G2A ---- un-directional
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
861 FW_regions = dict()
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
862 gseq = dict()
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
863 chr_length = dict()
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
864 for header in FW_G2A_uniq_lst :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
865 _, mapped_chr, mapped_location, cigar = FW_G2A_U[header]
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
866 original_BS = original_bs_reads[header]
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
867 if mapped_chr not in FW_regions :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
868 FW_regions[mapped_chr] = deserialize(db_d(FWD_MAPPABLE_REGIONS(mapped_chr)))
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
869 if mapped_chr not in gseq :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
870 gseq[mapped_chr] = deserialize(db_d(mapped_chr))
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
871 chr_length[mapped_chr] = len(gseq[mapped_chr])
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
872 cigar = list(reversed(cigar))
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
873
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
874 r_start, r_end, g_len = get_read_start_end_and_genome_length(cigar)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
875 all_mapped+=1
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
876 FR = "-RC"
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
877 mapped_strand = "-"
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
878 origin_genome, next2bp, output_genome = get_genomic_sequence(gseq[mapped_chr],
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
879 mapped_location,
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
880 mapped_location + g_len,
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
881 mapped_strand)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
882 original_BS = reverse_compl_seq(original_BS) # for RC reads
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
883 checking_genome_context = [output_genome[i:j] == k for i,j,k in zip(gx,gy,check_pattern) ]
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
884 r_aln, g_aln = cigar_to_alignment(cigar, original_BS, origin_genome)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
885
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
886 if len(r_aln) == len(g_aln) :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
887 my_region_serial, my_region_start, my_region_end = [-1, 0, 0]
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
888 if True in checking_genome_context :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
889 try_pos = [mapped_location - len(i) for i,j in zip(cut5_tag_lst, checking_genome_context) if j][0]
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
890 my_region_serial, my_region_start, my_region_end = my_mappable_region(FW_regions[mapped_chr],
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
891 try_pos, FR)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
892 if my_region_serial == 0 : # still be 0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
893 # for some cases, read has no tags; searching the upstream sequence for tags
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
894 #print "[For debug]: FW read has no tags"
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
895 try_count = 0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
896 try_pos = mapped_location - min_cut5_len + 1
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
897 while my_region_serial == 0 and try_count < MAX_TRY :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
898 my_region_serial, my_region_start, my_region_end = my_mappable_region(FW_regions[mapped_chr],
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
899 try_pos, FR)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
900 try_pos += 1
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
901 try_count += 1
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
902 #if my_region_serial == 0 :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
903 # print "[For debug]: chr=", mapped_chr
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
904 # print "[For debug]: FW_G2A read still can not find fragment serial"
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
905 # Tip: sometimes "my_region_serial" is still 0 ...
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
906
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
907 N_mismatch = N_MIS(r_aln, g_aln)
1
weilong-guo
parents: 0
diff changeset
908 # if N_mismatch <= int(max_mismatch_no) :
weilong-guo
parents: 0
diff changeset
909 max_mismatch_no=float(max_mismatch_no)
weilong-guo
parents: 0
diff changeset
910 if (mm_no>=1 and N_mismatch<=mm_no) or (mm_no<1 and N_mismatch<=(mm_no*len(r_aln)) ):
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
911 all_mapped_passed += 1
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
912 methy = methy_seq(r_aln, g_aln + next2bp)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
913 mC_lst, uC_lst = mcounts(methy, mC_lst, uC_lst)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
914 #---XS FILTER----------------
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
915 XS = 0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
916 nCH = methy.count('y') + methy.count('z')
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
917 nmCH = methy.count('Y') + methy.count('Z')
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
918 if( (nmCH>XS_count) and nmCH/float(nCH+nmCH)>XS_pct ) :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
919 XS = 1
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
920 num_mapped_FW_G2A += 1
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
921 outfile.store(header, N_mismatch, FR, mapped_chr, mapped_strand,
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
922 mapped_location, cigar, original_BS, methy, XS,
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
923 output_genome = output_genome,
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
924 rrbs = True,
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
925 my_region_serial = my_region_serial,
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
926 my_region_start = my_region_start,
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
927 my_region_end = my_region_end)
1
weilong-guo
parents: 0
diff changeset
928 all_base_mapped += len(original_BS)
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
929 else :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
930 print "[For debug]: reads not in same lengths"
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
931
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
932
1
weilong-guo
parents: 0
diff changeset
933 # ---- RC_G2A ---- un-directional
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
934 RC_regions = dict()
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
935 for header in RC_G2A_uniq_lst :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
936 _, mapped_chr, mapped_location, cigar = RC_G2A_U[header]
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
937 original_BS = original_bs_reads[header]
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
938 if mapped_chr not in RC_regions :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
939 RC_regions[mapped_chr] = deserialize(db_d(REV_MAPPABLE_REGIONS(mapped_chr)))
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
940 if mapped_chr not in gseq :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
941 gseq[mapped_chr] = deserialize(db_d(mapped_chr))
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
942 chr_length[mapped_chr] = len(gseq[mapped_chr])
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
943 cigar = list(reversed(cigar))
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
944
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
945 r_start, r_end, g_len = get_read_start_end_and_genome_length(cigar)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
946 mapped_location = chr_length[mapped_chr] - mapped_location - g_len
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
947 all_mapped+=1
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
948 FR = "+RC"
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
949 mapped_strand = "+"
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
950 origin_genome, next2bp, output_genome = get_genomic_sequence(gseq[mapped_chr],
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
951 mapped_location,
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
952 mapped_location + g_len,
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
953 mapped_strand)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
954 original_BS = reverse_compl_seq(original_BS) # for RC reads
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
955 checking_genome_context = [output_genome[i:j] == k for i,j,k in zip(gx,gy,check_pattern) ]
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
956 r_aln, g_aln = cigar_to_alignment(cigar, original_BS, origin_genome)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
957
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
958 if len(r_aln) == len(g_aln) : # and checking_genome_context:
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
959 my_region_serial, my_region_start, my_region_end = [-1, 0, 0]
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
960 if True in checking_genome_context :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
961 try_pos = [mapped_location + g_len - 1 + len(i) for i,j in zip(cut5_tag_lst, checking_genome_context) if j][0]
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
962 my_region_serial, my_region_start, my_region_end = my_mappable_region(RC_regions[mapped_chr],
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
963 mapped_location + g_len + min_cut5_len -1, FR)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
964 if my_region_serial == 0 : # still be 0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
965 # for some cases, read has no tags; searching the upstream sequence for tags
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
966 #print "[For debug]: RC Read has no tags"
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
967 try_count = 0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
968 try_pos = mapped_location + g_len + min_cut5_len - 2
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
969 while try_count < MAX_TRY :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
970 my_region_serial, my_region_start, my_region_end = my_mappable_region(RC_regions[mapped_chr],
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
971 try_pos, FR)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
972 try_pos += 1
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
973 try_count += 1
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
974
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
975 #if my_region_serial == 0 :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
976 # print "[For debug]: chr=", mapped_chr
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
977 # print "[For debug]: RC_C2A read still cannot find fragment serial"
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
978
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
979 N_mismatch = N_MIS(r_aln, g_aln)
1
weilong-guo
parents: 0
diff changeset
980 # if N_mismatch <= int(max_mismatch_no) :
weilong-guo
parents: 0
diff changeset
981 mm_no=float(max_mismatch_no)
weilong-guo
parents: 0
diff changeset
982 if (mm_no>=1 and N_mismatch<=mm_no) or (mm_no<1 and N_mismatch<=(mm_no*len(r_aln)) ):
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
983 all_mapped_passed += 1
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
984 methy = methy_seq(r_aln, g_aln + next2bp)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
985 mC_lst, uC_lst = mcounts(methy, mC_lst, uC_lst)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
986 #---XS FILTER----------------
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
987 XS = 0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
988 nCH = methy.count('y') + methy.count('z')
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
989 nmCH = methy.count('Y') + methy.count('Z')
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
990 if( (nmCH>XS_count) and nmCH/float(nCH+nmCH)>XS_pct ) :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
991 XS = 1
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
992 num_mapped_RC_G2A += 1
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
993 outfile.store(header, N_mismatch, FR, mapped_chr, mapped_strand,
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
994 mapped_location, cigar, original_BS, methy, XS,
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
995 output_genome = output_genome,
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
996 rrbs = True,
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
997 my_region_serial = my_region_serial,
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
998 my_region_start = my_region_start,
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
999 my_region_end = my_region_end)
1
weilong-guo
parents: 0
diff changeset
1000 all_base_mapped += len(original_BS)
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
1001 else :
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
1002 print "[For debug]: reads not in same lengths"
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
1003
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
1004 # Finished both FW and RC
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
1005 logm("Done: %s (%d) \n" % (read_file, no_my_files))
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
1006 print "--> %s (%d) "%(read_file, no_my_files)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
1007 del original_bs_reads
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
1008 delete_files(WC2T, CC2T, WG2A, CG2A)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
1009 # End of un-directional library
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
1010
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
1011 delete_files(tmp_path)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
1012
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
1013
1
weilong-guo
parents: 0
diff changeset
1014 logm("Number of raw reads: %d "% all_raw_reads)
weilong-guo
parents: 0
diff changeset
1015 if all_raw_reads>0:
weilong-guo
parents: 0
diff changeset
1016 logm("Number of raw reads with CGG/TGG at 5' end: %d (%1.3f)"%(all_tagged,float(all_tagged)/all_raw_reads))
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
1017 for kk in range(len(n_cut_tag_lst)):
1
weilong-guo
parents: 0
diff changeset
1018 logm("Number of raw reads with tag %s: %d (%1.3f)"%(cut3_tag_lst[kk],n_cut_tag_lst[cut3_tag_lst[kk]],float(n_cut_tag_lst[cut3_tag_lst[kk]])/all_raw_reads))
weilong-guo
parents: 0
diff changeset
1019 if adapter_seq!="" :
weilong-guo
parents: 0
diff changeset
1020 logm("Number of reads having adapter removed: %d "%all_tagged_trimmed)
weilong-guo
parents: 0
diff changeset
1021 logm("Number of bases in total: %d "%all_base_before_trim)
weilong-guo
parents: 0
diff changeset
1022 if adapter_seq!="" :
weilong-guo
parents: 0
diff changeset
1023 logm("Number of bases after trimming the adapters: %d (%1.3f)"%(all_base_after_trim, float(all_base_after_trim)/all_base_before_trim ) )
weilong-guo
parents: 0
diff changeset
1024 logm("Number of reads are rejected because of multiple hits: %d\n" % len(Multiple_hits) )
weilong-guo
parents: 0
diff changeset
1025 logm("Number of unique-hits reads (before post-filtering): %d"%all_mapped)
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
1026
1
weilong-guo
parents: 0
diff changeset
1027 logm("------ %d uniquely aligned reads, passed fragment check, with mismatches <= %s"%(all_mapped_passed, max_mismatch_no))
weilong-guo
parents: 0
diff changeset
1028 logm("Mappability= %1.4f%%"%(100*float(all_mapped_passed)/all_raw_reads))
weilong-guo
parents: 0
diff changeset
1029 logm("Total bases of uniquely mapped reads %7d"% all_base_mapped )
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
1030
1
weilong-guo
parents: 0
diff changeset
1031 if asktag=="Y": # un-diretional
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
1032 logm(" ---- %7d FW reads mapped to Watson strand"%(num_mapped_FW_C2T) )
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
1033 logm(" ---- %7d RC reads mapped to Watson strand"%(num_mapped_FW_G2A) )
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
1034 logm(" ---- %7d FW reads mapped to Crick strand"%(num_mapped_RC_C2T) )
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
1035 logm(" ---- %7d RC reads mapped to Crick strand"%(num_mapped_RC_G2A) )
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
1036 # the variable name 'num_mapped_RC_G2A' seems not consistent with illustration
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
1037 # according to literal meaning
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
1038 elif asktag=="N": # directional
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
1039 logm(" ---- %7d FW reads mapped to Watson strand"%(num_mapped_FW_C2T) )
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
1040 logm(" ---- %7d FW reads mapped to Crick strand"%(num_mapped_RC_C2T) )
1
weilong-guo
parents: 0
diff changeset
1041 logm("Total bases of uniquely mapped reads %7d"% all_base_mapped )
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
1042
1
weilong-guo
parents: 0
diff changeset
1043 n_CG = mC_lst[0] + uC_lst[0]
weilong-guo
parents: 0
diff changeset
1044 n_CHG = mC_lst[1] + uC_lst[1]
weilong-guo
parents: 0
diff changeset
1045 n_CHH = mC_lst[2] + uC_lst[2]
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
1046
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
1047 logm("----------------------------------------------")
1
weilong-guo
parents: 0
diff changeset
1048 logm("Methylated C in mapped reads ")
weilong-guo
parents: 0
diff changeset
1049 logm(" mCG %1.3f%%"%( (100 * float(mC_lst[0]) / n_CG) if n_CG!=0 else 0))
weilong-guo
parents: 0
diff changeset
1050 logm(" mCHG %1.3f%%"%( (100 * float(mC_lst[1]) / n_CHG) if n_CHG!=0 else 0))
weilong-guo
parents: 0
diff changeset
1051 logm(" mCHH %1.3f%%"%( (100 * float(mC_lst[2]) / n_CHH) if n_CHH!=0 else 0))
0
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
1052 logm("----------------------------------------------")
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
1053 logm("------------------- END ----------------------")
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
1054
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
1055 elapsed(main_read_file)
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
1056
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
1057 close_log()
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
1058
e6df770c0e58 Initial upload
weilong-guo
parents:
diff changeset
1059