Mercurial > repos > bornea > saint_preprocessing
comparison SAINT_preprocessing.py @ 48:265f5ae47a56 draft
Uploaded
author | bornea |
---|---|
date | Thu, 19 May 2016 11:58:57 -0400 |
parents | d223d815e9c5 |
children | 6bb30aeb02bd |
comparison
equal
deleted
inserted
replaced
47:8ca1d3bc5906 | 48:265f5ae47a56 |
---|---|
236 output_file.close() | 236 output_file.close() |
237 | 237 |
238 | 238 |
239 def no_error_inter(Scaffold_input): | 239 def no_error_inter(Scaffold_input): |
240 # Remake inter file without protein errors from Uniprot. | 240 # Remake inter file without protein errors from Uniprot. |
241 err = readtab("error proteins.txt") | 241 err = readtab("./error_proteins.txt") |
242 bait = readtab(baitfile) | 242 bait = readtab(baitfile) |
243 data = read_Scaffold(Scaffold_input).data | 243 data = read_Scaffold(Scaffold_input).data |
244 header = read_Scaffold(Scaffold_input).header | 244 header = read_Scaffold(Scaffold_input).header |
245 header = [MQ_var.replace(r"\"", "") for MQ_var in header] | |
246 header = [MQ_var.replace(r"Intensity.", r"") for MQ_var in header] | |
247 header = [MQ_var.replace(r".", r"-") for MQ_var in header] | |
245 bait_index = [] | 248 bait_index = [] |
246 for bait_line in bait: | 249 for bait_item in bait: |
247 bait_index.append(header.index(bait_line[0])) | 250 bait_index.append(header.index(bait_item[0])) |
248 proteins = read_Scaffold(Scaffold_input).proteins | 251 proteins = read_Scaffold(Scaffold_input).proteins |
249 errors = [] | 252 errors = [] |
253 valid_prots = [] | |
250 for e in err: | 254 for e in err: |
251 errors.append(e[0]) | 255 errors.append(e[0]) |
252 with open('inter.txt', 'w') as y: | 256 for a in proteins: |
257 a = a.replace("\n", "") | |
258 # Remove \n for input into function. | |
259 a = a.replace("\r", "") | |
260 # Ditto for \r. | |
261 seq = get_info(a).seqlength | |
262 GN = get_info(a).genename | |
263 if seq != 'NA': | |
264 if GN != 'NA': | |
265 valid_prots.append(a) | |
266 with open('inter.txt', 'w') as input_file: | |
253 l = 0; a = 0 | 267 l = 0; a = 0 |
254 for bb in bait: | 268 for bb in bait: |
255 for lst in data: | 269 for lst in data: |
256 if proteins[a] not in errors: | 270 if lst[0] in valid_prots: |
257 y.write(header[bait_index[l]] + '\t' + bb[1] + '\t' + proteins[a] + '\t' | 271 input_file.write(header[bait_index[l]] + '\t' + bb[1] + '\t' + lst[0] + '\t' + lst[bait_index[l]] + '\n') |
258 + lst[bait_index[l]] + '\n') | |
259 a += 1 | 272 a += 1 |
260 if a == len(proteins): | 273 if a == len(proteins): |
261 l += 1; a = 0 | 274 l += 1; a = 0 |
275 | |
262 | 276 |
263 | 277 |
264 def bait_check(bait, Scaffold_input): | 278 def bait_check(bait, Scaffold_input): |
265 # Check that bait names share Scaffold header titles. | 279 # Check that bait names share Scaffold header titles. |
266 bait_in = readtab(bait) | 280 bait_in = readtab(bait) |