comparison SAINT_preprocessing.py @ 48:265f5ae47a56 draft

Uploaded
author bornea
date Thu, 19 May 2016 11:58:57 -0400
parents d223d815e9c5
children 6bb30aeb02bd
comparison
equal deleted inserted replaced
47:8ca1d3bc5906 48:265f5ae47a56
236 output_file.close() 236 output_file.close()
237 237
238 238
239 def no_error_inter(Scaffold_input): 239 def no_error_inter(Scaffold_input):
240 # Remake inter file without protein errors from Uniprot. 240 # Remake inter file without protein errors from Uniprot.
241 err = readtab("error proteins.txt") 241 err = readtab("./error_proteins.txt")
242 bait = readtab(baitfile) 242 bait = readtab(baitfile)
243 data = read_Scaffold(Scaffold_input).data 243 data = read_Scaffold(Scaffold_input).data
244 header = read_Scaffold(Scaffold_input).header 244 header = read_Scaffold(Scaffold_input).header
245 header = [MQ_var.replace(r"\"", "") for MQ_var in header]
246 header = [MQ_var.replace(r"Intensity.", r"") for MQ_var in header]
247 header = [MQ_var.replace(r".", r"-") for MQ_var in header]
245 bait_index = [] 248 bait_index = []
246 for bait_line in bait: 249 for bait_item in bait:
247 bait_index.append(header.index(bait_line[0])) 250 bait_index.append(header.index(bait_item[0]))
248 proteins = read_Scaffold(Scaffold_input).proteins 251 proteins = read_Scaffold(Scaffold_input).proteins
249 errors = [] 252 errors = []
253 valid_prots = []
250 for e in err: 254 for e in err:
251 errors.append(e[0]) 255 errors.append(e[0])
252 with open('inter.txt', 'w') as y: 256 for a in proteins:
257 a = a.replace("\n", "")
258 # Remove \n for input into function.
259 a = a.replace("\r", "")
260 # Ditto for \r.
261 seq = get_info(a).seqlength
262 GN = get_info(a).genename
263 if seq != 'NA':
264 if GN != 'NA':
265 valid_prots.append(a)
266 with open('inter.txt', 'w') as input_file:
253 l = 0; a = 0 267 l = 0; a = 0
254 for bb in bait: 268 for bb in bait:
255 for lst in data: 269 for lst in data:
256 if proteins[a] not in errors: 270 if lst[0] in valid_prots:
257 y.write(header[bait_index[l]] + '\t' + bb[1] + '\t' + proteins[a] + '\t' 271 input_file.write(header[bait_index[l]] + '\t' + bb[1] + '\t' + lst[0] + '\t' + lst[bait_index[l]] + '\n')
258 + lst[bait_index[l]] + '\n')
259 a += 1 272 a += 1
260 if a == len(proteins): 273 if a == len(proteins):
261 l += 1; a = 0 274 l += 1; a = 0
275
262 276
263 277
264 def bait_check(bait, Scaffold_input): 278 def bait_check(bait, Scaffold_input):
265 # Check that bait names share Scaffold header titles. 279 # Check that bait names share Scaffold header titles.
266 bait_in = readtab(bait) 280 bait_in = readtab(bait)