comparison SAINT_preprocessing.py @ 12:3e1b66d58f94 draft

Uploaded
author bornea
date Tue, 12 Apr 2016 13:24:36 -0400
parents b688d0dae86b
children febb6def95cb
comparison
equal deleted inserted replaced
11:b688d0dae86b 12:3e1b66d58f94
197 proteins = [] 197 proteins = []
198 for Scaffold_line in data: 198 for Scaffold_line in data:
199 Scaffold_line[4] = Scaffold_line[4].split()[0] 199 Scaffold_line[4] = Scaffold_line[4].split()[0]
200 # Removes the (+##) that sometimes is attached. 200 # Removes the (+##) that sometimes is attached.
201 uniprot_re = re.compile("[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}") 201 uniprot_re = re.compile("[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}")
202 for protein in data: 202 for protein in data:
203 prot_id = uniprot_re.match(protein[prot_start]) 203 prot_id = uniprot_re.match(protein[prot_start])
204 proteins.append(prot_id.group()) 204 if prot_id:
205 proteins.append(prot_id.group())
206 else:
207 prot_ids = protein[prot_start].split("|")
208 for prot_id in prot_ids:
209 if "_HUMAN" in prot_id:
210 proteins.append(prot_id)
211 elif "_YEAST" in prot_id:
212 proteins.append(prot_id)
213 elif "_MOUSE" in prot_id:
214 proteins.append(prot_id)
215 else:
216 print "Accession must be uniprot ID or gene name"
217 sys.exit()
205 return ReturnValue2(data, proteins, header) 218 return ReturnValue2(data, proteins, header)
206 219
207 220
208 def make_inter(Scaffold_input): 221 def make_inter(Scaffold_input):
209 bait = readtab(baitfile) 222 bait = readtab(baitfile)