Mercurial > repos > bornea > saint_preprocessing
comparison SAINT_preprocessing.py @ 12:3e1b66d58f94 draft
Uploaded
author | bornea |
---|---|
date | Tue, 12 Apr 2016 13:24:36 -0400 |
parents | b688d0dae86b |
children | febb6def95cb |
comparison
equal
deleted
inserted
replaced
11:b688d0dae86b | 12:3e1b66d58f94 |
---|---|
197 proteins = [] | 197 proteins = [] |
198 for Scaffold_line in data: | 198 for Scaffold_line in data: |
199 Scaffold_line[4] = Scaffold_line[4].split()[0] | 199 Scaffold_line[4] = Scaffold_line[4].split()[0] |
200 # Removes the (+##) that sometimes is attached. | 200 # Removes the (+##) that sometimes is attached. |
201 uniprot_re = re.compile("[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}") | 201 uniprot_re = re.compile("[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}") |
202 for protein in data: | 202 for protein in data: |
203 prot_id = uniprot_re.match(protein[prot_start]) | 203 prot_id = uniprot_re.match(protein[prot_start]) |
204 proteins.append(prot_id.group()) | 204 if prot_id: |
205 proteins.append(prot_id.group()) | |
206 else: | |
207 prot_ids = protein[prot_start].split("|") | |
208 for prot_id in prot_ids: | |
209 if "_HUMAN" in prot_id: | |
210 proteins.append(prot_id) | |
211 elif "_YEAST" in prot_id: | |
212 proteins.append(prot_id) | |
213 elif "_MOUSE" in prot_id: | |
214 proteins.append(prot_id) | |
215 else: | |
216 print "Accession must be uniprot ID or gene name" | |
217 sys.exit() | |
205 return ReturnValue2(data, proteins, header) | 218 return ReturnValue2(data, proteins, header) |
206 | 219 |
207 | 220 |
208 def make_inter(Scaffold_input): | 221 def make_inter(Scaffold_input): |
209 bait = readtab(baitfile) | 222 bait = readtab(baitfile) |