Mercurial > repos > bornea > saint_preprocessing
comparison SAINT_preprocessing.py @ 12:3e1b66d58f94 draft
Uploaded
| author | bornea |
|---|---|
| date | Tue, 12 Apr 2016 13:24:36 -0400 |
| parents | b688d0dae86b |
| children | febb6def95cb |
comparison
equal
deleted
inserted
replaced
| 11:b688d0dae86b | 12:3e1b66d58f94 |
|---|---|
| 197 proteins = [] | 197 proteins = [] |
| 198 for Scaffold_line in data: | 198 for Scaffold_line in data: |
| 199 Scaffold_line[4] = Scaffold_line[4].split()[0] | 199 Scaffold_line[4] = Scaffold_line[4].split()[0] |
| 200 # Removes the (+##) that sometimes is attached. | 200 # Removes the (+##) that sometimes is attached. |
| 201 uniprot_re = re.compile("[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}") | 201 uniprot_re = re.compile("[OPQ][0-9][A-Z0-9]{3}[0-9]|[A-NR-Z][0-9]([A-Z][A-Z0-9]{2}[0-9]){1,2}") |
| 202 for protein in data: | 202 for protein in data: |
| 203 prot_id = uniprot_re.match(protein[prot_start]) | 203 prot_id = uniprot_re.match(protein[prot_start]) |
| 204 proteins.append(prot_id.group()) | 204 if prot_id: |
| 205 proteins.append(prot_id.group()) | |
| 206 else: | |
| 207 prot_ids = protein[prot_start].split("|") | |
| 208 for prot_id in prot_ids: | |
| 209 if "_HUMAN" in prot_id: | |
| 210 proteins.append(prot_id) | |
| 211 elif "_YEAST" in prot_id: | |
| 212 proteins.append(prot_id) | |
| 213 elif "_MOUSE" in prot_id: | |
| 214 proteins.append(prot_id) | |
| 215 else: | |
| 216 print "Accession must be uniprot ID or gene name" | |
| 217 sys.exit() | |
| 205 return ReturnValue2(data, proteins, header) | 218 return ReturnValue2(data, proteins, header) |
| 206 | 219 |
| 207 | 220 |
| 208 def make_inter(Scaffold_input): | 221 def make_inter(Scaffold_input): |
| 209 bait = readtab(baitfile) | 222 bait = readtab(baitfile) |
