Mercurial > repos > bornea > saint_preprocessing
comparison SAINT_preprocessing.py @ 51:09b89febcf98 draft
Uploaded
| author | bornea |
|---|---|
| date | Thu, 19 May 2016 12:12:00 -0400 |
| parents | 61b792d17660 |
| children | 26b5bd96332e |
comparison
equal
deleted
inserted
replaced
| 50:61b792d17660 | 51:09b89febcf98 |
|---|---|
| 240 # Remake inter file without protein errors from Uniprot. | 240 # Remake inter file without protein errors from Uniprot. |
| 241 err = readtab("error proteins.txt") | 241 err = readtab("error proteins.txt") |
| 242 bait = readtab(baitfile) | 242 bait = readtab(baitfile) |
| 243 data = read_Scaffold(Scaffold_input).data | 243 data = read_Scaffold(Scaffold_input).data |
| 244 header = read_Scaffold(Scaffold_input).header | 244 header = read_Scaffold(Scaffold_input).header |
| 245 header = [MQ_var.replace(r"\"", "") for MQ_var in header] | |
| 246 header = [MQ_var.replace(r"Intensity.", r"") for MQ_var in header] | |
| 247 header = [MQ_var.replace(r".", r"-") for MQ_var in header] | |
| 248 bait_index = [] | 245 bait_index = [] |
| 249 for bait_item in bait: | 246 for bait_line in bait: |
| 250 bait_index.append(header.index(bait_item[0])) | 247 bait_index.append(header.index(bait_line[0])) |
| 251 proteins = read_Scaffold(Scaffold_input).proteins | 248 proteins = read_Scaffold(Scaffold_input).proteins |
| 252 errors = [] | 249 errors = [] |
| 253 valid_prots = [] | |
| 254 for e in err: | 250 for e in err: |
| 255 errors.append(e[0]) | 251 errors.append(e[0]) |
| 256 for a in proteins: | 252 with open('inter.txt', 'w') as y: |
| 257 a = a.replace("\n", "") | |
| 258 # Remove \n for input into function. | |
| 259 a = a.replace("\r", "") | |
| 260 # Ditto for \r. | |
| 261 seq = get_info(a).seqlength | |
| 262 GN = get_info(a).genename | |
| 263 if seq != 'NA': | |
| 264 if GN != 'NA': | |
| 265 valid_prots.append(a) | |
| 266 with open('inter.txt', 'w') as input_file: | |
| 267 l = 0; a = 0 | 253 l = 0; a = 0 |
| 268 for bb in bait: | 254 for bb in bait: |
| 269 for lst in data: | 255 for lst in data: |
| 270 print lst | 256 if proteins[a] not in errors: |
| 271 if lst[0] in valid_prots: | 257 y.write(header[bait_index[l]] + '\t' + bb[1] + '\t' + proteins[a] + '\t' |
| 272 input_file.write(header[bait_index[l]] + '\t' + bb[1] + '\t' + lst[0] + '\t' + lst[bait_index[l]] + '\n') | 258 + lst[bait_index[l]] + '\n') |
| 273 a += 1 | 259 a += 1 |
| 274 if a == len(proteins): | 260 if a == len(proteins): |
| 275 l += 1; a = 0 | 261 l += 1; a = 0 |
| 276 | |
| 277 | 262 |
| 278 | 263 |
| 279 def bait_check(bait, Scaffold_input): | 264 def bait_check(bait, Scaffold_input): |
| 280 # Check that bait names share Scaffold header titles. | 265 # Check that bait names share Scaffold header titles. |
| 281 bait_in = readtab(bait) | 266 bait_in = readtab(bait) |
