Mercurial > repos > bornea > saint_preprocessing
comparison SAINT_preprocessing.py @ 48:265f5ae47a56 draft
Uploaded
| author | bornea |
|---|---|
| date | Thu, 19 May 2016 11:58:57 -0400 |
| parents | d223d815e9c5 |
| children | 6bb30aeb02bd |
comparison
equal
deleted
inserted
replaced
| 47:8ca1d3bc5906 | 48:265f5ae47a56 |
|---|---|
| 236 output_file.close() | 236 output_file.close() |
| 237 | 237 |
| 238 | 238 |
| 239 def no_error_inter(Scaffold_input): | 239 def no_error_inter(Scaffold_input): |
| 240 # Remake inter file without protein errors from Uniprot. | 240 # Remake inter file without protein errors from Uniprot. |
| 241 err = readtab("error proteins.txt") | 241 err = readtab("./error_proteins.txt") |
| 242 bait = readtab(baitfile) | 242 bait = readtab(baitfile) |
| 243 data = read_Scaffold(Scaffold_input).data | 243 data = read_Scaffold(Scaffold_input).data |
| 244 header = read_Scaffold(Scaffold_input).header | 244 header = read_Scaffold(Scaffold_input).header |
| | 245 header = [MQ_var.replace(r"\"", "") for MQ_var in header] |
| | 246 header = [MQ_var.replace(r"Intensity.", r"") for MQ_var in header] |
| | 247 header = [MQ_var.replace(r".", r"-") for MQ_var in header] |
| 245 bait_index = [] | 248 bait_index = [] |
| 246 for bait_line in bait: | 249 for bait_item in bait: |
| 247 bait_index.append(header.index(bait_line[0])) | 250 bait_index.append(header.index(bait_item[0])) |
| 248 proteins = read_Scaffold(Scaffold_input).proteins | 251 proteins = read_Scaffold(Scaffold_input).proteins |
| 249 errors = [] | 252 errors = [] |
| | 253 valid_prots = [] |
| 250 for e in err: | 254 for e in err: |
| 251 errors.append(e[0]) | 255 errors.append(e[0]) |
| 252 with open('inter.txt', 'w') as y: | 256 for a in proteins: |
| | 257 a = a.replace("\n", "") |
| | 258 # Remove \n for input into function. |
| | 259 a = a.replace("\r", "") |
| | 260 # Ditto for \r. |
| | 261 seq = get_info(a).seqlength |
| | 262 GN = get_info(a).genename |
| | 263 if seq != 'NA': |
| | 264 if GN != 'NA': |
| | 265 valid_prots.append(a) |
| | 266 with open('inter.txt', 'w') as input_file: |
| 253 l = 0; a = 0 | 267 l = 0; a = 0 |
| 254 for bb in bait: | 268 for bb in bait: |
| 255 for lst in data: | 269 for lst in data: |
| 256 if proteins[a] not in errors: | 270 if lst[0] in valid_prots: |
| 257 y.write(header[bait_index[l]] + '\t' + bb[1] + '\t' + proteins[a] + '\t' | 271 input_file.write(header[bait_index[l]] + '\t' + bb[1] + '\t' + lst[0] + '\t' + lst[bait_index[l]] + '\n') |
| 258 + lst[bait_index[l]] + '\n') | |
| 259 a += 1 | 272 a += 1 |
| 260 if a == len(proteins): | 273 if a == len(proteins): |
| 261 l += 1; a = 0 | 274 l += 1; a = 0 |
| | 275 |
| 262 | 276 |
| 263 | 277 |
| 264 def bait_check(bait, Scaffold_input): | 278 def bait_check(bait, Scaffold_input): |
| 265 # Check that bait names share Scaffold header titles. | 279 # Check that bait names share Scaffold header titles. |
| 266 bait_in = readtab(bait) | 280 bait_in = readtab(bait) |
