Mercurial > repos > bornea > saint_preprocessing
comparison SAINT_preprocessing.py @ 53:26b5bd96332e draft
Uploaded
| author | bornea |
|---|---|
| date | Sat, 27 Aug 2016 20:25:09 -0400 |
| parents | 09b89febcf98 |
| children |
comparison
equal
deleted
inserted
replaced
| 52:8031a47f67c6 | 53:26b5bd96332e |
|---|---|
| 108 data = open(fasta_db, 'r') | 108 data = open(fasta_db, 'r') |
| 109 data_lines = data.readlines() | 109 data_lines = data.readlines() |
| 110 db_len = len(data_lines) | 110 db_len = len(data_lines) |
| 111 seqlength = 0 | 111 seqlength = 0 |
| 112 count = 0 | 112 count = 0 |
| | 113 last_line = data_lines[-1] |
| 113 for data_line in data_lines: | 114 for data_line in data_lines: |
| 114 if ">sp" in data_line: | 115 if ">sp" in data_line: |
| 115 namer = data_line.split("|")[2] | 116 namer = data_line.split("|")[2] |
| 116 if uniprot_accession_in == data_line.split("|")[1]: | 117 if uniprot_accession_in == data_line.split("|")[1]: |
| 117 match = count + 1 | 118 match = count + 1 |
| 122 if 'GN=' not in data_line: | 123 if 'GN=' not in data_line: |
| 123 genename = 'NA' | 124 genename = 'NA' |
| 124 while ">sp" not in data_lines[match]: | 125 while ">sp" not in data_lines[match]: |
| 125 if match <= db_len: | 126 if match <= db_len: |
| 126 seqlength = seqlength + len(data_lines[match].strip()) | 127 seqlength = seqlength + len(data_lines[match].strip()) |
| | 128 if data_lines[match] == last_line: |
| | 129 break |
| 127 match = match + 1 | 130 match = match + 1 |
| 128 else: | 131 else: |
| 129 break | 132 break |
| 130 return ReturnValue1(seqlength, genename) | 133 return ReturnValue1(seqlength, genename) |
| 131 if uniprot_accession_in == namer.split(" ")[0]: | 134 if uniprot_accession_in == namer.split(" ")[0]: |
| 138 if 'GN=' not in data_line: | 141 if 'GN=' not in data_line: |
| 139 genename = 'NA' | 142 genename = 'NA' |
| 140 while ">sp" not in data_lines[match]: | 143 while ">sp" not in data_lines[match]: |
| 141 if match <= db_len: | 144 if match <= db_len: |
| 142 seqlength = seqlength + len(data_lines[match].strip()) | 145 seqlength = seqlength + len(data_lines[match].strip()) |
| | 146 if data_lines[match] == last_line: |
| | 147 break |
| 143 match = match + 1 | 148 match = match + 1 |
| 144 else: | 149 else: |
| 145 break | 150 break |
| 146 return ReturnValue1(seqlength, genename) | 151 return ReturnValue1(seqlength, genename) |
| 147 count = count + 1 | 152 count = count + 1 |
