Mercurial > repos > bornea > saint_preprocessing
comparison SAINT_preprocessing.py @ 53:26b5bd96332e draft
Uploaded
author | bornea |
---|---|
date | Sat, 27 Aug 2016 20:25:09 -0400 |
parents | 09b89febcf98 |
children |
comparison
equal
deleted
inserted
replaced
52:8031a47f67c6 | 53:26b5bd96332e |
---|---|
108 data = open(fasta_db, 'r') | 108 data = open(fasta_db, 'r') |
109 data_lines = data.readlines() | 109 data_lines = data.readlines() |
110 db_len = len(data_lines) | 110 db_len = len(data_lines) |
111 seqlength = 0 | 111 seqlength = 0 |
112 count = 0 | 112 count = 0 |
| | 113 last_line = data_lines[-1] |
113 for data_line in data_lines: | 114 for data_line in data_lines: |
114 if ">sp" in data_line: | 115 if ">sp" in data_line: |
115 namer = data_line.split("|")[2] | 116 namer = data_line.split("|")[2] |
116 if uniprot_accession_in == data_line.split("|")[1]: | 117 if uniprot_accession_in == data_line.split("|")[1]: |
117 match = count + 1 | 118 match = count + 1 |
122 if 'GN=' not in data_line: | 123 if 'GN=' not in data_line: |
123 genename = 'NA' | 124 genename = 'NA' |
124 while ">sp" not in data_lines[match]: | 125 while ">sp" not in data_lines[match]: |
125 if match <= db_len: | 126 if match <= db_len: |
126 seqlength = seqlength + len(data_lines[match].strip()) | 127 seqlength = seqlength + len(data_lines[match].strip()) |
| | 128 if data_lines[match] == last_line: |
| | 129 break |
127 match = match + 1 | 130 match = match + 1 |
128 else: | 131 else: |
129 break | 132 break |
130 return ReturnValue1(seqlength, genename) | 133 return ReturnValue1(seqlength, genename) |
131 if uniprot_accession_in == namer.split(" ")[0]: | 134 if uniprot_accession_in == namer.split(" ")[0]: |
138 if 'GN=' not in data_line: | 141 if 'GN=' not in data_line: |
139 genename = 'NA' | 142 genename = 'NA' |
140 while ">sp" not in data_lines[match]: | 143 while ">sp" not in data_lines[match]: |
141 if match <= db_len: | 144 if match <= db_len: |
142 seqlength = seqlength + len(data_lines[match].strip()) | 145 seqlength = seqlength + len(data_lines[match].strip()) |
| | 146 if data_lines[match] == last_line: |
| | 147 break |
143 match = match + 1 | 148 match = match + 1 |
144 else: | 149 else: |
145 break | 150 break |
146 return ReturnValue1(seqlength, genename) | 151 return ReturnValue1(seqlength, genename) |
147 count = count + 1 | 152 count = count + 1 |