Mercurial > repos > petr-novak > re_utils
comparison fasta_interlacer.py @ 4:d397f5a85464 draft
Uploaded
author | petr-novak |
---|---|
date | Wed, 18 Sep 2019 06:30:04 -0400 |
parents | a4cd8608ef6b |
children |
comparison
equal
deleted
inserted
replaced
3:e320ef2d105a | 4:d397f5a85464 |
---|---|
77 if charA == charB: | 77 if charA == charB: |
78 sys.stderr.write( | 78 sys.stderr.write( |
79 "last character of sequence id must be used for distinguishing pairs!") | 79 "last character of sequence id must be used for distinguishing pairs!") |
80 exit(1) | 80 exit(1) |
81 # check first thousand! | 81 # check first thousand! |
82 for i in range(1000): | 82 for i in range(3): |
83 seqA = readSingleSeq(fA) | 83 seqA = readSingleSeq(fA) |
84 seqB = readSingleSeq(fB) | 84 seqB = readSingleSeq(fB) |
85 if (not seqA) or (not seqB): | 85 if (not seqA) or (not seqB): |
86 # end of file: | 86 # end of file: |
87 if i == 0: | 87 if i == 0: |
103 buffB = {} | 103 buffB = {} |
104 buffA_names = [] | 104 buffA_names = [] |
105 buffB_names = [] | 105 buffB_names = [] |
106 | 106 |
107 while True: | 107 while True: |
108 | |
109 seqA = readSingleSeq(fA) | 108 seqA = readSingleSeq(fA) |
110 seqB = readSingleSeq(fB) | 109 seqB = readSingleSeq(fB) |
111 | |
112 if not seqA and not seqB: | 110 if not seqA and not seqB: |
113 break # end of file | 111 break # end of file |
114 | 112 |
115 ## validation and direct checking only if not end of files | 113 ## validation and direct checking only if not end of files |
116 if seqA and seqB: | 114 if seqA and seqB: |
136 writeSingleSeq(fPairs, seqtmp) | 134 writeSingleSeq(fPairs, seqtmp) |
137 # can I empty buffA ??? | 135 # can I empty buffA ??? |
138 for i in buffA_names: | 136 for i in buffA_names: |
139 seqtmp = {"name": i + charA, "sequence": buffA[i]} | 137 seqtmp = {"name": i + charA, "sequence": buffA[i]} |
140 writeSingleSeq(single, seqtmp) | 138 writeSingleSeq(single, seqtmp) |
141 buffA = {} | 139 buffA = {} |
142 buffA_names = [] | 140 buffA_names = [] |
143 | 141 |
144 j = 0 | 142 j = 0 |
145 for i in buffB_names: | 143 for i in buffB_names: |
146 seqtmp = {"name": i + charB, "sequence": buffB[i]} | 144 seqtmp = {"name": i + charB, "sequence": buffB[i]} |
147 del buffB[i] | 145 del buffB[i] |
164 writeSingleSeq(fPairs, seqB) | 162 writeSingleSeq(fPairs, seqB) |
165 # can I empty buffB ??? | 163 # can I empty buffB ??? |
166 for i in buffB_names: | 164 for i in buffB_names: |
167 seqtmp = {"name": i + charB, "sequence": buffB[i]} | 165 seqtmp = {"name": i + charB, "sequence": buffB[i]} |
168 writeSingleSeq(single, seqtmp) | 166 writeSingleSeq(single, seqtmp) |
169 buffB = {} | 167 buffB = {} |
170 buffB_names = [] | 168 buffB_names = [] |
171 | 169 |
172 j = 0 | 170 j = 0 |
173 for i in buffA_names: | 171 for i in buffA_names: |
174 seqtmp = {"name": i + charA, "sequence": buffA[i]} | 172 seqtmp = {"name": i + charA, "sequence": buffA[i]} |
175 del buffA[i] | 173 del buffA[i] |
181 writeSingleSeq(single, seqtmp) | 179 writeSingleSeq(single, seqtmp) |
182 | 180 |
183 else: | 181 else: |
184 buffB[seqB["name"][:-1]] = seqB['sequence'] | 182 buffB[seqB["name"][:-1]] = seqB['sequence'] |
185 buffB_names.append(seqB["name"][:-1]) | 183 buffB_names.append(seqB["name"][:-1]) |
186 fA.close() | 184 |
187 fB.close() | 185 fA.close() |
188 fPairs.close() | 186 fB.close() |
189 # write rest of singles: | 187 fPairs.close() |
188 | |
189 # write rest of singles: | |
190 for i in buffA: | 190 for i in buffA: |
191 seqtmp = {"name": i + charA, "sequence": buffA[i]} | 191 seqtmp = {"name": i + charA, "sequence": buffA[i]} |
192 writeSingleSeq(single, seqtmp) | 192 writeSingleSeq(single, seqtmp) |
193 for i in buffB: | 193 for i in buffB: |
194 seqtmp = {"name": i + charB, "sequence": buffB[i]} | 194 seqtmp = {"name": i + charB, "sequence": buffB[i]} |
195 writeSingleSeq(single, seqtmp) | 195 writeSingleSeq(single, seqtmp) |
196 single.close() | 196 single.close() |
197 | 197 |
198 | 198 |
199 if __name__ == "__main__": | 199 if __name__ == "__main__": |
200 main() | 200 main() |