0
|
1 __license__ = "MIT"
|
|
2
|
|
3 import unittest
|
|
4 import glycan_sets as gs
|
|
5
|
|
6
|
|
class SimpleUnitTest(unittest.TestCase):
    """Exercise ``gs.glycan_set_comparison`` against inline KCF fixtures.

    ``setUp`` stores the fixtures in ``self.kcinput``, a dict of KCF text
    keyed by fixture name:

    * ``"ringsmcaw"`` - six entries carrying COMPOSITION / MASS / CLASS /
      REFERENCE / DBLINKS sections in addition to NODE and EDGE.
    * ``"ringsmcaw_modified"`` - the same six entries reduced to the ENTRY,
      NODE and EDGE sections.
    * ``"ringsfinger"`` - six entries (G00015 .. G00020), NODE/EDGE only.
    * ``"ringsbroken"`` - a truncated entry whose first line is prefixed
      with junk characters.
    """

    def setUp(self):
        """Build the KCF fixtures and blank ``http_proxy`` for the test.

        The previous ``http_proxy`` value is remembered so that ``tearDown``
        can put it back instead of leaking the change to the environment.
        """
        import os

        # NOTE(review): fields in these fixtures are separated by single
        # spaces and carry no leading indentation, exactly as in the copy
        # that was reviewed. Confirm the column layout against the KCF
        # files the parser is meant to read before relying on it.
        self.kcinput = {"ringsmcaw": """ENTRY G04845 Glycan
COMPOSITION (Gal)3 (Glc)1 (GlcNAc)2 (LFuc)2 (Neu5Ac)1
MASS 1656.5
DBLINKS CCSD: 23949
GlycomeDB: 20420
JCGGDB: JCGG-STR011245
NODE 9
1 Glc 0 0
2 Gal -10 0
3 GlcNAc -20 10
4 GlcNAc -20 -10
5 Gal -30 15
6 LFuc -30 5
7 LFuc -30 -5
8 Gal -30 -15
9 Neu5Ac -40 15
EDGE 8
1 2:b1 1:4
2 3:b1 2:6
3 4:b1 2:3
4 5:b1 3:4
5 6:a1 3:3
6 7:a1 4:4
7 8:b1 4:3
8 9:a2 5:3
///
ENTRY G05108 Glycan
COMPOSITION (Gal)2 (GalNAc)1 (GlcNAc)1 (LFuc)1 (Neu5Ac)1 (S)1
MASS 1266.2
CLASS Glycoprotein; O-Glycan
DBLINKS CCSD: 33353
GlycomeDB: 20590
JCGGDB: JCGG-STR011425
NODE 7
1 GalNAc 20 1
2 Gal 10 1
3 GlcNAc 0 1
4 S -5 5
5 Gal -10 1
6 LFuc -10 -4
7 Neu5Ac -20 1
EDGE 6
1 2:b1 1:3
2 3:b1 2:3
3 4 3:6
4 5:b1 3:4
5 6:a1 3:3
6 7:a2 5:3
///

ENTRY G05121 Glycan
COMPOSITION (Gal)2 (GalNAc)1 (GlcNAc)1 (LFuc)1 (Neu5Ac)2
MASS 1477.3
CLASS Glycoprotein; O-Glycan
DBLINKS CCSD: 33350
GlycomeDB: 20599
JCGGDB: JCGG-STR011433
NODE 7
1 GalNAc 0 0
2 Neu5Ac -10 5
3 Gal -10 -5
4 GlcNAc -20 -5
5 Gal -30 0
6 LFuc -30 -10
7 Neu5Ac -40 0
EDGE 6
1 2:a2 1:6
2 3:b1 1:3
3 4:b1 3:3
4 5:b1 4:4
5 6:a1 4:3
6 7:a2 5:3
///

ENTRY G04183 Glycan
COMPOSITION (Gal)3 (GlcNAc)5 (LFuc)1 (Man)3 (Neu5Ac)3
MASS 3026.8
CLASS Glycoprotein; N-Glycan
DBLINKS CCSD: 41981
GlycomeDB: 19974
JCGGDB: JCGG-STR010756
NODE 15
1 GlcNAc 0 0
2 GlcNAc -10 0
3 Man -20 0
4 Man -30 10
5 Man -30 -10
6 GlcNAc -40 10
7 GlcNAc -40 -5
8 GlcNAc -40 -15
9 Gal -50 10
10 Gal -50 0
11 LFuc -50 -10
12 Gal -50 -15
13 Neu5Ac -60 10
14 Neu5Ac -60 0
15 Neu5Ac -60 -15
EDGE 14
1 2:b1 1:4
2 3:b1 2:4
3 4:a1 3:6
4 5:a1 3:3
5 6:b1 4:2
6 7:b1 5:4
7 8:b1 5:2
8 9:b1 6:4
9 10:b1 7:4
10 11:a1 7:3
11 12:b1 8:4
12 13:a2 9:6
13 14:a2 10:3
14 15:a2 12:6
///

ENTRY G04329 Glycan
COMPOSITION (Gal)3 (GlcNAc)3 (LFuc)3 (Neu5Ac)1
MASS 1843.7
DBLINKS CCSD: 36620
GlycomeDB: 20084
JCGGDB: JCGG-STR010874
NODE 10
1 GlcNAc 27 -5
2 Gal 18 0
3 LFuc 18 -10
4 GlcNAc 9 0
5 Gal 0 5
6 LFuc 0 -5
7 GlcNAc -9 5
8 Gal -18 10
9 LFuc -18 0
10 Neu5Ac -27 10
EDGE 9
1 2:b1 1:4
2 3:a1 1:3
3 4:b1 2:3
4 5:b1 4:4
5 6:a1 4:3
6 7:b1 5:3
7 8:b1 7:4
8 9:a1 7:3
9 10:a2 8:3
///
ENTRY G04804 Glycan
COMPOSITION (Gal)3 (GlcNAc)5 (LFuc)1 (Man)3 (Neu5Ac)3
MASS 3026.8
CLASS Glycoprotein; N-Glycan
REFERENCE 1 [PMID:6704968]
Chandrasekaran EV, Davila M, Nixon D, Mendicino J.
Structures of the oligosaccharide chains of two forms of alpha 1-acid glycoprotein purified from liver metastases of lung, colon, and breast tumors.
Cancer. Res. 44 (1984) 1557-67.
DBLINKS CCSD: 8168
GlycomeDB: 33225
JCGGDB: JCGG-STR024138
NODE 15
1 GlcNAc 27.2 1.6
2 GlcNAc 17.2 1.6
3 Man 8.2 1.6
4 Man 0.2 7.6
5 Man 0.2 -4.4
6 GlcNAc -8.8 7.6
7 GlcNAc -8.8 -0.4
8 GlcNAc -8.8 -8.4
9 Gal -17.8 7.6
10 Gal -17.8 3.6
11 LFuc -17.8 -4.4
12 Gal -17.8 -8.4
13 Neu5Ac -26.8 7.6
14 Neu5Ac -26.8 3.6
15 Neu5Ac -26.8 -8.4
EDGE 14
1 2:b1 1:4
2 3:b1 2:4
3 4:a1 3:6
4 5:a1 3:3
5 6:b1 4:2
6 7:b1 5:4
7 8:b1 5:2
8 9:b1 6:4
9 10:b1 7:4
10 11:a1 7:3
11 12:b1 8:4
12 13:a2 9:3
13 14:a2 10:3
14 15:a2 12:6
///
""", "ringsmcaw_modified": """ENTRY G04845 Glycan
NODE 9
1 Glc 0 0
2 Gal -10 0
3 GlcNAc -20 10
4 GlcNAc -20 -10
5 Gal -30 15
6 LFuc -30 5
7 LFuc -30 -5
8 Gal -30 -15
9 Neu5Ac -40 15
EDGE 8
1 2:b1 1:4
2 3:b1 2:6
3 4:b1 2:3
4 5:b1 3:4
5 6:a1 3:3
6 7:a1 4:4
7 8:b1 4:3
8 9:a2 5:3
///
ENTRY G05108 Glycan
NODE 7
1 GalNAc 20 1
2 Gal 10 1
3 GlcNAc 0 1
4 S -5 5
5 Gal -10 1
6 LFuc -10 -4
7 Neu5Ac -20 1
EDGE 6
1 2:b1 1:3
2 3:b1 2:3
3 4 3:6
4 5:b1 3:4
5 6:a1 3:3
6 7:a2 5:3
///
ENTRY G05121 Glycan
NODE 7
1 GalNAc 0 0
2 Neu5Ac -10 5
3 Gal -10 -5
4 GlcNAc -20 -5
5 Gal -30 0
6 LFuc -30 -10
7 Neu5Ac -40 0
EDGE 6
1 2:a2 1:6
2 3:b1 1:3
3 4:b1 3:3
4 5:b1 4:4
5 6:a1 4:3
6 7:a2 5:3
///
ENTRY G04183 Glycan
NODE 15
1 GlcNAc 0 0
2 GlcNAc -10 0
3 Man -20 0
4 Man -30 10
5 Man -30 -10
6 GlcNAc -40 10
7 GlcNAc -40 -5
8 GlcNAc -40 -15
9 Gal -50 10
10 Gal -50 0
11 LFuc -50 -10
12 Gal -50 -15
13 Neu5Ac -60 10
14 Neu5Ac -60 0
15 Neu5Ac -60 -15
EDGE 14
1 2:b1 1:4
2 3:b1 2:4
3 4:a1 3:6
4 5:a1 3:3
5 6:b1 4:2
6 7:b1 5:4
7 8:b1 5:2
8 9:b1 6:4
9 10:b1 7:4
10 11:a1 7:3
11 12:b1 8:4
12 13:a2 9:6
13 14:a2 10:3
14 15:a2 12:6
///
ENTRY G04329 Glycan
NODE 10
1 GlcNAc 27 -5
2 Gal 18 0
3 LFuc 18 -10
4 GlcNAc 9 0
5 Gal 0 5
6 LFuc 0 -5
7 GlcNAc -9 5
8 Gal -18 10
9 LFuc -18 0
10 Neu5Ac -27 10
EDGE 9
1 2:b1 1:4
2 3:a1 1:3
3 4:b1 2:3
4 5:b1 4:4
5 6:a1 4:3
6 7:b1 5:3
7 8:b1 7:4
8 9:a1 7:3
9 10:a2 8:3
///
ENTRY G04804 Glycan
NODE 15
1 GlcNAc 27.2 1.6
2 GlcNAc 17.2 1.6
3 Man 8.2 1.6
4 Man 0.2 7.6
5 Man 0.2 -4.4
6 GlcNAc -8.8 7.6
7 GlcNAc -8.8 -0.4
8 GlcNAc -8.8 -8.4
9 Gal -17.8 7.6
10 Gal -17.8 3.6
11 LFuc -17.8 -4.4
12 Gal -17.8 -8.4
13 Neu5Ac -26.8 7.6
14 Neu5Ac -26.8 3.6
15 Neu5Ac -26.8 -8.4
EDGE 14
1 2:b1 1:4
2 3:b1 2:4
3 4:a1 3:6
4 5:a1 3:3
5 6:b1 4:2
6 7:b1 5:4
7 8:b1 5:2
8 9:b1 6:4
9 10:b1 7:4
10 11:a1 7:3
11 12:b1 8:4
12 13:a2 9:3
13 14:a2 10:3
14 15:a2 12:6
///
""", "ringsfinger": """ENTRY G00015 Glycan
NODE 8
1 Asn 20 0
2 GlcNAc 12 0
3 GlcNAc 3 0
4 Man -5 0
5 Man -12 5
6 Man -12 -5
7 GlcNAc -20 5
8 GlcNAc -20 -5
EDGE 7
1 2:b1 1
2 3:b1 2:4
3 4:b1 3:4
4 5:a1 4:6
5 6:a1 4:3
6 7:b1 5:2
7 8:b1 6:2
///
ENTRY G00016 Glycan
NODE 9
1 Asn 20 3
2 GlcNAc 12 3
3 LFuc 4 8
4 GlcNAc 3 -2
5 Man -5 -2
6 Man -12 3
7 Man -12 -7
8 GlcNAc -20 3
9 GlcNAc -20 -7
EDGE 8
1 2:b1 1
2 3:a1 2:6
3 4:b1 2:4
4 5:b1 4:4
5 6:a1 5:6
6 7:a1 5:3
7 8:b1 6:2
8 9:b1 7:2
///
ENTRY G00017 Glycan
NODE 11
1 Asn 24 3
2 GlcNAc 14 3
3 LFuc 7 8
4 GlcNAc 6 -2
5 Man -2 -2
6 Man -8 3
7 Man -8 -7
8 GlcNAc -16 3
9 GlcNAc -16 -7
10 Gal -24 3
11 Gal -24 -7
EDGE 10
1 2:b1 1
2 3:a1 2:6
3 4:b1 2:4
4 5:b1 4:4
5 6:a1 5:6
6 7:a1 5:3
7 8:b1 6:2
8 9:b1 7:2
9 10:b1 8:4
10 11:b1 9:4
///
ENTRY G00018 Glycan
NODE 13
1 Asn 28 3
2 GlcNAc 18 3
3 LFuc 10 8
4 GlcNAc 9 -2
5 Man 1 -2
6 Man -5 4
7 Man -5 -8
8 GlcNAc -13 4
9 GlcNAc -13 -8
10 Gal -21 4
11 Gal -21 -8
12 Neu5Ac -29 4
13 Neu5Ac -29 -8
EDGE 12
1 2:b1 1
2 3:a1 2:6
3 4:b1 2:4
4 5:b1 4:4
5 6:a1 5:6
6 7:a1 5:3
7 8:b1 6:2
8 9:b1 7:2
9 10:b1 8:4
10 11:b1 9:4
11 12:a2 10:6
12 13:a2 11:6
///
ENTRY G00019 Glycan
NODE 9
1 Asn 20 0
2 GlcNAc 12 0
3 GlcNAc 3 0
4 Man -5 0
5 Man -12 5
6 Man -12 -5
7 GlcNAc -15 0
8 GlcNAc -20 5
9 GlcNAc -20 -5
EDGE 8
1 2:b1 1
2 3:b1 2:4
3 4:b1 3:4
4 5:a1 4:6
5 6:a1 4:3
6 7:b1 4:4
7 8:b1 5:2
8 9:b1 6:2
///
ENTRY G00020 Glycan
NODE 9
1 Asn 20 3
2 GlcNAc 11 3
3 GlcNAc 2 3
4 Man -6 3
5 Man -13 9
6 Man -13 -3
7 GlcNAc -21 9
8 GlcNAc -21 2
9 GlcNAc -21 -8
EDGE 8
1 2:b1 1
2 3:b1 2:4
3 4:b1 3:4
4 5:a1 4:6
5 6:a1 4:3
6 7:b1 5:2
7 8:b1 6:4
8 9:b1 6:2
///
""", "ringsbroken": """ABCDEFGENTRY G00015 Glycan
NODE 8
1 Asn 20 0
2 GlcNAc 12 0
"""
                        }

        # Work around for "IOError: [Errno url error] invalid proxy for http:".
        # Remember what was there so tearDown can restore it.
        self._saved_http_proxy = os.environ.get("http_proxy")
        os.environ["http_proxy"] = ""

    def tearDown(self):
        """Restore the ``http_proxy`` value that ``setUp`` overwrote."""
        import os

        if self._saved_http_proxy is None:
            # The variable did not exist before the test; remove it again.
            os.environ.pop("http_proxy", None)
        else:
            os.environ["http_proxy"] = self._saved_http_proxy

    @staticmethod
    def _open_kcf(text):
        """Return a fresh in-memory, file-like handle positioned at the start of *text*."""
        try:
            from StringIO import StringIO  # Python 2 module
        except ImportError:
            from io import StringIO  # Python 3 fallback
        return StringIO(text)

    def _assert_sets_differ(self, kcf_a, kcf_b):
        """Compare two KCF texts and assert that every paired result differs.

        Unpacking into six names also checks that the comparison returns
        exactly six values.
        """
        uniquesetA, uniquesetB, union, intersection, AB, BA = gs.glycan_set_comparison(
            self._open_kcf(kcf_a), self._open_kcf(kcf_b))
        self.assertNotEqual(uniquesetA, uniquesetB)
        self.assertNotEqual(union, intersection)
        self.assertNotEqual(AB, BA)

    def test_broken_kcf(self):
        """Malformed KCF input must raise ``UnboundLocalError``."""
        handle = self._open_kcf(self.kcinput["ringsbroken"])
        # The same handle is supplied for both sets.
        # NOTE(review): the expected exception type pins an implementation
        # detail of glycan_sets - confirm it is the intended contract.
        with self.assertRaises(UnboundLocalError):
            gs.glycan_set_comparison(handle, handle)

    def test_same_kcf(self):
        """Comparing a fixture with itself yields pairwise-equal results."""
        kcf = self.kcinput["ringsfinger"]
        # Two independent handles over the same text, so each has its own
        # read position.
        uniquesetA, uniquesetB, union, intersection, AB, BA = gs.glycan_set_comparison(
            self._open_kcf(kcf), self._open_kcf(kcf))
        self.assertEqual(uniquesetA, uniquesetB)
        self.assertEqual(union, intersection)
        self.assertEqual(AB, BA)

    def test_diff_kcf(self):
        """``ringsfinger`` versus ``ringsmcaw`` yields pairwise-different results."""
        self._assert_sets_differ(self.kcinput["ringsfinger"],
                                 self.kcinput["ringsmcaw"])

    def test_diff_kcf_modified(self):
        """``ringsfinger`` versus ``ringsmcaw_modified`` yields pairwise-different results."""
        self._assert_sets_differ(self.kcinput["ringsfinger"],
                                 self.kcinput["ringsmcaw_modified"])

    # def test_bug_read_kcf(self):
    #     """
    #     readkcf is not a full implementation. Fails on ringsmcaw test set
    #     :return:
    #     """
    #     import StringIO
    #
    #     kcf = self.kcinput["ringsfinger"]
    #     kcf2 = self.kcinput["ringsmcaw"]
    #     handle = StringIO.StringIO(''.join(kcf))
    #     handle2 = StringIO.StringIO(''.join(kcf2))
    #     with self.assertRaises(ValueError):
    #         uniquesetA, uniquesetB, union, intersection, AB, BA = gs.glycan_set_comparison(handle, handle2)

    def test_empty_stream(self):
        """Empty or missing inputs (``""``, ``[]``, ``None``) must raise ``IOError``."""
        # The result is deliberately not unpacked: the call is expected to
        # raise, so any assignment target would never be bound.
        with self.assertRaises(IOError):
            gs.glycan_set_comparison("", "")
        with self.assertRaises(IOError):
            gs.glycan_set_comparison([], [])
        with self.assertRaises(IOError):
            gs.glycan_set_comparison(None, None)
|
|
552
|
|
553
|
|
def run_tests():
    """Run the test cases of this module through the standard unittest runner."""
    unittest.main()
|
|
556
|
|
557
|
|
# Script entry point: run the suite only when executed directly, not on import.
if __name__ == '__main__':
    run_tests()
|