comparison join_subtract_group/glycan_set_operations/test_sets.py @ 0:89592faa2875 draft

Uploaded
author chrisb
date Wed, 23 Mar 2016 14:35:56 -0400
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:89592faa2875
1 __license__ = "MIT"
2
3 import unittest
4 import glycan_sets as gs
5
6
7 class SimpleUnitTest(unittest.TestCase):
8 def setUp(self):
9 import os
10
11 self.kcinput = {"ringsmcaw": """ENTRY G04845 Glycan
12 COMPOSITION (Gal)3 (Glc)1 (GlcNAc)2 (LFuc)2 (Neu5Ac)1
13 MASS 1656.5
14 DBLINKS CCSD: 23949
15 GlycomeDB: 20420
16 JCGGDB: JCGG-STR011245
17 NODE 9
18 1 Glc 0 0
19 2 Gal -10 0
20 3 GlcNAc -20 10
21 4 GlcNAc -20 -10
22 5 Gal -30 15
23 6 LFuc -30 5
24 7 LFuc -30 -5
25 8 Gal -30 -15
26 9 Neu5Ac -40 15
27 EDGE 8
28 1 2:b1 1:4
29 2 3:b1 2:6
30 3 4:b1 2:3
31 4 5:b1 3:4
32 5 6:a1 3:3
33 6 7:a1 4:4
34 7 8:b1 4:3
35 8 9:a2 5:3
36 ///
37 ENTRY G05108 Glycan
38 COMPOSITION (Gal)2 (GalNAc)1 (GlcNAc)1 (LFuc)1 (Neu5Ac)1 (S)1
39 MASS 1266.2
40 CLASS Glycoprotein; O-Glycan
41 DBLINKS CCSD: 33353
42 GlycomeDB: 20590
43 JCGGDB: JCGG-STR011425
44 NODE 7
45 1 GalNAc 20 1
46 2 Gal 10 1
47 3 GlcNAc 0 1
48 4 S -5 5
49 5 Gal -10 1
50 6 LFuc -10 -4
51 7 Neu5Ac -20 1
52 EDGE 6
53 1 2:b1 1:3
54 2 3:b1 2:3
55 3 4 3:6
56 4 5:b1 3:4
57 5 6:a1 3:3
58 6 7:a2 5:3
59 ///
60
61 ENTRY G05121 Glycan
62 COMPOSITION (Gal)2 (GalNAc)1 (GlcNAc)1 (LFuc)1 (Neu5Ac)2
63 MASS 1477.3
64 CLASS Glycoprotein; O-Glycan
65 DBLINKS CCSD: 33350
66 GlycomeDB: 20599
67 JCGGDB: JCGG-STR011433
68 NODE 7
69 1 GalNAc 0 0
70 2 Neu5Ac -10 5
71 3 Gal -10 -5
72 4 GlcNAc -20 -5
73 5 Gal -30 0
74 6 LFuc -30 -10
75 7 Neu5Ac -40 0
76 EDGE 6
77 1 2:a2 1:6
78 2 3:b1 1:3
79 3 4:b1 3:3
80 4 5:b1 4:4
81 5 6:a1 4:3
82 6 7:a2 5:3
83 ///
84
85 ENTRY G04183 Glycan
86 COMPOSITION (Gal)3 (GlcNAc)5 (LFuc)1 (Man)3 (Neu5Ac)3
87 MASS 3026.8
88 CLASS Glycoprotein; N-Glycan
89 DBLINKS CCSD: 41981
90 GlycomeDB: 19974
91 JCGGDB: JCGG-STR010756
92 NODE 15
93 1 GlcNAc 0 0
94 2 GlcNAc -10 0
95 3 Man -20 0
96 4 Man -30 10
97 5 Man -30 -10
98 6 GlcNAc -40 10
99 7 GlcNAc -40 -5
100 8 GlcNAc -40 -15
101 9 Gal -50 10
102 10 Gal -50 0
103 11 LFuc -50 -10
104 12 Gal -50 -15
105 13 Neu5Ac -60 10
106 14 Neu5Ac -60 0
107 15 Neu5Ac -60 -15
108 EDGE 14
109 1 2:b1 1:4
110 2 3:b1 2:4
111 3 4:a1 3:6
112 4 5:a1 3:3
113 5 6:b1 4:2
114 6 7:b1 5:4
115 7 8:b1 5:2
116 8 9:b1 6:4
117 9 10:b1 7:4
118 10 11:a1 7:3
119 11 12:b1 8:4
120 12 13:a2 9:6
121 13 14:a2 10:3
122 14 15:a2 12:6
123 ///
124
125 ENTRY G04329 Glycan
126 COMPOSITION (Gal)3 (GlcNAc)3 (LFuc)3 (Neu5Ac)1
127 MASS 1843.7
128 DBLINKS CCSD: 36620
129 GlycomeDB: 20084
130 JCGGDB: JCGG-STR010874
131 NODE 10
132 1 GlcNAc 27 -5
133 2 Gal 18 0
134 3 LFuc 18 -10
135 4 GlcNAc 9 0
136 5 Gal 0 5
137 6 LFuc 0 -5
138 7 GlcNAc -9 5
139 8 Gal -18 10
140 9 LFuc -18 0
141 10 Neu5Ac -27 10
142 EDGE 9
143 1 2:b1 1:4
144 2 3:a1 1:3
145 3 4:b1 2:3
146 4 5:b1 4:4
147 5 6:a1 4:3
148 6 7:b1 5:3
149 7 8:b1 7:4
150 8 9:a1 7:3
151 9 10:a2 8:3
152 ///
153 ENTRY G04804 Glycan
154 COMPOSITION (Gal)3 (GlcNAc)5 (LFuc)1 (Man)3 (Neu5Ac)3
155 MASS 3026.8
156 CLASS Glycoprotein; N-Glycan
157 REFERENCE 1 [PMID:6704968]
158 Chandrasekaran EV, Davila M, Nixon D, Mendicino J.
159 Structures of the oligosaccharide chains of two forms of alpha 1-acid glycoprotein purified from liver metastases of lung, colon, and breast tumors.
160 Cancer. Res. 44 (1984) 1557-67.
161 DBLINKS CCSD: 8168
162 GlycomeDB: 33225
163 JCGGDB: JCGG-STR024138
164 NODE 15
165 1 GlcNAc 27.2 1.6
166 2 GlcNAc 17.2 1.6
167 3 Man 8.2 1.6
168 4 Man 0.2 7.6
169 5 Man 0.2 -4.4
170 6 GlcNAc -8.8 7.6
171 7 GlcNAc -8.8 -0.4
172 8 GlcNAc -8.8 -8.4
173 9 Gal -17.8 7.6
174 10 Gal -17.8 3.6
175 11 LFuc -17.8 -4.4
176 12 Gal -17.8 -8.4
177 13 Neu5Ac -26.8 7.6
178 14 Neu5Ac -26.8 3.6
179 15 Neu5Ac -26.8 -8.4
180 EDGE 14
181 1 2:b1 1:4
182 2 3:b1 2:4
183 3 4:a1 3:6
184 4 5:a1 3:3
185 5 6:b1 4:2
186 6 7:b1 5:4
187 7 8:b1 5:2
188 8 9:b1 6:4
189 9 10:b1 7:4
190 10 11:a1 7:3
191 11 12:b1 8:4
192 12 13:a2 9:3
193 13 14:a2 10:3
194 14 15:a2 12:6
195 ///
196 """, "ringsmcaw_modified": """ENTRY G04845 Glycan
197 NODE 9
198 1 Glc 0 0
199 2 Gal -10 0
200 3 GlcNAc -20 10
201 4 GlcNAc -20 -10
202 5 Gal -30 15
203 6 LFuc -30 5
204 7 LFuc -30 -5
205 8 Gal -30 -15
206 9 Neu5Ac -40 15
207 EDGE 8
208 1 2:b1 1:4
209 2 3:b1 2:6
210 3 4:b1 2:3
211 4 5:b1 3:4
212 5 6:a1 3:3
213 6 7:a1 4:4
214 7 8:b1 4:3
215 8 9:a2 5:3
216 ///
217 ENTRY G05108 Glycan
218 NODE 7
219 1 GalNAc 20 1
220 2 Gal 10 1
221 3 GlcNAc 0 1
222 4 S -5 5
223 5 Gal -10 1
224 6 LFuc -10 -4
225 7 Neu5Ac -20 1
226 EDGE 6
227 1 2:b1 1:3
228 2 3:b1 2:3
229 3 4 3:6
230 4 5:b1 3:4
231 5 6:a1 3:3
232 6 7:a2 5:3
233 ///
234 ENTRY G05121 Glycan
235 NODE 7
236 1 GalNAc 0 0
237 2 Neu5Ac -10 5
238 3 Gal -10 -5
239 4 GlcNAc -20 -5
240 5 Gal -30 0
241 6 LFuc -30 -10
242 7 Neu5Ac -40 0
243 EDGE 6
244 1 2:a2 1:6
245 2 3:b1 1:3
246 3 4:b1 3:3
247 4 5:b1 4:4
248 5 6:a1 4:3
249 6 7:a2 5:3
250 ///
251 ENTRY G04183 Glycan
252 NODE 15
253 1 GlcNAc 0 0
254 2 GlcNAc -10 0
255 3 Man -20 0
256 4 Man -30 10
257 5 Man -30 -10
258 6 GlcNAc -40 10
259 7 GlcNAc -40 -5
260 8 GlcNAc -40 -15
261 9 Gal -50 10
262 10 Gal -50 0
263 11 LFuc -50 -10
264 12 Gal -50 -15
265 13 Neu5Ac -60 10
266 14 Neu5Ac -60 0
267 15 Neu5Ac -60 -15
268 EDGE 14
269 1 2:b1 1:4
270 2 3:b1 2:4
271 3 4:a1 3:6
272 4 5:a1 3:3
273 5 6:b1 4:2
274 6 7:b1 5:4
275 7 8:b1 5:2
276 8 9:b1 6:4
277 9 10:b1 7:4
278 10 11:a1 7:3
279 11 12:b1 8:4
280 12 13:a2 9:6
281 13 14:a2 10:3
282 14 15:a2 12:6
283 ///
284 ENTRY G04329 Glycan
285 NODE 10
286 1 GlcNAc 27 -5
287 2 Gal 18 0
288 3 LFuc 18 -10
289 4 GlcNAc 9 0
290 5 Gal 0 5
291 6 LFuc 0 -5
292 7 GlcNAc -9 5
293 8 Gal -18 10
294 9 LFuc -18 0
295 10 Neu5Ac -27 10
296 EDGE 9
297 1 2:b1 1:4
298 2 3:a1 1:3
299 3 4:b1 2:3
300 4 5:b1 4:4
301 5 6:a1 4:3
302 6 7:b1 5:3
303 7 8:b1 7:4
304 8 9:a1 7:3
305 9 10:a2 8:3
306 ///
307 ENTRY G04804 Glycan
308 NODE 15
309 1 GlcNAc 27.2 1.6
310 2 GlcNAc 17.2 1.6
311 3 Man 8.2 1.6
312 4 Man 0.2 7.6
313 5 Man 0.2 -4.4
314 6 GlcNAc -8.8 7.6
315 7 GlcNAc -8.8 -0.4
316 8 GlcNAc -8.8 -8.4
317 9 Gal -17.8 7.6
318 10 Gal -17.8 3.6
319 11 LFuc -17.8 -4.4
320 12 Gal -17.8 -8.4
321 13 Neu5Ac -26.8 7.6
322 14 Neu5Ac -26.8 3.6
323 15 Neu5Ac -26.8 -8.4
324 EDGE 14
325 1 2:b1 1:4
326 2 3:b1 2:4
327 3 4:a1 3:6
328 4 5:a1 3:3
329 5 6:b1 4:2
330 6 7:b1 5:4
331 7 8:b1 5:2
332 8 9:b1 6:4
333 9 10:b1 7:4
334 10 11:a1 7:3
335 11 12:b1 8:4
336 12 13:a2 9:3
337 13 14:a2 10:3
338 14 15:a2 12:6
339 ///
340 """, "ringsfinger": """ENTRY G00015 Glycan
341 NODE 8
342 1 Asn 20 0
343 2 GlcNAc 12 0
344 3 GlcNAc 3 0
345 4 Man -5 0
346 5 Man -12 5
347 6 Man -12 -5
348 7 GlcNAc -20 5
349 8 GlcNAc -20 -5
350 EDGE 7
351 1 2:b1 1
352 2 3:b1 2:4
353 3 4:b1 3:4
354 4 5:a1 4:6
355 5 6:a1 4:3
356 6 7:b1 5:2
357 7 8:b1 6:2
358 ///
359 ENTRY G00016 Glycan
360 NODE 9
361 1 Asn 20 3
362 2 GlcNAc 12 3
363 3 LFuc 4 8
364 4 GlcNAc 3 -2
365 5 Man -5 -2
366 6 Man -12 3
367 7 Man -12 -7
368 8 GlcNAc -20 3
369 9 GlcNAc -20 -7
370 EDGE 8
371 1 2:b1 1
372 2 3:a1 2:6
373 3 4:b1 2:4
374 4 5:b1 4:4
375 5 6:a1 5:6
376 6 7:a1 5:3
377 7 8:b1 6:2
378 8 9:b1 7:2
379 ///
380 ENTRY G00017 Glycan
381 NODE 11
382 1 Asn 24 3
383 2 GlcNAc 14 3
384 3 LFuc 7 8
385 4 GlcNAc 6 -2
386 5 Man -2 -2
387 6 Man -8 3
388 7 Man -8 -7
389 8 GlcNAc -16 3
390 9 GlcNAc -16 -7
391 10 Gal -24 3
392 11 Gal -24 -7
393 EDGE 10
394 1 2:b1 1
395 2 3:a1 2:6
396 3 4:b1 2:4
397 4 5:b1 4:4
398 5 6:a1 5:6
399 6 7:a1 5:3
400 7 8:b1 6:2
401 8 9:b1 7:2
402 9 10:b1 8:4
403 10 11:b1 9:4
404 ///
405 ENTRY G00018 Glycan
406 NODE 13
407 1 Asn 28 3
408 2 GlcNAc 18 3
409 3 LFuc 10 8
410 4 GlcNAc 9 -2
411 5 Man 1 -2
412 6 Man -5 4
413 7 Man -5 -8
414 8 GlcNAc -13 4
415 9 GlcNAc -13 -8
416 10 Gal -21 4
417 11 Gal -21 -8
418 12 Neu5Ac -29 4
419 13 Neu5Ac -29 -8
420 EDGE 12
421 1 2:b1 1
422 2 3:a1 2:6
423 3 4:b1 2:4
424 4 5:b1 4:4
425 5 6:a1 5:6
426 6 7:a1 5:3
427 7 8:b1 6:2
428 8 9:b1 7:2
429 9 10:b1 8:4
430 10 11:b1 9:4
431 11 12:a2 10:6
432 12 13:a2 11:6
433 ///
434 ENTRY G00019 Glycan
435 NODE 9
436 1 Asn 20 0
437 2 GlcNAc 12 0
438 3 GlcNAc 3 0
439 4 Man -5 0
440 5 Man -12 5
441 6 Man -12 -5
442 7 GlcNAc -15 0
443 8 GlcNAc -20 5
444 9 GlcNAc -20 -5
445 EDGE 8
446 1 2:b1 1
447 2 3:b1 2:4
448 3 4:b1 3:4
449 4 5:a1 4:6
450 5 6:a1 4:3
451 6 7:b1 4:4
452 7 8:b1 5:2
453 8 9:b1 6:2
454 ///
455 ENTRY G00020 Glycan
456 NODE 9
457 1 Asn 20 3
458 2 GlcNAc 11 3
459 3 GlcNAc 2 3
460 4 Man -6 3
461 5 Man -13 9
462 6 Man -13 -3
463 7 GlcNAc -21 9
464 8 GlcNAc -21 2
465 9 GlcNAc -21 -8
466 EDGE 8
467 1 2:b1 1
468 2 3:b1 2:4
469 3 4:b1 3:4
470 4 5:a1 4:6
471 5 6:a1 4:3
472 6 7:b1 5:2
473 7 8:b1 6:4
474 8 9:b1 6:2
475 ///
476 """, "ringsbroken": """ABCDEFGENTRY G00015 Glycan
477 NODE 8
478 1 Asn 20 0
479 2 GlcNAc 12 0
480 """
481 }
482 os.environ["http_proxy"] = "" # work around for IOError: [Errno url error] invalid proxy for http:
483 pass
484
485 def tearDown(self):
486 pass
487
488 def test_broken_kcf(self):
489 import StringIO
490
491 brk = self.kcinput["ringsbroken"]
492 handle = StringIO.StringIO(''.join(brk))
493 with self.assertRaises(UnboundLocalError):
494 gs.glycan_set_comparison(handle, handle)
495
496 def test_same_kcf(self):
497 import StringIO
498
499 kcf = self.kcinput["ringsfinger"]
500 handle = StringIO.StringIO(''.join(kcf))
501 handle2 = StringIO.StringIO(''.join(kcf))
502 uniquesetA, uniquesetB, union, intersection, AB, BA = gs.glycan_set_comparison(handle, handle2)
503 self.assertEqual(uniquesetA, uniquesetB)
504 self.assertEqual(union, intersection)
505 self.assertEqual(AB, BA)
506
507 def test_diff_kcf(self):
508 import StringIO
509
510 kcf = self.kcinput["ringsfinger"]
511 kcf2 = self.kcinput["ringsmcaw"]
512 handle = StringIO.StringIO(''.join(kcf))
513 handle2 = StringIO.StringIO(''.join(kcf2))
514 uniquesetA, uniquesetB, union, intersection, AB, BA = gs.glycan_set_comparison(handle, handle2)
515 self.assertNotEqual(uniquesetA, uniquesetB)
516 self.assertNotEqual(union, intersection)
517 self.assertNotEqual(AB, BA)
518
519 def test_diff_kcf_modified(self):
520 import StringIO
521
522 kcf = self.kcinput["ringsfinger"]
523 kcf2 = self.kcinput["ringsmcaw_modified"]
524 handle = StringIO.StringIO(''.join(kcf))
525 handle2 = StringIO.StringIO(''.join(kcf2))
526 uniquesetA, uniquesetB, union, intersection, AB, BA = gs.glycan_set_comparison(handle, handle2)
527 self.assertNotEqual(uniquesetA, uniquesetB)
528 self.assertNotEqual(union, intersection)
529 self.assertNotEqual(AB, BA)
530
531 # def test_bug_read_kcf(self):
532 # """
533 # readkcf is not a full implementation. Fails on ringsmcaw test set
534 # :return:
535 # """
536 # import StringIO
537 #
538 # kcf = self.kcinput["ringsfinger"]
539 # kcf2 = self.kcinput["ringsmcaw"]
540 # handle = StringIO.StringIO(''.join(kcf))
541 # handle2 = StringIO.StringIO(''.join(kcf2))
542 # with self.assertRaises(ValueError):
543 # uniquesetA, uniquesetB, union, intersection, AB, BA = gs.glycan_set_comparison(handle, handle2)
544
545 def test_empty_stream(self):
546 with self.assertRaises(IOError):
547 uniquesetA, uniquesetB, union, intersection, AB, BA = gs.glycan_set_comparison("", "")
548 with self.assertRaises(IOError):
549 uniquesetA, uniquesetB, union, intersection, AB, BA = gs.glycan_set_comparison([], [])
550 with self.assertRaises(IOError):
551 uniquesetA, uniquesetB, union, intersection, AB, BA = gs.glycan_set_comparison(None, None)
552
553
def run_tests():
    """Hand control to ``unittest.main()`` to run this module's tests."""
    unittest.main()
556
557
# Run the suite only when this file is executed directly, not on import.
if __name__ == "__main__":
    run_tests()