Mercurial > repos > chrisb > gap_all_glycan_tools
comparison join_subtract_group/glycan_set_operations/test_sets.py @ 0:89592faa2875 draft
Uploaded
author | chrisb |
---|---|
date | Wed, 23 Mar 2016 14:35:56 -0400 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:89592faa2875 |
---|---|
1 __license__ = "MIT" | |
2 | |
3 import unittest | |
4 import glycan_sets as gs | |
5 | |
6 | |
7 class SimpleUnitTest(unittest.TestCase): | |
8 def setUp(self): | |
9 import os | |
10 | |
11 self.kcinput = {"ringsmcaw": """ENTRY G04845 Glycan | |
12 COMPOSITION (Gal)3 (Glc)1 (GlcNAc)2 (LFuc)2 (Neu5Ac)1 | |
13 MASS 1656.5 | |
14 DBLINKS CCSD: 23949 | |
15 GlycomeDB: 20420 | |
16 JCGGDB: JCGG-STR011245 | |
17 NODE 9 | |
18 1 Glc 0 0 | |
19 2 Gal -10 0 | |
20 3 GlcNAc -20 10 | |
21 4 GlcNAc -20 -10 | |
22 5 Gal -30 15 | |
23 6 LFuc -30 5 | |
24 7 LFuc -30 -5 | |
25 8 Gal -30 -15 | |
26 9 Neu5Ac -40 15 | |
27 EDGE 8 | |
28 1 2:b1 1:4 | |
29 2 3:b1 2:6 | |
30 3 4:b1 2:3 | |
31 4 5:b1 3:4 | |
32 5 6:a1 3:3 | |
33 6 7:a1 4:4 | |
34 7 8:b1 4:3 | |
35 8 9:a2 5:3 | |
36 /// | |
37 ENTRY G05108 Glycan | |
38 COMPOSITION (Gal)2 (GalNAc)1 (GlcNAc)1 (LFuc)1 (Neu5Ac)1 (S)1 | |
39 MASS 1266.2 | |
40 CLASS Glycoprotein; O-Glycan | |
41 DBLINKS CCSD: 33353 | |
42 GlycomeDB: 20590 | |
43 JCGGDB: JCGG-STR011425 | |
44 NODE 7 | |
45 1 GalNAc 20 1 | |
46 2 Gal 10 1 | |
47 3 GlcNAc 0 1 | |
48 4 S -5 5 | |
49 5 Gal -10 1 | |
50 6 LFuc -10 -4 | |
51 7 Neu5Ac -20 1 | |
52 EDGE 6 | |
53 1 2:b1 1:3 | |
54 2 3:b1 2:3 | |
55 3 4 3:6 | |
56 4 5:b1 3:4 | |
57 5 6:a1 3:3 | |
58 6 7:a2 5:3 | |
59 /// | |
60 | |
61 ENTRY G05121 Glycan | |
62 COMPOSITION (Gal)2 (GalNAc)1 (GlcNAc)1 (LFuc)1 (Neu5Ac)2 | |
63 MASS 1477.3 | |
64 CLASS Glycoprotein; O-Glycan | |
65 DBLINKS CCSD: 33350 | |
66 GlycomeDB: 20599 | |
67 JCGGDB: JCGG-STR011433 | |
68 NODE 7 | |
69 1 GalNAc 0 0 | |
70 2 Neu5Ac -10 5 | |
71 3 Gal -10 -5 | |
72 4 GlcNAc -20 -5 | |
73 5 Gal -30 0 | |
74 6 LFuc -30 -10 | |
75 7 Neu5Ac -40 0 | |
76 EDGE 6 | |
77 1 2:a2 1:6 | |
78 2 3:b1 1:3 | |
79 3 4:b1 3:3 | |
80 4 5:b1 4:4 | |
81 5 6:a1 4:3 | |
82 6 7:a2 5:3 | |
83 /// | |
84 | |
85 ENTRY G04183 Glycan | |
86 COMPOSITION (Gal)3 (GlcNAc)5 (LFuc)1 (Man)3 (Neu5Ac)3 | |
87 MASS 3026.8 | |
88 CLASS Glycoprotein; N-Glycan | |
89 DBLINKS CCSD: 41981 | |
90 GlycomeDB: 19974 | |
91 JCGGDB: JCGG-STR010756 | |
92 NODE 15 | |
93 1 GlcNAc 0 0 | |
94 2 GlcNAc -10 0 | |
95 3 Man -20 0 | |
96 4 Man -30 10 | |
97 5 Man -30 -10 | |
98 6 GlcNAc -40 10 | |
99 7 GlcNAc -40 -5 | |
100 8 GlcNAc -40 -15 | |
101 9 Gal -50 10 | |
102 10 Gal -50 0 | |
103 11 LFuc -50 -10 | |
104 12 Gal -50 -15 | |
105 13 Neu5Ac -60 10 | |
106 14 Neu5Ac -60 0 | |
107 15 Neu5Ac -60 -15 | |
108 EDGE 14 | |
109 1 2:b1 1:4 | |
110 2 3:b1 2:4 | |
111 3 4:a1 3:6 | |
112 4 5:a1 3:3 | |
113 5 6:b1 4:2 | |
114 6 7:b1 5:4 | |
115 7 8:b1 5:2 | |
116 8 9:b1 6:4 | |
117 9 10:b1 7:4 | |
118 10 11:a1 7:3 | |
119 11 12:b1 8:4 | |
120 12 13:a2 9:6 | |
121 13 14:a2 10:3 | |
122 14 15:a2 12:6 | |
123 /// | |
124 | |
125 ENTRY G04329 Glycan | |
126 COMPOSITION (Gal)3 (GlcNAc)3 (LFuc)3 (Neu5Ac)1 | |
127 MASS 1843.7 | |
128 DBLINKS CCSD: 36620 | |
129 GlycomeDB: 20084 | |
130 JCGGDB: JCGG-STR010874 | |
131 NODE 10 | |
132 1 GlcNAc 27 -5 | |
133 2 Gal 18 0 | |
134 3 LFuc 18 -10 | |
135 4 GlcNAc 9 0 | |
136 5 Gal 0 5 | |
137 6 LFuc 0 -5 | |
138 7 GlcNAc -9 5 | |
139 8 Gal -18 10 | |
140 9 LFuc -18 0 | |
141 10 Neu5Ac -27 10 | |
142 EDGE 9 | |
143 1 2:b1 1:4 | |
144 2 3:a1 1:3 | |
145 3 4:b1 2:3 | |
146 4 5:b1 4:4 | |
147 5 6:a1 4:3 | |
148 6 7:b1 5:3 | |
149 7 8:b1 7:4 | |
150 8 9:a1 7:3 | |
151 9 10:a2 8:3 | |
152 /// | |
153 ENTRY G04804 Glycan | |
154 COMPOSITION (Gal)3 (GlcNAc)5 (LFuc)1 (Man)3 (Neu5Ac)3 | |
155 MASS 3026.8 | |
156 CLASS Glycoprotein; N-Glycan | |
157 REFERENCE 1 [PMID:6704968] | |
158 Chandrasekaran EV, Davila M, Nixon D, Mendicino J. | |
159 Structures of the oligosaccharide chains of two forms of alpha 1-acid glycoprotein purified from liver metastases of lung, colon, and breast tumors. | |
160 Cancer. Res. 44 (1984) 1557-67. | |
161 DBLINKS CCSD: 8168 | |
162 GlycomeDB: 33225 | |
163 JCGGDB: JCGG-STR024138 | |
164 NODE 15 | |
165 1 GlcNAc 27.2 1.6 | |
166 2 GlcNAc 17.2 1.6 | |
167 3 Man 8.2 1.6 | |
168 4 Man 0.2 7.6 | |
169 5 Man 0.2 -4.4 | |
170 6 GlcNAc -8.8 7.6 | |
171 7 GlcNAc -8.8 -0.4 | |
172 8 GlcNAc -8.8 -8.4 | |
173 9 Gal -17.8 7.6 | |
174 10 Gal -17.8 3.6 | |
175 11 LFuc -17.8 -4.4 | |
176 12 Gal -17.8 -8.4 | |
177 13 Neu5Ac -26.8 7.6 | |
178 14 Neu5Ac -26.8 3.6 | |
179 15 Neu5Ac -26.8 -8.4 | |
180 EDGE 14 | |
181 1 2:b1 1:4 | |
182 2 3:b1 2:4 | |
183 3 4:a1 3:6 | |
184 4 5:a1 3:3 | |
185 5 6:b1 4:2 | |
186 6 7:b1 5:4 | |
187 7 8:b1 5:2 | |
188 8 9:b1 6:4 | |
189 9 10:b1 7:4 | |
190 10 11:a1 7:3 | |
191 11 12:b1 8:4 | |
192 12 13:a2 9:3 | |
193 13 14:a2 10:3 | |
194 14 15:a2 12:6 | |
195 /// | |
196 """, "ringsmcaw_modified": """ENTRY G04845 Glycan | |
197 NODE 9 | |
198 1 Glc 0 0 | |
199 2 Gal -10 0 | |
200 3 GlcNAc -20 10 | |
201 4 GlcNAc -20 -10 | |
202 5 Gal -30 15 | |
203 6 LFuc -30 5 | |
204 7 LFuc -30 -5 | |
205 8 Gal -30 -15 | |
206 9 Neu5Ac -40 15 | |
207 EDGE 8 | |
208 1 2:b1 1:4 | |
209 2 3:b1 2:6 | |
210 3 4:b1 2:3 | |
211 4 5:b1 3:4 | |
212 5 6:a1 3:3 | |
213 6 7:a1 4:4 | |
214 7 8:b1 4:3 | |
215 8 9:a2 5:3 | |
216 /// | |
217 ENTRY G05108 Glycan | |
218 NODE 7 | |
219 1 GalNAc 20 1 | |
220 2 Gal 10 1 | |
221 3 GlcNAc 0 1 | |
222 4 S -5 5 | |
223 5 Gal -10 1 | |
224 6 LFuc -10 -4 | |
225 7 Neu5Ac -20 1 | |
226 EDGE 6 | |
227 1 2:b1 1:3 | |
228 2 3:b1 2:3 | |
229 3 4 3:6 | |
230 4 5:b1 3:4 | |
231 5 6:a1 3:3 | |
232 6 7:a2 5:3 | |
233 /// | |
234 ENTRY G05121 Glycan | |
235 NODE 7 | |
236 1 GalNAc 0 0 | |
237 2 Neu5Ac -10 5 | |
238 3 Gal -10 -5 | |
239 4 GlcNAc -20 -5 | |
240 5 Gal -30 0 | |
241 6 LFuc -30 -10 | |
242 7 Neu5Ac -40 0 | |
243 EDGE 6 | |
244 1 2:a2 1:6 | |
245 2 3:b1 1:3 | |
246 3 4:b1 3:3 | |
247 4 5:b1 4:4 | |
248 5 6:a1 4:3 | |
249 6 7:a2 5:3 | |
250 /// | |
251 ENTRY G04183 Glycan | |
252 NODE 15 | |
253 1 GlcNAc 0 0 | |
254 2 GlcNAc -10 0 | |
255 3 Man -20 0 | |
256 4 Man -30 10 | |
257 5 Man -30 -10 | |
258 6 GlcNAc -40 10 | |
259 7 GlcNAc -40 -5 | |
260 8 GlcNAc -40 -15 | |
261 9 Gal -50 10 | |
262 10 Gal -50 0 | |
263 11 LFuc -50 -10 | |
264 12 Gal -50 -15 | |
265 13 Neu5Ac -60 10 | |
266 14 Neu5Ac -60 0 | |
267 15 Neu5Ac -60 -15 | |
268 EDGE 14 | |
269 1 2:b1 1:4 | |
270 2 3:b1 2:4 | |
271 3 4:a1 3:6 | |
272 4 5:a1 3:3 | |
273 5 6:b1 4:2 | |
274 6 7:b1 5:4 | |
275 7 8:b1 5:2 | |
276 8 9:b1 6:4 | |
277 9 10:b1 7:4 | |
278 10 11:a1 7:3 | |
279 11 12:b1 8:4 | |
280 12 13:a2 9:6 | |
281 13 14:a2 10:3 | |
282 14 15:a2 12:6 | |
283 /// | |
284 ENTRY G04329 Glycan | |
285 NODE 10 | |
286 1 GlcNAc 27 -5 | |
287 2 Gal 18 0 | |
288 3 LFuc 18 -10 | |
289 4 GlcNAc 9 0 | |
290 5 Gal 0 5 | |
291 6 LFuc 0 -5 | |
292 7 GlcNAc -9 5 | |
293 8 Gal -18 10 | |
294 9 LFuc -18 0 | |
295 10 Neu5Ac -27 10 | |
296 EDGE 9 | |
297 1 2:b1 1:4 | |
298 2 3:a1 1:3 | |
299 3 4:b1 2:3 | |
300 4 5:b1 4:4 | |
301 5 6:a1 4:3 | |
302 6 7:b1 5:3 | |
303 7 8:b1 7:4 | |
304 8 9:a1 7:3 | |
305 9 10:a2 8:3 | |
306 /// | |
307 ENTRY G04804 Glycan | |
308 NODE 15 | |
309 1 GlcNAc 27.2 1.6 | |
310 2 GlcNAc 17.2 1.6 | |
311 3 Man 8.2 1.6 | |
312 4 Man 0.2 7.6 | |
313 5 Man 0.2 -4.4 | |
314 6 GlcNAc -8.8 7.6 | |
315 7 GlcNAc -8.8 -0.4 | |
316 8 GlcNAc -8.8 -8.4 | |
317 9 Gal -17.8 7.6 | |
318 10 Gal -17.8 3.6 | |
319 11 LFuc -17.8 -4.4 | |
320 12 Gal -17.8 -8.4 | |
321 13 Neu5Ac -26.8 7.6 | |
322 14 Neu5Ac -26.8 3.6 | |
323 15 Neu5Ac -26.8 -8.4 | |
324 EDGE 14 | |
325 1 2:b1 1:4 | |
326 2 3:b1 2:4 | |
327 3 4:a1 3:6 | |
328 4 5:a1 3:3 | |
329 5 6:b1 4:2 | |
330 6 7:b1 5:4 | |
331 7 8:b1 5:2 | |
332 8 9:b1 6:4 | |
333 9 10:b1 7:4 | |
334 10 11:a1 7:3 | |
335 11 12:b1 8:4 | |
336 12 13:a2 9:3 | |
337 13 14:a2 10:3 | |
338 14 15:a2 12:6 | |
339 /// | |
340 """, "ringsfinger": """ENTRY G00015 Glycan | |
341 NODE 8 | |
342 1 Asn 20 0 | |
343 2 GlcNAc 12 0 | |
344 3 GlcNAc 3 0 | |
345 4 Man -5 0 | |
346 5 Man -12 5 | |
347 6 Man -12 -5 | |
348 7 GlcNAc -20 5 | |
349 8 GlcNAc -20 -5 | |
350 EDGE 7 | |
351 1 2:b1 1 | |
352 2 3:b1 2:4 | |
353 3 4:b1 3:4 | |
354 4 5:a1 4:6 | |
355 5 6:a1 4:3 | |
356 6 7:b1 5:2 | |
357 7 8:b1 6:2 | |
358 /// | |
359 ENTRY G00016 Glycan | |
360 NODE 9 | |
361 1 Asn 20 3 | |
362 2 GlcNAc 12 3 | |
363 3 LFuc 4 8 | |
364 4 GlcNAc 3 -2 | |
365 5 Man -5 -2 | |
366 6 Man -12 3 | |
367 7 Man -12 -7 | |
368 8 GlcNAc -20 3 | |
369 9 GlcNAc -20 -7 | |
370 EDGE 8 | |
371 1 2:b1 1 | |
372 2 3:a1 2:6 | |
373 3 4:b1 2:4 | |
374 4 5:b1 4:4 | |
375 5 6:a1 5:6 | |
376 6 7:a1 5:3 | |
377 7 8:b1 6:2 | |
378 8 9:b1 7:2 | |
379 /// | |
380 ENTRY G00017 Glycan | |
381 NODE 11 | |
382 1 Asn 24 3 | |
383 2 GlcNAc 14 3 | |
384 3 LFuc 7 8 | |
385 4 GlcNAc 6 -2 | |
386 5 Man -2 -2 | |
387 6 Man -8 3 | |
388 7 Man -8 -7 | |
389 8 GlcNAc -16 3 | |
390 9 GlcNAc -16 -7 | |
391 10 Gal -24 3 | |
392 11 Gal -24 -7 | |
393 EDGE 10 | |
394 1 2:b1 1 | |
395 2 3:a1 2:6 | |
396 3 4:b1 2:4 | |
397 4 5:b1 4:4 | |
398 5 6:a1 5:6 | |
399 6 7:a1 5:3 | |
400 7 8:b1 6:2 | |
401 8 9:b1 7:2 | |
402 9 10:b1 8:4 | |
403 10 11:b1 9:4 | |
404 /// | |
405 ENTRY G00018 Glycan | |
406 NODE 13 | |
407 1 Asn 28 3 | |
408 2 GlcNAc 18 3 | |
409 3 LFuc 10 8 | |
410 4 GlcNAc 9 -2 | |
411 5 Man 1 -2 | |
412 6 Man -5 4 | |
413 7 Man -5 -8 | |
414 8 GlcNAc -13 4 | |
415 9 GlcNAc -13 -8 | |
416 10 Gal -21 4 | |
417 11 Gal -21 -8 | |
418 12 Neu5Ac -29 4 | |
419 13 Neu5Ac -29 -8 | |
420 EDGE 12 | |
421 1 2:b1 1 | |
422 2 3:a1 2:6 | |
423 3 4:b1 2:4 | |
424 4 5:b1 4:4 | |
425 5 6:a1 5:6 | |
426 6 7:a1 5:3 | |
427 7 8:b1 6:2 | |
428 8 9:b1 7:2 | |
429 9 10:b1 8:4 | |
430 10 11:b1 9:4 | |
431 11 12:a2 10:6 | |
432 12 13:a2 11:6 | |
433 /// | |
434 ENTRY G00019 Glycan | |
435 NODE 9 | |
436 1 Asn 20 0 | |
437 2 GlcNAc 12 0 | |
438 3 GlcNAc 3 0 | |
439 4 Man -5 0 | |
440 5 Man -12 5 | |
441 6 Man -12 -5 | |
442 7 GlcNAc -15 0 | |
443 8 GlcNAc -20 5 | |
444 9 GlcNAc -20 -5 | |
445 EDGE 8 | |
446 1 2:b1 1 | |
447 2 3:b1 2:4 | |
448 3 4:b1 3:4 | |
449 4 5:a1 4:6 | |
450 5 6:a1 4:3 | |
451 6 7:b1 4:4 | |
452 7 8:b1 5:2 | |
453 8 9:b1 6:2 | |
454 /// | |
455 ENTRY G00020 Glycan | |
456 NODE 9 | |
457 1 Asn 20 3 | |
458 2 GlcNAc 11 3 | |
459 3 GlcNAc 2 3 | |
460 4 Man -6 3 | |
461 5 Man -13 9 | |
462 6 Man -13 -3 | |
463 7 GlcNAc -21 9 | |
464 8 GlcNAc -21 2 | |
465 9 GlcNAc -21 -8 | |
466 EDGE 8 | |
467 1 2:b1 1 | |
468 2 3:b1 2:4 | |
469 3 4:b1 3:4 | |
470 4 5:a1 4:6 | |
471 5 6:a1 4:3 | |
472 6 7:b1 5:2 | |
473 7 8:b1 6:4 | |
474 8 9:b1 6:2 | |
475 /// | |
476 """, "ringsbroken": """ABCDEFGENTRY G00015 Glycan | |
477 NODE 8 | |
478 1 Asn 20 0 | |
479 2 GlcNAc 12 0 | |
480 """ | |
481 } | |
482 os.environ["http_proxy"] = "" # work around for IOError: [Errno url error] invalid proxy for http: | |
483 pass | |
484 | |
485 def tearDown(self): | |
486 pass | |
487 | |
def test_broken_kcf(self):
    """Check that the malformed "ringsbroken" fixture makes the comparison fail.

    The fixture starts with junk characters in front of the first ENTRY
    keyword and is cut off inside its NODE section.  The failure mode
    pinned here is an UnboundLocalError raised by
    ``gs.glycan_set_comparison``.
    """
    from StringIO import StringIO

    stream = StringIO(self.kcinput["ringsbroken"])
    # NOTE(review): the very same stream object is handed in for both
    # arguments, so the second argument shares the first one's read
    # position -- confirm this is intended.
    self.assertRaises(UnboundLocalError, gs.glycan_set_comparison, stream, stream)
495 | |
def test_same_kcf(self):
    """Compare the "ringsfinger" fixture against itself.

    With identical input on both sides, each pair of results returned by
    ``gs.glycan_set_comparison`` is expected to be equal.
    """
    from StringIO import StringIO

    text = self.kcinput["ringsfinger"]
    # Two independent streams over the same text, one per argument.
    results = gs.glycan_set_comparison(StringIO(text), StringIO(text))
    uniquesetA, uniquesetB, union, intersection, AB, BA = results

    self.assertEqual(uniquesetA, uniquesetB)
    self.assertEqual(union, intersection)
    self.assertEqual(AB, BA)
506 | |
def test_diff_kcf(self):
    """Compare the "ringsfinger" fixture against the "ringsmcaw" fixture.

    The two fixtures hold different entries, so each pair of results
    returned by ``gs.glycan_set_comparison`` is expected to differ.
    """
    from StringIO import StringIO

    left = StringIO(self.kcinput["ringsfinger"])
    right = StringIO(self.kcinput["ringsmcaw"])
    results = gs.glycan_set_comparison(left, right)
    uniquesetA, uniquesetB, union, intersection, AB, BA = results

    self.assertNotEqual(uniquesetA, uniquesetB)
    self.assertNotEqual(union, intersection)
    self.assertNotEqual(AB, BA)
518 | |
def test_diff_kcf_modified(self):
    """Compare "ringsfinger" against the "ringsmcaw_modified" fixture.

    "ringsmcaw_modified" carries the same entries as "ringsmcaw" reduced
    to their ENTRY, NODE and EDGE sections.  Each pair of results returned
    by ``gs.glycan_set_comparison`` is expected to differ.
    """
    from StringIO import StringIO

    left = StringIO(self.kcinput["ringsfinger"])
    right = StringIO(self.kcinput["ringsmcaw_modified"])
    results = gs.glycan_set_comparison(left, right)
    uniquesetA, uniquesetB, union, intersection, AB, BA = results

    self.assertNotEqual(uniquesetA, uniquesetB)
    self.assertNotEqual(union, intersection)
    self.assertNotEqual(AB, BA)
530 | |
531 # def test_bug_read_kcf(self): | |
532 # """ | |
533 # readkcf is not a full implementation. Fails on ringsmcaw test set | |
534 # :return: | |
535 # """ | |
536 # import StringIO | |
537 # | |
538 # kcf = self.kcinput["ringsfinger"] | |
539 # kcf2 = self.kcinput["ringsmcaw"] | |
540 # handle = StringIO.StringIO(''.join(kcf)) | |
541 # handle2 = StringIO.StringIO(''.join(kcf2)) | |
542 # with self.assertRaises(ValueError): | |
543 # uniquesetA, uniquesetB, union, intersection, AB, BA = gs.glycan_set_comparison(handle, handle2) | |
544 | |
def test_empty_stream(self):
    """Check that empty or missing inputs are rejected with IOError.

    Three input pairs are exercised: two empty strings, two empty lists
    and two ``None`` values.  Each must make
    ``gs.glycan_set_comparison`` raise IOError.
    """
    # Each pair is spelled out literally so both arguments stay separate
    # objects (two distinct empty lists, not one list passed twice).
    empty_inputs = (
        ("", ""),
        ([], []),
        (None, None),
    )
    for first, second in empty_inputs:
        # The result is deliberately not unpacked: if the call ever stopped
        # raising, an unpacking error could otherwise hide the real
        # "IOError not raised" assertion failure.
        with self.assertRaises(IOError):
            gs.glycan_set_comparison(first, second)
552 | |
553 | |
def run_tests():
    """Run the tests of the ``__main__`` module through ``unittest.main``."""
    # '__main__' is unittest.main's default module; it is spelled out here
    # only to make the discovery target explicit.
    unittest.main(module='__main__')
556 | |
557 | |
558 if __name__ == '__main__': | |
559 run_tests() |