Mercurial > repos > chrisb > gap_all_glycan_tools
diff join_subtract_group/glycan_set_operations/test_sets.py @ 0:89592faa2875 draft
Uploaded
| author | chrisb |
|---|---|
| date | Wed, 23 Mar 2016 14:35:56 -0400 |
| parents | |
| children | |
line wrap: on
line diff
# Reconstructed from the changeset diff:
#   --- /dev/null Thu Jan 01 00:00:00 1970 +0000
#   +++ b/join_subtract_group/glycan_set_operations/test_sets.py Wed Mar 23 14:35:56 2016 -0400
#   @@ -0,0 +1,559 @@
"""Unit tests for ``glycan_sets.glycan_set_comparison`` using inline KCF fixtures."""

__license__ = "MIT"

import os
import unittest

try:
    from StringIO import StringIO  # Python 2: accepts the byte-string fixtures below
except ImportError:
    from io import StringIO  # Python 3

import glycan_sets as gs


# KCF fixture texts keyed by the names the tests look up via ``self.kcinput``.
# NOTE(review): this text was recovered from a whitespace-collapsed diff view;
# line breaks are restored but the original column padding inside each record
# could not be recovered -- confirm against the repository copy.
_KCF_FIXTURES = {
    "ringsmcaw": """ENTRY G04845 Glycan
COMPOSITION (Gal)3 (Glc)1 (GlcNAc)2 (LFuc)2 (Neu5Ac)1
MASS 1656.5
DBLINKS CCSD: 23949
 GlycomeDB: 20420
 JCGGDB: JCGG-STR011245
NODE 9
 1 Glc 0 0
 2 Gal -10 0
 3 GlcNAc -20 10
 4 GlcNAc -20 -10
 5 Gal -30 15
 6 LFuc -30 5
 7 LFuc -30 -5
 8 Gal -30 -15
 9 Neu5Ac -40 15
EDGE 8
 1 2:b1 1:4
 2 3:b1 2:6
 3 4:b1 2:3
 4 5:b1 3:4
 5 6:a1 3:3
 6 7:a1 4:4
 7 8:b1 4:3
 8 9:a2 5:3
///
ENTRY G05108 Glycan
COMPOSITION (Gal)2 (GalNAc)1 (GlcNAc)1 (LFuc)1 (Neu5Ac)1 (S)1
MASS 1266.2
CLASS Glycoprotein; O-Glycan
DBLINKS CCSD: 33353
 GlycomeDB: 20590
 JCGGDB: JCGG-STR011425
NODE 7
 1 GalNAc 20 1
 2 Gal 10 1
 3 GlcNAc 0 1
 4 S -5 5
 5 Gal -10 1
 6 LFuc -10 -4
 7 Neu5Ac -20 1
EDGE 6
 1 2:b1 1:3
 2 3:b1 2:3
 3 4 3:6
 4 5:b1 3:4
 5 6:a1 3:3
 6 7:a2 5:3
///

ENTRY G05121 Glycan
COMPOSITION (Gal)2 (GalNAc)1 (GlcNAc)1 (LFuc)1 (Neu5Ac)2
MASS 1477.3
CLASS Glycoprotein; O-Glycan
DBLINKS CCSD: 33350
 GlycomeDB: 20599
 JCGGDB: JCGG-STR011433
NODE 7
 1 GalNAc 0 0
 2 Neu5Ac -10 5
 3 Gal -10 -5
 4 GlcNAc -20 -5
 5 Gal -30 0
 6 LFuc -30 -10
 7 Neu5Ac -40 0
EDGE 6
 1 2:a2 1:6
 2 3:b1 1:3
 3 4:b1 3:3
 4 5:b1 4:4
 5 6:a1 4:3
 6 7:a2 5:3
///

ENTRY G04183 Glycan
COMPOSITION (Gal)3 (GlcNAc)5 (LFuc)1 (Man)3 (Neu5Ac)3
MASS 3026.8
CLASS Glycoprotein; N-Glycan
DBLINKS CCSD: 41981
 GlycomeDB: 19974
 JCGGDB: JCGG-STR010756
NODE 15
 1 GlcNAc 0 0
 2 GlcNAc -10 0
 3 Man -20 0
 4 Man -30 10
 5 Man -30 -10
 6 GlcNAc -40 10
 7 GlcNAc -40 -5
 8 GlcNAc -40 -15
 9 Gal -50 10
 10 Gal -50 0
 11 LFuc -50 -10
 12 Gal -50 -15
 13 Neu5Ac -60 10
 14 Neu5Ac -60 0
 15 Neu5Ac -60 -15
EDGE 14
 1 2:b1 1:4
 2 3:b1 2:4
 3 4:a1 3:6
 4 5:a1 3:3
 5 6:b1 4:2
 6 7:b1 5:4
 7 8:b1 5:2
 8 9:b1 6:4
 9 10:b1 7:4
 10 11:a1 7:3
 11 12:b1 8:4
 12 13:a2 9:6
 13 14:a2 10:3
 14 15:a2 12:6
///

ENTRY G04329 Glycan
COMPOSITION (Gal)3 (GlcNAc)3 (LFuc)3 (Neu5Ac)1
MASS 1843.7
DBLINKS CCSD: 36620
 GlycomeDB: 20084
 JCGGDB: JCGG-STR010874
NODE 10
 1 GlcNAc 27 -5
 2 Gal 18 0
 3 LFuc 18 -10
 4 GlcNAc 9 0
 5 Gal 0 5
 6 LFuc 0 -5
 7 GlcNAc -9 5
 8 Gal -18 10
 9 LFuc -18 0
 10 Neu5Ac -27 10
EDGE 9
 1 2:b1 1:4
 2 3:a1 1:3
 3 4:b1 2:3
 4 5:b1 4:4
 5 6:a1 4:3
 6 7:b1 5:3
 7 8:b1 7:4
 8 9:a1 7:3
 9 10:a2 8:3
///
ENTRY G04804 Glycan
COMPOSITION (Gal)3 (GlcNAc)5 (LFuc)1 (Man)3 (Neu5Ac)3
MASS 3026.8
CLASS Glycoprotein; N-Glycan
REFERENCE 1 [PMID:6704968]
 Chandrasekaran EV, Davila M, Nixon D, Mendicino J.
 Structures of the oligosaccharide chains of two forms of alpha 1-acid glycoprotein purified from liver metastases of lung, colon, and breast tumors.
 Cancer. Res. 44 (1984) 1557-67.
DBLINKS CCSD: 8168
 GlycomeDB: 33225
 JCGGDB: JCGG-STR024138
NODE 15
 1 GlcNAc 27.2 1.6
 2 GlcNAc 17.2 1.6
 3 Man 8.2 1.6
 4 Man 0.2 7.6
 5 Man 0.2 -4.4
 6 GlcNAc -8.8 7.6
 7 GlcNAc -8.8 -0.4
 8 GlcNAc -8.8 -8.4
 9 Gal -17.8 7.6
 10 Gal -17.8 3.6
 11 LFuc -17.8 -4.4
 12 Gal -17.8 -8.4
 13 Neu5Ac -26.8 7.6
 14 Neu5Ac -26.8 3.6
 15 Neu5Ac -26.8 -8.4
EDGE 14
 1 2:b1 1:4
 2 3:b1 2:4
 3 4:a1 3:6
 4 5:a1 3:3
 5 6:b1 4:2
 6 7:b1 5:4
 7 8:b1 5:2
 8 9:b1 6:4
 9 10:b1 7:4
 10 11:a1 7:3
 11 12:b1 8:4
 12 13:a2 9:3
 13 14:a2 10:3
 14 15:a2 12:6
///
""",
    "ringsmcaw_modified": """ENTRY G04845 Glycan
NODE 9
 1 Glc 0 0
 2 Gal -10 0
 3 GlcNAc -20 10
 4 GlcNAc -20 -10
 5 Gal -30 15
 6 LFuc -30 5
 7 LFuc -30 -5
 8 Gal -30 -15
 9 Neu5Ac -40 15
EDGE 8
 1 2:b1 1:4
 2 3:b1 2:6
 3 4:b1 2:3
 4 5:b1 3:4
 5 6:a1 3:3
 6 7:a1 4:4
 7 8:b1 4:3
 8 9:a2 5:3
///
ENTRY G05108 Glycan
NODE 7
 1 GalNAc 20 1
 2 Gal 10 1
 3 GlcNAc 0 1
 4 S -5 5
 5 Gal -10 1
 6 LFuc -10 -4
 7 Neu5Ac -20 1
EDGE 6
 1 2:b1 1:3
 2 3:b1 2:3
 3 4 3:6
 4 5:b1 3:4
 5 6:a1 3:3
 6 7:a2 5:3
///
ENTRY G05121 Glycan
NODE 7
 1 GalNAc 0 0
 2 Neu5Ac -10 5
 3 Gal -10 -5
 4 GlcNAc -20 -5
 5 Gal -30 0
 6 LFuc -30 -10
 7 Neu5Ac -40 0
EDGE 6
 1 2:a2 1:6
 2 3:b1 1:3
 3 4:b1 3:3
 4 5:b1 4:4
 5 6:a1 4:3
 6 7:a2 5:3
///
ENTRY G04183 Glycan
NODE 15
 1 GlcNAc 0 0
 2 GlcNAc -10 0
 3 Man -20 0
 4 Man -30 10
 5 Man -30 -10
 6 GlcNAc -40 10
 7 GlcNAc -40 -5
 8 GlcNAc -40 -15
 9 Gal -50 10
 10 Gal -50 0
 11 LFuc -50 -10
 12 Gal -50 -15
 13 Neu5Ac -60 10
 14 Neu5Ac -60 0
 15 Neu5Ac -60 -15
EDGE 14
 1 2:b1 1:4
 2 3:b1 2:4
 3 4:a1 3:6
 4 5:a1 3:3
 5 6:b1 4:2
 6 7:b1 5:4
 7 8:b1 5:2
 8 9:b1 6:4
 9 10:b1 7:4
 10 11:a1 7:3
 11 12:b1 8:4
 12 13:a2 9:6
 13 14:a2 10:3
 14 15:a2 12:6
///
ENTRY G04329 Glycan
NODE 10
 1 GlcNAc 27 -5
 2 Gal 18 0
 3 LFuc 18 -10
 4 GlcNAc 9 0
 5 Gal 0 5
 6 LFuc 0 -5
 7 GlcNAc -9 5
 8 Gal -18 10
 9 LFuc -18 0
 10 Neu5Ac -27 10
EDGE 9
 1 2:b1 1:4
 2 3:a1 1:3
 3 4:b1 2:3
 4 5:b1 4:4
 5 6:a1 4:3
 6 7:b1 5:3
 7 8:b1 7:4
 8 9:a1 7:3
 9 10:a2 8:3
///
ENTRY G04804 Glycan
NODE 15
 1 GlcNAc 27.2 1.6
 2 GlcNAc 17.2 1.6
 3 Man 8.2 1.6
 4 Man 0.2 7.6
 5 Man 0.2 -4.4
 6 GlcNAc -8.8 7.6
 7 GlcNAc -8.8 -0.4
 8 GlcNAc -8.8 -8.4
 9 Gal -17.8 7.6
 10 Gal -17.8 3.6
 11 LFuc -17.8 -4.4
 12 Gal -17.8 -8.4
 13 Neu5Ac -26.8 7.6
 14 Neu5Ac -26.8 3.6
 15 Neu5Ac -26.8 -8.4
EDGE 14
 1 2:b1 1:4
 2 3:b1 2:4
 3 4:a1 3:6
 4 5:a1 3:3
 5 6:b1 4:2
 6 7:b1 5:4
 7 8:b1 5:2
 8 9:b1 6:4
 9 10:b1 7:4
 10 11:a1 7:3
 11 12:b1 8:4
 12 13:a2 9:3
 13 14:a2 10:3
 14 15:a2 12:6
///
""",
    "ringsfinger": """ENTRY G00015 Glycan
NODE 8
 1 Asn 20 0
 2 GlcNAc 12 0
 3 GlcNAc 3 0
 4 Man -5 0
 5 Man -12 5
 6 Man -12 -5
 7 GlcNAc -20 5
 8 GlcNAc -20 -5
EDGE 7
 1 2:b1 1
 2 3:b1 2:4
 3 4:b1 3:4
 4 5:a1 4:6
 5 6:a1 4:3
 6 7:b1 5:2
 7 8:b1 6:2
///
ENTRY G00016 Glycan
NODE 9
 1 Asn 20 3
 2 GlcNAc 12 3
 3 LFuc 4 8
 4 GlcNAc 3 -2
 5 Man -5 -2
 6 Man -12 3
 7 Man -12 -7
 8 GlcNAc -20 3
 9 GlcNAc -20 -7
EDGE 8
 1 2:b1 1
 2 3:a1 2:6
 3 4:b1 2:4
 4 5:b1 4:4
 5 6:a1 5:6
 6 7:a1 5:3
 7 8:b1 6:2
 8 9:b1 7:2
///
ENTRY G00017 Glycan
NODE 11
 1 Asn 24 3
 2 GlcNAc 14 3
 3 LFuc 7 8
 4 GlcNAc 6 -2
 5 Man -2 -2
 6 Man -8 3
 7 Man -8 -7
 8 GlcNAc -16 3
 9 GlcNAc -16 -7
 10 Gal -24 3
 11 Gal -24 -7
EDGE 10
 1 2:b1 1
 2 3:a1 2:6
 3 4:b1 2:4
 4 5:b1 4:4
 5 6:a1 5:6
 6 7:a1 5:3
 7 8:b1 6:2
 8 9:b1 7:2
 9 10:b1 8:4
 10 11:b1 9:4
///
ENTRY G00018 Glycan
NODE 13
 1 Asn 28 3
 2 GlcNAc 18 3
 3 LFuc 10 8
 4 GlcNAc 9 -2
 5 Man 1 -2
 6 Man -5 4
 7 Man -5 -8
 8 GlcNAc -13 4
 9 GlcNAc -13 -8
 10 Gal -21 4
 11 Gal -21 -8
 12 Neu5Ac -29 4
 13 Neu5Ac -29 -8
EDGE 12
 1 2:b1 1
 2 3:a1 2:6
 3 4:b1 2:4
 4 5:b1 4:4
 5 6:a1 5:6
 6 7:a1 5:3
 7 8:b1 6:2
 8 9:b1 7:2
 9 10:b1 8:4
 10 11:b1 9:4
 11 12:a2 10:6
 12 13:a2 11:6
///
ENTRY G00019 Glycan
NODE 9
 1 Asn 20 0
 2 GlcNAc 12 0
 3 GlcNAc 3 0
 4 Man -5 0
 5 Man -12 5
 6 Man -12 -5
 7 GlcNAc -15 0
 8 GlcNAc -20 5
 9 GlcNAc -20 -5
EDGE 8
 1 2:b1 1
 2 3:b1 2:4
 3 4:b1 3:4
 4 5:a1 4:6
 5 6:a1 4:3
 6 7:b1 4:4
 7 8:b1 5:2
 8 9:b1 6:2
///
ENTRY G00020 Glycan
NODE 9
 1 Asn 20 3
 2 GlcNAc 11 3
 3 GlcNAc 2 3
 4 Man -6 3
 5 Man -13 9
 6 Man -13 -3
 7 GlcNAc -21 9
 8 GlcNAc -21 2
 9 GlcNAc -21 -8
EDGE 8
 1 2:b1 1
 2 3:b1 2:4
 3 4:b1 3:4
 4 5:a1 4:6
 5 6:a1 4:3
 6 7:b1 5:2
 7 8:b1 6:4
 8 9:b1 6:2
///
""",
    "ringsbroken": """ABCDEFGENTRY G00015 Glycan
NODE 8
 1 Asn 20 0
 2 GlcNAc 12 0
""",
}


class SimpleUnitTest(unittest.TestCase):
    """Exercise ``gs.glycan_set_comparison`` against the inline KCF fixtures."""

    def setUp(self):
        """Expose the fixtures as ``self.kcinput`` and blank ``http_proxy``."""
        # Per-test shallow copy so a test that edits the mapping cannot
        # affect the tests that run after it.
        self.kcinput = dict(_KCF_FIXTURES)
        # Remember the caller's proxy setting so tearDown can put it back.
        self._saved_http_proxy = os.environ.get("http_proxy")
        # work around for IOError: [Errno url error] invalid proxy for http:
        os.environ["http_proxy"] = ""

    def tearDown(self):
        """Restore the ``http_proxy`` value that was in place before setUp."""
        if self._saved_http_proxy is None:
            os.environ.pop("http_proxy", None)
        else:
            os.environ["http_proxy"] = self._saved_http_proxy

    def test_broken_kcf(self):
        """The truncated ``ringsbroken`` fixture raises UnboundLocalError."""
        handle = StringIO(self.kcinput["ringsbroken"])
        # NOTE(review): the same handle is passed for both inputs, as in the
        # original test; confirm whether two independent streams were intended.
        with self.assertRaises(UnboundLocalError):
            gs.glycan_set_comparison(handle, handle)

    def test_same_kcf(self):
        """Comparing a fixture with itself yields pairwise-equal results."""
        kcf = self.kcinput["ringsfinger"]
        handle = StringIO(kcf)
        handle2 = StringIO(kcf)
        uniquesetA, uniquesetB, union, intersection, AB, BA = gs.glycan_set_comparison(handle, handle2)
        self.assertEqual(uniquesetA, uniquesetB)
        self.assertEqual(union, intersection)
        self.assertEqual(AB, BA)

    def test_diff_kcf(self):
        """``ringsfinger`` vs ``ringsmcaw`` yields pairwise-different results."""
        handle = StringIO(self.kcinput["ringsfinger"])
        handle2 = StringIO(self.kcinput["ringsmcaw"])
        uniquesetA, uniquesetB, union, intersection, AB, BA = gs.glycan_set_comparison(handle, handle2)
        self.assertNotEqual(uniquesetA, uniquesetB)
        self.assertNotEqual(union, intersection)
        self.assertNotEqual(AB, BA)

    def test_diff_kcf_modified(self):
        """``ringsfinger`` vs ``ringsmcaw_modified`` yields different results."""
        handle = StringIO(self.kcinput["ringsfinger"])
        handle2 = StringIO(self.kcinput["ringsmcaw_modified"])
        uniquesetA, uniquesetB, union, intersection, AB, BA = gs.glycan_set_comparison(handle, handle2)
        self.assertNotEqual(uniquesetA, uniquesetB)
        self.assertNotEqual(union, intersection)
        self.assertNotEqual(AB, BA)

#    def test_bug_read_kcf(self):
#        """
#        readkcf is not a full implementation. Fails on ringsmcaw test set
#        :return:
#        """
#        import StringIO
#
#        kcf = self.kcinput["ringsfinger"]
#        kcf2 = self.kcinput["ringsmcaw"]
#        handle = StringIO.StringIO(''.join(kcf))
#        handle2 = StringIO.StringIO(''.join(kcf2))
#        with self.assertRaises(ValueError):
#            uniquesetA, uniquesetB, union, intersection, AB, BA = gs.glycan_set_comparison(handle, handle2)

    def test_empty_stream(self):
        """Empty string, empty list and None inputs each raise IOError."""
        with self.assertRaises(IOError):
            gs.glycan_set_comparison("", "")
        with self.assertRaises(IOError):
            gs.glycan_set_comparison([], [])
        with self.assertRaises(IOError):
            gs.glycan_set_comparison(None, None)


def run_tests():
    """Run this module's tests through ``unittest.main()``."""
    unittest.main()


if __name__ == '__main__':
    run_tests()