diff join_subtract_group/glycan_set_operations/test_sets.py @ 0:89592faa2875 draft

Uploaded
author chrisb
date Wed, 23 Mar 2016 14:35:56 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/join_subtract_group/glycan_set_operations/test_sets.py	Wed Mar 23 14:35:56 2016 -0400
@@ -0,0 +1,559 @@
+__license__ = "MIT"
+
+import unittest
+import glycan_sets as gs
+
+
+class SimpleUnitTest(unittest.TestCase):
+    def setUp(self):
+        """Build the KCF fixture texts and blank ``http_proxy`` for the test.
+
+        ``self.kcinput`` maps a fixture name to KCF text:
+
+        * ``ringsmcaw`` -- six entries including annotation fields
+          (COMPOSITION, MASS, CLASS, REFERENCE, DBLINKS).
+        * ``ringsmcaw_modified`` -- the same six entry IDs with only the
+          ENTRY, NODE and EDGE sections.
+        * ``ringsfinger`` -- entries G00015 to G00020.
+        * ``ringsbroken`` -- a truncated entry whose ENTRY line is prefixed
+          with junk characters.
+
+        The previous ``http_proxy`` value is put back by a cleanup registered
+        here, so the blanked setting does not outlive the test.
+        """
+        import os
+
+        self.kcinput = {"ringsmcaw": """ENTRY       G04845                      Glycan
+COMPOSITION (Gal)3 (Glc)1 (GlcNAc)2 (LFuc)2 (Neu5Ac)1
+MASS        1656.5
+DBLINKS     CCSD: 23949
+            GlycomeDB: 20420
+            JCGGDB: JCGG-STR011245
+NODE        9
+            1   Glc         0     0
+            2   Gal       -10     0
+            3   GlcNAc    -20    10
+            4   GlcNAc    -20   -10
+            5   Gal       -30    15
+            6   LFuc      -30     5
+            7   LFuc      -30    -5
+            8   Gal       -30   -15
+            9   Neu5Ac    -40    15
+EDGE        8
+            1     2:b1    1:4
+            2     3:b1    2:6
+            3     4:b1    2:3
+            4     5:b1    3:4
+            5     6:a1    3:3
+            6     7:a1    4:4
+            7     8:b1    4:3
+            8     9:a2    5:3
+///
+ENTRY       G05108                      Glycan
+COMPOSITION (Gal)2 (GalNAc)1 (GlcNAc)1 (LFuc)1 (Neu5Ac)1 (S)1
+MASS        1266.2
+CLASS       Glycoprotein; O-Glycan
+DBLINKS     CCSD: 33353
+            GlycomeDB: 20590
+            JCGGDB: JCGG-STR011425
+NODE        7
+            1   GalNAc     20     1
+            2   Gal        10     1
+            3   GlcNAc      0     1
+            4   S          -5     5
+            5   Gal       -10     1
+            6   LFuc      -10    -4
+            7   Neu5Ac    -20     1
+EDGE        6
+            1     2:b1    1:3
+            2     3:b1    2:3
+            3     4       3:6
+            4     5:b1    3:4
+            5     6:a1    3:3
+            6     7:a2    5:3
+///
+
+ENTRY       G05121                      Glycan
+COMPOSITION (Gal)2 (GalNAc)1 (GlcNAc)1 (LFuc)1 (Neu5Ac)2
+MASS        1477.3
+CLASS       Glycoprotein; O-Glycan
+DBLINKS     CCSD: 33350
+            GlycomeDB: 20599
+            JCGGDB: JCGG-STR011433
+NODE        7
+            1   GalNAc      0     0
+            2   Neu5Ac    -10     5
+            3   Gal       -10    -5
+            4   GlcNAc    -20    -5
+            5   Gal       -30     0
+            6   LFuc      -30   -10
+            7   Neu5Ac    -40     0
+EDGE        6
+            1     2:a2    1:6
+            2     3:b1    1:3
+            3     4:b1    3:3
+            4     5:b1    4:4
+            5     6:a1    4:3
+            6     7:a2    5:3
+///
+
+ENTRY       G04183                      Glycan
+COMPOSITION (Gal)3 (GlcNAc)5 (LFuc)1 (Man)3 (Neu5Ac)3
+MASS        3026.8
+CLASS       Glycoprotein; N-Glycan
+DBLINKS     CCSD: 41981
+            GlycomeDB: 19974
+            JCGGDB: JCGG-STR010756
+NODE        15
+            1   GlcNAc      0     0
+            2   GlcNAc    -10     0
+            3   Man       -20     0
+            4   Man       -30    10
+            5   Man       -30   -10
+            6   GlcNAc    -40    10
+            7   GlcNAc    -40    -5
+            8   GlcNAc    -40   -15
+            9   Gal       -50    10
+            10  Gal       -50     0
+            11  LFuc      -50   -10
+            12  Gal       -50   -15
+            13  Neu5Ac    -60    10
+            14  Neu5Ac    -60     0
+            15  Neu5Ac    -60   -15
+EDGE        14
+            1     2:b1    1:4
+            2     3:b1    2:4
+            3     4:a1    3:6
+            4     5:a1    3:3
+            5     6:b1    4:2
+            6     7:b1    5:4
+            7     8:b1    5:2
+            8     9:b1    6:4
+            9    10:b1    7:4
+            10   11:a1    7:3
+            11   12:b1    8:4
+            12   13:a2    9:6
+            13   14:a2   10:3
+            14   15:a2   12:6
+///
+
+ENTRY       G04329                      Glycan
+COMPOSITION (Gal)3 (GlcNAc)3 (LFuc)3 (Neu5Ac)1
+MASS        1843.7
+DBLINKS     CCSD: 36620
+            GlycomeDB: 20084
+            JCGGDB: JCGG-STR010874
+NODE        10
+            1   GlcNAc     27    -5
+            2   Gal        18     0
+            3   LFuc       18   -10
+            4   GlcNAc      9     0
+            5   Gal         0     5
+            6   LFuc        0    -5
+            7   GlcNAc     -9     5
+            8   Gal       -18    10
+            9   LFuc      -18     0
+            10  Neu5Ac    -27    10
+EDGE        9
+            1     2:b1    1:4
+            2     3:a1    1:3
+            3     4:b1    2:3
+            4     5:b1    4:4
+            5     6:a1    4:3
+            6     7:b1    5:3
+            7     8:b1    7:4
+            8     9:a1    7:3
+            9    10:a2    8:3
+///
+ENTRY       G04804                      Glycan
+COMPOSITION (Gal)3 (GlcNAc)5 (LFuc)1 (Man)3 (Neu5Ac)3
+MASS        3026.8
+CLASS       Glycoprotein; N-Glycan
+REFERENCE   1  [PMID:6704968]
+            Chandrasekaran EV, Davila M, Nixon D, Mendicino J.
+            Structures of the oligosaccharide chains of two forms of alpha 1-acid glycoprotein purified from liver metastases of lung, colon, and breast tumors.
+            Cancer. Res. 44 (1984) 1557-67.
+DBLINKS     CCSD: 8168
+            GlycomeDB: 33225
+            JCGGDB: JCGG-STR024138
+NODE        15
+            1   GlcNAc   27.2   1.6
+            2   GlcNAc   17.2   1.6
+            3   Man       8.2   1.6
+            4   Man       0.2   7.6
+            5   Man       0.2  -4.4
+            6   GlcNAc   -8.8   7.6
+            7   GlcNAc   -8.8  -0.4
+            8   GlcNAc   -8.8  -8.4
+            9   Gal     -17.8   7.6
+            10  Gal     -17.8   3.6
+            11  LFuc    -17.8  -4.4
+            12  Gal     -17.8  -8.4
+            13  Neu5Ac  -26.8   7.6
+            14  Neu5Ac  -26.8   3.6
+            15  Neu5Ac  -26.8  -8.4
+EDGE        14
+            1     2:b1    1:4
+            2     3:b1    2:4
+            3     4:a1    3:6
+            4     5:a1    3:3
+            5     6:b1    4:2
+            6     7:b1    5:4
+            7     8:b1    5:2
+            8     9:b1    6:4
+            9    10:b1    7:4
+            10   11:a1    7:3
+            11   12:b1    8:4
+            12   13:a2    9:3
+            13   14:a2   10:3
+            14   15:a2   12:6
+///
+""", "ringsmcaw_modified": """ENTRY       G04845                      Glycan
+NODE        9
+            1   Glc         0     0
+            2   Gal       -10     0
+            3   GlcNAc    -20    10
+            4   GlcNAc    -20   -10
+            5   Gal       -30    15
+            6   LFuc      -30     5
+            7   LFuc      -30    -5
+            8   Gal       -30   -15
+            9   Neu5Ac    -40    15
+EDGE        8
+            1     2:b1    1:4
+            2     3:b1    2:6
+            3     4:b1    2:3
+            4     5:b1    3:4
+            5     6:a1    3:3
+            6     7:a1    4:4
+            7     8:b1    4:3
+            8     9:a2    5:3
+///
+ENTRY       G05108                      Glycan
+NODE        7
+            1   GalNAc     20     1
+            2   Gal        10     1
+            3   GlcNAc      0     1
+            4   S          -5     5
+            5   Gal       -10     1
+            6   LFuc      -10    -4
+            7   Neu5Ac    -20     1
+EDGE        6
+            1     2:b1    1:3
+            2     3:b1    2:3
+            3     4       3:6
+            4     5:b1    3:4
+            5     6:a1    3:3
+            6     7:a2    5:3
+///
+ENTRY       G05121                      Glycan
+NODE        7
+            1   GalNAc      0     0
+            2   Neu5Ac    -10     5
+            3   Gal       -10    -5
+            4   GlcNAc    -20    -5
+            5   Gal       -30     0
+            6   LFuc      -30   -10
+            7   Neu5Ac    -40     0
+EDGE        6
+            1     2:a2    1:6
+            2     3:b1    1:3
+            3     4:b1    3:3
+            4     5:b1    4:4
+            5     6:a1    4:3
+            6     7:a2    5:3
+///
+ENTRY       G04183                      Glycan
+NODE        15
+            1   GlcNAc      0     0
+            2   GlcNAc    -10     0
+            3   Man       -20     0
+            4   Man       -30    10
+            5   Man       -30   -10
+            6   GlcNAc    -40    10
+            7   GlcNAc    -40    -5
+            8   GlcNAc    -40   -15
+            9   Gal       -50    10
+            10  Gal       -50     0
+            11  LFuc      -50   -10
+            12  Gal       -50   -15
+            13  Neu5Ac    -60    10
+            14  Neu5Ac    -60     0
+            15  Neu5Ac    -60   -15
+EDGE        14
+            1     2:b1    1:4
+            2     3:b1    2:4
+            3     4:a1    3:6
+            4     5:a1    3:3
+            5     6:b1    4:2
+            6     7:b1    5:4
+            7     8:b1    5:2
+            8     9:b1    6:4
+            9    10:b1    7:4
+            10   11:a1    7:3
+            11   12:b1    8:4
+            12   13:a2    9:6
+            13   14:a2   10:3
+            14   15:a2   12:6
+///
+ENTRY       G04329                      Glycan
+NODE        10
+            1   GlcNAc     27    -5
+            2   Gal        18     0
+            3   LFuc       18   -10
+            4   GlcNAc      9     0
+            5   Gal         0     5
+            6   LFuc        0    -5
+            7   GlcNAc     -9     5
+            8   Gal       -18    10
+            9   LFuc      -18     0
+            10  Neu5Ac    -27    10
+EDGE        9
+            1     2:b1    1:4
+            2     3:a1    1:3
+            3     4:b1    2:3
+            4     5:b1    4:4
+            5     6:a1    4:3
+            6     7:b1    5:3
+            7     8:b1    7:4
+            8     9:a1    7:3
+            9    10:a2    8:3
+///
+ENTRY       G04804                      Glycan
+NODE        15
+            1   GlcNAc   27.2   1.6
+            2   GlcNAc   17.2   1.6
+            3   Man       8.2   1.6
+            4   Man       0.2   7.6
+            5   Man       0.2  -4.4
+            6   GlcNAc   -8.8   7.6
+            7   GlcNAc   -8.8  -0.4
+            8   GlcNAc   -8.8  -8.4
+            9   Gal     -17.8   7.6
+            10  Gal     -17.8   3.6
+            11  LFuc    -17.8  -4.4
+            12  Gal     -17.8  -8.4
+            13  Neu5Ac  -26.8   7.6
+            14  Neu5Ac  -26.8   3.6
+            15  Neu5Ac  -26.8  -8.4
+EDGE        14
+            1     2:b1    1:4
+            2     3:b1    2:4
+            3     4:a1    3:6
+            4     5:a1    3:3
+            5     6:b1    4:2
+            6     7:b1    5:4
+            7     8:b1    5:2
+            8     9:b1    6:4
+            9    10:b1    7:4
+            10   11:a1    7:3
+            11   12:b1    8:4
+            12   13:a2    9:3
+            13   14:a2   10:3
+            14   15:a2   12:6
+///
+""", "ringsfinger": """ENTRY       G00015                      Glycan
+NODE        8
+            1   Asn        20     0
+            2   GlcNAc     12     0
+            3   GlcNAc      3     0
+            4   Man        -5     0
+            5   Man       -12     5
+            6   Man       -12    -5
+            7   GlcNAc    -20     5
+            8   GlcNAc    -20    -5
+EDGE        7
+            1     2:b1    1
+            2     3:b1    2:4
+            3     4:b1    3:4
+            4     5:a1    4:6
+            5     6:a1    4:3
+            6     7:b1    5:2
+            7     8:b1    6:2
+///
+ENTRY       G00016                      Glycan
+NODE        9
+            1   Asn        20     3
+            2   GlcNAc     12     3
+            3   LFuc        4     8
+            4   GlcNAc      3    -2
+            5   Man        -5    -2
+            6   Man       -12     3
+            7   Man       -12    -7
+            8   GlcNAc    -20     3
+            9   GlcNAc    -20    -7
+EDGE        8
+            1     2:b1    1
+            2     3:a1    2:6
+            3     4:b1    2:4
+            4     5:b1    4:4
+            5     6:a1    5:6
+            6     7:a1    5:3
+            7     8:b1    6:2
+            8     9:b1    7:2
+///
+ENTRY       G00017                      Glycan
+NODE        11
+            1   Asn        24     3
+            2   GlcNAc     14     3
+            3   LFuc        7     8
+            4   GlcNAc      6    -2
+            5   Man        -2    -2
+            6   Man        -8     3
+            7   Man        -8    -7
+            8   GlcNAc    -16     3
+            9   GlcNAc    -16    -7
+            10  Gal       -24     3
+            11  Gal       -24    -7
+EDGE        10
+            1     2:b1    1
+            2     3:a1    2:6
+            3     4:b1    2:4
+            4     5:b1    4:4
+            5     6:a1    5:6
+            6     7:a1    5:3
+            7     8:b1    6:2
+            8     9:b1    7:2
+            9    10:b1    8:4
+            10   11:b1    9:4
+///
+ENTRY       G00018                      Glycan
+NODE        13
+            1   Asn        28     3
+            2   GlcNAc     18     3
+            3   LFuc       10     8
+            4   GlcNAc      9    -2
+            5   Man         1    -2
+            6   Man        -5     4
+            7   Man        -5    -8
+            8   GlcNAc    -13     4
+            9   GlcNAc    -13    -8
+            10  Gal       -21     4
+            11  Gal       -21    -8
+            12  Neu5Ac    -29     4
+            13  Neu5Ac    -29    -8
+EDGE        12
+            1     2:b1    1
+            2     3:a1    2:6
+            3     4:b1    2:4
+            4     5:b1    4:4
+            5     6:a1    5:6
+            6     7:a1    5:3
+            7     8:b1    6:2
+            8     9:b1    7:2
+            9    10:b1    8:4
+            10   11:b1    9:4
+            11   12:a2   10:6
+            12   13:a2   11:6
+///
+ENTRY       G00019                      Glycan
+NODE        9
+            1   Asn        20     0
+            2   GlcNAc     12     0
+            3   GlcNAc      3     0
+            4   Man        -5     0
+            5   Man       -12     5
+            6   Man       -12    -5
+            7   GlcNAc    -15     0
+            8   GlcNAc    -20     5
+            9   GlcNAc    -20    -5
+EDGE        8
+            1     2:b1    1
+            2     3:b1    2:4
+            3     4:b1    3:4
+            4     5:a1    4:6
+            5     6:a1    4:3
+            6     7:b1    4:4
+            7     8:b1    5:2
+            8     9:b1    6:2
+///
+ENTRY       G00020                      Glycan
+NODE        9
+            1   Asn        20     3
+            2   GlcNAc     11     3
+            3   GlcNAc      2     3
+            4   Man        -6     3
+            5   Man       -13     9
+            6   Man       -13    -3
+            7   GlcNAc    -21     9
+            8   GlcNAc    -21     2
+            9   GlcNAc    -21    -8
+EDGE        8
+            1     2:b1    1
+            2     3:b1    2:4
+            3     4:b1    3:4
+            4     5:a1    4:6
+            5     6:a1    4:3
+            6     7:b1    5:2
+            7     8:b1    6:4
+            8     9:b1    6:2
+///
+""", "ringsbroken": """ABCDEFGENTRY       G00015                      Glycan
+NODE        8
+            1   Asn        20     0
+            2   GlcNAc     12     0
+"""
+        }
+
+        # Remember the caller's proxy setting (None when the variable is
+        # unset) so it can be put back after the test.
+        previous_proxy = os.environ.get("http_proxy")
+
+        def restore_proxy():
+            if previous_proxy is None:
+                os.environ.pop("http_proxy", None)
+            else:
+                os.environ["http_proxy"] = previous_proxy
+
+        # Registered before the variable is overwritten so the restore runs
+        # whatever the outcome of the test.
+        self.addCleanup(restore_proxy)
+        os.environ["http_proxy"] = ""  # work around for IOError: [Errno url error] invalid proxy for http:
+
+    def tearDown(self):
+        """No-op hook: this method performs no teardown work."""
+
+    def test_broken_kcf(self):
+        """Feed the truncated ``ringsbroken`` fixture to the comparison.
+
+        The call is expected to raise ``UnboundLocalError``.
+        """
+        from StringIO import StringIO
+
+        stream = StringIO(self.kcinput["ringsbroken"])
+        # NOTE(review): the same stream object is passed for both arguments,
+        # and UnboundLocalError looks like an accident of the parser rather
+        # than a designed error type -- confirm against glycan_sets.
+        self.assertRaises(UnboundLocalError, gs.glycan_set_comparison, stream, stream)
+
+    def test_same_kcf(self):
+        """Compare the ``ringsfinger`` fixture with a second copy of itself.
+
+        The six results are checked pairwise for equality: first with
+        second, third with fourth, fifth with sixth.
+        """
+        from StringIO import StringIO
+
+        text = self.kcinput["ringsfinger"]
+        # Two independent streams over the same text.
+        first = StringIO(text)
+        second = StringIO(text)
+        results = gs.glycan_set_comparison(first, second)
+        unique_a, unique_b, union, intersection, ab, ba = results
+        self.assertEqual(unique_a, unique_b)
+        self.assertEqual(union, intersection)
+        self.assertEqual(ab, ba)
+
+    def test_diff_kcf(self):
+        """Compare the ``ringsfinger`` fixture against ``ringsmcaw``.
+
+        The six results are checked pairwise for inequality: first with
+        second, third with fourth, fifth with sixth.
+        """
+        from StringIO import StringIO
+
+        finger_text = self.kcinput["ringsfinger"]
+        mcaw_text = self.kcinput["ringsmcaw"]
+        results = gs.glycan_set_comparison(StringIO(finger_text), StringIO(mcaw_text))
+        unique_a, unique_b, union, intersection, ab, ba = results
+        self.assertNotEqual(unique_a, unique_b)
+        self.assertNotEqual(union, intersection)
+        self.assertNotEqual(ab, ba)
+
+    def test_diff_kcf_modified(self):
+        """Compare ``ringsfinger`` against the ``ringsmcaw_modified`` fixture.
+
+        The six results are checked pairwise for inequality: first with
+        second, third with fourth, fifth with sixth.
+        """
+        from StringIO import StringIO
+
+        finger_text = self.kcinput["ringsfinger"]
+        modified_text = self.kcinput["ringsmcaw_modified"]
+        results = gs.glycan_set_comparison(StringIO(finger_text), StringIO(modified_text))
+        unique_a, unique_b, union, intersection, ab, ba = results
+        self.assertNotEqual(unique_a, unique_b)
+        self.assertNotEqual(union, intersection)
+        self.assertNotEqual(ab, ba)
+
+#    def test_bug_read_kcf(self):
+#        """
+#        readkcf is not a full implementation. Fails on ringsmcaw test set
+#        :return:
+#        """
+#        import StringIO
+#
+#        kcf = self.kcinput["ringsfinger"]
+#        kcf2 = self.kcinput["ringsmcaw"]
+#        handle = StringIO.StringIO(''.join(kcf))
+#        handle2 = StringIO.StringIO(''.join(kcf2))
+#        with self.assertRaises(ValueError):
+#            uniquesetA, uniquesetB, union, intersection, AB, BA = gs.glycan_set_comparison(handle, handle2)
+
+    def test_empty_stream(self):
+        """Empty or missing inputs must make the comparison raise IOError.
+
+        Each argument pair -- two empty strings, two empty lists, two
+        ``None`` values -- is tried in turn.  The first pair that does not
+        raise IOError fails the test with that input named in the message.
+        Any other exception type propagates as a test error.
+        """
+        empty_pairs = (("", ""), ([], []), (None, None))
+        for first, second in empty_pairs:
+            try:
+                gs.glycan_set_comparison(first, second)
+            except IOError:
+                continue
+            self.fail("IOError not raised for input %r" % (first,))
+
+
+def run_tests():
+    """Hand control to ``unittest.main()`` with its default arguments."""
+    unittest.main()
+
+
+if __name__ == "__main__":
+    run_tests()