# view join_subtract_group/glycan_set_operations/test_sets.py @ 1:0a5e0df17054 draft default tip
#
# Uploaded
# author chrisb
# date Fri, 06 May 2016 08:05:48 -0400
# parents 89592faa2875
# children
# line wrap: on
# line source

__license__ = "MIT"

import unittest

try:  # Python 2
    from StringIO import StringIO
except ImportError:  # Python 3
    from io import StringIO

import glycan_sets as gs


class SimpleUnitTest(unittest.TestCase):
    def setUp(self):
        import os

        self.kcinput = {"ringsmcaw": """ENTRY       G04845                      Glycan
COMPOSITION (Gal)3 (Glc)1 (GlcNAc)2 (LFuc)2 (Neu5Ac)1
MASS        1656.5
DBLINKS     CCSD: 23949
            GlycomeDB: 20420
            JCGGDB: JCGG-STR011245
NODE        9
            1   Glc         0     0
            2   Gal       -10     0
            3   GlcNAc    -20    10
            4   GlcNAc    -20   -10
            5   Gal       -30    15
            6   LFuc      -30     5
            7   LFuc      -30    -5
            8   Gal       -30   -15
            9   Neu5Ac    -40    15
EDGE        8
            1     2:b1    1:4
            2     3:b1    2:6
            3     4:b1    2:3
            4     5:b1    3:4
            5     6:a1    3:3
            6     7:a1    4:4
            7     8:b1    4:3
            8     9:a2    5:3
///
ENTRY       G05108                      Glycan
COMPOSITION (Gal)2 (GalNAc)1 (GlcNAc)1 (LFuc)1 (Neu5Ac)1 (S)1
MASS        1266.2
CLASS       Glycoprotein; O-Glycan
DBLINKS     CCSD: 33353
            GlycomeDB: 20590
            JCGGDB: JCGG-STR011425
NODE        7
            1   GalNAc     20     1
            2   Gal        10     1
            3   GlcNAc      0     1
            4   S          -5     5
            5   Gal       -10     1
            6   LFuc      -10    -4
            7   Neu5Ac    -20     1
EDGE        6
            1     2:b1    1:3
            2     3:b1    2:3
            3     4       3:6
            4     5:b1    3:4
            5     6:a1    3:3
            6     7:a2    5:3
///

ENTRY       G05121                      Glycan
COMPOSITION (Gal)2 (GalNAc)1 (GlcNAc)1 (LFuc)1 (Neu5Ac)2
MASS        1477.3
CLASS       Glycoprotein; O-Glycan
DBLINKS     CCSD: 33350
            GlycomeDB: 20599
            JCGGDB: JCGG-STR011433
NODE        7
            1   GalNAc      0     0
            2   Neu5Ac    -10     5
            3   Gal       -10    -5
            4   GlcNAc    -20    -5
            5   Gal       -30     0
            6   LFuc      -30   -10
            7   Neu5Ac    -40     0
EDGE        6
            1     2:a2    1:6
            2     3:b1    1:3
            3     4:b1    3:3
            4     5:b1    4:4
            5     6:a1    4:3
            6     7:a2    5:3
///

ENTRY       G04183                      Glycan
COMPOSITION (Gal)3 (GlcNAc)5 (LFuc)1 (Man)3 (Neu5Ac)3
MASS        3026.8
CLASS       Glycoprotein; N-Glycan
DBLINKS     CCSD: 41981
            GlycomeDB: 19974
            JCGGDB: JCGG-STR010756
NODE        15
            1   GlcNAc      0     0
            2   GlcNAc    -10     0
            3   Man       -20     0
            4   Man       -30    10
            5   Man       -30   -10
            6   GlcNAc    -40    10
            7   GlcNAc    -40    -5
            8   GlcNAc    -40   -15
            9   Gal       -50    10
            10  Gal       -50     0
            11  LFuc      -50   -10
            12  Gal       -50   -15
            13  Neu5Ac    -60    10
            14  Neu5Ac    -60     0
            15  Neu5Ac    -60   -15
EDGE        14
            1     2:b1    1:4
            2     3:b1    2:4
            3     4:a1    3:6
            4     5:a1    3:3
            5     6:b1    4:2
            6     7:b1    5:4
            7     8:b1    5:2
            8     9:b1    6:4
            9    10:b1    7:4
            10   11:a1    7:3
            11   12:b1    8:4
            12   13:a2    9:6
            13   14:a2   10:3
            14   15:a2   12:6
///

ENTRY       G04329                      Glycan
COMPOSITION (Gal)3 (GlcNAc)3 (LFuc)3 (Neu5Ac)1
MASS        1843.7
DBLINKS     CCSD: 36620
            GlycomeDB: 20084
            JCGGDB: JCGG-STR010874
NODE        10
            1   GlcNAc     27    -5
            2   Gal        18     0
            3   LFuc       18   -10
            4   GlcNAc      9     0
            5   Gal         0     5
            6   LFuc        0    -5
            7   GlcNAc     -9     5
            8   Gal       -18    10
            9   LFuc      -18     0
            10  Neu5Ac    -27    10
EDGE        9
            1     2:b1    1:4
            2     3:a1    1:3
            3     4:b1    2:3
            4     5:b1    4:4
            5     6:a1    4:3
            6     7:b1    5:3
            7     8:b1    7:4
            8     9:a1    7:3
            9    10:a2    8:3
///
ENTRY       G04804                      Glycan
COMPOSITION (Gal)3 (GlcNAc)5 (LFuc)1 (Man)3 (Neu5Ac)3
MASS        3026.8
CLASS       Glycoprotein; N-Glycan
REFERENCE   1  [PMID:6704968]
            Chandrasekaran EV, Davila M, Nixon D, Mendicino J.
            Structures of the oligosaccharide chains of two forms of alpha 1-acid glycoprotein purified from liver metastases of lung, colon, and breast tumors.
            Cancer. Res. 44 (1984) 1557-67.
DBLINKS     CCSD: 8168
            GlycomeDB: 33225
            JCGGDB: JCGG-STR024138
NODE        15
            1   GlcNAc   27.2   1.6
            2   GlcNAc   17.2   1.6
            3   Man       8.2   1.6
            4   Man       0.2   7.6
            5   Man       0.2  -4.4
            6   GlcNAc   -8.8   7.6
            7   GlcNAc   -8.8  -0.4
            8   GlcNAc   -8.8  -8.4
            9   Gal     -17.8   7.6
            10  Gal     -17.8   3.6
            11  LFuc    -17.8  -4.4
            12  Gal     -17.8  -8.4
            13  Neu5Ac  -26.8   7.6
            14  Neu5Ac  -26.8   3.6
            15  Neu5Ac  -26.8  -8.4
EDGE        14
            1     2:b1    1:4
            2     3:b1    2:4
            3     4:a1    3:6
            4     5:a1    3:3
            5     6:b1    4:2
            6     7:b1    5:4
            7     8:b1    5:2
            8     9:b1    6:4
            9    10:b1    7:4
            10   11:a1    7:3
            11   12:b1    8:4
            12   13:a2    9:3
            13   14:a2   10:3
            14   15:a2   12:6
///
""", "ringsmcaw_modified": """ENTRY       G04845                      Glycan
NODE        9
            1   Glc         0     0
            2   Gal       -10     0
            3   GlcNAc    -20    10
            4   GlcNAc    -20   -10
            5   Gal       -30    15
            6   LFuc      -30     5
            7   LFuc      -30    -5
            8   Gal       -30   -15
            9   Neu5Ac    -40    15
EDGE        8
            1     2:b1    1:4
            2     3:b1    2:6
            3     4:b1    2:3
            4     5:b1    3:4
            5     6:a1    3:3
            6     7:a1    4:4
            7     8:b1    4:3
            8     9:a2    5:3
///
ENTRY       G05108                      Glycan
NODE        7
            1   GalNAc     20     1
            2   Gal        10     1
            3   GlcNAc      0     1
            4   S          -5     5
            5   Gal       -10     1
            6   LFuc      -10    -4
            7   Neu5Ac    -20     1
EDGE        6
            1     2:b1    1:3
            2     3:b1    2:3
            3     4       3:6
            4     5:b1    3:4
            5     6:a1    3:3
            6     7:a2    5:3
///
ENTRY       G05121                      Glycan
NODE        7
            1   GalNAc      0     0
            2   Neu5Ac    -10     5
            3   Gal       -10    -5
            4   GlcNAc    -20    -5
            5   Gal       -30     0
            6   LFuc      -30   -10
            7   Neu5Ac    -40     0
EDGE        6
            1     2:a2    1:6
            2     3:b1    1:3
            3     4:b1    3:3
            4     5:b1    4:4
            5     6:a1    4:3
            6     7:a2    5:3
///
ENTRY       G04183                      Glycan
NODE        15
            1   GlcNAc      0     0
            2   GlcNAc    -10     0
            3   Man       -20     0
            4   Man       -30    10
            5   Man       -30   -10
            6   GlcNAc    -40    10
            7   GlcNAc    -40    -5
            8   GlcNAc    -40   -15
            9   Gal       -50    10
            10  Gal       -50     0
            11  LFuc      -50   -10
            12  Gal       -50   -15
            13  Neu5Ac    -60    10
            14  Neu5Ac    -60     0
            15  Neu5Ac    -60   -15
EDGE        14
            1     2:b1    1:4
            2     3:b1    2:4
            3     4:a1    3:6
            4     5:a1    3:3
            5     6:b1    4:2
            6     7:b1    5:4
            7     8:b1    5:2
            8     9:b1    6:4
            9    10:b1    7:4
            10   11:a1    7:3
            11   12:b1    8:4
            12   13:a2    9:6
            13   14:a2   10:3
            14   15:a2   12:6
///
ENTRY       G04329                      Glycan
NODE        10
            1   GlcNAc     27    -5
            2   Gal        18     0
            3   LFuc       18   -10
            4   GlcNAc      9     0
            5   Gal         0     5
            6   LFuc        0    -5
            7   GlcNAc     -9     5
            8   Gal       -18    10
            9   LFuc      -18     0
            10  Neu5Ac    -27    10
EDGE        9
            1     2:b1    1:4
            2     3:a1    1:3
            3     4:b1    2:3
            4     5:b1    4:4
            5     6:a1    4:3
            6     7:b1    5:3
            7     8:b1    7:4
            8     9:a1    7:3
            9    10:a2    8:3
///
ENTRY       G04804                      Glycan
NODE        15
            1   GlcNAc   27.2   1.6
            2   GlcNAc   17.2   1.6
            3   Man       8.2   1.6
            4   Man       0.2   7.6
            5   Man       0.2  -4.4
            6   GlcNAc   -8.8   7.6
            7   GlcNAc   -8.8  -0.4
            8   GlcNAc   -8.8  -8.4
            9   Gal     -17.8   7.6
            10  Gal     -17.8   3.6
            11  LFuc    -17.8  -4.4
            12  Gal     -17.8  -8.4
            13  Neu5Ac  -26.8   7.6
            14  Neu5Ac  -26.8   3.6
            15  Neu5Ac  -26.8  -8.4
EDGE        14
            1     2:b1    1:4
            2     3:b1    2:4
            3     4:a1    3:6
            4     5:a1    3:3
            5     6:b1    4:2
            6     7:b1    5:4
            7     8:b1    5:2
            8     9:b1    6:4
            9    10:b1    7:4
            10   11:a1    7:3
            11   12:b1    8:4
            12   13:a2    9:3
            13   14:a2   10:3
            14   15:a2   12:6
///
""", "ringsfinger": """ENTRY       G00015                      Glycan
NODE        8
            1   Asn        20     0
            2   GlcNAc     12     0
            3   GlcNAc      3     0
            4   Man        -5     0
            5   Man       -12     5
            6   Man       -12    -5
            7   GlcNAc    -20     5
            8   GlcNAc    -20    -5
EDGE        7
            1     2:b1    1
            2     3:b1    2:4
            3     4:b1    3:4
            4     5:a1    4:6
            5     6:a1    4:3
            6     7:b1    5:2
            7     8:b1    6:2
///
ENTRY       G00016                      Glycan
NODE        9
            1   Asn        20     3
            2   GlcNAc     12     3
            3   LFuc        4     8
            4   GlcNAc      3    -2
            5   Man        -5    -2
            6   Man       -12     3
            7   Man       -12    -7
            8   GlcNAc    -20     3
            9   GlcNAc    -20    -7
EDGE        8
            1     2:b1    1
            2     3:a1    2:6
            3     4:b1    2:4
            4     5:b1    4:4
            5     6:a1    5:6
            6     7:a1    5:3
            7     8:b1    6:2
            8     9:b1    7:2
///
ENTRY       G00017                      Glycan
NODE        11
            1   Asn        24     3
            2   GlcNAc     14     3
            3   LFuc        7     8
            4   GlcNAc      6    -2
            5   Man        -2    -2
            6   Man        -8     3
            7   Man        -8    -7
            8   GlcNAc    -16     3
            9   GlcNAc    -16    -7
            10  Gal       -24     3
            11  Gal       -24    -7
EDGE        10
            1     2:b1    1
            2     3:a1    2:6
            3     4:b1    2:4
            4     5:b1    4:4
            5     6:a1    5:6
            6     7:a1    5:3
            7     8:b1    6:2
            8     9:b1    7:2
            9    10:b1    8:4
            10   11:b1    9:4
///
ENTRY       G00018                      Glycan
NODE        13
            1   Asn        28     3
            2   GlcNAc     18     3
            3   LFuc       10     8
            4   GlcNAc      9    -2
            5   Man         1    -2
            6   Man        -5     4
            7   Man        -5    -8
            8   GlcNAc    -13     4
            9   GlcNAc    -13    -8
            10  Gal       -21     4
            11  Gal       -21    -8
            12  Neu5Ac    -29     4
            13  Neu5Ac    -29    -8
EDGE        12
            1     2:b1    1
            2     3:a1    2:6
            3     4:b1    2:4
            4     5:b1    4:4
            5     6:a1    5:6
            6     7:a1    5:3
            7     8:b1    6:2
            8     9:b1    7:2
            9    10:b1    8:4
            10   11:b1    9:4
            11   12:a2   10:6
            12   13:a2   11:6
///
ENTRY       G00019                      Glycan
NODE        9
            1   Asn        20     0
            2   GlcNAc     12     0
            3   GlcNAc      3     0
            4   Man        -5     0
            5   Man       -12     5
            6   Man       -12    -5
            7   GlcNAc    -15     0
            8   GlcNAc    -20     5
            9   GlcNAc    -20    -5
EDGE        8
            1     2:b1    1
            2     3:b1    2:4
            3     4:b1    3:4
            4     5:a1    4:6
            5     6:a1    4:3
            6     7:b1    4:4
            7     8:b1    5:2
            8     9:b1    6:2
///
ENTRY       G00020                      Glycan
NODE        9
            1   Asn        20     3
            2   GlcNAc     11     3
            3   GlcNAc      2     3
            4   Man        -6     3
            5   Man       -13     9
            6   Man       -13    -3
            7   GlcNAc    -21     9
            8   GlcNAc    -21     2
            9   GlcNAc    -21    -8
EDGE        8
            1     2:b1    1
            2     3:b1    2:4
            3     4:b1    3:4
            4     5:a1    4:6
            5     6:a1    4:3
            6     7:b1    5:2
            7     8:b1    6:4
            8     9:b1    6:2
///
""", "ringsbroken": """ABCDEFGENTRY       G00015                      Glycan
NODE        8
            1   Asn        20     0
            2   GlcNAc     12     0
"""
        }
        os.environ["http_proxy"] = ""  # work around for IOError: [Errno url error] invalid proxy for http:
        pass

    def tearDown(self):
        """No-op hook: this method performs no per-test cleanup."""
        return None

    def test_broken_kcf(self):
        """A malformed KCF stream makes the comparison raise ``UnboundLocalError``.

        The "ringsbroken" fixture starts with junk characters fused onto the
        ENTRY keyword and stops after two NODE rows.
        """
        # NOTE(review): the same handle is passed for both sets, so whichever
        # side is read second presumably sees an already-consumed stream.
        # Kept as-is because the expected exception depends on how
        # glycan_sets reads its inputs - confirm before changing.
        handle = StringIO(self.kcinput["ringsbroken"])
        # NOTE(review): UnboundLocalError looks like an implementation detail
        # of glycan_sets rather than a deliberate error contract - confirm.
        with self.assertRaises(UnboundLocalError):
            gs.glycan_set_comparison(handle, handle)

    def test_same_kcf(self):
        """Comparing a KCF set against identical data gives pairwise-equal results.

        Both inputs are built from the "ringsfinger" fixture; the test asserts
        that the two "unique" results match, that union equals intersection,
        and that the last two returned values match each other.
        """
        kcf = self.kcinput["ringsfinger"]
        # Two independent streams over the same text, so each is read from
        # its own start position.
        handle_a = StringIO(kcf)
        handle_b = StringIO(kcf)
        unique_a, unique_b, union, intersection, ab, ba = gs.glycan_set_comparison(handle_a, handle_b)
        self.assertEqual(unique_a, unique_b)
        self.assertEqual(union, intersection)
        self.assertEqual(ab, ba)

    def test_diff_kcf(self):
        """Comparing two KCF sets with different entries gives pairwise-unequal results.

        Set A is the "ringsfinger" fixture and set B is the fully annotated
        "ringsmcaw" fixture; the two fixtures share no entry IDs.
        """
        handle_a = StringIO(self.kcinput["ringsfinger"])
        handle_b = StringIO(self.kcinput["ringsmcaw"])
        unique_a, unique_b, union, intersection, ab, ba = gs.glycan_set_comparison(handle_a, handle_b)
        self.assertNotEqual(unique_a, unique_b)
        self.assertNotEqual(union, intersection)
        self.assertNotEqual(ab, ba)

    def test_diff_kcf_modified(self):
        """Same check as the plain "different sets" case, using the reduced fixture.

        Set A is the "ringsfinger" fixture and set B is "ringsmcaw_modified",
        whose entries carry only ENTRY/NODE/EDGE sections.
        """
        handle_a = StringIO(self.kcinput["ringsfinger"])
        handle_b = StringIO(self.kcinput["ringsmcaw_modified"])
        unique_a, unique_b, union, intersection, ab, ba = gs.glycan_set_comparison(handle_a, handle_b)
        self.assertNotEqual(unique_a, unique_b)
        self.assertNotEqual(union, intersection)
        self.assertNotEqual(ab, ba)

#    def test_bug_read_kcf(self):
#        """
#        readkcf is not a full implementation. Fails on ringsmcaw test set
#        :return:
#        """
#        import StringIO
#
#        kcf = self.kcinput["ringsfinger"]
#        kcf2 = self.kcinput["ringsmcaw"]
#        handle = StringIO.StringIO(''.join(kcf))
#        handle2 = StringIO.StringIO(''.join(kcf2))
#        with self.assertRaises(ValueError):
#            uniquesetA, uniquesetB, union, intersection, AB, BA = gs.glycan_set_comparison(handle, handle2)

    def test_empty_stream(self):
        """Empty string, empty list and ``None`` inputs each raise ``IOError``.

        The return value is deliberately not unpacked: if the call ever
        returned instead of raising, unpacking could fail with an unrelated
        error and hide the real "IOError not raised" failure.
        """
        with self.assertRaises(IOError):
            gs.glycan_set_comparison("", "")
        with self.assertRaises(IOError):
            gs.glycan_set_comparison([], [])
        with self.assertRaises(IOError):
            gs.glycan_set_comparison(None, None)


def run_tests():
    """Run this module's tests by calling ``unittest.main()`` with its defaults."""
    unittest.main()


# Run the suite when this file is executed directly as a script.
if __name__ == '__main__':
    run_tests()