diff ngsutils/support/llh.py @ 0:4e4e4093d65d draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ngsutils commit 09194687c74a424732f8b0c017cbb942aad89068
author iuc
date Wed, 11 Nov 2015 13:04:07 -0500
parents
children 7a68005de299
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ngsutils/support/llh.py	Wed Nov 11 13:04:07 2015 -0500
@@ -0,0 +1,55 @@
+'''
+Methods for calculating log-likelihoods for nucleotide frequencies
+'''
+import math
+import collections
+from ngsutils.support import memoize
+
+_default_background = {'A': 0.3, 'T': 0.3, 'C': 0.2, 'G': 0.2}
+
+NucleotideLogLikelihood = collections.namedtuple('NucleotideLogLikelihood', 'A C G T pseudo')
+
+@memoize
+def pseudo_count(N, bg):
+    '''
+    >>> pseudo_count(100, _default_background['A'])
+    3
+    >>> pseudo_count(100, _default_background['C'])
+    2
+    '''
+
+    return bg * math.sqrt(N)
+
+
+def calc_llh(A, C, G, T, bg=_default_background, pseudo='auto'):
+    if pseudo == 'auto':
+        N = A + C + G + T
+        Ap = float(A) + pseudo_count(N, bg['A'])
+        Cp = float(C) + pseudo_count(N, bg['C'])
+        Gp = float(G) + pseudo_count(N, bg['G'])
+        Tp = float(T) + pseudo_count(N, bg['T'])
+    elif pseudo:
+        Ap = float(A) + pseudo
+        Cp = float(C) + pseudo
+        Gp = float(G) + pseudo
+        Tp = float(T) + pseudo
+    else:
+        Ap = float(A)
+        Cp = float(C)
+        Gp = float(G)
+        Tp = float(T)
+
+    Np = Ap + Cp + Gp + Tp
+
+    freqA = Ap / Np
+    freqC = Cp / Np
+    freqG = Gp / Np
+    freqT = Tp / Np
+
+    return NucleotideLogLikelihood(math.log(freqA / bg['A']), math.log(freqC / bg['C']), math.log(freqG / bg['G']), math.log(freqT / bg['T']), pseudo)
+
+
+
+if __name__ == '__main__':
+    import doctest
+    doctest.testmod()