diff weblogolib/__init__.py @ 10:20716450be87

Uploaded
author davidmurphy
date Mon, 30 Jan 2012 21:17:50 -0500
parents f3462128e87c
children cd6c4bd14718
line wrap: on
line diff
--- a/weblogolib/__init__.py	Mon Jan 30 08:17:57 2012 -0500
+++ b/weblogolib/__init__.py	Mon Jan 30 21:17:50 2012 -0500
@@ -983,11 +983,20 @@
         prior = weight * equiprobable_distribution(len(alphabet)) 
         
     elif comp.lower() == 'escherichiacoli' :
-	composition="{'CTT': 0.7616, 'ATG': 1.5872, 'ACA': 0.4096, 'ACG': 0.736, 'ATC': 1.1648, 'AAC': 1.5615999999999999, 'ATA': 0.2368, 'AGG': 0.1024, 'CCT': 0.5376000000000001, 'ACT': 0.512, 'AGC': 1.0624, 'AAG': 0.7744, 'AGA': 0.0896, 'CAT': 1.0112, 'AAT': 1.4016, 'ATT': 1.952, 'CTG': 3.0016, 'CTA': 0.3392, 'CTC': 0.672, 'CAC': 0.8383999999999999, 'AAA': 2.1248, 'CCG': 1.7087999999999999, 'AGT': 0.4608, 'CCA': 0.4224, 'CAA': 0.7744, 'CCC': 0.4096, 'TAT': 1.0752000000000002, 'GGT': 1.3632, 'TGT': 0.37760000000000005, 'CGA': 0.2752, 'CAG': 1.7728, 'TCT': 0.3648, 'GAT': 2.4255999999999998, 'CGG': 0.26239999999999997, 'TTT': 1.2608, 'TGC': 0.512, 'GGG': 0.5504, 'TAG': 1e-06, 'GGA': 0.5888, 'TAA': 0.1152, 'GGC': 2.1376, 'TAC': 0.9344, 'TTC': 0.96, 'TCG': 0.512, 'TTA': 0.9728, 'TTG': 0.7616, 'TCC': 0.352, 'ACC': 1.4592, 'TCA': 0.4992, 'GCA': 1.3504, 'GTA': 0.736, 'GCC': 2.0224, 'GTC': 0.7487999999999999, 'GCG': 2.464, 'GTG': 1.6896, 'GAG': 1.1776, 'GTT': 1.0752000000000002, 'GCT': 0.6848, 'TGA': 0.064, 'GAC': 1.312, 'CGT': 1.3504, 'TGG': 0.6848, 'GAA': 2.7968, 'CGC': 1.664}"
+      if(altype=="codonsT"):
+        composition="{'CTT': 0.7616, 'ATG': 1.5872, 'ACA': 0.4096, 'ACG': 0.736, 'ATC': 1.1648, 'AAC': 1.5615999999999999, 'ATA': 0.2368, 'AGG': 0.1024, 'CCT': 0.5376000000000001, 'ACT': 0.512, 'AGC': 1.0624, 'AAG': 0.7744, 'AGA': 0.0896, 'CAT': 1.0112, 'AAT': 1.4016, 'ATT': 1.952, 'CTG': 3.0016, 'CTA': 0.3392, 'CTC': 0.672, 'CAC': 0.8383999999999999, 'AAA': 2.1248, 'CCG': 1.7087999999999999, 'AGT': 0.4608, 'CCA': 0.4224, 'CAA': 0.7744, 'CCC': 0.4096, 'TAT': 1.0752000000000002, 'GGT': 1.3632, 'TGT': 0.37760000000000005, 'CGA': 0.2752, 'CAG': 1.7728, 'TCT': 0.3648, 'GAT': 2.4255999999999998, 'CGG': 0.26239999999999997, 'TTT': 1.2608, 'TGC': 0.512, 'GGG': 0.5504, 'TAG': 1e-06, 'GGA': 0.5888, 'TAA': 0.1152, 'GGC': 2.1376, 'TAC': 0.9344, 'TTC': 0.96, 'TCG': 0.512, 'TTA': 0.9728, 'TTG': 0.7616, 'TCC': 0.352, 'ACC': 1.4592, 'TCA': 0.4992, 'GCA': 1.3504, 'GTA': 0.736, 'GCC': 2.0224, 'GTC': 0.7487999999999999, 'GCG': 2.464, 'GTG': 1.6896, 'GAG': 1.1776, 'GTT': 1.0752000000000002, 'GCT': 0.6848, 'TGA': 0.064, 'GAC': 1.312, 'CGT': 1.3504, 'TGG': 0.6848, 'GAA': 2.7968, 'CGC': 1.664}"
+      else:
+        composition="{'CUU': 0.7616, 'AUG': 1.5872, 'ACA': 0.4096, 'ACG': 0.736, 'AUC': 1.1648, 'AAC': 1.5615999999999999, 'AUA': 0.2368, 'AGG': 0.1024, 'CCU': 0.5376000000000001, 'ACU': 0.512, 'AGC': 1.0624, 'AAG': 0.7744, 'AGA': 0.0896, 'CAU': 1.0112, 'AAU': 1.4016, 'AUU': 1.952, 'CUG': 3.0016, 'CUA': 0.3392, 'CUC': 0.672, 'CAC': 0.8383999999999999, 'AAA': 2.1248, 'CCG': 1.7087999999999999, 'AGU': 0.4608, 'CCA': 0.4224, 'CAA': 0.7744, 'CCC': 0.4096, 'UAU': 1.0752000000000002, 'GGU': 1.3632, 'UGU': 0.37760000000000005, 'CGA': 0.2752, 'CAG': 1.7728, 'UCU': 0.3648, 'GAU': 2.4255999999999998, 'CGG': 0.26239999999999997, 'UUU': 1.2608, 'UGC': 0.512, 'GGG': 0.5504, 'UAG': 1e-06, 'GGA': 0.5888, 'UAA': 0.1152, 'GGC': 2.1376, 'UAC': 0.9344, 'UUC': 0.96, 'UCG': 0.512, 'UUA': 0.9728, 'UUG': 0.7616, 'UCC': 0.352, 'ACC': 1.4592, 'UCA': 0.4992, 'GCA': 1.3504, 'GUA': 0.736, 'GCC': 2.0224, 'GUC': 0.7487999999999999, 'GCG': 2.464, 'GUG': 1.6896, 'GAG': 1.1776, 'GUU': 1.0752000000000002, 'GCU': 0.6848, 'UGA': 0.064, 'GAC': 1.312, 'CGU': 1.3504, 'UGG': 0.6848, 'GAA': 2.7968, 'CGC': 1.664}"
     elif comp.lower() == 'homosapiens' :
-	composition="{'CTT': 0.8448, 'ATG': 1.408, 'ACA': 0.9663999999999999, 'ACG': 0.39039999999999997, 'ATC': 1.3312, 'AAC': 1.2224000000000002, 'ATA': 0.48, 'AGG': 0.768, 'CCT': 1.12, 'ACT': 0.8383999999999999, 'AGC': 1.248, 'AAG': 2.0416, 'AGA': 0.7807999999999999, 'CAT': 0.6976, 'AAT': 1.088, 'ATT': 1.024, 'CTG': 2.5344, 'CTA': 0.4608, 'CTC': 1.2544000000000002, 'CAC': 0.9663999999999999, 'AAA': 1.5615999999999999, 'CCG': 0.44160000000000005, 'AGT': 0.7744, 'CCA': 1.0816, 'CAA': 0.7872, 'CCC': 1.2672, 'TAT': 0.7807999999999999, 'GGT': 0.6912, 'TGT': 0.6784, 'CGA': 0.3968, 'CAG': 2.1888, 'TCT': 0.9728, 'GAT': 1.3952, 'CGG': 0.7296, 'TTT': 1.1264, 'TGC': 0.8064, 'GGG': 1.056, 'TAG': 0.0512, 'GGA': 1.056, 'TAA': 0.064, 'GGC': 1.4208, 'TAC': 0.9792000000000001, 'TTC': 1.2992000000000001, 'TCG': 0.2816, 'TTA': 0.4928, 'TTG': 0.8256, 'TCC': 1.1328, 'ACC': 1.2096, 'TCA': 0.7807999999999999, 'GCA': 1.0112, 'GTA': 0.45439999999999997, 'GCC': 1.7728, 'GTC': 0.928, 'GCG': 0.4736, 'GTG': 1.7984, 'GAG': 2.5344, 'GTT': 0.704, 'GCT': 1.1776, 'TGA': 0.1024, 'GAC': 1.6064, 'CGT': 0.288, 'TGG': 0.8448, 'GAA': 1.856, 'CGC': 0.6656}"
-    elif comp.lower() == 'saccharomycescerevisiae' :
-	composition="{'CTT': 0.7872, 'ATG': 1.3376, 'ACA': 1.1392, 'ACG': 0.512, 'ATC': 1.1008, 'AAC': 1.5872, 'ATA': 1.1392, 'AGG': 0.5888, 'CCT': 0.864, 'ACT': 1.2992000000000001, 'AGC': 0.6272000000000001, 'AAG': 1.9712, 'AGA': 1.3632, 'CAT': 0.8704, 'AAT': 2.2848, 'ATT': 1.9264000000000001, 'CTG': 0.672, 'CTA': 0.8576, 'CTC': 0.3456, 'CAC': 0.4992, 'AAA': 2.6816, 'CCG': 0.3392, 'AGT': 0.9087999999999999, 'CCA': 1.1712, 'CAA': 1.7472, 'CCC': 0.4352, 'TAT': 1.2032, 'GGT': 1.5295999999999998, 'TGT': 0.5184, 'CGA': 0.192, 'CAG': 0.7744, 'TCT': 1.504, 'GAT': 2.4064, 'CGG': 0.1088, 'TTT': 1.6704, 'TGC': 0.3072, 'GGG': 0.384, 'TAG': 0.032, 'GGA': 0.6976, 'TAA': 0.0704, 'GGC': 0.6272000000000001, 'TAC': 0.9472, 'TTC': 1.1776, 'TCG': 0.5504, 'TTA': 1.6767999999999998, 'TTG': 1.7408, 'TCC': 0.9087999999999999, 'ACC': 0.8128, 'TCA': 1.1967999999999999, 'GCA': 1.0368, 'GTA': 0.7552000000000001, 'GCC': 0.8064, 'GTC': 0.7552000000000001, 'GCG': 0.3968, 'GTG': 0.6912, 'GAG': 1.2288, 'GTT': 1.4144, 'GCT': 1.3568, 'TGA': 0.0448, 'GAC': 1.2928, 'CGT': 0.4096, 'TGG': 0.6656, 'GAA': 2.9184, 'CGC': 0.1664}"
+      if(altype=="codonsT"):
+        composition="{'CTT': 0.8448, 'ATG': 1.408, 'ACA': 0.9663999999999999, 'ACG': 0.39039999999999997, 'ATC': 1.3312, 'AAC': 1.2224000000000002, 'ATA': 0.48, 'AGG': 0.768, 'CCT': 1.12, 'ACT': 0.8383999999999999, 'AGC': 1.248, 'AAG': 2.0416, 'AGA': 0.7807999999999999, 'CAT': 0.6976, 'AAT': 1.088, 'ATT': 1.024, 'CTG': 2.5344, 'CTA': 0.4608, 'CTC': 1.2544000000000002, 'CAC': 0.9663999999999999, 'AAA': 1.5615999999999999, 'CCG': 0.44160000000000005, 'AGT': 0.7744, 'CCA': 1.0816, 'CAA': 0.7872, 'CCC': 1.2672, 'TAT': 0.7807999999999999, 'GGT': 0.6912, 'TGT': 0.6784, 'CGA': 0.3968, 'CAG': 2.1888, 'TCT': 0.9728, 'GAT': 1.3952, 'CGG': 0.7296, 'TTT': 1.1264, 'TGC': 0.8064, 'GGG': 1.056, 'TAG': 0.0512, 'GGA': 1.056, 'TAA': 0.064, 'GGC': 1.4208, 'TAC': 0.9792000000000001, 'TTC': 1.2992000000000001, 'TCG': 0.2816, 'TTA': 0.4928, 'TTG': 0.8256, 'TCC': 1.1328, 'ACC': 1.2096, 'TCA': 0.7807999999999999, 'GCA': 1.0112, 'GTA': 0.45439999999999997, 'GCC': 1.7728, 'GTC': 0.928, 'GCG': 0.4736, 'GTG': 1.7984, 'GAG': 2.5344, 'GTT': 0.704, 'GCT': 1.1776, 'TGA': 0.1024, 'GAC': 1.6064, 'CGT': 0.288, 'TGG': 0.8448, 'GAA': 1.856, 'CGC': 0.6656}"
+      else:
+        composition="{'CUU': 0.8448, 'AUG': 1.408, 'ACA': 0.9663999999999999, 'ACG': 0.39039999999999997, 'AUC': 1.3312, 'AAC': 1.2224000000000002, 'AUA': 0.48, 'AGG': 0.768, 'CCU': 1.12, 'ACU': 0.8383999999999999, 'AGC': 1.248, 'AAG': 2.0416, 'AGA': 0.7807999999999999, 'CAU': 0.6976, 'AAU': 1.088, 'AUU': 1.024, 'CUG': 2.5344, 'CUA': 0.4608, 'CUC': 1.2544000000000002, 'CAC': 0.9663999999999999, 'AAA': 1.5615999999999999, 'CCG': 0.44160000000000005, 'AGU': 0.7744, 'CCA': 1.0816, 'CAA': 0.7872, 'CCC': 1.2672, 'UAU': 0.7807999999999999, 'GGU': 0.6912, 'UGU': 0.6784, 'CGA': 0.3968, 'CAG': 2.1888, 'UCU': 0.9728, 'GAU': 1.3952, 'CGG': 0.7296, 'UUU': 1.1264, 'UGC': 0.8064, 'GGG': 1.056, 'UAG': 0.0512, 'GGA': 1.056, 'UAA': 0.064, 'GGC': 1.4208, 'UAC': 0.9792000000000001, 'UUC': 1.2992000000000001, 'UCG': 0.2816, 'UUA': 0.4928, 'UUG': 0.8256, 'UCC': 1.1328, 'ACC': 1.2096, 'UCA': 0.7807999999999999, 'GCA': 1.0112, 'GUA': 0.45439999999999997, 'GCC': 1.7728, 'GUC': 0.928, 'GCG': 0.4736, 'GUG': 1.7984, 'GAG': 2.5344, 'GUU': 0.704, 'GCU': 1.1776, 'UGA': 0.1024, 'GAC': 1.6064, 'CGU': 0.288, 'UGG': 0.8448, 'GAA': 1.856, 'CGC': 0.6656}"
+    elif comp.lower() == 'saccharomycescerevisiae' :      
+      if(altype=="codonsT"):
+        composition="{'CTT': 0.7872, 'ATG': 1.3376, 'ACA': 1.1392, 'ACG': 0.512, 'ATC': 1.1008, 'AAC': 1.5872, 'ATA': 1.1392, 'AGG': 0.5888, 'CCT': 0.864, 'ACT': 1.2992000000000001, 'AGC': 0.6272000000000001, 'AAG': 1.9712, 'AGA': 1.3632, 'CAT': 0.8704, 'AAT': 2.2848, 'ATT': 1.9264000000000001, 'CTG': 0.672, 'CTA': 0.8576, 'CTC': 0.3456, 'CAC': 0.4992, 'AAA': 2.6816, 'CCG': 0.3392, 'AGT': 0.9087999999999999, 'CCA': 1.1712, 'CAA': 1.7472, 'CCC': 0.4352, 'TAT': 1.2032, 'GGT': 1.5295999999999998, 'TGT': 0.5184, 'CGA': 0.192, 'CAG': 0.7744, 'TCT': 1.504, 'GAT': 2.4064, 'CGG': 0.1088, 'TTT': 1.6704, 'TGC': 0.3072, 'GGG': 0.384, 'TAG': 0.032, 'GGA': 0.6976, 'TAA': 0.0704, 'GGC': 0.6272000000000001, 'TAC': 0.9472, 'TTC': 1.1776, 'TCG': 0.5504, 'TTA': 1.6767999999999998, 'TTG': 1.7408, 'TCC': 0.9087999999999999, 'ACC': 0.8128, 'TCA': 1.1967999999999999, 'GCA': 1.0368, 'GTA': 0.7552000000000001, 'GCC': 0.8064, 'GTC': 0.7552000000000001, 'GCG': 0.3968, 'GTG': 0.6912, 'GAG': 1.2288, 'GTT': 1.4144, 'GCT': 1.3568, 'TGA': 0.0448, 'GAC': 1.2928, 'CGT': 0.4096, 'TGG': 0.6656, 'GAA': 2.9184, 'CGC': 0.1664}"
+      else:
+        composition="{'CUU': 0.7872, 'AUG': 1.3376, 'ACA': 1.1392, 'ACG': 0.512, 'AUC': 1.1008, 'AAC': 1.5872, 'AUA': 1.1392, 'AGG': 0.5888, 'CCU': 0.864, 'ACU': 1.2992000000000001, 'AGC': 0.6272000000000001, 'AAG': 1.9712, 'AGA': 1.3632, 'CAU': 0.8704, 'AAU': 2.2848, 'AUU': 1.9264000000000001, 'CUG': 0.672, 'CUA': 0.8576, 'CUC': 0.3456, 'CAC': 0.4992, 'AAA': 2.6816, 'CCG': 0.3392, 'AGU': 0.9087999999999999, 'CCA': 1.1712, 'CAA': 1.7472, 'CCC': 0.4352, 'UAU': 1.2032, 'GGU': 1.5295999999999998, 'UGU': 0.5184, 'CGA': 0.192, 'CAG': 0.7744, 'UCU': 1.504, 'GAU': 2.4064, 'CGG': 0.1088, 'UUU': 1.6704, 'UGC': 0.3072, 'GGG': 0.384, 'UAG': 0.032, 'GGA': 0.6976, 'UAA': 0.0704, 'GGC': 0.6272000000000001, 'UAC': 0.9472, 'UUC': 1.1776, 'UCG': 0.5504, 'UUA': 1.6767999999999998, 'UUG': 1.7408, 'UCC': 0.9087999999999999, 'ACC': 0.8128, 'UCA': 1.1967999999999999, 'GCA': 1.0368, 'GUA': 0.7552000000000001, 'GCC': 0.8064, 'GUC': 0.7552000000000001, 'GCG': 0.3968, 'GUG': 0.6912, 'GAG': 1.2288, 'GUU': 1.4144, 'GCU': 1.3568, 'UGA': 0.0448, 'GAC': 1.2928, 'CGU': 0.4096, 'UGG': 0.6656, 'GAA': 2.9184, 'CGC': 0.1664}"
     elif comp.lower() == 'auto' or comp.lower() == 'automatic':
         if alphabet == unambiguous_protein_alphabet :
             prior =  weight * asarray(aa_composition, float64)
@@ -1002,7 +1011,7 @@
     elif isfloat(comp) :
         prior = weight * base_distribution( float(comp)*100. )
 
-    elif composition[0] == '{' and composition[-1] == '}' : 
+    if composition[0] == '{' and composition[-1] == '}' : 
         explicit = composition[1: -1]
         explicit = explicit.replace(',',' ').replace("'", ' ').replace('"',' ').replace(':', ' ').split()