comparison weblogolib/__init__.py @ 8:5149eb3a89c2

Uploaded
author davidmurphy
date Fri, 20 Jan 2012 09:03:40 -0500
parents 4d47ab2b7bcc
children f3462128e87c
comparison
equal deleted inserted replaced
7:8d676bbd1f2d 8:5149eb3a89c2
130 codon_alphabetT=['AAA', 'AAC', 'AAG', 'AAT', 'ACA', 'ACC', 'ACG', 'ACT', 'AGA', 'AGC', 'AGG', 'AGT', 'ATA', 'ATC', 'ATG', 'ATT', 'CAA', 'CAC', 'CAG', 'CAT', 'CCA', 'CCC', 'CCG', 'CCT', 'CGA', 'CGC', 'CGG', 'CGT', 'CTA', 'CTC', 'CTG', 'CTT', 'GAA', 'GAC', 'GAG', 'GAT', 'GCA', 'GCC', 'GCG', 'GCT', 'GGA', 'GGC', 'GGG', 'GGT', 'GTA', 'GTC', 'GTG', 'GTT', 'TAA', 'TAC', 'TAG', 'TAT', 'TCA', 'TCC', 'TCG', 'TCT', 'TGA', 'TGC', 'TGG', 'TGT', 'TTA', 'TTC', 'TTG', 'TTT'] 130 codon_alphabetT=['AAA', 'AAC', 'AAG', 'AAT', 'ACA', 'ACC', 'ACG', 'ACT', 'AGA', 'AGC', 'AGG', 'AGT', 'ATA', 'ATC', 'ATG', 'ATT', 'CAA', 'CAC', 'CAG', 'CAT', 'CCA', 'CCC', 'CCG', 'CCT', 'CGA', 'CGC', 'CGG', 'CGT', 'CTA', 'CTC', 'CTG', 'CTT', 'GAA', 'GAC', 'GAG', 'GAT', 'GCA', 'GCC', 'GCG', 'GCT', 'GGA', 'GGC', 'GGG', 'GGT', 'GTA', 'GTC', 'GTG', 'GTT', 'TAA', 'TAC', 'TAG', 'TAT', 'TCA', 'TCC', 'TCG', 'TCT', 'TGA', 'TGC', 'TGG', 'TGT', 'TTA', 'TTC', 'TTG', 'TTT']
131 131
132 altype="codonsT" 132 altype="codonsT"
133 offset=0 133 offset=0
134 isreversed=False 134 isreversed=False
135 show_warnings=False
135 col=[] 136 col=[]
136 137
137 138
138 139
139 __all__ = ['LogoSize', 140 __all__ = ['LogoSize',
167 'colorscheme', 168 'colorscheme',
168 ] 169 ]
169 170
170 description = "Create sequence logos from biological sequence alignments." 171 description = "Create sequence logos from biological sequence alignments."
171 172
172 __version__ = "1.0" 173 __version__ = "1.1"
173 174
174 # These keywords are substituted by subversion. 175 # These keywords are substituted by subversion.
175 # The date and revision will only tell the truth after a branch or tag, 176 # The date and revision will only tell the truth after a branch or tag,
176 # since different files in trunk will have been changed at different times 177 # since different files in trunk will have been changed at different times
177 release_date ="$Date: 2011-09-17 16:30:00 -0700 (Tue, 14 Oct 2008) $".split()[1] 178 release_date ="$Date: 2011-09-17 16:30:00 -0700 (Tue, 14 Oct 2008) $".split()[1]
310 # The base stack width is set equal to 9pt Courier. 311 # The base stack width is set equal to 9pt Courier.
311 # (Courier has a width equal to 3/5 of the point size.) 312 # (Courier has a width equal to 3/5 of the point size.)
312 # Check that can get 80 characters in journal page @small 313 # Check that can get 80 characters in journal page @small
313 # 40 characters in a journal column 314 # 40 characters in a journal column
314 std_sizes = { 315 std_sizes = {
315 "small" : LogoSize( stack_width = 10, stack_height = 10*1*5), 316 "small" : LogoSize( stack_width = 16.2, stack_height = 10*1*5),
316 "medium" : LogoSize( stack_width = 10*2, stack_height = 10*2*5), 317 "medium" : LogoSize( stack_width = 16.2*2, stack_height = 10*2*5),
317 "large" : LogoSize( stack_width = 10*3, stack_height = 10*3*5), 318 "large" : LogoSize( stack_width = 16.2*3, stack_height = 10*3*5),
318 } 319 }
319 320
320 321
321 std_alphabets = { 322 std_alphabets = {
322 'protein': unambiguous_protein_alphabet, 323 'protein': unambiguous_protein_alphabet,
437 self.yaxis_tic_interval = 1. 438 self.yaxis_tic_interval = 1.
438 self.yaxis_minor_tic_ratio = 5 439 self.yaxis_minor_tic_ratio = 5
439 self.yaxis_scale = None 440 self.yaxis_scale = None
440 441
441 self.show_xaxis = True 442 self.show_xaxis = True
443 self.strict=False
442 self.xaxis_label = "" 444 self.xaxis_label = ""
443 self.xaxis_tic_interval =1 445 self.xaxis_tic_interval =1
444 self.rotate_numbers = False 446 self.rotate_numbers = False
445 self.number_interval = 5 447 self.number_interval = 5
446 self.show_ends = False 448 self.show_ends = False
796 "xaxis_label", "xaxis_tic_interval", "number_interval", 798 "xaxis_label", "xaxis_tic_interval", "number_interval",
797 "fineprint", "shrink_fraction", "errorbar_fraction", 799 "fineprint", "shrink_fraction", "errorbar_fraction",
798 "errorbar_width_fraction", 800 "errorbar_width_fraction",
799 "errorbar_gray", "small_fontsize", "fontsize", 801 "errorbar_gray", "small_fontsize", "fontsize",
800 "title_fontsize", "number_fontsize", "text_font", 802 "title_fontsize", "number_fontsize", "text_font",
801 "logo_font", "title_font", 803 "logo_font", "title_font", "strict",
802 "logo_label", "yaxis_scale", "end_type", 804 "logo_label", "yaxis_scale", "end_type",
803 "debug", "show_title", "show_xaxis", 805 "debug", "show_title", "show_xaxis",
804 "show_xaxis_label", "show_yaxis", "show_yaxis_label", 806 "show_xaxis_label", "show_yaxis", "show_yaxis_label",
805 "show_boxes", "show_errorbars", "show_fineprint", 807 "show_boxes", "show_errorbars", "show_fineprint",
806 "rotate_numbers", "show_ends", "altype", 808 "rotate_numbers", "show_ends", "altype",
928 subsitutions["logo_data"] = "\n".join(data) 930 subsitutions["logo_data"] = "\n".join(data)
929 931
930 932
931 # Create and output logo 933 # Create and output logo
932 template = resource_string( __name__, 'template.eps', __file__) 934 template = resource_string( __name__, 'template.eps', __file__)
935
936
933 logo = Template(template).substitute(subsitutions) 937 logo = Template(template).substitute(subsitutions)
934 print >>fout, logo 938 print >>fout, logo
935 939
936 940
937 # map between output format names and logo 941 # map between output format names and logo
1246 while counter+offset<seq_length: 1250 while counter+offset<seq_length:
1247 for i in range(0,len(seqs)): 1251 for i in range(0,len(seqs)):
1248 if len(str(seqs[i][(counter):(counter+3)]))==3 and len(seqs[i][(counter):(counter+3)].strip("GATUC"))==0 : 1252 if len(str(seqs[i][(counter):(counter+3)]))==3 and len(seqs[i][(counter):(counter+3)].strip("GATUC"))==0 :
1249 if(str(seqs[i][(counter):(counter+3)]) in alphabet): 1253 if(str(seqs[i][(counter):(counter+3)]) in alphabet):
1250 x[counter/3][ (alphabet.index(str(seqs[i][(counter):(counter+3)]))) ]+=1 1254 x[counter/3][ (alphabet.index(str(seqs[i][(counter):(counter+3)]))) ]+=1
1255 elif show_warnings:
1256 if len(seqs[i][(counter):(counter+3)].strip("GATUC"))==1 or len(seqs[i][(counter):(counter+3)].strip("GATUC"))==2 :
1257 print >>sys.stderr, 'Warning:Incomplete or non GATUC codon detected:', seqs[i][(counter):(counter+3)]
1258 print >>sys.stderr, 'Position:',counter
1259 print >>sys.stderr, 'Sequence:',i
1260 print >>sys.stderr, 'This will be treated as ---'
1261
1262
1251 counter=counter+3 1263 counter=counter+3
1252 counts=asarray(x) 1264 counts=asarray(x)
1253 else: 1265 else:
1254 counts = asarray(seqs.tally()) 1266 counts = asarray(seqs.tally())
1255 1267
1367 while 1: 1379 while 1:
1368 line = finp.readline() 1380 line = finp.readline()
1369 if not line: 1381 if not line:
1370 break 1382 break
1371 line = line.split() 1383 line = line.split()
1372 priordict[line[0]]=line[1] 1384
1373 return priordict 1385 if(altype=="codonsT"):
1386 priordict[line[0].upper().replace("U", "T")]=(float(line[1])/1000)*64
1387 else:
1388 priordict[line[0].upper().replace("T", "U")]=(float(line[1])/1000)*64
1389
1390 return priordict
1374 1391
1375 def _build_logodata(options) : 1392 def _build_logodata(options) :
1376 global offset 1393 global offset
1377 offset=options.frame 1394 offset=options.frame
1395 global show_warnings
1396 show_warnings = options.strict
1378 options.alphabet = None 1397 options.alphabet = None
1379 options.ignore_lower_case = False 1398 options.ignore_lower_case = False
1380 #options.default_color = Color.by_name("black") 1399 #options.default_color = Color.by_name("black")
1381 options.color_scheme=None 1400 options.color_scheme=None
1382 #options.colors=[] 1401 #options.colors=[]
1383 options.show_ends=False 1402 options.show_ends=False
1384 seqs = read_seq_data(options.fin, 1403 seqs = read_seq_data(options.fin,
1385 options.input_parser.read, 1404 options.input_parser.read,
1386 alphabet=options.alphabet, 1405 alphabet=options.alphabet,
1387 ignore_lower_case = options.ignore_lower_case) 1406 ignore_lower_case = options.ignore_lower_case)
1388 if(options.priorfile!=None): 1407 if(options.priorfile!=None):
1389 if(altype=="CodonsT"): 1408 if(altype=="CodonsT"):
1390 options.composition= str(read_priors(options.priorfile,codon_alphabetT)) 1409 options.composition= str(read_priors(options.priorfile,codon_alphabetT))
1391 options.alphabet = codon_alphabetT 1410 options.alphabet = codon_alphabetT
1392 else: 1411 else:
1406 direct_from_opts = [ 1425 direct_from_opts = [
1407 "stacks_per_line", 1426 "stacks_per_line",
1408 "logo_title", 1427 "logo_title",
1409 "yaxis_label", 1428 "yaxis_label",
1410 "show_xaxis", 1429 "show_xaxis",
1430 "strict",
1411 "show_yaxis", 1431 "show_yaxis",
1412 "xaxis_label", 1432 "xaxis_label",
1413 "show_ends", 1433 "show_ends",
1414 "fineprint", 1434 "fineprint",
1415 "show_errorbars", 1435 "show_errorbars",
1642 dest="logo_end", 1662 dest="logo_end",
1643 action="store", 1663 action="store",
1644 type="int", 1664 type="int",
1645 help="Upper bound of sequence to display", 1665 help="Upper bound of sequence to display",
1646 metavar="INDEX") 1666 metavar="INDEX")
1647 1667
1668 data_grp.add_option( "-G", "--strict",
1669 dest="strict",
1670 action="store",
1671 type="boolean",
1672 help="Issue warnings if partial codons are encountered. Default: %default",default = defaults.strict,
1673 metavar="True/False")
1648 # ========================== FORMAT OPTIONS ========================== 1674 # ========================== FORMAT OPTIONS ==========================
1649 1675
1650 format_grp.add_option( "-s", "--size", 1676 format_grp.add_option( "-s", "--size",
1651 dest="logo_size", 1677 dest="logo_size",
1652 action="store", 1678 action="store",