Mercurial > repos > davidmurphy > codonlogo
comparison weblogolib/__init__.py @ 8:5149eb3a89c2
Uploaded
author | davidmurphy |
---|---|
date | Fri, 20 Jan 2012 09:03:40 -0500 |
parents | 4d47ab2b7bcc |
children | f3462128e87c |
comparison
equal
deleted
inserted
replaced
7:8d676bbd1f2d | 8:5149eb3a89c2 |
---|---|
130 codon_alphabetT=['AAA', 'AAC', 'AAG', 'AAT', 'ACA', 'ACC', 'ACG', 'ACT', 'AGA', 'AGC', 'AGG', 'AGT', 'ATA', 'ATC', 'ATG', 'ATT', 'CAA', 'CAC', 'CAG', 'CAT', 'CCA', 'CCC', 'CCG', 'CCT', 'CGA', 'CGC', 'CGG', 'CGT', 'CTA', 'CTC', 'CTG', 'CTT', 'GAA', 'GAC', 'GAG', 'GAT', 'GCA', 'GCC', 'GCG', 'GCT', 'GGA', 'GGC', 'GGG', 'GGT', 'GTA', 'GTC', 'GTG', 'GTT', 'TAA', 'TAC', 'TAG', 'TAT', 'TCA', 'TCC', 'TCG', 'TCT', 'TGA', 'TGC', 'TGG', 'TGT', 'TTA', 'TTC', 'TTG', 'TTT'] | 130 codon_alphabetT=['AAA', 'AAC', 'AAG', 'AAT', 'ACA', 'ACC', 'ACG', 'ACT', 'AGA', 'AGC', 'AGG', 'AGT', 'ATA', 'ATC', 'ATG', 'ATT', 'CAA', 'CAC', 'CAG', 'CAT', 'CCA', 'CCC', 'CCG', 'CCT', 'CGA', 'CGC', 'CGG', 'CGT', 'CTA', 'CTC', 'CTG', 'CTT', 'GAA', 'GAC', 'GAG', 'GAT', 'GCA', 'GCC', 'GCG', 'GCT', 'GGA', 'GGC', 'GGG', 'GGT', 'GTA', 'GTC', 'GTG', 'GTT', 'TAA', 'TAC', 'TAG', 'TAT', 'TCA', 'TCC', 'TCG', 'TCT', 'TGA', 'TGC', 'TGG', 'TGT', 'TTA', 'TTC', 'TTG', 'TTT'] |
131 | 131 |
132 altype="codonsT" | 132 altype="codonsT" |
133 offset=0 | 133 offset=0 |
134 isreversed=False | 134 isreversed=False |
135 show_warnings=False | |
135 col=[] | 136 col=[] |
136 | 137 |
137 | 138 |
138 | 139 |
139 __all__ = ['LogoSize', | 140 __all__ = ['LogoSize', |
167 'colorscheme', | 168 'colorscheme', |
168 ] | 169 ] |
169 | 170 |
170 description = "Create sequence logos from biological sequence alignments." | 171 description = "Create sequence logos from biological sequence alignments." |
171 | 172 |
172 __version__ = "1.0" | 173 __version__ = "1.1" |
173 | 174 |
174 # These keywords are substituted by subversion. | 175 # These keywords are substituted by subversion. |
175 # The date and revision will only tell the truth after a branch or tag, | 176 # The date and revision will only tell the truth after a branch or tag, |
176 # since different files in trunk will have been changed at different times | 177 # since different files in trunk will have been changed at different times |
177 release_date ="$Date: 2011-09-17 16:30:00 -0700 (Tue, 14 Oct 2008) $".split()[1] | 178 release_date ="$Date: 2011-09-17 16:30:00 -0700 (Tue, 14 Oct 2008) $".split()[1] |
310 # The base stack width is set equal to 9pt Courier. | 311 # The base stack width is set equal to 9pt Courier. |
311 # (Courier has a width equal to 3/5 of the point size.) | 312 # (Courier has a width equal to 3/5 of the point size.) |
312 # Check that can get 80 characters in journal page @small | 313 # Check that can get 80 characters in journal page @small |
313 # 40 characters in a journal column | 314 # 40 characters in a journal column |
314 std_sizes = { | 315 std_sizes = { |
315 "small" : LogoSize( stack_width = 10, stack_height = 10*1*5), | 316 "small" : LogoSize( stack_width = 16.2, stack_height = 10*1*5), |
316 "medium" : LogoSize( stack_width = 10*2, stack_height = 10*2*5), | 317 "medium" : LogoSize( stack_width = 16.2*2, stack_height = 10*2*5), |
317 "large" : LogoSize( stack_width = 10*3, stack_height = 10*3*5), | 318 "large" : LogoSize( stack_width = 16.2*3, stack_height = 10*3*5), |
318 } | 319 } |
319 | 320 |
320 | 321 |
321 std_alphabets = { | 322 std_alphabets = { |
322 'protein': unambiguous_protein_alphabet, | 323 'protein': unambiguous_protein_alphabet, |
437 self.yaxis_tic_interval = 1. | 438 self.yaxis_tic_interval = 1. |
438 self.yaxis_minor_tic_ratio = 5 | 439 self.yaxis_minor_tic_ratio = 5 |
439 self.yaxis_scale = None | 440 self.yaxis_scale = None |
440 | 441 |
441 self.show_xaxis = True | 442 self.show_xaxis = True |
443 self.strict=False | |
442 self.xaxis_label = "" | 444 self.xaxis_label = "" |
443 self.xaxis_tic_interval =1 | 445 self.xaxis_tic_interval =1 |
444 self.rotate_numbers = False | 446 self.rotate_numbers = False |
445 self.number_interval = 5 | 447 self.number_interval = 5 |
446 self.show_ends = False | 448 self.show_ends = False |
796 "xaxis_label", "xaxis_tic_interval", "number_interval", | 798 "xaxis_label", "xaxis_tic_interval", "number_interval", |
797 "fineprint", "shrink_fraction", "errorbar_fraction", | 799 "fineprint", "shrink_fraction", "errorbar_fraction", |
798 "errorbar_width_fraction", | 800 "errorbar_width_fraction", |
799 "errorbar_gray", "small_fontsize", "fontsize", | 801 "errorbar_gray", "small_fontsize", "fontsize", |
800 "title_fontsize", "number_fontsize", "text_font", | 802 "title_fontsize", "number_fontsize", "text_font", |
801 "logo_font", "title_font", | 803 "logo_font", "title_font", "strict", |
802 "logo_label", "yaxis_scale", "end_type", | 804 "logo_label", "yaxis_scale", "end_type", |
803 "debug", "show_title", "show_xaxis", | 805 "debug", "show_title", "show_xaxis", |
804 "show_xaxis_label", "show_yaxis", "show_yaxis_label", | 806 "show_xaxis_label", "show_yaxis", "show_yaxis_label", |
805 "show_boxes", "show_errorbars", "show_fineprint", | 807 "show_boxes", "show_errorbars", "show_fineprint", |
806 "rotate_numbers", "show_ends", "altype", | 808 "rotate_numbers", "show_ends", "altype", |
928 subsitutions["logo_data"] = "\n".join(data) | 930 subsitutions["logo_data"] = "\n".join(data) |
929 | 931 |
930 | 932 |
931 # Create and output logo | 933 # Create and output logo |
932 template = resource_string( __name__, 'template.eps', __file__) | 934 template = resource_string( __name__, 'template.eps', __file__) |
935 | |
936 | |
933 logo = Template(template).substitute(subsitutions) | 937 logo = Template(template).substitute(subsitutions) |
934 print >>fout, logo | 938 print >>fout, logo |
935 | 939 |
936 | 940 |
937 # map between output format names and logo | 941 # map between output format names and logo |
1246 while counter+offset<seq_length: | 1250 while counter+offset<seq_length: |
1247 for i in range(0,len(seqs)): | 1251 for i in range(0,len(seqs)): |
1248 if len(str(seqs[i][(counter):(counter+3)]))==3 and len(seqs[i][(counter):(counter+3)].strip("GATUC"))==0 : | 1252 if len(str(seqs[i][(counter):(counter+3)]))==3 and len(seqs[i][(counter):(counter+3)].strip("GATUC"))==0 : |
1249 if(str(seqs[i][(counter):(counter+3)]) in alphabet): | 1253 if(str(seqs[i][(counter):(counter+3)]) in alphabet): |
1250 x[counter/3][ (alphabet.index(str(seqs[i][(counter):(counter+3)]))) ]+=1 | 1254 x[counter/3][ (alphabet.index(str(seqs[i][(counter):(counter+3)]))) ]+=1 |
1255 elif show_warnings: | |
1256 if len(seqs[i][(counter):(counter+3)].strip("GATUC"))==1 or len(seqs[i][(counter):(counter+3)].strip("GATUC"))==2 : | |
1257 print >>sys.stderr, 'Warning:Incomplete or non GATUC codon detected:', seqs[i][(counter):(counter+3)] | |
1258 print >>sys.stderr, 'Position:',counter | |
1259 print >>sys.stderr, 'Sequence:',i | |
1260 print >>sys.stderr, 'This will be treated as ---' | |
1261 | |
1262 | |
1251 counter=counter+3 | 1263 counter=counter+3 |
1252 counts=asarray(x) | 1264 counts=asarray(x) |
1253 else: | 1265 else: |
1254 counts = asarray(seqs.tally()) | 1266 counts = asarray(seqs.tally()) |
1255 | 1267 |
1367 while 1: | 1379 while 1: |
1368 line = finp.readline() | 1380 line = finp.readline() |
1369 if not line: | 1381 if not line: |
1370 break | 1382 break |
1371 line = line.split() | 1383 line = line.split() |
1372 priordict[line[0]]=line[1] | 1384 |
1373 return priordict | 1385 if(altype=="codonsT"): |
1386 priordict[line[0].upper().replace("U", "T")]=(float(line[1])/1000)*64 | |
1387 else: | |
1388 priordict[line[0].upper().replace("T", "U")]=(float(line[1])/1000)*64 | |
1389 | |
1390 return priordict | |
1374 | 1391 |
1375 def _build_logodata(options) : | 1392 def _build_logodata(options) : |
1376 global offset | 1393 global offset |
1377 offset=options.frame | 1394 offset=options.frame |
1395 global show_warnings | |
1396 show_warnings = options.strict | |
1378 options.alphabet = None | 1397 options.alphabet = None |
1379 options.ignore_lower_case = False | 1398 options.ignore_lower_case = False |
1380 #options.default_color = Color.by_name("black") | 1399 #options.default_color = Color.by_name("black") |
1381 options.color_scheme=None | 1400 options.color_scheme=None |
1382 #options.colors=[] | 1401 #options.colors=[] |
1383 options.show_ends=False | 1402 options.show_ends=False |
1384 seqs = read_seq_data(options.fin, | 1403 seqs = read_seq_data(options.fin, |
1385 options.input_parser.read, | 1404 options.input_parser.read, |
1386 alphabet=options.alphabet, | 1405 alphabet=options.alphabet, |
1387 ignore_lower_case = options.ignore_lower_case) | 1406 ignore_lower_case = options.ignore_lower_case) |
1388 if(options.priorfile!=None): | 1407 if(options.priorfile!=None): |
1389 if(altype=="CodonsT"): | 1408 if(altype=="CodonsT"): |
1390 options.composition= str(read_priors(options.priorfile,codon_alphabetT)) | 1409 options.composition= str(read_priors(options.priorfile,codon_alphabetT)) |
1391 options.alphabet = codon_alphabetT | 1410 options.alphabet = codon_alphabetT |
1392 else: | 1411 else: |
1406 direct_from_opts = [ | 1425 direct_from_opts = [ |
1407 "stacks_per_line", | 1426 "stacks_per_line", |
1408 "logo_title", | 1427 "logo_title", |
1409 "yaxis_label", | 1428 "yaxis_label", |
1410 "show_xaxis", | 1429 "show_xaxis", |
1430 "strict", | |
1411 "show_yaxis", | 1431 "show_yaxis", |
1412 "xaxis_label", | 1432 "xaxis_label", |
1413 "show_ends", | 1433 "show_ends", |
1414 "fineprint", | 1434 "fineprint", |
1415 "show_errorbars", | 1435 "show_errorbars", |
1642 dest="logo_end", | 1662 dest="logo_end", |
1643 action="store", | 1663 action="store", |
1644 type="int", | 1664 type="int", |
1645 help="Upper bound of sequence to display", | 1665 help="Upper bound of sequence to display", |
1646 metavar="INDEX") | 1666 metavar="INDEX") |
1647 | 1667 |
1668 data_grp.add_option( "-G", "--strict", | |
1669 dest="strict", | |
1670 action="store", | |
1671 type="boolean", | |
1672 help="Issue warnings if partial codons are encountered. Default: %default",default = defaults.strict, | |
1673 metavar="True/False") | |
1648 # ========================== FORMAT OPTIONS ========================== | 1674 # ========================== FORMAT OPTIONS ========================== |
1649 | 1675 |
1650 format_grp.add_option( "-s", "--size", | 1676 format_grp.add_option( "-s", "--size", |
1651 dest="logo_size", | 1677 dest="logo_size", |
1652 action="store", | 1678 action="store", |