Mercurial > repos > ucsb-phylogenetics > osiris_phylogenetics
comparison alignment/phytab_mview.py @ 0:5b9a38ec4a39 draft default tip
First commit of old repositories
author | osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu> |
---|---|
date | Tue, 11 Mar 2014 12:19:13 -0700 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:5b9a38ec4a39 |
---|---|
1 #!/usr/bin/env python | |
2 ## usage: ./phytab_mview.py <phytabinput> <protein|dna> <output_html> <extra_files_path> | |
3 ## splits up an aligned phytab file containing multiple genes into | |
4 ## individual files to run mview | |
5 | |
6 import sys, os, os.path, tempfile, shutil, re, shlex, subprocess | |
7 import optparse | |
8 from multiprocessing import Pool | |
9 | |
10 #define some variables to call later: | |
11 | |
# Not referenced by any function visible in this file; kept so the module's
# public names stay unchanged.
directory = ""

# Suffix appended to the gene-family name when saveMulti() writes the
# per-gene FASTA files.
extension = ".fs"

# Opening part of the index page: document head, title banner and the start
# of the table whose rows are written by resultsto_output_html().
html_header = """<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN">
<HTML>
<HEAD>
<TITLE></TITLE>
</HEAD>
<BODY BGCOLOR='white' TEXT='black' LINK='blue' ALINK='red' VLINK='purple'>
<H1>PHYTAB MVIEW ALIGNMENT VIEWER</H1>
<PRE>Select from below to view aligned sequence as HTML (left) or FASTA (right) in browser.
</PRE>
<table border="1" bordercolor="#000000" style="background-color:#FFFFFF" width="300" cellpadding="3" cellspacing="0">
<tr>
<td>mview HTML</td>
<!--<td>FASTA</td>-->
</tr>"""

# Closing part of the index page.  The table opened in html_header must be
# closed here before the footer paragraph; otherwise the <P> element ends up
# inside an unterminated <table>.
html_close = """
</table>
<P><SMALL><A HREF="http://bio-mview.sourceforge.net">MView</A> </SMALL><BR>
</BODY>
</HTML>"""
32 | |
33 #define some functions to call in 'main': | |
34 # first, sanitize problematic characters | |
def unescape(string):
    """Restore characters that were replaced by ``__xx__`` placeholder tokens.

    Every occurrence of a known placeholder (for example ``__gt__``) in
    *string* is replaced by the character it stands for (``>``).

    Args:
        string: Text that may contain placeholder tokens.

    Returns:
        The text with each known placeholder replaced by its character.
    """
    mapped_chars = {
        '>': '__gt__',
        '<': '__lt__',
        "'": '__sq__',
        '"': '__dq__',
        '[': '__ob__',
        ']': '__cb__',
        '{': '__oc__',
        '}': '__cc__',
        '@': '__at__',
        '\n': '__cn__',
        '\r': '__cr__',
        '\t': '__tc__',
        '#': '__pd__',
    }

    # items() is available on both Python 2 and Python 3; iteritems() only
    # exists on Python 2 and raises AttributeError on Python 3.
    for key, value in mapped_chars.items():
        string = string.replace(value, key)

    return string
56 # next, define tabular --> fasta conversion | |
class Sequence:
    """One record parsed from a whitespace-separated line.

    The first three tokens are stored as ``species``, ``family`` and
    ``name``; the last token is stored as ``sequence``.  ``header`` is every
    token except the last, joined by single spaces, and ``string`` keeps the
    unmodified input line.
    """

    def __init__(self, string):
        fields = string.split()
        # Indexing (rather than unpacking) so that a line with fewer than
        # three tokens raises IndexError.
        self.species, self.family, self.name = fields[0], fields[1], fields[2]
        self.sequence = fields[-1]
        self.header = ' '.join(fields[:-1])
        self.string = string

    def printFASTA(self):
        """Return the record as a two-line FASTA entry ending in a newline."""
        return ''.join(['> ', self.header, '\n', self.sequence, '\n'])
69 | |
70 # then define function to apply preceding conversion method to all genes | |
71 # (creates separate file for each gene) | |
def saveMulti(tabFile):
    """Split a multi-gene tabular file into one FASTA file per gene family.

    Each non-blank line of *tabFile* is parsed with ``Sequence`` and its
    FASTA form is appended to a file named ``<family><extension>`` (a
    relative path, so it is resolved against the current working directory).

    Args:
        tabFile: Path of the tabular file to read.
    """
    with open(tabFile) as f:
        for line in f:
            # A blank line (e.g. a trailing empty line at end of file) holds
            # no record and would make Sequence raise IndexError.
            if not line.strip():
                continue
            seq = Sequence(line)
            # Append mode: records of the same family arrive on separate
            # lines and must accumulate in the same output file.
            with open(seq.family + extension, "a") as p:
                p.write(seq.printFASTA())
78 | |
79 #subroutine to write main HTML output containing valid urls to mview htmls | |
def resultsto_output_html(html_mainoutput,basepath):
    """Write the index HTML page that links to the per-gene HTML files.

    Args:
        html_mainoutput: Path of the index file to create (overwritten if it
            already exists).
        basepath: Directory to scan; every entry whose name contains
            ``'html'`` gets one table row, in sorted name order.
    """
    sortedhtmllist = sorted(f for f in os.listdir(basepath) if 'html' in f)
    # The with-block guarantees the index file is closed even if a write
    # fails part-way through.
    with open(html_mainoutput, 'w') as html:
        html.write(html_header)
        for f in sortedhtmllist:
            # The link target is the bare file name, not a path under
            # basepath.
            html.write('<tr><td><a href="' + f + '">' + f + '</a></td>\n')
        html.write(html_close)
91 | |
def main():
    """Render every gene alignment in a tabular input file with mview.

    Positional command-line arguments (as passed by the Galaxy wrapper:
    ``$input $dna $output "$output.extra_files_path"``):

        1. input file holding the aligned sequences of all genes
        2. ``dna`` to run mview with ``-DNA``; any other value runs mview
           without that flag
        3. path of the main HTML index page to write
        4. directory that receives one ``<gene file>.html`` per gene
    """
    if len(sys.argv) < 5:
        sys.exit("usage: phytab_mview.py <phytabinput> <protein|dna> "
                 "<output_html> <extra_files_path>")
    inputphytabfile, dnaorprotein, output, extra_files_path = sys.argv[1:5]

    inputFile = unescape(inputphytabfile)
    # Write one FASTA file per gene into the current working directory.
    saveMulti(inputFile)

    # Make sure the directory for the per-gene HTML files exists.
    if not os.path.isdir(extra_files_path):
        os.makedirs(extra_files_path)

    # Build the mview command once.  The data type must be compared with ==;
    # an identity test (is) against a string literal is not a value
    # comparison and would leave the -DNA flag unused.
    mview_cmd = ['mview', '-in', 'pearson']
    if dnaorprotein == 'dna':
        mview_cmd.append('-DNA')
    mview_cmd += ['-bold', '-coloring', 'group', '-html', 'head']

    # Only files carrying the FASTA suffix written by saveMulti are
    # alignments; a substring test would also pick up unrelated files.
    fasta_files = sorted(
        f for f in os.listdir(os.getcwd()) if f.endswith(extension)
    )
    for gene_aln in fasta_files:
        result_path = os.path.join(extra_files_path, gene_aln + '.html')
        proc = subprocess.Popen(mview_cmd + [gene_aln],
                                stdout=subprocess.PIPE)
        # communicate() drains stdout while waiting for the process; calling
        # wait() first can deadlock once the pipe buffer fills up.
        out = proc.communicate()[0]

        with open(result_path, 'wb') as fileout:
            fileout.write(out)

    # Write the index page linking to every generated per-gene HTML file.
    resultsto_output_html(output, extra_files_path)
134 | |
135 | |
# Run the tool only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()
138 |