Mercurial > repos > ucsb-phylogenetics > osiris_phylogenetics
diff alignment/phytab_mview.py @ 0:5b9a38ec4a39 draft default tip
First commit of old repositories
author | osiris_phylogenetics <ucsb_phylogenetics@lifesci.ucsb.edu> |
---|---|
date | Tue, 11 Mar 2014 12:19:13 -0700 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/alignment/phytab_mview.py Tue Mar 11 12:19:13 2014 -0700 @@ -0,0 +1,138 @@ +#!/usr/bin/env python +## usage: ./phytab_mview.py -i <phytabinput> -d <protein|dna> +## splits up an aligned phytab file containing multiple genes into +## individual files to run mview + +import sys, os, os.path, tempfile, shutil, re, shlex, subprocess +import optparse +from multiprocessing import Pool + +#define some variables to call later: + +directory = "" +extension = ".fs" +html_header = """<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN"> +<HTML> +<HEAD> +<TITLE></TITLE> +</HEAD> +<BODY BGCOLOR='white' TEXT='black' LINK='blue' ALINK='red' VLINK='purple'> +<H1>PHYTAB MVIEW ALIGNMENT VIEWER</H1> +<PRE>Select from below to view aligned sequence as HTML (left) or FASTA (right) in browser. +</PRE> +<table border="1" bordercolor="#000000" style="background-color:#FFFFFF" width="300" cellpadding="3" cellspacing="0"> + <tr> + <td>mview HTML</td> + <!--<td>FASTA</td>--> + </tr>""" +html_close = """ +<P><SMALL><A HREF="http://bio-mview.sourceforge.net">MView</A> </SMALL><BR> +</BODY> +</HTML>""" + +#define some functions to call in 'main': +# first, sanitize problematic characters +def unescape(string): + mapped_chars = { + '>': '__gt__', + '<': '__lt__', + "'": '__sq__', + '"': '__dq__', + '[': '__ob__', + ']': '__cb__', + '{': '__oc__', + '}': '__cc__', + '@': '__at__', + '\n': '__cn__', + '\r': '__cr__', + '\t': '__tc__', + '#': '__pd__' + } + + for key, value in mapped_chars.iteritems(): + string = string.replace(value, key) + + return string +# next, define tabular --> fasta conversion +class Sequence: + def __init__(self, string): + lis = string.split() + self.species = lis[0] + self.family = lis[1] + self.name = lis[2] + self.header = ' '.join(lis[:-1]) + self.sequence = lis[-1] + self.string = string + + def printFASTA(self): + return '> ' + self.header + '\n' + self.sequence + '\n' + +# then define function to apply preceding conversion method to all genes +# (creates separate file for each gene) +def saveMulti(tabFile): + with open(tabFile) as f: + for line in f: + seq = Sequence(line) + with open(seq.family + extension, "a") as p: + p.write(seq.printFASTA()) + +#subroutine to write main HTML output containing valid urls to mview htmls +def resultsto_output_html(html_mainoutput,basepath): + htmllist = [f for f in os.listdir(basepath) if 'html' in f] + sortedhtmllist = sorted(htmllist) + html = open(html_mainoutput, 'w') + html.write(html_header) + for f in sortedhtmllist: + f_path = os.path.join(basepath,f) + htmllink = '<tr><td><a href="' + f + '">' + f + '</a></td>\n' + html.write(htmllink) + html.write(html_close) + html.close() + +def main(): +#the command line arguments from the xml: + """ + ##params for galaxy wrapper + $input + $dna + $output + "$output.extra_files_path" #save the htmlfiles here + """ + inputphytabfile = sys.argv[1] + dnaorprotein = sys.argv[2] + output = sys.argv[3] + extra_files_path = sys.argv[4] + + inputFile = unescape(inputphytabfile) + ##make the fasta files + saveMulti(inputFile) + + #prepare to put mview htmls into valid path + + if not os.path.isdir(extra_files_path): #make filepath for alns to go with galaxy info + os.makedirs(extra_files_path) + + # execute mview on each fasta, storing in extra_files_path as <gene_aln>.html + list_of_fastafiles = [f for f in os.listdir(os.getcwd()) if 'fs' in f] + sortedfileorder = sorted(list_of_fastafiles) + for gene_aln in sortedfileorder: + result_htmlfile = gene_aln + '.html' + result_path = os.path.join(extra_files_path,result_htmlfile) #puts the htmls in permanent Galaxy directory + if dnaorprotein is 'dna': + cmd = subprocess.Popen(['mview','-in','pearson','-DNA','-bold','-coloring','group','-html','head', gene_aln],stdout=subprocess.PIPE) + else: + cmd = subprocess.Popen(['mview','-in','pearson','-bold','-coloring','group','-html','head', gene_aln],stdout=subprocess.PIPE) + cmd.wait() + out = cmd.communicate()[0] + + with open(result_path, 'wb') as fileout: + fileout.write(out) + ##now have # of gene htmls in extra_files_path/ + + #write main html output + resultsto_output_html(output,extra_files_path) + + +if __name__ == '__main__': + main() +