annotate rgWebLogo3.py @ 0:66253fc0a69b draft

Imported from capsule None
author devteam
date Mon, 27 Jan 2014 09:25:50 -0500
parents
children c1a9e644d580
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
66253fc0a69b Imported from capsule None
devteam
parents:
diff changeset
1 """
66253fc0a69b Imported from capsule None
devteam
parents:
diff changeset
2 # modified june 2 ross lazarus to add units option at Assaf Gordon's suggestion
66253fc0a69b Imported from capsule None
devteam
parents:
diff changeset
3 # rgWebLogo3.py
66253fc0a69b Imported from capsule None
devteam
parents:
diff changeset
4 # wrapper to check that all fasta files are same length
66253fc0a69b Imported from capsule None
devteam
parents:
diff changeset
5
66253fc0a69b Imported from capsule None
devteam
parents:
diff changeset
6 """
66253fc0a69b Imported from capsule None
devteam
parents:
diff changeset
7 import optparse, os, sys, subprocess, tempfile
66253fc0a69b Imported from capsule None
devteam
parents:
diff changeset
8
66253fc0a69b Imported from capsule None
devteam
parents:
diff changeset
9 WEBLOGO = 'weblogo' # executable name for weblogo3 - confusing isn't it?
66253fc0a69b Imported from capsule None
devteam
parents:
diff changeset
10
66253fc0a69b Imported from capsule None
devteam
parents:
diff changeset
class WL3:
    """
    Simple wrapper class around the weblogo3 command line tool.

    Checks that all sequences in a fasta input file have the same length,
    then builds and runs the weblogo command line.

    ``opts`` is an options object (as produced by optparse) that must
    provide the attributes read here: input, output, logoname, outformat,
    size, lower, upper, colours and units.
    """
    FASTASTARTSYM = '>'
    badseq = '## error - sequences in file %s are not all the same length - cannot proceed. Please read the tool documentation carefully'
    # Shared message for any input that has sequence data before a header.
    _badstart = 'Invalid fasta file %s - does not start with %s - please read the tool documentation carefully'
    # Message for an input in which no fasta record was found at all.
    _noseq = 'Invalid fasta file %s - no sequences found - please read the tool documentation carefully'

    def __init__(self, opts=None):
        """Store the options and open the fasta input named by opts.input."""
        assert opts is not None, 'WL3 class needs opts passed in - got None'
        self.opts = opts
        # open() rather than the Python 2-only file() builtin
        self.fastaf = open(self.opts.input, 'r')
        self.clparams = {}

    def whereis(self, program):
        """
        Return the first PATH entry joined with ``program`` that exists and
        is not a directory, or None when there is no such entry.
        """
        for path in os.environ.get('PATH', '').split(os.pathsep):
            candidate = os.path.join(path, program)
            if os.path.exists(candidate) and not os.path.isdir(candidate):
                return candidate
        return None

    def runCL(self):
        """
        Construct and run the weblogo command line from self.clparams.

        Returns a one-paragraph text report containing the command, its
        exit status and anything it wrote to stdout/stderr.
        Writes a diagnostic to stderr and exits with status 1 when the
        weblogo executable cannot be found or returns a non-zero status.
        """
        if not self.whereis(WEBLOGO):
            sys.stderr.write('## rgWebLogo3.py error - cannot locate the weblogo binary %s on the current path\n' % WEBLOGO)
            sys.stderr.write('## Please ensure it is installed and working from http://code.google.com/p/weblogo\n')
            sys.exit(1)
        # Build an argument vector instead of a shell string so that option
        # values (e.g. a user supplied logo title) cannot be interpreted by
        # a shell.
        argv = [WEBLOGO]
        for flag, value in self.clparams.items():
            argv.extend([flag, str(value)])
        cl = ' '.join(argv)  # for reporting only
        fd, templog = tempfile.mkstemp(suffix='rgtempRun.txt')
        try:
            # Wrap the descriptor mkstemp already opened so it gets closed.
            with os.fdopen(fd, 'w') as tlf:
                try:
                    rval = subprocess.call(argv, stderr=tlf, stdout=tlf)
                except OSError:
                    # Could not be executed at all; report it through the
                    # same non-zero status path as any other failure.
                    rval = 127
            with open(templog, 'r') as logf:
                tlogs = logf.read()
        finally:
            os.unlink(templog)  # always
        if len(tlogs) > 1:
            s = '## executing %s returned status %d and log (stdout/stderr) records: \n%s\n' % (cl, rval, tlogs)
        else:
            s = '## executing %s returned status %d. Nothing appeared on stderr/stdout\n' % (cl, rval)
        if rval != 0:
            sys.stderr.write('## rgWebLogo3.py error - executing %s returned error code %d\n' % (cl, rval))
            sys.stderr.write('## This may be a data problem or a tool dependency (%s) installation problem\n' % WEBLOGO)
            sys.stderr.write('## Please ensure %s is correctly installed and working on the command line -see http://code.google.com/p/weblogo\n' % WEBLOGO)
            if len(tlogs) > 1:
                # Show what the tool itself reported, otherwise it is lost.
                sys.stderr.write(s)
            sys.exit(1)
        return s

    def iter_fasta(self):
        """
        Generator over the fasta records in self.fastaf.

        Yields one (sequence name, sequence length) tuple per record. Only
        a running length is kept, not the sequence text itself.
        Writes a diagnostic to stderr and exits with status 1 if a
        non-header row is seen before the first header row.
        """
        seqname = None
        seqlen = 0
        for row in self.fastaf:
            if row.startswith(self.FASTASTARTSYM):
                if seqname is not None:  # finish the previous record
                    yield (seqname, seqlen)
                seqname = row[1:].strip()
                seqlen = 0
            elif seqname is None:
                sys.stderr.write('%s\n' % (self._badstart % (self.opts.input, self.FASTASTARTSYM)))
                sys.exit(1)
            else:  # sequence row
                seqlen += len(row.strip())
        if seqname is not None:  # last one
            yield (seqname, seqlen)

    def fcheck(self):
        """
        Are all fasta sequences the same length?

        Returns a one-line summary giving the number of sequences and their
        common length. Writes a diagnostic to stderr and exits with status 1
        when lengths differ or when the input holds no sequences.
        """
        flen = None
        nseq = 0
        for seqname, seqlen in self.iter_fasta():
            if nseq == 0:
                flen = seqlen
            elif seqlen != flen:
                sys.stderr.write('%s\n' % (self.badseq % self.opts.input))
                sys.exit(1)
            nseq += 1
        if nseq == 0:
            sys.stderr.write('%s\n' % (self._noseq % self.opts.input))
            sys.exit(1)
        return '# weblogo input %s has %d sequences all of length %d' % (self.opts.input, nseq, flen)

    def run(self):
        """
        Validate the input, then run weblogo with the configured options.

        Returns a (check summary, run report) tuple built from fcheck() and
        runCL().
        """
        try:
            check = self.fcheck()
        finally:
            self.fastaf.close()  # the input is only read for validation
        self.clparams['-f'] = self.opts.input
        self.clparams['-o'] = self.opts.output
        # No quoting needed: runCL passes each value as its own argument.
        self.clparams['-t'] = self.opts.logoname
        self.clparams['-F'] = self.opts.outformat
        optional = (
            ('-s', self.opts.size),
            ('-l', self.opts.lower),
            ('-u', self.opts.upper),
            ('-c', self.opts.colours),
            ('-U', self.opts.units),
        )
        for flag, value in optional:
            if value is not None:
                self.clparams[flag] = value
        s = self.runCL()
        return check, s
66253fc0a69b Imported from capsule None
devteam
parents:
diff changeset
129
66253fc0a69b Imported from capsule None
devteam
parents:
diff changeset
130
66253fc0a69b Imported from capsule None
devteam
parents:
diff changeset
if __name__ == '__main__':
    # called as
    # <command interpreter="python">
    #    rgWebLogo3.py --outformat $outformat -s $size -i $input -o $output -t "$logoname" -c "$colours"
    # #if $range.mode == 'part'
    #    -l "$range.seqstart" -u "$range.seqend"
    # #end if
    # </command>
    op = optparse.OptionParser()
    op.add_option('-i', '--input', default=None)
    op.add_option('-F', '--outformat', default='png')
    op.add_option('-s', '--size', default=None)
    op.add_option('-o', '--output', default='rgWebLogo3')
    op.add_option('-t', '--logoname', default='rgWebLogo3')
    op.add_option('-c', '--colours', default=None)
    op.add_option('-l', '--lower', default=None)
    op.add_option('-u', '--upper', default=None)
    op.add_option('-U', '--units', default=None)
    opts, args = op.parse_args()
    # Explicit checks rather than assert, which is skipped under python -O.
    # sys.exit(message) writes the message to stderr and exits with status 1.
    if opts.input is None:
        sys.exit('weblogo3 needs a -i parameter with a fasta input file - cannot open')
    if not os.path.isfile(opts.input):
        sys.exit('weblogo3 needs a valid fasta input file - cannot open %s' % opts.input)
    w = WL3(opts)
    checks, s = w.run()
    sys.stdout.write('%s\n' % checks)  # for info