annotate bin/last-map-probs @ 2:f274c166e738 default tip

remove comments in bsfcall_wrapper.xml
author yutaka-saito
date Sun, 19 Apr 2015 23:02:04 +0900
parents 06f8460885ff
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1 #! /usr/bin/env python
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
2
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
3 # Copyright 2010, 2011, 2012, 2014 Martin C. Frith
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
4
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
5 # Read query-genome alignments: write them along with the probability
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
6 # that each alignment is not the true mapping of its query. These
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
7 # probabilities make the risky assumption that one of the alignments
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
8 # reported for each query is correct.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
9
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
10 import sys, os, fileinput, math, optparse, signal
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
11
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
12 def logsum(numbers):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
13 """Adds numbers, in log space, to avoid overflow."""
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
14 m = max(numbers)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
15 s = sum(math.exp(i - m) for i in numbers)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
16 return m + math.log(s)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
17
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
18 def mafScore(aLine):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
19 for word in aLine.split():
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
20 if word.startswith("score="):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
21 return float(word[6:])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
22 raise Exception("found an alignment without a score")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
23
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
24 def namesAndScores(lines):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
25 queryNames = []
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
26 scores = []
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
27 for line in lines:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
28 if line[0] == "a": # faster than line.startswith("a")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
29 s = mafScore(line)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
30 scores.append(s)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
31 sLineCount = 0
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
32 elif line[0] == "s":
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
33 sLineCount += 1
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
34 if sLineCount == 2: queryNames.append(line.split(None, 2)[1])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
35 elif line[0].isdigit(): # we have an alignment in tabular format
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
36 w = line.split(None, 7)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
37 scores.append(float(w[0]))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
38 queryNames.append(w[6])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
39 return queryNames, scores
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
40
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
41 def scoreTotals(queryNames, scores, temperature):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
42 queryLists = {}
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
43 for n, s in zip(queryNames, scores):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
44 queryLists.setdefault(n, []).append(s / temperature)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
45 return dict((k, logsum(v)) for k, v in queryLists.iteritems())
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
46
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
47 def writeOneBatch(lines, queryNames, scores, denominators, opts, temperature):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
48 isWanted = True
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
49 i = 0
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
50 for line in lines:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
51 if line[0] == "a":
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
52 s = scores[i]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
53 p = 1.0 - math.exp(s / temperature - denominators[queryNames[i]])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
54 i += 1
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
55 if s < opts.score or p > opts.mismap:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
56 isWanted = False
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
57 else:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
58 newLineEnd = " mismap=%.3g\n" % p
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
59 line = line.rstrip() + newLineEnd
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
60 elif line[0].isdigit(): # we have an alignment in tabular format
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
61 s = scores[i]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
62 p = 1.0 - math.exp(s / temperature - denominators[queryNames[i]])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
63 i += 1
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
64 if s < opts.score or p > opts.mismap: continue
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
65 newLineEnd = "\t%.3g\n" % p
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
66 line = line.rstrip() + newLineEnd
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
67 if isWanted: print line,
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
68 if line.isspace(): isWanted = True # reset at end of maf paragraph
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
69
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
70 def doOneBatch(lines, opts, temperature):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
71 queryNames, scores = namesAndScores(lines)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
72 denominators = scoreTotals(queryNames, scores, temperature)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
73 writeOneBatch(lines, queryNames, scores, denominators, opts, temperature)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
74
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
75 def readHeaderOrDie(lines):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
76 t = 0.0
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
77 e = -1
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
78 for line in lines:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
79 if line.startswith("#") or line.isspace():
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
80 print line,
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
81 for i in line.split():
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
82 if i.startswith("t="): t = float(i[2:])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
83 elif i.startswith("e="): e = int(i[2:])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
84 if t > 0 and e >= 0: break
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
85 else:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
86 raise Exception("I need a header with t= and e=")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
87 return t, e
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
88
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
89 def lastMapProbs(opts, args):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
90 f = fileinput.input(args)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
91 temperature, e = readHeaderOrDie(f)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
92 if opts.score < 0: opts.score = e + round(temperature * math.log(1000))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
93 lines = []
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
94
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
95 for line in f:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
96 if line.startswith("# batch"):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
97 doOneBatch(lines, opts, temperature)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
98 lines = []
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
99 lines.append(line)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
100 doOneBatch(lines, opts, temperature)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
101
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
102 if __name__ == "__main__":
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
103 signal.signal(signal.SIGPIPE, signal.SIG_DFL) # avoid silly error message
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
104
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
105 usage = """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
106 %prog --help
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
107 %prog [options] lastal-alignments"""
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
108
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
109 description = "Calculate a mismap probability for each alignment. This is the probability that the alignment does not reflect the origin of the query sequence, assuming that one reported alignment does reflect the origin of each query."
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
110
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
111 op = optparse.OptionParser(usage=usage, description=description)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
112 op.add_option("-m", "--mismap", type="float", default=0.01, metavar="M",
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
113 help="don't write alignments with mismap probability > M (default: %default)")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
114 op.add_option("-s", "--score", type="float", default=-1, metavar="S",
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
115 help="don't write alignments with score < S (default: e+t*ln[1000])")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
116 (opts, args) = op.parse_args()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
117 if not args and sys.stdin.isatty():
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
118 op.print_help()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
119 op.exit()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
120
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
121 try: lastMapProbs(opts, args)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
122 except KeyboardInterrupt: pass # avoid silly error message
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
123 except Exception, e:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
124 prog = os.path.basename(sys.argv[0])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
125 sys.exit(prog + ": error: " + str(e))