annotate bin/maf-convert @ 0:06f8460885ff

migrate from GitHub
author yutaka-saito
date Sun, 19 Apr 2015 20:51:13 +0900
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
1 #! /usr/bin/env python
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
2 # Copyright 2010, 2011, 2013, 2014 Martin C. Frith
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
3 # Read MAF-format alignments: write them in other formats.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
4 # Seems to work with Python 2.x, x>=4
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
5
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
6 # By "MAF" we mean "multiple alignment format" described in the UCSC
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
7 # Genome FAQ, not e.g. "MIRA assembly format".
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
8
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
9 from itertools import *
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
10 import sys, os, fileinput, math, operator, optparse, signal, string
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
11
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
12 def maxlen(s):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
13 return max(map(len, s))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
14
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
15 def joined(things, delimiter):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
16 return delimiter.join(map(str, things))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
17
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
18 identityTranslation = string.maketrans("", "")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
19 def deleted(myString, deleteChars):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
20 return myString.translate(identityTranslation, deleteChars)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
21
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
22 def quantify(iterable, pred=bool):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
23 """Count how many times the predicate is true."""
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
24 return sum(map(pred, iterable))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
25
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
26 class Maf:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
27 def __init__(self, aLine, sLines, qLines, pLines):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
28 self.namesAndValues = dict(i.split("=") for i in aLine[1:])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
29
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
30 if not sLines: raise Exception("empty alignment")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
31 cols = zip(*sLines)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
32 self.seqNames = cols[1]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
33 self.alnStarts = map(int, cols[2])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
34 self.alnSizes = map(int, cols[3])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
35 self.strands = cols[4]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
36 self.seqSizes = map(int, cols[5])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
37 self.alnStrings = cols[6]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
38
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
39 self.qLines = qLines
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
40 self.pLines = pLines
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
41
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
42 def dieUnlessPairwise(maf):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
43 if len(maf.alnStrings) != 2:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
44 raise Exception("pairwise alignments only, please")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
45
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
46 def insertionCounts(alnString):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
47 gaps = alnString.count("-")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
48 forwardFrameshifts = alnString.count("\\")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
49 reverseFrameshifts = alnString.count("/")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
50 letters = len(alnString) - gaps - forwardFrameshifts - reverseFrameshifts
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
51 return letters, forwardFrameshifts, reverseFrameshifts
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
52
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
53 def coordinatesPerLetter(alnString, alnSize):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
54 letters, forwardShifts, reverseShifts = insertionCounts(alnString)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
55 if forwardShifts or reverseShifts or letters < alnSize: return 3
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
56 else: return 1
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
57
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
58 def mafLetterSizes(maf):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
59 return map(coordinatesPerLetter, maf.alnStrings, maf.alnSizes)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
60
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
61 def alnLetterSizes(sLines):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
62 return [coordinatesPerLetter(i[6], int(i[3])) for i in sLines]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
63
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
64 def isTranslated(sLines):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
65 for i in sLines:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
66 alnString = i[6]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
67 if "\\" in alnString or "/" in alnString: return True
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
68 if len(alnString) - alnString.count("-") < int(i[3]): return True
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
69 return False
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
70
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
71 def insertionSize(alnString, letterSize):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
72 """Get the length of sequence included in the alnString."""
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
73 letters, forwardShifts, reverseShifts = insertionCounts(alnString)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
74 return letters * letterSize + forwardShifts - reverseShifts
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
75
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
76 def symbolSize(symbol, letterSize):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
77 if symbol == "-": return 0
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
78 if symbol == "\\": return 1
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
79 if symbol == "/": return -1
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
80 return letterSize
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
81
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
82 def isMatch(alignmentColumn):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
83 # No special treatment of ambiguous bases/residues: same as NCBI BLAST.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
84 first = alignmentColumn[0].upper()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
85 for i in alignmentColumn[1:]:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
86 if i.upper() != first: return False
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
87 return True
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
88
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
89 def isGapless(alignmentColumn):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
90 return "-" not in alignmentColumn
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
91
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
92 def matchAndInsertSizes(alignmentColumns, letterSizes):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
93 """Get sizes of gapless blocks, and of the inserts between them."""
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
94 for k, v in groupby(alignmentColumns, isGapless):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
95 if k:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
96 sizeOfGaplessBlock = len(list(v))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
97 yield str(sizeOfGaplessBlock)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
98 else:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
99 blockRows = izip(*v)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
100 blockRows = imap(''.join, blockRows)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
101 insertSizes = imap(insertionSize, blockRows, letterSizes)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
102 yield joined(insertSizes, ":")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
103
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
104 ##### Routines for converting to AXT format: #####
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
105
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
106 axtCounter = count()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
107
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
108 def writeAxt(maf):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
109 if maf.strands[0] != "+":
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
110 raise Exception("for AXT, the 1st strand in each alignment must be +")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
111
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
112 # Convert to AXT's 1-based coordinates:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
113 alnStarts = imap(operator.add, maf.alnStarts, repeat(1))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
114 alnEnds = imap(operator.add, maf.alnStarts, maf.alnSizes)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
115
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
116 rows = zip(maf.seqNames, alnStarts, alnEnds, maf.strands)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
117 head, tail = rows[0], rows[1:]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
118
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
119 outWords = []
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
120 outWords.append(axtCounter.next())
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
121 outWords.extend(head[:3])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
122 outWords.extend(chain(*tail))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
123 outWords.append(maf.namesAndValues["score"])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
124
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
125 print joined(outWords, " ")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
126 for i in maf.alnStrings: print i
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
127 print # print a blank line at the end
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
128
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
129 ##### Routines for converting to tabular format: #####
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
130
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
131 def writeTab(maf):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
132 aLine, sLines, qLines, pLines = maf
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
133
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
134 score = "0"
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
135 expect = None
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
136 mismap = None
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
137 for i in aLine:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
138 if i.startswith("score="): score = i[6:]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
139 elif i.startswith("expect="): expect = i[7:]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
140 elif i.startswith("mismap="): mismap = i[7:]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
141
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
142 outWords = []
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
143 outWords.append(score)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
144
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
145 for i in sLines: outWords.extend(i[1:6])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
146
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
147 alnStrings = [i[6] for i in sLines]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
148 alignmentColumns = zip(*alnStrings)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
149 letterSizes = alnLetterSizes(sLines)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
150 gapStrings = matchAndInsertSizes(alignmentColumns, letterSizes)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
151 gapWord = ",".join(gapStrings)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
152 outWords.append(gapWord)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
153
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
154 if expect: outWords.append(expect)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
155 if mismap: outWords.append(mismap)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
156
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
157 print "\t".join(outWords)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
158
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
159 ##### Routines for converting to PSL format: #####
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
160
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
161 def pslBlocks(alignmentColumns, alnStarts, letterSizes):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
162 """Get sizes and start coordinates of gapless blocks in an alignment."""
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
163 start1, start2 = alnStarts
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
164 letterSize1, letterSize2 = letterSizes
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
165 size = 0
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
166 for x, y in alignmentColumns:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
167 if x == "-":
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
168 if size:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
169 yield size, start1, start2
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
170 start1 += size * letterSize1
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
171 start2 += size * letterSize2
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
172 size = 0
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
173 start2 += symbolSize(y, letterSize2)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
174 elif y == "-":
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
175 if size:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
176 yield size, start1, start2
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
177 start1 += size * letterSize1
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
178 start2 += size * letterSize2
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
179 size = 0
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
180 start1 += symbolSize(x, letterSize1)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
181 else:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
182 size += 1
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
183 if size: yield size, start1, start2
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
184
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
185 def pslCommaString(things):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
186 # UCSC software seems to prefer a trailing comma
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
187 return joined(things, ",") + ","
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
188
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
189 def gapRunCount(letters):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
190 """Get the number of runs of gap characters."""
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
191 uniqLetters = map(operator.itemgetter(0), groupby(letters))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
192 return uniqLetters.count("-")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
193
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
194 def pslEndpoints(alnStart, alnSize, strand, seqSize):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
195 alnEnd = alnStart + alnSize
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
196 if strand == "+": return alnStart, alnEnd
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
197 else: return seqSize - alnEnd, seqSize - alnStart
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
198
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
199 def caseSensitivePairwiseMatchCounts(columns, isProtein):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
200 # repMatches is always zero
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
201 # for proteins, nCount is always zero, because that's what BLATv34 does
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
202 standardBases = "ACGTU"
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
203 matches = mismatches = repMatches = nCount = 0
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
204 for i in columns:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
205 if "-" in i: continue
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
206 x, y = i
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
207 if x in standardBases and y in standardBases or isProtein:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
208 if x == y: matches += 1
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
209 else: mismatches += 1
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
210 else: nCount += 1
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
211 return matches, mismatches, repMatches, nCount
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
212
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
213 def writePsl(maf, isProtein):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
214 dieUnlessPairwise(maf)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
215
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
216 alnStrings = map(str.upper, maf.alnStrings)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
217 alignmentColumns = zip(*alnStrings)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
218 letterSizes = mafLetterSizes(maf)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
219
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
220 matchCounts = caseSensitivePairwiseMatchCounts(alignmentColumns, isProtein)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
221 matches, mismatches, repMatches, nCount = matchCounts
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
222 numGaplessColumns = sum(matchCounts)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
223
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
224 qNumInsert = gapRunCount(maf.alnStrings[0])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
225 qBaseInsert = maf.alnSizes[1] - numGaplessColumns * letterSizes[1]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
226
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
227 tNumInsert = gapRunCount(maf.alnStrings[1])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
228 tBaseInsert = maf.alnSizes[0] - numGaplessColumns * letterSizes[0]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
229
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
230 strand = maf.strands[1]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
231 if max(letterSizes) > 1:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
232 strand += maf.strands[0]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
233 elif maf.strands[0] != "+":
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
234 raise Exception("for non-translated PSL, the 1st strand in each alignment must be +")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
235
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
236 tName, qName = maf.seqNames
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
237 tSeqSize, qSeqSize = maf.seqSizes
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
238
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
239 rows = zip(maf.alnStarts, maf.alnSizes, maf.strands, maf.seqSizes)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
240 tStart, tEnd = pslEndpoints(*rows[0])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
241 qStart, qEnd = pslEndpoints(*rows[1])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
242
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
243 blocks = list(pslBlocks(alignmentColumns, maf.alnStarts, letterSizes))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
244 blockCount = len(blocks)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
245 blockSizes, tStarts, qStarts = map(pslCommaString, zip(*blocks))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
246
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
247 outWords = (matches, mismatches, repMatches, nCount,
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
248 qNumInsert, qBaseInsert, tNumInsert, tBaseInsert, strand,
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
249 qName, qSeqSize, qStart, qEnd, tName, tSeqSize, tStart, tEnd,
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
250 blockCount, blockSizes, qStarts, tStarts)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
251 print joined(outWords, "\t")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
252
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
253 ##### Routines for converting to SAM format: #####
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
254
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
255 def readGroupId(readGroupItems):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
256 for i in readGroupItems:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
257 if i.startswith("ID:"):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
258 return i[3:]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
259 raise Exception("readgroup must include ID")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
260
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
261 def readSequenceLengths(lines):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
262 """Read name & length of topmost sequence in each maf block."""
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
263 sequenceLengths = {} # an OrderedDict might be nice
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
264 isSearching = True
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
265 for line in lines:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
266 if line.isspace(): isSearching = True
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
267 elif isSearching:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
268 w = line.split()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
269 if w[0] == "s":
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
270 seqName, seqSize = w[1], w[5]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
271 sequenceLengths[seqName] = seqSize
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
272 isSearching = False
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
273 return sequenceLengths
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
274
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
275 def naturalSortKey(s):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
276 """Return a key that sorts strings in "natural" order."""
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
277 return [(str, int)[k]("".join(v)) for k, v in groupby(s, str.isdigit)]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
278
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
279 def karyotypicSortKey(s):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
280 """Attempt to sort chromosomes in GATK's ridiculous order."""
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
281 if s == "chrM": return []
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
282 if s == "MT": return ["~"]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
283 return naturalSortKey(s)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
284
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
285 def writeSamHeader(sequenceLengths, dictFile, readGroupItems):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
286 print "@HD\tVN:1.3\tSO:unsorted"
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
287 for k in sorted(sequenceLengths, key=karyotypicSortKey):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
288 print "@SQ\tSN:%s\tLN:%s" % (k, sequenceLengths[k])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
289 if dictFile:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
290 for i in fileinput.input(dictFile):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
291 if i.startswith("@SQ"): print i,
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
292 elif not i.startswith("@"): break
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
293 if readGroupItems:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
294 print "@RG\t" + "\t".join(readGroupItems)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
295
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
296 mapqMissing = "255"
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
297 mapqMaximum = "254"
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
298 mapqMaximumNum = float(mapqMaximum)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
299
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
300 def mapqFromProb(probString):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
301 try: p = float(probString)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
302 except ValueError: raise Exception("bad probability: " + probString)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
303 if p < 0 or p > 1: raise Exception("bad probability: " + probString)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
304 if p == 0: return mapqMaximum
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
305 phred = -10 * math.log(p, 10)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
306 if phred >= mapqMaximumNum: return mapqMaximum
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
307 return str(int(round(phred)))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
308
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
309 def cigarParts(beg, alignmentColumns, end):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
310 if beg: yield str(beg) + "H"
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
311
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
312 # (doesn't handle translated alignments)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
313 isActive = True
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
314 for x, y in alignmentColumns: break
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
315 else: isActive = False
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
316 while isActive:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
317 size = 1
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
318 if x == "-":
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
319 for x, y in alignmentColumns:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
320 if x != "-": break
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
321 size += 1
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
322 else: isActive = False
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
323 yield str(size) + "I"
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
324 elif y == "-":
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
325 for x, y in alignmentColumns:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
326 if y != "-": break
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
327 size += 1
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
328 else: isActive = False
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
329 yield str(size) + "D"
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
330 else:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
331 for x, y in alignmentColumns:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
332 if x == "-" or y == "-": break
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
333 size += 1
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
334 else: isActive = False
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
335 yield str(size) + "M"
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
336
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
337 if end: yield str(end) + "H"
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
338
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
339 def writeSam(maf, rg):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
340 aLine, sLines, qLines, pLines = maf
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
341
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
342 if isTranslated(sLines):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
343 raise Exception("this looks like translated DNA - can't convert to SAM format")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
344
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
345 score = None
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
346 evalue = None
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
347 mapq = mapqMissing
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
348 for i in aLine:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
349 if i.startswith("score="):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
350 v = i[6:]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
351 if v.isdigit(): score = "AS:i:" + v # it must be an integer
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
352 elif i.startswith("expect="):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
353 evalue = "EV:Z:" + i[7:]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
354 elif i.startswith("mismap="):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
355 mapq = mapqFromProb(i[7:])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
356
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
357 head, tail = sLines[0], sLines[1:]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
358
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
359 s, rName, rStart, rAlnSize, rStrand, rSeqSize, rAlnString = head
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
360 if rStrand != "+":
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
361 raise Exception("for SAM, the 1st strand in each alignment must be +")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
362 pos = str(int(rStart) + 1) # convert to 1-based coordinate
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
363
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
364 for s, qName, qStart, qAlnSize, qStrand, qSeqSize, qAlnString in tail:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
365 alignmentColumns = zip(rAlnString.upper(), qAlnString.upper())
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
366
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
367 qStart = int(qStart)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
368 qRevStart = int(qSeqSize) - qStart - int(qAlnSize)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
369 cigar = "".join(cigarParts(qStart, iter(alignmentColumns), qRevStart))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
370
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
371 seq = deleted(qAlnString, "-")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
372
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
373 qual = "*"
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
374 if qLines:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
375 q, qualityName, qualityString = qLines[0]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
376 # don't try to handle multiple alignments for now (YAGNI):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
377 if len(qLines) > 1 or len(tail) > 1 or qualityName != qName:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
378 raise Exception("can't interpret the quality data")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
379 qual = ''.join(j for i, j in izip(qAlnString, qualityString)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
380 if i != "-")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
381
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
382 # It's hard to get all the pair info, so this is very
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
383 # incomplete, but hopefully good enough.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
384 # I'm not sure whether to add 2 and/or 8 to flag.
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
385 if qName.endswith("/1"):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
386 qName = qName[:-2]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
387 if qStrand == "+": flag = "99" # 1 + 2 + 32 + 64
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
388 else: flag = "83" # 1 + 2 + 16 + 64
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
389 elif qName.endswith("/2"):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
390 qName = qName[:-2]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
391 if qStrand == "+": flag = "163" # 1 + 2 + 32 + 128
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
392 else: flag = "147" # 1 + 2 + 16 + 128
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
393 else:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
394 if qStrand == "+": flag = "0"
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
395 else: flag = "16"
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
396
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
397 editDistance = sum(1 for x, y in alignmentColumns if x != y)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
398 # no special treatment of ambiguous bases: might be a minor bug
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
399 editDistance = "NM:i:" + str(editDistance)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
400
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
401 outWords = [qName, flag, rName, pos, mapq, cigar, "*\t0\t0", seq, qual]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
402 outWords.append(editDistance)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
403 if score: outWords.append(score)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
404 if evalue: outWords.append(evalue)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
405 if rg: outWords.append(rg)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
406 print "\t".join(outWords)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
407
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
408 ##### Routines for converting to BLAST-like format: #####
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
409
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
410 def pairwiseMatchSymbol(alignmentColumn):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
411 if isMatch(alignmentColumn): return "|"
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
412 else: return " "
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
413
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
414 def strandText(strand):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
415 if strand == "+": return "Plus"
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
416 else: return "Minus"
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
417
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
418 def blastCoordinate(oneBasedCoordinate, strand, seqSize):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
419 if strand == "-":
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
420 oneBasedCoordinate = seqSize - oneBasedCoordinate + 1
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
421 return str(oneBasedCoordinate)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
422
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
423 def chunker(things, chunkSize):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
424 for i in range(0, len(things), chunkSize):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
425 yield things[i:i+chunkSize]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
426
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
427 def blastChunker(maf, lineSize, alignmentColumns):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
428 letterSizes = mafLetterSizes(maf)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
429 coords = maf.alnStarts
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
430 for chunkCols in chunker(alignmentColumns, lineSize):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
431 chunkRows = zip(*chunkCols)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
432 chunkRows = map(''.join, chunkRows)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
433 starts = [i + 1 for i in coords] # change to 1-based coordinates
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
434 starts = map(blastCoordinate, starts, maf.strands, maf.seqSizes)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
435 increments = map(insertionSize, chunkRows, letterSizes)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
436 coords = map(operator.add, coords, increments)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
437 ends = map(blastCoordinate, coords, maf.strands, maf.seqSizes)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
438 yield chunkCols, chunkRows, starts, ends
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
439
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
440 def writeBlast(maf, lineSize, oldQueryName):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
441 dieUnlessPairwise(maf)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
442
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
443 if maf.seqNames[1] != oldQueryName:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
444 print "Query= %s" % maf.seqNames[1]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
445 print " (%s letters)" % maf.seqSizes[1]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
446 print
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
447
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
448 print ">%s" % maf.seqNames[0]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
449 print " Length = %s" % maf.seqSizes[0]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
450 print
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
451
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
452 scoreLine = " Score = %s" % maf.namesAndValues["score"]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
453 try: scoreLine += ", Expect = %s" % maf.namesAndValues["expect"]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
454 except KeyError: pass
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
455 print scoreLine
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
456
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
457 alignmentColumns = zip(*maf.alnStrings)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
458
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
459 alnSize = len(alignmentColumns)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
460 matches = quantify(alignmentColumns, isMatch)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
461 matchPercent = 100 * matches // alnSize # round down, like BLAST
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
462 identLine = " Identities = %s/%s (%s%%)" % (matches, alnSize, matchPercent)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
463 gaps = alnSize - quantify(alignmentColumns, isGapless)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
464 gapPercent = 100 * gaps // alnSize # round down, like BLAST
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
465 if gaps: identLine += ", Gaps = %s/%s (%s%%)" % (gaps, alnSize, gapPercent)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
466 print identLine
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
467
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
468 strands = map(strandText, maf.strands)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
469 print " Strand = %s / %s" % (strands[1], strands[0])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
470 print
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
471
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
472 for chunk in blastChunker(maf, lineSize, alignmentColumns):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
473 cols, rows, starts, ends = chunk
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
474 startWidth = maxlen(starts)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
475 matchSymbols = map(pairwiseMatchSymbol, cols)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
476 matchSymbols = ''.join(matchSymbols)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
477 print "Query: %-*s %s %s" % (startWidth, starts[1], rows[1], ends[1])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
478 print " %-*s %s" % (startWidth, " ", matchSymbols)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
479 print "Sbjct: %-*s %s %s" % (startWidth, starts[0], rows[0], ends[0])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
480 print
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
481
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
482 ##### Routines for converting to HTML format: #####
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
483
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
484 def writeHtmlHeader():
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
485 print '''
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
486 <!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
487 "http://www.w3.org/TR/html4/strict.dtd">
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
488 <html lang="en"><head>
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
489 <meta http-equiv="Content-type" content="text/html; charset=UTF-8">
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
490 <title>Reliable Alignments</title>
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
491 <style type="text/css">
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
492 /* Try to force monospace, working around browser insanity: */
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
493 pre {font-family: "Courier New", monospace, serif; font-size: 0.8125em}
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
494 .a {background-color: #3333FF}
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
495 .b {background-color: #9933FF}
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
496 .c {background-color: #FF66CC}
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
497 .d {background-color: #FF3333}
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
498 .e {background-color: #FF9933}
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
499 .f {background-color: #FFFF00}
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
500 .key {display:inline; margin-right:2em}
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
501 </style>
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
502 </head><body>
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
503
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
504 <div style="line-height:1">
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
505 <pre class="key"><span class="a"> </span> prob &gt; 0.999</pre>
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
506 <pre class="key"><span class="b"> </span> prob &gt; 0.99 </pre>
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
507 <pre class="key"><span class="c"> </span> prob &gt; 0.95 </pre>
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
508 <pre class="key"><span class="d"> </span> prob &gt; 0.9 </pre>
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
509 <pre class="key"><span class="e"> </span> prob &gt; 0.5 </pre>
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
510 <pre class="key"><span class="f"> </span> prob &le; 0.5 </pre>
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
511 </div>
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
512 '''
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
513
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
514 def probabilityClass(probabilityColumn):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
515 p = ord(min(probabilityColumn)) - 33
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
516 if p >= 30: return 'a'
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
517 elif p >= 20: return 'b'
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
518 elif p >= 13: return 'c'
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
519 elif p >= 10: return 'd'
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
520 elif p >= 3: return 'e'
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
521 else: return 'f'
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
522
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
523 def identicalRuns(s):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
524 """Yield (item, start, end) for each run of identical items in s."""
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
525 beg = 0
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
526 for k, v in groupby(s):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
527 end = beg + len(list(v))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
528 yield k, beg, end
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
529 beg = end
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
530
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
531 def htmlSpan(text, classRun):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
532 key, beg, end = classRun
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
533 textbit = text[beg:end]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
534 if key: return '<span class="%s">%s</span>' % (key, textbit)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
535 else: return textbit
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
536
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
537 def multipleMatchSymbol(alignmentColumn):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
538 if isMatch(alignmentColumn): return "*"
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
539 else: return " "
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
540
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
541 def writeHtml(maf, lineSize):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
542 scoreLine = "Alignment"
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
543 try:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
544 scoreLine += " score=%s" % maf.namesAndValues["score"]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
545 scoreLine += ", expect=%s" % maf.namesAndValues["expect"]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
546 except KeyError: pass
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
547 print "<h3>%s:</h3>" % scoreLine
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
548
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
549 if maf.pLines:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
550 probRows = [i[1] for i in maf.pLines]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
551 probCols = izip(*probRows)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
552 classes = imap(probabilityClass, probCols)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
553 else:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
554 classes = repeat(None)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
555
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
556 nameWidth = maxlen(maf.seqNames)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
557 alignmentColumns = zip(*maf.alnStrings)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
558
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
559 print '<pre>'
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
560 for chunk in blastChunker(maf, lineSize, alignmentColumns):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
561 cols, rows, starts, ends = chunk
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
562 startWidth = maxlen(starts)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
563 endWidth = maxlen(ends)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
564 matchSymbols = map(multipleMatchSymbol, cols)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
565 matchSymbols = ''.join(matchSymbols)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
566 classChunk = islice(classes, lineSize)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
567 classRuns = list(identicalRuns(classChunk))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
568 for n, s, r, e in zip(maf.seqNames, starts, rows, ends):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
569 spans = [htmlSpan(r, i) for i in classRuns]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
570 spans = ''.join(spans)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
571 formatParams = nameWidth, n, startWidth, s, spans, endWidth, e
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
572 print '%-*s %*s %s %*s' % formatParams
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
573 print ' ' * nameWidth, ' ' * startWidth, matchSymbols
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
574 print
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
575 print '</pre>'
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
576
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
577 ##### Routines for reading MAF format: #####
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
578
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
579 def mafInput(lines, isKeepComments):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
580 a = []
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
581 s = []
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
582 q = []
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
583 p = []
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
584 for i in lines:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
585 w = i.split()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
586 if not w:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
587 if a: yield a, s, q, p
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
588 a = []
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
589 s = []
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
590 q = []
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
591 p = []
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
592 elif w[0] == "a":
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
593 a = w
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
594 elif w[0] == "s":
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
595 s.append(w)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
596 elif w[0] == "q":
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
597 q.append(w)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
598 elif w[0] == "p":
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
599 p.append(w)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
600 elif i[0] == "#" and isKeepComments:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
601 print i,
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
602 if a: yield a, s, q, p
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
603
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
604 def isFormat(myString, myFormat):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
605 return myFormat.startswith(myString)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
606
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
607 def mafConvert(opts, args):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
608 format = args[0].lower()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
609 if isFormat(format, "sam"):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
610 if opts.dictionary: d = readSequenceLengths(fileinput.input(args[1:]))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
611 else: d = {}
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
612 if opts.readgroup:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
613 readGroupItems = opts.readgroup.split()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
614 rg = "RG:Z:" + readGroupId(readGroupItems)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
615 else:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
616 readGroupItems = []
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
617 rg = ""
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
618 if not opts.noheader: writeSamHeader(d, opts.dictfile, readGroupItems)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
619 inputLines = fileinput.input(args[1:])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
620 if isFormat(format, "html") and not opts.noheader: writeHtmlHeader()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
621 isKeepCommentLines = isFormat(format, "tabular") and not opts.noheader
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
622 oldQueryName = ""
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
623 for maf in mafInput(inputLines, isKeepCommentLines):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
624 if isFormat(format, "axt"): writeAxt(Maf(*maf))
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
625 elif isFormat(format, "blast"):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
626 maf = Maf(*maf)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
627 writeBlast(maf, opts.linesize, oldQueryName)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
628 oldQueryName = maf.seqNames[1]
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
629 elif isFormat(format, "html"): writeHtml(Maf(*maf), opts.linesize)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
630 elif isFormat(format, "psl"): writePsl(Maf(*maf), opts.protein)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
631 elif isFormat(format, "sam"): writeSam(maf, rg)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
632 elif isFormat(format, "tabular"): writeTab(maf)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
633 else: raise Exception("unknown format: " + format)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
634 if isFormat(format, "html") and not opts.noheader: print "</body></html>"
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
635
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
636 if __name__ == "__main__":
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
637 signal.signal(signal.SIGPIPE, signal.SIG_DFL) # avoid silly error message
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
638
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
639 usage = """
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
640 %prog --help
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
641 %prog axt mafFile(s)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
642 %prog blast mafFile(s)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
643 %prog html mafFile(s)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
644 %prog psl mafFile(s)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
645 %prog sam mafFile(s)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
646 %prog tab mafFile(s)"""
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
647
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
648 description = "Read MAF-format alignments & write them in another format."
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
649
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
650 op = optparse.OptionParser(usage=usage, description=description)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
651 op.add_option("-p", "--protein", action="store_true",
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
652 help="assume protein alignments, for psl match counts")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
653 op.add_option("-n", "--noheader", action="store_true",
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
654 help="omit any header lines from the output")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
655 op.add_option("-d", "--dictionary", action="store_true",
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
656 help="include dictionary of sequence lengths in sam format")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
657 op.add_option("-f", "--dictfile",
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
658 help="get sequence dictionary from DICTFILE")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
659 op.add_option("-r", "--readgroup",
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
660 help="read group info for sam format")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
661 op.add_option("-l", "--linesize", type="int", default=60, #metavar="CHARS",
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
662 help="line length for blast and html formats (default: %default)")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
663 (opts, args) = op.parse_args()
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
664 if opts.linesize <= 0: op.error("option -l: should be >= 1")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
665 if opts.dictionary and opts.dictfile: op.error("can't use both -d and -f")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
666 if len(args) < 1: op.error("I need a format-name and some MAF alignments")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
667 if opts.dictionary and (len(args) == 1 or "-" in args[1:]):
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
668 op.error("need file (not pipe) with option -d")
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
669
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
670 try: mafConvert(opts, args)
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
671 except KeyboardInterrupt: pass # avoid silly error message
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
672 except Exception, e:
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
673 prog = os.path.basename(sys.argv[0])
06f8460885ff migrate from GitHub
yutaka-saito
parents:
diff changeset
674 sys.exit(prog + ": error: " + str(e))