annotate commons/core/parsing/PalsToAlign.py @ 58:5f5c9b74c2dd

Uploaded
author m-zytnicki
date Fri, 07 Feb 2014 11:53:36 -0500
parents 769e306b7933
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
6
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1 import time
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
2 import os
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
3
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
class PalsToAlign(object):
    """
    Convert the output from PALS (GFF2 format) into the 'align' format.

    Each input record is a tab-separated GFF2 line whose ninth column is read
    as ``Target <subjectName> <subjectStart> <subjectEnd>; ... <value>``; the
    last whitespace-separated token of that column is turned into a percentage
    with ``(1 - value) * 100``.

    Each output record is a tab-separated line:
    query name, query start, query end, subject name, subject start,
    subject end, the literal "0.0", score, percentage.
    The output file is sorted with the external ``sort`` program.
    """

    def __init__(self, inputPalsFileName="", outputAlignFileName="", removeSameSequences=False):
        """
        Store the conversion settings.

        inputPalsFileName: path of the PALS (GFF2) file to read.
        outputAlignFileName: path of the 'align' file to write.
        removeSameSequences: when true, drop records where both names contain
            "chunk", both ranges start at 1 and the percentage is exactly 100.
        """
        self._removeSameSequences = removeSameSequences
        self._inputPalsFileName = inputPalsFileName
        self._outputAlignFileName = outputAlignFileName

    def _convertLine(self, line):
        """
        Convert one PALS line into one 'align' line.

        Return the formatted line (ending with a newline), or None when the
        input line is blank or is filtered out by 'removeSameSequences'.
        """
        # Blank lines carry no record; skip them instead of failing on the
        # column look-ups below.
        if not line.strip():
            return None

        data = line.split("\t")
        qryName = data[0]
        qryStart = data[3]
        qryEnd = data[4]
        score = data[5]
        strand = data[6]
        # split() already discards the end-of-line characters, so the last
        # record is parsed correctly even without a trailing newline.
        attributes = data[8].split()

        sbjName = attributes[1]
        sbjStart = attributes[2]
        # The subject end is followed by the ';' separating the attributes.
        sbjEnd = attributes[3].rstrip(";")
        percId = (1 - float(attributes[-1])) * 100.0

        # Any strand other than '+' has its subject coordinates reversed.
        if strand != "+":
            sbjStart, sbjEnd = sbjEnd, sbjStart

        if self._removeSameSequences \
                and "chunk" in qryName and "chunk" in sbjName \
                and min(int(qryStart), int(qryEnd)) == 1 \
                and min(int(sbjStart), int(sbjEnd)) == 1 \
                and percId == 100.0:
            return None

        # Compare coordinates as numbers: as strings, "9" < "100" is False.
        if int(qryStart) < int(qryEnd):
            fields = (qryName, qryStart, qryEnd, sbjName, sbjStart, sbjEnd, "0.0", score, percId)
        else:
            fields = (qryName, qryEnd, qryStart, sbjName, sbjEnd, sbjStart, "0.0", score, percId)
        return "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % fields

    def run(self):
        """
        Convert the input file and write the sorted result to the output file.

        The converted records are first written to a temporary file named
        "PalsToAlign<pid>" in the current directory, then sorted into the
        output file; the temporary file is always removed afterwards.
        """
        # Local import: only this method needs it.
        import subprocess

        tmpFileName = "PalsToAlign%s" % str(os.getpid())
        try:
            with open(self._inputPalsFileName, "r") as palsFile:
                with open(tmpFileName, "w") as tmpFile:
                    for line in palsFile:
                        alignLine = self._convertLine(line)
                        if alignLine is not None:
                            tmpFile.write(alignLine)

            # Same sort keys as before, but passed as an argument list so that
            # file names with spaces or shell metacharacters are handled safely.
            sortCmd = ["sort",
                       "-k", "1,1", "-k", "4,4",
                       "-k", "2,2n", "-k", "3,3n",
                       "-k", "5,5n", "-k", "6,6n",
                       "-k", "8,8n",
                       tmpFileName]
            with open(self._outputAlignFileName, "w") as outFile:
                subprocess.call(sortCmd, stdout=outFile)
        finally:
            if os.path.exists(tmpFileName):
                os.remove(tmpFileName)
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
65
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
66