annotate smart_toolShed/commons/core/parsing/PalsToAlign.py @ 0:e0f8dcca02ed

Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
author yufei-luo
date Thu, 17 Jan 2013 10:52:14 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
1 import time
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
2 import os
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
3
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
4 class PalsToAlign(object):
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
5 """
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
6 Convert the output from PALS (GFF2 format) into the 'align' format.
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
7 """
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
8 def __init__(self,inputPalsFileName="" , outputAlignFileName="", removeSameSequences=False):
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
9 self._removeSameSequences = removeSameSequences
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
10 self._inputPalsFileName = inputPalsFileName
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
11 self._outputAlignFileName = outputAlignFileName
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
12
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
13 def run (self):
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
14 file = open(self._inputPalsFileName, "r")
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
15 tmpFileName = "PalsToAlign%s"%str(os.getpid() )
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
16 tmpFile = open(tmpFileName, "w")
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
17
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
18 for line in file.readlines():
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
19
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
20 if line == "":
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
21 break
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
22
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
23 data = line.split("\t")
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
24
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
25 qryName = data[0]
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
26 source = data[1]
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
27 feature = data[2]
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
28 qryStart = data[3]
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
29 qryEnd = data[4]
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
30 score = data[5]
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
31 strand = data[6]
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
32 frame = data[7]
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
33 attributes = data[8][:-1].split()
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
34
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
35 sbjName = attributes[1]
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
36 sbjStart = attributes[2]
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
37 sbjEnd = attributes[3][:-1]
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
38 percId = (1 - float(attributes[-1])) * 100.0
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
39
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
40 if strand != "+":
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
41 tmp = sbjStart
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
42 sbjStart = sbjEnd
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
43 sbjEnd = tmp
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
44
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
45 if self._removeSameSequences \
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
46 and "chunk" in qryName and "chunk" in sbjName \
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
47 and min(int(qryStart), int(qryEnd)) == 1 \
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
48 and min(int(sbjStart), int(sbjEnd)) == 1 \
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
49 and percId == 100.0:
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
50 line = self.inFile.readline()
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
51 continue
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
52
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
53 if qryStart < qryEnd:
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
54 alignLine = "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (qryName, qryStart, qryEnd, sbjName, sbjStart, sbjEnd, "0.0", score, percId)
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
55 else:
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
56 alignLine = "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % (qryName, qryEnd, qryStart, sbjName, sbjEnd, sbjStart, "0.0", score, percId)
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
57
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
58 tmpFile.write(alignLine)
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
59
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
60 file.close()
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
61 tmpFile.close()
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
62
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
63 os.system("sort -k 1,1 -k 4,4 -k 2,2n -k 3,3n -k 5,5n -k 6,6n -k 8,8n %s > %s" % (tmpFileName, self._outputAlignFileName))
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
64 os.remove(tmpFileName)
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
65
e0f8dcca02ed Uploaded S-MART tool. A toolbox manages RNA-Seq and ChIP-Seq data.
yufei-luo
parents:
diff changeset
66