annotate commons/core/parsing/PilerTAToGrouperMap.py @ 68:85e80c21b1f7 draft

Uploaded
author m-zytnicki
date Mon, 16 Nov 2015 12:00:32 -0500
parents 769e306b7933
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
6
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1 import time
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
2 import os
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
3
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
class PilerTAToGrouperMap(object):
    """
    Convert the output files from Piler into grouper format.

    Calling run() writes three files:
      - outputFileName: one "<pile><TAB><pyramid>" line per pyramid line;
      - inputMOTIFFileName + ".grp": each motif prefixed with a member ID and
        followed by its pile and pyramid names;
      - inputMOTIFFileName + ".grp.map": member ID, first motif column, start
        and end, tab-separated.
    """

    def __init__(self, inputGffFileName, inputPYRFileName, inputMOTIFFileName, outputFileName):
        """
        Store the file names; no file is opened before run() is called.

        inputGffFileName   -- file searched for the pile of each pyramid
                              (the "-tan.gff" file)
        inputPYRFileName   -- tab-separated file whose 9th column holds
                              "PyramidIndex <n>" (the "-tan_pyr.gff" file)
        inputMOTIFFileName -- motif file, ';'-separated "<motif>;<pyramid name>"
        outputFileName     -- pile/pyramid info file written by step 0
        """
        self._inputGffFileName = inputGffFileName
        self._inputPYRFileName = inputPYRFileName
        self._inputMOTIFFileName = inputMOTIFFileName
        self._outFileName = outputFileName

    def run(self):
        """
        Run both conversion steps.

        Raises IndexError when a line does not have the expected columns or
        when no pile can be associated with the pyramid of a motif.
        """
        # step 0 : get pile info and write out an info file
        self._writePileInfo()
        # step 1 : add pile info to the motif file and write out two files,
        # one with grouper IDs and one in map format
        self._writeGrouperFiles()

    def _writePileInfo(self):
        """Write one "<pile><TAB><pyramid>" line per pyramid to the info file."""
        # Load the GFF lines once: calling readlines() on the open handle for
        # every pyramid only returns data the first time, which left every
        # pyramid after the first one without a pile.
        with open(self._inputGffFileName, "r") as inFileGff:
            gffLines = inFileGff.readlines()

        infoLines = []
        with open(self._inputPYRFileName, "r") as inFilePyr:
            for pyrLine in inFilePyr:
                if not pyrLine.strip():
                    # blank line (typically at the end of the file)
                    continue
                # pyrIndex is kept exactly as read, so when the 9th column is
                # the last one it still carries the line terminator; this is
                # what terminates the info line written below.
                pyrIndex = pyrLine.split('\t')[8].replace('PyramidIndex', 'Pyramid')
                pileIndex = ""
                for gffLine in gffLines:
                    if pyrIndex in gffLine:
                        pileIndex = gffLine.split(';')[1].strip()
                        break
                infoLines.append("%s\t%s" % (pileIndex, pyrIndex))

        with open(self._outFileName, "w") as outFile:
            outFile.writelines(infoLines)

    def _writeGrouperFiles(self):
        """Write the ".grp" and ".grp.map" files next to the motif file."""
        with open(self._outFileName, "r") as inFileInfo:
            infoLines = inFileInfo.readlines()
        with open(self._inputMOTIFFileName, "r") as inFileMotif:
            motifLines = inFileMotif.readlines()

        outFileMotifGrpFileName = self._inputMOTIFFileName + ".grp"
        outFileMotifGrpMapFileName = self._inputMOTIFFileName + ".grp.map"

        # The cursor is never rewound: the search for the pyramid of a motif
        # resumes on the info line matched by the previous motif.
        # NOTE(review): this presumes motifs and info lines share the same
        # pyramid order -- confirm against the Piler output.
        lineInfoIndex = 0

        with open(outFileMotifGrpFileName, "w") as outFileMotifGrp:
            with open(outFileMotifGrpMapFileName, "w") as outFileMotifGrpMap:
                # countMotif is the 1-based line number in the motif file
                for countMotif, lineMotif in enumerate(motifLines, 1):
                    if not lineMotif.strip():
                        # blank line (typically at the end of the file)
                        continue
                    motif, pyrNameMotif = lineMotif.split(';')[:2]
                    pyrNameMotif = pyrNameMotif.strip()

                    pileNameMotif = ""
                    while lineInfoIndex < len(infoLines):
                        lineInfo = infoLines[lineInfoIndex]
                        if pyrNameMotif in lineInfo:
                            pileNameMotif = lineInfo.split('\t')[0]
                            break
                        lineInfoIndex += 1

                    # translate to Grouper IdFormat
                    pyrID = pyrNameMotif.split(' ')[1]
                    if not pileNameMotif:
                        # Same exception type as the failing split() used to
                        # raise here, but with a usable message.
                        raise IndexError("no pile found for '%s' (line %d of %s)"
                                         % (pyrNameMotif, countMotif, self._inputMOTIFFileName))
                    pileID = pileNameMotif.split(' ')[1]
                    dataMotif = motif.split('\t')
                    chrm = dataMotif[0]
                    start, end = dataMotif[3:5]
                    memberID = "MbS%sGr" % (countMotif) + pyrID + "Cl" + pileID

                    stringMotif = "%s\t%s\t%s\t%s\n" % (memberID, motif, pileNameMotif, pyrNameMotif)
                    outFileMotifGrp.write(stringMotif)

                    stringGrpMap = "%s\t%s\t%s\t%s\n" % (memberID, chrm, start, end)
                    outFileMotifGrpMap.write(stringGrpMap)