Mercurial > repos > yufei-luo > s_mart
comparison commons/core/parsing/PilerTAToGrouperMap.py @ 38:2c0c0a89fad7
Uploaded
author | m-zytnicki |
---|---|
date | Thu, 02 May 2013 09:56:47 -0400 |
parents | 769e306b7933 |
children |
comparison
equal
deleted
inserted
replaced
37:d22fadc825e3 | 38:2c0c0a89fad7 |
---|---|
1 import time | |
2 import os | |
3 | |
class PilerTAToGrouperMap(object):
    """
    Convert the output file from Piler into grouper format.

    Three files are produced by run():
      * outputFileName: one "<pile>\\t<pyramid>" line per pyramid record;
      * inputMOTIFFileName + ".grp": each motif prefixed with a grouper
        member ID and followed by its pile and pyramid names;
      * inputMOTIFFileName + ".grp.map": "<memberID>\\t<chr>\\t<start>\\t<end>".
    """

    def __init__(self, inputGffFileName, inputPYRFileName, inputMOTIFFileName, outputFileName):
        """
        Store the file names used by run().

        @param inputGffFileName: hit file (the "-tan.gff" file) whose lines
            carry both a pile and a pyramid label
        @param inputPYRFileName: pyramid file (the "-tan_pyr.gff" file)
        @param inputMOTIFFileName: motif file; also the prefix of the two
            ".grp" / ".grp.map" output files
        @param outputFileName: intermediate pile/pyramid info file to write
        """
        self._inputGffFileName = inputGffFileName
        self._inputPYRFileName = inputPYRFileName
        self._inputMOTIFFileName = inputMOTIFFileName
        self._outFileName = outputFileName

    def run(self):
        """
        Write the pile/pyramid info file, then the two grouper files.

        Raises IOError/OSError if a file cannot be opened, and IndexError
        if a motif's pyramid has no pile in the info file or a record does
        not have the expected number of fields.
        """
        self._writePileInfo()
        self._writeGrouperFiles()

    def _writePileInfo(self):
        """Step 0: pair every pyramid with its pile and write the info file."""
        # Read the hit file ONCE. Calling readlines() again for every
        # pyramid returns an empty list after the first pass, which left
        # every pyramid but the first without a pile.
        with open(self._inputGffFileName, "r") as inFileGff:
            gffLines = inFileGff.readlines()

        with open(self._inputPYRFileName, "r") as inFilePyr, \
                open(self._outFileName, "w") as outFile:
            for pyrLine in inFilePyr:  # -tan_pyr.gff
                if not pyrLine.strip():
                    continue
                # The 9th column keeps its trailing newline on purpose: it
                # terminates the info line written below and, in the
                # substring test, stops "Pyramid 1" from matching a hit line
                # ending in "Pyramid 10".
                # NOTE(review): this relies on the pyramid label being the
                # last thing on both the pyramid and the hit lines - confirm.
                pyrIndex = pyrLine.split('\t')[8].replace('PyramidIndex', 'Pyramid')
                pileIndex = ""
                for gffLine in gffLines:  # -tan.gff
                    if pyrIndex in gffLine:
                        # second ';'-separated field of the hit line
                        pileIndex = gffLine.split(';')[1].strip()
                        break
                outFile.write("%s\t%s" % (pileIndex, pyrIndex))

    def _writeGrouperFiles(self):
        """Step 1: add pile info to the motifs; write the .grp and .grp.map files."""
        outFileMotifGrpFileName = self._inputMOTIFFileName + ".grp"
        outFileMotifGrpMapFileName = self._inputMOTIFFileName + ".grp.map"

        with open(self._outFileName, "r") as inFileInfo:
            inFileInfos = inFileInfo.readlines()

        # Cursor into the info lines. It is shared by all motifs and never
        # rewinds, so motifs are looked up in file order.
        # NOTE(review): presumably the motif file lists pyramids in the same
        # order as the pyramid file - confirm.
        lineInfoIndex = 0

        with open(self._inputMOTIFFileName, "r") as inFileMotif, \
                open(outFileMotifGrpFileName, "w") as outFileMotifGrp, \
                open(outFileMotifGrpMapFileName, "w") as outFileMotifGrpMap:
            # countMotif is the 1-based line number of the motif.
            for countMotif, lineMotif in enumerate(inFileMotif, 1):
                if not lineMotif.strip():
                    continue
                motif, pyrNameMotif = lineMotif.split(';')[:2]
                pyrNameMotif = pyrNameMotif.strip()
                pileNameMotif = ""

                while lineInfoIndex < len(inFileInfos):
                    lineInfo = inFileInfos[lineInfoIndex]
                    if pyrNameMotif in lineInfo:
                        pileNameMotif = lineInfo.split('\t')[0]
                        # Do not advance: the next motif may belong to the
                        # same pyramid.
                        break
                    lineInfoIndex += 1

                # translate to Grouper IdFormat
                # Both names are "<label> <number>"; an empty pile name (no
                # match found) raises IndexError here.
                pyrID = pyrNameMotif.split(' ')[1]
                pileID = pileNameMotif.split(' ')[1]
                dataMotif = motif.split('\t')
                chrm = dataMotif[0]
                start, end = dataMotif[3:5]
                memberID = "MbS%sGr" % (countMotif) + pyrID + "Cl" + pileID

                outFileMotifGrp.write(
                    "%s\t%s\t%s\t%s\n" % (memberID, motif, pileNameMotif, pyrNameMotif))
                outFileMotifGrpMap.write(
                    "%s\t%s\t%s\t%s\n" % (memberID, chrm, start, end))