Mercurial > repos > yufei-luo > s_mart
diff commons/core/parsing/PilerTAToGrouperMap.py @ 38:2c0c0a89fad7
Uploaded
author | m-zytnicki |
---|---|
date | Thu, 02 May 2013 09:56:47 -0400 |
parents | 769e306b7933 |
children |
line wrap: on
line diff
import time
import os


class PilerTAToGrouperMap(object):
    """
    Convert the output file from Piler into grouper format.

    Three input files are read: a GFF file whose lines carry a pile label
    and a pyramid label, a pyramid GFF file whose 9th tab-separated column
    is a "PyramidIndex <n>" label, and a motif file whose lines have the
    form "<tab-separated motif record>;<pyramid label>".

    run() writes three files:
      - outputFileName: one "<pile label><TAB><pyramid label>" line per
        pyramid record;
      - inputMOTIFFileName + ".grp": member ID, motif record, pile label
        and pyramid label for every motif;
      - inputMOTIFFileName + ".grp.map": member ID, sequence name, start
        and end for every motif.
    """

    def __init__(self, inputGffFileName, inputPYRFileName, inputMOTIFFileName, outputFileName):
        """
        Store the file names; no file is opened until run() is called.

        @param inputGffFileName: GFF file searched for the pile of each pyramid
        @param inputPYRFileName: GFF file listing the pyramids
        @param inputMOTIFFileName: motif file; also the prefix of the
                                   ".grp" and ".grp.map" output files
        @param outputFileName: pile/pyramid info file (written, then re-read)
        """
        self._inputGffFileName = inputGffFileName
        self._inputPYRFileName = inputPYRFileName
        self._inputMOTIFFileName = inputMOTIFFileName
        self._outFileName = outputFileName

    def run(self):
        """
        Run the conversion: write the pile info file, then the two grouper files.

        Raises IndexError (or ValueError) when an input line does not have
        the expected fields, or when no pile is known for a motif's pyramid.
        """
        self._writePileInfo()
        self._writeGrouperFiles()

    def _writePileInfo(self):
        """Step 0: write one "<pile label><TAB><pyramid label>" line per pyramid record."""
        # The GFF lines are loaded once and rescanned for every pyramid.
        # Calling readlines() on the open handle inside the loop would
        # return an empty list from the second pyramid onwards, because
        # the first call already consumed the file.
        with open(self._inputGffFileName, "r") as inFileGff:
            gffLines = inFileGff.readlines()

        with open(self._inputPYRFileName, "r") as inFilePyr:
            with open(self._outFileName, "w") as outFile:
                for pyrLine in inFilePyr:
                    # The 9th column is the last one, so pyrIndex keeps the
                    # line terminator: it ends the written info line and
                    # makes the substring test below match only at the end
                    # of a GFF line.
                    pyrIndex = pyrLine.split('\t')[8].replace('PyramidIndex', 'Pyramid')
                    pileIndex = ""
                    for gffLine in gffLines:
                        if pyrIndex in gffLine:
                            pileIndex = gffLine.split(';')[1].strip()
                            break
                    outFile.write("%s\t%s" % (pileIndex, pyrIndex))

    def _writeGrouperFiles(self):
        """Step 1: add the pile info to each motif and write the ".grp" and ".grp.map" files."""
        outFileMotifGrpFileName = self._inputMOTIFFileName + ".grp"
        outFileMotifGrpMapFileName = self._inputMOTIFFileName + ".grp.map"

        with open(self._outFileName, "r") as inFileInfo:
            infoLines = inFileInfo.readlines()

        # Cursor into infoLines. It is shared by all motifs and never moves
        # backwards, so motifs are expected to appear in the same pyramid
        # order as the info file.
        lineInfoIndex = 0

        with open(self._inputMOTIFFileName, "r") as inFileMotif:
            with open(outFileMotifGrpFileName, "w") as outFileMotifGrp:
                with open(outFileMotifGrpMapFileName, "w") as outFileMotifGrpMap:
                    # Member IDs are numbered from 1.
                    for countMotif, lineMotif in enumerate(inFileMotif, 1):
                        motif, pyrNameMotif = lineMotif.split(';')[:2]
                        pyrNameMotif = pyrNameMotif.strip()
                        pileNameMotif = ""

                        # The cursor stays on a matching line so that the
                        # next motif of the same pyramid finds it again.
                        while lineInfoIndex < len(infoLines):
                            lineInfo = infoLines[lineInfoIndex]
                            if pyrNameMotif in lineInfo:
                                pileNameMotif = lineInfo.split('\t')[0]
                                break
                            lineInfoIndex += 1

                        # translate to Grouper IdFormat
                        pyrID = pyrNameMotif.split(' ')[1]
                        pileFields = pileNameMotif.split(' ')
                        if len(pileFields) < 2:
                            # Same exception type as the bare index lookup
                            # would raise, with a message that names the
                            # offending pyramid.
                            raise IndexError("no pile found for '%s' in '%s'"
                                             % (pyrNameMotif, self._outFileName))
                        pileID = pileFields[1]

                        motifFields = motif.split('\t')
                        chrm = motifFields[0]
                        start, end = motifFields[3:5]
                        memberID = "MbS%sGr" % (countMotif) + pyrID + "Cl" + pileID

                        outFileMotifGrp.write(
                            "%s\t%s\t%s\t%s\n" % (memberID, motif, pileNameMotif, pyrNameMotif))
                        outFileMotifGrpMap.write(
                            "%s\t%s\t%s\t%s\n" % (memberID, chrm, start, end))