Mercurial > repos > yufei-luo > s_mart
view commons/core/parsing/PilerTAToGrouperMap.py @ 60:90f4b29d884f
Uploaded
author | m-zytnicki |
---|---|
date | Fri, 21 Feb 2014 08:32:36 -0500 |
parents | 769e306b7933 |
children |
line wrap: on
line source
import time
import os


class PilerTAToGrouperMap(object):
    """
    Convert the output files from Piler into grouper format.

    Three files are written by run():
      - outputFileName: one "<pile name>\\t<pyramid name>" line per line of
        the pyramid file;
      - inputMOTIFFileName + ".grp": one line per motif with its member ID,
        the original motif columns, the pile name and the pyramid name;
      - inputMOTIFFileName + ".grp.map": one "<member ID>\\t<chr>\\t<start>\\t<end>"
        line per motif.
    """

    def __init__(self, inputGffFileName, inputPYRFileName, inputMOTIFFileName, outputFileName):
        """
        Store the file names used by run().

        @param inputGffFileName: hit file (the "-tan.gff" file), whose lines
            carry "; <pile name> ; <pyramid name>" after the GFF columns
        @param inputPYRFileName: pyramid file (the "-tan_pyr.gff" file), whose
            9th tab-separated column is "PyramidIndex <n>"
        @param inputMOTIFFileName: motif file, whose lines are
            "<GFF columns> ; <pyramid name>"
        @param outputFileName: name of the pile/pyramid info file to write
        """
        self._inputGffFileName = inputGffFileName
        self._inputPYRFileName = inputPYRFileName
        self._inputMOTIFFileName = inputMOTIFFileName
        self._outFileName = outputFileName

    def run(self):
        """
        Write the info file, then the ".grp" and ".grp.map" motif files.

        @raise IOError: if an input file cannot be read or an output file
            cannot be written
        @raise IndexError: if a pyramid line has fewer than 9 tab-separated
            columns, if a pile or pyramid name has no " <id>" part, or if no
            pile is known for the pyramid of a motif
        @raise ValueError: if a motif line has no ';' separator or fewer than
            5 tab-separated columns
        """
        pileByPyramid = self._writePileInfo()
        self._writeGrouperFiles(pileByPyramid)

    def _writePileInfo(self):
        """
        Step 0: write the info file and return a {pyramid name: pile name} dict.
        """
        # Read the hit file only once. Calling readlines() again for every
        # pyramid returns an empty list after the first call, which left every
        # pyramid but the first without a pile.
        with open(self._inputGffFileName, "r") as inFileGff:
            gffLines = inFileGff.readlines()

        pileByPyramid = {}
        with open(self._inputPYRFileName, "r") as inFilePyr, \
                open(self._outFileName, "w") as outFile:
            for pyrLine in inFilePyr:
                # Last column, end of line included: the trailing newline
                # keeps "Pyramid 1" from matching a "Pyramid 10" hit line.
                pyrIndex = pyrLine.split('\t')[8].replace('PyramidIndex', 'Pyramid')
                pileIndex = ""
                for gffLine in gffLines:
                    if pyrIndex in gffLine:
                        pileIndex = gffLine.split(';')[1].strip()
                        break
                # No "\n" added here: pyrIndex still carries the line ending.
                outFile.write("%s\t%s" % (pileIndex, pyrIndex))
                # Keep the first pile seen for a pyramid name.
                pileByPyramid.setdefault(pyrIndex.strip(), pileIndex)
        return pileByPyramid

    def _writeGrouperFiles(self, pileByPyramid):
        """
        Step 1: add the pile to each motif and write the ".grp" and ".grp.map" files.

        @param pileByPyramid: {pyramid name: pile name}, as returned by
            _writePileInfo()
        """
        outFileMotifGrpFileName = self._inputMOTIFFileName + ".grp"
        outFileMotifGrpMapFileName = self._inputMOTIFFileName + ".grp.map"

        with open(self._inputMOTIFFileName, "r") as inFileMotif, \
                open(outFileMotifGrpFileName, "w") as outFileMotifGrp, \
                open(outFileMotifGrpMapFileName, "w") as outFileMotifGrpMap:
            # Motifs are numbered from 1, in file order.
            for countMotif, lineMotif in enumerate(inFileMotif, 1):
                motif, pyrNameMotif = lineMotif.split(';')[:2]
                pyrNameMotif = pyrNameMotif.strip()

                # Exact lookup by pyramid name: independent of the order of
                # the motif file, and "Pyramid 1" cannot pick up "Pyramid 10".
                pileNameMotif = pileByPyramid.get(pyrNameMotif, "")

                # translate to Grouper IdFormat
                pyrID = pyrNameMotif.split(' ')[1]
                if not pileNameMotif:
                    raise IndexError("no pile found for '%s' (motif line %d of '%s')"
                                     % (pyrNameMotif, countMotif, self._inputMOTIFFileName))
                pileID = pileNameMotif.split(' ')[1]

                dataMotif = motif.split('\t')
                chrm = dataMotif[0]
                start, end = dataMotif[3:5]

                memberID = "MbS%sGr%sCl%s" % (countMotif, pyrID, pileID)
                outFileMotifGrp.write("%s\t%s\t%s\t%s\n" % (memberID, motif, pileNameMotif, pyrNameMotif))
                outFileMotifGrpMap.write("%s\t%s\t%s\t%s\n" % (memberID, chrm, start, end))