Mercurial > repos > yufei-luo > s_mart
view commons/core/parsing/PalsToAlign.py @ 18:94ab73e8a190
Uploaded
author | m-zytnicki |
---|---|
date | Mon, 29 Apr 2013 03:20:15 -0400 |
parents | 769e306b7933 |
children |
line wrap: on
line source
import time
import os


class PalsToAlign(object):
    """
    Convert the output from PALS (GFF2 format) into the 'align' format.

    Each input record is a tab-separated line with at least nine columns.
    The columns used are: query name (1), query start (4), query end (5),
    score (6), strand (7) and the attribute column (9).  The attribute
    column is split on whitespace: its 2nd, 3rd and 4th tokens are read as
    subject name, subject start and subject end (a trailing ';' on the end
    coordinate is dropped), and its last token is read as a float 'f' from
    which the identity percentage is computed as (1 - f) * 100.

    Each output line holds nine tab-separated fields:
    query name, query start, query end, subject name, subject start,
    subject end, the constant "0.0", score, identity percentage.
    """

    def __init__(self, inputPalsFileName="", outputAlignFileName="", removeSameSequences=False):
        """
        @param inputPalsFileName: path of the PALS output file to read
        @param outputAlignFileName: path of the 'align' file to write
        @param removeSameSequences: if True, drop hits where both names
            contain "chunk", both ranges begin at 1 and identity is 100%
        """
        self._removeSameSequences = removeSameSequences
        self._inputPalsFileName = inputPalsFileName
        self._outputAlignFileName = outputAlignFileName

    def _convertLine(self, line):
        """
        Translate one PALS record into one 'align' line.

        @param line: a raw line of the input file (with or without its
            end-of-line characters)
        @return: the formatted 'align' line (newline-terminated), or None
            when the line is blank, is a '#' comment, or is filtered out
            by the removeSameSequences option
        @raise IndexError: if the record has too few columns/attributes
        @raise ValueError: if a coordinate or the last attribute token is
            not numeric
        """
        # Strip only end-of-line characters: chopping the last character
        # unconditionally would corrupt the final value of a file whose
        # last line has no trailing newline.
        record = line.rstrip("\r\n")
        if not record.strip() or record.startswith("#"):
            return None

        data = record.split("\t")
        qryName = data[0]
        qryStart = data[3]
        qryEnd = data[4]
        score = data[5]
        strand = data[6]

        attributes = data[8].split()
        sbjName = attributes[1]
        sbjStart = attributes[2]
        sbjEnd = attributes[3].rstrip(";")
        percId = (1 - float(attributes[-1])) * 100.0

        # On any strand other than '+', the subject coordinates are
        # reported in the opposite orientation.
        if strand != "+":
            sbjStart, sbjEnd = sbjEnd, sbjStart

        if self._removeSameSequences \
                and "chunk" in qryName and "chunk" in sbjName \
                and min(int(qryStart), int(qryEnd)) == 1 \
                and min(int(sbjStart), int(sbjEnd)) == 1 \
                and percId == 100.0:
            return None

        # Compare coordinates numerically (as strings, "9" < "100" is
        # False).  The original text of each field is written unchanged.
        if int(qryStart) < int(qryEnd):
            fields = (qryName, qryStart, qryEnd, sbjName, sbjStart, sbjEnd, "0.0", score, percId)
        else:
            # Put the query range in increasing order and flip the
            # subject range accordingly.
            fields = (qryName, qryEnd, qryStart, sbjName, sbjEnd, sbjStart, "0.0", score, percId)
        return "%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\t%s\n" % fields

    def run(self):
        """
        Convert the input file and write the sorted result to the output file.

        The converted lines go to a temporary file in the current working
        directory, which is then sorted with the external 'sort' command
        (keys: field 1, field 4, then fields 2, 3, 5, 6 and 8 numerically).
        The temporary file is removed even if the conversion fails.

        @raise subprocess.CalledProcessError: if 'sort' exits with a
            non-zero status
        """
        import subprocess
        import tempfile

        # A unique name avoids collisions between converters running in
        # the same process and working directory.
        tmpFd, tmpFileName = tempfile.mkstemp(prefix="PalsToAlign", dir=".")
        try:
            with os.fdopen(tmpFd, "w") as tmpFile:
                with open(self._inputPalsFileName, "r") as palsFile:
                    for line in palsFile:
                        alignLine = self._convertLine(line)
                        if alignLine is not None:
                            tmpFile.write(alignLine)

            # Argument list + explicit stdout: no shell is involved, so
            # file names containing spaces or metacharacters are safe.
            sortCmd = ["sort",
                       "-k", "1,1", "-k", "4,4",
                       "-k", "2,2n", "-k", "3,3n",
                       "-k", "5,5n", "-k", "6,6n",
                       "-k", "8,8n",
                       tmpFileName]
            with open(self._outputAlignFileName, "w") as alignFile:
                subprocess.check_call(sortCmd, stdout=alignFile)
        finally:
            os.remove(tmpFileName)