Mercurial > repos > yufei-luo > s_mart
comparison SMART/Java/Python/ncList/test/MockFindOverlaps_randomExample.py @ 6:769e306b7933
Change the repository level.
| author | yufei-luo |
|---|---|
| date | Fri, 18 Jan 2013 04:54:14 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 5:ea3082881bf8 | 6:769e306b7933 |
|---|---|
| 1 import os | |
| 2 import random | |
| 3 from SMART.Java.Python.getRandomRegions import RandomRegionsGenerator | |
| 4 from commons.core.writer.TranscriptWriter import TranscriptWriter | |
| 5 from SMART.Java.Python.structure.Transcript import Transcript | |
| 6 from commons.core.parsing.GffParser import GffParser | |
| 7 | |
class MockFindOverlaps_randomExample(object):
    """Write a file of random regions and sort it in place.

    The regions are produced by ``MockFindOverlaps_randomExample_NonOrder``;
    this class only adds the sorting step on top of it.
    """

    def __init__(self, fileName, ID, numberOfReads, chromSize):
        """Store the generation parameters.

        fileName      -- path of the file to create (and then sort)
        ID            -- identifier forwarded to the non-ordered generator
        numberOfReads -- number of random regions to generate
        chromSize     -- genome size handed to the random generator
        """
        self._fileName = fileName
        self._ID = ID
        self._numberOfReads = numberOfReads
        self._chromSize = chromSize

    def write(self):
        """Generate the random regions, then sort the output file in place."""
        # Imported locally so the block does not depend on the file header.
        import subprocess

        iMFO_RE = MockFindOverlaps_randomExample_NonOrder(self._fileName, self._ID, self._numberOfReads, self._chromSize)
        iMFO_RE.write()
        # Argument list instead of a shell string: file names containing
        # spaces or shell metacharacters are passed to `sort` untouched.
        # The sort options are kept verbatim from the original command, and
        # the exit status is ignored exactly as os.system()'s was.
        subprocess.call(['sort', '-f', '-n', '-k4', '-k5.4rn', '-o', self._fileName, self._fileName])
| 21 | |
class MockFindOverlaps_randomExample_NonOrder(object):
    """Write a file of random regions, without any sorting step."""

    def __init__(self, fileName, ID, numberOfReads, chromSize):
        """Remember the output path, identifier, region count and genome size."""
        self._chromSize = chromSize
        self._numberOfReads = numberOfReads
        self._ID = ID
        self._fileName = fileName

    def write(self):
        """Configure a RandomRegionsGenerator and run it on the output file."""
        generator = RandomRegionsGenerator(2)
        # (setter name, value) pairs, applied in this exact order.
        configuration = (
            ('setMinSize', 36),
            ('setMaxSize', 100),
            ('setGenomeSize', self._chromSize),
            ('setChromosomeName', "chr1"),
            ('setStrands', False),
            ('setNumber', self._numberOfReads),
            ('setOutputFile', self._fileName),
        )
        for setterName, value in configuration:
            getattr(generator, setterName)(value)
        generator.run()
| 40 | |
| 41 | |
class MockFindOverlaps_randomExample_MOverlaps(object):
    """Create a random reference file plus a query file with a chosen overlap total.

    The reference file is produced by ``MockFindOverlaps_randomExample``.
    Batches of ten random query transcripts are then drawn until the
    accumulated number of query/reference overlaps equals ``overlapNumber``.
    """

    def __init__(self, refFileName, queryFileName, overlapNumber, numberOfReads, chromSize):
        """Store the generation parameters.

        refFileName   -- path of the reference file to create
        queryFileName -- path of the query file to create
        overlapNumber -- exact total of overlaps the query set must reach
        numberOfReads -- number of random reference regions
        chromSize     -- genome size used for the reference regions
        """
        self._refFileName = refFileName
        self._queryFileName = queryFileName
        self._overlapNumber = overlapNumber
        self._numberOfReads = numberOfReads
        self._chromSize = chromSize

    def createRandomExample(self):
        """Write the reference file and a matching query file, then sort both.

        Only the batch of queries that reaches exactly ``overlapNumber`` is
        written; queries drawn in abandoned batches are discarded.  If
        ``overlapNumber`` is 0 the drawing loop never runs and no query is
        added to the writer.
        """
        refId = 'reference'
        iRSS = MockFindOverlaps_randomExample(self._refFileName, refId, self._numberOfReads, self._chromSize)
        iRSS.write()
        self.queryWriter = TranscriptWriter(self._queryFileName, 'gff3')
        acceptedQueries = []
        totalOverlap = 0
        while totalOverlap != self._overlapNumber:
            # Start a fresh batch: buffering the queries (instead of handing
            # them to the writer right away) keeps transcripts from a failed
            # batch out of the output file.
            acceptedQueries = []
            totalOverlap = 0
            for i in range(10):
                query = self.createRandomTranscript(i, refId)
                overlapNumber = self.getOverlapNumber(query, self._refFileName, totalOverlap)
                # Redraw this query while it would push the running total
                # past the requested number of overlaps.
                while overlapNumber > self._overlapNumber:
                    query = self.createRandomTranscript(i, refId)
                    overlapNumber = self.getOverlapNumber(query, self._refFileName, totalOverlap)
                totalOverlap = overlapNumber
                acceptedQueries.append(query)
        for query in acceptedQueries:
            self.queryWriter.addTranscript(query)
        self.queryWriter.write()
        self.queryWriter.close()

        self._sortFile(self._refFileName)
        self._sortFile(self._queryFileName)

    @staticmethod
    def _sortFile(fileName):
        """Sort fileName in place with the external `sort` program."""
        # Imported locally so the block does not depend on the file header.
        import subprocess

        # Argument list rather than a shell string, so unusual file names are
        # passed through untouched.  Options are kept verbatim; the exit
        # status is ignored, as it was with os.system().
        subprocess.call(['sort', '-f', '-n', '-k4', '-k5.4rn', '-o', fileName, fileName])

    def createRandomTranscript(self, cpt, id):
        """Return a random '+' strand transcript on chr1 named '<id>_<cpt>'."""
        iRRG = RandomRegionsGenerator(2)
        strand = '+'
        chromosome = 'chr1'
        size = random.randint(36, 100)
        iRRG.setSize(size)
        # NOTE(review): the upper bound is a hard-coded 1000 rather than
        # self._chromSize -- confirm whether queries are meant to be limited
        # to the first 1000 positions.
        start = random.randint(0, 1000 - size)
        transcript = iRRG.createTranscript(chromosome, start, size, strand, cpt)
        IDdetail = '%s_%d' % (id, cpt)
        transcript.setTagValue('ID', IDdetail)
        transcript.setName(IDdetail)
        return transcript

    def isOverlap(self, query, ref):
        """Return True when the closed intervals of query and ref intersect."""
        return query.getStart() <= ref.getEnd() and query.getEnd() >= ref.getStart()

    def getIntervalFromAdress(self, fileName, address):
        """Return the transcript parsed from fileName at byte offset address."""
        iParser = GffParser(fileName)
        try:
            iParser.gotoAddress(int(address))
            return iParser.getNextTranscript()
        finally:
            # Close the parser even if seeking or parsing raises.
            iParser.close()

    def getOverlapNumber(self, query, refFileName, totalOverlap):
        """Return totalOverlap plus the number of reference entries query overlaps.

        The reference file is walked line by line; the offset of each line is
        handed to ``getIntervalFromAdress`` to obtain the interval to compare.
        """
        count = totalOverlap
        with open(refFileName, 'r') as fRef:
            # readline() (not iteration) is used so tell() stays valid.
            address = fRef.tell()
            line = fRef.readline()
            while line != '':
                ref = self.getIntervalFromAdress(refFileName, address)
                # Defensive: skip the line if the parser produced nothing for
                # it (assumed possible for non-feature lines -- TODO confirm).
                if ref is not None and self.isOverlap(query, ref):
                    count += 1
                address = fRef.tell()
                line = fRef.readline()
        return count
| 117 | |
| 118 |
