comparison SMART/Java/Python/ncList/test/MockFindOverlaps_randomExample.py @ 6:769e306b7933

Change the repository level.
author yufei-luo
date Fri, 18 Jan 2013 04:54:14 -0500
parents
children
comparison
equal deleted inserted replaced
5:ea3082881bf8 6:769e306b7933
1 import os
2 import random
3 from SMART.Java.Python.getRandomRegions import RandomRegionsGenerator
4 from commons.core.writer.TranscriptWriter import TranscriptWriter
5 from SMART.Java.Python.structure.Transcript import Transcript
6 from commons.core.parsing.GffParser import GffParser
7
class MockFindOverlaps_randomExample(object):
    """Write a file of random regions and sort it in place.

    The regions are produced by MockFindOverlaps_randomExample_NonOrder;
    the resulting file is then reordered by the external ``sort`` command.
    """

    def __init__(self, fileName, ID, numberOfReads, chromSize):
        """Store the generation settings.

        fileName      -- path of the file to create and then sort
        ID            -- identifier kept on the instance and forwarded to
                         the unsorted generator
        numberOfReads -- number of regions requested from the generator
        chromSize     -- genome size handed to the generator
        """
        self._fileName = fileName
        self._ID = ID
        self._numberOfReads = numberOfReads
        self._chromSize = chromSize

    def write(self):
        """Generate the unsorted region file, then sort it in place."""
        iMFO_RE = MockFindOverlaps_randomExample_NonOrder(self._fileName, self._ID, self._numberOfReads, self._chromSize)
        iMFO_RE.write()
        self._sortInPlace(self._fileName)

    @staticmethod
    def _sortInPlace(fileName):
        """Run ``sort -f -n -k4 -k5.4rn`` on fileName, writing back to it.

        The command is passed as an argument list rather than a shell
        string, so a file name containing spaces or shell metacharacters
        reaches ``sort`` as a single, literal argument.  The exit status
        of ``sort`` is ignored; OSError is raised if the ``sort``
        executable cannot be started.
        """
        # Imported here so the block does not depend on a file-level import.
        import subprocess
        # First key: field 4 onwards, numeric (presumably the start column
        # of the generated records -- confirm against the generator output).
        # Second key: field 5 from its 4th character, reverse numeric.
        # '--' stops option parsing so a name starting with '-' is a file.
        subprocess.call(['sort', '-f', '-n', '-k4', '-k5.4rn', '-o', fileName, '--', fileName])
21
class MockFindOverlaps_randomExample_NonOrder(object):
    """Write random regions to a file without sorting them afterwards."""

    def __init__(self, fileName, ID, numberOfReads, chromSize):
        """Remember the settings used by write().

        fileName      -- output file handed to the region generator
        ID            -- identifier kept on the instance (not read by write())
        numberOfReads -- how many regions to generate
        chromSize     -- genome size given to the generator
        """
        self._fileName = fileName
        self._ID = ID
        self._numberOfReads = numberOfReads
        self._chromSize = chromSize

    def write(self):
        """Configure a RandomRegionsGenerator and run it.

        Regions are between 36 and 100 long, on chromosome "chr1", with
        setStrands(False); the generator writes to the stored file name.
        """
        # The constructor argument 2 is presumably a verbosity level -- confirm.
        regionGenerator = RandomRegionsGenerator(2)
        regionGenerator.setMinSize(36)
        regionGenerator.setMaxSize(100)
        regionGenerator.setGenomeSize(self._chromSize)
        regionGenerator.setChromosomeName("chr1")
        regionGenerator.setStrands(False)
        regionGenerator.setNumber(self._numberOfReads)
        regionGenerator.setOutputFile(self._fileName)
        regionGenerator.run()
40
41
class MockFindOverlaps_randomExample_MOverlaps(object):
    """Build a random reference file plus a query file with a target overlap count.

    The reference is written by MockFindOverlaps_randomExample; queries are
    random transcripts drawn until their cumulated overlap count against the
    reference equals the requested number.
    """

    def __init__(self, refFileName, queryFileName, overlapNumber, numberOfReads, chromSize):
        """Store the settings.

        refFileName   -- path of the reference file to generate
        queryFileName -- path given to the query TranscriptWriter
        overlapNumber -- exact cumulated overlap count the queries must reach
        numberOfReads -- number of reference regions to generate
        chromSize     -- genome size used for the reference regions
        """
        self._refFileName = refFileName
        self._queryFileName = queryFileName
        self._overlapNumber = overlapNumber
        self._numberOfReads = numberOfReads
        self._chromSize = chromSize

    def createRandomExample(self):
        """Write the reference file and the query file, then sort both.

        Only the queries of the attempt that reaches the target overlap
        count are written; queries from abandoned attempts are discarded
        instead of piling up in the output.  When the target is 0 no query
        is drawn at all and the writer is opened and closed empty.
        """
        refId = 'reference'
        iRSS = MockFindOverlaps_randomExample(self._refFileName, refId, self._numberOfReads, self._chromSize)
        iRSS.write()
        self.queryWriter = TranscriptWriter(self._queryFileName, 'gff3')
        for query in self._drawQueries(refId):
            self.queryWriter.addTranscript(query)
            self.queryWriter.write()
        self.queryWriter.close()
        # NOTE(review): a rename of "<queryFileName>.gff3" to queryFileName
        # used to follow here and was disabled; confirm TranscriptWriter
        # writes to exactly the name it is given.

        self._sortInPlace(self._refFileName)
        self._sortInPlace(self._queryFileName)

    def _drawQueries(self, refId):
        """Return the queries of the first attempt that hits the target count.

        Each attempt draws 10 transcripts; a candidate that would push the
        cumulated count above the target is redrawn.  An attempt whose final
        count differs from the target is thrown away and a new one starts.
        There is no retry limit, so an unreachable target never returns.
        """
        queries = []
        totalOverlap = 0
        while totalOverlap != self._overlapNumber:
            queries = []
            totalOverlap = 0
            for i in range(10):
                query = self.createRandomTranscript(i, refId)
                overlapNumber = self.getOverlapNumber(query, self._refFileName, totalOverlap)
                while overlapNumber > self._overlapNumber:
                    query = self.createRandomTranscript(i, refId)
                    overlapNumber = self.getOverlapNumber(query, self._refFileName, totalOverlap)
                totalOverlap = overlapNumber
                queries.append(query)
        return queries

    @staticmethod
    def _sortInPlace(fileName):
        """Run ``sort -f -n -k4 -k5.4rn`` on fileName, writing back to it.

        The command is passed as an argument list rather than a shell
        string, so a file name containing spaces or shell metacharacters
        reaches ``sort`` as a single, literal argument.  The exit status
        of ``sort`` is ignored; OSError is raised if the ``sort``
        executable cannot be started.
        """
        # Imported here so the block does not depend on a file-level import.
        import subprocess
        # '--' stops option parsing so a name starting with '-' is a file.
        subprocess.call(['sort', '-f', '-n', '-k4', '-k5.4rn', '-o', fileName, '--', fileName])

    def createRandomTranscript(self, cpt, id):
        """Create one random '+' strand transcript on chr1.

        cpt -- index of the transcript, passed to the generator and used
               in the transcript name
        id  -- prefix of the name; the ID tag and name are "<id>_<cpt>"
        """
        iRRG = RandomRegionsGenerator(2)
        strand = '+'
        chromosome = 'chr1'
        size = random.randint(36, 100)
        iRRG.setSize(size)
        # NOTE(review): the upper bound is a fixed 1000, not self._chromSize;
        # confirm this is intended when the reference uses another size.
        start = random.randint(0, 1000-size)
        transcript = iRRG.createTranscript(chromosome, start, size, strand, cpt)
        IDdetail = '%s_%d'%(id,cpt)
        transcript.setTagValue('ID', IDdetail)
        transcript.setName(IDdetail)
        return transcript

    def isOverlap(self, query, ref):
        """Tell whether the closed intervals of query and ref intersect.

        Both arguments only need getStart() and getEnd(); intervals that
        merely touch at one position count as overlapping.
        """
        return query.getStart() <= ref.getEnd() and query.getEnd() >= ref.getStart()

    def getIntervalFromAdress(self, fileName, address):
        """Return the transcript GffParser reads at offset address in fileName.

        The parser is closed even when seeking or parsing raises.
        """
        iParser = GffParser(fileName)
        try:
            iParser.gotoAddress(int(address))
            return iParser.getNextTranscript()
        finally:
            iParser.close()

    def getOverlapNumber(self, query, refFileName, totalOverlap):
        """Return totalOverlap plus the number of reference records hit by query.

        Every line of refFileName is treated as one record: the offset at
        which the line starts is handed to getIntervalFromAdress and the
        parsed transcript is tested with isOverlap.  The file is closed
        even if parsing a record raises.
        """
        count = totalOverlap
        with open(refFileName, 'r') as fRef:
            # readline() is used rather than iterating the file so that
            # tell() stays usable between lines.
            address = fRef.tell()
            line = fRef.readline()
            while line != '':
                ref = self.getIntervalFromAdress(refFileName, address)
                if self.isOverlap(query, ref):
                    count += 1
                address = fRef.tell()
                line = fRef.readline()
        return count
117
118