Mercurial > repos > yufei-luo > s_mart
comparison SMART/Java/Python/ncList/test/MockFindOverlaps_randomExample.py @ 6:769e306b7933
Change the repository level.
author | yufei-luo |
---|---|
date | Fri, 18 Jan 2013 04:54:14 -0500 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
5:ea3082881bf8 | 6:769e306b7933 |
---|---|
1 import os | |
2 import random | |
3 from SMART.Java.Python.getRandomRegions import RandomRegionsGenerator | |
4 from commons.core.writer.TranscriptWriter import TranscriptWriter | |
5 from SMART.Java.Python.structure.Transcript import Transcript | |
6 from commons.core.parsing.GffParser import GffParser | |
7 | |
class MockFindOverlaps_randomExample(object):
    """Write a file of random regions, then sort it in place by position.

    The unsorted file is produced by MockFindOverlaps_randomExample_NonOrder;
    this class only adds the sorting step.
    """

    def __init__(self, fileName, ID, numberOfReads, chromSize):
        """Store the generation parameters.

        fileName      -- path of the file to create (and sort in place)
        ID            -- identifier kept on the instance; not used by write()
        numberOfReads -- number of random regions to generate
        chromSize     -- genome size handed to the random region generator
        """
        self._fileName = fileName
        self._ID = ID
        self._numberOfReads = numberOfReads
        self._chromSize = chromSize

    def write(self):
        """Generate the random regions and sort the resulting file in place."""
        iMFO_RE = MockFindOverlaps_randomExample_NonOrder(self._fileName, self._ID, self._numberOfReads, self._chromSize)
        iMFO_RE.write()
        self._sortByPosition(self._fileName)

    def _sortByPosition(self, fileName):
        """Sort fileName in place with the external 'sort' tool; return its exit status.

        The sort options are the ones the original shell command used
        (-f -n -k4 -k5.4rn).  The command is passed as an argument list, not
        as a shell string, so file names containing spaces or shell
        metacharacters are handled literally.
        """
        # Imported locally so this class does not depend on a file-level import.
        import subprocess
        # '--' stops option parsing so a file name starting with '-' stays an operand.
        return subprocess.call(['sort', '-f', '-n', '-k4', '-k5.4rn', '-o', fileName, '--', fileName])
21 | |
class MockFindOverlaps_randomExample_NonOrder(object):
    """Write a file of random regions on "chr1" without sorting it."""

    def __init__(self, fileName, ID, numberOfReads, chromSize):
        """Remember the output path, the identifier, the region count and the genome size."""
        self._fileName, self._ID = fileName, ID
        self._numberOfReads, self._chromSize = numberOfReads, chromSize

    def write(self):
        """Configure a RandomRegionsGenerator and run it on the output file."""
        # The constructor argument 2 is presumably a verbosity level -- TODO confirm.
        generator = RandomRegionsGenerator(2)
        # Region sizes are bounded to 36..100.
        generator.setMinSize(36)
        generator.setMaxSize(100)
        generator.setGenomeSize(self._chromSize)
        generator.setChromosomeName("chr1")
        generator.setStrands(False)
        generator.setNumber(self._numberOfReads)
        generator.setOutputFile(self._fileName)
        generator.run()
40 | |
41 | |
class MockFindOverlaps_randomExample_MOverlaps(object):
    """Build a random reference file and a query file with a target overlap count.

    The reference file is produced by MockFindOverlaps_randomExample.  Ten
    random query transcripts are then drawn repeatedly until the cumulated
    number of query/reference overlaps equals the requested overlap number.
    """

    def __init__(self, refFileName, queryFileName, overlapNumber, numberOfReads, chromSize):
        """Store the file names and the generation parameters.

        refFileName   -- path of the reference file to create
        queryFileName -- path of the query file to create
        overlapNumber -- exact total number of overlaps to reach
        numberOfReads -- number of random reference regions
        chromSize     -- genome size used for the reference regions
        """
        self._refFileName = refFileName
        self._queryFileName = queryFileName
        self._overlapNumber = overlapNumber
        self._numberOfReads = numberOfReads
        self._chromSize = chromSize

    def createRandomExample(self):
        """Create the reference file and the query file, then sort both in place.

        The loop only ends when the ten queries of one attempt cumulate
        exactly self._overlapNumber overlaps, so it may run for a long time
        (or forever) if that total cannot be reached.
        """
        prefix = 'reference'
        numberOfQueries = 10
        iRSS = MockFindOverlaps_randomExample(self._refFileName, prefix, self._numberOfReads, self._chromSize)
        iRSS.write()
        self.queryWriter = TranscriptWriter(self._queryFileName , 'gff3')
        queries = []
        totalOverlap = 0
        while totalOverlap != self._overlapNumber:
            # Start a fresh attempt: queries of a failed attempt must not
            # reach the output file, so they are buffered here and handed to
            # the writer only once the target total has been hit.
            queries = []
            totalOverlap = 0
            for i in range(numberOfQueries):
                query = self.createRandomTranscript(i, prefix)
                overlapNumber = self.getOverlapNumber(query, self._refFileName, totalOverlap)
                # Redraw the query until the running total does not exceed the target.
                while overlapNumber > self._overlapNumber:
                    query = self.createRandomTranscript(i, prefix)
                    overlapNumber = self.getOverlapNumber(query, self._refFileName, totalOverlap)
                totalOverlap = overlapNumber
                queries.append(query)
        for query in queries:
            self.queryWriter.addTranscript(query)
        self.queryWriter.write()
        self.queryWriter.close()

        self._sortGffFile(self._refFileName)
        self._sortGffFile(self._queryFileName)

    def _sortGffFile(self, fileName):
        """Sort fileName in place with the external 'sort' tool; return its exit status.

        Uses the same sort options as the original shell command, passed as
        an argument list so the file name is never interpreted by a shell.
        """
        # Imported locally so this class does not depend on a file-level import.
        import subprocess
        # '--' stops option parsing so a file name starting with '-' stays an operand.
        return subprocess.call(['sort', '-f', '-n', '-k4', '-k5.4rn', '-o', fileName, '--', fileName])

    def createRandomTranscript(self, cpt, id):
        """Return a random '+' strand transcript on chr1 named '<id>_<cpt>'.

        cpt -- counter passed to the generator and used in the name
        id  -- name prefix (parameter name kept for compatibility although
               it shadows the builtin)
        """
        iRRG = RandomRegionsGenerator(2)
        strand = '+'
        chromosome = 'chr1'
        size = random.randint(36, 100)
        iRRG.setSize(size)
        # NOTE(review): the start is drawn below 1000 whatever self._chromSize
        # is -- confirm whether the chromosome size was meant here.
        start = random.randint(0, 1000-size)
        transcript = iRRG.createTranscript(chromosome, start, size, strand, cpt)
        IDdetail = '%s_%d'%(id,cpt)
        transcript.setTagValue('ID', IDdetail)
        transcript.setName(IDdetail)
        return transcript

    def isOverlap(self, query, ref):
        """Return True when the [start, end] ranges of query and ref intersect (bounds inclusive)."""
        return (query.getStart() <= ref.getEnd() and query.getEnd() >= ref.getStart())

    def getIntervalFromAdress(self, fileName, address):
        """Return the transcript GffParser reads after moving to address in fileName."""
        iParser = GffParser(fileName)
        try:
            iParser.gotoAddress(int(address))
            iTranscrit = iParser.getNextTranscript()
        finally:
            # Close the parser even when seeking or parsing fails.
            iParser.close()
        return iTranscrit

    def getOverlapNumber(self, query, refFileName, totalOverlap):
        """Return totalOverlap plus the number of reference lines overlapping query.

        query        -- object exposing getStart() and getEnd()
        refFileName  -- reference file, scanned line by line
        totalOverlap -- overlap count accumulated so far
        """
        count = totalOverlap
        with open(refFileName, 'r') as fRef:
            # tell() is taken before each readline() so that address is the
            # offset of the line about to be parsed.
            address = fRef.tell()
            line = fRef.readline()
            while line != '':
                ref = self.getIntervalFromAdress(refFileName, address)
                if self.isOverlap(query, ref):
                    count += 1
                address = fRef.tell()
                line = fRef.readline()
        return count
117 | |
118 |