annotate SMART/Java/Python/getRandomRegions.py @ 69:1473ab954708 draft

Corrected bug in "CollapsedReads" XML file.
author m-zytnicki
date Wed, 18 Nov 2015 10:59:02 -0500
parents 169d364ddd91
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
6
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1 #! /usr/bin/env python
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
2 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
3 # Copyright INRA-URGI 2009-2010
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
4 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
5 # This software is governed by the CeCILL license under French law and
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
6 # abiding by the rules of distribution of free software. You can use,
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
7 # modify and/ or redistribute the software under the terms of the CeCILL
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
8 # license as circulated by CEA, CNRS and INRIA at the following URL
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
9 # "http://www.cecill.info".
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
10 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
11 # As a counterpart to the access to the source code and rights to copy,
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
12 # modify and redistribute granted by the license, users are provided only
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
13 # with a limited warranty and the software's author, the holder of the
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
14 # economic rights, and the successive licensors have only limited
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
15 # liability.
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
16 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
17 # In this respect, the user's attention is drawn to the risks associated
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
18 # with loading, using, modifying and/or developing or reproducing the
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
19 # software by the user in light of its specific status of free software,
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
20 # that may mean that it is complicated to manipulate, and that also
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
21 # therefore means that it is reserved for developers and experienced
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
22 # professionals having in-depth computer knowledge. Users are therefore
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
23 # encouraged to load and test the software's suitability as regards their
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
24 # requirements in conditions enabling the security of their systems and/or
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
25 # data to be ensured and, more generally, to use and operate it in the
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
26 # same conditions as regards security.
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
27 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
28 # The fact that you are presently reading this means that you have had
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
29 # knowledge of the CeCILL license and that you accept its terms.
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
30 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
31 """Find random regions in a genome"""
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
32
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
33 import random, math
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
34 from optparse import OptionParser
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
35 from commons.core.parsing.FastaParser import *
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
36 from commons.core.writer.Gff3Writer import *
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
37 from commons.core.writer.MySqlTranscriptWriter import *
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
38 from SMART.Java.Python.misc.Progress import *
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
39 from SMART.Java.Python.structure.Transcript import Transcript
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
40 from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
41
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
# Number of uniform draws averaged by RandomRegionsGenerator.getDev().
repetitions = 100


class RandomRegionsGenerator(object):
    """Generate random regions on a genome and write them with Gff3Writer.

    Two modes are supported:

    * shuffling: an annotation file is loaded with setAnnotation() and every
      transcript read from it is moved to a random position;
    * generation: `number` regions whose size lies in [minSize, maxSize] are
      created, either one by one ("uniform" distribution) or in clusters
      scattered around a random centre ("gaussian" distribution).

    The genome is described either by a FASTA file (setInput) or by a single
    chromosome name and size (setChromosomeName / setGenomeSize).
    """

    def __init__(self, verbosity):
        """Create a generator; `verbosity` is forwarded to the helper objects."""
        self.verbosity = verbosity
        self.strands = False
        self.distribution = "uniform"
        self.transcripts = None
        self.sequenceParser = None
        # Values normally provided through the setters below.  They are
        # initialised here so that a forgotten setter does not surface as an
        # AttributeError far away from its cause.
        self.genomeSize = None
        self.chromosomeName = None
        self.outputFileName = None
        self.minSize = None
        self.maxSize = None
        self.number = None
        # Neutral gaussian parameters: one element per cluster, no jitter.
        # Either one can be given alone on the command line.
        self.maxElements = 1
        self.deviation = 0
        # Filled by getSizes().
        self.chromosomes = []
        self.sizes = {}
        self.cumulatedSize = 0
        self.cumulatedSizes = {}
        random.seed()

    def setInput(self, fileName):
        """Use the FASTA file `fileName` as the reference genome."""
        self.sequenceParser = FastaParser(fileName, self.verbosity)

    def setGenomeSize(self, size):
        """Set the size of the single chromosome used when no FASTA is given."""
        self.genomeSize = size

    def setChromosomeName(self, name):
        """Set the name of the single chromosome used when no FASTA is given."""
        self.chromosomeName = name

    def setAnnotation(self, fileName, format):
        """Load the transcripts to shuffle from `fileName` (in `format`).

        The number of regions becomes the number of transcripts read, and the
        random size is reset to 0 because the transcripts bring their own.
        """
        parser = TranscriptContainer(fileName, format, self.verbosity)
        self.transcripts = [transcript for transcript in parser.getIterator()]
        self.setNumber(len(self.transcripts))
        self.setSize(0)

    def setOutputFile(self, fileName):
        """Set the name of the output file."""
        self.outputFileName = fileName

    def setSize(self, size):
        """Use the fixed region size `size` (minimum and maximum alike)."""
        self.minSize = size
        self.maxSize = size

    def setMinSize(self, size):
        """Set the minimum region size."""
        self.minSize = size

    def setMaxSize(self, size):
        """Set the maximum region size."""
        self.maxSize = size

    def setNumber(self, number):
        """Set the number of regions to produce."""
        self.number = number

    def setStrands(self, strands):
        """If `strands` is true, generated regions get a random strand."""
        self.strands = strands

    def setMaxDistribution(self, maxElements):
        """Set the maximum cluster size and switch to the gaussian mode.

        A value of None is ignored and leaves the current mode untouched.
        """
        if maxElements is None:
            return
        self.maxElements = maxElements
        self.distribution = "gaussian"

    def setDeviationDistribution(self, deviation):
        """Set the jitter around a cluster centre and switch to gaussian mode.

        A value of None is ignored and leaves the current mode untouched.
        """
        if deviation is None:
            return
        self.deviation = deviation
        self.distribution = "gaussian"

    def getSizes(self):
        """Collect chromosome names, sizes and cumulated sizes.

        Without a FASTA parser the genome is the single chromosome declared
        through setChromosomeName() / setGenomeSize().
        """
        if self.sequenceParser is None:
            self.chromosomes = [self.chromosomeName]
            self.sizes = {self.chromosomeName: self.genomeSize}
            self.cumulatedSize = self.genomeSize
            self.cumulatedSizes = {self.chromosomeName: self.genomeSize}
            return
        self.chromosomes = self.sequenceParser.getRegions()
        self.sizes = {}
        self.cumulatedSize = 0
        self.cumulatedSizes = {}
        for chromosome in self.chromosomes:
            self.sizes[chromosome] = self.sequenceParser.getSizeOfRegion(chromosome)
            self.cumulatedSize += self.sizes[chromosome]
            self.cumulatedSizes[chromosome] = self.cumulatedSize

    def findPosition(self, size = None):
        """Draw a random (chromosome, start, size) triple.

        If `size` is None, it is drawn uniformly in [minSize, maxSize].  The
        chromosome is drawn with a probability proportional to its size,
        among the chromosomes long enough to hold the region; the start is
        then drawn uniformly in [1, chromosome size - size].

        Raises ValueError when no chromosome can hold the region.
        """
        if size is None:
            size = random.randint(self.minSize, self.maxSize)
        # Chromosomes too short for the region are skipped: drawing one of
        # them would hand an empty range to randint() below.
        candidates = [name for name in self.chromosomes if self.sizes[name] - size >= 1]
        if not candidates:
            raise ValueError("no chromosome is long enough to hold a region of size %d" % (size))
        # randrange() excludes its upper bound, so every base has the same
        # weight (an inclusive bound would favour the last chromosome).
        remaining = random.randrange(sum(self.sizes[name] for name in candidates))
        for chromosome in candidates:
            remaining -= self.sizes[chromosome]
            if remaining < 0:
                break
        start = random.randint(1, self.sizes[chromosome] - size)
        return (chromosome, start, size)

    def createTranscript(self, chromosome, start, size, strand, cpt):
        """Build the transcript "rand_<cpt>" covering [start, start + size - 1]."""
        transcript = Transcript()
        transcript.setChromosome(chromosome)
        # End is set before start, in the same order as the historical code.
        transcript.setEnd(start + size-1)
        transcript.setStart(start)
        transcript.setDirection(strand)
        transcript.setName("rand_%d" % (cpt))
        return transcript

    def moveTranscript(self, chromosome, start, transcript):
        """Return a one-element list with a copy of `transcript` moved to `start`.

        Exon structure, tags and direction are kept.  If the moved transcript
        would run past the end of `chromosome`, another position is drawn.
        """
        span = transcript.getEnd() - transcript.getStart()
        # Compare with the size of this very chromosome: the cumulated size
        # also counts the chromosomes listed before it.
        if start + span > self.sizes[chromosome]:
            chromosome, start, size = self.findPosition(span)
        offset = start - transcript.getStart()
        newTranscript = Transcript()
        newTranscript.setChromosome(chromosome)
        newTranscript.tags = transcript.tags
        if transcript.getNbExons() > 1:
            # Local import: Interval is not imported explicitly at the top of
            # the file.  NOTE(review): module path assumed from the sibling
            # Transcript import - confirm.
            from SMART.Java.Python.structure.Interval import Interval
            # Iterate over the exons themselves; getNbExons() is only a count.
            for exon in transcript.getExons():
                newExon = Interval()
                newExon.setChromosome(chromosome)
                newExon.setEnd(exon.getEnd() + offset)
                newExon.setStart(exon.getStart() + offset)
                newTranscript.addExon(newExon)
        newTranscript.setEnd(transcript.getEnd() + offset)
        newTranscript.setStart(start)
        newTranscript.setDirection(transcript.getDirection())
        return [newTranscript]

    def createUniformCluster(self, chromosome, start, size, strand, cpt):
        """Return a "cluster" made of a single transcript."""
        transcript = self.createTranscript(chromosome, start, size, strand, cpt)
        return [transcript]

    def findNbTranscripts(self, cpt):
        """Draw the number of transcripts of the cluster starting at rank `cpt`.

        The value is exp(u * log(maxElements)) with u uniform in [0, 1),
        rounded, and capped so that no more than `number` transcripts are
        produced overall.
        """
        # A maximum below 1 would make log() fail; treat it as 1.
        maxElements = max(1, self.maxElements)
        drawn = int(round(math.exp(random.random() * math.log(maxElements))))
        return min(drawn, self.number - cpt + 1)

    def getDev(self):
        """Return a random integer offset in [-deviation, deviation].

        The offset is the rounded mean of `repetitions` uniform draws, which
        concentrates the values around 0.
        """
        bound = abs(self.deviation)
        deviation = 0.0
        for j in range(repetitions):
            deviation += random.randint(-bound, bound)
        deviation /= repetitions
        deviation = int(round(deviation))
        return deviation

    def createGaussianCluster(self, chromosome, start, size, strand, cpt):
        """Return a cluster of transcripts jittered around (start, size).

        Each member gets its own start and size offsets from getDev().  The
        coordinates are clamped so that the start is at least 1, the size at
        least 1, and - when the chromosome size is known - the end does not
        exceed the chromosome.
        """
        transcripts = []
        nbTranscripts = self.findNbTranscripts(cpt)
        chromosomeSize = self.sizes.get(chromosome)
        for i in range(nbTranscripts):
            memberStart = max(1, start + self.getDev())
            memberSize = max(1, size + self.getDev())
            if chromosomeSize is not None:
                memberStart = min(memberStart, chromosomeSize)
                memberSize = min(memberSize, chromosomeSize - memberStart + 1)
            transcript = self.createTranscript(chromosome, memberStart, memberSize, strand, cpt + i)
            transcripts.append(transcript)
        return transcripts

    def writeRegions(self):
        """Create `number` regions and write them to the output file."""
        writer = Gff3Writer(self.outputFileName, self.verbosity)
        # The historical code opened the output path in "w" mode here and
        # kept the unused handle until the end of the run.  The create /
        # truncate side effect is kept, but the handle is released at once.
        with open(self.outputFileName, "w"):
            pass
        progress = Progress(self.number, "Writing to %s" % (self.outputFileName), self.verbosity)
        i = 0
        while i < self.number:
            chromosome, start, size = self.findPosition()
            strand = random.choice([-1, 1]) if self.strands else 1
            if self.transcripts is not None:
                transcripts = self.moveTranscript(chromosome, start, self.transcripts[i])
            elif self.distribution == "uniform":
                transcripts = self.createUniformCluster(chromosome, start, size, strand, i+1)
            else:
                transcripts = self.createGaussianCluster(chromosome, start, size, strand, i+1)
            # A cluster may hold several transcripts; each one counts.
            for transcript in transcripts:
                writer.addTranscript(transcript)
                i += 1
                progress.inc()
        progress.done()
        writer.write()
        writer.close()

    def run(self):
        """Read the genome sizes, then generate and write the regions."""
        self.getSizes()
        self.writeRegions()
6
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
227
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
228
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
if __name__ == "__main__":

    # parse command line
    description = "Get Random Regions v1.0.2: Get some random coordinates on a genome. May use uniform or gaussian distribution (in gaussion distribution, # of element per cluster follows a power law). [Category: Other]"

    parser = OptionParser(description = description)
    parser.add_option("-r", "--reference", dest="reference", action="store", default=None, type="string", help="file that contains the sequences [format: file in FASTA format]")
    parser.add_option("-S", "--referenceSize", dest="referenceSize", action="store", default=None, type="int", help="size of the chromosome (when no reference is given) [format: int]")
    parser.add_option("-c", "--chromosome", dest="chromosome", action="store", default=None, type="string", help="name of the chromosome (when no reference is given) [format: string]")
    parser.add_option("-o", "--output", dest="outputFileName", action="store", type="string", help="output file [compulsory] [format: output file in FASTA format]")
    parser.add_option("-i", "--input", dest="inputFileName", action="store", default=None, type="string", help="optional file containing regions to shuffle [format: file in transcript format given by -f]")
    parser.add_option("-f", "--format", dest="format", action="store", default=None, type="string", help="format of the previous file [format: transcript file format]")
    parser.add_option("-s", "--size", dest="size", action="store", default=None, type="int", help="size of the regions (if no region set is provided) [format: int]")
    parser.add_option("-z", "--minSize", dest="minSize", action="store", default=None, type="int", help="minimum size of the regions (if no region set nor a fixed size are provided) [format: int]")
    parser.add_option("-Z", "--maxSize", dest="maxSize", action="store", default=None, type="int", help="maximum size of the regions (if no region set nor a fixed size are provided) [format: int]")
    parser.add_option("-n", "--number", dest="number", action="store", default=None, type="int", help="number of regions (if no region set is provided) [format: int]")
    parser.add_option("-t", "--strands", dest="strands", action="store_true", default=False, help="use both strands (if no region set is provided) [format: boolean]")
    parser.add_option("-m", "--max", dest="max", action="store", default=None, type="int", help="max. # reads in a cluster (for Gaussian dist.) [format: int]")
    parser.add_option("-d", "--deviation", dest="deviation", action="store", default=None, type="int", help="deviation around the center of the cluster (for Gaussian dist.) [format: int]")
    parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int]")
    (options, args) = parser.parse_args()

    # Report missing or inconsistent options here, with the usage message,
    # rather than letting them fail later inside the generator.
    if options.outputFileName is None:
        parser.error("an output file is required (option -o)")
    if options.reference is None and (options.referenceSize is None or options.chromosome is None):
        parser.error("give either a reference file (option -r) or both a chromosome name and size (options -c and -S)")
    if options.inputFileName is None:
        if options.number is None:
            parser.error("the number of regions is required when no region set is provided (option -n)")
        if options.size is None:
            if options.minSize is None or options.maxSize is None:
                parser.error("give either a fixed size (option -s) or both a minimum and a maximum size (options -z and -Z)")
            if options.minSize > options.maxSize:
                parser.error("the minimum size (option -z) cannot exceed the maximum size (option -Z)")

    rrg = RandomRegionsGenerator(options.verbosity)
    # Genome: a FASTA file if given, a single named chromosome otherwise.
    if options.reference is None:
        rrg.setGenomeSize(options.referenceSize)
        rrg.setChromosomeName(options.chromosome)
    else:
        rrg.setInput(options.reference)
    rrg.setOutputFile(options.outputFileName)
    # Regions: generated from the size/number options, or shuffled from the
    # input annotation.
    if options.inputFileName is None:
        if options.size is not None:
            rrg.setSize(options.size)
        else:
            rrg.setMinSize(options.minSize)
            rrg.setMaxSize(options.maxSize)
        rrg.setNumber(options.number)
        rrg.setStrands(options.strands)
    else:
        rrg.setAnnotation(options.inputFileName, options.format)
    # Both setters ignore None, so the uniform mode stays selected unless
    # -m or -d was given.
    rrg.setMaxDistribution(options.max)
    rrg.setDeviationDistribution(options.deviation)
    rrg.run()
6
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
271