annotate SMART/Java/Python/getDifference.py @ 44:5f796c5c579f

Uploaded
author m-zytnicki
date Wed, 18 Sep 2013 08:32:38 -0400
parents 769e306b7933
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
6
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
1 #! /usr/bin/env python
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
2 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
3 # Copyright INRA-URGI 2009-2010
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
4 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
5 # This software is governed by the CeCILL license under French law and
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
6 # abiding by the rules of distribution of free software. You can use,
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
7 # modify and/ or redistribute the software under the terms of the CeCILL
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
8 # license as circulated by CEA, CNRS and INRIA at the following URL
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
9 # "http://www.cecill.info".
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
10 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
11 # As a counterpart to the access to the source code and rights to copy,
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
12 # modify and redistribute granted by the license, users are provided only
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
13 # with a limited warranty and the software's author, the holder of the
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
14 # economic rights, and the successive licensors have only limited
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
15 # liability.
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
16 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
17 # In this respect, the user's attention is drawn to the risks associated
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
18 # with loading, using, modifying and/or developing or reproducing the
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
19 # software by the user in light of its specific status of free software,
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
20 # that may mean that it is complicated to manipulate, and that also
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
21 # therefore means that it is reserved for developers and experienced
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
22 # professionals having in-depth computer knowledge. Users are therefore
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
23 # encouraged to load and test the software's suitability as regards their
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
24 # requirements in conditions enabling the security of their systems and/or
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
25 # data to be ensured and, more generally, to use and operate it in the
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
26 # same conditions as regards security.
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
27 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
28 # The fact that you are presently reading this means that you have had
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
29 # knowledge of the CeCILL license and that you accept its terms.
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
30 #
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
31 """Get the regions of a genome not covered by a transcript list, or the elements of a transcript list which do not overlap a reference list"""
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
32
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
33 from optparse import OptionParser
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
34 from SMART.Java.Python.structure.Transcript import Transcript
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
35 from SMART.Java.Python.structure.TranscriptContainer import TranscriptContainer
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
36 from SMART.Java.Python.structure.TranscriptListsComparator import TranscriptListsComparator
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
37 from commons.core.writer.Gff3Writer import Gff3Writer
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
38 from commons.core.parsing.FastaParser import FastaParser
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
39 from SMART.Java.Python.misc.Progress import Progress
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
40
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
class DifferenceGetter(object):
    """Compute the difference between an annotation and a reference.

    Two modes are available; run() picks one:

    - if a reference transcript file was set, the work is delegated to
      TranscriptListsComparator.getDifferenceTranscriptList(), with the
      annotation as query;
    - otherwise, every annotated transcript is removed from the regions
      listed by the sequence file, and the intervals which remain are
      written as transcripts named "region_N".

    Intervals are handled as inclusive [start, end] pairs, the regions of
    the sequence file being initialised as [1, size].
    """

    def __init__(self, verbosity):
        """Create a getter with no file attached yet.

        verbosity -- trace level forwarded to the parsers, the writer and
                     the progress display.
        """
        self.verbosity = verbosity
        self.annotationParser = None
        self.referenceParser = None
        self.sequenceParser = None
        # Set by setOutputFile(); kept as None until then so that run() can
        # report a missing output instead of failing on a missing attribute.
        self.writer = None
        # chromosome -> list of [start, end] intervals not covered so far;
        # filled by initialize().
        self.presence = {}
        # Number used for the name of the next transcript created.
        self.transcriptCount = 1
        self.split = False

    def createTranscript(self, chromosome, start, end):
        """Return a new "+" transcript spanning [start, end] on chromosome.

        The transcript is named "region_N" (name and "ID" tag), N being a
        counter which is incremented at each call.
        """
        name = "region_%d" % self.transcriptCount
        transcript = Transcript()
        transcript.setChromosome(chromosome)
        transcript.setDirection("+")
        transcript.setStart(start)
        transcript.setEnd(end)
        transcript.setName(name)
        transcript.setTagValue("ID", name)
        self.transcriptCount += 1
        return transcript

    def setSplit(self, split):
        """Store the flag later given to the comparator's setSplitDifference()."""
        self.split = split

    def setAnnotationFile(self, fileName, format):
        """Attach the annotation (query) file; does nothing if fileName is None."""
        if fileName is not None:
            self.annotationParser = TranscriptContainer(fileName, format, self.verbosity)

    def setReferenceFile(self, fileName, format):
        """Attach the reference transcript file; does nothing if fileName is None."""
        if fileName is not None:
            self.referenceParser = TranscriptContainer(fileName, format, self.verbosity)

    def setSequenceFile(self, fileName):
        """Attach the FASTA sequence file; does nothing if fileName is None."""
        if fileName is not None:
            self.sequenceParser = FastaParser(fileName, self.verbosity)

    def setOutputFile(self, fileName):
        """Create the GFF3 writer used for the output."""
        self.writer = Gff3Writer(fileName, self.verbosity)

    def initialize(self):
        """Mark every region of the sequence file as entirely uncovered."""
        self.presence = {}
        for chromosome in self.sequenceParser.getRegions():
            size = self.sequenceParser.getSizeOfRegion(chromosome)
            self.presence[chromosome] = [[1, size]]

    def _removeRegion(self, chromosome, regionStart, regionEnd):
        """Remove the inclusive range [regionStart, regionEnd] from the
        uncovered intervals of chromosome.

        Intervals which do not overlap the range are kept, in their
        original order; the left and right remainders of the overlapping
        ones are appended after them.

        Raises KeyError if chromosome has no entry in self.presence.
        """
        try:
            intervals = self.presence[chromosome]
        except KeyError:
            raise KeyError("chromosome '%s' of the annotation has no region in the sequence file" % chromosome)
        kept = []
        remainders = []
        for interval in intervals:
            start, end = interval
            if start <= regionEnd and regionStart <= end:
                # Overlap: only the parts sticking out on each side survive.
                if start < regionStart:
                    remainders.append([start, regionStart - 1])
                if end > regionEnd:
                    remainders.append([regionEnd + 1, end])
            else:
                kept.append(interval)
        # Update in place, so the list stored in self.presence stays the same object.
        intervals[:] = kept + remainders

    def readTranscripts(self):
        """Remove each transcript of the annotation from the uncovered intervals."""
        nbTranscripts = self.annotationParser.getNbTranscripts()
        progress = Progress(nbTranscripts, "Parsing annotation file", self.verbosity)
        for transcript in self.annotationParser.getIterator():
            self._removeRegion(transcript.getChromosome(), transcript.getStart(), transcript.getEnd())
            progress.inc()
        progress.done()

    def writeOutput(self):
        """Write each remaining interval as a "region_N" transcript."""
        for chromosome in self.presence:
            for start, end in self.presence[chromosome]:
                self.writer.addTranscript(self.createTranscript(chromosome, start, end))
        self.writer.write()

    def compareToSequence(self):
        """Write the regions of the sequence file not covered by the annotation."""
        self.initialize()
        self.readTranscripts()
        self.writeOutput()

    def compareToAnnotation(self):
        """Delegate the comparison of the annotation (query) with the
        reference to TranscriptListsComparator, which writes to self.writer.
        """
        transcriptListComparator = TranscriptListsComparator(None, self.verbosity)
        transcriptListComparator.setSplitDifference(self.split)
        transcriptListComparator.setInputTranscriptContainer(transcriptListComparator.QUERY, self.annotationParser)
        transcriptListComparator.setInputTranscriptContainer(transcriptListComparator.REFERENCE, self.referenceParser)
        transcriptListComparator.setOutputWriter(self.writer)
        transcriptListComparator.getDifferenceTranscriptList()

    def run(self):
        """Run the comparison against the reference file if one was set,
        against the sequence file otherwise.

        Raises ValueError if the annotation file or the output file was
        not set, or if neither a reference nor a sequence file was set.
        """
        if self.annotationParser is None:
            raise ValueError("no annotation file was set")
        if self.writer is None:
            raise ValueError("no output file was set")
        if self.referenceParser is not None:
            self.compareToAnnotation()
        elif self.sequenceParser is not None:
            self.compareToSequence()
        else:
            raise ValueError("neither a reference file nor a sequence file was set")
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
131
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
132
769e306b7933 Change the repository level.
yufei-luo
parents:
diff changeset
if __name__ == "__main__":

    # parse command line
    description = "Get Difference v1.0.1: Get all the regions of the genome, except the one given or get all the elements from the first set which does not overlap with the second set (at the nucleotide level). [Category: Data Comparison]"

    parser = OptionParser(description = description)
    parser.add_option("-i", "--input1", dest="inputFileName1", action="store", type="string", help="input file [compulsory] [format: file in transcript format given by -f]")
    parser.add_option("-f", "--format1", dest="format1", action="store", type="string", help="format [compulsory] [format: transcript file format]")
    parser.add_option("-j", "--input2", dest="inputFileName2", action="store", default=None, type="string", help="reference file [format: file in transcript format given by -g]")
    parser.add_option("-g", "--format2", dest="format2", action="store", default=None, type="string", help="format of the reference file [format: transcript file format]")
    parser.add_option("-s", "--sequence", dest="sequenceFileName", action="store", default=None, type="string", help="sequence file [format: file in FASTA format]")
    parser.add_option("-p", "--split", dest="split", action="store_true", default=False, help="when comparing to a set of genomic coordinates, do not join [format: boolean] [default: False]")
    parser.add_option("-o", "--output", dest="outputFileName", action="store", type="string", help="output file [format: output file in GFF3 format]")
    parser.add_option("-v", "--verbosity", dest="verbosity", action="store", default=1, type="int", help="trace level [format: int]")
    (options, args) = parser.parse_args()

    # Reject incomplete command lines here, with a usage message, rather
    # than failing later on an unset parser.
    if options.inputFileName1 is None:
        parser.error("the input file (-i) is compulsory")
    if options.format1 is None:
        parser.error("the format of the input file (-f) is compulsory")
    if options.inputFileName2 is None and options.sequenceFileName is None:
        parser.error("either a reference file (-j) or a sequence file (-s) must be given")

    # The reference file, when given, takes precedence over the sequence
    # file (see DifferenceGetter.run).
    getter = DifferenceGetter(options.verbosity)
    getter.setSplit(options.split)
    getter.setAnnotationFile(options.inputFileName1, options.format1)
    getter.setSequenceFile(options.sequenceFileName)
    getter.setReferenceFile(options.inputFileName2, options.format2)
    getter.setOutputFile(options.outputFileName)
    getter.run()