| 0 | 1 #!/usr/bin/python | 
|  | 2 | 
|  | 3 import os.path | 
|  | 4 import sys | 
|  | 5 | 
class bed:
  """Sequential reader for a tab-delimited bed file of target intervals.

  Records are read one at a time with getRecord().  After a successful read
  the following attributes describe the current record:

    record            - the raw line as read from the file.
    referenceSequence - first column of the record.
    start             - second column plus one (see getRecord).
    end               - third column, unchanged.

  numberTargets counts the records read so far.  referenceSequences (a
  dictionary) and referenceSequenceList (a list) hold every reference
  sequence name seen, the list in order of first appearance in the file.

  Any error (unopenable file, malformed record, invalid interval) writes a
  message to stderr and terminates by raising SystemExit with code 1.
  """

  def __init__(self):
    self.numberTargets = 0
    self.referenceSequences = {}
    self.referenceSequenceList = []

    # Per-record state.  Defined up front so that parseBed() and closeBed()
    # can be called before the first getRecord()/openBed() without raising
    # an AttributeError.
    self.filehandle = None
    self.record = ""
    self.referenceSequence = None
    self.ref = ""
    self.start = 0
    self.end = 0

  def openBed(self, filename):
    """Open the bed file for reading.

    The literal name "stdin" selects standard input instead of a file on
    disk.  If the file cannot be opened, a message is written to stderr and
    SystemExit(1) is raised.
    """
    if filename == "stdin":
      self.filehandle = sys.stdin
      return

    try:
      self.filehandle = open(filename, "r")
    except IOError:
      sys.stderr.write("Failed to find file:  " + str(filename) + "\n")
      sys.exit(1)

  def getRecord(self):
    """Read the next bed record into the instance attributes.

    Returns False at end of file and True when a record was read.  A record
    with fewer than three tab-delimited fields, a non-integer start or end,
    or an end smaller than the converted start is reported on stderr and
    terminates with SystemExit(1).
    """
    self.record = self.filehandle.readline()
    if not self.record:
      return False

    self.numberTargets += 1
    self.ref = ""
    self.start = 0
    self.end = 0

    # Strip the line terminator (including the "\r" of CRLF files) before
    # splitting into columns.
    line = self.record.rstrip("\r\n")
    entries = line.split("\t")

    # A blank or truncated line has no start/end columns to parse; report it
    # as a malformed record instead of failing on the column lookup.
    if len(entries) < 3:
      text = "bed record has fewer than three tab-delimited fields"
      self.generalError(text, "record", line)

    self.referenceSequence = entries[0]

    # Record each reference sequence once.  The dictionary gives a fast
    # membership test; the list preserves the order in which the reference
    # sequences first appeared in the file.
    if self.referenceSequence not in self.referenceSequences:
      self.referenceSequences[self.referenceSequence] = True
      self.referenceSequenceList.append(self.referenceSequence)

    # A bed file should be 0-based and half-open, so the start coordinate is
    # the value in the file plus one; the end coordinate is used as is.
    try:
      self.start = int(entries[1]) + 1
    except ValueError:
      self.generalError("start position is not an integer", "start", entries[1])

    try:
      self.end = int(entries[2])
    except ValueError:
      self.generalError("end position is not an integer", "end", entries[2])

    # Check that the record is a valid interval.  Because of the +1 applied
    # to the start, a record whose file start equals its end fails this test.
    if self.end < self.start:
      sys.stderr.write("Invalid target interval:\n\t" + self.record + "\n")
      sys.exit(1)

    return True

  def parseBed(self, referenceSequence, position):
    """Advance through the file towards a reference sequence and position.

    Records are read until the current record is on referenceSequence, and
    then for as long as the current record stays on that reference sequence
    with an end smaller than position.  Returns False if the end of the
    file was reached while reading, True otherwise.  On a True return the
    current record may already belong to a later reference sequence, so the
    caller should inspect referenceSequence/start/end.
    """
    success = True

    # Skip forward to the requested reference sequence.
    while success and self.referenceSequence != referenceSequence:
      success = self.getRecord()

    # Skip the records on that reference sequence that end before position.
    while success and self.referenceSequence == referenceSequence and self.end < position:
      success = self.getRecord()

    return success

  def closeBed(self, filename=None):
    """Close the bed file.

    filename is unused; it is accepted only so that existing callers that
    pass it keep working.  Nothing happens if no file was opened.
    """
    if self.filehandle is not None:
      self.filehandle.close()

  def generalError(self, text, field, fieldValue):
    """Report a malformed record on stderr and terminate.

    text describes the problem.  field and fieldValue name the offending
    field and its content; the field line is omitted when field is the empty
    string.  Always raises SystemExit(1).
    """
    sys.stderr.write("\nError encountered when attempting to read:\n")
    if field != "":
      sys.stderr.write("\t" + str(field) + " " + ":             " + " " + str(fieldValue) + "\n")
    sys.stderr.write("\n" + str(text) + "\n")
    sys.exit(1)