Mercurial > repos > devteam > vcf_annotate
comparison bedClass.py @ 0:b001b50f2009 draft default tip
Imported from capsule None
| author | devteam |
|---|---|
| date | Mon, 27 Jan 2014 09:28:06 -0500 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:b001b50f2009 |
|---|---|
| 1 #!/usr/bin/python | |
| 2 | |
| 3 import os.path | |
| 4 import sys | |
| 5 | |
class bed:
    """Sequential reader for a tab-delimited BED file.

    Records are read one at a time with getRecord().  For the most recently
    read record the following attributes are set:

        record             -- the raw line as read from the file
        referenceSequence  -- first column of the record
        start              -- second column plus one (see getRecord)
        end                -- third column

    The reader also keeps a count of the records read (numberTargets) and
    remembers every reference sequence seen, both as dictionary keys
    (referenceSequences) and in order of first appearance
    (referenceSequenceList).
    """

    def __init__(self):
        self.numberTargets = 0
        self.referenceSequences = {}
        self.referenceSequenceList = []

        # Initialise the per-record state up front so that parseBed() can be
        # called before the first getRecord() without an AttributeError.
        self.filehandle = None
        self.record = ""
        self.ref = ""
        self.referenceSequence = ""
        self.start = 0
        self.end = 0

    # Open the bed file.
    def openBed(self, filename):
        """Open filename for reading; the name "stdin" selects sys.stdin.

        Writes a message to stderr and exits with status 1 if the file
        cannot be opened.
        """
        if filename == "stdin":
            self.filehandle = sys.stdin
            return

        try:
            self.filehandle = open(filename, "r")
        except IOError:
            sys.stderr.write("Failed to find file:  %s\n" % filename)
            sys.exit(1)

    # Get a bed record.
    def getRecord(self):
        """Read and parse the next record.

        Returns False at end of file and True once a record has been parsed.
        Exits with status 1 (after reporting on stderr) if the record has
        fewer than three tab-separated fields, if the start or end field is
        not an integer, or if the end lies before the adjusted start.
        """
        self.record = self.filehandle.readline()
        if not self.record:
            return False

        self.numberTargets += 1
        self.ref = ""
        self.start = 0
        self.end = 0

        entries = self.record.rstrip("\n").split("\t")

        # A record needs at least reference sequence, start and end.  Check
        # this explicitly so a short line is reported cleanly rather than
        # surfacing as an IndexError.
        if len(entries) < 3:
            text = "bed record does not contain reference sequence, start and end fields"
            self.generalError(text, "record", self.record.rstrip("\n"))

        self.referenceSequence = entries[0]

        # Add the reference sequence to the dictionary.  If it didn't
        # previously exist append the reference sequence to the end of the
        # list as well.  This ensures that the order in which the reference
        # sequences appeared can be preserved.
        if self.referenceSequence not in self.referenceSequences:
            self.referenceSequences[self.referenceSequence] = True
            self.referenceSequenceList.append(self.referenceSequence)

        # bed file should be 0-based, half-open, so the start coordinate
        # must be that in the bed file plus one.
        try:
            self.start = int(entries[1]) + 1
        except ValueError:
            text = "start position need is not an integer"
            self.generalError(text, "start", entries[1])

        try:
            self.end = int(entries[2])
        except ValueError:
            text = "end position need is not an integer"
            self.generalError(text, "end", entries[2])

        # Check that the record is a valid interval.
        if self.end < self.start:
            sys.stderr.write("Invalid target interval:\n\t%s\n" % self.record)
            sys.exit(1)

        return True

    # Parse through the bed file until the correct reference sequence is
    # encountered and the end position is greater than or equal to that
    # requested.
    def parseBed(self, referenceSequence, position):
        """Advance to a record on referenceSequence whose end >= position.

        Records are consumed in file order; nothing is rewound.  Returns
        False if the end of the file was reached while searching, True
        otherwise.
        """
        success = True

        # Skip records until the requested reference sequence is current.
        while success and self.referenceSequence != referenceSequence:
            success = self.getRecord()

        # Within that reference sequence, skip records ending before position.
        while (success and self.referenceSequence == referenceSequence
               and self.end < position):
            success = self.getRecord()

        return success

    # Close the bed file.
    def closeBed(self, filename):
        """Close the underlying file handle.

        filename is accepted for compatibility with existing callers and is
        not used.
        """
        if self.filehandle is not None:
            self.filehandle.close()

    # Define error messages for different handled errors.
    def generalError(self, text, field, fieldValue):
        """Report a read error on stderr and exit with status 1.

        text is the description of the problem; field and fieldValue name
        the offending field and its value (the field line is omitted when
        field is the empty string).
        """
        sys.stderr.write("\nError encountered when attempting to read:\n")
        if field != "":
            sys.stderr.write("\t%s :  %s\n" % (field, fieldValue))
        sys.stderr.write("\n%s\n" % text)
        sys.exit(1)
