annotate vcfClass.py @ 0:76ad0b7865b9 draft default tip

Imported from capsule None
author devteam
date Mon, 27 Jan 2014 09:26:27 -0500
parents
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
1 #!/usr/bin/python
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
2
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
3 import os.path
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
4 import sys
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
5 import re
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
6
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
7 class vcf:
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
8 def __init__(self):
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
9
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
10 # Header info.
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
11 self.filename = ""
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
12 self.hasHeader = True
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
13 self.headerText = ""
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
14 self.headerTitles = ""
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
15 self.vcfFormat = ""
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
16 #self.headerInfoText = ""
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
17 #self.headerFormatText = ""
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
18
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
19 # Store the info and format tags as well as the lines that describe
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
20 # them in a dictionary.
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
21 self.numberDataSets = 0
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
22 self.includedDataSets = {}
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
23 self.infoHeaderTags = {}
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
24 self.infoHeaderString = {}
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
25 self.formatHeaderTags = {}
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
26 self.formatHeaderString = {}
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
27
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
28 # Genotype information.
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
29 self.genotypes = False
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
30 self.infoField = {}
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
31
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
32 # Reference sequence information.
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
33 self.referenceSequences = {}
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
34 self.referenceSequenceList = []
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
35 self.referenceSequence = ""
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
36
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
37 # Record information.
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
38 self.position = -1
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
39 self.samplesList = []
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
40
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
41 # Determine which fields to process.
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
42 self.processInfo = False
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
43 self.processGenotypes = False
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
44 self.dbsnpVcf = False
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
45 self.hapmapVcf = False
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
46
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
47 # Open a vcf file.
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
48 def openVcf(self, filename):
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
49 if filename == "stdin":
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
50 self.filehandle = sys.stdin
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
51 self.filename = "stdin"
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
52 else:
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
53 try: self.filehandle = open(filename,"r")
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
54 except IOError:
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
55 print >> sys.stderr, "Failed to find file: ",filename
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
56 exit(1)
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
57 self.filename = os.path.abspath(filename)
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
58
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
59 # Parse the vcf header.
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
60 def parseHeader(self, filename, writeOut):
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
61 while self.getHeaderLine(filename, writeOut):
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
62 continue
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
63
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
64 # Determine the type of information in the header line.
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
65 def getHeaderLine(self, filename, writeOut):
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
66 self.headerLine = self.filehandle.readline().rstrip("\n")
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
67 if self.headerLine.startswith("##fileformat"): success = self.getvcfFormat()
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
68 if self.headerLine.startswith("##INFO"): success = self.headerInfo(writeOut, "info")
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
69 elif self.headerLine.startswith("##FORMAT"): success = self.headerInfo(writeOut, "format")
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
70 elif self.headerLine.startswith("##FILE"): success = self.headerFiles(writeOut)
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
71 elif self.headerLine.startswith("##"): success = self.headerAdditional()
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
72 elif self.headerLine.startswith("#"): success = self.headerTitleString(filename, writeOut)
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
73 else: success = self.noHeader(filename, writeOut)
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
74
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
75 return success
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
76
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
77 # Read VCF format
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
78 def getvcfFormat(self):
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
79 try:
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
80 self.vcfFormat = self.headerLine.split("=",1)[1]
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
81 self.vcfFormat = float( self.vcfFormat.split("VCFv",1)[1] )## Extract the version number rather than the whole string
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
82 except IndexError:
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
83 print >> sys.stderr, "\nError parsing the fileformat"
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
84 print >> sys.stderr, "The following fileformat header is wrongly formatted: ", self.headerLine
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
85 exit(1)
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
86 return True
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
87
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
88
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
89 # Read information on an info field from the header line.
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
90 def headerInfo(self, writeOut, lineType):
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
91 tag = self.headerLine.split("=",1)
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
92 tagID = (tag[1].split("ID=",1))[1].split(",",1)
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
93
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
94 # Check if this info field has already been defined.
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
95 if (lineType == "info" and self.infoHeaderTags.has_key(tagID[0])) or (lineType == "format" and self.formatHeaderTags.has_key(tagID[0])):
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
96 print >> sys.stderr, "Info tag \"", tagID[0], "\" is defined multiple times in the header."
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
97 exit(1)
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
98
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
99 # Determine the number of entries, entry type and description.
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
100 tagNumber = (tagID[1].split("Number=",1))[1].split(",",1)
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
101 tagType = (tagNumber[1].split("Type=",1))[1].split(",",1)
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
102 try: tagDescription = ( ( (tagType[1].split("Description=\"",1))[1] ).split("\">") )[0]
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
103 except IndexError: tagDescription = ""
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
104 tagID = tagID[0]; tagNumber = tagNumber[0]; tagType = tagType[0]
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
105
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
106 # Check that the number of fields associated with the tag is either
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
107 # an integer or a '.' to indicate variable number of entries.
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
108 if tagNumber == ".": tagNumber = "variable"
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
109 else:
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
110 if self.vcfFormat<4.1:
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
111
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
112 try:
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
113 tagNumber = int(tagNumber)
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
114
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
115 except ValueError:
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
116 print >> sys.stderr, "\nError parsing header. Problem with info tag:", tagID
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
117 print >> sys.stderr, "Number of fields associated with this tag is not an integer or '.'"
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
118 exit(1)
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
119
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
120 if lineType == "info":
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
121 self.infoHeaderTags[tagID] = tagNumber, tagType, tagDescription
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
122 self.infoHeaderString[tagID] = self.headerLine
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
123 if lineType == "format":
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
124 self.formatHeaderTags[tagID] = tagNumber, tagType, tagDescription
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
125 self.formatHeaderString[tagID] = self.headerLine
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
126
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
127 return True
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
128
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
129 # Check to see if the records contain information from multiple different
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
130 # sources. If vcfPytools has been used to find the intersection or union
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
131 # of two vcf files, the records may have been merged to keep all the
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
132 # information available. If this is the case, there will be a ##FILE line
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
133 # for each set of information in the file. The order of these files needs
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
134 # to be maintained.
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
135 def headerFiles(self, writeOut):
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
136 fileID = (self.headerLine.split("ID=",1))[1].split(",",1)
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
137 filename = fileID[1].split("\"",2)[1]
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
138 try: fileID = int(fileID[0])
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
139 except ValueError:
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
140 print >> sys.stderr, "File ID in ##FILE entry must be an integer."
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
141 print >> sys.stderr, self.headerLine
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
142 exit(1)
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
143 if self.includedDataSets.has_key(fileID):
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
144 print >> sys.stderr, "\nERROR: file " + self.filename
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
145 print >> sys.stderr, "Multiple files in the ##FILE list have identical ID values."
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
146 exit(1)
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
147 self.includedDataSets[fileID] = filename
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
148
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
149 # Set the number of files with information in this vcf file.
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
150 if fileID > self.numberDataSets: self.numberDataSets = fileID
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
151
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
152 return True
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
153
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
154 # Read additional information contained in the header.
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
155 def headerAdditional(self):
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
156 self.headerText += self.headerLine + "\n"
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
157
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
158 return True
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
159
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
160 # Read in the column titles to check that all standard fields
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
161 # are present and read in all the samples.
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
162 def headerTitleString(self, filename, writeOut):
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
163 self.headerTitles = self.headerLine + "\n"
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
164
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
165 # Strip the end of line character from the last infoFields entry.
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
166 infoFields = self.headerLine.split("\t")
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
167 if len(infoFields) > 8:
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
168 # if len(infoFields) - 9 == 1 and writeOut: print >> sys.stdout, len(infoFields) - 9, " sample present in vcf file: ", filename
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
169 # elif writeOut: print >> sys.stdout, len(infoFields) - 9, " samples present in vcf file: ", filename
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
170 self.samplesList = infoFields[9:]
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
171 self.genotypes = True
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
172 elif len(infoFields) == 8:
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
173 if writeOut: print >> sys.stdout, "No samples present in the header. No genotype information available."
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
174 else:
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
175 print self.headerLine, len(infoFields)
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
176 print >> sys.stderr, "Not all vcf standard fields are available."
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
177 exit(1)
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
178
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
179 return False
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
180
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
181 # If there is no header in the vcf file, close and reopen the
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
182 # file so that the first line is avaiable for parsing as a
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
183 # vcf record.
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
184 def noHeader(self, filename, writeOut):
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
185 if writeOut: print >> sys.stdout, "No header lines present in", filename
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
186 self.hasHeader = False
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
187 self.closeVcf(filename)
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
188 self.openVcf(filename)
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
189
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
190 return False
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
191
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
192 # Check that info fields exist.
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
193 def checkInfoFields(self, tag):
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
194 if self.infoHeaderTags.has_key(tag) == False:
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
195 print >> sys.stderr, "Info tag \"", tag, "\" does not exist in the header."
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
196 exit(1)
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
197
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
198 # Get the next line of information from the vcf file.
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
199 def getRecord(self):
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
200 self.record = self.filehandle.readline()
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
201 if not self.record: return False
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
202
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
203 # Set up and execute a regular expression match.
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
204 recordRe = re.compile(r"^(\S+)\t(\d+)\t(\S+)\t(\S+)\t(\S+)\t(\S+)\t(\S+)\t(\S+)(\n|\t.+)$")
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
205 #recordRe = re.compile(r"^(\S+)\s+(\d+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s+(\S+)(\n|\s+.+)$")
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
206 recordMatch = recordRe.match(self.record)
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
207 if recordMatch == None:
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
208 print >> sys.stderr, "Unable to resolve vcf record.\n"
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
209 print >> sys.stderr, self.record
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
210 exit(1)
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
211
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
212 self.referenceSequence = recordMatch.group(1)
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
213 try: self.position = int(recordMatch.group(2))
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
214 except ValueError:
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
215 text = "variant position is not an integer"
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
216 self.generalError(text, "", None)
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
217 self.rsid = recordMatch.group(3)
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
218 self.ref = recordMatch.group(4)
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
219 self.alt = recordMatch.group(5)
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
220 self.quality = recordMatch.group(6)
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
221 self.filters = recordMatch.group(7)
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
222 self.info = recordMatch.group(8)
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
223 self.genotypeString = recordMatch.group(9)
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
224 self.infoTags = {}
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
225
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
226 # Check that the quality is an integer or a float. If not, set the quality
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
227 # to zero.
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
228 try: self.quality = float(self.quality)
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
229 except ValueError: self.quality = float(0.)
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
230
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
231 # If recordMatch.group(9) is not the end of line character, there is
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
232 # genotype information with this record.
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
233 if self.genotypeString != "\n": self.hasGenotypes = True
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
234 else: self.hasGenotypes = False
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
235
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
236 # Add the reference sequence to the dictionary. If it didn't previously
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
237 # exist append the reference sequence to the end of the list as well.
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
238 # This ensures that the order in which the reference sequences appeared
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
239 # in the header can be preserved.
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
240 if self.referenceSequence not in self.referenceSequences:
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
241 self.referenceSequences[self.referenceSequence] = True
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
242 self.referenceSequenceList.append(self.referenceSequence)
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
243
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
244 # Check for multiple alternate alleles.
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
245 self.alternateAlleles = self.alt.split(",")
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
246 self.numberAlternateAlleles = len(self.alternateAlleles)
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
247
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
248 # If required, process the info and genotypes.
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
249 if self.processInfo: self.processInfoFields()
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
250 if self.processGenotypes and self.hasGenotypes: self.processGenotypeFields()
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
251
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
252 return True
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
253
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
254 # Process the info string.
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
255 def processInfoFields(self):
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
256
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
257 # First break the info string into its constituent elements.
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
258 infoEntries = self.info.split(";")
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
259
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
260 # As long as some info fields exist, place them into a dictionary.
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
261 for entry in infoEntries:
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
262 infoEntry = entry.split("=")
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
263
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
264 # If the entry is a flag, there will be no equals and the length of
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
265 # infoEntry will be 1. In this case, set the dictionary entry to the
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
266 # whole entry. If the vcf file has undergone a union or intersection
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
267 # operation and contains the information from multiple files, this may
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
268 # be a '/' seperate list of flags and so cannot be set to a Boolean value
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
269 # yet.
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
270 if len(infoEntry) == 1: self.infoTags[infoEntry[0]] = infoEntry[0]
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
271 elif len(infoEntry) > 1: self.infoTags[infoEntry[0]] = infoEntry[1]
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
272
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
273 # Process the genotype formats and values.
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
274 def processGenotypeFields(self):
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
275 genotypeEntries = self.genotypeString.split("\t")
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
276 self.genotypeFormatString = genotypeEntries[1]
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
277 self.genotypes = list(genotypeEntries[2:])
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
278 self.genotypeFormats = {}
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
279 self.genotypeFields = {}
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
280 self.genotypeFormats = self.genotypeFormatString.split(":")
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
281
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
282 # Check that the number of genotype fields is equal to the number of samples
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
283 if len(self.samplesList) != len(self.genotypes):
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
284 text = "The number of genotypes is different to the number of samples"
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
285 self.generalError(text, "", "")
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
286
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
287 # Add the genotype information to a dictionary.
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
288 for i in range( len(self.samplesList) ):
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
289 genotypeInfo = self.genotypes[i].split(":")
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
290 self.genotypeFields[ self.samplesList[i] ] = {}
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
291
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
292 # Check that there are as many fields as in the format field. If not, this must
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
293 # be because the information is not known. In this case, it is permitted that
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
294 # the genotype information is either . or ./.
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
295 if genotypeInfo[0] == "./." or genotypeInfo[0] == "." and len(self.genotypeFormats) != len(genotypeInfo):
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
296 self.genotypeFields[ self.samplesList[i] ] = "."
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
297 else:
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
298 if len(self.genotypeFormats) != len(genotypeInfo):
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
299 text = "The number of genotype fields is different to the number specified in the format string"
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
300 self.generalError(text, "sample", self.samplesList[i])
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
301
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
302 for j in range( len(self.genotypeFormats) ): self.genotypeFields[ self.samplesList[i] ][ self.genotypeFormats[j] ] = genotypeInfo[j]
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
303
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
304 # Parse through the vcf file until the correct reference sequence is
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
305 # encountered and the position is greater than or equal to that requested.
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
306 def parseVcf(self, referenceSequence, position, writeOut, outputFile):
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
307 success = True
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
308 if self.referenceSequence != referenceSequence:
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
309 while self.referenceSequence != referenceSequence and success:
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
310 if writeOut: outputFile.write(self.record)
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
311 success = self.getRecord()
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
312
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
313 while self.referenceSequence == referenceSequence and self.position < position and success:
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
314 if writeOut: outputFile.write(self.record)
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
315 success = self.getRecord()
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
316
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
317 return success
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
318
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
319 # Get the information for a specific info tag. Also check that it contains
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
320 # the correct number and type of entries.
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
321 def getInfo(self, tag):
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
322 result = []
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
323
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
324 # Check if the tag exists in the header information. If so,
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
325 # determine the number and type of entries asscoiated with this
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
326 # tag.
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
327 if self.infoHeaderTags.has_key(tag):
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
328 infoNumber = self.infoHeaderTags[tag][0]
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
329 infoType = self.infoHeaderTags[tag][1]
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
330 numberValues = infoNumber
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
331
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
332 # First check that the tag exists in the information string. Then split
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
333 # the entry on commas. For flag entries, do not perform the split.
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
334 if self.infoTags.has_key(tag):
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
335 if numberValues == 0 and infoType == "Flag": result = True
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
336 elif numberValues != 0 and infoType == "Flag":
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
337 print >> sys.stderr, "ERROR"
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
338 exit(1)
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
339 else:
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
340 fields = self.infoTags[tag].split(",")
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
341 if len(fields) != numberValues:
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
342 text = "Unexpected number of entries"
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
343 self.generalError(text, "information tag", tag)
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
344
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
345 for i in range(infoNumber):
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
346 try: result.append(fields[i])
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
347 except IndexError:
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
348 text = "Insufficient values. Expected: " + self.infoHeaderTags[tag][0]
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
349 self.generalError(text, "tag:", tag)
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
350 else: numberValues = 0
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
351
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
352 else:
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
353 text = "information field does not have a definition in the header"
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
354 self.generalError(text, "tag", tag)
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
355
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
356 return numberValues, infoType, result
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
357
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
358 # Get the genotype information.
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
359 def getGenotypeInfo(self, sample, tag):
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
360 result = []
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
361 if self.formatHeaderTags.has_key(tag):
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
362 infoNumber = self.formatHeaderTags[tag][0]
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
363 infoType = self.formatHeaderTags[tag][1]
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
364 numberValues = infoNumber
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
365
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
366 if self.genotypeFields[sample] == "." and len(self.genotypeFields[sample]) == 1:
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
367 numberValues = 0
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
368 result = "."
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
369 else:
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
370 if self.genotypeFields[sample].has_key(tag):
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
371 if tag == "GT":
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
372 if len(self.genotypeFields[sample][tag]) != 3 and len(self.genotypeFields[sample][tag]) != 1:
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
373 text = "Unexected number of characters in genotype (GT) field"
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
374 self.generalError(text, "sample", sample)
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
375
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
376 # If a diploid call, check whether or not the genotype is phased.
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
377 elif len(self.genotypeFields[sample][tag]) == 3:
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
378 self.phased = True if self.genotypeFields[sample][tag][1] == "|" else False
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
379 result.append( self.genotypeFields[sample][tag][0] )
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
380 result.append( self.genotypeFields[sample][tag][2] )
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
381 elif len(self.genotypeFields[sample][tag]) == 3:
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
382 result.append( self.genotypeFields[sample][tag][0] )
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
383 else:
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
384 fields = self.genotypeFields[sample][tag].split(",")
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
385 if len(fields) != numberValues:
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
386 text = "Unexpected number of characters in " + tag + " field"
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
387 self.generalError(text, "sample", sample)
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
388
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
389 for i in range(infoNumber): result.append(fields[i])
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
390 else:
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
391 text = "genotype field does not have a definition in the header"
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
392 self.generalError(text, "tag", tag)
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
393
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
394 return numberValues, result
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
395
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
396 # Parse the dbsnp entry. If the entry conforms to the required variant type,
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
397 # return the dbsnp rsid value, otherwise ".".
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
398 def getDbsnpInfo(self):
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
399
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
400 # First check that the variant class (VC) is listed as SNP.
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
401 vc = self.info.split("VC=",1)
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
402 if vc[1].find(";") != -1: snp = vc[1].split(";",1)
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
403 else:
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
404 snp = []
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
405 snp.append(vc[1])
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
406
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
407 if snp[0].lower() == "snp": rsid = self.rsid
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
408 else: rsid = "."
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
409
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
410 return rsid
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
411
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
412 # Build a new vcf record.
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
413 def buildRecord(self, removeGenotypes):
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
414 record = self.referenceSequence + "\t" + \
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
415 str(self.position) + "\t" + \
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
416 self.rsid + "\t" + \
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
417 self.ref + "\t" + \
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
418 self.alt + "\t" + \
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
419 str(self.quality) + "\t" + \
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
420 self.filters + "\t" + \
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
421 self.info
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
422
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
423 if self.hasGenotypes and not removeGenotypes: record += self.genotypeString
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
424
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
425 record += "\n"
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
426
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
427 return record
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
428
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
429 # Close the vcf file.
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
430 def closeVcf(self, filename):
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
431 self.filehandle.close()
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
432
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
433 # Define error messages for different handled errors.
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
434 def generalError(self, text, field, fieldValue):
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
435 print >> sys.stderr, "\nError encountered when attempting to read:"
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
436 print >> sys.stderr, "\treference sequence :\t", self.referenceSequence
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
437 print >> sys.stderr, "\tposition :\t\t", self.position
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
438 if field != "": print >> sys.stderr, "\t", field, ":\t", fieldValue
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
439 print >> sys.stderr, "\n", text
76ad0b7865b9 Imported from capsule None
devteam
parents:
diff changeset
440 exit(1)