lumpy_sv: extractSplitReads_BwaMem.py comparison

comparison extractSplitReads_BwaMem.py @ 1:1ed8619a5611 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/lumpy-sv commit 0b55a106b1f76e3cc3d89932fef2cc8d3eb24e4f

author	artbio
date	Wed, 26 Jul 2017 18:17:01 -0400
parents	796552c157de
children	48e97429a749

comparison

equal deleted inserted replaced

-:796552c157de
+:1ed8619a5611
 #!/usr/bin/env python
+import re
 import sys
-import getopt
-import string
 from optparse import OptionParser
-import re
-def extractSplitsFromBwaMem(inFile,numSplits,includeDups,minNonOverlap):
+def extractSplitsFromBwaMem(inFile, numSplits, includeDups, minNonOverlap):
 if inFile == "stdin":
 data = sys.stdin
 else:
 data = open(inFile, 'r')
 for line in data:
 split = 0
 if line[0] == '@':
-print line.strip()
+print(line.strip())
 continue
 samList = line.strip().split('\t')
 sam = SAM(samList)
-if includeDups==0 and (1024 & sam.flag)==1024:
+if includeDups == 0 and (1024 & sam.flag) == 1024:
 continue
 for el in sam.tags:
 if "SA:" in el:
-if(len(el.split(";")))<=numSplits:
+if(len(el.split(";"))) <= numSplits:
 split = 1
 mate = el.split(",")
 mateCigar = mate[3]
 mateFlag = int(0)
-if mate[2]=="-": mateFlag = int(16)
+if mate[2] == "-":
+mateFlag = int(16)
 if split:
 read1 = sam.flag & 64
-if read1 == 64: tag = "_1"
+if read1 == 64:
-else: tag="_2"
+tag = "_1"
+else:
+tag = "_2"
 samList[0] = sam.query + tag
 readCigar = sam.cigar
-readCigarOps = extractCigarOps(readCigar,sam.flag)
+readCigarOps = extractCigarOps(readCigar, sam.flag)
 readQueryPos = calcQueryPosFromCigar(readCigarOps)
-mateCigarOps = extractCigarOps(mateCigar,mateFlag)
+mateCigarOps = extractCigarOps(mateCigar, mateFlag)
 mateQueryPos = calcQueryPosFromCigar(mateCigarOps)
-overlap = calcQueryOverlap(readQueryPos.qsPos,readQueryPos.qePos,mateQueryPos.qsPos,mateQueryPos.qePos)
+overlap = calcQueryOverlap(readQueryPos.qsPos, readQueryPos.qePos,
+mateQueryPos.qsPos, mateQueryPos.qePos)
 nonOverlap1 = 1 + readQueryPos.qePos - readQueryPos.qsPos - overlap
 nonOverlap2 = 1 + mateQueryPos.qePos - mateQueryPos.qsPos - overlap
 mno = min(nonOverlap1, nonOverlap2)
 if mno >= minNonOverlap:
-print "\t".join(samList)
+print("\t".join(samList))
-#--------------------------------------------------------------------------------------------------
+# -----------------------------------------------------------------------
 # functions
-#--------------------------------------------------------------------------------------------------
+# -----------------------------------------------------------------------
 class SAM (object):
 """
 __very__ basic class for SAM input.
 """
-def __init__(self, samList = []):
+def __init__(self, samList=[]):
 if len(samList) > 0:
-self.query    = samList[0]
+self.query = samList[0]
-self.flag     = int(samList[1])
+self.flag = int(samList[1])
-self.ref      = samList[2]
+self.ref = samList[2]
-self.pos      = int(samList[3])
+self.pos = int(samList[3])
-self.mapq     = int(samList[4])
+self.mapq = int(samList[4])
-self.cigar    = samList[5]
+self.cigar = samList[5]
-self.matRef   = samList[6]
+self.matRef = samList[6]
-self.matePos  = int(samList[7])
+self.matePos = int(samList[7])
-self.iSize    = int(samList[8])
+self.iSize = int(samList[8])
-self.seq      = samList[9]
+self.seq = samList[9]
-self.qual     = samList[10]
+self.qual = samList[10]
-self.tags     = samList[11:]#tags is a list of each tag:vtype:value sets
+self.tags = samList[11:]
-self.valid    = 1
+# tags is a list of each tag:vtype:value sets
+self.valid = 1
 else:
 self.valid = 0
 self.query = 'null'
-def extractTagValue (self, tagID):
+def extractTagValue(self, tagID):
 for tag in self.tags:
-tagParts = tag.split(':', 2);
+tagParts = tag.split(':', 2)
 if (tagParts[0] == tagID):
 if (tagParts[1] == 'i'):
-return int(tagParts[2]);
+return int(tagParts[2])
 elif (tagParts[1] == 'H'):
-return int(tagParts[2],16);
+return int(tagParts[2], 16)
-return tagParts[2];
+return tagParts[2]
-return None;
+return None
-#-----------------------------------------------
 cigarPattern = '([0-9]+[MIDNSHP])'
 cigarSearch = re.compile(cigarPattern)
 atomicCigarPattern = '([0-9]+)([MIDNSHP])'
 atomicCigarSearch = re.compile(atomicCigarPattern)
-def extractCigarOps(cigar,flag):
+def extractCigarOps(cigar, flag):
 if (cigar == "*"):
 cigarOps = []
 elif (flag & 0x0010):
 cigarOpStrings = cigarSearch.findall(cigar)
 cigarOps = []
 cigar = cigarOp(cigarOpList[0][0], cigarOpList[0][1])
 # add to the list of cigarOps
 cigarOps.append(cigar)
 cigarOps = cigarOps
 cigarOps.reverse()
-##do in reverse order because negative strand##
+# do in reverse order because negative strand
 else:
 cigarOpStrings = cigarSearch.findall(cigar)
 cigarOps = []
 for opString in cigarOpStrings:
 cigarOpList = atomicCigarSearch.findall(opString)
 # add to the list of cigarOps
 cigarOps.append(cigar)
 #            cigarOps = cigarOps
 return(cigarOps)
 def calcQueryPosFromCigar(cigarOps):
 qsPos = 0
 qePos = 0
-qLen  = 0
+qLen = 0
 # if first op is a H, need to shift start position
-# the opPosition counter sees if the for loop is looking at the first index of the cigar object
+# the opPosition counter sees if the for loop is looking
+# at the first index of the cigar object
 opPosition = 0
 for cigar in cigarOps:
 if opPosition == 0 and (cigar.op == 'H' or cigar.op == 'S'):
 qsPos += cigar.length
 qePos += cigar.length
-qLen  += cigar.length
+qLen += cigar.length
 elif opPosition > 0 and (cigar.op == 'H' or cigar.op == 'S'):
-qLen  += cigar.length
+qLen += cigar.length
 elif cigar.op == 'M' or cigar.op == 'I':
 qePos += cigar.length
-qLen  += cigar.length
+qLen += cigar.length
 opPosition += 1
-d = queryPos(qsPos, qePos, qLen);
+d = queryPos(qsPos, qePos, qLen)
 return d
 class cigarOp (object):
 """
 sturct to store a discrete CIGAR operations
 """
 def __init__(self, opLength, op):
 self.length = int(opLength)
-self.op     = op
+self.op = op
 class queryPos (object):
 """
 struct to store the start and end positions of query CIGAR operations
 """
 def __init__(self, qsPos, qePos, qLen):
 self.qsPos = int(qsPos)
 self.qePos = int(qePos)
-self.qLen  = int(qLen)
+self.qLen = int(qLen)
-def calcQueryOverlap(s1,e1,s2,e2):
+def calcQueryOverlap(s1, e1, s2, e2):
 o = 1 + min(e1, e2) - max(s1, s2)
 return max(0, o)
 ###############################################
 class Usage(Exception):
 def __init__(self, msg):
 self.msg = msg
 def main():
 usage = """%prog -i <file>
 extractSplitReads_BwaMem v0.1.0
 Author: Ira Hall
-Description: Get split-read alignments from bwa-mem in lumpy compatible format. Ignores reads marked as duplicates.
+Description: Get split-read alignments from bwa-mem in lumpy compatible
+format. Ignores reads marked as duplicates.
 Works on read or position sorted SAM input. Tested on bwa mem v0.7.5a-r405.
 """
 parser = OptionParser(usage)
 parser.add_option("-i", "--inFile", dest="inFile",
 help="A SAM file or standard input (-i stdin).",
 metavar="FILE")
-parser.add_option("-n", "--numSplits", dest="numSplits", default=2, type = "int",
+parser.add_option("-n", "--numSplits", dest="numSplits", default=2,
-help="The maximum number of split-read mappings to allow per read. Reads with more are excluded. Default=2",
+type="int",
-metavar="INT")
+help='''The maximum number of split-read mappings to
-parser.add_option("-d", "--includeDups", dest="includeDups", action="store_true",default=0,
+allow per read. Reads with more are excluded.
-help="Include alignments marked as duplicates. Default=False")
+Default=2''', metavar="INT")
-parser.add_option("-m", "--minNonOverlap", dest="minNonOverlap", default=20, type = "int",
+parser.add_option("-d", "--includeDups", dest="includeDups",
-help="minimum non-overlap between split alignments on the query (default=20)",
+action="store_true", default=0,
-metavar="INT")
+help='''Include alignments marked as duplicates.
+Default=False''')
+parser.add_option("-m", "--minNonOverlap", dest="minNonOverlap",
+default=20, type="int", help='''minimum non-overlap between
+split alignments on the query (default=20)''',
+metavar="INT")
 (opts, args) = parser.parse_args()
 if opts.inFile is None:
 parser.print_help()
 print
 else:
 try:
-extractSplitsFromBwaMem(opts.inFile, opts.numSplits, opts.includeDups, opts.minNonOverlap)
+extractSplitsFromBwaMem(opts.inFile, opts.numSplits,
+opts.includeDups, opts.minNonOverlap)
 except IOError as err:
-sys.stderr.write("IOError " + str(err) + "\n");
+sys.stderr.write("IOError " + str(err) + "\n")
 return
 if __name__ == "__main__":
 sys.exit(main())

Mercurial > repos > artbio > lumpy_sv

comparison extractSplitReads_BwaMem.py @ 1:1ed8619a5611 draft