comparison filter.py @ 2:7a68005de299 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ngsutils commit 9a243c616a4a3156347e38fdb5f35863ae5133f9
author iuc
date Sun, 27 Nov 2016 15:01:21 -0500
parents 4e4e4093d65d
children 9b9ae5963d3c
comparison
equal deleted inserted replaced
1:8187a729d9f4 2:7a68005de299
1 #!/usr/bin/env python 1 #!/usr/bin/env python
2 ## category General 2 # category General
3 ## desc Removes reads from a BAM file based on criteria 3 # desc Removes reads from a BAM file based on criteria
4 """ 4 """
5 Removes reads from a BAM file based on criteria 5 Removes reads from a BAM file based on criteria
6 6
7 Given a BAM file, this script will only allow reads that meet filtering 7 Given a BAM file, this script will only allow reads that meet filtering
8 criteria to be written to output. The output is another BAM file with the 8 criteria to be written to output. The output is another BAM file with the
15 Currently, the available filters are: 15 Currently, the available filters are:
16 -minlen val Remove reads that are smaller than {val} 16 -minlen val Remove reads that are smaller than {val}
17 -maxlen val Remove reads that are larger than {val} 17 -maxlen val Remove reads that are larger than {val}
18 -mapped Keep only mapped reads 18 -mapped Keep only mapped reads
19 -unmapped Keep only unmapped reads 19 -unmapped Keep only unmapped reads
20 -properpair Keep only properly paired reads (both mapped, 20 -properpair Keep only properly paired reads (both mapped,
21 correct orientation, flag set in BAM) 21 correct orientation, flag set in BAM)
22 -noproperpair Keep only not-properly paired reads 22 -noproperpair Keep only not-properly paired reads
23 23
24 -mask bitmask Remove reads that match the mask (base 10/hex) 24 -mask bitmask Remove reads that match the mask (base 10/hex)
25 -uniq {length} Remove reads that are have the same sequence 25 -uniq {length} Remove reads that are have the same sequence
108 108
109 """ 109 """
110 110
111 import os 111 import os
112 import sys 112 import sys
113
113 import pysam 114 import pysam
114 from ngsutils.bam import bam_iter 115 from ngsutils.bam import bam_iter, read_calc_mismatches, read_calc_mismatches_gen, read_calc_mismatches_ref, read_calc_variations
116 from ngsutils.bed import BedFile
115 from ngsutils.support.dbsnp import DBSNP 117 from ngsutils.support.dbsnp import DBSNP
116 from ngsutils.bam import read_calc_mismatches, read_calc_mismatches_ref, read_calc_mismatches_gen, read_calc_variations
117 from ngsutils.bed import BedFile
118 118
119 119
120 def usage(): 120 def usage():
121 print __doc__ 121 print __doc__
122 print """ 122 print """
205 del_list.append(k) 205 del_list.append(k)
206 206
207 for k in del_list: 207 for k in del_list:
208 self.rev_pos.remove(k) 208 self.rev_pos.remove(k)
209 209
210 if not start_pos in self.rev_pos: 210 if start_pos not in self.rev_pos:
211 self.rev_pos.add(start_pos) 211 self.rev_pos.add(start_pos)
212 return True 212 return True
213 return False 213 return False
214 else: 214 else:
215 if read.pos != self.last_fwd_pos: 215 if read.pos != self.last_fwd_pos:
341 def __repr__(self): 341 def __repr__(self):
342 return 'Excluding: %s' % (self.ref) 342 return 'Excluding: %s' % (self.ref)
343 343
344 def close(self): 344 def close(self):
345 pass 345 pass
346
346 347
347 class IncludeRef(object): 348 class IncludeRef(object):
348 def __init__(self, ref): 349 def __init__(self, ref):
349 self.ref = ref 350 self.ref = ref
350 351
643 pass 644 pass
644 645
645 646
646 class MaskFlag(object): 647 class MaskFlag(object):
647 def __init__(self, value): 648 def __init__(self, value):
648 if type(value) == type(1): 649 if isinstance(value, int):
649 self.flag = value 650 self.flag = value
650 else: 651 else:
651 if value[0:2] == '0x': 652 if value[0:2] == '0x':
652 self.flag = int(value, 16) 653 self.flag = int(value, 16)
653 else: 654 else:
708 709
709 def __repr__(self): 710 def __repr__(self):
710 return "maximum mismatch ratio: %s" % self.val 711 return "maximum mismatch ratio: %s" % self.val
711 712
712 def filter(self, bam, read): 713 def filter(self, bam, read):
713 return read_calc_mismatches(read) <= self.ratio*len(read.seq) 714 return read_calc_mismatches(read) <= self.ratio * len(read.seq)
714 715
715 def close(self): 716 def close(self):
716 pass 717 pass
717 718
718 719
823 824
824 def filter(self, bam, read): 825 def filter(self, bam, read):
825 if self.get_value(read) == self.value: 826 if self.get_value(read) == self.value:
826 return True 827 return True
827 return False 828 return False
829
828 830
829 _criteria = { 831 _criteria = {
830 'mapped': Mapped, 832 'mapped': Mapped,
831 'unmapped': Unmapped, 833 'unmapped': Unmapped,
832 'properpair': ProperPair, 834 'properpair': ProperPair,
893 if not criterion.filter(bamfile, read): 895 if not criterion.filter(bamfile, read):
894 p = False 896 p = False
895 failed += 1 897 failed += 1
896 if failed_out: 898 if failed_out:
897 failed_out.write('%s\t%s\n' % (read.qname, criterion)) 899 failed_out.write('%s\t%s\n' % (read.qname, criterion))
898 #outfile.write(read_to_unmapped(read)) 900 # outfile.write(read_to_unmapped(read))
899 break 901 break
900 if p: 902 if p:
901 passed += 1 903 passed += 1
902 outfile.write(read) 904 outfile.write(read)
903 905
927 read.is_unmapped = True 929 read.is_unmapped = True
928 read.tid = None 930 read.tid = None
929 read.pos = 0 931 read.pos = 0
930 read.mapq = 0 932 read.mapq = 0
931 return read 933 return read
934
932 935
933 if __name__ == '__main__': 936 if __name__ == '__main__':
934 infile = None 937 infile = None
935 outfile = None 938 outfile = None
936 failed = None 939 failed = None