Mercurial > repos > iuc > ngsutils_bam_filter
comparison filter.py @ 2:7a68005de299 draft
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/ngsutils commit 9a243c616a4a3156347e38fdb5f35863ae5133f9
| author | iuc |
|---|---|
| date | Sun, 27 Nov 2016 15:01:21 -0500 |
| parents | 4e4e4093d65d |
| children | 9b9ae5963d3c |
comparison
equal
deleted
inserted
replaced
| 1:8187a729d9f4 | 2:7a68005de299 |
|---|---|
| 1 #!/usr/bin/env python | 1 #!/usr/bin/env python |
| 2 ## category General | 2 # category General |
| 3 ## desc Removes reads from a BAM file based on criteria | 3 # desc Removes reads from a BAM file based on criteria |
| 4 """ | 4 """ |
| 5 Removes reads from a BAM file based on criteria | 5 Removes reads from a BAM file based on criteria |
| 6 | 6 |
| 7 Given a BAM file, this script will only allow reads that meet filtering | 7 Given a BAM file, this script will only allow reads that meet filtering |
| 8 criteria to be written to output. The output is another BAM file with the | 8 criteria to be written to output. The output is another BAM file with the |
| 15 Currently, the available filters are: | 15 Currently, the available filters are: |
| 16 -minlen val Remove reads that are smaller than {val} | 16 -minlen val Remove reads that are smaller than {val} |
| 17 -maxlen val Remove reads that are larger than {val} | 17 -maxlen val Remove reads that are larger than {val} |
| 18 -mapped Keep only mapped reads | 18 -mapped Keep only mapped reads |
| 19 -unmapped Keep only unmapped reads | 19 -unmapped Keep only unmapped reads |
| 20 -properpair Keep only properly paired reads (both mapped, | 20 -properpair Keep only properly paired reads (both mapped, |
| 21 correct orientation, flag set in BAM) | 21 correct orientation, flag set in BAM) |
| 22 -noproperpair Keep only not-properly paired reads | 22 -noproperpair Keep only not-properly paired reads |
| 23 | 23 |
| 24 -mask bitmask Remove reads that match the mask (base 10/hex) | 24 -mask bitmask Remove reads that match the mask (base 10/hex) |
| 25 -uniq {length} Remove reads that are have the same sequence | 25 -uniq {length} Remove reads that are have the same sequence |
| 108 | 108 |
| 109 """ | 109 """ |
| 110 | 110 |
| 111 import os | 111 import os |
| 112 import sys | 112 import sys |
| 113 | |
| 113 import pysam | 114 import pysam |
| 114 from ngsutils.bam import bam_iter | 115 from ngsutils.bam import bam_iter, read_calc_mismatches, read_calc_mismatches_gen, read_calc_mismatches_ref, read_calc_variations |
| 116 from ngsutils.bed import BedFile | |
| 115 from ngsutils.support.dbsnp import DBSNP | 117 from ngsutils.support.dbsnp import DBSNP |
| 116 from ngsutils.bam import read_calc_mismatches, read_calc_mismatches_ref, read_calc_mismatches_gen, read_calc_variations | |
| 117 from ngsutils.bed import BedFile | |
| 118 | 118 |
| 119 | 119 |
| 120 def usage(): | 120 def usage(): |
| 121 print __doc__ | 121 print __doc__ |
| 122 print """ | 122 print """ |
| 205 del_list.append(k) | 205 del_list.append(k) |
| 206 | 206 |
| 207 for k in del_list: | 207 for k in del_list: |
| 208 self.rev_pos.remove(k) | 208 self.rev_pos.remove(k) |
| 209 | 209 |
| 210 if not start_pos in self.rev_pos: | 210 if start_pos not in self.rev_pos: |
| 211 self.rev_pos.add(start_pos) | 211 self.rev_pos.add(start_pos) |
| 212 return True | 212 return True |
| 213 return False | 213 return False |
| 214 else: | 214 else: |
| 215 if read.pos != self.last_fwd_pos: | 215 if read.pos != self.last_fwd_pos: |
| 341 def __repr__(self): | 341 def __repr__(self): |
| 342 return 'Excluding: %s' % (self.ref) | 342 return 'Excluding: %s' % (self.ref) |
| 343 | 343 |
| 344 def close(self): | 344 def close(self): |
| 345 pass | 345 pass |
| 346 | |
| 346 | 347 |
| 347 class IncludeRef(object): | 348 class IncludeRef(object): |
| 348 def __init__(self, ref): | 349 def __init__(self, ref): |
| 349 self.ref = ref | 350 self.ref = ref |
| 350 | 351 |
| 643 pass | 644 pass |
| 644 | 645 |
| 645 | 646 |
| 646 class MaskFlag(object): | 647 class MaskFlag(object): |
| 647 def __init__(self, value): | 648 def __init__(self, value): |
| 648 if type(value) == type(1): | 649 if isinstance(value, int): |
| 649 self.flag = value | 650 self.flag = value |
| 650 else: | 651 else: |
| 651 if value[0:2] == '0x': | 652 if value[0:2] == '0x': |
| 652 self.flag = int(value, 16) | 653 self.flag = int(value, 16) |
| 653 else: | 654 else: |
| 708 | 709 |
| 709 def __repr__(self): | 710 def __repr__(self): |
| 710 return "maximum mismatch ratio: %s" % self.val | 711 return "maximum mismatch ratio: %s" % self.val |
| 711 | 712 |
| 712 def filter(self, bam, read): | 713 def filter(self, bam, read): |
| 713 return read_calc_mismatches(read) <= self.ratio*len(read.seq) | 714 return read_calc_mismatches(read) <= self.ratio * len(read.seq) |
| 714 | 715 |
| 715 def close(self): | 716 def close(self): |
| 716 pass | 717 pass |
| 717 | 718 |
| 718 | 719 |
| 823 | 824 |
| 824 def filter(self, bam, read): | 825 def filter(self, bam, read): |
| 825 if self.get_value(read) == self.value: | 826 if self.get_value(read) == self.value: |
| 826 return True | 827 return True |
| 827 return False | 828 return False |
| 829 | |
| 828 | 830 |
| 829 _criteria = { | 831 _criteria = { |
| 830 'mapped': Mapped, | 832 'mapped': Mapped, |
| 831 'unmapped': Unmapped, | 833 'unmapped': Unmapped, |
| 832 'properpair': ProperPair, | 834 'properpair': ProperPair, |
| 893 if not criterion.filter(bamfile, read): | 895 if not criterion.filter(bamfile, read): |
| 894 p = False | 896 p = False |
| 895 failed += 1 | 897 failed += 1 |
| 896 if failed_out: | 898 if failed_out: |
| 897 failed_out.write('%s\t%s\n' % (read.qname, criterion)) | 899 failed_out.write('%s\t%s\n' % (read.qname, criterion)) |
| 898 #outfile.write(read_to_unmapped(read)) | 900 # outfile.write(read_to_unmapped(read)) |
| 899 break | 901 break |
| 900 if p: | 902 if p: |
| 901 passed += 1 | 903 passed += 1 |
| 902 outfile.write(read) | 904 outfile.write(read) |
| 903 | 905 |
| 927 read.is_unmapped = True | 929 read.is_unmapped = True |
| 928 read.tid = None | 930 read.tid = None |
| 929 read.pos = 0 | 931 read.pos = 0 |
| 930 read.mapq = 0 | 932 read.mapq = 0 |
| 931 return read | 933 return read |
| 934 | |
| 932 | 935 |
| 933 if __name__ == '__main__': | 936 if __name__ == '__main__': |
| 934 infile = None | 937 infile = None |
| 935 outfile = None | 938 outfile = None |
| 936 failed = None | 939 failed = None |
