comparison bamparse.py @ 2:8ea06787c08a draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit 968c9ab925ed768027ff8012d0ff6410fc24f079
author artbio
date Tue, 09 Oct 2018 17:14:57 -0400
parents ae9ea0488850
children
comparison
equal deleted inserted replaced
1:ae9ea0488850 2:8ea06787c08a
7 7
8 def Parser(): 8 def Parser():
9 the_parser = argparse.ArgumentParser() 9 the_parser = argparse.ArgumentParser()
10 the_parser.add_argument('--output', nargs='+', action='store', type=str, 10 the_parser.add_argument('--output', nargs='+', action='store', type=str,
11 help='Count tables') 11 help='Count tables')
12 the_parser.add_argument('--polarity',
13 choices=["sense", "antisense", "both"],
14 help="forward, reverse or both forward an\
15 reverse reads are counted")
16 the_parser.add_argument('--alignments', nargs='+', 12 the_parser.add_argument('--alignments', nargs='+',
17 help="bam alignments files") 13 help="bam alignments files")
18 the_parser.add_argument('--labels', nargs='+', help="Alignments labels") 14 the_parser.add_argument('--labels', nargs='+', help="Alignments labels")
19 the_parser.add_argument('--number', 15 the_parser.add_argument('--number',
20 choices=["unique", "multiple"], 16 choices=["unique", "multiple"],
21 help="output is a single table or multiple tables") 17 help="output is a single table or multiple tables")
22 args = the_parser.parse_args() 18 args = the_parser.parse_args()
23 return args 19 return args
24 20
25 21
26 def get_counts(bamfile, polarity="both"): 22 def get_counts(bamfile):
27 """ 23 """
28 Takes an AlignmentFile object and returns a dictionary of counts for sense, 24 Takes an AlignmentFile object and returns a dictionary of counts for sense,
29 antisense, or both sense and antisense reads aligning to the bam references 25 antisense, or both sense and antisense bam alignments to the references,
26 depending on the pre-treatment performed by sambamba in the xml wrapper
30 """ 27 """
31 def filter_sense_read(read):
32 if read.is_reverse:
33 return 0
34 else:
35 return 1
36
37 def filter_antisense_read(read):
38 if read.is_reverse:
39 return 1
40 else:
41 return 0
42
43 counts = defaultdict(int) 28 counts = defaultdict(int)
44 for ref_name in bamfile.references: 29 for ref_name in bamfile.references:
45 counts[ref_name] = 0 30 counts[ref_name] = 0
46 if polarity == "both": 31 for ref_name in bamfile.references:
47 for ref_name in bamfile.references: 32 counts[ref_name] = bamfile.count(reference=ref_name)
48 counts[ref_name] = bamfile.count(reference=ref_name)
49 if polarity == "sense":
50 for ref_name in bamfile.references:
51 for read in bamfile.fetch(ref_name):
52 counts[ref_name] += filter_sense_read(read)
53 if polarity == "antisense":
54 for ref_name in bamfile.references:
55 for read in bamfile.fetch(ref_name):
56 counts[ref_name] += filter_antisense_read(read)
57 return counts 33 return counts
58 34
59 35
60 def writetable(diclist, labels, output, number): 36 def writetable(diclist, labels, output, number):
61 ''' diclist is a list of count dictionnaries ''' 37 ''' diclist is a list of count dictionnaries '''
78 for gene in sorted(dic): 54 for gene in sorted(dic):
79 out.write("%s\t%s\n" % (gene, dic[gene])) 55 out.write("%s\t%s\n" % (gene, dic[gene]))
80 out.close() 56 out.close()
81 57
82 58
83 def main(alignments, labels, polarity, output, number): 59 def main(alignments, labels, output, number):
84 diclist = [] 60 diclist = []
85 for file in alignments: 61 for file in alignments:
86 bam_object = pysam.AlignmentFile(file, 'rb') 62 bam_object = pysam.AlignmentFile(file, 'rb')
87 diclist.append(get_counts(bam_object, polarity=polarity)) 63 diclist.append(get_counts(bam_object))
88 writetable(diclist, labels, output, number) 64 writetable(diclist, labels, output, number)
89 65
90 66
91 if __name__ == "__main__": 67 if __name__ == "__main__":
92 args = Parser() 68 args = Parser()
93 main(args.alignments, args.labels, args.polarity, args.output, args.number) 69 main(args.alignments, args.labels, args.output, args.number)