comparison bamparse.py @ 0:2a1a2bc6ae8b draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit 4e42cba873625fad03423e65dfffbf4afa91598c
author artbio
date Fri, 13 Oct 2017 02:59:36 -0400
parents
children ae9ea0488850
comparison
equal deleted inserted replaced
-1:000000000000 0:2a1a2bc6ae8b
1 #!/usr/bin/env python
2 import argparse
3 from collections import defaultdict
4
5 import pysam
6
7
8 def Parser():
9 the_parser = argparse.ArgumentParser()
10 the_parser.add_argument('--output', nargs='+', action='store', type=str,
11 help='Count tables')
12 the_parser.add_argument('--polarity',
13 choices=["sense", "antisense", "both"],
14 help="forward, reverse or both forward an\
15 reverse reads are counted")
16 the_parser.add_argument('--alignments', nargs='+',
17 help="bam alignments files")
18 the_parser.add_argument('--labels', nargs='+', help="Alignments labels")
19 the_parser.add_argument('--number',
20 choices=["unique", "multiple"],
21 help="output is a single table or multiple tables")
22 args = the_parser.parse_args()
23 return args
24
25
26 def get_counts(bamfile, polarity="both"):
27 """
28 Takes an AlignmentFile object and returns a dictionary of counts for sense,
29 antisense, or both sense and antisense reads aligning to the bam references
30 """
31 def filter_sense_read(read):
32 if read.is_reverse:
33 return 0
34 else:
35 return 1
36
37 def filter_antisense_read(read):
38 if read.is_reverse:
39 return 1
40 else:
41 return 0
42
43 counts = defaultdict(int)
44 for ref_name in bamfile.references:
45 counts[ref_name] = 0
46 if polarity == "both":
47 for ref_name in bamfile.references:
48 counts[ref_name] = bamfile.count(reference=ref_name)
49 if polarity == "sense":
50 for ref_name in bamfile.references:
51 for read in bamfile.fetch(ref_name):
52 counts[ref_name] += filter_sense_read(read)
53 if polarity == "antisense":
54 for ref_name in bamfile.references:
55 for read in bamfile.fetch(ref_name):
56 counts[ref_name] += filter_antisense_read(read)
57 return counts
58
59
60 def writetable(diclist, labels, output, number):
61 ''' diclist is a list of count dictionnaries '''
62 countlists = []
63 for dic in diclist:
64 counts = sorted(dic.items())
65 counts = [j for (i,j) in counts]
66 countlists.append(counts)
67 if number == "unique":
68 out = open("outputdir/table.tabular", "w")
69 out.write("gene\t%s\n" % "\t".join(labels))
70 for countline in zip(sorted(diclist[0]), *countlists):
71 line = [ str(i) for i in countline]
72 out.write("%s\n" % "\t".join(line))
73 out.close()
74 else:
75 for i, (dic, label) in enumerate(zip(diclist, labels)):
76 out = open("outputdir/table" + str(i) + ".tabular", "w")
77 out.write("gene\t%s\n" % label)
78 for gene in sorted(dic):
79 out.write("%s\t%s\n" % (gene, dic[gene]))
80 out.close()
81
82
83
84 def main(alignments, labels, polarity, output, number):
85 diclist = []
86 for file in alignments:
87 bam_object = pysam.AlignmentFile(file, 'rb')
88 diclist.append(get_counts(bam_object, polarity=polarity))
89 writetable(diclist, labels, output, number)
90
91
92 if __name__ == "__main__":
93 args = Parser()
94 main(args.alignments, args.labels, args.polarity, args.output, args.number)