Mercurial > repos > artbio > bamparse
comparison bamparse.py @ 0:2a1a2bc6ae8b draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit 4e42cba873625fad03423e65dfffbf4afa91598c
author | artbio |
---|---|
date | Fri, 13 Oct 2017 02:59:36 -0400 |
parents | |
children | ae9ea0488850 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:2a1a2bc6ae8b |
---|---|
1 #!/usr/bin/env python | |
2 import argparse | |
3 from collections import defaultdict | |
4 | |
5 import pysam | |
6 | |
7 | |
8 def Parser(): | |
9 the_parser = argparse.ArgumentParser() | |
10 the_parser.add_argument('--output', nargs='+', action='store', type=str, | |
11 help='Count tables') | |
12 the_parser.add_argument('--polarity', | |
13 choices=["sense", "antisense", "both"], | |
14 help="forward, reverse or both forward an\ | |
15 reverse reads are counted") | |
16 the_parser.add_argument('--alignments', nargs='+', | |
17 help="bam alignments files") | |
18 the_parser.add_argument('--labels', nargs='+', help="Alignments labels") | |
19 the_parser.add_argument('--number', | |
20 choices=["unique", "multiple"], | |
21 help="output is a single table or multiple tables") | |
22 args = the_parser.parse_args() | |
23 return args | |
24 | |
25 | |
26 def get_counts(bamfile, polarity="both"): | |
27 """ | |
28 Takes an AlignmentFile object and returns a dictionary of counts for sense, | |
29 antisense, or both sense and antisense reads aligning to the bam references | |
30 """ | |
31 def filter_sense_read(read): | |
32 if read.is_reverse: | |
33 return 0 | |
34 else: | |
35 return 1 | |
36 | |
37 def filter_antisense_read(read): | |
38 if read.is_reverse: | |
39 return 1 | |
40 else: | |
41 return 0 | |
42 | |
43 counts = defaultdict(int) | |
44 for ref_name in bamfile.references: | |
45 counts[ref_name] = 0 | |
46 if polarity == "both": | |
47 for ref_name in bamfile.references: | |
48 counts[ref_name] = bamfile.count(reference=ref_name) | |
49 if polarity == "sense": | |
50 for ref_name in bamfile.references: | |
51 for read in bamfile.fetch(ref_name): | |
52 counts[ref_name] += filter_sense_read(read) | |
53 if polarity == "antisense": | |
54 for ref_name in bamfile.references: | |
55 for read in bamfile.fetch(ref_name): | |
56 counts[ref_name] += filter_antisense_read(read) | |
57 return counts | |
58 | |
59 | |
60 def writetable(diclist, labels, output, number): | |
61 ''' diclist is a list of count dictionnaries ''' | |
62 countlists = [] | |
63 for dic in diclist: | |
64 counts = sorted(dic.items()) | |
65 counts = [j for (i,j) in counts] | |
66 countlists.append(counts) | |
67 if number == "unique": | |
68 out = open("outputdir/table.tabular", "w") | |
69 out.write("gene\t%s\n" % "\t".join(labels)) | |
70 for countline in zip(sorted(diclist[0]), *countlists): | |
71 line = [ str(i) for i in countline] | |
72 out.write("%s\n" % "\t".join(line)) | |
73 out.close() | |
74 else: | |
75 for i, (dic, label) in enumerate(zip(diclist, labels)): | |
76 out = open("outputdir/table" + str(i) + ".tabular", "w") | |
77 out.write("gene\t%s\n" % label) | |
78 for gene in sorted(dic): | |
79 out.write("%s\t%s\n" % (gene, dic[gene])) | |
80 out.close() | |
81 | |
82 | |
83 | |
84 def main(alignments, labels, polarity, output, number): | |
85 diclist = [] | |
86 for file in alignments: | |
87 bam_object = pysam.AlignmentFile(file, 'rb') | |
88 diclist.append(get_counts(bam_object, polarity=polarity)) | |
89 writetable(diclist, labels, output, number) | |
90 | |
91 | |
92 if __name__ == "__main__": | |
93 args = Parser() | |
94 main(args.alignments, args.labels, args.polarity, args.output, args.number) |