Mercurial > repos > artbio > bamparse
annotate bamparse.py @ 3:120eb76aa500 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit e8a19ac6ada887e6daa0a2e2abc9ba69392cdb8a
author | artbio |
---|---|
date | Mon, 17 Jul 2023 01:02:17 +0000 |
parents | 8ea06787c08a |
children |
rev | line source |
---|---|
0
2a1a2bc6ae8b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit 4e42cba873625fad03423e65dfffbf4afa91598c
artbio
parents:
diff
changeset
|
1 #!/usr/bin/env python |
2a1a2bc6ae8b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit 4e42cba873625fad03423e65dfffbf4afa91598c
artbio
parents:
diff
changeset
|
2 import argparse |
2a1a2bc6ae8b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit 4e42cba873625fad03423e65dfffbf4afa91598c
artbio
parents:
diff
changeset
|
3 from collections import defaultdict |
2a1a2bc6ae8b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit 4e42cba873625fad03423e65dfffbf4afa91598c
artbio
parents:
diff
changeset
|
4 |
2a1a2bc6ae8b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit 4e42cba873625fad03423e65dfffbf4afa91598c
artbio
parents:
diff
changeset
|
5 import pysam |
2a1a2bc6ae8b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit 4e42cba873625fad03423e65dfffbf4afa91598c
artbio
parents:
diff
changeset
|
6 |
2a1a2bc6ae8b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit 4e42cba873625fad03423e65dfffbf4afa91598c
artbio
parents:
diff
changeset
|
7 |
2a1a2bc6ae8b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit 4e42cba873625fad03423e65dfffbf4afa91598c
artbio
parents:
diff
changeset
|
def Parser():
    """Parse the command-line arguments of the script.

    Options:
        --output      one or more values, described as 'Count tables'
        --alignments  one or more bam alignment files (required)
        --labels      one or more labels for the alignments (required)
        --number      'unique' or 'multiple': the output is a single table
                      or multiple tables

    Returns the argparse.Namespace holding the parsed values. argparse
    exits with a usage message (SystemExit) when a required option is
    missing or a value is invalid.
    """
    the_parser = argparse.ArgumentParser()
    the_parser.add_argument('--output', nargs='+', action='store', type=str,
                            help='Count tables')
    # The alignments and labels are consumed unconditionally by the rest of
    # the script, so a missing option is rejected here with a usage message
    # instead of failing later with a TypeError on None.
    the_parser.add_argument('--alignments', nargs='+', required=True,
                            help="bam alignments files")
    the_parser.add_argument('--labels', nargs='+', required=True,
                            help="Alignments labels")
    the_parser.add_argument('--number',
                            choices=["unique", "multiple"],
                            help="output is a single table or multiple tables")
    return the_parser.parse_args()
2a1a2bc6ae8b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit 4e42cba873625fad03423e65dfffbf4afa91598c
artbio
parents:
diff
changeset
|
20 |
2a1a2bc6ae8b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit 4e42cba873625fad03423e65dfffbf4afa91598c
artbio
parents:
diff
changeset
|
21 |
2
8ea06787c08a
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit 968c9ab925ed768027ff8012d0ff6410fc24f079
artbio
parents:
1
diff
changeset
|
def get_counts(bamfile):
    """
    Takes an AlignmentFile object and returns a dictionary of counts for sense,
    antisense, or both sense and antisense bam alignments to the references,
    depending on the pre-treatment performed by sambamba in the xml wrapper

    Every name in bamfile.references becomes a key whose value is
    bamfile.count(reference=name). The result is a defaultdict(int), so a
    reference name that is not in the file reads as 0.

    NOTE(review): count() on a single reference presumably requires an
    indexed BAM file - confirm against the wrapper.
    """
    counts = defaultdict(int)
    # One pass is enough: each reference is assigned its count directly, so
    # no prior zero-initialisation of the keys is needed.
    for ref_name in bamfile.references:
        counts[ref_name] = bamfile.count(reference=ref_name)
    return counts
2a1a2bc6ae8b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit 4e42cba873625fad03423e65dfffbf4afa91598c
artbio
parents:
diff
changeset
|
34 |
2a1a2bc6ae8b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit 4e42cba873625fad03423e65dfffbf4afa91598c
artbio
parents:
diff
changeset
|
35 |
2a1a2bc6ae8b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit 4e42cba873625fad03423e65dfffbf4afa91598c
artbio
parents:
diff
changeset
|
def writetable(diclist, labels, output, number):
    """Write count dictionaries as tab-separated tables under 'outputdir'.

    diclist is a list of count dictionaries (reference name -> count) and
    labels holds the column label that goes with each dictionary.
    output is accepted for interface compatibility but is not used: the
    file names are fixed.
    number selects the layout:
      - "unique": a single file, outputdir/table.tabular, with a 'gene'
        column followed by one count column per dictionary;
      - any other value: one file per (dictionary, label) pair, named
        outputdir/table<i>.tabular, with a 'gene' column and one count
        column.
    Rows are sorted by reference name. The 'outputdir' directory must
    already exist; open() raises otherwise.
    """
    if number == "unique":
        # Rows are keyed by reference name over the union of all the
        # dictionaries, so each count stays on the row of its own reference
        # even when the dictionaries do not share the same keys; a
        # reference absent from a dictionary is reported as 0. An empty
        # diclist yields a table with only the header line.
        genes = sorted(set().union(*diclist))
        with open("outputdir/table.tabular", "w") as out:
            out.write("gene\t%s\n" % "\t".join(labels))
            for gene in genes:
                line = [str(gene)]
                line.extend(str(dic.get(gene, 0)) for dic in diclist)
                out.write("%s\n" % "\t".join(line))
    else:
        for i, (dic, label) in enumerate(zip(diclist, labels)):
            # The with-block closes each table even if a write fails.
            with open("outputdir/table" + str(i) + ".tabular", "w") as out:
                out.write("gene\t%s\n" % label)
                for gene in sorted(dic):
                    out.write("%s\t%s\n" % (gene, dic[gene]))
2a1a2bc6ae8b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit 4e42cba873625fad03423e65dfffbf4afa91598c
artbio
parents:
diff
changeset
|
57 |
2a1a2bc6ae8b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit 4e42cba873625fad03423e65dfffbf4afa91598c
artbio
parents:
diff
changeset
|
58 |
2
8ea06787c08a
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit 968c9ab925ed768027ff8012d0ff6410fc24f079
artbio
parents:
1
diff
changeset
|
def main(alignments, labels, output, number):
    """Count the alignments of each bam file and write the count tables.

    alignments is an iterable of bam file paths; each one is opened with
    pysam and passed to get_counts(). The resulting list of count
    dictionaries is handed to writetable() together with labels, output
    and number, which are forwarded unchanged.
    """
    diclist = []
    for bam_path in alignments:
        # The context manager closes the bam handle as soon as its counts
        # are collected, including when counting raises.
        with pysam.AlignmentFile(bam_path, 'rb') as bam_object:
            diclist.append(get_counts(bam_object))
    writetable(diclist, labels, output, number)
2a1a2bc6ae8b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit 4e42cba873625fad03423e65dfffbf4afa91598c
artbio
parents:
diff
changeset
|
65 |
2a1a2bc6ae8b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit 4e42cba873625fad03423e65dfffbf4afa91598c
artbio
parents:
diff
changeset
|
66 |
2a1a2bc6ae8b
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/bamparse commit 4e42cba873625fad03423e65dfffbf4afa91598c
artbio
parents:
diff
changeset
|
if __name__ == "__main__":
    # Script entry point: read the command-line options and forward each
    # of them to main() by name.
    args = Parser()
    main(alignments=args.alignments, labels=args.labels,
         output=args.output, number=args.number)