annotate dustmasker_wrapper.py @ 12:755a4d643184 draft default tip

planemo upload commit a61591d548f42ff417781e7fe7418dc2901ccc23
author petr-novak
date Tue, 26 Sep 2023 07:28:04 +0000
parents 5366d5ea04bc
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
11
5366d5ea04bc planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff changeset
1 #!/usr/bin/env python
5366d5ea04bc planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff changeset
2 """
5366d5ea04bc planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff changeset
3 This script is used to run dustmasker on a fasta file. It will create
5366d5ea04bc planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff changeset
4 a bed file from default dustmasker output.
5366d5ea04bc planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff changeset
5 """
5366d5ea04bc planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff changeset
6 import argparse
5366d5ea04bc planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff changeset
7 import subprocess
5366d5ea04bc planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff changeset
8 import tempfile
5366d5ea04bc planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff changeset
9
5366d5ea04bc planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff changeset
10 # parse arguments from the command line and pass them to dustmasker
5366d5ea04bc planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff changeset
11
5366d5ea04bc planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff changeset
# Command line interface of the wrapper. The window and level values are
# forwarded to dustmasker; both are declared as integers so that a malformed
# value is rejected by argparse with a usage message before dustmasker runs.
parser = argparse.ArgumentParser(
    description=(
        "This script is used to run dustmasker on a fasta file. It will create\n"
        "a bed file from default dustmasker output."
    ),
    formatter_class=argparse.RawTextHelpFormatter,
)
parser.add_argument(
    '-f', '--fasta', default=None, required=True, help="fasta file", type=str,
    action='store'
)
parser.add_argument(
    '-o', '--output', default=None, required=True, help="output file name", type=str,
    action='store'
)
parser.add_argument(
    '-w', '--window', default=60, required=False, help="dustmasker window size", type=int,
    action='store'
)
parser.add_argument(
    # type=int added for consistency with --window
    '-l', '--level', default=20, required=False, help="dustmasker level", type=int,
    action='store'
)
# Parsed at module level because the entry-point guard at the bottom of the
# script passes this module-level ``args`` object to main().
args = parser.parse_args()
5366d5ea04bc planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff changeset
32
5366d5ea04bc planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff changeset
33
5366d5ea04bc planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff changeset
def main(args):
    """
    Run dustmasker and convert its output to a bed file.

    Parameters:
        args: parsed command line arguments; the attributes ``fasta``,
            ``output``, ``window`` and ``level`` are read.

    Raises:
        subprocess.CalledProcessError: if dustmasker exits with a non-zero
            status (the output file is not created in that case).
        ValueError: if a dustmasker output line cannot be interpreted.
    """
    # imported locally because the module does not import os at the top
    import os

    # dustmasker writes into a private temporary directory that is removed
    # when the block exits, so the intermediate file is always cleaned up and
    # its path cannot be claimed by another process in the meantime
    with tempfile.TemporaryDirectory() as tmpdir:
        tmpfile = os.path.join(tmpdir, "dustmasker.out")
        # check_call raises on failure instead of silently continuing with a
        # missing or truncated intermediate file
        subprocess.check_call(
            ["dustmasker", "-in", args.fasta, "-out", tmpfile,
             "-window", str(args.window), "-level", str(args.level)]
        )
        with open(tmpfile, "r") as intervals, open(args.output, "w") as bed:
            _intervals_to_bed(intervals, bed)


def _intervals_to_bed(intervals, bed):
    """
    Write one tab-separated bed line per dustmasker interval line.

    Syntax of the dustmasker output is:
        >contig_name
        start_position - end_position
        start_position - end_position
        ...
    If a contig is not masked, only its name is printed.
    """
    contig_name = None
    for line in intervals:
        line = line.strip()
        if not line:
            # tolerate blank lines instead of failing with IndexError
            continue
        if line.startswith(">"):
            contig_name = line[1:]
            continue
        fields = line.split()
        if contig_name is None or len(fields) < 3:
            raise ValueError("unexpected dustmasker output line: " + repr(line))
        # NOTE(review): the contig name and both coordinates are copied
        # verbatim from dustmasker. Bed intervals are 0-based, half-open -
        # confirm whether the end coordinate needs adjusting, and whether
        # header lines can contain whitespace.
        bed.write(contig_name + "\t" + fields[0] + "\t" + fields[2] + "\n")
5366d5ea04bc planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff changeset
62
5366d5ea04bc planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff changeset
63
5366d5ea04bc planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff changeset
# Script entry point: run the conversion with the arguments that were parsed
# at module level.
if __name__ == "__main__":
    main(args)