Mercurial > repos > petr-novak > repeat_annotation_pipeline3
annotate dustmasker_wrapper.py @ 12:755a4d643184 draft default tip
planemo upload commit a61591d548f42ff417781e7fe7418dc2901ccc23
author | petr-novak |
---|---|
date | Tue, 26 Sep 2023 07:28:04 +0000 |
parents | 5366d5ea04bc |
children |
rev | line source |
---|---|
11
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
1 #!/usr/bin/env python |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
2 """ |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
3 This script is used to run dustmasker on a fasta file. It will create |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
4 a bed file from default dustmasker output. |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
5 """ |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
6 import argparse |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
7 import subprocess |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
8 import tempfile |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
9 |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
10 # parse arguments from command line, and pass them to dustmasker |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
11 |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
12 parser = argparse.ArgumentParser( |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
13 description="""This script is used to run dustmasker on a fasta file. It will create |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
14 a bed file from default dustmasker output.""", |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
15 formatter_class=argparse.RawTextHelpFormatter, ) |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
16 parser.add_argument( |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
17 '-f', '--fasta', default=None, required=True, help="fasta file", type=str, |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
18 action='store' |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
19 ) |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
20 parser.add_argument( |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
21 '-o', '--output', default=None, required=True, help="output file name", type=str, |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
22 action='store' |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
23 ) |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
24 parser.add_argument( |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
25 '-w', '--window', default=60, required=False, help="dustmasker window size", type=int, |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
26 action='store' |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
27 ) |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
28 parser.add_argument( |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
29 '-l', '--level', default=20, required=False, help="dustmasker level", action='store' |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
30 ) |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
31 args = parser.parse_args() |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
32 |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
33 |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
34 def main(args): |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
35 """ |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
36 run dustmasker and convert output to bed file |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
37 """ |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
38 # temp file for dustmasker output |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
39 tmpfile = tempfile.NamedTemporaryFile().name |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
40 # run dustmasker |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
41 subprocess.call( |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
42 ["dustmasker", "-in", args.fasta, "-out", tmpfile, "-window", str(args.window), |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
43 "-level", str(args.level)] |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
44 ) |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
45 # create bed file from dustmasker output |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
46 # syntax of dustmasker output is: |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
47 # >contig_name |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
48 # start_position - end_position |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
49 # start_position - end_position |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
50 # ... |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
51 # if not masked, only contig name is printed |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
52 with open(args.output, "w") as f: |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
53 with open(tmpfile, "r") as f2: |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
54 for line in f2: |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
55 if line[0] == ">": |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
56 contig_name = line.strip()[1:] |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
57 continue |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
58 else: |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
59 line = line.strip() |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
60 line = line.split() |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
61 f.write(contig_name + "\t" + line[0] + "\t" + line[2] + "\n") |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
62 |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
63 |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
64 if __name__ == '__main__': |
5366d5ea04bc
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
petr-novak
parents:
diff
changeset
|
65 main(args) |