Mercurial > repos > petr-novak > repeat_annotation_pipeline3
comparison dustmasker_wrapper.py @ 11:5366d5ea04bc draft
planemo upload commit 9d1b19f98d8b7f0a0d1baf2da63a373d155626f8-dirty
| author | petr-novak |
|---|---|
| date | Fri, 04 Aug 2023 12:35:32 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| 10:276efc4cb17f | 11:5366d5ea04bc |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 """ | |
| 3 This script is used to run dustmasker on a fasta file. It will create | |
| 4 a bed file from default dustmasker output. | |
| 5 """ | |
| 6 import argparse | |
| 7 import subprocess | |
| 8 import tempfile | |
| 9 | |
# Command-line interface: parse the arguments and pass them on to dustmasker.
# NOTE: parsing runs at module level, so `args` is populated as soon as this
# file is executed or imported.
parser = argparse.ArgumentParser(
    description="""This script is used to run dustmasker on a fasta file. It will create
a bed file from default dustmasker output.""",
    formatter_class=argparse.RawTextHelpFormatter,
)
# Input sequences handed to dustmasker via "-in".
parser.add_argument(
    '-f', '--fasta', default=None, required=True, help="fasta file", type=str,
    action='store'
)
# Destination of the generated BED file.
parser.add_argument(
    '-o', '--output', default=None, required=True, help="output file name", type=str,
    action='store'
)
# Forwarded to dustmasker as "-window".
parser.add_argument(
    '-w', '--window', default=60, required=False, help="dustmasker window size", type=int,
    action='store'
)
# Forwarded to dustmasker as "-level"; typed as int (like --window) so that
# non-numeric values are rejected here instead of reaching dustmasker.
parser.add_argument(
    '-l', '--level', default=20, required=False, help="dustmasker level", type=int,
    action='store'
)
args = parser.parse_args()
| 32 | |
| 33 | |
def main(args):
    """
    Run dustmasker and convert its output to a BED file.

    Parameters
    ----------
    args : object with the attributes ``fasta`` (input FASTA path),
        ``output`` (BED path to write), ``window`` and ``level``
        (forwarded to dustmasker as ``-window`` / ``-level``).

    Raises
    ------
    subprocess.CalledProcessError
        If dustmasker exits with a non-zero status.
    ValueError
        If an interval line appears before any ``>contig`` header.
    """
    # Keep the temporary file open while it is in use: its name stays
    # reserved and the file is removed when the ``with`` block exits.
    # Reopening it by name while the handle is open is assumed to work on
    # the target platform (true on POSIX) -- TODO confirm if others matter.
    with tempfile.NamedTemporaryFile(suffix=".dust") as tmp:
        # check_call raises on failure so a broken dustmasker run is not
        # silently turned into an empty or partial BED file.
        subprocess.check_call(
            ["dustmasker", "-in", args.fasta, "-out", tmp.name,
             "-window", str(args.window), "-level", str(args.level)]
        )
        # Syntax of the dustmasker output is:
        # >contig_name
        # start_position - end_position
        # start_position - end_position
        # ...
        # If a contig is not masked, only its name is printed.
        # NOTE(review): coordinates are copied verbatim; confirm whether the
        # dustmasker end position is inclusive, since BED ends are exclusive.
        contig_name = None
        with open(args.output, "w") as bed, open(tmp.name, "r") as intervals:
            for line in intervals:
                if line.startswith(">"):
                    contig_name = line.strip()[1:]
                    continue
                fields = line.split()
                if not fields:
                    # tolerate blank lines instead of failing on fields[0]
                    continue
                if contig_name is None:
                    raise ValueError(
                        "dustmasker output has an interval before any contig header: "
                        + line.strip()
                    )
                # fields are: start, "-", end
                bed.write(contig_name + "\t" + fields[0] + "\t" + fields[2] + "\n")
| 62 | |
| 63 | |
# Script entry point: run the dustmasker-to-BED conversion with the parsed
# command-line arguments.
if __name__ == "__main__":
    main(args)
