comparison list_spaln_tables.py @ 1:37b5e1f0b544 draft

"planemo upload for repository https://github.com/ogotoh/spaln commit 4cfc21ef8456ca8b8da0a8a8c045b8a472858608"
author iuc
date Thu, 16 Jul 2020 07:57:10 -0400
parents
children
comparison
equal deleted inserted replaced
0:95ea8d97abb4 1:37b5e1f0b544
1 #!/usr/bin/env python3
2
3 import argparse
4 import shlex
5 import sys
6 from subprocess import run
7 from typing import TextIO
8
9
10 def find_common_ancestor_distance(
11 taxon: str, other_taxon: str, taxonomy_db_path: str, only_canonical: bool
12 ):
13 canonical = "--only_canonical" if only_canonical else ""
14 cmd_str = f"taxonomy_util -d {taxonomy_db_path} common_ancestor_distance {canonical} '{other_taxon}' '{taxon}'"
15 cmd = shlex.split(cmd_str)
16 proc = run(cmd, encoding="utf8", capture_output=True)
17 return proc
18
19
20 def find_distances(gnm2tab_file: TextIO, taxon: str, taxonomy_db_path: str):
21 cmd = ["taxonomy_util", "-d", taxonomy_db_path, "get_id", taxon]
22 proc = run(cmd, capture_output=True, encoding="utf8")
23 if "not found in" in proc.stderr:
24 exit("Error: " + proc.stderr.strip())
25 for line in gnm2tab_file:
26 fields = line.split("\t")
27 (species_code, settings, other_taxon) = map(lambda el: el.strip(), fields[:3])
28 proc = find_common_ancestor_distance(taxon, other_taxon, taxonomy_db_path, True)
29 ancestor_info = proc.stdout.rstrip()
30 if proc.stderr != "":
31 print("Warning:", other_taxon, proc.stderr.rstrip(), file=sys.stderr)
32 else:
33 proc = find_common_ancestor_distance(
34 taxon, other_taxon, taxonomy_db_path, False
35 )
36 non_canonical_distance = proc.stdout.split("\t")[0]
37 print(
38 non_canonical_distance,
39 ancestor_info,
40 species_code,
41 settings,
42 other_taxon,
43 sep="\t",
44 )
45
46
47 if __name__ == "__main__":
48 parser = argparse.ArgumentParser(description="Find distance to common ancestor")
49 parser.add_argument(
50 "--taxonomy_db", required=True, help="NCBI Taxonomy database (SQLite format)"
51 )
52 parser.add_argument(
53 "--gnm2tab_file",
54 required=True,
55 type=argparse.FileType(),
56 help="gnm2tab file from spal",
57 )
58 parser.add_argument("taxon")
59 args = parser.parse_args()
60
61 find_distances(args.gnm2tab_file, args.taxon, args.taxonomy_db)