annotate list_spaln_tables.py @ 3:32c11a4b5dbf draft

"planemo upload for repository https://github.com/ogotoh/spaln commit cb502aebea8ea9ba332768864b8b6daf933e4da8"
author iuc
date Wed, 02 Feb 2022 14:15:38 +0000
parents 37b5e1f0b544
children
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
1
37b5e1f0b544 "planemo upload for repository https://github.com/ogotoh/spaln commit 4cfc21ef8456ca8b8da0a8a8c045b8a472858608"
iuc
parents:
diff changeset
1 #!/usr/bin/env python3
37b5e1f0b544 "planemo upload for repository https://github.com/ogotoh/spaln commit 4cfc21ef8456ca8b8da0a8a8c045b8a472858608"
iuc
parents:
diff changeset
2
37b5e1f0b544 "planemo upload for repository https://github.com/ogotoh/spaln commit 4cfc21ef8456ca8b8da0a8a8c045b8a472858608"
iuc
parents:
diff changeset
3 import argparse
37b5e1f0b544 "planemo upload for repository https://github.com/ogotoh/spaln commit 4cfc21ef8456ca8b8da0a8a8c045b8a472858608"
iuc
parents:
diff changeset
4 import shlex
37b5e1f0b544 "planemo upload for repository https://github.com/ogotoh/spaln commit 4cfc21ef8456ca8b8da0a8a8c045b8a472858608"
iuc
parents:
diff changeset
5 import sys
37b5e1f0b544 "planemo upload for repository https://github.com/ogotoh/spaln commit 4cfc21ef8456ca8b8da0a8a8c045b8a472858608"
iuc
parents:
diff changeset
6 from subprocess import run
37b5e1f0b544 "planemo upload for repository https://github.com/ogotoh/spaln commit 4cfc21ef8456ca8b8da0a8a8c045b8a472858608"
iuc
parents:
diff changeset
7 from typing import TextIO
37b5e1f0b544 "planemo upload for repository https://github.com/ogotoh/spaln commit 4cfc21ef8456ca8b8da0a8a8c045b8a472858608"
iuc
parents:
diff changeset
8
37b5e1f0b544 "planemo upload for repository https://github.com/ogotoh/spaln commit 4cfc21ef8456ca8b8da0a8a8c045b8a472858608"
iuc
parents:
diff changeset
9
37b5e1f0b544 "planemo upload for repository https://github.com/ogotoh/spaln commit 4cfc21ef8456ca8b8da0a8a8c045b8a472858608"
iuc
parents:
diff changeset
def find_common_ancestor_distance(
    taxon: str, other_taxon: str, taxonomy_db_path: str, only_canonical: bool
):
    """Run ``taxonomy_util common_ancestor_distance`` for a pair of taxa.

    Args:
        taxon: Taxon passed as the last positional argument of the command.
        other_taxon: Taxon passed as the first positional argument.
        taxonomy_db_path: Value handed to the ``-d`` option of
            ``taxonomy_util``.
        only_canonical: When true, the ``--only_canonical`` flag is added.

    Returns:
        The ``subprocess.CompletedProcess`` of the call, with stdout and
        stderr captured and decoded as UTF-8 text.
    """
    # Build argv directly instead of formatting a quoted string and parsing
    # it back with shlex: a taxon name containing an apostrophe made
    # shlex.split() raise ValueError, and a database path containing
    # whitespace was split into several arguments.
    cmd = ["taxonomy_util", "-d", taxonomy_db_path, "common_ancestor_distance"]
    if only_canonical:
        cmd.append("--only_canonical")
    cmd.extend([other_taxon, taxon])
    return run(cmd, encoding="utf8", capture_output=True)
37b5e1f0b544 "planemo upload for repository https://github.com/ogotoh/spaln commit 4cfc21ef8456ca8b8da0a8a8c045b8a472858608"
iuc
parents:
diff changeset
18
37b5e1f0b544 "planemo upload for repository https://github.com/ogotoh/spaln commit 4cfc21ef8456ca8b8da0a8a8c045b8a472858608"
iuc
parents:
diff changeset
19
37b5e1f0b544 "planemo upload for repository https://github.com/ogotoh/spaln commit 4cfc21ef8456ca8b8da0a8a8c045b8a472858608"
iuc
parents:
diff changeset
def find_distances(gnm2tab_file: TextIO, taxon: str, taxonomy_db_path: str):
    """Print common-ancestor distances between ``taxon`` and each table entry.

    ``taxon`` is first looked up with ``taxonomy_util get_id``; if the tool
    reports "not found in" on stderr the program exits with that message.

    Each non-blank line of ``gnm2tab_file`` is split on tabs and its first
    three fields are read as species code, settings and taxon name. For every
    entry the canonical common-ancestor distance is queried. If that query
    writes anything to stderr, a warning is printed to stderr and the entry
    is skipped. Otherwise the non-canonical distance is queried as well and
    one tab-separated row is printed to stdout: non-canonical distance,
    canonical query output, species code, settings, taxon name.

    Args:
        gnm2tab_file: Open text file with tab-separated entries.
        taxon: Taxon every entry is compared against.
        taxonomy_db_path: Value handed to the ``-d`` option of
            ``taxonomy_util``.

    Raises:
        SystemExit: If ``taxon`` is reported as not found.
    """
    cmd = ["taxonomy_util", "-d", taxonomy_db_path, "get_id", taxon]
    proc = run(cmd, capture_output=True, encoding="utf8")
    if "not found in" in proc.stderr:
        # sys.exit rather than the site-provided exit(), which is meant for
        # the interactive interpreter and is absent under ``python -S``.
        sys.exit("Error: " + proc.stderr.strip())

    for line in gnm2tab_file:
        if not line.strip():
            # Blank lines (e.g. a trailing newline) carry no entry.
            continue
        fields = [field.strip() for field in line.split("\t")[:3]]
        if len(fields) < 3:
            # Unpacking a short line would raise ValueError; report and skip.
            print(
                "Warning: skipping malformed gnm2tab line:",
                line.rstrip(),
                file=sys.stderr,
            )
            continue
        species_code, settings, other_taxon = fields

        proc = find_common_ancestor_distance(taxon, other_taxon, taxonomy_db_path, True)
        ancestor_info = proc.stdout.rstrip()
        if proc.stderr != "":
            print("Warning:", other_taxon, proc.stderr.rstrip(), file=sys.stderr)
            continue

        proc = find_common_ancestor_distance(
            taxon, other_taxon, taxonomy_db_path, False
        )
        # Only the first tab-separated field of the non-canonical output is
        # reported; the canonical output is printed in full.
        non_canonical_distance = proc.stdout.split("\t")[0]
        print(
            non_canonical_distance,
            ancestor_info,
            species_code,
            settings,
            other_taxon,
            sep="\t",
        )
37b5e1f0b544 "planemo upload for repository https://github.com/ogotoh/spaln commit 4cfc21ef8456ca8b8da0a8a8c045b8a472858608"
iuc
parents:
diff changeset
45
37b5e1f0b544 "planemo upload for repository https://github.com/ogotoh/spaln commit 4cfc21ef8456ca8b8da0a8a8c045b8a472858608"
iuc
parents:
diff changeset
46
37b5e1f0b544 "planemo upload for repository https://github.com/ogotoh/spaln commit 4cfc21ef8456ca8b8da0a8a8c045b8a472858608"
iuc
parents:
diff changeset
if __name__ == "__main__":
    # Command-line entry point: parse the arguments and print the distance
    # table for the requested taxon.
    parser = argparse.ArgumentParser(description="Find distance to common ancestor")
    parser.add_argument(
        "--taxonomy_db", required=True, help="NCBI Taxonomy database (SQLite format)"
    )
    parser.add_argument(
        "--gnm2tab_file",
        required=True,
        type=argparse.FileType(),
        help="gnm2tab file from spaln",
    )
    parser.add_argument(
        "taxon", help="taxon to compare each gnm2tab entry against"
    )
    args = parser.parse_args()

    # argparse.FileType opens the file while parsing; close it explicitly
    # once the table has been processed instead of leaking the handle.
    with args.gnm2tab_file as gnm2tab_file:
        find_distances(gnm2tab_file, args.taxon, args.taxonomy_db)