Mercurial > repos > iuc > spaln
diff list_spaln_tables.py @ 1:37b5e1f0b544 draft
"planemo upload for repository https://github.com/ogotoh/spaln commit 4cfc21ef8456ca8b8da0a8a8c045b8a472858608"
author | iuc |
---|---|
date | Thu, 16 Jul 2020 07:57:10 -0400 |
parents | |
children |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/list_spaln_tables.py Thu Jul 16 07:57:10 2020 -0400 @@ -0,0 +1,61 @@ +#!/usr/bin/env python3 + +import argparse +import shlex +import sys +from subprocess import run +from typing import TextIO + + +def find_common_ancestor_distance( + taxon: str, other_taxon: str, taxonomy_db_path: str, only_canonical: bool +): + canonical = "--only_canonical" if only_canonical else "" + cmd_str = f"taxonomy_util -d {taxonomy_db_path} common_ancestor_distance {canonical} '{other_taxon}' '{taxon}'" + cmd = shlex.split(cmd_str) + proc = run(cmd, encoding="utf8", capture_output=True) + return proc + + +def find_distances(gnm2tab_file: TextIO, taxon: str, taxonomy_db_path: str): + cmd = ["taxonomy_util", "-d", taxonomy_db_path, "get_id", taxon] + proc = run(cmd, capture_output=True, encoding="utf8") + if "not found in" in proc.stderr: + exit("Error: " + proc.stderr.strip()) + for line in gnm2tab_file: + fields = line.split("\t") + (species_code, settings, other_taxon) = map(lambda el: el.strip(), fields[:3]) + proc = find_common_ancestor_distance(taxon, other_taxon, taxonomy_db_path, True) + ancestor_info = proc.stdout.rstrip() + if proc.stderr != "": + print("Warning:", other_taxon, proc.stderr.rstrip(), file=sys.stderr) + else: + proc = find_common_ancestor_distance( + taxon, other_taxon, taxonomy_db_path, False + ) + non_canonical_distance = proc.stdout.split("\t")[0] + print( + non_canonical_distance, + ancestor_info, + species_code, + settings, + other_taxon, + sep="\t", + ) + + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description="Find distance to common ancestor") + parser.add_argument( + "--taxonomy_db", required=True, help="NCBI Taxonomy database (SQLite format)" + ) + parser.add_argument( + "--gnm2tab_file", + required=True, + type=argparse.FileType(), + help="gnm2tab file from spal", + ) + parser.add_argument("taxon") + args = parser.parse_args() + + find_distances(args.gnm2tab_file, args.taxon, args.taxonomy_db)