Mercurial > repos > iuc > spaln
annotate list_spaln_tables.py @ 1:37b5e1f0b544 draft
"planemo upload for repository https://github.com/ogotoh/spaln commit 4cfc21ef8456ca8b8da0a8a8c045b8a472858608"
author | iuc |
---|---|
date | Thu, 16 Jul 2020 07:57:10 -0400 |
parents | |
children |
rev | line source |
---|---|
1
37b5e1f0b544
"planemo upload for repository https://github.com/ogotoh/spaln commit 4cfc21ef8456ca8b8da0a8a8c045b8a472858608"
iuc
parents:
diff
changeset
|
1 #!/usr/bin/env python3 |
37b5e1f0b544
"planemo upload for repository https://github.com/ogotoh/spaln commit 4cfc21ef8456ca8b8da0a8a8c045b8a472858608"
iuc
parents:
diff
changeset
|
2 |
37b5e1f0b544
"planemo upload for repository https://github.com/ogotoh/spaln commit 4cfc21ef8456ca8b8da0a8a8c045b8a472858608"
iuc
parents:
diff
changeset
|
3 import argparse |
37b5e1f0b544
"planemo upload for repository https://github.com/ogotoh/spaln commit 4cfc21ef8456ca8b8da0a8a8c045b8a472858608"
iuc
parents:
diff
changeset
|
4 import shlex |
37b5e1f0b544
"planemo upload for repository https://github.com/ogotoh/spaln commit 4cfc21ef8456ca8b8da0a8a8c045b8a472858608"
iuc
parents:
diff
changeset
|
5 import sys |
37b5e1f0b544
"planemo upload for repository https://github.com/ogotoh/spaln commit 4cfc21ef8456ca8b8da0a8a8c045b8a472858608"
iuc
parents:
diff
changeset
|
6 from subprocess import run |
37b5e1f0b544
"planemo upload for repository https://github.com/ogotoh/spaln commit 4cfc21ef8456ca8b8da0a8a8c045b8a472858608"
iuc
parents:
diff
changeset
|
7 from typing import TextIO |
37b5e1f0b544
"planemo upload for repository https://github.com/ogotoh/spaln commit 4cfc21ef8456ca8b8da0a8a8c045b8a472858608"
iuc
parents:
diff
changeset
|
8 |
37b5e1f0b544
"planemo upload for repository https://github.com/ogotoh/spaln commit 4cfc21ef8456ca8b8da0a8a8c045b8a472858608"
iuc
parents:
diff
changeset
|
9 |
37b5e1f0b544
"planemo upload for repository https://github.com/ogotoh/spaln commit 4cfc21ef8456ca8b8da0a8a8c045b8a472858608"
iuc
parents:
diff
changeset
|
def find_common_ancestor_distance(
    taxon: str, other_taxon: str, taxonomy_db_path: str, only_canonical: bool
):
    """Run ``taxonomy_util common_ancestor_distance`` for a pair of taxa.

    The argument vector is built as a list and handed straight to
    ``subprocess.run`` (no shell and no string re-parsing), so taxon names or
    database paths that contain spaces or quote characters reach the tool as
    single, unmodified arguments.

    Args:
        taxon: Taxon of interest; passed as the last positional argument.
        other_taxon: Taxon to compare against; passed just before ``taxon``.
        taxonomy_db_path: Value given to the tool's ``-d`` option.
        only_canonical: When true, ``--only_canonical`` is added to the call.

    Returns:
        The ``subprocess.CompletedProcess`` of the call, with ``stdout`` and
        ``stderr`` captured and decoded as UTF-8. The exit status is not
        checked here; callers inspect the captured streams themselves.
    """
    cmd = ["taxonomy_util", "-d", taxonomy_db_path, "common_ancestor_distance"]
    if only_canonical:
        cmd.append("--only_canonical")
    cmd.extend([other_taxon, taxon])
    return run(cmd, encoding="utf8", capture_output=True)
37b5e1f0b544
"planemo upload for repository https://github.com/ogotoh/spaln commit 4cfc21ef8456ca8b8da0a8a8c045b8a472858608"
iuc
parents:
diff
changeset
|
18 |
37b5e1f0b544
"planemo upload for repository https://github.com/ogotoh/spaln commit 4cfc21ef8456ca8b8da0a8a8c045b8a472858608"
iuc
parents:
diff
changeset
|
19 |
37b5e1f0b544
"planemo upload for repository https://github.com/ogotoh/spaln commit 4cfc21ef8456ca8b8da0a8a8c045b8a472858608"
iuc
parents:
diff
changeset
|
def find_distances(gnm2tab_file: TextIO, taxon: str, taxonomy_db_path: str):
    """Print the common-ancestor distance between *taxon* and each gnm2tab entry.

    ``taxonomy_util get_id`` is first run for *taxon*; if its stderr contains
    "not found in", the program exits with that message. Each line of
    *gnm2tab_file* is then split on tabs and its first three fields (called
    species code, settings and taxon in this code) are used to query
    ``taxonomy_util common_ancestor_distance`` twice: once restricted to
    canonical ranks and once unrestricted.

    For every entry whose canonical query wrote nothing to stderr, one
    tab-separated line is printed to stdout holding: the first tab-separated
    field of the unrestricted query's output, the full (right-stripped)
    output of the canonical query, the species code, the settings and the
    entry's taxon. Entries whose canonical query wrote to stderr are reported
    as a warning on stderr and skipped.

    Args:
        gnm2tab_file: Open text file with tab-separated gnm2tab records.
        taxon: Taxon every entry is compared against.
        taxonomy_db_path: Value given to ``taxonomy_util``'s ``-d`` option.

    Raises:
        SystemExit: If ``taxonomy_util get_id`` reports *taxon* as not found.
    """
    cmd = ["taxonomy_util", "-d", taxonomy_db_path, "get_id", taxon]
    proc = run(cmd, capture_output=True, encoding="utf8")
    if "not found in" in proc.stderr:
        # sys.exit is always available; the bare exit() helper is only
        # injected by the site module.
        sys.exit("Error: " + proc.stderr.strip())

    for line in gnm2tab_file:
        if not line.strip():
            # Blank lines carry no record.
            continue
        fields = [field.strip() for field in line.split("\t")[:3]]
        if len(fields) < 3:
            # Without three fields the record cannot be unpacked; report it
            # instead of crashing so the remaining entries are still listed.
            print(
                "Warning: skipping malformed gnm2tab line:",
                line.rstrip("\n"),
                file=sys.stderr,
            )
            continue
        species_code, settings, other_taxon = fields

        proc = find_common_ancestor_distance(taxon, other_taxon, taxonomy_db_path, True)
        if proc.stderr != "":
            print("Warning:", other_taxon, proc.stderr.rstrip(), file=sys.stderr)
            continue
        ancestor_info = proc.stdout.rstrip()

        # Second query without the canonical restriction; only its first
        # tab-separated field (the distance) is reported.
        proc = find_common_ancestor_distance(
            taxon, other_taxon, taxonomy_db_path, False
        )
        non_canonical_distance = proc.stdout.split("\t")[0]
        print(
            non_canonical_distance,
            ancestor_info,
            species_code,
            settings,
            other_taxon,
            sep="\t",
        )
37b5e1f0b544
"planemo upload for repository https://github.com/ogotoh/spaln commit 4cfc21ef8456ca8b8da0a8a8c045b8a472858608"
iuc
parents:
diff
changeset
|
45 |
37b5e1f0b544
"planemo upload for repository https://github.com/ogotoh/spaln commit 4cfc21ef8456ca8b8da0a8a8c045b8a472858608"
iuc
parents:
diff
changeset
|
46 |
37b5e1f0b544
"planemo upload for repository https://github.com/ogotoh/spaln commit 4cfc21ef8456ca8b8da0a8a8c045b8a472858608"
iuc
parents:
diff
changeset
|
if __name__ == "__main__":
    # Command-line entry point: parse the taxonomy database path, the gnm2tab
    # file and the query taxon, then print one distance line per gnm2tab entry.
    parser = argparse.ArgumentParser(description="Find distance to common ancestor")
    parser.add_argument(
        "--taxonomy_db", required=True, help="NCBI Taxonomy database (SQLite format)"
    )
    parser.add_argument(
        "--gnm2tab_file",
        required=True,
        type=argparse.FileType(),
        help="gnm2tab file from spaln",
    )
    parser.add_argument(
        "taxon", help="Taxon to which the distance of each gnm2tab entry is computed"
    )
    args = parser.parse_args()

    # argparse.FileType opens the file during parsing; close it deterministically
    # once all entries have been processed.
    with args.gnm2tab_file:
        find_distances(args.gnm2tab_file, args.taxon, args.taxonomy_db)