annotate kraken_taxonomy_report.py @ 1:b97694b21bc3 draft

planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 3265247e909410db2a6d6087a2c0d3a9885c120c
author iuc
date Wed, 23 Nov 2016 03:27:33 -0500
parents 3f1a0d47ea8d
children 528a1d91b066
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
1 #!/usr/bin/env python
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
2
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
3 # Reports a summary of Kraken's results
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
4 # and optionally creates a newick Tree
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
5 # Copyright (c) 2016 Daniel Blankenberg
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
6 # Licensed under the Academic Free License version 3.0
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
7 # https://github.com/blankenberg/Kraken-Taxonomy-Report
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
8
1
b97694b21bc3 planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 3265247e909410db2a6d6087a2c0d3a9885c120c
iuc
parents: 0
diff changeset
9 from __future__ import print_function
b97694b21bc3 planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 3265247e909410db2a6d6087a2c0d3a9885c120c
iuc
parents: 0
diff changeset
10
0
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
11 import sys
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
12 import os
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
13 import optparse
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
14 import re
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
15
1
b97694b21bc3 planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 3265247e909410db2a6d6087a2c0d3a9885c120c
iuc
parents: 0
diff changeset
__VERSION__ = '0.0.2'

__URL__ = "https://github.com/blankenberg/Kraken-Taxonomy-Report"

# Rank names were pulled from ncbi nodes.dmp on 02/02/2016
# cat nodes.dmp | cut -f 5 | sort | uniq
# "root" is added manually
NO_RANK_NAME = "no rank"
# NB: 'no rank' is deliberately the first entry (index 0) for generating
# trees, due to e.g.
# root (root) -> cellular organisms (no rank) -> bacteria (superkingdom)
RANK_NAMES = [
    NO_RANK_NAME,
    "root",
    "superkingdom",
    "kingdom",
    "subkingdom",
    "superphylum",
    "phylum",
    "subphylum",
    "superclass",
    "class",
    "subclass",
    "infraclass",
    "superorder",
    "order",
    "suborder",
    "infraorder",
    "parvorder",
    "superfamily",
    "family",
    "subfamily",
    "tribe",
    "subtribe",
    "genus",
    "subgenus",
    "species group",
    "species subgroup",
    "species",
    "subspecies",
    "varietas",
    "forma",
]

# Rank name -> position of that name in RANK_NAMES.
RANK_NAME_TO_INTS = dict( ( rank_name, rank_int ) for ( rank_int, rank_name ) in enumerate( RANK_NAMES ) )
# Every valid rank position, in RANK_NAMES order.
RANK_NAMES_INTS = range( len( RANK_NAMES ) )

NO_RANK_INT = RANK_NAMES.index( NO_RANK_NAME )
NO_RANK_CODE = 'n'

# Ranks that get a one-letter code: the first letter of the rank name,
# except 'superkingdom', which is coded 'd' and appended afterwards so that
# it does not go through the first-letter rule.
PRIMARY_RANK_NAMES = [ 'species', 'genus', 'family', 'order', 'class', 'phylum', 'kingdom' ]
RANK_INT_TO_CODE = {}
for name in PRIMARY_RANK_NAMES:
    RANK_INT_TO_CODE[ RANK_NAMES.index( name ) ] = name[0]
RANK_INT_TO_CODE[ RANK_NAMES.index( 'superkingdom' ) ] = 'd'
PRIMARY_RANK_NAMES.append( 'superkingdom' )

# Template with two string slots separated by a double underscore.
NAME_STUB = "%s__%s"
# Characters replaced by NAME_REPL when names are sanitized: tab, space, '|'
# and the two-character sequence '.;'. A raw string is used so that the regex
# escapes are not treated as (invalid) string escapes by newer Python versions.
# NOTE(review): the last alternative only matches '.' immediately followed by
# ';' -- confirm whether '.' and ';' were meant to be replaced individually.
NAME_RE = re.compile( r"(\t| |\||\.;)" )
NAME_REPL = "_"
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
73
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
74
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
def get_kraken_db_path( db ):
    """Return the location of the kraken database named by ``db``.

    If the ``KRAKEN_DB_PATH`` environment variable is set to a non-empty
    value, ``db`` is joined onto it with ``os.path.join``; otherwise ``db``
    is returned unchanged.

    Raises ValueError when ``db`` is empty or None. This check is an explicit
    raise rather than an ``assert`` so that it still runs under ``python -O``.
    """
    if not db:
        raise ValueError( "You must provide a kraken database" )
    k_db_path = os.getenv( 'KRAKEN_DB_PATH', None )
    if k_db_path:
        db = os.path.join( k_db_path, db )
    return db
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
81
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
82
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
def load_taxonomy( db_path, sanitize_names=False ):
    """Read taxonomy/names.dmp and taxonomy/nodes.dmp found under db_path.

    Returns a tuple ( child_lists, name_map, rank_map ):
      child_lists -- parent node id -> list of child node ids
      name_map    -- node id -> scientific name (made unique, see below)
      rank_map    -- node id -> integer rank (an index into RANK_NAMES)

    Only names.dmp rows whose name type is "scientific name" are used. When
    sanitize_names is true, NAME_RE matches in a name are replaced with
    NAME_REPL before anything else is done with it. If a scientific name is
    seen for more than one node, each later node is stored as
    "<name>_<node id>", and the node that first carried the name is renamed
    the same way the first time a clash is found; both events are reported on
    stderr.
    """
    child_lists = {}
    name_map = {}
    rank_map = {}
    # scientific name -> ( id of the first node seen with it, True once that node has been renamed )
    names = {}

    names_path = os.path.join( db_path, "taxonomy/names.dmp" )
    with open( names_path ) as names_fh:
        for raw in names_fh:
            record = raw.rstrip( "\n\r" )
            # drop the trailing field terminator before splitting
            if record.endswith( "\t|" ):
                record = record[:-2]
            columns = record.split( "\t|\t" )
            node_id = columns[0]
            name = columns[1]
            if sanitize_names:
                name = NAME_RE.sub( NAME_REPL, name )
            if columns[3] != "scientific name":
                continue

            if name not in names:
                # first node to carry this name: keep it as is
                names[name] = ( node_id, False )
                name_map[ node_id ] = name
                continue

            # name clash: make the name unique by suffixing the node id
            first_id, first_renamed = names[name]
            print( 'Warning: name "%s" found at node "%s" but already exists originally for node "%s".' % ( name, node_id, first_id ), file=sys.stderr )
            new_name = "%s_%s" % ( name, node_id )
            print( 'Transforming node "%s" named "%s" to "%s".' % ( node_id, name, new_name ), file=sys.stderr )
            assert new_name not in names, 'Transformed Name "%s" already exists. Cannot recover at this time.' % new_name
            if not first_renamed:
                # the original holder of the name is renamed too, but only once
                orig_new_name = "%s_%s" % ( name, first_id )
                print( 'Transforming node "%s" named "%s" to "%s".' % ( first_id, name, orig_new_name ), file=sys.stderr )
                assert orig_new_name not in names, 'Transformed Name "%s" already exists. Cannot recover at this time.' % orig_new_name
                name_map[first_id] = orig_new_name
                names[name] = ( first_id, True )
            name_map[ node_id ] = new_name

    nodes_path = os.path.join( db_path, "taxonomy/nodes.dmp" )
    with open( nodes_path ) as nodes_fh:
        for raw in nodes_fh:
            columns = raw.rstrip( "\n\r" ).split( "\t|\t" )
            node_id, parent_id = columns[0], columns[1]
            rank = RANK_NAME_TO_INTS.get( columns[2].lower() )
            if rank is None:
                # This should never happen, unless new taxonomy ranks are created
                print( 'Unrecognized rank: Node "%s" is "%s", setting to "%s"' % ( node_id, columns[2], NO_RANK_NAME ), file=sys.stderr )
                rank = NO_RANK_INT
            # node '1' is filed under the parent id '0', whatever the file says
            if node_id == '1':
                parent_id = '0'
            child_lists.setdefault( parent_id, [] ).append( node_id )
            rank_map[node_id] = rank

    return ( child_lists, name_map, rank_map )
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
134
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
135
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
def dfs_summation( node, counts, child_lists ):
    """Roll the counts of every descendant of ``node`` up into ``node``.

    The walk is depth-first: each child subtree is summed before the child's
    total is added to ``node``.  ``counts`` is updated in place and nothing
    is returned.

    node -- key of the subtree root to process
    counts -- dict of node key -> count; keys that are absent count as 0
    child_lists -- dict of node key -> list of child keys

    A node without children is left untouched, so it only has an entry in
    ``counts`` if it already had one before the call.
    """
    offspring = child_lists.get( node )
    if not offspring:
        # Leaf (or unknown node): nothing to accumulate
        return
    for kid in offspring:
        # Finish the child's subtree first so its total is final
        dfs_summation( kid, counts, child_lists )
        counts[ node ] = counts.get( node, 0 ) + counts.get( kid, 0 )
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
142
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
143
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
def dfs_report( node, file_data, hit_taxa, rank_map, name_map, child_lists, output_lines, options, name=None, tax=None ):
    """Walk the taxonomy depth-first and collect one report row per selected node.

    A node is reported when its rank has a code other than NO_RANK_CODE (or
    ``options.intermediate`` is set) and it is either in ``hit_taxa`` or
    ``options.show_zeros`` is set.  The row is appended to
    ``output_lines[ rank ]`` where ``rank`` is ``rank_map[ node ]``.

    node -- key of the node being visited
    file_data -- iterable of per-input dicts mapping node key -> count
    hit_taxa -- collection of node keys, tested with ``in``
    rank_map -- dict of node key -> rank integer
    name_map -- dict of node key -> display name
    child_lists -- dict of node key -> list of child keys
    output_lines -- container indexed by rank integer, holding lists of rows
    options -- parsed options (intermediate, show_zeros, name_id, name_long,
               show_rank, taxonomy are read here)
    name -- "|"-joined lineage of reported ancestors, None at the top
    tax -- ";"-joined lineage of reported ancestors, None at the top

    Nodes that are not reported pass the lineage strings they received on to
    their children unchanged.
    """
    rank_int = rank_map[node]
    code = RANK_INT_TO_CODE.get( rank_int, NO_RANK_CODE )
    rank_selected = code != NO_RANK_CODE or options.intermediate
    if rank_selected and ( options.show_zeros or node in hit_taxa ):
        # Extend both lineage strings with this node's stub
        name_prefix = "" if name is None else "%s|" % name
        tax_prefix = '' if tax is None else "%s;" % tax
        sanitized_name = name_map[ node ]
        name_stub = NAME_STUB % ( code, sanitized_name )
        name = name_prefix + name_stub
        tax = tax_prefix + name_stub

        # First column: taxon id, long lineage name, or plain name
        if options.name_id:
            row = node
        elif options.name_long:
            row = name
        else:
            row = sanitized_name

        # Remaining columns: one count per input file, then the optional extras
        extra_columns = [ "%i" % sample.get( node, 0 ) for sample in file_data ]
        if options.show_rank:
            extra_columns.append( RANK_NAMES[ rank_int ] )
        if options.taxonomy:
            extra_columns.append( tax )
        for column in extra_columns:
            row = "%s\t%s" % ( row, column )
        output_lines[ rank_int ].append( row )

    # Descend regardless of whether this node itself was reported
    for child in child_lists.get( node ) or ():
        dfs_report( child, file_data, hit_taxa, rank_map, name_map, child_lists, output_lines, options, name=name, tax=tax )
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
177
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
178
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
def write_tree( child_lists, name_map, rank_map, options, branch_length=1 ):
    """Build a tree from the taxonomy and write it in newick format.

    The root is the first child listed under key "0" in ``child_lists``.
    The tree is written to ``options.output_tree``.

    child_lists -- dict of node key -> list of child keys
    name_map -- dict of node key -> display name
    rank_map -- dict of node key -> rank integer
    options -- parsed options (name_id, cluster, output_tree are read here)
    branch_length -- branch length given to every clade

    When ``options.cluster`` is not None, children whose rank value is
    greater than it are left out, and nodes whose rank equals it are not
    expanded any further.
    """
    # Uses Biopython, only load if making tree
    import Bio.Phylo
    from Bio.Phylo import BaseTree

    cluster_rank = options.cluster
    clades = {}

    def _clade_for( node_id ):
        # Return the clade for node_id, creating it on first request
        if node_id not in clades:
            label = node_id if options.name_id else name_map[node_id]
            clades[node_id] = BaseTree.Clade( name=label, branch_length=branch_length )
        return clades[node_id]

    def _attach_descendants( parent_id ):
        if cluster_rank is not None and rank_map[parent_id] == cluster_rank:
            # Short circuit if we found our rank, prevents 'hanging' no ranks from being output
            # e.g. clustering by "species" (Escherichia coli), but have "no rank" below (Escherichia coli K-12) in test_db
            return
        parent_clade = _clade_for( parent_id )
        for child_id in child_lists.get( parent_id, [] ):
            wanted = cluster_rank is None or ( rank_map[child_id] <= cluster_rank )
            if not wanted:
                continue
            parent_clade.clades.append( _clade_for( child_id ) )
            _attach_descendants( child_id )

    root_node_id = child_lists["0"][0]
    root_clade = _clade_for( root_node_id )
    _attach_descendants( root_node_id )
    tree = BaseTree.Tree( root=root_clade )
    Bio.Phylo.write( [tree], options.output_tree, 'newick' )
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
208
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
209
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
210 def __main__():
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
211 parser = optparse.OptionParser( usage="%prog [options] file1 file...fileN" )
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
212 parser.add_option( '-v', '--version', dest='version', action='store_true', default=False, help='print version and exit' )
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
213 parser.add_option( '', '--show-zeros', dest='show_zeros', action='store_true', default=False, help='Show empty nodes' )
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
214 parser.add_option( '', '--header-line', dest='header_line', action='store_true', default=False, help='Provide a header on output' )
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
215 parser.add_option( '', '--intermediate', dest='intermediate', action='store_true', default=False, help='Intermediate Ranks' )
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
216 parser.add_option( '', '--name-id', dest='name_id', action='store_true', default=False, help='Use Taxa ID instead of Name' )
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
217 parser.add_option( '', '--name-long', dest='name_long', action='store_true', default=False, help='Use Long taxa ID instead of base name' )
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
218 parser.add_option( '', '--taxonomy', dest='taxonomy', action='store_true', default=False, help='Output taxonomy in last column' )
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
219 parser.add_option( '', '--cluster', dest='cluster', action='store', type="string", default=None, help='Cluster counts to specified rank' )
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
220 parser.add_option( '', '--summation', dest='summation', action='store_true', default=False, help='Add summation of child counts to each taxa' )
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
221 parser.add_option( '', '--sanitize-names', dest='sanitize_names', action='store_true', default=False, help='Replace special chars (\t| |\||\.;) with underscore (_)' )
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
222 parser.add_option( '', '--show-rank', dest='show_rank', action='store_true', default=False, help='Output column with Rank name' )
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
223 parser.add_option( '', '--db', dest='db', action='store', type="string", default=None, help='Name of Kraken database' )
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
224 parser.add_option( '', '--output', dest='output', action='store', type="string", default=None, help='Name of output file' )
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
225 parser.add_option( '', '--output-tree', dest='output_tree', action='store', type="string", default=None, help='Name of output file to place newick tree' )
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
226 (options, args) = parser.parse_args()
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
227 if options.version:
1
b97694b21bc3 planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 3265247e909410db2a6d6087a2c0d3a9885c120c
iuc
parents: 0
diff changeset
228 print( "Kraken Taxonomy Report (%s) version %s" % ( __URL__, __VERSION__ ), file=sys.stderr )
0
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
229 sys.exit()
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
230 if not args:
1
b97694b21bc3 planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 3265247e909410db2a6d6087a2c0d3a9885c120c
iuc
parents: 0
diff changeset
231 print( parser.get_usage(), file=sys.stderr )
0
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
232 sys.exit()
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
233
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
234 if options.cluster:
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
235 cluster_name = options.cluster.lower()
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
236 cluster = RANK_NAME_TO_INTS.get( cluster_name, None )
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
237 assert cluster is not None, ValueError( '"%s" is not a valid rank for clustering.' % options.cluster )
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
238 if cluster_name not in PRIMARY_RANK_NAMES:
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
239 assert options.intermediate, ValueError( 'You cannot cluster by "%s", unless you enable intermediate ranks.' % options.cluster )
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
240 ranks_to_report = [ cluster ]
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
241 options.cluster = cluster
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
242 # When clustering we need to do summation
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
243 options.summation = True
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
244 else:
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
245 options.cluster = None # make empty string into None
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
246 ranks_to_report = RANK_NAMES_INTS
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
247
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
248 if options.output:
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
249 output_fh = open( options.output, 'wb+' )
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
250 else:
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
251 output_fh = sys.stdout
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
252
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
253 db_path = get_kraken_db_path( options.db )
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
254 ( child_lists, name_map, rank_map ) = load_taxonomy( db_path, sanitize_names=options.sanitize_names )
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
255 file_data = []
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
256 hit_taxa = []
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
257 for input_filename in args:
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
258 taxo_counts = {}
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
259 with open( input_filename ) as fh:
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
260 for line in fh:
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
261 fields = line.split( "\t" )
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
262 taxo_counts[ fields[2] ] = taxo_counts.get( fields[2], 0 ) + 1
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
263 clade_counts = taxo_counts.copy() # fixme remove copying?
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
264 if options.summation:
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
265 dfs_summation( '1', clade_counts, child_lists )
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
266 for key, value in clade_counts.items():
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
267 if value and key not in hit_taxa:
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
268 hit_taxa.append( key )
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
269 file_data.append( clade_counts )
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
270
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
271 if options.header_line:
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
272 output_fh.write( "#ID\t" )
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
273 output_fh.write( "\t".join( args ) )
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
274 if options.show_rank:
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
275 output_fh.write( "\trank" )
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
276 if options.taxonomy:
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
277 output_fh.write( "\ttaxonomy" )
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
278 output_fh.write( '\n' )
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
279
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
280 output_lines = dict( [ ( x, [] ) for x in RANK_NAMES_INTS ] )
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
281 dfs_report( '1', file_data, hit_taxa, rank_map, name_map, child_lists, output_lines, options, name=None, tax=None )
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
282
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
283 for rank_int in ranks_to_report:
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
284 for line in output_lines.get( rank_int, [] ):
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
285 output_fh.write( line )
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
286 output_fh.write( '\n' )
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
287 fh.close()
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
288 if options.output_tree:
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
289 write_tree( child_lists, name_map, rank_map, options )
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
290
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
291
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
292 if __name__ == "__main__":  # entry-point guard: true only when this file is executed as a script, not when it is imported
3f1a0d47ea8d planemo upload for repository https://github.com/galaxyproject/tools-iuc/blob/master/tools/kraken_taxonomy_report/ commit 1c0a7aff7c5f6578a11e6e8e9bface8d02e7f8a1
iuc
parents:
diff changeset
293 __main__()  # run the report; __main__() is the function whose body ends just above this guard