comparison scripts/modules/typing.py @ 3:0cbed1c0a762 draft default tip

planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
author cstrittmatter
date Tue, 28 Jan 2020 10:42:31 -0500
parents 965517909457
children
comparison
equal deleted inserted replaced
2:6837f733b4aa 3:0cbed1c0a762
1 import os.path 1 import os.path
2 import functools 2 import functools
3 3
4 import utils 4 try:
5 import modules.utils as utils
6 except ImportError:
7 from pathotyping.modules import utils as utils
8
9
10 # {1: {'gene_identity': 0, 'gene_mean_read_coverage': 0.0, 'gene_number_positions_multiple_alleles': 0, 'header': 'fyuA', 'gene_coverage': 0.0, 'gene_low_coverage': 100.0}}
11
5 12
6 def simplify_data_by_gene(data_by_gene): 13 def simplify_data_by_gene(data_by_gene):
7 cleaned_data_by_gene = {} 14 cleaned_data_by_gene = {}
8 for counter, data in data_by_gene.items(): 15 for counter, data in list(data_by_gene.items()):
9 cleaned_data_by_gene[data['header']] = {'gene_identity': data['gene_identity'], 'gene_coverage': data['gene_coverage'], 'gene_depth': data['gene_mean_read_coverage']} 16 cleaned_data_by_gene[data['header'].lower()] = {'gene_identity': data['gene_identity'],
17 'gene_coverage': data['gene_coverage'],
18 'gene_depth': data['gene_mean_read_coverage']}
10 return cleaned_data_by_gene 19 return cleaned_data_by_gene
11 20
12 21
13 def possible_types(data_by_gene, typing_rules_file, min_gene_coverage, min_gene_identity, min_gene_depth): 22 def possible_types(data_by_gene, typing_rules_file, min_gene_coverage, min_gene_identity, min_gene_depth):
14 data_by_gene = simplify_data_by_gene(data_by_gene) 23 data_by_gene = simplify_data_by_gene(data_by_gene)
15 24
16 possible_pathotypes = [] 25 possible_pathotypes = []
26 genes_present = []
17 with open(typing_rules_file, 'rtU') as reader: 27 with open(typing_rules_file, 'rtU') as reader:
18 genes = [] 28 genes = []
19 for line in reader: 29 for line in reader:
20 line = line.splitlines()[0] 30 line = line.splitlines()[0]
21 if len(line) > 0: 31 if len(line) > 0:
22 line = line.split('\t') 32 line = line.split('\t')
23 if line[0].startswith('#'): 33 if not line[0].startswith('##'):
24 genes = map(str.lower, line[1:]) 34 if line[0].startswith('#'):
25 else: 35 genes = line[1:]
26 profile = line[1:] 36 else:
27 congruence = [] 37 profile = line[1:]
28 for x, gene_requirement in enumerate(profile): 38 congruence = []
29 gene_requirement = True if gene_requirement == '1' else False if gene_requirement == '0' else None 39 for x, gene_requirement in enumerate(profile):
30 if gene_requirement is None: 40 if data_by_gene[genes[x].lower()]['gene_coverage'] >= min_gene_coverage and \
31 congruence.append(True) 41 data_by_gene[genes[x].lower()]['gene_identity'] >= min_gene_identity and \
32 else: 42 data_by_gene[genes[x].lower()]['gene_depth'] >= min_gene_depth:
33 if data_by_gene[genes[x]]['gene_coverage'] >= min_gene_coverage and data_by_gene[genes[x]]['gene_identity'] >= min_gene_identity and data_by_gene[genes[x]]['gene_depth'] >= min_gene_depth:
34 gene_present = True 43 gene_present = True
44 genes_present.append(genes[x])
35 else: 45 else:
36 gene_present = False 46 gene_present = False
37 47
38 if gene_present == gene_requirement: 48 gene_requirement = \
49 True if gene_requirement == '1' else False if gene_requirement == '0' else None
50 if gene_requirement is not None:
51 if gene_present == gene_requirement:
52 congruence.append(True)
53 else:
54 congruence.append(False)
55 else:
39 congruence.append(True) 56 congruence.append(True)
40 else: 57 if all(congruence):
41 congruence.append(False) 58 possible_pathotypes.append(line[0])
42 if all(congruence): 59 return possible_pathotypes, list(set(genes_present))
43 possible_pathotypes.append(line[0])
44 return possible_pathotypes
45 60
46 61
47 module_timer = functools.partial(utils.timer, name='Module Typing') 62 module_timer = functools.partial(utils.timer, name='Module Typing')
48 63
49 64
50 @module_timer 65 @module_timer
51 def typing(data_by_gene, typing_rules_file, min_gene_coverage, min_gene_identity, min_gene_depth, outdir): 66 def typing(data_by_gene, typing_rules_file, min_gene_coverage, min_gene_identity, min_gene_depth, outdir):
52 possible_pathotypes = possible_types(data_by_gene, typing_rules_file, min_gene_coverage, min_gene_identity, min_gene_depth) 67 possible_pathotypes, genes_present = possible_types(data_by_gene, typing_rules_file, min_gene_coverage,
53 with open(os.path.join(outdir, 'patho_typing.report.txt'), 'wt') as writer: 68 min_gene_identity, min_gene_depth)
54 if len(possible_pathotypes) > 0: 69 with open(os.path.join(outdir, 'patho_typing.report.txt'), 'wt') as writer_report:
55 writer.write('\n'.join(possible_pathotypes) + '\n') 70 with open(os.path.join(outdir, 'patho_typing.extended_report.txt'), 'wt') as writer_extended_report:
56 print '\n' + 'Pathotypes found:' + '\n' 71 writer_extended_report.write('#Pathotypes_found' + '\n')
57 print '\n'.join(possible_pathotypes) + '\n' 72 if len(possible_pathotypes) > 0:
58 else: 73 writer_report.write('\n'.join(possible_pathotypes) + '\n')
59 writer.write('NA' + '\n') 74 writer_extended_report.write('\n'.join(possible_pathotypes) + '\n')
60 print '\n' + 'It was not possible to identify any possible pathotype match' + '\n' 75 print('\n' + 'Pathotypes found:' + '\n')
76 print('\n'.join(possible_pathotypes) + '\n')
77 else:
78 writer_report.write('TND' + '\n') # Type Not Determined
79 writer_extended_report.write('TND' + '\n') # Type Not Determined
80 print('\n' + 'It was not possible to identify any possible pathotype match' + '\n')
81 writer_extended_report.write('\n' + '#Genes_present' + '\n')
82 writer_extended_report.write('\n'.join(genes_present) + '\n')
61 83
62 return None, None 84 return None, None