comparison scripts/modules/typing.py @ 0:965517909457 draft

planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
author cstrittmatter
date Wed, 22 Jan 2020 08:41:44 -0500
parents
children 0cbed1c0a762
comparison
equal deleted inserted replaced
-1:000000000000 0:965517909457
1 import os.path
2 import functools
3
4 import utils
5
def simplify_data_by_gene(data_by_gene):
    """Re-key per-gene results by sequence header, keeping only the typing metrics.

    Every value of *data_by_gene* must provide the keys ``'header'``,
    ``'gene_identity'``, ``'gene_coverage'`` and ``'gene_mean_read_coverage'``.
    The original keys of *data_by_gene* are discarded.

    Returns a dict mapping each header to a dict with the keys
    ``'gene_identity'``, ``'gene_coverage'`` and ``'gene_depth'``, where
    ``'gene_depth'`` carries the input's ``'gene_mean_read_coverage'`` value.
    """
    return {
        record['header']: {
            'gene_identity': record['gene_identity'],
            'gene_coverage': record['gene_coverage'],
            'gene_depth': record['gene_mean_read_coverage'],
        }
        for record in data_by_gene.values()
    }
11
12
def possible_types(data_by_gene, typing_rules_file, min_gene_coverage, min_gene_identity, min_gene_depth):
    """Return the pathotypes whose rule profile matches the observed genes.

    The typing rules file is tab-separated:

    * a line whose first field starts with ``#`` is a header; its remaining
      fields are the gene names (lower-cased before lookup) for the profile
      lines that follow;
    * any other non-empty line is a profile: the first field is the pathotype
      name and each following field is ``1`` (gene must be present), ``0``
      (gene must be absent) or anything else (no constraint on that gene).

    A gene counts as present when its coverage, identity and depth are all
    greater than or equal to the corresponding ``min_gene_*`` threshold.

    Parameters:
        data_by_gene: per-gene results in the form accepted by
            ``simplify_data_by_gene``.
        typing_rules_file: path to the rules file described above.
        min_gene_coverage, min_gene_identity, min_gene_depth: presence
            thresholds.

    Returns:
        List of pathotype names, in file order, whose constraints are all met.

    Raises:
        KeyError: a constrained gene has no entry in *data_by_gene*.
        IndexError: a profile constrains a column that has no gene name in
            the preceding header.
    """
    data_by_gene = simplify_data_by_gene(data_by_gene)

    # The 'U' open-mode flag is deprecated on Python 3 and rejected from 3.11.
    # Reading the file whole and splitting with str.splitlines() still copes
    # with "\n", "\r\n" and "\r" line endings on both Python 2 and Python 3.
    with open(typing_rules_file, 'r') as reader:
        lines = reader.read().splitlines()

    requirement_by_flag = {'1': True, '0': False}

    possible_pathotypes = []
    genes = []
    for line in lines:
        if not line:
            continue

        fields = line.split('\t')
        if fields[0].startswith('#'):
            # A real list (not a lazy map object) so it can be indexed on
            # Python 3 as well.
            genes = [gene.lower() for gene in fields[1:]]
            continue

        # Every column is evaluated (no short-circuit) so that a rule naming
        # an unknown gene is always reported, regardless of column order.
        congruence = []
        for x, flag in enumerate(fields[1:]):
            gene_requirement = requirement_by_flag.get(flag)
            if gene_requirement is None:
                # Unconstrained gene: never contradicts the profile.
                congruence.append(True)
                continue

            gene_data = data_by_gene[genes[x]]
            gene_present = (gene_data['gene_coverage'] >= min_gene_coverage and
                            gene_data['gene_identity'] >= min_gene_identity and
                            gene_data['gene_depth'] >= min_gene_depth)
            congruence.append(gene_present == gene_requirement)

        if all(congruence):
            possible_pathotypes.append(fields[0])

    return possible_pathotypes
45
46
# Decorator applied to this module's entry point: utils.timer with its ``name``
# keyword pre-bound to 'Module Typing'.
# NOTE(review): utils.timer is defined outside this file; presumably it times
# the wrapped call and reports it under this name -- confirm in utils.
module_timer = functools.partial(utils.timer, name='Module Typing')
48
49
@module_timer
def typing(data_by_gene, typing_rules_file, min_gene_coverage, min_gene_identity, min_gene_depth, outdir):
    """Determine the matching pathotypes, write the report and echo it to stdout.

    The report is written to ``<outdir>/patho_typing.report.txt``: one matching
    pathotype per line, or the single line ``NA`` when nothing matches.

    Parameters:
        data_by_gene, typing_rules_file, min_gene_coverage, min_gene_identity,
        min_gene_depth: forwarded unchanged to ``possible_types``.
        outdir: directory in which the report file is created.

    Returns:
        ``(None, None)``.
        NOTE(review): the meaning of the two placeholders is defined by the
        caller / the ``module_timer`` wrapper, which is not visible here.
    """
    possible_pathotypes = possible_types(data_by_gene, typing_rules_file, min_gene_coverage, min_gene_identity, min_gene_depth)

    report_path = os.path.join(outdir, 'patho_typing.report.txt')
    with open(report_path, 'wt') as writer:
        # Single-argument, parenthesised print produces identical output on
        # Python 2 (print statement) and Python 3 (print function).
        if possible_pathotypes:
            writer.write('\n'.join(possible_pathotypes) + '\n')
            print('\n' + 'Pathotypes found:' + '\n')
            print('\n'.join(possible_pathotypes) + '\n')
        else:
            writer.write('NA' + '\n')
            print('\n' + 'It was not possible to identify any possible pathotype match' + '\n')

    return None, None