Mercurial > repos > cstrittmatter > test_eurl_vtec_wgs_pt
comparison scripts/modules/typing.py @ 0:965517909457 draft
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
author | cstrittmatter |
---|---|
date | Wed, 22 Jan 2020 08:41:44 -0500 |
parents | |
children | 0cbed1c0a762 |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:965517909457 |
---|---|
1 import os.path | |
2 import functools | |
3 | |
4 import utils | |
5 | |
def simplify_data_by_gene(data_by_gene):
    """Re-key per-gene records by their header, keeping only three metrics.

    Each value of *data_by_gene* must be a mapping with the keys 'header',
    'gene_identity', 'gene_coverage' and 'gene_mean_read_coverage'. The
    keys of *data_by_gene* itself are ignored.

    Returns a new dict mapping every record's 'header' to a dict with the
    keys 'gene_identity', 'gene_coverage' and 'gene_depth' (the latter
    copied from 'gene_mean_read_coverage'). If two records share a header,
    the one iterated last wins.
    """
    return {
        record['header']: {
            'gene_identity': record['gene_identity'],
            'gene_coverage': record['gene_coverage'],
            'gene_depth': record['gene_mean_read_coverage'],
        }
        for record in data_by_gene.values()
    }
11 | |
12 | |
def possible_types(data_by_gene, typing_rules_file, min_gene_coverage, min_gene_identity, min_gene_depth):
    """Return the pathotypes whose rule profile agrees with the observed genes.

    The rules file is tab-separated text:

    * A row whose first field starts with '#' is a header; its remaining
      fields are gene names, which are lower-cased before being looked up.
      A later header row replaces the earlier one.
    * Every other non-empty row is a profile. The first field is the
      pathotype name and each following field constrains the gene in the
      same column: '1' means the gene must be present, '0' means it must be
      absent, and any other value means the gene is not considered.

    A gene is considered present when its coverage, identity and depth are
    all greater than or equal to the corresponding minimum.

    Parameters:
        data_by_gene: mapping accepted by simplify_data_by_gene().
        typing_rules_file: path of the rules file described above.
        min_gene_coverage: minimum 'gene_coverage' for a gene to be present.
        min_gene_identity: minimum 'gene_identity' for a gene to be present.
        min_gene_depth: minimum 'gene_depth' for a gene to be present.

    Returns:
        list of pathotype names, in file order, whose every '0'/'1'
        constraint is satisfied.

    Raises:
        KeyError: a constrained gene name is not a header in data_by_gene.
        IndexError: a profile row constrains a column that has no gene name
            in the most recent header row.
    """
    data_by_gene = simplify_data_by_gene(data_by_gene)

    # Read the whole (small) rules file and let str.splitlines() deal with
    # '\n', '\r\n' and '\r' endings. This replaces the 'U' open-mode flag,
    # which Python 3.11 rejects, and behaves the same on Python 2 and 3.
    with open(typing_rules_file, 'r') as reader:
        rows = reader.read().splitlines()

    possible_pathotypes = []
    genes = []
    for row in rows:
        if not row:
            continue

        fields = row.split('\t')
        if fields[0].startswith('#'):
            # Must be a real list: it is indexed by column below, and map()
            # returns a non-subscriptable iterator on Python 3.
            genes = [gene.lower() for gene in fields[1:]]
            continue

        congruent = True
        for column, flag in enumerate(fields[1:]):
            if flag == '1':
                required = True
            elif flag == '0':
                required = False
            else:
                # Any other value places no constraint on this gene.
                continue

            observed = data_by_gene[genes[column]]
            present = (observed['gene_coverage'] >= min_gene_coverage and
                       observed['gene_identity'] >= min_gene_identity and
                       observed['gene_depth'] >= min_gene_depth)
            if present != required:
                # No early exit: every constrained column is still looked
                # up, so unknown genes in a row are always reported.
                congruent = False

        if congruent:
            possible_pathotypes.append(fields[0])

    return possible_pathotypes
45 | |
46 | |
# Decorator applied to this module's entry point: utils.timer with its
# `name` keyword pre-bound to 'Module Typing'.
module_timer = functools.partial(utils.timer, name='Module Typing')


@module_timer
def typing(data_by_gene, typing_rules_file, min_gene_coverage, min_gene_identity, min_gene_depth, outdir):
    """Determine the possible pathotypes and report them to file and stdout.

    Calls possible_types() with the given data, rules file and thresholds,
    then writes 'patho_typing.report.txt' inside *outdir*: one pathotype
    per line, or the single line 'NA' when nothing matched. The same
    outcome is printed to standard output.

    Parameters:
        data_by_gene, typing_rules_file, min_gene_coverage,
        min_gene_identity, min_gene_depth: forwarded unchanged to
            possible_types().
        outdir: existing directory in which the report file is created.

    Returns:
        The undecorated function returns the tuple (None, None).
        NOTE(review): the function is wrapped by utils.timer, which is not
        visible here and may alter what callers actually receive.
    """
    possible_pathotypes = possible_types(data_by_gene, typing_rules_file, min_gene_coverage, min_gene_identity, min_gene_depth)

    report_path = os.path.join(outdir, 'patho_typing.report.txt')
    with open(report_path, 'wt') as writer:
        # Single-argument print(...) produces the same output on Python 2
        # (parenthesised expression) and Python 3 (function call).
        if possible_pathotypes:
            writer.write('\n'.join(possible_pathotypes) + '\n')
            print('\n' + 'Pathotypes found:' + '\n')
            print('\n'.join(possible_pathotypes) + '\n')
        else:
            writer.write('NA' + '\n')
            print('\n' + 'It was not possible to identify any possible pathotype match' + '\n')

    return None, None