Mercurial > repos > cstrittmatter > test_eurl_vtec_wgs_pt
diff scripts/modules/typing.py @ 3:0cbed1c0a762 draft default tip
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
author | cstrittmatter |
---|---|
date | Tue, 28 Jan 2020 10:42:31 -0500 |
parents | 965517909457 |
children |
line wrap: on
line diff
--- a/scripts/modules/typing.py Wed Jan 22 09:10:12 2020 -0500 +++ b/scripts/modules/typing.py Tue Jan 28 10:42:31 2020 -0500 @@ -1,12 +1,21 @@ import os.path import functools -import utils +try: + import modules.utils as utils +except ImportError: + from pathotyping.modules import utils as utils + + +# {1: {'gene_identity': 0, 'gene_mean_read_coverage': 0.0, 'gene_number_positions_multiple_alleles': 0, 'header': 'fyuA', 'gene_coverage': 0.0, 'gene_low_coverage': 100.0}} + def simplify_data_by_gene(data_by_gene): cleaned_data_by_gene = {} - for counter, data in data_by_gene.items(): - cleaned_data_by_gene[data['header']] = {'gene_identity': data['gene_identity'], 'gene_coverage': data['gene_coverage'], 'gene_depth': data['gene_mean_read_coverage']} + for counter, data in list(data_by_gene.items()): + cleaned_data_by_gene[data['header'].lower()] = {'gene_identity': data['gene_identity'], + 'gene_coverage': data['gene_coverage'], + 'gene_depth': data['gene_mean_read_coverage']} return cleaned_data_by_gene @@ -14,34 +23,40 @@ data_by_gene = simplify_data_by_gene(data_by_gene) possible_pathotypes = [] + genes_present = [] with open(typing_rules_file, 'rtU') as reader: genes = [] for line in reader: line = line.splitlines()[0] if len(line) > 0: line = line.split('\t') - if line[0].startswith('#'): - genes = map(str.lower, line[1:]) - else: - profile = line[1:] - congruence = [] - for x, gene_requirement in enumerate(profile): - gene_requirement = True if gene_requirement == '1' else False if gene_requirement == '0' else None - if gene_requirement is None: - congruence.append(True) - else: - if data_by_gene[genes[x]]['gene_coverage'] >= min_gene_coverage and data_by_gene[genes[x]]['gene_identity'] >= min_gene_identity and data_by_gene[genes[x]]['gene_depth'] >= min_gene_depth: + if not line[0].startswith('##'): + if line[0].startswith('#'): + genes = line[1:] + else: + profile = line[1:] + congruence = [] + for x, gene_requirement in enumerate(profile): + if data_by_gene[genes[x].lower()]['gene_coverage'] >= min_gene_coverage and \ + data_by_gene[genes[x].lower()]['gene_identity'] >= min_gene_identity and \ + data_by_gene[genes[x].lower()]['gene_depth'] >= min_gene_depth: gene_present = True + genes_present.append(genes[x]) else: gene_present = False - if gene_present == gene_requirement: - congruence.append(True) + gene_requirement = \ + True if gene_requirement == '1' else False if gene_requirement == '0' else None + if gene_requirement is not None: + if gene_present == gene_requirement: + congruence.append(True) + else: + congruence.append(False) else: - congruence.append(False) - if all(congruence): - possible_pathotypes.append(line[0]) - return possible_pathotypes + congruence.append(True) + if all(congruence): + possible_pathotypes.append(line[0]) + return possible_pathotypes, list(set(genes_present)) module_timer = functools.partial(utils.timer, name='Module Typing') @@ -49,14 +64,21 @@ @module_timer def typing(data_by_gene, typing_rules_file, min_gene_coverage, min_gene_identity, min_gene_depth, outdir): - possible_pathotypes = possible_types(data_by_gene, typing_rules_file, min_gene_coverage, min_gene_identity, min_gene_depth) - with open(os.path.join(outdir, 'patho_typing.report.txt'), 'wt') as writer: - if len(possible_pathotypes) > 0: - writer.write('\n'.join(possible_pathotypes) + '\n') - print '\n' + 'Pathotypes found:' + '\n' - print '\n'.join(possible_pathotypes) + '\n' - else: - writer.write('NA' + '\n') - print '\n' + 'It was not possible to identify any possible pathotype match' + '\n' + possible_pathotypes, genes_present = possible_types(data_by_gene, typing_rules_file, min_gene_coverage, + min_gene_identity, min_gene_depth) + with open(os.path.join(outdir, 'patho_typing.report.txt'), 'wt') as writer_report: + with open(os.path.join(outdir, 'patho_typing.extended_report.txt'), 'wt') as writer_extended_report: + writer_extended_report.write('#Pathotypes_found' + '\n') + if len(possible_pathotypes) > 0: + writer_report.write('\n'.join(possible_pathotypes) + '\n') + writer_extended_report.write('\n'.join(possible_pathotypes) + '\n') + print('\n' + 'Pathotypes found:' + '\n') + print('\n'.join(possible_pathotypes) + '\n') + else: + writer_report.write('TND' + '\n') # Type Not Determined + writer_extended_report.write('TND' + '\n') # Type Not Determined + print('\n' + 'It was not possible to identify any possible pathotype match' + '\n') + writer_extended_report.write('\n' + '#Genes_present' + '\n') + writer_extended_report.write('\n'.join(genes_present) + '\n') return None, None