Mercurial > repos > cstrittmatter > test_eurl_vtec_wgs_pt
comparison scripts/modules/typing.py @ 3:0cbed1c0a762 draft default tip
planemo upload commit 15239f1674081ab51ab8dd75a9a40cf1bfaa93e8
| author | cstrittmatter |
|---|---|
| date | Tue, 28 Jan 2020 10:42:31 -0500 |
| parents | 965517909457 |
| children |
comparison
equal
deleted
inserted
replaced
| 2:6837f733b4aa | 3:0cbed1c0a762 |
|---|---|
| 1 import os.path | 1 import os.path |
| 2 import functools | 2 import functools |
| 3 | 3 |
| 4 import utils | 4 try: |
| 5 import modules.utils as utils | |
| 6 except ImportError: | |
| 7 from pathotyping.modules import utils as utils | |
| 8 | |
| 9 | |
| 10 # {1: {'gene_identity': 0, 'gene_mean_read_coverage': 0.0, 'gene_number_positions_multiple_alleles': 0, 'header': 'fyuA', 'gene_coverage': 0.0, 'gene_low_coverage': 100.0}} | |
| 11 | |
| 5 | 12 |
| 6 def simplify_data_by_gene(data_by_gene): | 13 def simplify_data_by_gene(data_by_gene): |
| 7 cleaned_data_by_gene = {} | 14 cleaned_data_by_gene = {} |
| 8 for counter, data in data_by_gene.items(): | 15 for counter, data in list(data_by_gene.items()): |
| 9 cleaned_data_by_gene[data['header']] = {'gene_identity': data['gene_identity'], 'gene_coverage': data['gene_coverage'], 'gene_depth': data['gene_mean_read_coverage']} | 16 cleaned_data_by_gene[data['header'].lower()] = {'gene_identity': data['gene_identity'], |
| 17 'gene_coverage': data['gene_coverage'], | |
| 18 'gene_depth': data['gene_mean_read_coverage']} | |
| 10 return cleaned_data_by_gene | 19 return cleaned_data_by_gene |
| 11 | 20 |
| 12 | 21 |
| 13 def possible_types(data_by_gene, typing_rules_file, min_gene_coverage, min_gene_identity, min_gene_depth): | 22 def possible_types(data_by_gene, typing_rules_file, min_gene_coverage, min_gene_identity, min_gene_depth): |
| 14 data_by_gene = simplify_data_by_gene(data_by_gene) | 23 data_by_gene = simplify_data_by_gene(data_by_gene) |
| 15 | 24 |
| 16 possible_pathotypes = [] | 25 possible_pathotypes = [] |
| 26 genes_present = [] | |
| 17 with open(typing_rules_file, 'rtU') as reader: | 27 with open(typing_rules_file, 'rtU') as reader: |
| 18 genes = [] | 28 genes = [] |
| 19 for line in reader: | 29 for line in reader: |
| 20 line = line.splitlines()[0] | 30 line = line.splitlines()[0] |
| 21 if len(line) > 0: | 31 if len(line) > 0: |
| 22 line = line.split('\t') | 32 line = line.split('\t') |
| 23 if line[0].startswith('#'): | 33 if not line[0].startswith('##'): |
| 24 genes = map(str.lower, line[1:]) | 34 if line[0].startswith('#'): |
| 25 else: | 35 genes = line[1:] |
| 26 profile = line[1:] | 36 else: |
| 27 congruence = [] | 37 profile = line[1:] |
| 28 for x, gene_requirement in enumerate(profile): | 38 congruence = [] |
| 29 gene_requirement = True if gene_requirement == '1' else False if gene_requirement == '0' else None | 39 for x, gene_requirement in enumerate(profile): |
| 30 if gene_requirement is None: | 40 if data_by_gene[genes[x].lower()]['gene_coverage'] >= min_gene_coverage and \ |
| 31 congruence.append(True) | 41 data_by_gene[genes[x].lower()]['gene_identity'] >= min_gene_identity and \ |
| 32 else: | 42 data_by_gene[genes[x].lower()]['gene_depth'] >= min_gene_depth: |
| 33 if data_by_gene[genes[x]]['gene_coverage'] >= min_gene_coverage and data_by_gene[genes[x]]['gene_identity'] >= min_gene_identity and data_by_gene[genes[x]]['gene_depth'] >= min_gene_depth: | |
| 34 gene_present = True | 43 gene_present = True |
| 44 genes_present.append(genes[x]) | |
| 35 else: | 45 else: |
| 36 gene_present = False | 46 gene_present = False |
| 37 | 47 |
| 38 if gene_present == gene_requirement: | 48 gene_requirement = \ |
| 49 True if gene_requirement == '1' else False if gene_requirement == '0' else None | |
| 50 if gene_requirement is not None: | |
| 51 if gene_present == gene_requirement: | |
| 52 congruence.append(True) | |
| 53 else: | |
| 54 congruence.append(False) | |
| 55 else: | |
| 39 congruence.append(True) | 56 congruence.append(True) |
| 40 else: | 57 if all(congruence): |
| 41 congruence.append(False) | 58 possible_pathotypes.append(line[0]) |
| 42 if all(congruence): | 59 return possible_pathotypes, list(set(genes_present)) |
| 43 possible_pathotypes.append(line[0]) | |
| 44 return possible_pathotypes | |
| 45 | 60 |
| 46 | 61 |
| 47 module_timer = functools.partial(utils.timer, name='Module Typing') | 62 module_timer = functools.partial(utils.timer, name='Module Typing') |
| 48 | 63 |
| 49 | 64 |
| 50 @module_timer | 65 @module_timer |
| 51 def typing(data_by_gene, typing_rules_file, min_gene_coverage, min_gene_identity, min_gene_depth, outdir): | 66 def typing(data_by_gene, typing_rules_file, min_gene_coverage, min_gene_identity, min_gene_depth, outdir): |
| 52 possible_pathotypes = possible_types(data_by_gene, typing_rules_file, min_gene_coverage, min_gene_identity, min_gene_depth) | 67 possible_pathotypes, genes_present = possible_types(data_by_gene, typing_rules_file, min_gene_coverage, |
| 53 with open(os.path.join(outdir, 'patho_typing.report.txt'), 'wt') as writer: | 68 min_gene_identity, min_gene_depth) |
| 54 if len(possible_pathotypes) > 0: | 69 with open(os.path.join(outdir, 'patho_typing.report.txt'), 'wt') as writer_report: |
| 55 writer.write('\n'.join(possible_pathotypes) + '\n') | 70 with open(os.path.join(outdir, 'patho_typing.extended_report.txt'), 'wt') as writer_extended_report: |
| 56 print '\n' + 'Pathotypes found:' + '\n' | 71 writer_extended_report.write('#Pathotypes_found' + '\n') |
| 57 print '\n'.join(possible_pathotypes) + '\n' | 72 if len(possible_pathotypes) > 0: |
| 58 else: | 73 writer_report.write('\n'.join(possible_pathotypes) + '\n') |
| 59 writer.write('NA' + '\n') | 74 writer_extended_report.write('\n'.join(possible_pathotypes) + '\n') |
| 60 print '\n' + 'It was not possible to identify any possible pathotype match' + '\n' | 75 print('\n' + 'Pathotypes found:' + '\n') |
| 76 print('\n'.join(possible_pathotypes) + '\n') | |
| 77 else: | |
| 78 writer_report.write('TND' + '\n') # Type Not Determined | |
| 79 writer_extended_report.write('TND' + '\n') # Type Not Determined | |
| 80 print('\n' + 'It was not possible to identify any possible pathotype match' + '\n') | |
| 81 writer_extended_report.write('\n' + '#Genes_present' + '\n') | |
| 82 writer_extended_report.write('\n'.join(genes_present) + '\n') | |
| 61 | 83 |
| 62 return None, None | 84 return None, None |
