comparison tbprofiler_json_to_tabular.py @ 0:3e0d5ceeeb0f draft default tip

planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/tbprofiler_json_to_tabular commit 24535690aedb81353cf5e036dc4577022d9604ad
author public-health-bioinformatics
date Tue, 19 Sep 2023 18:03:41 +0000
parents
children
comparison
equal deleted inserted replaced
-1:000000000000 0:3e0d5ceeeb0f
1 #!/usr/bin/env python
2
3 import argparse
4 import csv
5 import json
6
7
# Column order of each emitted table.
_QC_FIELDS = (
    'pct_reads_mapped',
    'num_reads_mapped',
    'median_coverage',
)

_GENE_COVERAGE_FIELDS = (
    'locus_tag',
    'gene',
    'fraction',
    'cutoff',
)

_MISSING_POSITIONS_FIELDS = (
    'locus_tag',
    'gene',
    'position',
    'variants',
    'drugs',
)

# Columns shared by the resistance-variant and other-variant tables.
_VARIANT_FIELDS = (
    'chrom',
    'genome_pos',
    'locus_tag',
    'feature_id',
    'gene',
    'type',
    'ref',
    'alt',
    'freq',
    'nucleotide_change',
    'protein_change',
    'change',
)

_RESISTANCE_VARIANT_FIELDS = _VARIANT_FIELDS + ('drugs',)

_OTHER_VARIANT_FIELDS = _VARIANT_FIELDS + ('gene_associated_drugs',)

_ANALYSIS_METADATA_FIELDS = (
    'timestamp',
    'tbprofiler_version',
    'mapping_program',
    'variant_calling_program',
    'db_name',
    'db_commit',
    'db_date',
)

# Maps a pipeline entry's 'Analysis' value to the metadata column that
# receives that entry's 'Program' value.
_PIPELINE_COLUMNS = {
    'Mapping': 'mapping_program',
    'Variant calling': 'variant_calling_program',
}


def _write_table(path, fieldnames, rows):
    """Write *rows* (an iterable of dicts) to *path* as a tab-delimited table.

    A header line with *fieldnames* is always written, even when *rows* is
    empty. Keys of a row that are not in *fieldnames* are dropped, and
    fields missing from a row are written as empty cells.
    """
    # newline='' is what the csv module asks for: the writer emits its own
    # line terminator, and a second newline translation by the file object
    # would corrupt it on platforms whose os.linesep is not '\n'.
    with open(path, 'w', newline='') as f:
        writer = csv.DictWriter(
            f,
            fieldnames=fieldnames,
            dialect='excel-tab',
            quoting=csv.QUOTE_MINIMAL,
            # Tolerate report rows that carry more keys than we tabulate.
            extrasaction='ignore',
        )
        writer.writeheader()
        writer.writerows(rows)


def _select(record, fieldnames):
    """Return a new dict with exactly *fieldnames* taken from *record*.

    Raises KeyError if *record* lacks one of the requested fields.
    """
    return {name: record[name] for name in fieldnames}


def _resistance_variant_row(variant):
    """Build the table row for one entry of the report's 'dr_variants' list.

    The entry's 'drugs' list is flattened to 'drug:confers' pairs joined
    by ', '.
    """
    drugs = ', '.join(
        drug['drug'] + ':' + drug['confers'] for drug in variant['drugs']
    )
    return _select(dict(variant, drugs=drugs), _RESISTANCE_VARIANT_FIELDS)


def _other_variant_row(variant):
    """Build the table row for one entry of the report's 'other_variants' list.

    The entry's 'gene_associated_drugs' list is joined with ', '.
    """
    drugs = ', '.join(variant['gene_associated_drugs'])
    return _select(
        dict(variant, gene_associated_drugs=drugs), _OTHER_VARIANT_FIELDS
    )


def _analysis_metadata_row(report):
    """Collect the single analysis-metadata row from *report*.

    The program columns are filled from the 'pipeline' entries whose
    'Analysis' is "Mapping" or "Variant calling"; a column with no matching
    entry is left out of the returned dict and ends up as an empty cell.
    """
    db_version = report['db_version']
    row = {
        'timestamp': report['timestamp'],
        'tbprofiler_version': report['tbprofiler_version'],
        'db_name': db_version['name'],
        'db_commit': db_version['commit'],
        'db_date': db_version['Date'],
    }
    for entry in report['pipeline']:
        column = _PIPELINE_COLUMNS.get(entry['Analysis'])
        if column is not None:
            row[column] = entry['Program']
    return row


def main(args):
    """Split a JSON report into six tab-delimited tables.

    Args:
        args: Namespace with the attributes ``input`` (path of the JSON
            report to read) and ``qc``, ``gene_coverage``,
            ``missing_positions``, ``resistance_variants``,
            ``other_variants`` and ``analysis_metadata`` (paths of the
            tables to write). Every path must be set.

    Raises:
        KeyError: if the report lacks a section or a projected field.
    """
    with open(args.input, 'r') as f:
        report = json.load(f)

    qc = report['qc']

    _write_table(args.qc, _QC_FIELDS, [_select(qc, _QC_FIELDS)])
    _write_table(
        args.gene_coverage, _GENE_COVERAGE_FIELDS, qc['gene_coverage']
    )
    _write_table(
        args.missing_positions,
        _MISSING_POSITIONS_FIELDS,
        qc['missing_positions'],
    )
    _write_table(
        args.resistance_variants,
        _RESISTANCE_VARIANT_FIELDS,
        (_resistance_variant_row(v) for v in report['dr_variants']),
    )
    _write_table(
        args.other_variants,
        _OTHER_VARIANT_FIELDS,
        (_other_variant_row(v) for v in report['other_variants']),
    )
    _write_table(
        args.analysis_metadata,
        _ANALYSIS_METADATA_FIELDS,
        [_analysis_metadata_row(report)],
    )
126
if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Convert a tbprofiler JSON report into tab-delimited tables.',
    )
    parser.add_argument('input', help='path of the JSON report to read')
    # main() opens every one of these paths unconditionally, so an omitted
    # option would only surface later as a TypeError from open(None, 'w').
    # Marking them required lets argparse reject the call with a usage error.
    parser.add_argument(
        '--qc', required=True,
        help='output path for the QC summary table',
    )
    parser.add_argument(
        '--gene-coverage', required=True,
        help='output path for the gene coverage table',
    )
    parser.add_argument(
        '--missing-positions', required=True,
        help='output path for the missing positions table',
    )
    parser.add_argument(
        '--resistance-variants', required=True,
        help='output path for the resistance variants table',
    )
    parser.add_argument(
        '--other-variants', required=True,
        help='output path for the other variants table',
    )
    parser.add_argument(
        '--analysis-metadata', required=True,
        help='output path for the analysis metadata table',
    )
    args = parser.parse_args()
    main(args)