Mercurial > repos > public-health-bioinformatics > tbprofiler_json_to_tabular
comparison tbprofiler_json_to_tabular.py @ 0:3e0d5ceeeb0f draft default tip
planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/tbprofiler_json_to_tabular commit 24535690aedb81353cf5e036dc4577022d9604ad
author | public-health-bioinformatics |
---|---|
date | Tue, 19 Sep 2023 18:03:41 +0000 |
parents | |
children |
comparison
equal
deleted
inserted
replaced
-1:000000000000 | 0:3e0d5ceeeb0f |
---|---|
1 #!/usr/bin/env python | |
2 | |
3 import argparse | |
4 import csv | |
5 import json | |
6 | |
7 | |
# Column layouts of the output tables. Header rows are written in this order.
_QC_FIELDNAMES = [
    'pct_reads_mapped',
    'num_reads_mapped',
    'median_coverage',
]

_GENE_COVERAGE_FIELDNAMES = [
    'locus_tag',
    'gene',
    'fraction',
    'cutoff',
]

_MISSING_POSITIONS_FIELDNAMES = [
    'locus_tag',
    'gene',
    'position',
    'variants',
    'drugs',
]

# Columns shared by the resistance-variant and other-variant tables; each
# table appends its own drug column.
_VARIANT_FIELDNAMES = [
    'chrom',
    'genome_pos',
    'locus_tag',
    'feature_id',
    'gene',
    'type',
    'ref',
    'alt',
    'freq',
    'nucleotide_change',
    'protein_change',
    'change',
]

_RESISTANCE_VARIANTS_FIELDNAMES = _VARIANT_FIELDNAMES + ['drugs']

_OTHER_VARIANTS_FIELDNAMES = _VARIANT_FIELDNAMES + ['gene_associated_drugs']

_ANALYSIS_METADATA_FIELDNAMES = [
    'timestamp',
    'tbprofiler_version',
    'mapping_program',
    'variant_calling_program',
    'db_name',
    'db_commit',
    'db_date',
]


def _write_table(path, fieldnames, rows):
    """Write *rows* (dicts) to *path* as a tab-separated table with a header.

    Nothing is written when *path* is None, so an output that was not
    requested on the command line is skipped instead of failing in open().
    Keys that are not in *fieldnames* are dropped; fields missing from a row
    are written as empty cells (the DictWriter default).
    """
    if path is None:
        return
    # newline='' hands line-ending control to the csv module, as its
    # documentation requires; otherwise the dialect's '\r\n' can be
    # translated a second time and come out as '\r\r\n'.
    with open(path, 'w', newline='') as f:
        writer = csv.DictWriter(
            f,
            fieldnames=fieldnames,
            dialect='excel-tab',
            quoting=csv.QUOTE_MINIMAL,
            extrasaction='ignore',
        )
        writer.writeheader()
        writer.writerows(rows)


def _select(record, fieldnames):
    """Return a new dict holding exactly *fieldnames* taken from *record*.

    Raises KeyError if *record* lacks one of the fields.
    """
    return {name: record[name] for name in fieldnames}


def _resistance_variant_rows(report):
    """Build the resistance-variant rows from report['dr_variants'].

    Each variant's 'drugs' list is flattened to 'drug:confers' pairs joined
    with ', '. The report entries themselves are left unmodified.
    """
    rows = []
    for variant in report['dr_variants']:
        row = _select(variant, _VARIANT_FIELDNAMES)
        row['drugs'] = ', '.join(
            [drug['drug'] + ':' + drug['confers'] for drug in variant['drugs']]
        )
        rows.append(row)
    return rows


def _other_variant_rows(report):
    """Build the other-variant rows from report['other_variants'].

    Each variant's 'gene_associated_drugs' list is joined with ', '.
    """
    rows = []
    for variant in report['other_variants']:
        row = _select(variant, _VARIANT_FIELDNAMES)
        row['gene_associated_drugs'] = ', '.join(variant['gene_associated_drugs'])
        rows.append(row)
    return rows


def _analysis_metadata_row(report):
    """Collect the single analysis-metadata row from the top-level report keys.

    The mapping and variant-calling program names come from the
    report['pipeline'] entries whose 'Analysis' is "Mapping" or
    "Variant calling". If no such entry exists the column is left out of the
    dict and is therefore written as an empty cell.
    """
    db_version = report['db_version']
    row = {
        'timestamp': report['timestamp'],
        'tbprofiler_version': report['tbprofiler_version'],
        'db_name': db_version['name'],
        'db_commit': db_version['commit'],
        'db_date': db_version['Date'],
    }
    for pipeline_entry in report['pipeline']:
        if pipeline_entry['Analysis'] == "Mapping":
            row['mapping_program'] = pipeline_entry['Program']
        elif pipeline_entry['Analysis'] == "Variant calling":
            row['variant_calling_program'] = pipeline_entry['Program']
    return row


def main(args):
    """Split a JSON report into tab-separated tables.

    Reads the JSON file at ``args.input`` and writes one table per output
    path found on *args*:

    * ``args.qc`` -- one row with the three scalar values from report['qc'].
    * ``args.gene_coverage`` -- one row per report['qc']['gene_coverage'] entry.
    * ``args.missing_positions`` -- one row per
      report['qc']['missing_positions'] entry.
    * ``args.resistance_variants`` -- one row per report['dr_variants'] entry.
    * ``args.other_variants`` -- one row per report['other_variants'] entry.
    * ``args.analysis_metadata`` -- one row of version and pipeline details.

    An output path that is None is skipped. The report must still contain
    every section, because the rows are built before the path is checked.

    Raises:
        KeyError: if the report lacks a section, or a QC or variant entry
            lacks one of the required columns.
    """
    with open(args.input, 'r') as f:
        report = json.load(f)

    qc = report['qc']

    _write_table(args.qc, _QC_FIELDNAMES, [_select(qc, _QC_FIELDNAMES)])
    _write_table(args.gene_coverage, _GENE_COVERAGE_FIELDNAMES, qc['gene_coverage'])
    _write_table(
        args.missing_positions,
        _MISSING_POSITIONS_FIELDNAMES,
        qc['missing_positions'],
    )
    _write_table(
        args.resistance_variants,
        _RESISTANCE_VARIANTS_FIELDNAMES,
        _resistance_variant_rows(report),
    )
    _write_table(
        args.other_variants,
        _OTHER_VARIANTS_FIELDNAMES,
        _other_variant_rows(report),
    )
    _write_table(
        args.analysis_metadata,
        _ANALYSIS_METADATA_FIELDNAMES,
        [_analysis_metadata_row(report)],
    )
126 | |
if __name__ == '__main__':
    # Command-line interface: one positional JSON report path plus one
    # optional output path per table written by main().
    cli = argparse.ArgumentParser()
    cli.add_argument('input')
    for output_flag in (
        '--qc',
        '--gene-coverage',
        '--missing-positions',
        '--resistance-variants',
        '--other-variants',
        '--analysis-metadata',
    ):
        cli.add_argument(output_flag)
    main(cli.parse_args())