Mercurial > repos > public-health-bioinformatics > tbprofiler_json_to_tabular
comparison tbprofiler_json_to_tabular.py @ 0:3e0d5ceeeb0f draft default tip
planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/tbprofiler_json_to_tabular commit 24535690aedb81353cf5e036dc4577022d9604ad
| author | public-health-bioinformatics |
|---|---|
| date | Tue, 19 Sep 2023 18:03:41 +0000 |
| parents | |
| children |
comparison
equal
deleted
inserted
replaced
| -1:000000000000 | 0:3e0d5ceeeb0f |
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
| 3 import argparse | |
| 4 import csv | |
| 5 import json | |
| 6 | |
| 7 | |
def _write_table(path, fieldnames, rows):
    """Write an iterable of row dicts to ``path`` as a tab-separated table.

    A header line built from ``fieldnames`` is always written, even when
    ``rows`` is empty. Keys absent from a row are emitted as empty cells and
    keys not listed in ``fieldnames`` are dropped.
    """
    # newline='' hands line-ending control to the csv module (as its docs
    # require); otherwise the dialect's '\r\n' terminator can be translated
    # a second time by the text layer.
    with open(path, 'w', newline='') as f:
        writer = csv.DictWriter(
            f,
            fieldnames=fieldnames,
            dialect='excel-tab',
            quoting=csv.QUOTE_MINIMAL,
            # Report rows may carry keys we do not tabulate; ignore them
            # instead of aborting with ValueError.
            extrasaction='ignore',
        )
        writer.writeheader()
        writer.writerows(rows)


def _resistance_variant_rows(variants, fieldnames):
    """Yield one output row per resistance variant, flattening its drug list."""
    for variant in variants:
        # Work on a shallow copy so the loaded report is left untouched.
        flattened = dict(variant)
        flattened['drugs'] = ', '.join(
            drug['drug'] + ':' + drug['confers'] for drug in variant['drugs']
        )
        yield {k: flattened[k] for k in fieldnames}


def _other_variant_rows(variants, fieldnames):
    """Yield one output row per non-resistance variant, joining its drug names."""
    for variant in variants:
        flattened = dict(variant)
        flattened['gene_associated_drugs'] = ', '.join(variant['gene_associated_drugs'])
        yield {k: flattened[k] for k in fieldnames}


def main(args):
    """Split a JSON report into six tab-separated tables.

    Args:
        args: Namespace with the attributes ``input`` (path of the JSON
            report to read) and ``qc``, ``gene_coverage``,
            ``missing_positions``, ``resistance_variants``,
            ``other_variants`` and ``analysis_metadata`` (paths of the
            tables to write).

    Raises:
        KeyError: If the report lacks one of the keys read here. Tables
            written before the failing section are left on disk.
    """
    with open(args.input, 'r') as f:
        report = json.load(f)

    qc = report['qc']

    # --- Summary QC metrics: a single row -------------------------------
    qc_fieldnames = [
        'pct_reads_mapped',
        'num_reads_mapped',
        'median_coverage',
    ]
    _write_table(args.qc, qc_fieldnames, [{k: qc[k] for k in qc_fieldnames}])

    # --- Per-gene coverage ----------------------------------------------
    gene_coverage_fieldnames = [
        'locus_tag',
        'gene',
        'fraction',
        'cutoff',
    ]
    _write_table(args.gene_coverage, gene_coverage_fieldnames, qc['gene_coverage'])

    # --- Missing positions ----------------------------------------------
    missing_positions_fieldnames = [
        'locus_tag',
        'gene',
        'position',
        'variants',
        'drugs',
    ]
    _write_table(args.missing_positions, missing_positions_fieldnames, qc['missing_positions'])

    # --- Variants: both tables share every column but the last ----------
    variant_fieldnames = [
        'chrom',
        'genome_pos',
        'locus_tag',
        'feature_id',
        'gene',
        'type',
        'ref',
        'alt',
        'freq',
        'nucleotide_change',
        'protein_change',
        'change',
    ]

    resistance_variants_fieldnames = variant_fieldnames + ['drugs']
    _write_table(
        args.resistance_variants,
        resistance_variants_fieldnames,
        _resistance_variant_rows(report['dr_variants'], resistance_variants_fieldnames),
    )

    other_variants_fieldnames = variant_fieldnames + ['gene_associated_drugs']
    _write_table(
        args.other_variants,
        other_variants_fieldnames,
        _other_variant_rows(report['other_variants'], other_variants_fieldnames),
    )

    # --- Analysis metadata: a single row --------------------------------
    analysis_metadata_fieldnames = [
        'timestamp',
        'tbprofiler_version',
        'mapping_program',
        'variant_calling_program',
        'db_name',
        'db_commit',
        'db_date',
    ]
    metadata = {
        'timestamp': report['timestamp'],
        'tbprofiler_version': report['tbprofiler_version'],
        'db_name': report['db_version']['name'],
        'db_commit': report['db_version']['commit'],
        'db_date': report['db_version']['Date'],
    }
    # The program columns stay empty when the pipeline list has no matching
    # entry; if several entries match, the last one wins.
    pipeline_columns = {
        'Mapping': 'mapping_program',
        'Variant calling': 'variant_calling_program',
    }
    for pipeline_entry in report['pipeline']:
        column = pipeline_columns.get(pipeline_entry['Analysis'])
        if column is not None:
            metadata[column] = pipeline_entry['Program']
    _write_table(args.analysis_metadata, analysis_metadata_fieldnames, [metadata])
| 126 | |
if __name__ == '__main__':
    parser = argparse.ArgumentParser(
        description='Convert a TB-Profiler JSON report into tab-separated tables.'
    )
    parser.add_argument('input', help='path to the JSON report to read')
    # main() writes every table unconditionally, so each output path is
    # mandatory: let argparse report a missing option up front instead of
    # failing later inside open() with a TypeError on None.
    output_options = (
        ('--qc', 'summary QC metrics'),
        ('--gene-coverage', 'per-gene coverage'),
        ('--missing-positions', 'missing positions'),
        ('--resistance-variants', 'resistance variants'),
        ('--other-variants', 'other variants'),
        ('--analysis-metadata', 'analysis metadata'),
    )
    for flag, contents in output_options:
        parser.add_argument(
            flag,
            required=True,
            help='output path for the {} table'.format(contents),
        )
    args = parser.parse_args()
    main(args)
