annotate adjust_bracken_for_unclassified_reads.py @ 0:3ab9d37e547e draft

"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/adjust_bracken_for_unclassified_reads commit 0d1d1f356cdfd8ef6dbcdd1bfe76c4637587ff53"
author public-health-bioinformatics
date Thu, 10 Mar 2022 21:35:14 +0000
parents
children 87459bd1615a
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
3ab9d37e547e "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/adjust_bracken_for_unclassified_reads commit 0d1d1f356cdfd8ef6dbcdd1bfe76c4637587ff53"
public-health-bioinformatics
parents:
diff changeset
1 #!/usr/bin/env python
3ab9d37e547e "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/adjust_bracken_for_unclassified_reads commit 0d1d1f356cdfd8ef6dbcdd1bfe76c4637587ff53"
public-health-bioinformatics
parents:
diff changeset
2
3ab9d37e547e "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/adjust_bracken_for_unclassified_reads commit 0d1d1f356cdfd8ef6dbcdd1bfe76c4637587ff53"
public-health-bioinformatics
parents:
diff changeset
3 import argparse
3ab9d37e547e "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/adjust_bracken_for_unclassified_reads commit 0d1d1f356cdfd8ef6dbcdd1bfe76c4637587ff53"
public-health-bioinformatics
parents:
diff changeset
4 import csv
3ab9d37e547e "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/adjust_bracken_for_unclassified_reads commit 0d1d1f356cdfd8ef6dbcdd1bfe76c4637587ff53"
public-health-bioinformatics
parents:
diff changeset
5 import json
3ab9d37e547e "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/adjust_bracken_for_unclassified_reads commit 0d1d1f356cdfd8ef6dbcdd1bfe76c4637587ff53"
public-health-bioinformatics
parents:
diff changeset
6 import sys
3ab9d37e547e "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/adjust_bracken_for_unclassified_reads commit 0d1d1f356cdfd8ef6dbcdd1bfe76c4637587ff53"
public-health-bioinformatics
parents:
diff changeset
7
3ab9d37e547e "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/adjust_bracken_for_unclassified_reads commit 0d1d1f356cdfd8ef6dbcdd1bfe76c4637587ff53"
public-health-bioinformatics
parents:
diff changeset
def parse_bracken_abundances(bracken_abundances_path):
    """Read a Bracken abundance table into a list of dicts.

    The file is read as tab-delimited text with a header row. The columns
    ``name``, ``taxonomy_id``, ``taxonomy_lvl``, ``kraken_assigned_reads``,
    ``new_est_reads`` and ``fraction_total_reads`` must be present.

    :param bracken_abundances_path: Path to the Bracken abundances file.
    :return: One dict per data row, in file order, with keys ``name``,
        ``taxonomy_id``, ``taxonomy_lvl`` (strings),
        ``kraken_assigned_seqs``, ``bracken_assigned_seqs`` (ints) and
        ``bracken_fraction_total_seqs`` (float).
    :raises KeyError: If an expected column is missing.
    :raises ValueError: If a numeric column cannot be converted.
    """
    bracken_abundances = []
    # newline='' lets the csv module do its own line-ending handling, as its
    # documentation requires for files handed to a reader.
    with open(bracken_abundances_path, 'r', newline='') as f:
        reader = csv.DictReader(f, dialect='excel-tab')
        for row in reader:
            # Rename the "reads" columns to the "seqs" names used in the output.
            bracken_abundances.append({
                'name': row['name'],
                'taxonomy_id': row['taxonomy_id'],
                'taxonomy_lvl': row['taxonomy_lvl'],
                'kraken_assigned_seqs': int(row['kraken_assigned_reads']),
                'bracken_assigned_seqs': int(row['new_est_reads']),
                'bracken_fraction_total_seqs': float(row['fraction_total_reads']),
            })

    return bracken_abundances
3ab9d37e547e "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/adjust_bracken_for_unclassified_reads commit 0d1d1f356cdfd8ef6dbcdd1bfe76c4637587ff53"
public-health-bioinformatics
parents:
diff changeset
23
3ab9d37e547e "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/adjust_bracken_for_unclassified_reads commit 0d1d1f356cdfd8ef6dbcdd1bfe76c4637587ff53"
public-health-bioinformatics
parents:
diff changeset
24
3ab9d37e547e "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/adjust_bracken_for_unclassified_reads commit 0d1d1f356cdfd8ef6dbcdd1bfe76c4637587ff53"
public-health-bioinformatics
parents:
diff changeset
def parse_kraken_report(kraken_report_path):
    """Read a Kraken report into a list of dicts.

    Each non-blank line is split on whitespace into six fields: percentage,
    sequences in the clade, sequences assigned at this level, taxonomic
    level, NCBI taxid and taxon name. The taxon name is the remainder of the
    line, so it may itself contain spaces.

    NOTE(review): assumes the six-column report layout; reports carrying
    extra columns would be mis-parsed -- confirm against the producer.

    :param kraken_report_path: Path to the Kraken report file.
    :return: One dict per report line, in file order, with keys
        ``percentage`` (float), ``seqs_total``, ``seqs_this_level`` (ints),
        ``taxonomic_level``, ``ncbi_taxid`` and ``taxon_name`` (strings).
    :raises ValueError: If a non-blank line does not have six fields or a
        numeric field cannot be converted.
    """
    kraken_report = []
    with open(kraken_report_path, 'r') as f:
        for line_number, line in enumerate(f, start=1):
            fields = line.strip().split(None, 5)
            if not fields:
                # Blank lines (e.g. a trailing empty line) carry no data.
                continue
            if len(fields) != 6:
                raise ValueError(
                    'Malformed kraken report line {}: expected 6 fields, got {}'.format(
                        line_number, len(fields)
                    )
                )
            percentage, seqs_total, seqs_this_level, taxonomic_level, ncbi_taxid, taxon_name = fields
            kraken_report.append({
                'percentage': float(percentage),
                'seqs_total': int(seqs_total),
                'seqs_this_level': int(seqs_this_level),
                'taxonomic_level': taxonomic_level,
                'ncbi_taxid': ncbi_taxid,
                'taxon_name': taxon_name,
            })

    return kraken_report
3ab9d37e547e "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/adjust_bracken_for_unclassified_reads commit 0d1d1f356cdfd8ef6dbcdd1bfe76c4637587ff53"
public-health-bioinformatics
parents:
diff changeset
40
3ab9d37e547e "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/adjust_bracken_for_unclassified_reads commit 0d1d1f356cdfd8ef6dbcdd1bfe76c4637587ff53"
public-health-bioinformatics
parents:
diff changeset
41
3ab9d37e547e "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/adjust_bracken_for_unclassified_reads commit 0d1d1f356cdfd8ef6dbcdd1bfe76c4637587ff53"
public-health-bioinformatics
parents:
diff changeset
def main(args):
    """Write Bracken abundances, re-scaled to include unclassified reads.

    Reads the Kraken report and the Bracken abundance table named in
    ``args``, prepends an ``unclassified`` entry built from the Kraken
    report, recomputes every entry's Kraken and Bracken fractions against
    the total (classified + unclassified) sequence count, and writes the
    result to stdout as a tab-delimited table sorted by descending Bracken
    fraction.

    :param args: Namespace with ``kraken_report`` and ``bracken_abundances``
        attributes, each a file path.
    :return: None. Output is written to ``sys.stdout``.
    """
    kraken_report = parse_kraken_report(args.kraken_report)
    bracken_abundances = parse_bracken_abundances(args.bracken_abundances)

    # The report is not guaranteed to contain both lines (presumably e.g. when
    # a sample has no unclassified reads), so a missing line counts as zero
    # instead of raising IndexError.
    kraken_report_unclassified_seqs = next(
        (x['seqs_this_level'] for x in kraken_report if x['taxon_name'] == 'unclassified'),
        0,
    )
    kraken_report_classified_seqs = next(
        (x['seqs_total'] for x in kraken_report if x['taxon_name'] == 'root'),
        0,
    )

    total_seqs = kraken_report_classified_seqs + kraken_report_unclassified_seqs

    def fraction_of_total(seqs):
        # An empty sample has no meaningful fractions; report 0 rather than
        # failing with ZeroDivisionError.
        if not total_seqs:
            return 0.0
        return float(seqs) / float(total_seqs)

    # The fraction columns are filled in by the loop below, together with
    # those of every other entry.
    bracken_unclassified_entry = {
        'name': 'unclassified',
        'taxonomy_id': 0,
        'taxonomy_lvl': 'U',
        'kraken_assigned_seqs': kraken_report_unclassified_seqs,
        'bracken_assigned_seqs': kraken_report_unclassified_seqs,
    }

    bracken_abundances = [bracken_unclassified_entry] + bracken_abundances

    output_fieldnames = [
        'name',
        'taxonomy_id',
        'taxonomy_lvl',
        'kraken_assigned_seqs',
        'bracken_assigned_seqs',
        'total_seqs',
        'kraken_fraction_total_seqs',
        'bracken_fraction_total_seqs',
    ]

    writer = csv.DictWriter(sys.stdout, fieldnames=output_fieldnames, dialect='excel-tab')
    writer.writeheader()

    for b in bracken_abundances:
        b['total_seqs'] = total_seqs
        b['kraken_fraction_total_seqs'] = '{:.6f}'.format(
            fraction_of_total(b['kraken_assigned_seqs'])
        )
        b['bracken_fraction_total_seqs'] = '{:.6f}'.format(
            fraction_of_total(b['bracken_assigned_seqs'])
        )

    # The fractions are formatted strings at this point; compare their numeric
    # value so the ordering does not depend on lexicographic string comparison.
    ordered = sorted(
        bracken_abundances,
        key=lambda x: float(x['bracken_fraction_total_seqs']),
        reverse=True,
    )
    for b in ordered:
        writer.writerow(b)
3ab9d37e547e "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/adjust_bracken_for_unclassified_reads commit 0d1d1f356cdfd8ef6dbcdd1bfe76c4637587ff53"
public-health-bioinformatics
parents:
diff changeset
87
3ab9d37e547e "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/adjust_bracken_for_unclassified_reads commit 0d1d1f356cdfd8ef6dbcdd1bfe76c4637587ff53"
public-health-bioinformatics
parents:
diff changeset
88
3ab9d37e547e "planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/adjust_bracken_for_unclassified_reads commit 0d1d1f356cdfd8ef6dbcdd1bfe76c4637587ff53"
public-health-bioinformatics
parents:
diff changeset
if __name__ == '__main__':
    # Command-line entry point. Both inputs are mandatory: main() opens each
    # path, so a missing option would otherwise fail later with an unhelpful
    # TypeError instead of a usage message.
    parser = argparse.ArgumentParser(
        description='Adjust Bracken abundance fractions to account for unclassified reads.'
    )
    parser.add_argument(
        '-k', '--kraken-report',
        required=True,
        help='Path to the Kraken report file',
    )
    parser.add_argument(
        '-a', '--bracken-abundances',
        required=True,
        help='Path to the Bracken abundances file',
    )
    args = parser.parse_args()
    main(args)