Mercurial > repos > public-health-bioinformatics > adjust_bracken_for_unclassified_reads
annotate adjust_bracken_for_unclassified_reads.py @ 0:3ab9d37e547e draft
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/adjust_bracken_for_unclassified_reads commit 0d1d1f356cdfd8ef6dbcdd1bfe76c4637587ff53"
author | public-health-bioinformatics |
---|---|
date | Thu, 10 Mar 2022 21:35:14 +0000 |
parents | |
children | 87459bd1615a |
rev | line source |
---|---|
0
3ab9d37e547e
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/adjust_bracken_for_unclassified_reads commit 0d1d1f356cdfd8ef6dbcdd1bfe76c4637587ff53"
public-health-bioinformatics
parents:
diff
changeset
|
1 #!/usr/bin/env python |
3ab9d37e547e
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/adjust_bracken_for_unclassified_reads commit 0d1d1f356cdfd8ef6dbcdd1bfe76c4637587ff53"
public-health-bioinformatics
parents:
diff
changeset
|
2 |
3ab9d37e547e
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/adjust_bracken_for_unclassified_reads commit 0d1d1f356cdfd8ef6dbcdd1bfe76c4637587ff53"
public-health-bioinformatics
parents:
diff
changeset
|
3 import argparse |
3ab9d37e547e
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/adjust_bracken_for_unclassified_reads commit 0d1d1f356cdfd8ef6dbcdd1bfe76c4637587ff53"
public-health-bioinformatics
parents:
diff
changeset
|
4 import csv |
3ab9d37e547e
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/adjust_bracken_for_unclassified_reads commit 0d1d1f356cdfd8ef6dbcdd1bfe76c4637587ff53"
public-health-bioinformatics
parents:
diff
changeset
|
5 import json |
3ab9d37e547e
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/adjust_bracken_for_unclassified_reads commit 0d1d1f356cdfd8ef6dbcdd1bfe76c4637587ff53"
public-health-bioinformatics
parents:
diff
changeset
|
6 import sys |
3ab9d37e547e
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/adjust_bracken_for_unclassified_reads commit 0d1d1f356cdfd8ef6dbcdd1bfe76c4637587ff53"
public-health-bioinformatics
parents:
diff
changeset
|
7 |
3ab9d37e547e
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/adjust_bracken_for_unclassified_reads commit 0d1d1f356cdfd8ef6dbcdd1bfe76c4637587ff53"
public-health-bioinformatics
parents:
diff
changeset
|
def parse_bracken_abundances(bracken_abundances_path):
    """Parse a tab-delimited Bracken abundance table.

    The file must have a header row containing at least the columns
    ``name``, ``taxonomy_id``, ``taxonomy_lvl``, ``kraken_assigned_reads``,
    ``new_est_reads`` and ``fraction_total_reads``.

    Args:
        bracken_abundances_path: Path to the Bracken abundance file.

    Returns:
        A list with one dict per data row, in file order, with keys
        ``name``, ``taxonomy_id``, ``taxonomy_lvl`` (strings),
        ``kraken_assigned_seqs``, ``bracken_assigned_seqs`` (ints) and
        ``bracken_fraction_total_seqs`` (float).

    Raises:
        KeyError: If one of the expected columns is missing.
        ValueError: If a count or fraction column is not numeric.
    """
    bracken_abundances = []
    # newline='' hands raw line endings to the csv module, as its
    # documentation requires for file objects passed to a reader.
    with open(bracken_abundances_path, 'r', newline='') as f:
        reader = csv.DictReader(f, dialect='excel-tab')
        for row in reader:
            bracken_abundances.append({
                'name': row['name'],
                'taxonomy_id': row['taxonomy_id'],
                'taxonomy_lvl': row['taxonomy_lvl'],
                # Input columns are named "*_reads"; output keys use "*_seqs".
                'kraken_assigned_seqs': int(row['kraken_assigned_reads']),
                'bracken_assigned_seqs': int(row['new_est_reads']),
                'bracken_fraction_total_seqs': float(row['fraction_total_reads']),
            })

    return bracken_abundances
3ab9d37e547e
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/adjust_bracken_for_unclassified_reads commit 0d1d1f356cdfd8ef6dbcdd1bfe76c4637587ff53"
public-health-bioinformatics
parents:
diff
changeset
|
23 |
3ab9d37e547e
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/adjust_bracken_for_unclassified_reads commit 0d1d1f356cdfd8ef6dbcdd1bfe76c4637587ff53"
public-health-bioinformatics
parents:
diff
changeset
|
24 |
3ab9d37e547e
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/adjust_bracken_for_unclassified_reads commit 0d1d1f356cdfd8ef6dbcdd1bfe76c4637587ff53"
public-health-bioinformatics
parents:
diff
changeset
|
def parse_kraken_report(kraken_report_path):
    """Parse a six-column, header-less Kraken report.

    Each non-blank line is split on whitespace into: percentage, total
    sequence count, sequence count at this level, taxonomic level code,
    NCBI taxonomy id and taxon name. The taxon name is the remainder of
    the line, so it may contain spaces; its leading indentation is dropped.

    Args:
        kraken_report_path: Path to the Kraken report file.

    Returns:
        A list with one dict per report line, in file order, with keys
        ``percentage`` (float), ``seqs_total`` and ``seqs_this_level``
        (ints), ``taxonomic_level``, ``ncbi_taxid`` and ``taxon_name``
        (strings).

    Raises:
        ValueError: If a non-blank line has fewer than six fields or a
            numeric field cannot be converted.
    """
    kraken_report = []
    with open(kraken_report_path, 'r') as f:
        for line in f:
            stripped = line.strip()
            if not stripped:
                # Blank lines (e.g. a trailing empty line) carry no record
                # and would otherwise break the six-way unpacking below.
                continue
            (
                percentage,
                seqs_total,
                seqs_this_level,
                taxonomic_level,
                ncbi_taxid,
                taxon_name,
            ) = stripped.split(None, 5)
            kraken_report.append({
                'percentage': float(percentage),
                'seqs_total': int(seqs_total),
                'seqs_this_level': int(seqs_this_level),
                'taxonomic_level': taxonomic_level,
                'ncbi_taxid': ncbi_taxid,
                'taxon_name': taxon_name,
            })

    return kraken_report
3ab9d37e547e
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/adjust_bracken_for_unclassified_reads commit 0d1d1f356cdfd8ef6dbcdd1bfe76c4637587ff53"
public-health-bioinformatics
parents:
diff
changeset
|
40 |
3ab9d37e547e
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/adjust_bracken_for_unclassified_reads commit 0d1d1f356cdfd8ef6dbcdd1bfe76c4637587ff53"
public-health-bioinformatics
parents:
diff
changeset
|
41 |
3ab9d37e547e
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/adjust_bracken_for_unclassified_reads commit 0d1d1f356cdfd8ef6dbcdd1bfe76c4637587ff53"
public-health-bioinformatics
parents:
diff
changeset
|
def main(args):
    """Re-scale Bracken abundances so they account for unclassified reads.

    Reads the Kraken report and the Bracken abundance table named by
    ``args``, prepends an ``unclassified`` entry to the Bracken rows,
    recomputes every row's Kraken and Bracken fractions against the total
    sequence count (classified + unclassified) and writes the rows as a
    tab-delimited table to standard output, ordered by decreasing
    Bracken-assigned sequence count.

    Args:
        args: Namespace with ``kraken_report`` and ``bracken_abundances``
            attributes, both file paths.
    """
    kraken_report = parse_kraken_report(args.kraken_report)
    bracken_abundances = parse_bracken_abundances(args.bracken_abundances)

    # A report may lack an 'unclassified' or a 'root' line (e.g. when no
    # sequences fall in that category); treat a missing line as a count of
    # zero instead of failing with IndexError.
    kraken_report_unclassified_seqs = next(
        (k['seqs_this_level'] for k in kraken_report if k['taxon_name'] == 'unclassified'),
        0,
    )
    kraken_report_classified_seqs = next(
        (k['seqs_total'] for k in kraken_report if k['taxon_name'] == 'root'),
        0,
    )

    total_seqs = kraken_report_classified_seqs + kraken_report_unclassified_seqs

    def fraction_of_total(seqs):
        # An empty report gives a zero total; report 0.0 rather than
        # dividing by zero.
        if not total_seqs:
            return 0.0
        return float(seqs) / float(total_seqs)

    bracken_unclassified_entry = {
        'name': 'unclassified',
        'taxonomy_id': 0,
        'taxonomy_lvl': 'U',
        'kraken_assigned_seqs': kraken_report_unclassified_seqs,
        'bracken_assigned_seqs': kraken_report_unclassified_seqs,
    }

    bracken_abundances = [bracken_unclassified_entry] + bracken_abundances

    output_fieldnames = [
        'name',
        'taxonomy_id',
        'taxonomy_lvl',
        'kraken_assigned_seqs',
        'bracken_assigned_seqs',
        'total_seqs',
        'kraken_fraction_total_seqs',
        'bracken_fraction_total_seqs',
    ]

    writer = csv.DictWriter(sys.stdout, fieldnames=output_fieldnames, dialect='excel-tab')
    writer.writeheader()

    for b in bracken_abundances:
        b['total_seqs'] = total_seqs
        b['kraken_fraction_total_seqs'] = '{:.6f}'.format(fraction_of_total(b['kraken_assigned_seqs']))
        b['bracken_fraction_total_seqs'] = '{:.6f}'.format(fraction_of_total(b['bracken_assigned_seqs']))

    # Order on the integer count rather than on the formatted fraction
    # string: every fraction shares the same denominator, so the order is
    # the same, but it does not depend on lexical comparison of strings.
    for b in sorted(bracken_abundances, key=lambda x: x['bracken_assigned_seqs'], reverse=True):
        writer.writerow(b)
3ab9d37e547e
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/adjust_bracken_for_unclassified_reads commit 0d1d1f356cdfd8ef6dbcdd1bfe76c4637587ff53"
public-health-bioinformatics
parents:
diff
changeset
|
87 |
3ab9d37e547e
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/adjust_bracken_for_unclassified_reads commit 0d1d1f356cdfd8ef6dbcdd1bfe76c4637587ff53"
public-health-bioinformatics
parents:
diff
changeset
|
88 |
3ab9d37e547e
"planemo upload for repository https://github.com/public-health-bioinformatics/galaxy_tools/blob/master/tools/adjust_bracken_for_unclassified_reads commit 0d1d1f356cdfd8ef6dbcdd1bfe76c4637587ff53"
public-health-bioinformatics
parents:
diff
changeset
|
if __name__ == '__main__':
    # Command-line entry point. Both inputs are mandatory: without them
    # main() would try to open None and fail with an unhelpful TypeError,
    # so let argparse report the missing option instead.
    parser = argparse.ArgumentParser(
        description='Adjust Bracken abundance estimates to account for unclassified reads.'
    )
    parser.add_argument(
        '-k', '--kraken-report',
        required=True,
        help='Path to the Kraken report file',
    )
    parser.add_argument(
        '-a', '--bracken-abundances',
        required=True,
        help='Path to the Bracken abundances file (tab-delimited, with header)',
    )
    args = parser.parse_args()
    main(args)