Mercurial > repos > iuc > vsnp_add_zero_coverage
annotate vsnp_determine_ref_from_data.py @ 8:18b59c38017e draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 10077c740e7cbe6a6563a1c632d711691753e46d"
author | iuc |
---|---|
date | Mon, 06 Dec 2021 18:30:02 +0000 |
parents | e12ccc57875c |
children |
rev | line source |
---|---|
2
4cc004985e27
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff
changeset
|
1 #!/usr/bin/env python |
4cc004985e27
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff
changeset
|
2 |
4cc004985e27
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff
changeset
|
3 import argparse |
4cc004985e27
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff
changeset
|
4 import gzip |
4cc004985e27
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff
changeset
|
5 import os |
4cc004985e27
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff
changeset
|
6 from collections import OrderedDict |
4cc004985e27
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff
changeset
|
7 |
4cc004985e27
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff
changeset
|
8 import yaml |
4cc004985e27
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff
changeset
|
9 from Bio.SeqIO.QualityIO import FastqGeneralIterator |
4cc004985e27
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff
changeset
|
10 |
4cc004985e27
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff
changeset
|
11 OUTPUT_DBKEY_DIR = 'output_dbkey' |
4cc004985e27
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff
changeset
|
12 OUTPUT_METRICS_DIR = 'output_metrics' |
4cc004985e27
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff
changeset
|
13 |
4cc004985e27
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff
changeset
|
14 |
3
2e863710a2f0
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
iuc
parents:
2
diff
changeset
|
def get_sample_name(file_path):
    """Return the sample name derived from a file path.

    The directory portion is discarded and the last extension is removed,
    e.g. ``/data/sample.fastq`` -> ``sample``.  Only the final extension is
    stripped, so ``sample.fastq.gz`` -> ``sample.fastq``.  Names that contain
    no dot, or that begin with a dot, are returned unchanged.
    """
    name = os.path.basename(file_path)
    # find() returns -1 when there is no dot and 0 when the name starts with
    # one; in both cases there is no extension to eliminate.
    if name.find(".") <= 0:
        return name
    stem, _extension = os.path.splitext(name)
    return stem
4cc004985e27
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff
changeset
|
21 |
4cc004985e27
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff
changeset
|
22 |
4cc004985e27
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff
changeset
|
def get_dbkey(dnaprints_dict, key, s):
    """Return the data table value whose fingerprint list contains ``s``.

    ``dnaprints_dict`` looks something like this:
    {'brucella': {'NC_002945v4': ['11001110', '11011110', '11001100']},
     'bovis': {'NC_006895': ['11111110', '00010010', '01111011']}}

    Only the entries stored under ``key`` are searched, in insertion order,
    and the first data table value whose list contains ``s`` is returned.
    An empty string is returned when ``key`` is absent or nothing matches.
    """
    fingerprints_by_value = dnaprints_dict.get(key, {})
    matches = (
        data_table_value
        for data_table_value, fingerprints in fingerprints_by_value.items()
        if s in fingerprints
    )
    return next(matches, "")
4cc004985e27
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff
changeset
|
32 |
4cc004985e27
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff
changeset
|
33 |
4cc004985e27
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff
changeset
|
def get_dnaprints_dict(dnaprint_fields):
    """Build a nested lookup of DNA fingerprints from dnaprints YAML files.

    Args:
        dnaprint_fields: iterable of data table components, each a
            2-element list of value and path.  An entry looks something
            like this:
            [['AF2122', '/galaxy/tool-data/vsnp/AF2122/dnaprints/NC_002945v4.yml']]

    Returns:
        A dictionary keyed by the top-level keys of the YAML files (e.g.,
        'brucella') whose values map each data table value to the combined
        list of fingerprints read for it.  It looks something like this:
        {'brucella': {'NC_002945v4': ['11001110', '11011110', '11001100']},
         'bovis': {'NC_006895': ['11111110', '00010010', '01111011']}}

    Raises:
        OSError: if a dnaprints file cannot be opened.
    """
    dnaprints_dict = {}
    for item in dnaprint_fields:
        # Here item is a 2-element list of data
        # table components, value and path.
        value = item[0]
        path = item[1].strip()
        with open(path, "rt") as fh:
            # The format of all dnaprints yaml
            # files is something like this:
            # brucella:
            #  - 0111111111111111
            # NOTE(review): yaml.Loader is PyYAML's full loader and can build
            # arbitrary Python objects; consider yaml.safe_load if these files
            # could ever come from an untrusted source.
            print_dict = yaml.load(fh, Loader=yaml.Loader)
        if not print_dict:
            # An empty YAML document loads as None; it contributes nothing.
            continue
        for print_dict_k, print_dict_v in print_dict.items():
            # Fetch (or create) the per-group dictionary, which looks
            # something like this:
            # {'NC_002945v4': ['11001110', '11011110', '11001100']}
            dnaprints_v_dict = dnaprints_dict.setdefault(print_dict_k, {})
            # A key with no entries loads as None; treat it as an empty list
            # so every stored value is a list that supports "in" lookups.
            value_list = dnaprints_v_dict.get(value, [])
            dnaprints_v_dict[value] = value_list + (print_dict_v or [])
    return dnaprints_dict
4cc004985e27
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff
changeset
|
67 |
4cc004985e27
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff
changeset
|
68 |
4cc004985e27
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff
changeset
|
def get_group_and_dbkey(dnaprints_dict, brucella_string, brucella_sum, bovis_string, bovis_sum, para_string, para_sum):
    """Choose the organism group and look up its dbkey.

    The sums are checked in a fixed priority order and the first one that
    passes its threshold decides the group:

    * ``brucella_sum > 3`` -> "Brucella", looked up under 'brucella'
    * ``bovis_sum > 3``    -> "TB", looked up under 'bovis'
    * ``para_sum >= 1``    -> "paraTB", looked up under 'para'

    Returns:
        A ``(group, dbkey)`` tuple.  Both are empty strings when no
        threshold is met; ``dbkey`` alone is empty when the group's
        fingerprint string is not found in ``dnaprints_dict``.
    """
    if brucella_sum > 3:
        return "Brucella", get_dbkey(dnaprints_dict, "brucella", brucella_string)
    if bovis_sum > 3:
        return "TB", get_dbkey(dnaprints_dict, "bovis", bovis_string)
    if para_sum >= 1:
        return "paraTB", get_dbkey(dnaprints_dict, "para", para_string)
    return "", ""
4cc004985e27
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff
changeset
|
83 |
4cc004985e27
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff
changeset
|
84 |
4cc004985e27
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff
changeset
|
def get_oligo_dict():
    """Return a new dictionary mapping oligo names to their sequences.

    The keys carry a two-digit numeric prefix and are inserted in that
    numeric order.  The order is significant: get_species_counts sums the
    counts by position (the first 16 entries, the next 8, and the rest).
    """
    return {
        "01_ab1": "AATTGTCGGATAGCCTGGCGATAACGACGC",
        "02_ab3": "CACACGCGGGCCGGAACTGCCGCAAATGAC",
        "03_ab5": "GCTGAAGCGGCAGACCGGCAGAACGAATAT",
        "04_mel": "TGTCGCGCGTCAAGCGGCGTGAAATCTCTG",
        "05_suis1": "TGCGTTGCCGTGAAGCTTAATTCGGCTGAT",
        "06_suis2": "GGCAATCATGCGCAGGGCTTTGCATTCGTC",
        "07_suis3": "CAAGGCAGATGCACATAATCCGGCGACCCG",
        "08_ceti1": "GTGAATATAGGGTGAATTGATCTTCAGCCG",
        "09_ceti2": "TTACAAGCAGGCCTATGAGCGCGGCGTGAA",
        "10_canis4": "CTGCTACATAAAGCACCCGGCGACCGAGTT",
        "11_canis": "ATCGTTTTGCGGCATATCGCTGACCACAGC",
        "12_ovis": "CACTCAATCTTCTCTACGGGCGTGGTATCC",
        "13_ether2": "CGAAATCGTGGTGAAGGACGGGACCGAACC",
        "14_63B1": "CCTGTTTAAAAGAATCGTCGGAACCGCTCT",
        "15_16M0": "TCCCGCCGCCATGCCGCCGAAAGTCGCCGT",
        "16_mel1b": "TCTGTCCAAACCCCGTGACCGAACAATAGA",
        "17_tb157": "CTCTTCGTATACCGTTCCGTCGTCACCATGGTCCT",
        "18_tb7": "TCACGCAGCCAACGATATTCGTGTACCGCGACGGT",
        "19_tbbov": "CTGGGCGACCCGGCCGACCTGCACACCGCGCATCA",
        "20_tb5": "CCGTGGTGGCGTATCGGGCCCCTGGATCGCGCCCT",
        "21_tb2": "ATGTCTGCGTAAAGAAGTTCCATGTCCGGGAAGTA",
        "22_tb3": "GAAGACCTTGATGCCGATCTGGGTGTCGATCTTGA",
        "23_tb4": "CGGTGTTGAAGGGTCCCCCGTTCCAGAAGCCGGTG",
        "24_tb6": "ACGGTGATTCGGGTGGTCGACACCGATGGTTCAGA",
        "25_para": "CCTTTCTTGAAGGGTGTTCG",
        "26_para_sheep": "CGTGGTGGCGACGGCGGCGGGCCTGTCTAT",
        "27_para_cattle": "TCTCCTCGGTCGGTGATTCGGGGGCGCGGT",
    }
4cc004985e27
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff
changeset
|
115 |
4cc004985e27
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff
changeset
|
116 |
4cc004985e27
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff
changeset
|
def get_seq_counts(value, fastq_list, gzipped):
    """Count how often the sequence ``value`` occurs in the given FASTQ files.

    Args:
        value: the sequence string to search for.
        fastq_list: iterable of FASTQ file paths.
        gzipped: when true the files are opened with gzip in text mode,
            otherwise they are opened as plain text.

    Returns:
        A ``(value, count)`` tuple where ``count`` is the total of
        ``str.count`` (non-overlapping occurrences) over the sequence line
        of every read in every file.
    """
    # The same reading logic applies to both kinds of input; only the
    # function used to open the file (and its mode string) differs.
    if gzipped:
        opener, mode = gzip.open, 'rt'
    else:
        opener, mode = open, 'r'
    total = 0
    for fastq_path in fastq_list:
        with opener(fastq_path, mode) as handle:
            total += sum(seq.count(value) for _title, seq, _qual in FastqGeneralIterator(handle))
    return value, total
4cc004985e27
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff
changeset
|
129 |
4cc004985e27
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff
changeset
|
130 |
4cc004985e27
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff
changeset
|
def get_species_counts(fastq_list, gzipped):
    """Count every oligo from get_oligo_dict in the given FASTQ files.

    Args:
        fastq_list: iterable of FASTQ file paths.
        gzipped: passed through to get_seq_counts to select gzip or plain
            text reading.

    Returns:
        A 5-tuple ``(count_summary, count_list, brucella_sum, bovis_sum,
        para_sum)`` where ``count_summary`` maps each oligo name to its
        count, ``count_list`` holds the same counts in oligo order, and the
        three sums total the first 16 counts, the next 8 counts and the
        remaining counts respectively.
    """
    oligo_dict = get_oligo_dict()
    # Iterate names and sequences together so the name does not have to be
    # recovered by re-scanning the dictionary for every returned sequence.
    # NOTE: each get_seq_counts call loops over all of fastq_list, so the
    # inputs are read once per oligo.
    count_summary = {}
    for oligo_name, oligo_seq in oligo_dict.items():
        _returned_value, count = get_seq_counts(oligo_seq, fastq_list, gzipped)
        count_summary[oligo_name] = count
    count_list = list(count_summary.values())
    # The slice boundaries depend on the insertion order of get_oligo_dict.
    brucella_sum = sum(count_list[:16])
    bovis_sum = sum(count_list[16:24])
    para_sum = sum(count_list[24:])
    return count_summary, count_list, brucella_sum, bovis_sum, para_sum
4cc004985e27
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff
changeset
|
146 |
4cc004985e27
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff
changeset
|
147 |
4cc004985e27
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff
changeset
|
def get_species_strings(count_summary):
    """Turn per-oligo counts into presence/absence strings per species group.

    The oligo keys are visited in sorted order.  A count strictly greater
    than 1 becomes "1", anything else becomes "0".

    Args:
        count_summary: dict mapping oligo key -> count.

    Returns:
        A 3-tuple (brucella_string, bovis_string, para_string) built from
        the first 16 flags, the next 8 flags and all remaining flags.
    """
    # Sorting the keys gives the same order as sorting the (key, value)
    # pairs, because dictionary keys are unique.
    binary_list = [1 if count_summary[key] > 1 else 0 for key in sorted(count_summary)]
    brucella_string = ''.join(str(flag) for flag in binary_list[:16])
    bovis_string = ''.join(str(flag) for flag in binary_list[16:24])
    para_string = ''.join(str(flag) for flag in binary_list[24:])
    return brucella_string, bovis_string, para_string
4cc004985e27
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff
changeset
|
166 |
4cc004985e27
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff
changeset
|
167 |
4cc004985e27
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff
changeset
|
def output_dbkey(file_name, dbkey, output_file):
    """Write the dbkey to output_file with no trailing newline.

    Args:
        file_name: sample name; not used by this function, kept so the
            signature stays compatible with existing callers.
        dbkey: value to write; it is converted with str().
        output_file: path of the file to create or overwrite.
    """
    with open(output_file, "w") as fh:
        fh.write(str(dbkey))
4cc004985e27
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff
changeset
|
172 |
4cc004985e27
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff
changeset
|
173 |
4cc004985e27
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff
changeset
|
def output_files(fastq_file, count_list, group, dbkey, dbkey_file, metrics_file):
    """Write both the dbkey file and the metrics file for one sample.

    Args:
        fastq_file: fastq path handed to get_sample_name() to derive the
            sample name used in both outputs.
        count_list: per-oligo counts forwarded to output_metrics().
        group: group label forwarded to output_metrics().
        dbkey: dbkey written to both outputs.
        dbkey_file: path of the dbkey output file.
        metrics_file: path of the metrics output file.
    """
    base_file_name = get_sample_name(fastq_file)
    output_dbkey(base_file_name, dbkey, dbkey_file)
    output_metrics(base_file_name, count_list, group, dbkey, metrics_file)
4cc004985e27
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff
changeset
|
178 |
4cc004985e27
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff
changeset
|
179 |
4cc004985e27
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff
changeset
|
def output_metrics(file_name, count_list, group, dbkey, output_file):
    """Write the metrics report for one sample to output_file.

    The report lists the sample name, the Brucella counts (first 16
    entries of count_list), the TB counts (next 8 entries), the Para counts
    (all remaining entries), the group and the dbkey.  Every count is
    followed by a comma, including the last one on each line.

    Args:
        file_name: sample name written on the "Sample:" line.
        count_list: sequence of numeric counts, formatted with "%d".
        group: value written on the "Group:" line.
        dbkey: value written on the "dbkey:" line.
        output_file: path of the file to create or overwrite.
    """
    sections = (
        ("Brucella counts: ", count_list[:16]),
        ("\nTB counts: ", count_list[16:24]),
        ("\nPara counts: ", count_list[24:]),
    )
    pieces = ["Sample: %s\n" % file_name]
    for label, counts in sections:
        pieces.append(label)
        pieces.extend("%d," % count for count in counts)
    pieces.append("\nGroup: %s" % group)
    pieces.append("\ndbkey: %s\n" % dbkey)
    # The whole report is formatted before the file is opened so that a
    # formatting error cannot leave a truncated metrics file behind.
    with open(output_file, "w") as fh:
        fh.write("".join(pieces))
4cc004985e27
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff
changeset
|
195 |
4cc004985e27
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff
changeset
|
196 |
4cc004985e27
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 524a39e08f2bea8b8754284df606ff8dd27ed24b"
iuc
parents:
diff
changeset
|
if __name__ == '__main__':
    # Command-line entry point: determine the group and dbkey for a single
    # read or a read pair, then write the dbkey and metrics outputs.
    parser = argparse.ArgumentParser()

    # NOTE(review): nargs=2 supplies two fields per --dnaprint_fields entry,
    # while the help text mentions three (value, name and path) - confirm
    # against get_dnaprints_dict.
    parser.add_argument('--dnaprint_fields', action='append', dest='dnaprint_fields', nargs=2, help="List of dnaprints data table value, name and path fields")
    # --read1 is documented as required, so argparse enforces it instead of
    # letting None flow into fastq_list when it is omitted.
    parser.add_argument('--read1', action='store', dest='read1', required=True, help='Required: single read')
    parser.add_argument('--read2', action='store', dest='read2', required=False, default=None, help='Optional: paired read')
    parser.add_argument('--gzipped', action='store_true', dest='gzipped', help='Input files are gzipped')
    parser.add_argument('--output_dbkey', action='store', dest='output_dbkey', help='Output reference file')
    parser.add_argument('--output_metrics', action='store', dest='output_metrics', help='Output metrics file')

    args = parser.parse_args()

    fastq_list = [args.read1]
    if args.read2 is not None:
        fastq_list.append(args.read2)

    # The value of dnaprint_fields is a list of lists, one per
    # --dnaprint_fields occurrence, holding components of the vsnp_dnaprints
    # data table.  The data_manager_vsnp_dnaprints tool assigns the dbkey
    # column from the all_fasta data table to the value column in the
    # vsnp_dnaprints data table to ensure a proper mapping for discovering
    # the dbkey.
    dnaprints_dict = get_dnaprints_dict(args.dnaprint_fields)

    # Here fastq_list consists of either a single read
    # or a set of paired reads, producing single outputs.
    count_summary, count_list, brucella_sum, bovis_sum, para_sum = get_species_counts(fastq_list, args.gzipped)
    brucella_string, bovis_string, para_string = get_species_strings(count_summary)
    group, dbkey = get_group_and_dbkey(dnaprints_dict, brucella_string, brucella_sum, bovis_string, bovis_sum, para_string, para_sum)
    output_files(args.read1, count_list, group, dbkey, dbkey_file=args.output_dbkey, metrics_file=args.output_metrics)