Mercurial > repos > iuc > vsnp_build_tables
annotate vsnp_get_snps.py @ 11:6b3b0f5858e6 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit c38fd63f7980c70390d104a73ba4c72b266444c3
author | iuc |
---|---|
date | Fri, 10 Jun 2022 06:11:08 +0000 |
parents | 152716f90b84 |
children |
rev | line source |
---|---|
8
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
1 #!/usr/bin/env python |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
2 |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
3 # Collect quality parsimonious SNPs from vcf files |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
4 # and output alignment files in fasta format. |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
5 |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
6 import argparse |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
7 import multiprocessing |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
8 import os |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
9 import queue |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
10 import shutil |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
11 import sys |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
12 import time |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
13 from collections import OrderedDict |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
14 from datetime import datetime |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
15 |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
16 import pandas |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
17 import vcf |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
18 |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
19 |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
20 def get_time_stamp(): |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
21 return datetime.fromtimestamp(time.time()).strftime('%Y-%m-%d %H-%M-%S') |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
22 |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
23 |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
24 def setup_all_vcfs(vcf_files, vcf_dirs): |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
25 # Create the all_vcfs directory and link |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
26 # all input vcf files into it for processing. |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
27 all_vcfs_dir = 'all_vcf' |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
28 os.makedirs(all_vcfs_dir) |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
29 vcf_dirs.append(all_vcfs_dir) |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
30 for vcf_file in vcf_files: |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
31 file_name_base = os.path.basename(vcf_file) |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
32 dst_file = os.path.join(all_vcfs_dir, file_name_base) |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
33 os.symlink(vcf_file, dst_file) |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
34 return vcf_dirs |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
35 |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
36 |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
37 class SnpFinder: |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
38 |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
39 def __init__(self, num_files, dbkey, input_excel, all_isolates, ac, min_mq, quality_score_n_threshold, min_quality_score, input_vcf_dir, output_json_avg_mq_dir, output_json_snps_dir, output_snps_dir, output_summary): |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
40 # Allele count |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
41 self.ac = ac |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
42 # Create a group that will contain all isolates. |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
43 self.all_isolates = all_isolates |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
44 # Evolving positions dictionary. |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
45 self.all_positions = None |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
46 # Isolate groups. |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
47 self.groups = [] |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
48 # Excel file for grouping. |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
49 self.input_excel = input_excel |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
50 # Directory of input zero coverage vcf files. |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
51 self.input_vcf_dir = input_vcf_dir |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
52 # Minimum map quality value. |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
53 self.min_mq = min_mq |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
54 # Minimum quality score value. |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
55 self.min_quality_score = min_quality_score |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
56 # Number of input zero coverage vcf files. |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
57 self.num_files = num_files |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
58 # Output directory for json average mq files. |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
59 self.output_json_avg_mq_dir = output_json_avg_mq_dir |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
60 # Output directory for json snps files. |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
61 self.output_json_snps_dir = output_json_snps_dir |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
62 # Output directory for snps files. |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
63 self.output_snps_dir = output_snps_dir |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
64 # Quality score N threshold value. |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
65 self.quality_score_n_threshold = quality_score_n_threshold |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
66 self.dbkey = dbkey |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
67 self.start_time = get_time_stamp() |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
68 self.summary_str = "" |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
69 self.timer_start = datetime.now() |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
70 self.initiate_summary(output_summary) |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
71 |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
72 def append_to_summary(self, html_str): |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
73 # Append a string to the html summary output file. |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
74 self.summary_str = "%s%s" % (self.summary_str, html_str) |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
75 |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
76 def bin_input_files(self, filename, samples_groups_dict, defining_snps, inverted_defining_snps, found_positions, found_positions_mix): |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
77 # Categorize input files into closely related |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
78 # isolate groups based on discovered SNPs, and |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
79 # return a group dictionary. |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
80 sample_groups_list = [] |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
81 table_name = self.get_sample_name(filename) |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
82 defining_snp = False |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
83 # Absolute positions in set union of two lists. |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
84 for abs_position in list(defining_snps.keys() & (found_positions.keys() | found_positions_mix.keys())): |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
85 group = defining_snps[abs_position] |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
86 sample_groups_list.append(group) |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
87 self.check_add_group(group) |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
88 if len(list(defining_snps.keys() & found_positions_mix.keys())) > 0: |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
89 table_name = self.get_sample_name(filename) |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
90 table_name = '%s<font color="red">[[MIXED]]</font>' % table_name |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
91 self.copy_file(filename, group) |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
92 defining_snp = True |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
93 if not set(inverted_defining_snps.keys()).intersection(found_positions.keys() | found_positions_mix.keys()): |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
94 for abs_position in list(inverted_defining_snps.keys()): |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
95 group = inverted_defining_snps[abs_position] |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
96 sample_groups_list.append(group) |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
97 self.check_add_group(group) |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
98 self.copy_file(filename, group) |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
99 defining_snp = True |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
100 if defining_snp: |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
101 samples_groups_dict[table_name] = sorted(sample_groups_list) |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
102 else: |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
103 samples_groups_dict[table_name] = ['<font color="red">No defining SNP</font>'] |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
104 return samples_groups_dict |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
105 |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
106 def check_add_group(self, group): |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
107 # Add a group if it is not already in the list. |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
108 if group not in self.groups: |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
109 self.groups.append(group) |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
110 |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
111 def copy_file(self, filename, dir): |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
112 if not os.path.exists(dir): |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
113 os.makedirs(dir) |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
114 shutil.copy(filename, dir) |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
115 |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
116 def decide_snps(self, filename): |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
117 # Find the SNPs in a vcf file to produce a pandas data |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
118 # frame and a dictionary containing sample map qualities. |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
119 positions_dict = self.all_positions |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
120 sample_map_qualities = {} |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
121 # Eliminate the path. |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
122 file_name_base = self.get_sample_name(filename) |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
123 vcf_reader = vcf.Reader(open(filename, 'r')) |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
124 sample_dict = {} |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
125 for record in vcf_reader: |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
126 alt = str(record.ALT[0]) |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
127 record_position = "%s:%s" % (str(record.CHROM), str(record.POS)) |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
128 if record_position in positions_dict: |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
129 if alt == "None": |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
130 sample_dict.update({record_position: "-"}) |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
131 else: |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
132 # On rare occasions MQM gets called "NaN", thus passing |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
133 # a string when a number is expected when calculating average. |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
134 mq_val = self.get_mq_val(record.INFO, filename) |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
135 if str(mq_val).lower() not in ["nan"]: |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
136 sample_map_qualities.update({record_position: mq_val}) |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
137 if len(alt) == 1: |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
138 qual_val = self.val_as_int(record.QUAL) |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
139 ac = record.INFO['AC'][0] |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
140 ref = str(record.REF[0]) |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
141 if ac == 2 and qual_val > self.quality_score_n_threshold: |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
142 # Add the SNP to a group. |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
143 sample_dict.update({record_position: alt}) |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
144 elif ac == 1 and qual_val > self.quality_score_n_threshold: |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
145 # The position is ambiguous. |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
146 alt_ref = "%s%s" % (alt, ref) |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
147 if alt_ref == "AG": |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
148 sample_dict.update({record_position: "R"}) |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
149 elif alt_ref == "CT": |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
150 sample_dict.update({record_position: "Y"}) |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
151 elif alt_ref == "GC": |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
152 sample_dict.update({record_position: "S"}) |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
153 elif alt_ref == "AT": |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
154 sample_dict.update({record_position: "W"}) |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
155 elif alt_ref == "GT": |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
156 sample_dict.update({record_position: "K"}) |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
157 elif alt_ref == "AC": |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
158 sample_dict.update({record_position: "M"}) |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
159 elif alt_ref == "GA": |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
160 sample_dict.update({record_position: "R"}) |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
161 elif alt_ref == "TC": |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
162 sample_dict.update({record_position: "Y"}) |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
163 elif alt_ref == "CG": |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
164 sample_dict.update({record_position: "S"}) |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
165 elif alt_ref == "TA": |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
166 sample_dict.update({record_position: "W"}) |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
167 elif alt_ref == "TG": |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
168 sample_dict.update({record_position: "K"}) |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
169 elif alt_ref == "CA": |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
170 sample_dict.update({record_position: "M"}) |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
171 else: |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
172 sample_dict.update({record_position: "N"}) |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
173 # Poor calls |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
174 elif qual_val <= 50: |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
175 # Call the reference allele. |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
176 # Do not coerce record.REF[0] to a string! |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
177 sample_dict.update({record_position: record.REF[0]}) |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
178 elif qual_val <= self.quality_score_n_threshold: |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
179 sample_dict.update({record_position: "N"}) |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
180 else: |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
181 # Insurance -- Will still report on a possible |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
182 # SNP even if missed with above statements. |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
183 # Do not coerce record.REF[0] to a string! |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
184 sample_dict.update({record_position: record.REF[0]}) |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
185 # Merge dictionaries and order |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
186 merge_dict = {} |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
187 merge_dict.update(positions_dict) |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
188 merge_dict.update(sample_dict) |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
189 sample_df = pandas.DataFrame(merge_dict, index=[file_name_base]) |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
190 return sample_df, file_name_base, sample_map_qualities |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
191 |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
def df_to_fasta(self, parsimonious_df, group):
    # Generate the SNP alignment file for a group from
    # the parsimonious_df data frame.
    #
    # Each row of parsimonious_df becomes one fasta entry: the row
    # label is the header and the row's cell values, concatenated in
    # column order, form the sequence.  Only the first row for any
    # given label is written.
    #
    # The file <self.output_snps_dir>/<group>.fasta is created only
    # if at least one cell in the data frame is non-empty.
    #
    # Returns True if the fasta file was written, False otherwise.
    snps_file = os.path.join(self.output_snps_dir, "%s.fasta" % group)
    # Stop scanning as soon as a single non-empty cell is found.
    has_sequence_data = any(
        len(pos) > 0
        for _, row in parsimonious_df.iterrows()
        for pos in row
    )
    if not has_sequence_data:
        return False
    # Row labels already written; a set keeps the duplicate check
    # constant-time instead of re-counting a list for every row.
    seen_names = set()
    with open(snps_file, 'w') as fh:
        for _, row in parsimonious_df.iterrows():
            if row.name in seen_names:
                continue
            seen_names.add(row.name)
            print(f'>{row.name}', file=fh)
            print("".join(str(pos) for pos in row), file=fh)
    return True
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
213 |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
def find_initial_positions(self, filename):
    # Find SNP positions in a vcf file.
    #
    # Every record with an ALT allele is keyed by "<CHROM>:<POS>".
    # A record qualifies when its REF is a single character and both
    # its QUAL and its mapping quality are strictly greater than
    # self.min_quality_score and self.min_mq respectively.
    #
    # Returns a tuple of two dictionaries mapping position -> REF:
    #   found_positions     - qualifying records whose AC == self.ac
    #   found_positions_mix - qualifying records whose AC == 1
    found_positions = {}
    found_positions_mix = {}
    # The reader pulls records from the handle while it is iterated,
    # so the loop must stay inside the with block; the handle is then
    # closed even if a record fails to parse.
    with open(filename, 'r') as fh:
        for record in vcf.Reader(fh):
            qual_val = self.val_as_int(record.QUAL)
            absolute_position = "%s:%s" % (str(record.CHROM), str(record.POS))
            if str(record.ALT[0]) == "None":
                # No alternate allele was called at this position.
                continue
            mq_val = self.get_mq_val(record.INFO, filename)
            ac = record.INFO['AC'][0]
            if ac == self.ac and len(record.REF) == 1 and qual_val > self.min_quality_score and mq_val > self.min_mq:
                found_positions[absolute_position] = record.REF
            if ac == 1 and len(record.REF) == 1 and qual_val > self.min_quality_score and mq_val > self.min_mq:
                found_positions_mix[absolute_position] = record.REF
    return found_positions, found_positions_mix
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
233 |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
def gather_and_filter(self, prefilter_df, mq_averages, group_dir):
    # Filter the SNPs data frame for a group and write its outputs.
    #
    # If self.input_excel is set, the positions returned by
    # self.get_position_list for the first column (the "all" filter)
    # and for group_dir are dropped from prefilter_df; positions not
    # present as columns are ignored.  The result is reduced with
    # self.get_parsimonious_df.
    #
    # With at least 4 samples (rows) the fasta alignment is written
    # via self.df_to_fasta and, if it contained sequence data, the
    # average MQ values and the parsimonious SNPs are written as
    # "<group_dir>.json" files.  Otherwise a message explaining why
    # nothing was produced is appended to the summary.
    if self.input_excel is None:
        filtered_all_df = prefilter_df
    else:
        # Filter positions to be removed from all.
        # Only the sheet names are needed from the workbook, so
        # close it as soon as they have been read.
        with pandas.ExcelFile(self.input_excel) as xl:
            sheet_names = xl.sheet_names
        # Use the first column to filter "all" positions.
        exclusion_list_all = self.get_position_list(sheet_names, 0)
        exclusion_list_group = self.get_position_list(sheet_names, group_dir)
        exclusion_list = exclusion_list_all + exclusion_list_group
        # Filters for all applied.
        filtered_all_df = prefilter_df.drop(columns=exclusion_list, errors='ignore')
    json_snps_file = os.path.join(self.output_json_snps_dir, "%s.json" % group_dir)
    parsimonious_df = self.get_parsimonious_df(filtered_all_df)
    samples_number = parsimonious_df.shape[0]
    problem = None
    if samples_number < 4:
        problem = "<br/>Too few samples to build tree"
    elif self.df_to_fasta(parsimonious_df, group_dir):
        # Sufficient samples have been found
        # to build a phylogenetic tree.
        json_avg_mq_file = os.path.join(self.output_json_avg_mq_dir, "%s.json" % group_dir)
        mq_averages.to_json(json_avg_mq_file, orient='split')
        parsimonious_df.to_json(json_snps_file, orient='split')
    else:
        problem = "<br/>No sequence data"
    if problem is not None:
        if group_dir is not None:
            problem = "%s for group: %s" % (problem, group_dir)
        self.append_to_summary("%s<br/>\n" % problem)
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
270 |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
def get_sample_name(self, file_path):
    # Return the sample part of a file name: the base name of
    # file_path with its last extension removed.
    name = os.path.basename(file_path)
    first_dot = name.find(".")
    if first_dot < 1:
        # Either there is no dot at all or the name starts with
        # one, so the name is returned untouched.
        return name
    # Eliminate the extension.
    stem, _extension = os.path.splitext(name)
    return stem
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
278 |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
def get_mq_val(self, record_info, filename):
    # Look up the mapping quality in the record.INFO component
    # of a vcf record.  The MQM key (freebayes) is tried first,
    # then the MQ key (gatk).  The value found is passed through
    # return_val so that a list value is reduced to one element.
    # If neither key yields a value, the program exits with a
    # message that names the offending file.
    for info_key in ('MQM', 'MQ'):
        try:
            return self.return_val(record_info[info_key])
        except Exception:
            # Any failure for this key (typically a missing key)
            # simply moves on to the next candidate.
            continue
    sys.exit("Invalid or unsupported vcf header %s in file: %s\n" % (str(record_info), filename))
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
292 |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
def get_parsimonious_df(self, filtered_all_df):
    # Get the parsimonious SNPs data frame from a data frame
    # of filtered SNPs.
    #
    # filtered_all_df: data frame of filtered SNPs, presumably one
    #     row per sample and one column per position (confirm
    #     against the caller); it may contain a 'root' row.
    # Returns a data frame restricted to the columns in which at
    # least one row differs from the first non-root row.  When a
    # 'root' row was present it is appended as the last row;
    # otherwise only the sample rows are returned.
    try:
        ref_series = filtered_all_df.loc['root']
    except KeyError:
        # No 'root' row: nothing to strip and nothing to append.
        # Without this explicit marker the code below would fail
        # on an unbound ref_series.
        ref_series = None
    else:
        # In all_vcf root needs to be removed so that it does not
        # take part in the comparison below.
        filtered_all_df = filtered_all_df.drop(['root'])
    # Keep only the columns that are not identical in every row.
    varies = (filtered_all_df != filtered_all_df.iloc[0]).any()
    parse_df = filtered_all_df.loc[:, varies]
    if ref_series is None:
        return parse_df
    ref_df = ref_series.to_frame().T
    # join='inner' trims the root row to the retained columns.
    return pandas.concat([parse_df, ref_df], join='inner')
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
309 |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
def get_position_list(self, sheet_names, group):
    # Build the list of positions that the excel file
    # self.input_excel defines for the column named by group.
    # The sheet_names argument is not used by this method.
    #
    # Each cell is taken as a string.  A cell whose part after
    # the last ":" holds no "-" is a single entry and is kept
    # verbatim.  Any other cell is read as "chrom:start-end"
    # (commas allowed inside the numbers) and expanded into one
    # "chrom:position" entry per position, end inclusive.
    #
    # Returns the list of entries, or an empty list if a
    # ValueError is raised while reading or parsing.  The program
    # exits if a range cell does not split into exactly two parts
    # on ":".
    positions = []
    try:
        group_column = pandas.read_excel(self.input_excel, header=1, usecols=[group])
        for row in group_column.values:
            entry = str(row[0])
            if "-" not in entry.split(":")[-1]:
                positions.append(entry)
                continue
            # From here on the entry is known to contain a "-".
            try:
                chrom, sequence_range = entry.split(":")
            except Exception as e:
                sys.exit(str(e))
            bounds = sequence_range.split("-")
            first = int(bounds[0].replace(',', ''))
            last = int(bounds[1].replace(',', ''))
            positions.extend(chrom + ":" + str(position) for position in range(first, last + 1))
        return positions
    except ValueError:
        return []
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
330 |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
def get_snps(self, task_queue, timeout):
    # Worker loop: take group directories off task_queue until
    # no item arrives within timeout seconds, and process each.
    #
    # For every group directory, all files in it are parsed to
    # accumulate the SNPs into a data frame, which is handed to
    # gather_and_filter together with the mapping quality
    # averages.  task_done() is called once per processed item.
    while True:
        try:
            group_dir = task_queue.get(block=True, timeout=timeout)
        except queue.Empty:
            break
        vcf_paths = [
            os.path.abspath(os.path.join(group_dir, entry))
            for entry in os.listdir(os.path.abspath(group_dir))
        ]
        # First pass: collect the positions found in any file.
        collected_positions = {}
        for vcf_path in vcf_paths:
            found_positions, _found_positions_mix = self.find_initial_positions(vcf_path)
            collected_positions.update(found_positions)
        # Order before adding to file to match
        # with ordering of individual samples.
        # all_positions is abs_pos:REF
        self.all_positions = OrderedDict(sorted(collected_positions.items()))
        ref_positions_df = pandas.DataFrame(self.all_positions, index=['root'])
        # Second pass: one data frame and one set of mapping
        # qualities per file.
        sample_frames = []
        map_qualities_by_sample = {}
        for vcf_path in vcf_paths:
            sample_df, file_name_base, sample_map_qualities = self.decide_snps(vcf_path)
            sample_frames.append(sample_df)
            map_qualities_by_sample[file_name_base] = sample_map_qualities
        all_sample_df = pandas.concat(sample_frames)
        # All positions have now been selected for each sample,
        # so the parsimony informative SNPs can be selected
        # downstream.  The reference goes in as the top row.
        prefilter_df = pandas.concat([ref_positions_df, all_sample_df], join='inner')
        all_mq_df = pandas.DataFrame.from_dict(map_qualities_by_sample)
        mq_averages = all_mq_df.mean(axis=1).astype(int)
        self.gather_and_filter(prefilter_df, mq_averages, group_dir)
        task_queue.task_done()
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
368 |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
def group_vcfs(self, vcf_files):
    # Parse an excel file to produce a grouping dictionary for
    # SNPs, bin every vcf file accordingly and append a summary
    # grouping table to the summary.
    #
    # vcf_files: iterable of vcf file paths to be grouped.
    # Returns nothing; the result is reported through
    # append_to_summary.
    #
    # Only the first sheet of self.input_excel is consulted.  The
    # workbook is opened once, through a context manager, so that
    # its handle is released as soon as the sheet has been read.
    with pandas.ExcelFile(self.input_excel) as xl:
        ws = pandas.read_excel(xl, sheet_name=xl.sheet_names[0])
    first_row = ws.iloc[0]
    # Series.items() is used instead of Series.iteritems(), which
    # was removed in pandas 2.0; items() also exists in older
    # pandas versions.
    defsnp_iterator = iter(first_row.items())
    # The first column is skipped (presumably a label column;
    # confirm against the excel layout).
    next(defsnp_iterator)
    defining_snps = {}
    inverted_defining_snps = {}
    for abs_pos, group in defsnp_iterator:
        if '!' in abs_pos:
            # Labels carrying a '!' go to the inverted dictionary,
            # keyed without the '!'.
            inverted_defining_snps[abs_pos.replace('!', '')] = group
        else:
            defining_snps[abs_pos] = group
    samples_groups_dict = {}
    for vcf_file in vcf_files:
        found_positions, found_positions_mix = self.find_initial_positions(vcf_file)
        samples_groups_dict = self.bin_input_files(vcf_file, samples_groups_dict, defining_snps, inverted_defining_snps, found_positions, found_positions_mix)
    # Output summary grouping table.
    self.append_to_summary('<br/>')
    self.append_to_summary('<b>Groupings with %d listed:</b><br/>\n' % len(samples_groups_dict))
    # NOTE(review): 'cellspaging' looks like a typo for
    # 'cellspacing'; left unchanged to keep the report stable.
    self.append_to_summary('<table cellpadding="5" cellspaging="5" border="1">\n')
    for key, value in samples_groups_dict.items():
        self.append_to_summary('<tr align="left"><th>Sample Name</th>\n')
        self.append_to_summary('<td>%s</td>' % key)
        for group in value:
            self.append_to_summary('<td>%s</td>\n' % group)
        self.append_to_summary('</tr>\n')
    self.append_to_summary('</table><br/>\n')
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
400 |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
def initiate_summary(self, output_summary):
    # Start the html summary: emit the opening html/head/body
    # markup followed by the start time, the number of VCF
    # inputs, the reference (dbkey) and the all-isolates setting.
    # Everything is written through append_to_summary; the
    # output_summary argument is not used by this method.
    emit = self.append_to_summary
    for markup in ('<html>\n', '<head></head>\n', '<body style="font-size:12px;">'):
        emit(markup)
    emit("<b>Time started:</b> %s<br/>" % get_time_stamp())
    emit("<b>Number of VCF inputs:</b> %d<br/>" % self.num_files)
    emit("<b>Reference:</b> %s<br/>" % self.dbkey)
    emit("<b>All isolates:</b> %s<br/>" % str(self.all_isolates))
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
410 |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
def return_val(self, val, index=0):
    # Normalize a value that may or may not be wrapped in a
    # list: a list yields its element at index (0 by default),
    # anything else is returned as it is.
    return val[index] if isinstance(val, list) else val
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
416 |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
417 def val_as_int(self, val): |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
418 # Handle integer value conversion. |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
419 try: |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
420 return int(val) |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
421 except TypeError: |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
422 # val is likely None here. |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
423 return 0 |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
424 |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
425 |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
426 if __name__ == '__main__': |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
427 |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
428 parser = argparse.ArgumentParser() |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
429 |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
430 parser.add_argument('--ac', action='store', dest='ac', type=int, help='Allele count value'), |
9
25714108bb22
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
8
diff
changeset
|
431 parser.add_argument('--all_isolates', action='store_true', dest='all_isolates', help='Create table with all isolates'), |
8
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
432 parser.add_argument('--input_excel', action='store', dest='input_excel', required=False, default=None, help='Optional Excel filter file'), |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
433 parser.add_argument('--input_vcf_dir', action='store', dest='input_vcf_dir', help='Input vcf directory'), |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
434 parser.add_argument('--min_mq', action='store', dest='min_mq', type=int, help='Minimum map quality value'), |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
435 parser.add_argument('--min_quality_score', action='store', dest='min_quality_score', type=int, help='Minimum quality score value'), |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
436 parser.add_argument('--output_json_avg_mq_dir', action='store', dest='output_json_avg_mq_dir', help='Output json average mq directory'), |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
437 parser.add_argument('--output_json_snps_dir', action='store', dest='output_json_snps_dir', help='Output json snps directory'), |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
438 parser.add_argument('--output_snps_dir', action='store', dest='output_snps_dir', help='Output snps directory'), |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
439 parser.add_argument('--output_summary', action='store', dest='output_summary', help='Output summary html file'), |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
440 parser.add_argument('--processes', action='store', dest='processes', type=int, help='Configured processes for job'), |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
441 parser.add_argument('--quality_score_n_threshold', action='store', dest='quality_score_n_threshold', type=int, help='Minimum quality score N value for alleles'), |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
442 parser.add_argument('--dbkey', action='store', dest='dbkey', help='Galaxy genome build dbkey'), |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
443 |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
444 args = parser.parse_args() |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
445 |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
446 # Build the list of all input zero coverage vcf |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
447 # files, both the samples and the "database". |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
448 vcf_files = [] |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
449 for file_name in os.listdir(args.input_vcf_dir): |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
450 file_path = os.path.abspath(os.path.join(args.input_vcf_dir, file_name)) |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
451 vcf_files.append(file_path) |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
452 |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
453 multiprocessing.set_start_method('spawn') |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
454 queue1 = multiprocessing.JoinableQueue() |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
455 num_files = len(vcf_files) |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
456 # Set a timeout for get()s in the queue. |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
457 timeout = 0.05 |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
458 |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
459 # Initialize the snp_finder object. |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
460 snp_finder = SnpFinder(num_files, args.dbkey, args.input_excel, args.all_isolates, args.ac, args.min_mq, args.quality_score_n_threshold, args.min_quality_score, args.input_vcf_dir, args.output_json_avg_mq_dir, args.output_json_snps_dir, args.output_snps_dir, args.output_summary) |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
461 |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
462 # Define and make the set of directories into which the input_zc_vcf |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
463 # files will be placed. Selected input values (e.g., the use of |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
464 # an Excel file for grouping and filtering, creating a group with |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
465 # all isolates) are used to define the directories. |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
466 vcf_dirs = [] |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
467 if args.input_excel is None: |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
468 vcf_dirs = setup_all_vcfs(vcf_files, vcf_dirs) |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
469 else: |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
470 if args.all_isolates: |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
471 vcf_dirs = setup_all_vcfs(vcf_files, vcf_dirs) |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
472 # Parse the Excel file to determine groups for filtering. |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
473 snp_finder.group_vcfs(vcf_files) |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
474 # Append the list of group directories created by |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
475 # the above call to the set of directories containing |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
476 # vcf files for analysis. |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
477 group_dirs = [d for d in os.listdir(os.getcwd()) if os.path.isdir(d) and d in snp_finder.groups] |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
478 vcf_dirs.extend(group_dirs) |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
479 |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
480 # Populate the queue for job splitting. |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
481 for vcf_dir in vcf_dirs: |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
482 queue1.put(vcf_dir) |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
483 |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
484 # Complete the get_snps task. |
11
6b3b0f5858e6
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit c38fd63f7980c70390d104a73ba4c72b266444c3
iuc
parents:
10
diff
changeset
|
485 processes = [multiprocessing.Process(target=snp_finder.get_snps, args=(queue1, timeout, )) for _ in range(args.processes)] |
8
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
486 for p in processes: |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
487 p.start() |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
488 for p in processes: |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
489 p.join() |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
490 queue1.join() |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
491 |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
492 # Finish summary log. |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
493 snp_finder.append_to_summary("<br/><b>Time finished:</b> %s<br/>\n" % get_time_stamp()) |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
494 total_run_time = datetime.now() - snp_finder.timer_start |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
495 snp_finder.append_to_summary("<br/><b>Total run time:</b> %s<br/>\n" % str(total_run_time)) |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
496 snp_finder.append_to_summary('</body>\n</html>\n') |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
497 with open(args.output_summary, "w") as fh: |
e54b96acea98
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 95b221f68d19702681babd765c67caeeb24e7f1d"
iuc
parents:
diff
changeset
|
498 fh.write("%s" % snp_finder.summary_str) |