Mercurial > repos > greg > vsnp_build_tables
annotate vsnp_build_tables.py @ 11:be32fbc45ae8 draft
"planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_build_tables commit eb20edfaf93e9d92396131f9b08f704eca2ab3e8"
| author | greg |
|---|---|
| date | Thu, 29 Jul 2021 15:11:06 +0000 |
| parents | f641e52353e8 |
| children |
| rev | line source |
|---|---|
| 0 | 1 #!/usr/bin/env python |
| 2 | |
| 3 import argparse | |
|
9
f641e52353e8
"planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_build_tables commit 1131a7accc36df73eac621f6ae8aa3cb62403bde"
greg
parents:
3
diff
changeset
|
4 import multiprocessing |
| 0 | 5 import os |
|
9
f641e52353e8
"planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_build_tables commit 1131a7accc36df73eac621f6ae8aa3cb62403bde"
greg
parents:
3
diff
changeset
|
6 import queue |
| 3 | 7 import re |
| 8 | |
| 0 | 9 import pandas |
| 10 import pandas.io.formats.excel | |
| 11 from Bio import SeqIO | |
| 12 | |
# Maximum columns allowed in a LibreOffice
# spreadsheet is 1024. Excel allows for
# 16,384 columns, but we'll set the lower
# number as the maximum. Some browsers
# (e.g., Firefox on Linux) are configured
# to use LibreOffice for Excel spreadsheets.
MAXCOLS = 1024
# Directory that receives both the intermediate JSON
# files and the Excel tables written by output_excel.
OUTPUT_EXCEL_DIR = 'output_excel_dir'
# Input directory names. They are not referenced by the
# functions in this part of the file; presumably they are
# used by the script's entry point - TODO confirm.
INPUT_JSON_AVG_MQ_DIR = 'input_json_avg_mq_dir'
INPUT_JSON_DIR = 'input_json_dir'
INPUT_NEWICK_DIR = 'input_newick_dir'
| 0 | 24 |
| 25 | |
def annotate_table(table_df, group, annotation_dict):
    """Return table_df with an 'annotations' row appended.

    Args:
        table_df: data frame whose column labels are '<chromosome>:<position>'
            strings.
        group: group name.  Unused here; kept so existing callers that pass
            it positionally keep working.
        annotation_dict: maps a chromosome name to a data frame that is
            indexed by an IntervalIndex and has 'chrom', 'locus', 'product'
            and 'gene' columns.

    Returns:
        A data frame with the same rows as table_df plus a final
        'annotations' row.  Only columns whose chromosome is a key of
        annotation_dict are kept, because the annotations are attached with
        an inner join.
    """
    # Split every column label once instead of once per chromosome.
    parsed_labels = [label.partition(':') for label in list(table_df)]
    annotated_labels = []
    annotation_texts = []
    for gbk_chrome, pro in annotation_dict.items():
        for reference, _, pos in parsed_labels:
            if reference != gbk_chrome:
                continue
            try:
                hit = pro.iloc[pro.index.get_loc(int(pos))][['chrom', 'locus', 'product', 'gene']]
            except KeyError:
                # No feature interval contains this position.
                annotated_labels.append("{}:{}".format(gbk_chrome, pos))
                annotation_texts.append("No annotated product")
                continue
            if hit.ndim == 2:
                # Several features overlap the position, use the first one.
                # Checking ndim (rather than unpacking and catching
                # ValueError) keeps a 4-character chromosome name from being
                # unpacked into single characters.
                hit = hit.iloc[0]
            chrom, locus, product, gene = hit.values
            annotated_labels.append("{}:{}".format(chrom, pos))
            annotation_texts.append("{}, {}, {}".format(product, gene, locus))
    # Build the annotations frame in memory.  No scratch file is needed, so
    # a stale file left in the working directory cannot leak into the result.
    annotations_df = pandas.DataFrame({'annotations': annotation_texts}, index=pandas.Index(annotated_labels, name='index'))
    # Attach the annotations as a column of the transposed
    # table, then transpose back so they become the last row.
    table_df_transposed = table_df.T
    table_df_transposed.index = table_df_transposed.index.rename('index')
    table_df_transposed = table_df_transposed.merge(annotations_df, left_index=True, right_index=True)
    return table_df_transposed.T
| 62 | |
| 63 | |
def excel_formatter(json_file_name, excel_file_name, group, annotation_dict):
    """Write the table stored in json_file_name as a colour-coded Excel file.

    Args:
        json_file_name: path of a JSON file in pandas 'split' orientation.
        excel_file_name: path of the .xlsx file to create.
        group: group name, forwarded to annotate_table.
        annotation_dict: per-chromosome annotation frames, or None.  With
            None an empty 'no annotations' row is appended instead of the
            annotations row.
    """
    pandas.io.formats.excel.header_style = None
    table_df = pandas.read_json(json_file_name, orient='split')
    if annotation_dict is not None:
        table_df = annotate_table(table_df, group, annotation_dict)
    else:
        # DataFrame.append was removed in pandas 2.0, so add
        # the empty placeholder row with pandas.concat instead.
        placeholder = pandas.DataFrame(index=['no annotations'], columns=table_df.columns)
        table_df = pandas.concat([table_df, placeholder])
    # The context manager saves and closes the workbook on exit
    # (ExcelWriter.save was removed in pandas 2.0).
    with pandas.ExcelWriter(excel_file_name, engine='xlsxwriter') as writer:
        table_df.to_excel(writer, sheet_name='Sheet1')
        writer_book = writer.book
        ws = writer.sheets['Sheet1']
        format_a = writer_book.add_format({'bg_color': '#58FA82'})
        format_g = writer_book.add_format({'bg_color': '#F7FE2E'})
        format_c = writer_book.add_format({'bg_color': '#0000FF'})
        format_t = writer_book.add_format({'bg_color': '#FF0000'})
        format_normal = writer_book.add_format({'bg_color': '#FDFEFE'})
        formatlowqual = writer_book.add_format({'font_color': '#C70039', 'bg_color': '#E2CFDD'})
        format_ambigous = writer_book.add_format({'font_color': '#C70039', 'bg_color': '#E2CFDD'})
        format_n = writer_book.add_format({'bg_color': '#E2CFDD'})
        format_annotation = writer_book.add_format({'font_color': '#0A028C', 'rotation': '-90', 'align': 'top'})
        rows, cols = table_df.shape
        # Wide first column for the row labels, narrow data columns.
        ws.set_column(0, 0, 30)
        ws.set_column(1, cols, 2.1)
        ws.freeze_panes(2, 1)
        # NOTE(review): set_row takes (row, height, format), so this sets the
        # height of the row after the last data row to cols + 1.  That looks
        # unintended, but it is kept so the generated workbook is unchanged.
        ws.set_row(rows + 1, cols + 1, format_annotation)
        # Make sure that row/column locations don't overlap.
        ws.conditional_format(rows - 2, 1, rows - 1, cols, {'type': 'cell', 'criteria': '<', 'value': 55, 'format': formatlowqual})
        ws.conditional_format(2, 1, rows - 2, cols, {'type': 'cell', 'criteria': '==', 'value': 'B$2', 'format': format_normal})
        # The rules are added in this order on purpose, it
        # is the same order the individual calls used before.
        text_rules = (
            ('A', format_a),
            ('G', format_g),
            ('C', format_c),
            ('T', format_t),
            ('S', format_ambigous),
            ('Y', format_ambigous),
            ('R', format_ambigous),
            ('W', format_ambigous),
            ('K', format_ambigous),
            ('M', format_ambigous),
            ('N', format_n),
            ('-', format_n),
        )
        for text, cell_format in text_rules:
            ws.conditional_format(2, 1, rows - 2, cols, {'type': 'text', 'criteria': 'containing', 'value': text, 'format': cell_format})
        # Rewrite the header cells with rotated text.
        format_rotation = writer_book.add_format({})
        format_rotation.set_rotation(90)
        for column_num, column_name in enumerate(table_df.columns):
            ws.write(0, column_num + 1, column_name, format_rotation)
        # Set last row.
        ws.set_row(rows, 400, format_annotation)
| 113 | |
| 114 | |
def get_annotation_dict(gbk_file):
    """Build a per-chromosome feature lookup from a GenBank file.

    Args:
        gbk_file: path (or handle) of the GenBank file to parse.

    Returns:
        A dict that maps each record id to a data frame of its CDS and rRNA
        features with the columns 'chrom', 'start', 'stop', 'locus',
        'product' and 'gene'.  Each frame is indexed by an IntervalIndex
        built from 'start' and 'stop' (closed on both sides) and holds at
        most one feature per start position.
    """
    gbk_dict = SeqIO.to_dict(SeqIO.parse(gbk_file, "genbank"))
    annotation_dict = {}
    tmp_file = "features.csv"
    try:
        for chromosome, record in gbk_dict.items():
            # Create a file of chromosomes and features.
            with open(tmp_file, 'w+') as fh:
                for feature in record.features:
                    if "CDS" in feature.type or "rRNA" in feature.type:
                        # A missing qualifier is written as the text "None".
                        product = feature.qualifiers.get('product', [None])[0]
                        locus = feature.qualifiers.get('locus_tag', [None])[0]
                        gene = feature.qualifiers.get('gene', [None])[0]
                        fh.write("%s\t%d\t%d\t%s\t%s\t%s\n" % (chromosome, int(feature.location.start), int(feature.location.end), locus, product, gene))
            # Read the chromosomes and features file into a data frame.
            # NOTE(review): a record without any CDS or rRNA feature leaves
            # the file empty and read_csv raises, as it did before.
            df = pandas.read_csv(tmp_file, sep='\t', names=["chrom", "start", "stop", "locus", "product", "gene"])
            # Keep one feature per start position.
            df = df.sort_values(['start', 'gene'], ascending=[True, False])
            df = df.drop_duplicates('start')
            pro = df.reset_index(drop=True)
            pro.index = pandas.IntervalIndex.from_arrays(pro['start'], pro['stop'], closed='both')
            annotation_dict[chromosome] = pro
    finally:
        # Do not leave the scratch file behind in the working directory.
        if os.path.exists(tmp_file):
            os.remove(tmp_file)
    return annotation_dict
| 146 | |
| 147 | |
def get_sample_name(file_path):
    """Return the base name of file_path without its last extension.

    A name that contains no dot, or whose only dot is the leading
    character, is returned unchanged.
    """
    file_name = os.path.basename(file_path)
    stem, _extension = os.path.splitext(file_name)
    return stem if file_name.find(".") > 0 else file_name
| 0 | 154 |
| 155 | |
def output_cascade_table(cascade_order, mqdf, group, annotation_dict):
    """Append the map quality row to the cascade table and output it."""
    # join='inner' keeps only the columns present in both frames.
    with_map_quality = pandas.concat([cascade_order, mqdf], join='inner')
    output_table(with_map_quality, "cascade", group, annotation_dict)
| 159 | |
| 160 | |
def output_excel(df, type_str, group, annotation_dict, count=None):
    """Write df as a JSON file and as a formatted Excel file.

    Both files are created in OUTPUT_EXCEL_DIR.  Their names are built from
    type_str, prefixed with group unless it is None and suffixed with count
    unless it is None.  A count is passed when the table has more columns
    than is allowed by the MAXCOLS setting, so that multiple files are
    produced as an output collection.
    """
    if count is None and group is None:
        json_base = "%s_order_mq.json" % type_str
        excel_base = "%s_table.xlsx" % type_str
    elif count is None:
        json_base = "%s_%s_order_mq.json" % (group, type_str)
        excel_base = "%s_%s_table.xlsx" % (group, type_str)
    elif group is None:
        json_base = "%s_order_mq_%d.json" % (type_str, count)
        excel_base = "%s_table_%d.xlsx" % (type_str, count)
    else:
        json_base = "%s_%s_order_mq_%d.json" % (group, type_str, count)
        excel_base = "%s_%s_table_%d.xlsx" % (group, type_str, count)
    json_file_name = os.path.join(OUTPUT_EXCEL_DIR, json_base)
    excel_file_name = os.path.join(OUTPUT_EXCEL_DIR, excel_base)
    # The JSON file is the temporary input of the excel_formatter.
    df.to_json(json_file_name, orient='split')
    # Output the Excel file.
    excel_formatter(json_file_name, excel_file_name, group, annotation_dict)
| 184 | |
| 185 | |
def output_sort_table(cascade_order, mqdf, group, annotation_dict):
    """Order the table columns by position and output the "sort" table.

    The column labels are '<chromosome>:<position>' strings.  Only the
    integer position is used as the sort key, the chromosome is dropped.
    """
    by_position = cascade_order.T
    # Temporary helper columns that hold the pieces of each label.
    by_position['abs_value'] = by_position.index
    by_position[['chrom', 'pos']] = by_position['abs_value'].str.split(':', expand=True)
    by_position = by_position.drop(['abs_value', 'chrom'], axis=1)
    by_position['pos'] = by_position['pos'].astype(int)
    # Sort on the position, then discard the helper column again.
    by_position = by_position.sort_values(by=['pos']).drop(['pos'], axis=1)
    with_map_quality = pandas.concat([by_position.T, mqdf], join='inner')
    output_table(with_map_quality, "sort", group, annotation_dict)
| 197 | |
| 198 | |
def output_table(df, type_str, group, annotation_dict):
    """Output df as one Excel file, or as several when it is too wide.

    Args:
        df: the table to write.
        type_str: table type used in the output file names.
        group: group name used in the output file names.  A string that
            starts with "dataset" is replaced by None.
        annotation_dict: forwarded unchanged to output_excel.

    A table with fewer than MAXCOLS columns is written as a single file.
    A wider table is written in consecutive slices of at most MAXCOLS
    columns that are numbered from 1.
    """
    if isinstance(group, str) and group.startswith("dataset"):
        # Inputs are single files, not collections,
        # so input file names are not useful for naming
        # output files.
        group_str = None
    else:
        group_str = group
    column_count = df.shape[1]
    if column_count < MAXCOLS:
        output_excel(df, type_str, group_str, annotation_dict)
        return
    # Here the number of columns is at least the maximum, so multiple
    # outputs are produced.  Stepping through the start offsets never
    # yields an empty trailing slice, so no empty spreadsheet is written
    # when the column count is an exact multiple of MAXCOLS.
    # NOTE(review): the index is written as an additional spreadsheet
    # column, so a full slice is MAXCOLS + 1 columns wide - confirm that
    # this is acceptable for the LibreOffice limit.
    for count, chunk_start in enumerate(range(0, column_count, MAXCOLS), start=1):
        df_of_type = df.iloc[:, chunk_start:chunk_start + MAXCOLS]
        output_excel(df_of_type, type_str, group_str, annotation_dict, count=count)
| 227 | |
| 228 | |
|
9
f641e52353e8
"planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_build_tables commit 1131a7accc36df73eac621f6ae8aa3cb62403bde"
greg
parents:
3
diff
changeset
|
def _read_sample_order(newick_file):
    """Return the sample names of a Newick tree in file order, 'root' first."""
    last_line = None
    with open(newick_file, 'r') as fh:
        # Only the last line of the file determines the order.
        for last_line in fh:
            pass
    if last_line is None:
        raise ValueError("The Newick file %s is empty" % newick_file)
    # Put every token on its own line, then drop the
    # parentheses, the branch lengths and the root entry.
    line = re.sub('[:,]', '\n', last_line)
    line = re.sub('[)(]', '', line)
    line = re.sub(r'[0-9].*\.[0-9].*\n', '', line)
    line = re.sub('root\n', '', line)
    sample_order = [name for name in line.split('\n') if name]
    sample_order.insert(0, 'root')
    return sample_order


def _cascade_order(tree_order):
    """Return tree_order with its columns arranged in cascade order.

    Columns are sorted by the number of rows directly below the first row
    that equal the first row (ascending), then by the number of rows that
    differ from the first row (descending).
    """
    cells = tree_order.values
    # True wherever a cell differs from the first row of its column.
    differs = cells != cells[0]
    # The running product stays 1 only for the leading run of matching
    # rows, so its sum is the length of that run.
    sort_keys = pandas.DataFrame({
        'snp_from_top': (~differs[1:]).astype(int).cumprod(axis=0).sum(axis=0),
        'snp_per_column': differs.sum(axis=0),
    })
    sort_keys = sort_keys.sort_values(['snp_from_top', 'snp_per_column'], ascending=[True, False])
    # Select by position so that the result does not
    # depend on the column labels being unique.
    return tree_order.iloc[:, sort_keys.index.tolist()]


def preprocess_tables(task_queue, annotation_dict, timeout):
    """Build the cascade and sort tables for every task in task_queue.

    Args:
        task_queue: queue of (newick_file, json_file, json_avg_mq_file)
            tuples.  The function returns once no task arrives within
            timeout seconds.
        annotation_dict: forwarded unchanged to the output functions.
        timeout: seconds to wait for the next task.
    """
    while True:
        try:
            tup = task_queue.get(block=True, timeout=timeout)
        except queue.Empty:
            break
        try:
            newick_file, json_file, json_avg_mq_file = tup
            avg_mq_series = pandas.read_json(json_avg_mq_file, typ='series', orient='split')
            # Map quality to dataframe.
            mqdf = avg_mq_series.to_frame(name='MQ').T
            # Get the group.
            group = get_sample_name(newick_file)
            snps_df = pandas.read_json(json_file, orient='split')
            # Arrange the rows in tree order.
            tree_order = snps_df.loc[_read_sample_order(newick_file)]
            cascade_order = _cascade_order(tree_order)
            # Output the cascade table.
            output_cascade_table(cascade_order, mqdf, group, annotation_dict)
            # Output the sorted table.
            output_sort_table(cascade_order, mqdf, group, annotation_dict)
        finally:
            # Mark the task as done even when it failed, so that a
            # consumer that joins the queue is not left waiting for it.
            task_queue.task_done()
|
f641e52353e8
"planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_build_tables commit 1131a7accc36df73eac621f6ae8aa3cb62403bde"
greg
parents:
3
diff
changeset
|
295 |
|
f641e52353e8
"planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_build_tables commit 1131a7accc36df73eac621f6ae8aa3cb62403bde"
greg
parents:
3
diff
changeset
|
296 |
|
f641e52353e8
"planemo upload for repository https://github.com/gregvonkuster/galaxy_tools/tree/master/tools/sequence_analysis/vsnp/vsnp_build_tables commit 1131a7accc36df73eac621f6ae8aa3cb62403bde"
greg
parents:
3
diff
changeset
|
def set_num_cpus(num_files, processes):
    """Return the number of worker processes to start.

    The result is bounded by the number of files to process, the number
    of CPUs reported by multiprocessing.cpu_count() and the user-selected
    number of processes.

    Parameters:
        num_files: number of input files (one queued task per file).
        processes: user-selected number of processes.

    Returns:
        num_files when it is smaller than both the CPU count and
        processes; otherwise, when processes exceeds the CPU count, the
        smaller of num_files and half of the CPUs (never less than 1);
        otherwise processes.
    """
    num_cpus = multiprocessing.cpu_count()
    if num_files < num_cpus and num_files < processes:
        return num_files
    if num_cpus < processes:
        # The user asked for more processes than there are CPUs, so
        # fall back to half of the CPUs.  On a single-CPU host the
        # halved value would be 0, which would start no workers at all
        # and leave queued tasks unprocessed, so use at least 1.
        half_cpus = max(1, num_cpus // 2)
        return min(num_files, half_cpus)
    return processes
| 0 | 307 |
| 308 | |
if __name__ == '__main__':
    # Script entry point: parse the command line, collect the matching
    # newick / SNPs json / average MQ json files, queue one task per
    # newick file and run preprocess_tables in worker processes.
    parser = argparse.ArgumentParser()

    parser.add_argument('--input_avg_mq_json', action='store', dest='input_avg_mq_json', required=False, default=None, help='Average MQ json file')
    parser.add_argument('--input_newick', action='store', dest='input_newick', required=False, default=None, help='Newick file')
    parser.add_argument('--input_snps_json', action='store', dest='input_snps_json', required=False, default=None, help='SNPs json file')
    parser.add_argument('--gbk_file', action='store', dest='gbk_file', required=False, default=None, help='Optional gbk file')
    # set_num_cpus() compares this value with integers, so a missing
    # value would only surface later as a TypeError.  Require it here
    # so the failure is reported as a normal usage error.
    parser.add_argument('--processes', action='store', dest='processes', type=int, required=True, help='User-selected number of processes to use for job splitting')

    args = parser.parse_args()

    if args.gbk_file is not None:
        # Create the annotation_dict for annotating
        # the Excel tables.
        annotation_dict = get_annotation_dict(args.gbk_file)
    else:
        annotation_dict = None

    # The assumption here is that the list of files
    # in both INPUT_NEWICK_DIR and INPUT_JSON_DIR are
    # named such that they are properly matched if
    # the directories contain more than 1 file (i.e.,
    # hopefully the newick file names and json file names
    # will be something like Mbovis-01D6_* so they can be
    # sorted and properly associated with each other).
    if args.input_newick is not None:
        newick_files = [args.input_newick]
    else:
        newick_files = [os.path.abspath(os.path.join(INPUT_NEWICK_DIR, file_name)) for file_name in sorted(os.listdir(INPUT_NEWICK_DIR))]
    if args.input_snps_json is not None:
        json_files = [args.input_snps_json]
    else:
        json_files = [os.path.abspath(os.path.join(INPUT_JSON_DIR, file_name)) for file_name in sorted(os.listdir(INPUT_JSON_DIR))]
    if args.input_avg_mq_json is not None:
        json_avg_mq_files = [args.input_avg_mq_json]
    else:
        json_avg_mq_files = [os.path.abspath(os.path.join(INPUT_JSON_AVG_MQ_DIR, file_name)) for file_name in sorted(os.listdir(INPUT_JSON_AVG_MQ_DIR))]

    num_files = len(newick_files)
    # Every newick file is paired by position with one SNPs json file
    # and one average MQ json file.  Report a shortfall clearly instead
    # of failing with an IndexError while filling the queue.
    if len(json_files) < num_files or len(json_avg_mq_files) < num_files:
        parser.error('Found {} newick file(s) but only {} SNPs json file(s) and {} average MQ json file(s)'.format(num_files, len(json_files), len(json_avg_mq_files)))

    multiprocessing.set_start_method('spawn')
    queue1 = multiprocessing.JoinableQueue()
    # NOTE(review): queue2 is not referenced in the visible part of this
    # block; it is kept in case later code relies on it.
    queue2 = multiprocessing.JoinableQueue()
    cpus = set_num_cpus(num_files, args.processes)
    # Set a timeout for get()s in the queue.
    timeout = 0.05

    # Queue one task per newick file together with its matching json
    # files (zip stops at the last newick file; lengths were checked above).
    for newick_file, json_file, json_avg_mq_file in zip(newick_files, json_files, json_avg_mq_files):
        queue1.put((newick_file, json_file, json_avg_mq_file))

    # Complete the preprocess_tables task.
    processes = [multiprocessing.Process(target=preprocess_tables, args=(queue1, annotation_dict, timeout, )) for _ in range(cpus)]
    for p in processes:
        p.start()
    for p in processes:
        p.join()
    # Block until every queued task has been marked done.
    queue1.join()

    if queue1.empty():
        queue1.close()
        queue1.join_thread()
