Mercurial > repos > iuc > vsnp_add_zero_coverage
annotate vsnp_build_tables.py @ 9:40b97055bb99 draft default tip
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit c38fd63f7980c70390d104a73ba4c72b266444c3
author | iuc |
---|---|
date | Fri, 10 Jun 2022 06:08:02 +0000 |
parents | 18b59c38017e |
children |
rev | line source |
---|---|
0
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
1 #!/usr/bin/env python |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
2 |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
3 import argparse |
9
40b97055bb99
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit c38fd63f7980c70390d104a73ba4c72b266444c3
iuc
parents:
8
diff
changeset
|
4 import itertools |
7
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
5 import multiprocessing |
0
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
6 import os |
7
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
7 import queue |
1
aed013f6b13b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 94e69abb568077267eb8b15ef624624e2899a750"
iuc
parents:
0
diff
changeset
|
8 import re |
aed013f6b13b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 94e69abb568077267eb8b15ef624624e2899a750"
iuc
parents:
0
diff
changeset
|
9 |
0
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
10 import pandas |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
11 import pandas.io.formats.excel |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
12 from Bio import SeqIO |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
13 |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
14 # Maximum columns allowed in a LibreOffice |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
15 # spreadsheet is 1024. Excel allows for |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
16 # 16,384 columns, but we'll set the lower |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
17 # number as the maximum. Some browsers |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
18 # (e.g., Firefox on Linux) are configured |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
19 # to use LibreOffice for Excel spreadsheets. |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
20 MAXCOLS = 1024 |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
21 OUTPUT_EXCEL_DIR = 'output_excel_dir' |
7
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
22 INPUT_JSON_AVG_MQ_DIR = 'input_json_avg_mq_dir' |
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
23 INPUT_JSON_DIR = 'input_json_dir' |
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
24 INPUT_NEWICK_DIR = 'input_newick_dir' |
0
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
25 |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
26 |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
def annotate_table(table_df, group, annotation_dict):
    """Return ``table_df`` with an extra ``annotations`` row.

    The column labels of ``table_df`` are expected to look like
    ``<reference>:<position>``.  For every reference that is a key of
    ``annotation_dict`` the matching positions are looked up in that
    reference's feature data frame and a ``product, gene, locus`` string
    (or ``No annotated product``) is attached to the column.

    Parameters:
        table_df: data frame whose columns are ``reference:position`` labels.
        group: group name.  It used to name a scratch file on disk; it is
            kept only so that existing callers keep working.
        annotation_dict: mapping of reference name to a feature data frame
            providing the columns ``chrom``, ``locus``, ``product`` and
            ``gene`` and an index that supports ``get_loc(int)``
            (presumably an IntervalIndex - confirm against the producer).

    Returns:
        The table with the ``annotations`` row added.  Columns that received
        no annotation entry at all (their reference is not a key of
        ``annotation_dict``) are dropped by the inner merge.
    """
    fields = ['chrom', 'locus', 'product', 'gene']
    # The column labels do not depend on the chromosome being processed,
    # so split them into reference / position only once.
    ref_series = pandas.Series(list(table_df))
    ref_df = pandas.DataFrame(ref_series.str.split(':', expand=True).values, columns=['reference', 'position'])
    # The annotations are collected in memory instead of being appended to
    # a per-group file: that file name was shared by every table of the
    # group, so concurrent workers could corrupt or delete each other's data.
    labels = []
    annotations = []
    for gbk_chrome, pro in annotation_dict.items():
        positions = ref_df.loc[ref_df['reference'] == gbk_chrome, 'position']
        for pos in positions:
            try:
                hits = pro.iloc[pro.index.get_loc(int(pos))][fields]
            except KeyError:
                labels.append("{}:{}".format(gbk_chrome, pos))
                annotations.append("No annotated product")
                continue
            if hits.ndim == 1:
                # Exactly one feature matched, so the selection is already
                # the single record.  Indexing it with [0] would yield the
                # chromosome string, which unpacks "successfully" into
                # single letters whenever the name is four characters long.
                chrom, locus, product, gene = hits.values
            else:
                # Several features overlap the position; use the first one.
                chrom, locus, product, gene = hits.values[0]
            labels.append("{}:{}".format(chrom, pos))
            annotations.append("{}, {}, {}".format(product, gene, locus))
    annotations_df = pandas.DataFrame({'annotations': annotations}, index=pandas.Index(labels, name='index'))
    # Attach the annotations as a new column of the transposed table and
    # flip the result back so that they end up as the last row.
    table_df_transposed = table_df.T
    table_df_transposed.index = table_df_transposed.index.rename('index')
    table_df_transposed = table_df_transposed.merge(annotations_df, left_index=True, right_index=True)
    return table_df_transposed.T
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
63 |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
64 |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
def excel_formatter(json_file_name, excel_file_name, group, annotation_dict):
    """Write a SNP table stored as JSON to a colour coded Excel workbook.

    Parameters:
        json_file_name: path of the table, serialized with ``orient='split'``.
        excel_file_name: path of the ``.xlsx`` file to create.
        group: group name, passed through to ``annotate_table``.
        annotation_dict: per-reference annotation data passed through to
            ``annotate_table``, or ``None`` to add an empty
            ``no annotations`` row instead.

    The sheet is written with the column labels in row 0 and the index in
    column 0; every range below is expressed in those zero-based
    worksheet coordinates.
    """
    pandas.io.formats.excel.header_style = None
    table_df = pandas.read_json(json_file_name, orient='split')
    if annotation_dict is not None:
        table_df = annotate_table(table_df, group, annotation_dict)
    else:
        # DataFrame.append() no longer exists in pandas 2.x; concatenating
        # an all-empty one-row frame adds the same placeholder row.
        placeholder = pandas.DataFrame(index=['no annotations'], columns=table_df.columns, dtype=object)
        table_df = pandas.concat([table_df, placeholder])
    # The context manager saves and closes the workbook, also when one of
    # the formatting calls raises (ExcelWriter.save() is gone in pandas 2.x).
    with pandas.ExcelWriter(excel_file_name, engine='xlsxwriter') as writer:
        table_df.to_excel(writer, sheet_name='Sheet1')
        writer_book = writer.book
        ws = writer.sheets['Sheet1']
        format_a = writer_book.add_format({'bg_color': '#58FA82'})
        format_g = writer_book.add_format({'bg_color': '#F7FE2E'})
        format_c = writer_book.add_format({'bg_color': '#0000FF'})
        format_t = writer_book.add_format({'bg_color': '#FF0000'})
        format_normal = writer_book.add_format({'bg_color': '#FDFEFE'})
        format_low_quality = writer_book.add_format({'font_color': '#C70039', 'bg_color': '#E2CFDD'})
        format_ambiguous = writer_book.add_format({'font_color': '#C70039', 'bg_color': '#E2CFDD'})
        format_n = writer_book.add_format({'bg_color': '#E2CFDD'})
        format_annotation = writer_book.add_format({'font_color': '#0A028C', 'rotation': '-90', 'align': 'top'})
        rows, cols = table_df.shape
        ws.set_column(0, 0, 30)
        ws.set_column(1, cols, 2.1)
        ws.freeze_panes(2, 1)
        # NOTE(review): this passes cols + 1 as the *height* of the row
        # below the table, which looks unintended; kept as it was.
        ws.set_row(rows + 1, cols + 1, format_annotation)
        # Make sure that row/column locations don't overlap.
        # Numeric rows near the bottom (presumably the quality rows -
        # confirm) are flagged when the value is below 55.
        ws.conditional_format(rows - 2, 1, rows - 1, cols, {'type': 'cell', 'criteria': '<', 'value': 55, 'format': format_low_quality})
        # Cells equal to the value in worksheet row 2 of their own column.
        ws.conditional_format(2, 1, rows - 2, cols, {'type': 'cell', 'criteria': '==', 'value': 'B$2', 'format': format_normal})
        # The rules are added in this order on purpose: it is the order in
        # which they were originally registered on the worksheet.
        text_rules = (
            ('A', format_a),
            ('G', format_g),
            ('C', format_c),
            ('T', format_t),
            ('S', format_ambiguous),
            ('Y', format_ambiguous),
            ('R', format_ambiguous),
            ('W', format_ambiguous),
            ('K', format_ambiguous),
            ('M', format_ambiguous),
            ('N', format_n),
            ('-', format_n),
        )
        for text, cell_format in text_rules:
            ws.conditional_format(2, 1, rows - 2, cols, {'type': 'text', 'criteria': 'containing', 'value': text, 'format': cell_format})
        # Rewrite the header cells so that the labels are rotated.
        format_rotation = writer_book.add_format({})
        format_rotation.set_rotation(90)
        for column_num, column_name in enumerate(table_df.columns):
            ws.write(0, column_num + 1, column_name, format_rotation)
        # Set last row.
        ws.set_row(rows, 400, format_annotation)
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
114 |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
115 |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
116 def get_annotation_dict(gbk_file): |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
117 gbk_dict = SeqIO.to_dict(SeqIO.parse(gbk_file, "genbank")) |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
118 annotation_dict = {} |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
119 tmp_file = "features.csv" |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
120 # Create a file of chromosomes and features. |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
121 for chromosome in list(gbk_dict.keys()): |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
122 with open(tmp_file, 'w+') as fh: |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
123 for feature in gbk_dict[chromosome].features: |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
124 if "CDS" in feature.type or "rRNA" in feature.type: |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
125 try: |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
126 product = feature.qualifiers['product'][0] |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
127 except KeyError: |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
128 product = None |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
129 try: |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
130 locus = feature.qualifiers['locus_tag'][0] |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
131 except KeyError: |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
132 locus = None |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
133 try: |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
134 gene = feature.qualifiers['gene'][0] |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
135 except KeyError: |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
136 gene = None |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
137 fh.write("%s\t%d\t%d\t%s\t%s\t%s\n" % (chromosome, int(feature.location.start), int(feature.location.end), locus, product, gene)) |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
138 # Read the chromosomes and features file into a data frame. |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
139 df = pandas.read_csv(tmp_file, sep='\t', names=["chrom", "start", "stop", "locus", "product", "gene"]) |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
140 # Process the data. |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
141 df = df.sort_values(['start', 'gene'], ascending=[True, False]) |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
142 df = df.drop_duplicates('start') |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
143 pro = df.reset_index(drop=True) |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
144 pro.index = pandas.IntervalIndex.from_arrays(pro['start'], pro['stop'], closed='both') |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
145 annotation_dict[chromosome] = pro |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
146 return annotation_dict |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
147 |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
148 |
3
2e863710a2f0
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
iuc
parents:
1
diff
changeset
|
def get_sample_name(file_path):
    """Return the base name of file_path with its last extension removed.

    The name is returned unchanged when it contains no dot, or when its
    only qualifying dot is the very first character.
    """
    name = os.path.basename(file_path)
    # str.find returns -1 for "no dot" and 0 for a leading dot; neither
    # case is treated as having an extension to strip.
    if name.find(".") <= 0:
        return name
    # Eliminate the extension.
    stem, _extension = os.path.splitext(name)
    return stem
0
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
155 |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
156 |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
def output_excel(df, type_str, group, annotation_dict, count=None):
    """Write df as a temporary JSON file and format it into an Excel file.

    Both files are created in OUTPUT_EXCEL_DIR.  The file names are
    prefixed with group when it is not None, and suffixed with count when
    it is not None (the table was split because it has more columns than
    the MAXCOLS setting allows, so multiple files are produced as an
    output collection).
    """
    has_group = group is not None
    is_chunk = count is not None
    if is_chunk and has_group:
        json_base = "%s_%s_order_mq_%d.json" % (group, type_str, count)
        excel_base = "%s_%s_table_%d.xlsx" % (group, type_str, count)
    elif is_chunk:
        json_base = "%s_order_mq_%d.json" % (type_str, count)
        excel_base = "%s_table_%d.xlsx" % (type_str, count)
    elif has_group:
        json_base = "%s_%s_order_mq.json" % (group, type_str)
        excel_base = "%s_%s_table.xlsx" % (group, type_str)
    else:
        json_base = "%s_order_mq.json" % type_str
        excel_base = "%s_table.xlsx" % type_str
    json_file_name = os.path.join(OUTPUT_EXCEL_DIR, json_base)
    excel_file_name = os.path.join(OUTPUT_EXCEL_DIR, excel_base)
    # Output the temporary json file that is used by the excel_formatter.
    df.to_json(json_file_name, orient='split')
    # Output the Excel file.
    excel_formatter(json_file_name, excel_file_name, group, annotation_dict)
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
180 |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
181 |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
def output_table(df, type_str, group, annotation_dict):
    """Output df as one Excel table, or as several when it is too wide.

    Args:
        df: the table to output.
        type_str: table type label used in the output file names.
        group: group name used as a file name prefix.  A string starting
            with "dataset" is dropped from the file names.
        annotation_dict: passed through unchanged to output_excel.

    A table with fewer than MAXCOLS columns is written as a single file
    without a numeric suffix.  Otherwise the columns are written in
    consecutive chunks of at most MAXCOLS columns, numbered from 1.
    """
    if isinstance(group, str) and group.startswith("dataset"):
        # Inputs are single files, not collections,
        # so input file names are not useful for naming
        # output files.
        group_str = None
    else:
        group_str = group
    column_count = df.shape[1]
    if column_count < MAXCOLS:
        output_excel(df, type_str, group_str, annotation_dict)
        return
    # Here the number of columns has reached the maximum allowed per
    # table, so multiple outputs will be produced.  Stepping through the
    # column range only yields non-empty chunks, so no empty trailing
    # table is written when the column count is an exact multiple of
    # MAXCOLS.
    chunk_starts = range(0, column_count, MAXCOLS)
    for count, chunk_start in enumerate(chunk_starts, start=1):
        df_of_type = df.iloc[:, chunk_start:chunk_start + MAXCOLS]
        output_excel(df_of_type, type_str, group_str, annotation_dict, count=count)
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
210 |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
211 |
7
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
def _read_sample_order(newick_file):
    """Return the names extracted from a Newick file, with 'root' first.

    Newick punctuation, quotes and numeric tokens containing a dot
    (presumably branch lengths) are stripped.  When the file has several
    lines, only the last one determines the result.
    """
    with open(newick_file, 'r') as fh:
        for line in fh:
            line = re.sub('[:,]', '\n', line)
            line = re.sub('[)(]', '', line)
            line = re.sub(r'[0-9].*\.[0-9].*\n', '', line)
            line = re.sub("'", '', line)
            line = re.sub('root\n', '', line)
    sample_order = list(filter(None, line.split('\n')))
    sample_order.insert(0, 'root')
    return sample_order


def _longest_run_start(indices, default):
    """Return the first value of the longest run of consecutive integers.

    indices must be in ascending order.  The earliest run wins a tie, and
    default is returned when indices is empty.
    """
    # Consecutive values share the same (value - position) difference.
    runs = (
        [value for _, value in run]
        for _, run in itertools.groupby(enumerate(indices), key=lambda pair: pair[1] - pair[0])
    )
    longest = max(runs, key=len, default=None)
    return default if longest is None else longest[0]


def _build_sort_and_cascade_tables(tup, annotation_dict):
    """Output the "sort" and "cascade" tables for one queued task."""
    # json_avg_mq_file is part of the task tuple but is not used here.
    newick_file, json_file, json_avg_mq_file = tup
    # Get the group.
    group = get_sample_name(newick_file)
    snps_df = pandas.read_json(json_file, orient='split')
    tree_order_df = snps_df.loc[_read_sample_order(newick_file)]
    # Output the sorted table.
    output_table(tree_order_df, "sort", group, annotation_dict)
    snp_per_column = []
    snp_from_top = []
    for column_header in tree_order_df:
        column = tree_order_df[column_header]
        # The top row is the root/reference; select it by position,
        # since the rows are labelled by name.
        reference = column.iloc[0]
        # Count number of SNPs in each column.
        snp_per_column.append(sum(1 for element in column if element != reference and element != '-'))
        # Positions (below the reference row) that differ from the reference.
        index_list_of_ref_differences = [
            ind for ind, list_item in enumerate(column.iloc[1:].to_list())
            if list_item not in (reference, '-')
        ]
        # Starting row number with longest continuous SNPs in column.
        # A column without any difference gets a value one past the last
        # row so that it sorts after every column that has SNPs.
        snp_from_top.append(_longest_run_start(index_list_of_ref_differences, len(column) - 1))
    summary_df = pandas.DataFrame([
        pandas.Series(snp_per_column, index=tree_order_df.columns, name="snp_per_column"),
        pandas.Series(snp_from_top, index=tree_order_df.columns, name="snp_from_top"),
    ])
    # DataFrame.append no longer exists in pandas 2.x; concat is the
    # equivalent way to add the two summary rows at the bottom.
    tree_order_df = pandas.concat([tree_order_df, summary_df])
    # Sort the columns by transposing, sorting rows, and transposing back.
    tree_order_df = tree_order_df.T
    tree_order_df = tree_order_df.sort_values(['snp_from_top', 'snp_per_column'], ascending=[True, False])
    tree_order_df = tree_order_df.T
    # Remove snp_per_column and snp_from_top rows.
    cascade_order_df = tree_order_df.iloc[:-2]
    # Output the cascade table.
    output_table(cascade_order_df, "cascade", group, annotation_dict)


def preprocess_tables(task_queue, annotation_dict, timeout):
    """Consume tasks from task_queue until it stays empty for timeout seconds.

    Args:
        task_queue: queue of (newick_file, json_file, json_avg_mq_file)
            tuples; task_done() is called once per retrieved task.
        annotation_dict: passed through unchanged to output_table.
        timeout: seconds to wait for a task before returning.
    """
    while True:
        try:
            tup = task_queue.get(block=True, timeout=timeout)
        except queue.Empty:
            break
        try:
            _build_sort_and_cascade_tables(tup, annotation_dict)
        finally:
            # Always acknowledge the task, even when processing fails, so
            # that a caller joining the queue is not left waiting on it.
            task_queue.task_done()
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
272 |
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
273 |
0
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
274 if __name__ == '__main__': |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
275 parser = argparse.ArgumentParser() |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
276 |
7
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
277 parser.add_argument('--input_avg_mq_json', action='store', dest='input_avg_mq_json', required=False, default=None, help='Average MQ json file') |
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
278 parser.add_argument('--input_newick', action='store', dest='input_newick', required=False, default=None, help='Newick file') |
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
279 parser.add_argument('--input_snps_json', action='store', dest='input_snps_json', required=False, default=None, help='SNPs json file') |
0
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
280 parser.add_argument('--gbk_file', action='store', dest='gbk_file', required=False, default=None, help='Optional gbk file'), |
7
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
281 parser.add_argument('--processes', action='store', dest='processes', type=int, help='User-selected number of processes to use for job splitting') |
0
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
282 |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
283 args = parser.parse_args() |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
284 |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
285 if args.gbk_file is not None: |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
286 # Create the annotation_dict for annotating |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
287 # the Excel tables. |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
288 annotation_dict = get_annotation_dict(args.gbk_file) |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
289 else: |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
290 annotation_dict = None |
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
iuc
parents:
diff
changeset
|
291 |
7
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
292 # The assumption here is that the list of files |
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
293 # in INPUT_NEWICK_DIR, INPUT_JSON_DIR and INPUT_JSON_AVG_MQ_DIR are |
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
294 # named such that they are properly matched if |
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
295 # the directories contain more than 1 file (i.e., |
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
296 # hopefully the newick file names and json file names |
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
297 # will be something like Mbovis-01D6_* so they can be |
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
298 # sorted and properly associated with each other). |
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
299 if args.input_newick is not None: |
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
300 newick_files = [args.input_newick] |
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
301 else: |
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
302 newick_files = [] |
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
303 for file_name in sorted(os.listdir(INPUT_NEWICK_DIR)): |
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
304 file_path = os.path.abspath(os.path.join(INPUT_NEWICK_DIR, file_name)) |
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
305 newick_files.append(file_path) |
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
306 if args.input_snps_json is not None: |
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
307 json_files = [args.input_snps_json] |
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
308 else: |
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
309 json_files = [] |
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
310 for file_name in sorted(os.listdir(INPUT_JSON_DIR)): |
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
311 file_path = os.path.abspath(os.path.join(INPUT_JSON_DIR, file_name)) |
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
312 json_files.append(file_path) |
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
313 if args.input_avg_mq_json is not None: |
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
314 json_avg_mq_files = [args.input_avg_mq_json] |
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
315 else: |
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
316 json_avg_mq_files = [] |
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
317 for file_name in sorted(os.listdir(INPUT_JSON_AVG_MQ_DIR)): |
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
318 file_path = os.path.abspath(os.path.join(INPUT_JSON_AVG_MQ_DIR, file_name)) |
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
319 json_avg_mq_files.append(file_path) |
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
320 |
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
321 multiprocessing.set_start_method('spawn') |
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
322 queue1 = multiprocessing.JoinableQueue() |
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
323 queue2 = multiprocessing.JoinableQueue() |
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
324 num_files = len(newick_files) |
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
325 # Set a timeout for get()s in the queue. |
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
326 timeout = 0.05 |
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
327 |
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
328 for i, newick_file in enumerate(newick_files): |
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
329 json_file = json_files[i] |
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
330 json_avg_mq_file = json_avg_mq_files[i] |
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
331 queue1.put((newick_file, json_file, json_avg_mq_file)) |
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
332 |
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
333 # Complete the preprocess_tables task. |
9
40b97055bb99
planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit c38fd63f7980c70390d104a73ba4c72b266444c3
iuc
parents:
8
diff
changeset
|
334 processes = [multiprocessing.Process(target=preprocess_tables, args=(queue1, annotation_dict, timeout, )) for _ in range(args.processes)] |
7
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
335 for p in processes: |
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
336 p.start() |
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
337 for p in processes: |
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
338 p.join() |
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
339 queue1.join() |
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
340 |
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
341 if queue1.empty(): |
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
342 queue1.close() |
6dc6dd4666e3
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2a94c64d6c7236550bf483d2ffc4e86248c63aab"
iuc
parents:
3
diff
changeset
|
343 queue1.join_thread() |