Mercurial > repos > iuc > vsnp_add_zero_coverage
annotate vsnp_build_tables.py @ 3:2e863710a2f0 draft
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
| author | iuc | 
|---|---|
| date | Thu, 10 Dec 2020 15:26:25 +0000 | 
| parents | aed013f6b13b | 
| children | 6dc6dd4666e3 | 
| rev | line source | 
|---|---|
| 0 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 1 #!/usr/bin/env python | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 2 | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 3 import argparse | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 4 import os | 
| 1 
aed013f6b13b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 94e69abb568077267eb8b15ef624624e2899a750"
 iuc parents: 
0diff
changeset | 5 import re | 
| 
aed013f6b13b
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 94e69abb568077267eb8b15ef624624e2899a750"
 iuc parents: 
0diff
changeset | 6 | 
| 0 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 7 import pandas | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 8 import pandas.io.formats.excel | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 9 from Bio import SeqIO | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 10 | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 11 # Maximum columns allowed in a LibreOffice | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 12 # spreadsheet is 1024. Excel allows for | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 13 # 16,384 columns, but we'll set the lower | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 14 # number as the maximum. Some browsers | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 15 # (e.g., Firefox on Linux) are configured | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 16 # to use LibreOffice for Excel spreadsheets. | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 17 MAXCOLS = 1024 | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 18 OUTPUT_EXCEL_DIR = 'output_excel_dir' | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 19 | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 20 | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
def annotate_table(table_df, group, annotation_dict):
    """Add an 'annotations' row to a SNP table.

    Parameters:
        table_df: data frame whose column labels have the form
            "<reference>:<position>".
        group: name used to build the scratch file "<group>_annotations.csv"
            in the current working directory.
        annotation_dict: mapping of chromosome name to a data frame with
            'chrom', 'locus', 'product' and 'gene' columns whose index
            supports ``get_loc(<int position>)``.
            NOTE(review): presumably an IntervalIndex built from feature
            start/stop coordinates - confirm against get_annotation_dict.

    Returns:
        The table with one extra row labelled 'annotations'.  Columns that
        received no annotation line are dropped by the inner merge.
    """
    # The column labels do not depend on the chromosome being processed,
    # so split them into reference / position once.
    column_labels = pandas.Series(list(table_df))
    ref_df = pandas.DataFrame(column_labels.str.split(':', expand=True).values, columns=['reference', 'position'])
    annotation_file = "%s_annotations.csv" % group
    try:
        # The file is opened once for all chromosomes, so write mode is
        # enough and guarantees leftovers from an earlier run are discarded.
        with open(annotation_file, "w") as fh:
            for gbk_chrome, pro in annotation_dict.items():
                chrom_positions = ref_df.loc[ref_df['reference'] == gbk_chrome, 'position']
                for pos in chrom_positions:
                    try:
                        hit = pro.iloc[pro.index.get_loc(int(pos))][['chrom', 'locus', 'product', 'gene']]
                    except KeyError:
                        print("{}:{}\tNo annotated product".format(gbk_chrome, pos), file=fh)
                        continue
                    fields = hit.values
                    # Several matching features give a 2-D array (use the
                    # first one); a single match gives a 1-D array.  Test the
                    # shape explicitly: unpacking fields[0] and waiting for a
                    # ValueError silently mis-parses a 4-character chromosome
                    # name as four separate values.
                    if fields.ndim > 1:
                        fields = fields[0]
                    chrom, locus, product, gene = fields
                    print("{}:{}\t{}, {}, {}".format(chrom, pos, product, gene, locus), file=fh)
        # Read the annotation file into a data frame.
        annotations_df = pandas.read_csv(annotation_file, sep='\t', header=None, names=['index', 'annotations'], index_col='index')
    finally:
        # Both the cascade and sort tables are built using the same file
        # name, so always remove it, even when something above failed.
        if os.path.exists(annotation_file):
            os.remove(annotation_file)
    # Merge on the transposed table so the annotations become one more row.
    table_df_transposed = table_df.T
    table_df_transposed.index = table_df_transposed.index.rename('index')
    table_df_transposed = table_df_transposed.merge(annotations_df, left_index=True, right_index=True)
    return table_df_transposed.T
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 57 | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 58 | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
def excel_formatter(json_file_name, excel_file_name, group, annotation_dict):
    """Write a SNP table stored as JSON to a colour-coded Excel workbook.

    Parameters:
        json_file_name: path of the table, read with ``orient='split'``.
        excel_file_name: path of the .xlsx file to create.
        group: group name forwarded to annotate_table.
        annotation_dict: per-chromosome annotation frames, or None, in which
            case an empty row labelled 'no annotations' is appended instead.
    """
    pandas.io.formats.excel.header_style = None
    table_df = pandas.read_json(json_file_name, orient='split')
    if annotation_dict is not None:
        table_df = annotate_table(table_df, group, annotation_dict)
    else:
        # DataFrame.append was removed in pandas 2.0; concatenating a
        # one-row all-NaN frame adds the same placeholder row.
        placeholder = pandas.DataFrame(float('nan'), index=['no annotations'], columns=table_df.columns)
        table_df = pandas.concat([table_df, placeholder])
    rows, cols = table_df.shape
    # The context manager saves and closes the workbook, also when the
    # formatting below raises (ExcelWriter.save() no longer exists in
    # pandas 2.0).
    with pandas.ExcelWriter(excel_file_name, engine='xlsxwriter') as writer:
        table_df.to_excel(writer, sheet_name='Sheet1')
        writer_book = writer.book
        ws = writer.sheets['Sheet1']
        format_a = writer_book.add_format({'bg_color': '#58FA82'})
        format_g = writer_book.add_format({'bg_color': '#F7FE2E'})
        format_c = writer_book.add_format({'bg_color': '#0000FF'})
        format_t = writer_book.add_format({'bg_color': '#FF0000'})
        format_normal = writer_book.add_format({'bg_color': '#FDFEFE'})
        formatlowqual = writer_book.add_format({'font_color': '#C70039', 'bg_color': '#E2CFDD'})
        format_ambigous = writer_book.add_format({'font_color': '#C70039', 'bg_color': '#E2CFDD'})
        format_n = writer_book.add_format({'bg_color': '#E2CFDD'})
        format_annotation = writer_book.add_format({'font_color': '#0A028C', 'rotation': '-90', 'align': 'top'})
        ws.set_column(0, 0, 30)
        ws.set_column(1, cols, 2.1)
        ws.freeze_panes(2, 1)
        # NOTE(review): the second argument of set_row is the row height, so
        # this gives the row after the table a height of cols + 1 - confirm
        # that this is intended.
        ws.set_row(rows + 1, cols + 1, format_annotation)
        # Make sure that row/column locations don't overlap.
        ws.conditional_format(rows - 2, 1, rows - 1, cols, {'type': 'cell', 'criteria': '<', 'value': 55, 'format': formatlowqual})
        ws.conditional_format(2, 1, rows - 2, cols, {'type': 'cell', 'criteria': '==', 'value': 'B$2', 'format': format_normal})
        # The rules are registered in this exact order, as in the original
        # sequence of calls.
        text_rules = [
            ('A', format_a),
            ('G', format_g),
            ('C', format_c),
            ('T', format_t),
            ('S', format_ambigous),
            ('Y', format_ambigous),
            ('R', format_ambigous),
            ('W', format_ambigous),
            ('K', format_ambigous),
            ('M', format_ambigous),
            ('N', format_n),
            ('-', format_n),
        ]
        for text, cell_format in text_rules:
            ws.conditional_format(2, 1, rows - 2, cols, {'type': 'text', 'criteria': 'containing', 'value': text, 'format': cell_format})
        # Rewrite the header cells with rotated text.
        format_rotation = writer_book.add_format({})
        format_rotation.set_rotation(90)
        for column_num, column_name in enumerate(table_df.columns):
            ws.write(0, column_num + 1, column_name, format_rotation)
        # Set last row.
        ws.set_row(rows, 400, format_annotation)
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 108 | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 109 | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 110 def get_annotation_dict(gbk_file): | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 111 gbk_dict = SeqIO.to_dict(SeqIO.parse(gbk_file, "genbank")) | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 112 annotation_dict = {} | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 113 tmp_file = "features.csv" | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 114 # Create a file of chromosomes and features. | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 115 for chromosome in list(gbk_dict.keys()): | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 116 with open(tmp_file, 'w+') as fh: | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 117 for feature in gbk_dict[chromosome].features: | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 118 if "CDS" in feature.type or "rRNA" in feature.type: | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 119 try: | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 120 product = feature.qualifiers['product'][0] | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 121 except KeyError: | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 122 product = None | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 123 try: | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 124 locus = feature.qualifiers['locus_tag'][0] | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 125 except KeyError: | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 126 locus = None | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 127 try: | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 128 gene = feature.qualifiers['gene'][0] | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 129 except KeyError: | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 130 gene = None | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 131 fh.write("%s\t%d\t%d\t%s\t%s\t%s\n" % (chromosome, int(feature.location.start), int(feature.location.end), locus, product, gene)) | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 132 # Read the chromosomes and features file into a data frame. | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 133 df = pandas.read_csv(tmp_file, sep='\t', names=["chrom", "start", "stop", "locus", "product", "gene"]) | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 134 # Process the data. | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 135 df = df.sort_values(['start', 'gene'], ascending=[True, False]) | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 136 df = df.drop_duplicates('start') | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 137 pro = df.reset_index(drop=True) | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 138 pro.index = pandas.IntervalIndex.from_arrays(pro['start'], pro['stop'], closed='both') | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 139 annotation_dict[chromosome] = pro | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 140 return annotation_dict | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 141 | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 142 | 
| 3 
2e863710a2f0
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
 iuc parents: 
1diff
def get_sample_name(file_path):
    """Return the base file name of file_path without its last extension.

    The directory portion of file_path is discarded.  The extension is
    only stripped when the base name contains a "." somewhere after its
    first character; otherwise the base name is returned unchanged.
    """
    file_name = os.path.basename(file_path)
    # find() yields -1 when there is no dot and 0 when the name starts
    # with one; in both cases the name is returned as is.
    if file_name.find(".") <= 0:
        return file_name
    # Eliminate the extension.
    stem, _extension = os.path.splitext(file_name)
    return stem
| 0 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 149 | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 150 | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
def output_cascade_table(cascade_order, mqdf, group, annotation_dict):
    """Append the rows of mqdf to cascade_order and output the "cascade" table.

    The two data frames are concatenated row-wise with join='inner', so
    only the columns present in both are kept.  The combined frame is
    handed to output_table together with group and annotation_dict.
    """
    output_table(
        pandas.concat([cascade_order, mqdf], join='inner'),
        "cascade",
        group,
        annotation_dict,
    )
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 154 | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 155 | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
def output_excel(df, type_str, group, annotation_dict, count=None):
    """Write df to a temporary JSON file and format it into an Excel file.

    Both files are created in OUTPUT_EXCEL_DIR.  Their names are built
    from type_str, prefixed with "<group>_" when group is not None and
    suffixed with "_<count>" when count is not None.  A count is passed
    when the table has been split into multiple files because of the
    MAXCOLS setting, so that the pieces form an output collection.
    """
    # Optional group prefix shared by both file names.
    if group is None:
        stem = type_str
    else:
        stem = "%s_%s" % (group, type_str)
    # Optional chunk number shared by both file names.
    if count is None:
        suffix = ""
    else:
        suffix = "_%d" % count
    # Output the temporary json file that
    # is used by the excel_formatter.
    json_file_name = os.path.join(OUTPUT_EXCEL_DIR, "%s_order_mq%s.json" % (stem, suffix))
    excel_file_name = os.path.join(OUTPUT_EXCEL_DIR, "%s_table%s.xlsx" % (stem, suffix))
    df.to_json(json_file_name, orient='split')
    # Output the Excel file.
    excel_formatter(json_file_name, excel_file_name, group, annotation_dict)
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 179 | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 180 | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
def output_sort_table(cascade_order, mqdf, group, annotation_dict):
    """Reorder the columns of cascade_order by position and output the "sort" table.

    The column labels of cascade_order are strings of the form
    "<chrom>:<pos>".  Columns are ordered by the integer <pos> only (the
    chromosome part is ignored, as before), the rows of mqdf are then
    appended with join='inner', and the result is passed to output_table
    along with group and annotation_dict.

    Raises ValueError if the text after the last ':' of a column label
    is not an integer.
    """
    # Split on the LAST colon only, so that a chromosome name which itself
    # contains ':' still yields a usable position.
    positions = pandas.Series(cascade_order.columns).str.rsplit(':', n=1).str[-1].astype(int)
    # mergesort is stable: columns that share a position (for example on
    # different chromosomes) keep their relative order from cascade_order
    # instead of being ordered arbitrarily.
    column_order = positions.values.argsort(kind='mergesort')
    sort_df = cascade_order.iloc[:, column_order]
    sort_order_mq = pandas.concat([sort_df, mqdf], join='inner')
    output_table(sort_order_mq, "sort", group, annotation_dict)
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 192 | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 193 | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
def output_table(df, type_str, group, annotation_dict):
    """Output df as a single table, or as numbered pieces when it is too wide.

    When df has fewer than MAXCOLS columns it is passed to output_excel
    as is.  Otherwise it is cut into consecutive slices of at most
    MAXCOLS columns, and each slice is passed to output_excel with a
    1-based count so that every piece gets its own file name.

    A group string that starts with "dataset" is replaced by None before
    being passed on to output_excel.
    """
    if isinstance(group, str) and group.startswith("dataset"):
        # Inputs are single files, not collections,
        # so input file names are not useful for naming
        # output files.
        group_str = None
    else:
        group_str = group
    column_count = df.shape[1]
    if column_count < MAXCOLS:
        output_excel(df, type_str, group_str, annotation_dict)
        return
    # Here the number of columns has reached the MAXCOLS limit, so
    # multiple outputs will be produced.  Iterating over the chunk start
    # offsets guarantees that every slice holds at least one column; no
    # empty trailing table is written when the column count is an exact
    # multiple of MAXCOLS.
    for count, chunk_start in enumerate(range(0, column_count, MAXCOLS), 1):
        df_of_type = df.iloc[:, chunk_start:chunk_start + MAXCOLS]
        output_excel(df_of_type, type_str, group_str, annotation_dict, count=count)
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 222 | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 223 | 
| 3 
2e863710a2f0
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
 iuc parents: 
1diff
changeset | 224 def preprocess_tables(newick_file, json_file, json_avg_mq_file, annotation_dict): | 
| 
2e863710a2f0
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
 iuc parents: 
1diff
changeset | 225 avg_mq_series = pandas.read_json(json_avg_mq_file, typ='series', orient='split') | 
| 
2e863710a2f0
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
 iuc parents: 
1diff
changeset | 226 # Map quality to dataframe. | 
| 
2e863710a2f0
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
 iuc parents: 
1diff
changeset | 227 mqdf = avg_mq_series.to_frame(name='MQ') | 
| 
2e863710a2f0
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
 iuc parents: 
1diff
changeset | 228 mqdf = mqdf.T | 
| 
2e863710a2f0
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
 iuc parents: 
1diff
changeset | 229 # Get the group. | 
| 
2e863710a2f0
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
 iuc parents: 
1diff
changeset | 230 group = get_sample_name(newick_file) | 
| 
2e863710a2f0
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
 iuc parents: 
1diff
changeset | 231 snps_df = pandas.read_json(json_file, orient='split') | 
| 
2e863710a2f0
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
 iuc parents: 
1diff
changeset | 232 with open(newick_file, 'r') as fh: | 
| 
2e863710a2f0
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
 iuc parents: 
1diff
changeset | 233 for line in fh: | 
| 
2e863710a2f0
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
 iuc parents: 
1diff
changeset | 234 line = re.sub('[:,]', '\n', line) | 
| 
2e863710a2f0
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
 iuc parents: 
1diff
changeset | 235 line = re.sub('[)(]', '', line) | 
| 
2e863710a2f0
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
 iuc parents: 
1diff
changeset | 236 line = re.sub(r'[0-9].*\.[0-9].*\n', '', line) | 
| 
2e863710a2f0
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
 iuc parents: 
1diff
changeset | 237 line = re.sub('root\n', '', line) | 
| 
2e863710a2f0
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
 iuc parents: 
1diff
changeset | 238 sample_order = line.split('\n') | 
| 
2e863710a2f0
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
 iuc parents: 
1diff
changeset | 239 sample_order = list([_f for _f in sample_order if _f]) | 
| 
2e863710a2f0
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
 iuc parents: 
1diff
changeset | 240 sample_order.insert(0, 'root') | 
| 
2e863710a2f0
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
 iuc parents: 
1diff
changeset | 241 tree_order = snps_df.loc[sample_order] | 
| 
2e863710a2f0
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
 iuc parents: 
1diff
changeset | 242 # Count number of SNPs in each column. | 
| 
2e863710a2f0
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
 iuc parents: 
1diff
changeset | 243 snp_per_column = [] | 
| 
2e863710a2f0
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
 iuc parents: 
1diff
changeset | 244 for column_header in tree_order: | 
| 
2e863710a2f0
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
 iuc parents: 
1diff
changeset | 245 count = 0 | 
| 
2e863710a2f0
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
 iuc parents: 
1diff
changeset | 246 column = tree_order[column_header] | 
| 
2e863710a2f0
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
 iuc parents: 
1diff
changeset | 247 for element in column: | 
| 
2e863710a2f0
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
 iuc parents: 
1diff
changeset | 248 if element != column[0]: | 
| 
2e863710a2f0
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
 iuc parents: 
1diff
changeset | 249 count = count + 1 | 
| 
2e863710a2f0
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
 iuc parents: 
1diff
changeset | 250 snp_per_column.append(count) | 
| 
2e863710a2f0
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
 iuc parents: 
1diff
changeset | 251 row1 = pandas.Series(snp_per_column, tree_order.columns, name="snp_per_column") | 
| 
2e863710a2f0
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
 iuc parents: 
1diff
changeset | 252 # Count number of SNPS from the | 
| 
2e863710a2f0
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
 iuc parents: 
1diff
changeset | 253 # top of each column in the table. | 
| 
2e863710a2f0
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
 iuc parents: 
1diff
changeset | 254 snp_from_top = [] | 
| 
2e863710a2f0
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
 iuc parents: 
1diff
changeset | 255 for column_header in tree_order: | 
| 
2e863710a2f0
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
 iuc parents: 
1diff
changeset | 256 count = 0 | 
| 
2e863710a2f0
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
 iuc parents: 
1diff
changeset | 257 column = tree_order[column_header] | 
| 
2e863710a2f0
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
 iuc parents: 
1diff
changeset | 258 # for each element in the column | 
| 
2e863710a2f0
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
 iuc parents: 
1diff
changeset | 259 # skip the first element | 
| 
2e863710a2f0
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
 iuc parents: 
1diff
changeset | 260 for element in column[1:]: | 
| 
2e863710a2f0
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
 iuc parents: 
1diff
changeset | 261 if element == column[0]: | 
| 
2e863710a2f0
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
 iuc parents: 
1diff
changeset | 262 count = count + 1 | 
| 
2e863710a2f0
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
 iuc parents: 
1diff
changeset | 263 else: | 
| 
2e863710a2f0
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
 iuc parents: 
1diff
changeset | 264 break | 
| 
2e863710a2f0
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
 iuc parents: 
1diff
changeset | 265 snp_from_top.append(count) | 
| 
2e863710a2f0
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
 iuc parents: 
1diff
changeset | 266 row2 = pandas.Series(snp_from_top, tree_order.columns, name="snp_from_top") | 
| 
2e863710a2f0
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
 iuc parents: 
1diff
changeset | 267 tree_order = tree_order.append([row1]) | 
| 
2e863710a2f0
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
 iuc parents: 
1diff
changeset | 268 tree_order = tree_order.append([row2]) | 
| 
2e863710a2f0
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
 iuc parents: 
1diff
changeset | 269 # In pandas=0.18.1 even this does not work: | 
| 
2e863710a2f0
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
 iuc parents: 
1diff
changeset | 270 # abc = row1.to_frame() | 
| 
2e863710a2f0
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
 iuc parents: 
1diff
changeset | 271 # abc = abc.T --> tree_order.shape (5, 18), abc.shape (1, 18) | 
| 
2e863710a2f0
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
 iuc parents: 
1diff
changeset | 272 # tree_order.append(abc) | 
| 
2e863710a2f0
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
 iuc parents: 
1diff
changeset | 273 # Continue to get error: "*** ValueError: all the input arrays must have same number of dimensions" | 
| 
2e863710a2f0
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
 iuc parents: 
1diff
changeset | 274 tree_order = tree_order.T | 
| 
2e863710a2f0
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
 iuc parents: 
1diff
changeset | 275 tree_order = tree_order.sort_values(['snp_from_top', 'snp_per_column'], ascending=[True, False]) | 
| 
2e863710a2f0
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
 iuc parents: 
1diff
changeset | 276 tree_order = tree_order.T | 
| 
2e863710a2f0
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
 iuc parents: 
1diff
changeset | 277 # Remove snp_per_column and snp_from_top rows. | 
| 
2e863710a2f0
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
 iuc parents: 
1diff
changeset | 278 cascade_order = tree_order[:-2] | 
| 
2e863710a2f0
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
 iuc parents: 
1diff
changeset | 279 # Output the cascade table. | 
| 
2e863710a2f0
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
 iuc parents: 
1diff
changeset | 280 output_cascade_table(cascade_order, mqdf, group, annotation_dict) | 
| 
2e863710a2f0
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
 iuc parents: 
1diff
changeset | 281 # Output the sorted table. | 
| 
2e863710a2f0
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 2e312886647244b416c64eca91e1a61dd1be939b"
 iuc parents: 
1diff
changeset | 282 output_sort_table(cascade_order, mqdf, group, annotation_dict) | 
| 0 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 283 | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
changeset | 284 | 
| 
0ad85e7db2fc
"planemo upload for repository https://github.com/galaxyproject/tools-iuc/tree/master/tools/vsnp commit 6a0c9a857c1f4638ef18e106b1f8c0681303acc5"
 iuc parents: diff
if __name__ == '__main__':
    # Script entry point: parse the command-line options, optionally build
    # the annotation dictionary from a gbk file, then hand everything to
    # preprocess_tables.
    parser = argparse.ArgumentParser()

    # No type or default is declared for the three input options, so an
    # option that is not supplied on the command line is passed on as None.
    parser.add_argument('--gbk_file', action='store', dest='gbk_file', required=False, default=None, help='Optional gbk file')
    parser.add_argument('--input_avg_mq_json', action='store', dest='input_avg_mq_json', help='Average MQ json file')
    parser.add_argument('--input_newick', action='store', dest='input_newick', help='Newick file')
    parser.add_argument('--input_snps_json', action='store', dest='input_snps_json', help='SNPs json file')

    args = parser.parse_args()

    if args.gbk_file is not None:
        # Create the annotation_dict for annotating
        # the Excel tables.
        annotation_dict = get_annotation_dict(args.gbk_file)
    else:
        # No gbk file was given, so no annotation dictionary is passed on.
        annotation_dict = None

    preprocess_tables(args.input_newick, args.input_snps_json, args.input_avg_mq_json, annotation_dict)
