Mercurial > repos > mheinzl > variant_analyzer2
comparison read2mut.py @ 13:02bf6425fc25 draft
planemo upload for repository https://github.com/Single-Molecule-Genetics/VariantAnalyzerGalaxy/tree/master/tools/variant_analyzer commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
| author | mheinzl |
|---|---|
| date | Mon, 22 Feb 2021 14:18:57 +0000 |
| parents | 7a418148319d |
| children | bcdb63df70ce |
comparison
equal
deleted
inserted
replaced
| 12:7a418148319d | 13:02bf6425fc25 |
|---|---|
| 21 """ | 21 """ |
| 22 | 22 |
| 23 from __future__ import division | 23 from __future__ import division |
| 24 | 24 |
| 25 import argparse | 25 import argparse |
| | 26 import csv |
| 26 import json | 27 import json |
| 27 import operator | 28 import operator |
| 28 import os | 29 import os |
| 29 import re | 30 import re |
| 30 import sys | 31 import sys |
| | 32 |
| 31 | 33 |
| 32 import numpy as np | 34 import numpy as np |
| 33 import pysam | 35 import pysam |
| 34 import xlsxwriter | 36 import xlsxwriter |
| 35 from cyvcf2 import VCF | 37 from cyvcf2 import VCF |
| 45 help='JSON file with data collected by mut2read.py.') | 47 help='JSON file with data collected by mut2read.py.') |
| 46 parser.add_argument('--sscsJson', | 48 parser.add_argument('--sscsJson', |
| 47 help='JSON file with SSCS counts collected by mut2sscs.py.') | 49 help='JSON file with SSCS counts collected by mut2sscs.py.') |
| 48 parser.add_argument('--outputFile', | 50 parser.add_argument('--outputFile', |
| 49 help='Output xlsx file with summary of mutations.') | 51 help='Output xlsx file with summary of mutations.') |
| | 52 parser.add_argument('--outputFile_csv', |
| | 53 help='Output csv file with summary of mutations.') |
| 50 parser.add_argument('--outputFile2', | 54 parser.add_argument('--outputFile2', |
| 51 help='Output xlsx file with allele frequencies of mutations.') | 55 help='Output xlsx file with allele frequencies of mutations.') |
| 52 parser.add_argument('--outputFile3', | 56 parser.add_argument('--outputFile3', |
| 53 help='Output xlsx file with examples of the tier classification.') | 57 help='Output xlsx file with examples of the tier classification.') |
| 54 parser.add_argument('--thresh', type=int, default=0, | 58 parser.add_argument('--thresh', type=int, default=0, |
| 57 help='Integer threshold for Phred score. Only reads higher than this threshold are considered. Default 20.') | 61 help='Integer threshold for Phred score. Only reads higher than this threshold are considered. Default 20.') |
| 58 parser.add_argument('--trim', type=int, default=10, | 62 parser.add_argument('--trim', type=int, default=10, |
| 59 help='Integer threshold for assigning mutations at start and end of reads to lower tier. Default 10.') | 63 help='Integer threshold for assigning mutations at start and end of reads to lower tier. Default 10.') |
| 60 parser.add_argument('--chimera_correction', action="store_true", | 64 parser.add_argument('--chimera_correction', action="store_true", |
| 61 help='Count chimeric variants and correct the variant frequencies') | 65 help='Count chimeric variants and correct the variant frequencies') |
| | 66 |
| | 67 |
| 62 return parser | 68 return parser |
| 63 | 69 |
| 64 | 70 |
| 65 def safe_div(x, y): | 71 def safe_div(x, y): |
| 66 if y == 0: | 72 if y == 0: |
| 76 json_file = args.inputJson | 82 json_file = args.inputJson |
| 77 sscs_json = args.sscsJson | 83 sscs_json = args.sscsJson |
| 78 outfile = args.outputFile | 84 outfile = args.outputFile |
| 79 outfile2 = args.outputFile2 | 85 outfile2 = args.outputFile2 |
| 80 outfile3 = args.outputFile3 | 86 outfile3 = args.outputFile3 |
| | 87 outputFile_csv = args.outputFile_csv |
| 81 thresh = args.thresh | 88 thresh = args.thresh |
| 82 phred_score = args.phred | 89 phred_score = args.phred |
| 83 trim = args.trim | 90 trim = args.trim |
| 84 chimera_correction = args.chimera_correction | 91 chimera_correction = args.chimera_correction |
| 85 | 92 |
| 225 if len(value) < thresh: | 232 if len(value) < thresh: |
| 226 pure_tags_dict_short[key] = value | 233 pure_tags_dict_short[key] = value |
| 227 else: | 234 else: |
| 228 pure_tags_dict_short = pure_tags_dict | 235 pure_tags_dict_short = pure_tags_dict |
| 229 | 236 |
| | 237 csv_data = open(outputFile_csv, "w") |
| | 238 csv_writer = csv.writer(csv_data) |
| | 239 |
| 230 # output summary with threshold | 240 # output summary with threshold |
| 231 workbook = xlsxwriter.Workbook(outfile) | 241 workbook = xlsxwriter.Workbook(outfile) |
| 232 workbook2 = xlsxwriter.Workbook(outfile2) | 242 workbook2 = xlsxwriter.Workbook(outfile2) |
| 233 workbook3 = xlsxwriter.Workbook(outfile3) | 243 workbook3 = xlsxwriter.Workbook(outfile3) |
| 234 ws1 = workbook.add_worksheet("Results") | 244 ws1 = workbook.add_worksheet("Results") |
| 253 'rel. ref.ab', 'rel. ref.ba', 'rel. alt.ab', 'rel. alt.ba', | 263 'rel. ref.ab', 'rel. ref.ba', 'rel. alt.ab', 'rel. alt.ba', |
| 254 'na.ab', 'na.ba', 'lowq.ab', 'lowq.ba', 'trim.ab', 'trim.ba', | 264 'na.ab', 'na.ba', 'lowq.ab', 'lowq.ba', 'trim.ab', 'trim.ba', |
| 255 'SSCS alt.ab', 'SSCS alt.ba', 'SSCS ref.ab', 'SSCS ref.ba', | 265 'SSCS alt.ab', 'SSCS alt.ba', 'SSCS ref.ab', 'SSCS ref.ba', |
| 256 'in phase', 'chimeric tag') | 266 'in phase', 'chimeric tag') |
| 257 ws1.write_row(0, 0, header_line) | 267 ws1.write_row(0, 0, header_line) |
| | 268 csv_writer.writerow(header_line) |
| 258 counter_tier11 = 0 | 269 counter_tier11 = 0 |
| 259 counter_tier12 = 0 | 270 counter_tier12 = 0 |
| 260 counter_tier21 = 0 | 271 counter_tier21 = 0 |
| 261 counter_tier22 = 0 | 272 counter_tier22 = 0 |
| 262 counter_tier23 = 0 | 273 counter_tier23 = 0 |
| 680 read_pos2 = read_len_median2 = None | 691 read_pos2 = read_len_median2 = None |
| 681 if (read_pos3 == -1): | 692 if (read_pos3 == -1): |
| 682 read_pos3 = read_len_median3 = None | 693 read_pos3 = read_len_median3 = None |
| 683 line = (var_id, tier, key2[:-5], 'ab1.ba2', read_pos1, read_pos4, read_len_median1, read_len_median4, dcs_median) + details1 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut14, chimera) | 694 line = (var_id, tier, key2[:-5], 'ab1.ba2', read_pos1, read_pos4, read_len_median1, read_len_median4, dcs_median) + details1 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut14, chimera) |
| 684 ws1.write_row(row, 0, line) | 695 ws1.write_row(row, 0, line) |
| | 696 csv_writer.writerow(line) |
| 685 line = ("", "", key2[:-5], 'ab2.ba1', read_pos2, read_pos3, read_len_median2, read_len_median3, dcs_median) + details2 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut23, chimera) | 697 line = ("", "", key2[:-5], 'ab2.ba1', read_pos2, read_pos3, read_len_median2, read_len_median3, dcs_median) + details2 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut23, chimera) |
| 686 ws1.write_row(row + 1, 0, line) | 698 ws1.write_row(row + 1, 0, line) |
| | 699 csv_writer.writerow(line) |
| 687 | 700 |
| 688 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), | 701 ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), |
| 689 {'type': 'formula', | 702 {'type': 'formula', |
| 690 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1), | 703 'criteria': '=OR($B${}="1.1", $B${}="1.2")'.format(row + 1, row + 1), |
| 691 'format': format1, | 704 'format': format1, |
| 902 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2)}) | 915 'multi_range': 'L{}:M{} T{}:U{} B{}'.format(start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2, start_row + 2 + row + i + k + 3, start_row + 2 + row + i + k + 2)}) |
| 903 row += 3 | 916 row += 3 |
| 904 workbook.close() | 917 workbook.close() |
| 905 workbook2.close() | 918 workbook2.close() |
| 906 workbook3.close() | 919 workbook3.close() |
| | 920 csv_data.close() |
| 907 | 921 |
| 908 | 922 |
| 909 if __name__ == '__main__': | 923 if __name__ == '__main__': |
| 910 sys.exit(read2mut(sys.argv)) | 924 sys.exit(read2mut(sys.argv)) |
