Mercurial > repos > mheinzl > variant_analyzer2
changeset 28:afda74e874ac draft
planemo upload for repository https://github.com/Single-Molecule-Genetics/VariantAnalyzerGalaxy/tree/master/tools/variant_analyzer commit ee4a8e6cf290e6c8a4d55f9cd2839d60ab3b11c8
author | mheinzl |
---|---|
date | Wed, 24 Feb 2021 09:39:24 +0000 |
parents | 5992e30ae50e |
children | b14b69697cf6 |
files | read2mut.py read2mut.xml |
diffstat | 2 files changed, 128 insertions(+), 51 deletions(-) [+] |
line wrap: on
line diff
--- a/read2mut.py Mon Feb 22 16:56:05 2021 +0000 +++ b/read2mut.py Wed Feb 24 09:39:24 2021 +0000 @@ -16,19 +16,21 @@ USAGE: python read2mut.py --mutFile DCS_Mutations.tabular --bamFile Interesting_Reads.trim.bam --inputJson tag_count_dict.json --sscsJson SSCS_counts.json - --outputFile mutant_reads_summary_short_trim.xlsx --thresh 10 --phred 20 --trim 10 --chimera_correction + --outputFile mutant_reads_summary_short_trim.xlsx --thresh 10 --phred 20 --trim5 10 --trim3 10 --chimera_correction """ from __future__ import division import argparse +import csv import json import operator import os import re import sys + import numpy as np import pysam import xlsxwriter @@ -47,6 +49,8 @@ help='JSON file with SSCS counts collected by mut2sscs.py.') parser.add_argument('--outputFile', help='Output xlsx file with summary of mutations.') + parser.add_argument('--outputFile_csv', + help='Output csv file with summary of mutations.') parser.add_argument('--outputFile2', help='Output xlsx file with allele frequencies of mutations.') parser.add_argument('--outputFile3', @@ -55,8 +59,10 @@ help='Integer threshold for displaying mutations. Only mutations occuring less than thresh times are displayed. Default of 0 displays all.') parser.add_argument('--phred', type=int, default=20, help='Integer threshold for Phred score. Only reads higher than this threshold are considered. Default 20.') - parser.add_argument('--trim', type=int, default=10, - help='Integer threshold for assigning mutations at start and end of reads to lower tier. Default 10.') + parser.add_argument('--trim5', type=int, default=10, + help='Integer threshold for assigning mutations at start of reads to lower tier. Default 10.') + parser.add_argument('--trim3', type=int, default=10, + help='Integer threshold for assigning mutations at end of reads to lower tier. Default 10.') parser.add_argument('--chimera_correction', action="store_true", help='Count chimeric variants and correct the variant frequencies') return parser @@ -78,9 +84,11 @@ outfile = args.outputFile outfile2 = args.outputFile2 outfile3 = args.outputFile3 + outputFile_csv = args.outputFile_csv thresh = args.thresh phred_score = args.phred - trim = args.trim + trim5 = args.trim5 + trim3 = args.trim3 chimera_correction = args.chimera_correction if os.path.isfile(file1) is False: @@ -93,8 +101,10 @@ sys.exit("Error: thresh is '{}', but only non-negative integers allowed".format(thresh)) if phred_score < 0: sys.exit("Error: phred is '{}', but only non-negative integers allowed".format(phred_score)) - if trim < 0: - sys.exit("Error: trim is '{}', but only non-negative integers allowed".format(thresh)) + if trim5 < 0: + sys.exit("Error: trim5 is '{}', but only non-negative integers allowed".format(trim5)) + if trim3 < 0: + sys.exit("Error: trim3 is '{}', but only non-negative integers allowed".format(trim3)) # load dicts with open(json_file, "r") as f: @@ -227,10 +237,13 @@ else: pure_tags_dict_short = pure_tags_dict + #csv_data = open(outputFile_csv, "w") + #csv_writer = csv.writer(csv_data, delimiter=",") + # output summary with threshold workbook = xlsxwriter.Workbook(outfile) - workbook2 = xlsxwriter.Workbook(outfile2) - workbook3 = xlsxwriter.Workbook(outfile3) + workbook2 = xlsxwriter.Workbook(outfile2, {'in_memory': True}) + workbook3 = xlsxwriter.Workbook(outfile3, {'in_memory': True}) ws1 = workbook.add_worksheet("Results") ws2 = workbook2.add_worksheet("Allele frequencies") ws3 = workbook3.add_worksheet("Tiers") @@ -255,6 +268,7 @@ 'SSCS alt.ab', 'SSCS alt.ba', 'SSCS ref.ab', 'SSCS ref.ba', 'in phase', 'chimeric tag') ws1.write_row(0, 0, header_line) + #csv_writer.writerow(header_line) counter_tier11 = 0 counter_tier12 = 0 counter_tier21 = 0 @@ -266,6 +280,7 @@ counter_tier41 = 0 counter_tier42 = 0 counter_tier5 = 0 + counter_tier6 = 0 row = 1 tier_dict = {} chimera_dict = {} @@ -281,7 +296,8 @@ whole_array = list(pure_tags_dict_short[key1].keys()) tier_dict[key1] = {} - values_tier_dict = [("tier 1.1", 0), ("tier 1.2", 0), ("tier 2.1", 0), ("tier 2.2", 0), ("tier 2.3", 0), ("tier 2.4", 0), ("tier 3.1", 0), ("tier 3.2", 0), ("tier 4.1", 0), ("tier 4.2", 0), ("tier 5", 0)] + values_tier_dict = [("tier 1.1", 0), ("tier 1.2", 0), ("tier 2.1", 0), ("tier 2.2", 0), ("tier 2.3", 0), ("tier 2.4", 0), + ("tier 3.1", 0), ("tier 3.2", 0), ("tier 4.1", 0), ("tier 4.2", 0), ("tier 5", 0), ("tier 6", 0)] for k, v in values_tier_dict: tier_dict[key1][k] = v @@ -499,7 +515,8 @@ details1 = (total1, total4, total1new, total4new, ref1, ref4, alt1, alt4, ref1f, ref4f, alt1f, alt4f, na1, na4, lowq1, lowq4, beg1, beg4) details2 = (total2, total3, total2new, total3new, ref2, ref3, alt2, alt3, ref2f, ref3f, alt2f, alt3f, na2, na3, lowq2, lowq3, beg2, beg3) - trimmed = False + trimmed_five = False + trimmed_three = False contradictory = False if ((all(float(ij) >= 0.5 for ij in [alt1ff, alt4ff]) & all(float(ij) == 0. for ij in [alt2ff, alt3ff])) | (all(float(ij) >= 0.5 for ij in [alt2ff, alt3ff]) & all(float(ij) == 0. for ij in [alt1ff, alt4ff]))): @@ -507,36 +524,66 @@ alt4ff = 0 alt2ff = 0 alt3ff = 0 - trimmed = False + trimmed_five = False + trimmed_three = False contradictory = True else: - if ((read_pos1 >= 0) and ((read_pos1 <= trim) | (abs(read_len_median1 - read_pos1) <= trim))): + if ((read_pos1 >= 0) and (read_pos1 <= trim5)): beg1 = total1new total1new = 0 alt1ff = 0 alt1f = 0 - trimmed = True + trimmed_five = True + + if ((read_pos1 >= 0) and (abs(read_len_median1 - read_pos1) <= trim3)): + beg1 = total1new + total1new = 0 + alt1ff = 0 + alt1f = 0 + trimmed_three = True - if ((read_pos4 >= 0) and ((read_pos4 <= trim) | (abs(read_len_median4 - read_pos4) <= trim))): + if ((read_pos4 >= 0) and (read_pos4 <= trim5)): + beg4 = total4new + total4new = 0 + alt4ff = 0 + alt4f = 0 + trimmed_five = True + + if ((read_pos4 >= 0) and (abs(read_len_median4 - read_pos4) <= trim3)): beg4 = total4new total4new = 0 alt4ff = 0 alt4f = 0 - trimmed = True + trimmed_three = True - if ((read_pos2 >= 0) and ((read_pos2 <= trim) | (abs(read_len_median2 - read_pos2) <= trim))): + if ((read_pos2 >= 0) and (read_pos2 <= trim5)): + beg2 = total2new + total2new = 0 + alt2ff = 0 + alt2f = 0 + trimmed_five = True + + if ((read_pos2 >= 0) and (abs(read_len_median2 - read_pos2) <= trim3)): beg2 = total2new total2new = 0 alt2ff = 0 alt2f = 0 - trimmed = True + trimmed_three = True - if ((read_pos3 >= 0) and ((read_pos3 <= trim) | (abs(read_len_median3 - read_pos3) <= trim))): + if ((read_pos3 >= 0) and (read_pos3 <= trim5)): beg3 = total3new total3new = 0 alt3ff = 0 alt3f = 0 - trimmed = True + trimmed_five = True + + if ((read_pos3 >= 0) and (abs(read_len_median3 - read_pos3) <= trim3)): + beg3 = total3new + total3new = 0 + alt3ff = 0 + alt3f = 0 + trimmed_three = True + details1 = (total1, total4, total1new, total4new, ref1, ref4, alt1, alt4, ref1f, ref4f, alt1f, alt4f, na1, na4, lowq1, lowq4, beg1, beg4) details2 = (total2, total3, total2new, total3new, ref2, ref3, alt2, alt3, ref2f, ref3f, alt2f, alt3f, na2, na3, lowq2, lowq3, beg2, beg3) @@ -586,20 +633,24 @@ counter_tier32 += 1 tier_dict[key1]["tier 3.2"] += 1 - elif (trimmed): + elif trimmed_five: tier = "4.1" counter_tier41 += 1 tier_dict[key1]["tier 4.1"] += 1 - elif (contradictory): + elif trimmed_three: tier = "4.2" counter_tier42 += 1 tier_dict[key1]["tier 4.2"] += 1 - else: + elif contradictory: tier = "5" counter_tier5 += 1 tier_dict[key1]["tier 5"] += 1 + else: + tier = "6" + counter_tier6 += 1 + tier_dict[key1]["tier 6"] += 1 chrom, pos, ref_a, alt_a = re.split(r'\#', key1) var_id = '-'.join([chrom, str(int(pos) + 1), ref, alt]) @@ -682,8 +733,10 @@ read_pos3 = read_len_median3 = None line = (var_id, tier, key2[:-5], 'ab1.ba2', read_pos1, read_pos4, read_len_median1, read_len_median4, dcs_median) + details1 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut14, chimera) ws1.write_row(row, 0, line) + #csv_writer.writerow(line) line = ("", "", key2[:-5], 'ab2.ba1', read_pos2, read_pos3, read_len_median2, read_len_median3, dcs_median) + details2 + (sscs_mut_ab, sscs_mut_ba, sscs_ref_ab, sscs_ref_ba, add_mut23, chimera) ws1.write_row(row + 1, 0, line) + #csv_writer.writerow(line) ws1.conditional_format('L{}:M{}'.format(row + 1, row + 2), {'type': 'formula', @@ -714,18 +767,19 @@ else: chimeric_dcs_high_tiers += high_tiers chimera_dict[key1] = (chimeric_dcs, chimeric_dcs_high_tiers) + #csv_data.close() # sheet 2 if chimera_correction: header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'chimeras in AC alt (all tiers)', 'chimera-corrected cvrg', 'chimera-corrected AF (all tiers)', 'cvrg (tiers 1.1-2.4)', 'AC alt (tiers 1.1-2.4)', 'AF (tiers 1.1-2.4)', 'chimeras in AC alt (tiers 1.1-2.4)', 'chimera-corrected cvrg (tiers 1.1-2.4)', 'chimera-corrected AF (tiers 1.1-2.4)', 'AC alt (orginal DCS)', 'AF (original DCS)', 'tier 1.1', 'tier 1.2', 'tier 2.1', 'tier 2.2', 'tier 2.3', 'tier 2.4', - 'tier 3.1', 'tier 3.2', 'tier 4.1', 'tier 4.2', 'tier 5', 'AF 1.1-1.2', 'AF 1.1-2.1', 'AF 1.1-2.2', - 'AF 1.1-2.3', 'AF 1.1-2.4', 'AF 1.1-3.1', 'AF 1.1-3.2', 'AF 1.1-4.1', 'AF 1.1-4.2', 'AF 1.1-5') + 'tier 3.1', 'tier 3.2', 'tier 4.1', 'tier 4.2', 'tier 5', 'tier 6', 'AF 1.1-1.2', 'AF 1.1-2.1', 'AF 1.1-2.2', + 'AF 1.1-2.3', 'AF 1.1-2.4', 'AF 1.1-3.1', 'AF 1.1-3.2', 'AF 1.1-4.1', 'AF 1.1-4.2', 'AF 1.1-5', 'AF 1.1-6') else: header_line2 = ('variant ID', 'cvrg', 'AC alt (all tiers)', 'AF (all tiers)', 'cvrg (tiers 1.1-2.4)', 'AC alt (tiers 1.1-2.4)', 'AF (tiers 1.1-2.4)', 'AC alt (orginal DCS)', 'AF (original DCS)', 'tier 1.1', 'tier 1.2', 'tier 2.1', 'tier 2.2', 'tier 2.3', 'tier 2.4', - 'tier 3.1', 'tier 3.2', 'tier 4.1', 'tier 4.2', 'tier 5', 'AF 1.1-1.2', 'AF 1.1-2.1', 'AF 1.1-2.2', - 'AF 1.1-2.3', 'AF 1.1-2.4', 'AF 1.1-3.1', 'AF 1.1-3.2', 'AF 1.1-4.1', 'AF 1.1-4.2', 'AF 1.1-5') + 'tier 3.1', 'tier 3.2', 'tier 4.1', 'tier 4.2', 'tier 5', 'tier 6', 'AF 1.1-1.2', 'AF 1.1-2.1', 'AF 1.1-2.2', + 'AF 1.1-2.3', 'AF 1.1-2.4', 'AF 1.1-3.1', 'AF 1.1-3.2', 'AF 1.1-4.1', 'AF 1.1-4.2', 'AF 1.1-5', 'AF 1.1-6') ws2.write_row(0, 0, header_line2) row = 0 @@ -760,14 +814,14 @@ fraction_chimeras = 0. new_cvrg = cvrg * (1. - fraction_chimeras) lst.extend([chimeras_all, new_cvrg, safe_div(new_alt, new_cvrg)]) - lst.extend([(cvrg - sum(used_tiers[-5:])), sum(used_tiers[0:6]), safe_div(sum(used_tiers[0:6]), (cvrg - sum(used_tiers[-5:])))]) + lst.extend([(cvrg - sum(used_tiers[-6:])), sum(used_tiers[0:6]), safe_div(sum(used_tiers[0:6]), (cvrg - sum(used_tiers[-6:])))]) if chimera_correction: chimeras_all = chimera_dict[key1][1] new_alt = sum(used_tiers[0:6]) - chimeras_all fraction_chimeras = safe_div(chimeras_all, float(sum(used_tiers[0:6]))) if fraction_chimeras is None: fraction_chimeras = 0. - new_cvrg = (cvrg - sum(used_tiers[-5:])) * (1. - fraction_chimeras) + new_cvrg = (cvrg - sum(used_tiers[-6:])) * (1. - fraction_chimeras) lst.extend([chimeras_all, new_cvrg, safe_div(new_alt, new_cvrg)]) lst.extend([alt_count, safe_div(alt_count, cvrg)]) lst.extend(used_tiers) @@ -777,18 +831,18 @@ if chimera_correction: ws2.conditional_format('P{}:Q{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$P$1="tier 1.1"', 'format': format1, 'multi_range': 'P{}:Q{} P1:Q1'.format(row + 2, row + 2)}) ws2.conditional_format('R{}:U{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$R$1="tier 2.1"', 'format': format3, 'multi_range': 'R{}:U{} R1:U1'.format(row + 2, row + 2)}) - ws2.conditional_format('V{}:Z{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$V$1="tier 3.1"', 'format': format2, 'multi_range': 'V{}:Z{} V1:Z1'.format(row + 2, row + 2)}) + ws2.conditional_format('V{}:AA{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$V$1="tier 3.1"', 'format': format2, 'multi_range': 'V{}:AA{} V1:AA1'.format(row + 2, row + 2)}) else: ws2.conditional_format('J{}:K{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$J$1="tier 1.1"', 'format': format1, 'multi_range': 'J{}:K{} J1:K1'.format(row + 2, row + 2)}) ws2.conditional_format('L{}:O{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$L$1="tier 2.1"', 'format': format3, 'multi_range': 'L{}:O{} L1:O1'.format(row + 2, row + 2)}) - ws2.conditional_format('P{}:T{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$P$1="tier 3.1"', 'format': format2, 'multi_range': 'P{}:T{} P1:T1'.format(row + 2, row + 2)}) + ws2.conditional_format('P{}:U{}'.format(row + 2, row + 2), {'type': 'formula', 'criteria': '=$P$1="tier 3.1"', 'format': format2, 'multi_range': 'P{}:U{} P1:U1'.format(row + 2, row + 2)}) row += 1 # sheet 3 sheet3 = [("tier 1.1", counter_tier11), ("tier 1.2", counter_tier12), ("tier 2.1", counter_tier21), ("tier 2.2", counter_tier22), ("tier 2.3", counter_tier23), ("tier 2.4", counter_tier24), ("tier 3.1", counter_tier31), ("tier 3.2", counter_tier32), ("tier 4.1", counter_tier41), - ("tier 4.2", counter_tier42), ("tier 5", counter_tier5)] + ("tier 4.2", counter_tier42), ("tier 5", counter_tier5), ("tier 6", counter_tier6)] header = ("tier", "count") ws3.write_row(0, 0, header) @@ -805,76 +859,92 @@ 'format': format3}) ws3.conditional_format('A{}:B{}'.format(i + 2, i + 2), {'type': 'formula', - 'criteria': '=$A${}>="3"'.format(i + 2), + 'criteria': '=OR($A${}="tier 3.1", $A${}="tier 3.2", $A${}="tier 4.1", $A${}="tier 4.2", $A${}="tier 5", $A${}="tier 6")'.format(i + 2, i + 2, i + 2, i + 2, i + 2, i + 2), 'format': format2}) - description_tiers = [("Tier 1.1", "both ab and ba SSCS present (>75% of the sites with alternative base) and minimal FS>=3 for both SSCS in at least one mate"), ("", ""), ("Tier 1.2", "both ab and ba SSCS present (>75% of the sites with alt. base) and mate pair validation (min. FS=1) and minimal FS>=3 for at least one of the SSCS"), ("Tier 2.1", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS>=3 for at least one of the SSCS in at least one mate"), ("Tier 2.2", "both ab and ba SSCS present (>75% of the sites with alt. base) and mate pair validation (min. FS=1)"), ("Tier 2.3", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS=1 for both SSCS in one mate and minimal FS>=3 for at least one of the SSCS in the other mate"), ("Tier 2.4", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS=1 for both SSCS in at least one mate"), ("Tier 3.1", "both ab and ba SSCS present (>50% of the sites with alt. base) and recurring mutation on this position"), ("Tier 3.2", "both ab and ba SSCS present (>50% of the sites with alt. base) and minimal FS>=1 for both SSCS in at least one mate"), ("Tier 4.1", "variants at the start or end of the reads"), ("Tier 4.2", "mates with contradictory information"), ("Tier 5", "remaining variants")] - examples_tiers = [[("Chr5:5-20000-11068-C-G", "1.1", "AAAAAGATGCCGACTACCTT", "ab1.ba2", "254", "228", "287", "288", "289", + description_tiers = [("Tier 1.1", "both ab and ba SSCS present (>75% of the sites with alternative base) and minimal FS>=3 for both SSCS in at least one mate"), ("", ""), + ("Tier 1.2", "both ab and ba SSCS present (>75% of the sites with alt. base) and mate pair validation (min. FS=1) and minimal FS>=3 for at least one of the SSCS"), + ("Tier 2.1", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS>=3 for at least one of the SSCS in at least one mate"), + ("Tier 2.2", "both ab and ba SSCS present (>75% of the sites with alt. base) and mate pair validation (min. FS=1)"), + ("Tier 2.3", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS=1 for both SSCS in one mate and minimal FS>=3 for at least one of the SSCS in the other mate"), + ("Tier 2.4", "both ab and ba SSCS present (>75% of the sites with alt. base) and minimal FS=1 for both SSCS in at least one mate"), + ("Tier 3.1", "both ab and ba SSCS present (>50% of the sites with alt. base) and recurring mutation on this position"), + ("Tier 3.2", "both ab and ba SSCS present (>50% of the sites with alt. base) and minimal FS>=1 for both SSCS in at least one mate"), + ("Tier 4.1", "variants at the beginning of the reads"), + ("Tier 4.2", "variants at the end of the reads"), + ("Tier 5", "mates with contradictory information"), + ("Tier 6", "remaining variants")] + examples_tiers = [[("chr5-11068-C-G", "1.1", "AAAAAGATGCCGACTACCTT", "ab1.ba2", "254", "228", "287", "288", "289", "3", "6", "3", "6", "0", "0", "3", "6", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", "4081", "4098", "5", "10", "", ""), ("", "", "AAAAAGATGCCGACTACCTT", "ab2.ba1", None, None, None, None, "289", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0", "0", "0", "0", "0", "0", "4081", "4098", "5", "10", "", "")], - [("Chr5:5-20000-11068-C-G", "1.1", "AAAAATGCGTAGAAATATGC", "ab1.ba2", "254", "228", "287", "288", "289", + [("chr5-11068-C-G", "1.1", "AAAAATGCGTAGAAATATGC", "ab1.ba2", "254", "228", "287", "288", "289", "33", "43", "33", "43", "0", "0", "33", "43", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", "4081", "4098", "5", "10", "", ""), ("", "", "AAAAATGCGTAGAAATATGC", "ab2.ba1", "268", "268", "270", "288", "289", "11", "34", "10", "27", "0", "0", "10", "27", "0", "0", "1", "1", "0", "0", "1", "7", "0", "0", "4081", "4098", "5", "10", "", "")], - [("Chr5:5-20000-10776-G-T", "1.2", "CTATGACCCGTGAGCCCATG", "ab1.ba2", "132", "132", "287", "288", "290", + [("chr5-10776-G-T", "1.2", "CTATGACCCGTGAGCCCATG", "ab1.ba2", "132", "132", "287", "288", "290", "4", "1", "4", "1", "0", "0", "4", "1", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", "1", "6", "47170", "41149", "", ""), ("", "", "CTATGACCCGTGAGCCCATG", "ab2.ba1", "77", "132", "233", "200", "290", "4", "1", "4", "1", "0", "0", "4", "1", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", "1", "6", "47170", "41149", "", "")], - [("Chr5:5-20000-11068-C-G", "2.1", "AAAAAAACATCATACACCCA", "ab1.ba2", "246", "244", "287", "288", "289", + [("chr5-11068-C-G", "2.1", "AAAAAAACATCATACACCCA", "ab1.ba2", "246", "244", "287", "288", "289", "2", "8", "2", "8", "0", "0", "2", "8", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", "4081", "4098", "5", "10", "", ""), ("", "", "AAAAAAACATCATACACCCA", "ab2.ba1", None, None, None, None, "289", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0", "0", "0", "0", "0", "0", "4081", "4098", "5", "10", "", "")], - [("Chr5:5-20000-11068-C-G", "2.2", "ATCAGCCATGGCTATTATTG", "ab1.ba2", "72", "72", "217", "288", "289", + [("chr5-11068-C-G", "2.2", "ATCAGCCATGGCTATTATTG", "ab1.ba2", "72", "72", "217", "288", "289", "1", "1", "1", "1", "0", "0", "1", "1", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", "4081", "4098", "5", "10", "", ""), ("", "", "ATCAGCCATGGCTATTATTG", "ab2.ba1", "153", "164", "217", "260", "289", "1", "1", "1", "1", "0", "0", "1", "1", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", "4081", "4098", "5", "10", "", "")], - [("Chr5:5-20000-11068-C-G", "2.3", "ATCAATATGGCCTCGCCACG", "ab1.ba2", None, None, None, None, + [("chr5-11068-C-G", "2.3", "ATCAATATGGCCTCGCCACG", "ab1.ba2", None, None, None, None, "289", "0", "5", "0", "5", "0", "0", "0", "5", None, None, None, "1", "0", "0", "0", "0", "0", "0", "4081", "4098", "5", "10", "", ""), ("", "", "ATCAATATGGCCTCGCCACG", "ab2.ba1", "202", "255", "277", "290", "289", "1", "3", "1", "3", "0", "0", "1", "3", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", "4081", "4098", "5", "10", "", "")], - [("Chr5:5-20000-11068-C-G", "2.4", "ATCAGCCATGGCTATTTTTT", "ab1.ba2", "72", "72", "217", "288", "289", + [("chr5-11068-C-G", "2.4", "ATCAGCCATGGCTATTTTTT", "ab1.ba2", "72", "72", "217", "288", "289", "1", "1", "1", "1", "0", "0", "1", "1", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", "4081", "4098", "5", "10", "", ""), ("", "", "ATCAGCCATGGCTATTTTTT", "ab2.ba1", "153", "164", "217", "260", "289", "1", "1", "0", "0", "0", "0", "0", "0", "0", "0", "0", "0", "1", "1", "0", "0", "0", "0", "4081", "4098", "5", "10", "", "")], - [("Chr5:5-20000-10776-G-T", "3.1", "ATGCCTACCTCATTTGTCGT", "ab1.ba2", "46", "15", "287", "288", "290", + [("chr5-10776-G-T", "3.1", "ATGCCTACCTCATTTGTCGT", "ab1.ba2", "46", "15", "287", "288", "290", "3", "3", "3", "2", "3", "1", "0", "1", "1", "0.5", "0", "0.5", "0", "0", "0", "1", "0", "0", "3", "3", "47170", "41149", "", ""), ("", "", "ATGCCTACCTCATTTGTCGT", "ab2.ba1", None, "274", None, "288", "290", "0", "3", "0", "2", "0", "1", "0", "1", None, "0.5", None, "0.5", "0", "0", "0", "1", "0", "0", "3", "3", "47170", "41149", "", "")], - [("Chr5:5-20000-11315-C-T", "3.2", "ACAACATCACGTATTCAGGT", "ab1.ba2", "197", "197", "240", "255", "271", + [("chr5-11315-C-T", "3.2", "ACAACATCACGTATTCAGGT", "ab1.ba2", "197", "197", "240", "255", "271", "2", "3", "2", "3", "0", "1", "2", "2", "0", "0.333333333333333", "1", "0.666666666666667", "0", "0", "0", "0", "0", "0", "1", "1", "6584", "6482", "", ""), ("", "", "ACAACATCACGTATTCAGGT", "ab2.ba1", "35", "35", "240", "258", "271", "2", "3", "2", "3", "0", "1", "2", "2", "0", "0.333333333333333", "1", "0.666666666666667", "0", "0", "0", "0", "0", "0", "1", "1", "6584", "6482", "", "")], - [("Chr5:5-20000-13983-G-C", "4.1", "AAAAAAAGAATAACCCACAC", "ab1.ba2", "0", "100", "255", "276", "269", - "5", "6", "0", "6", "0", "0", "5", "6", "0", "0", "0", "1", "0", "0", "0", "0", "5", "0", "1", "1", "5348", "5350", "", ""), + [("chr5-13983-G-C", "4.1", "AAAAAAAGAATAACCCACAC", "ab1.ba2", "1", "100", "255", "276", "269", + "5", "6", "0", "6", "0", "0", "0", "6", "0", "0", "0", "1", "0", "0", "0", "0", "5", "0", "1", "1", "5348", "5350", "", ""), ("", "", "AAAAAAAGAATAACCCACAC", "ab2.ba1", None, None, None, None, "269", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0", "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")], - [("Chr5:5-20000-13963-T-C", "4.2", "TTTTTAAGAATAACCCACAC", "ab1.ba2", "38", "38", "240", "283", "263", + [("chr5-13983-G-C", "4.2", "AAAAAAAGAATAACCCACAC", "ab1.ba2", "20", "270", "255", "276", "269", + "5", "6", "5", "0", "0", "0", "5", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0", "6", "1", "1", "5348", "5350", "", ""), + ("", "", "AAAAAAAGAATAACCCACAC", "ab2.ba1", None, None, None, None, + "269", "0", "0", "0", "0", "0", "0", "0", "0", None, None, None, None, "0", + "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")], + [("chr5-13963-T-C", "5", "TTTTTAAGAATAACCCACAC", "ab1.ba2", "38", "38", "240", "283", "263", "110", "54", "110", "54", "0", "0", "110", "54", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", ""), ("", "", "TTTTTAAGAATAACCCACAC", "ab2.ba1", "100", "112", "140", "145", "263", "7", "12", "7", "12", "7", "12", "0", "0", "1", "1", "0", "0", "0", "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", "")], - [("Chr5:5-20000-13983-G-C", "5", "ATGTTGTGAATAACCCACAC", "ab1.ba2", None, "186", None, "276", "269", + [("chr5-13983-G-C", "6", "ATGTTGTGAATAACCCACAC", "ab1.ba2", None, "186", None, "276", "269", "0", "6", "0", "6", "0", "0", "0", "6", "0", "0", "0", "1", "0", "0", "0", "0", "0", "0", "1", "1", "5348", "5350", "", ""), ("", "", "ATGTTGTGAATAACCCACAC", "ab2.ba1", None, None, None, None, @@ -904,6 +974,7 @@ workbook.close() workbook2.close() workbook3.close() + if __name__ == '__main__':
--- a/read2mut.xml Mon Feb 22 16:56:05 2021 +0000 +++ b/read2mut.xml Wed Feb 24 09:39:24 2021 +0000 @@ -1,5 +1,5 @@ <?xml version="1.0" encoding="UTF-8"?> -<tool id="read2mut" name="Call specific mutations in reads:" version="2.0.1" profile="19.01"> +<tool id="read2mut" name="Call specific mutations in reads:" version="2.0.4" profile="17.01"> <description>Looks for reads with mutation at known positions and calculates frequencies and stats.</description> <macros> <import>va_macros.xml</import> @@ -17,9 +17,11 @@ --sscsJson '$file4' --thresh '$thresh' --phred '$phred' - --trim '$trim' + --trim5 '$trim5' + --trim3 '$trim3' $chimera_correction --outputFile '$output_xlsx' + --outputFile_csv '$outputFile_csv' --outputFile2 '$output_xlsx2' --outputFile3 '$output_xlsx3' ]]> @@ -31,11 +33,13 @@ <param name="file4" type="data" format="json" label="JSON File with SSCS tag stats" optional="false" help="JSON file generated by DCS mutations to SSCS stats."/> <param name="thresh" type="integer" label="Tag count threshold" value="0" help="Integer threshold for displaying mutations. Only mutations occuring in DCS of less than thresh tags are displayed. Default of 0 displays all."/> <param name="phred" type="integer" label="Phred quality score threshold" min="0" max="41" value="20" help="Integer threshold for Phred quality score. Only reads higher than this threshold are considered. Default = 20."/> - <param name="trim" type="integer" label="Trimming threshold" value="10" help="Integer threshold for assigning mutations at start and end of reads to lower tier. Default 10."/> + <param name="trim5" type="integer" label="Trimming threshold at 5' end" value="10" help="Integer threshold for assigning mutations at the beginning of reads to lower tier. Default 10."/> + <param name="trim3" type="integer" label="Trimming threshold at 3' end" value="10" help="Integer threshold for assigning mutations at the end of reads to lower tier. Default 10."/> <param name="chimera_correction" type="boolean" label="Apply chimera correction?" truevalue="--chimera_correction" falsevalue="" checked="False" help="Count chimeric variants and correct the variant frequencies."/> </inputs> <outputs> <data name="output_xlsx" format="xlsx" label="${tool.name} on ${on_string}: XLSX summary"/> + <data name="outputFile_csv" format="csv" label="${tool.name} on ${on_string}: CSV summary"/> <data name="output_xlsx2" format="xlsx" label="${tool.name} on ${on_string}: XLSX allele frequencies"/> <data name="output_xlsx3" format="xlsx" label="${tool.name} on ${on_string}: XLSX tiers"/> </outputs> @@ -47,9 +51,11 @@ <param name="file4" value="SSCS_counts_test.json"/> <param name="thresh" value="0"/> <param name="phred" value="20"/> - <param name="trim" value="10"/> - <param name="chimera_correction" value="True"/> + <param name="trim5" value="10"/> + <param name="trim3" value="10"/> + <param name="delim_csv" value=","/> <output name="output_xlsx" file="Variant_Analyzer_summary_test.xlsx" decompress="true" lines_diff="10"/> + <output name="outputFile_csv" file="Variant_Analyzer_summary_test.csv" decompress="true" lines_diff="10"/> <output name="output_xlsx2" file="Variant_Analyzer_allele_frequencies_test.xlsx" decompress="true" lines_diff="10"/> <output name="output_xlsx3" file="Variant_Analyzer_tiers_test.xlsx" decompress="true" lines_diff="10"/> </test>