Mercurial > repos > mheinzl > fsd
changeset 17:2e517a54eedc draft
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/fsd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6-dirty
author | mheinzl |
---|---|
date | Tue, 02 Apr 2019 05:10:09 -0400 |
parents | 6bd9ef49d013 |
children | c825a29a7d9f |
files | fsd.py fsd.xml test-data/Test_data.tabular test-data/Test_data2.tabular test-data/Test_data3.tabular test-data/Test_data4.tabular test-data/fsd_data1.tab test-data/fsd_data2.tab test-data/fsd_data3.tab test-data/fsd_data4.tab test-data/fsd_output1.pdf test-data/fsd_output1.tab test-data/fsd_output2.pdf test-data/fsd_output2.tab test-data/output_file.pdf test-data/output_file.tabular test-data/output_file2.pdf test-data/output_file2.tabular |
diffstat | 18 files changed, 1046 insertions(+), 830 deletions(-) [+] |
line wrap: on
line diff
--- a/fsd.py Mon Oct 08 05:50:18 2018 -0400 +++ b/fsd.py Tue Apr 02 05:10:09 2019 -0400 @@ -45,19 +45,17 @@ def compare_read_families(argv): + parser = make_argparser() args = parser.parse_args(argv[1:]) - firstFile = args.inputFile1 name1 = args.inputName1 - secondFile = args.inputFile2 name2 = args.inputName2 thirdFile = args.inputFile3 name3 = args.inputName3 fourthFile = args.inputFile4 name4 = args.inputName4 - title_file = args.output_tabular title_file2 = args.output_pdf @@ -90,24 +88,32 @@ data_array_list.append(file1) legend = "\n\n\n{}".format(name1) - plt.text(0.1, 0.11, legend, size=12, transform=plt.gcf().transFigure) - legend1 = "singletons:\nabsolute nr.\n{:,}".format(numpy.bincount(data1)[1]) - plt.text(0.4, 0.11, legend1, size=12, transform=plt.gcf().transFigure) + plt.text(0.05, 0.11, legend, size=10, transform=plt.gcf().transFigure) + legend1 = "singletons:\nnr. of tags\n{:,}".format(numpy.bincount(data1)[1]) + plt.text(0.32, 0.11, legend1, size=10, transform=plt.gcf().transFigure) - legend3 = "rel. freq\n{:.3f}".format(float(numpy.bincount(data1)[1]) / len(data1)) - plt.text(0.5, 0.11, legend3, size=12, transform=plt.gcf().transFigure) + legend3 = "freq. of tags\n{:.3f}".format(float(numpy.bincount(data1)[1]) / len(data1)) + plt.text(0.41, 0.11, legend3, size=10, transform=plt.gcf().transFigure) + + legend3b = "PE reads\n{:.3f}".format(float(numpy.bincount(data1)[1]) / sum(integers)) + plt.text(0.5, 0.11, legend3b, size=10, transform=plt.gcf().transFigure) - legend4 = "family size > 20:\nabsolute nr.\n{:,}".format(numpy.bincount(data1)[len(numpy.bincount(data1)) - 1].astype(int)) - plt.text(0.6, 0.11, legend4, size=12, transform=plt.gcf().transFigure) + legend4 = "family size > 20:\nnr. of tags\n{:,} ({:.3f})".format(numpy.bincount(data1)[len(numpy.bincount(data1)) - 1].astype(int), float(numpy.bincount(data1)[len(numpy.bincount(data1)) - 1]) / len(data1)) + plt.text(0.58, 0.11, legend4, size=10, transform=plt.gcf().transFigure) + + legend5 = "PE reads\n{:,} ({:.3f})".format(sum(integers[integers > 20]), float(sum(integers[integers > 20])) / sum(integers)) + plt.text(0.70, 0.11, legend5, size=10, transform=plt.gcf().transFigure) - legend5 = "rel. freq\n{:.3f}".format(float(numpy.bincount(data1)[len(numpy.bincount(data1)) - 1]) / len(data1)) - plt.text(0.7, 0.11, legend5, size=12, transform=plt.gcf().transFigure) + legend6 = "total nr. of\ntags\n{:,}".format(len(data1)) + plt.text(0.82, 0.11, legend6, size=10, transform=plt.gcf().transFigure) - legend6 = "total length\n{:,}".format(len(data1)) - plt.text(0.8, 0.11, legend6, size=12, transform=plt.gcf().transFigure) + legend6b = "PE reads\n{:,}".format(sum(integers)) + plt.text(0.89, 0.11, legend6b, size=10, transform=plt.gcf().transFigure) if secondFile != str(None): file2 = readFileReferenceFree(secondFile) + integers2 = numpy.array(file2[:, 0]).astype(int) # keep original family sizes + data2 = numpy.asarray(file2[:, 0]).astype(int) bigFamilies2 = numpy.where(data2 > 20)[0] data2[bigFamilies2] = 22 @@ -117,25 +123,34 @@ label.append(name2) data_array_list.append(file2) - plt.text(0.1, 0.09, name2, size=12, transform=plt.gcf().transFigure) + plt.text(0.05, 0.09, name2, size=10, transform=plt.gcf().transFigure) legend1 = "{:,}".format(numpy.bincount(data2)[1]) - plt.text(0.4, 0.09, legend1, size=12, transform=plt.gcf().transFigure) + plt.text(0.32, 0.09, legend1, size=10, transform=plt.gcf().transFigure) legend3 = "{:.3f}".format(float(numpy.bincount(data2)[1]) / len(data2)) - plt.text(0.5, 0.09, legend3, size=12, transform=plt.gcf().transFigure) + plt.text(0.41, 0.09, legend3, size=10, transform=plt.gcf().transFigure) + + legend3b = "{:.3f}".format(float(numpy.bincount(data2)[1]) / sum(integers2)) + plt.text(0.5, 0.09, legend3b, size=10, transform=plt.gcf().transFigure) - legend4 = "{:,}".format(numpy.bincount(data2)[len(numpy.bincount(data2)) - 1].astype(int)) - plt.text(0.6, 0.09, legend4, size=12, transform=plt.gcf().transFigure) + legend4 = "{:,} ({:.3f})".format( + numpy.bincount(data2)[len(numpy.bincount(data2)) - 1].astype(int), + float(numpy.bincount(data2)[len(numpy.bincount(data2)) - 1]) / len(data2)) + plt.text(0.58, 0.09, legend4, size=10, transform=plt.gcf().transFigure) - legend5 = "{:.3f}".format(float(numpy.bincount(data2)[len(numpy.bincount(data2)) - 1]) / len(data2)) - plt.text(0.7, 0.09, legend5, size=12, transform=plt.gcf().transFigure) + legend5 = "{:,} ({:.3f})".format(sum(integers2[integers2 > 20]), float(sum(integers2[integers2 > 20])) / sum(integers2)) + plt.text(0.70, 0.09, legend5, size=10, transform=plt.gcf().transFigure) legend6 = "{:,}".format(len(data2)) - plt.text(0.8, 0.09, legend6, size=12, transform=plt.gcf().transFigure) + plt.text(0.82, 0.09, legend6, size=10, transform=plt.gcf().transFigure) + + legend6b = "{:,}".format(sum(integers2)) + plt.text(0.89, 0.09, legend6b, size=10, transform=plt.gcf().transFigure) if thirdFile != str(None): file3 = readFileReferenceFree(thirdFile) + integers3 = numpy.array(file3[:, 0]).astype(int) # keep original family sizes data3 = numpy.asarray(file3[:, 0]).astype(int) bigFamilies3 = numpy.where(data3 > 20)[0] @@ -146,25 +161,35 @@ label.append(name3) data_array_list.append(file3) - plt.text(0.1, 0.07, name3, size=12, transform=plt.gcf().transFigure) + plt.text(0.05, 0.07, name3, size=10, transform=plt.gcf().transFigure) legend1 = "{:,}".format(numpy.bincount(data3)[1]) - plt.text(0.4, 0.07, legend1, size=12, transform=plt.gcf().transFigure) + plt.text(0.32, 0.07, legend1, size=10, transform=plt.gcf().transFigure) legend3 = "{:.3f}".format(float(numpy.bincount(data3)[1]) / len(data3)) - plt.text(0.5, 0.07, legend3, size=12, transform=plt.gcf().transFigure) + plt.text(0.41, 0.07, legend3, size=10, transform=plt.gcf().transFigure) + + legend3b = "{:.3f}".format(float(numpy.bincount(data3)[1]) / sum(integers3)) + plt.text(0.5, 0.07, legend3b, size=10, transform=plt.gcf().transFigure) - legend4 = "{:,}".format(numpy.bincount(data3)[len(numpy.bincount(data3)) - 1].astype(int)) - plt.text(0.6, 0.07, legend4, size=12, transform=plt.gcf().transFigure) + legend4 = "{:,} ({:.3f})".format( + numpy.bincount(data3)[len(numpy.bincount(data3)) - 1].astype(int), + float(numpy.bincount(data3)[len(numpy.bincount(data3)) - 1]) / len(data3)) + plt.text(0.58, 0.07, legend4, size=10, transform=plt.gcf().transFigure) - legend5 = "{:.3f}".format(float(numpy.bincount(data3)[len(numpy.bincount(data3)) - 1]) / len(data3)) - plt.text(0.7, 0.07, legend5, size=12, transform=plt.gcf().transFigure) + legend5 = "{:,} ({:.3f})".format(sum(integers3[integers3 > 20]), + float(sum(integers3[integers3 > 20])) / sum(integers3)) + plt.text(0.70, 0.07, legend5, size=10, transform=plt.gcf().transFigure) legend6 = "{:,}".format(len(data3)) - plt.text(0.8, 0.07, legend6, size=12, transform=plt.gcf().transFigure) + plt.text(0.82, 0.07, legend6, size=10, transform=plt.gcf().transFigure) + + legend6b = "{:,}".format(sum(integers3)) + plt.text(0.89, 0.07, legend6b, size=10, transform=plt.gcf().transFigure) if fourthFile != str(None): file4 = readFileReferenceFree(fourthFile) + integers4 = numpy.array(file4[:, 0]).astype(int) # keep original family sizes data4 = numpy.asarray(file4[:, 0]).astype(int) @@ -176,28 +201,37 @@ label.append(name4) data_array_list.append(file4) - plt.text(0.1, 0.05, name4, size=12, transform=plt.gcf().transFigure) + plt.text(0.05, 0.05, name4, size=10, transform=plt.gcf().transFigure) legend1 = "{:,}".format(numpy.bincount(data4)[1]) - plt.text(0.4, 0.05, legend1, size=12, transform=plt.gcf().transFigure) + plt.text(0.32, 0.05, legend1, size=10, transform=plt.gcf().transFigure) - legend4 = "{:.3f}".format(float(numpy.bincount(data4)[1]) / len(data4)) - plt.text(0.5, 0.05, legend4, size=12, transform=plt.gcf().transFigure) + legend3 = "{:.3f}".format(float(numpy.bincount(data4)[1]) / len(data4)) + plt.text(0.41, 0.05, legend3, size=10, transform=plt.gcf().transFigure) + + legend3b = "{:.3f}".format(float(numpy.bincount(data4)[1]) / sum(integers4)) + plt.text(0.5, 0.05, legend3b, size=10, transform=plt.gcf().transFigure) - legend4 = "{:,}".format(numpy.bincount(data4)[len(numpy.bincount(data4)) - 1].astype(int)) - plt.text(0.6, 0.05, legend4, size=12, transform=plt.gcf().transFigure) + legend4 = "{:,} ({:.3f})".format( + numpy.bincount(data4)[len(numpy.bincount(data4)) - 1].astype(int), + float(numpy.bincount(data4)[len(numpy.bincount(data4)) - 1]) / len(data4)) + plt.text(0.58, 0.05, legend4, size=10, transform=plt.gcf().transFigure) - legend5 = "{:.3f}".format(float(numpy.bincount(data4)[len(numpy.bincount(data4)) - 1]) / len(data4)) - plt.text(0.7, 0.05, legend5, size=12, transform=plt.gcf().transFigure) + legend5 = "{:,} ({:.3f})".format(sum(integers4[integers4 > 20]), + float(sum(integers4[integers4 > 20])) / sum(integers4)) + plt.text(0.70, 0.05, legend5, size=10, transform=plt.gcf().transFigure) legend6 = "{:,}".format(len(data4)) - plt.text(0.8, 0.05, legend6, size=12, transform=plt.gcf().transFigure) + plt.text(0.82, 0.05, legend6, size=10, transform=plt.gcf().transFigure) + + legend6b = "{:,}".format(sum(integers4)) + plt.text(0.89, 0.05, legend6b, size=10, transform=plt.gcf().transFigure) maximumX = numpy.amax(numpy.concatenate(list_to_plot)) minimumX = numpy.amin(numpy.concatenate(list_to_plot)) counts = plt.hist(list_to_plot, bins=range(minimumX, maximumX + 1), stacked=False, edgecolor="black", - linewidth=1, label=label, align="left", alpha=0.7, rwidth=0.8) + linewidth=1, label=label, align="left", rwidth=0.8, alpha=0.7) ticks = numpy.arange(minimumX - 1, maximumX, 1) ticks1 = map(str, ticks) @@ -242,53 +276,71 @@ output_file.write("{}{}".format(int(sum(i)), sep)) # Family size distribution after DCS and SSCS - for dataset, data, name_file in zip(list_to_plot, data_array_list, label): + for dataset, data_o, name_file in zip(list_to_plot, data_array_list, label): maximumX = numpy.amax(dataset) minimumX = numpy.amin(dataset) - tags = numpy.array(data[:, 2]) - seq = numpy.array(data[:, 1]) + tags = numpy.array(data_o[:, 2]) + seq = numpy.array(data_o[:, 1]) data = numpy.array(dataset) - + data_o = numpy.array(data_o[:, 0]).astype(int) # find all unique tags and get the indices for ALL tags, but only once u, index_unique, c = numpy.unique(numpy.array(seq), return_counts=True, return_index=True) d = u[c > 1] # get family sizes, tag for duplicates duplTags_double = data[numpy.in1d(seq, d)] - duplTags = duplTags_double[0::2] # ab of DCS - duplTagsBA = duplTags_double[1::2] # ba of DCS + duplTags_double_o = data_o[numpy.in1d(seq, d)] - # duplTags_double_tag = tags[numpy.in1d(seq, d)] - # duplTags_double_seq = seq[numpy.in1d(seq, d)] + duplTags = duplTags_double[0::2] # ab of DCS + duplTags_o = duplTags_double_o[0::2] # ab of DCS + + duplTagsBA = duplTags_double[1::2] # ba of DCS + duplTagsBA_o = duplTags_double_o[1::2] # ba of DCS # get family sizes for SSCS with no partner ab = numpy.where(tags == "ab")[0] abSeq = seq[ab] + ab_o = data_o[ab] ab = data[ab] + ba = numpy.where(tags == "ba")[0] baSeq = seq[ba] + ba_o = data_o[ba] ba = data[ba] dataAB = ab[numpy.in1d(abSeq, d, invert=True)] + dataAB_o = ab_o[numpy.in1d(abSeq, d, invert=True)] + dataBA = ba[numpy.in1d(baSeq, d, invert=True)] + dataBA_o = ba_o[numpy.in1d(baSeq, d, invert=True)] list1 = [duplTags_double, dataAB, dataBA] # list for plotting # information for family size >= 3 dataAB_FS3 = dataAB[dataAB >= 3] + dataAB_FS3_o = dataAB_o[dataAB_o >= 3] dataBA_FS3 = dataBA[dataBA >= 3] + dataBA_FS3_o = dataBA_o[dataBA_o >= 3] ab_FS3 = ab[ab >= 3] ba_FS3 = ba[ba >= 3] + ab_FS3_o = ab_o[ab_o >= 3] + ba_FS3_o = ba_o[ba_o >= 3] duplTags_FS3 = duplTags[(duplTags >= 3) & (duplTagsBA >= 3)] # ab+ba with FS>=3 duplTags_FS3_BA = duplTagsBA[(duplTags >= 3) & (duplTagsBA >= 3)] # ba+ab with FS>=3 duplTags_double_FS3 = len(duplTags_FS3) + len(duplTags_FS3_BA) # both ab and ba strands with FS>=3 - fig = plt.figure() + # original FS + duplTags_FS3_o = duplTags_o[(duplTags_o >= 3) & (duplTagsBA_o >= 3)] # ab+ba with FS>=3 + duplTags_FS3_BA_o = duplTagsBA_o[(duplTags_o >= 3) & (duplTagsBA_o >= 3)] # ba+ab with FS>=3 + duplTags_double_FS3_o = sum(duplTags_FS3_o) + sum(duplTags_FS3_BA_o) # both ab and ba strands with FS>=3 + fig = plt.figure() plt.subplots_adjust(bottom=0.3) - counts = plt.hist(list1, bins=range(minimumX, maximumX + 1), stacked=True, label=["duplex", "ab", "ba"], edgecolor="black", linewidth=1, align="left", color=["#FF0000", "#5FB404", "#FFBF00"]) + counts = plt.hist(list1, bins=range(minimumX, maximumX + 1), stacked=True, label=["duplex", "ab", "ba"], + edgecolor="black", linewidth=1, align="left", color=["#FF0000", "#5FB404", "#FFBF00"], + rwidth=0.8) # tick labels of x axis ticks = numpy.arange(minimumX - 1, maximumX, 1) ticks1 = map(str, ticks) @@ -298,33 +350,56 @@ last = counts[0][2][len(counts[0][0]) - 1] # large families plt.legend(loc='upper right', fontsize=14, bbox_to_anchor=(0.9, 1), frameon=True) - # plt.title(name1, fontsize=14) + plt.title(name_file, fontsize=14) plt.xlabel("Family size", fontsize=14) plt.ylabel("Absolute Frequency", fontsize=14) plt.margins(0.01, None) plt.grid(b=True, which="major", color="#424242", linestyle=":") # extra information beneath the plot - legend = "SSCS ab= \nSSCS ba= \nDCS (total)= \nlength of dataset=" - plt.text(0.1, 0.09, legend, size=12, transform=plt.gcf().transFigure) + legend = "SSCS ab= \nSSCS ba= \nDCS (total)= \ntotal nr. of tags=" + plt.text(0.1, 0.09, legend, size=10, transform=plt.gcf().transFigure) + + legend = "nr. of tags\n\n{:,}\n{:,}\n{:,} ({:,})\n{:,}".format(len(dataAB), len(dataBA), len(duplTags), len(duplTags_double), (len(dataAB) + len(dataBA) + len(duplTags))) + plt.text(0.23, 0.09, legend, size=10, transform=plt.gcf().transFigure) - legend = "absolute numbers\n\n{:,}\n{:,}\n{:,} ({:,})\n{:,}".format(len(dataAB), len(dataBA), len(duplTags), len(duplTags_double), (len(dataAB) + len(dataBA) + len(duplTags))) - plt.text(0.35, 0.09, legend, size=12, transform=plt.gcf().transFigure) + legend5 = "PE reads\n\n{:,}\n{:,}\n{:,} ({:,})\n{:,}".format(sum(dataAB_o), sum(dataBA_o), sum(duplTags_o), sum(duplTags_double_o), (sum(dataAB_o) + sum(dataBA_o) + sum(duplTags_o))) + plt.text(0.38, 0.09, legend5, size=10, transform=plt.gcf().transFigure) - legend = "relative frequencies\nunique\n{:.3f}\n{:.3f}\n{:.3f}\n{:,}".format(float(len(dataAB)) / (len(dataAB) + len(dataBA) + len(duplTags)), float(len(dataBA)) / (len(dataAB) + len(dataBA) + len(duplTags)), float(len(duplTags)) / (len(dataAB) + len(dataBA) + len(duplTags)), (len(dataAB) + len(dataBA) + len(duplTags))) - plt.text(0.54, 0.09, legend, size=12, transform=plt.gcf().transFigure) + legend = "rel. freq. of tags\nunique\n{:.3f}\n{:.3f}\n{:.3f}\n{:,}".format(float(len(dataAB)) / (len(dataAB) + len(dataBA) + len(duplTags)), float(len(dataBA)) / (len(dataAB) + len(dataBA) + len(duplTags)), float(len(duplTags)) / (len(dataAB) + len(dataBA) + len(duplTags)), (len(dataAB) + len(dataBA) + len(duplTags))) + plt.text(0.54, 0.09, legend, size=10, transform=plt.gcf().transFigure) legend = "total\n{:.3f}\n{:.3f}\n{:.3f} ({:.3f})\n{:,}".format(float(len(dataAB)) / (len(ab) + len(ba)), float(len(dataBA)) / (len(ab) + len(ba)), float(len(duplTags)) / (len(ab) + len(ba)), float(len(duplTags_double)) / (len(ab) + len(ba)), (len(ab) + len(ba))) - plt.text(0.64, 0.09, legend, size=12, transform=plt.gcf().transFigure) + plt.text(0.64, 0.09, legend, size=10, transform=plt.gcf().transFigure) legend1 = "\nsingletons:\nfamily size > 20:" - plt.text(0.1, 0.03, legend1, size=12, transform=plt.gcf().transFigure) + plt.text(0.1, 0.03, legend1, size=10, transform=plt.gcf().transFigure) legend4 = "{:,}\n{:,}".format(singl.astype(int), last.astype(int)) - plt.text(0.35, 0.03, legend4, size=12, transform=plt.gcf().transFigure) + plt.text(0.23, 0.03, legend4, size=10, transform=plt.gcf().transFigure) legend3 = "{:.3f}\n{:.3f}".format(singl / len(data), last / len(data)) - plt.text(0.54, 0.03, legend3, size=12, transform=plt.gcf().transFigure) + plt.text(0.64, 0.03, legend3, size=10, transform=plt.gcf().transFigure) + + legend3 = "\n\n{:,}".format(sum(data_o[data_o > 20])) + plt.text(0.38, 0.03, legend3, size=10, transform=plt.gcf().transFigure) + + legend3 = "{:.3f}\n{:.3f}".format(float(singl)/sum(data_o), float(sum(data_o[data_o > 20])) / sum(data_o)) + plt.text(0.84, 0.03, legend3, size=10, transform=plt.gcf().transFigure) + + legend = "PE reads\nunique\n{:.3f}\n{:.3f}\n{:.3f}\n{:,}".format( + float(sum(dataAB_o)) / (sum(dataAB_o) + sum(dataBA_o) + sum(duplTags_o)), + float(sum(dataBA_o)) / (sum(dataAB_o) + sum(dataBA_o) + sum(duplTags_o)), + float(sum(duplTags_o)) / (sum(dataAB_o) + sum(dataBA_o) + sum(duplTags_o)), + (sum(dataAB_o) + sum(dataBA_o) + sum(duplTags_o))) + plt.text(0.74, 0.09, legend, size=10, transform=plt.gcf().transFigure) + + legend = "total\n{:.3f}\n{:.3f}\n{:.3f} ({:.3f})\n{:,}".format( + float(sum(dataAB_o)) / (sum(ab_o) + sum(ba_o)), + float(sum(dataBA_o)) / (sum(ab_o) + sum(ba_o)), + float(sum(duplTags_o)) / (sum(ab_o) + sum(ba_o)), + float(sum(duplTags_double_o)) / (sum(ab_o) + sum(ba_o)), (sum(ab_o) + sum(ba_o))) + plt.text(0.84, 0.09, legend, size=10, transform=plt.gcf().transFigure) pdf.savefig(fig) plt.close() @@ -336,23 +411,62 @@ output_file.write("absolute frequency:{}{}\n".format(sep, count[len(count) - 1])) output_file.write("relative frequency:{}{:.3f}\n\n".format(sep, float(count[len(count) - 1]) / sum(count))) - output_file.write("{}singletons:{}{}family size > 20:\n".format(sep, sep, sep)) - output_file.write("{}absolute nr.{}rel. freq{}absolute nr.{}rel. freq{}total length\n".format(sep, sep, sep, sep, sep)) - output_file.write("{}{}{}{}{:.3f}{}{}{}{:.3f}{}{}\n\n".format(name_file, sep, singl.astype(int), sep, singl / len(data), sep, last.astype(int), sep, last / len(data), sep, len(data))) + output_file.write("{}singletons:{}{}{}family size > 20:\n".format(sep, sep, sep, sep)) + output_file.write("{}nr. of tags{}rel. freq of tags{}rel.freq of PE reads{}nr. of tags{}rel. freq of tags{}nr. of PE reads{}rel. freq of PE reads{}total nr. of tags{}total nr. of PE reads\n".format(sep, sep, sep, sep, sep, sep, sep, sep, sep)) + output_file.write("{}{}{}{}{:.3f}{}{:.3f}{}{}{}{:.3f}{}{}{}{:.3f}{}{}{}{}\n\n".format( + name_file, sep, singl.astype(int), sep, singl / len(data), sep, float(singl)/sum(data_o), sep, + last.astype(int), sep, last / len(data), sep, sum(data_o[data_o > 20]), sep, float(sum(data_o[data_o > 20])) / sum(data_o), sep, len(data), sep, sum(data_o))) # information for FS >= 1 - output_file.write("The unique frequencies were calculated from the dataset where the tags occured only once (=ab without DCS, ba without DCS)\nWhereas the total frequencies were calculated from the whole dataset (=including the DCS).\n\n") - output_file.write("FS >= 1{}{}unique:{}total:\n".format(sep, sep, sep)) - output_file.write("nr./rel. freq of ab={}{}{}{:.3f}{}{:.3f}\n".format(sep, len(dataAB), sep, float(len(dataAB)) / (len(dataAB) + len(dataBA) + len( duplTags)), sep, float(len(dataAB)) / (len(ab) + len(ba)))) - output_file.write("nr./rel. freq of ba={}{}{}{:.3f}{}{:.3f}\n".format(sep, len(dataBA), sep, float(len(dataBA)) / (len(dataBA) + len(dataBA) + len(duplTags)), sep, float(len(dataBA)) / (len(ba) + len(ba)))) - output_file.write("nr./rel. freq of DCS (total)={}{} ({}){}{:.3f}{}{:.3f} ({:.3f})\n".format(sep, len(duplTags), len(duplTags_double), sep, float(len(duplTags)) / (len(dataAB) + len(dataBA) + len(duplTags)), sep, float(len(duplTags)) / ( len(ab) + len(ba)), float(len(duplTags_double)) / (len(ab) + len(ba)))) - output_file.write("length of dataset={}{}{}{}{}{}\n".format(sep, (len(dataAB) + len(dataBA) + len(duplTags)), sep, (len(dataAB) + len(dataBA) + len(duplTags)), sep, (len(ab) + len(ba)))) + output_file.write("The unique frequencies were calculated from the dataset where the tags occured only once (=ab without DCS, ba without DCS)\n" + "Whereas the total frequencies were calculated from the whole dataset (=including the DCS).\n\n") + output_file.write("FS >= 1{}nr. of tags{}nr. of PE reads{}rel. freq of tags{}{}rel. freq of PE reads:\n".format(sep, sep, sep, sep, sep)) + output_file.write("{}{}{}unique:{}total{}unique{}total:\n".format(sep, sep, sep, sep, sep, sep)) + output_file.write("SSCS ab{}{}{}{}{}{:.3f}{}{:.3f}{}{:.3f}{}{:.3f}\n".format( + sep, len(dataAB), sep, sum(dataAB_o), sep, float(len(dataAB)) / (len(dataAB) + len(dataBA) + len(duplTags)), + sep, float(sum(dataAB_o)) / (sum(dataAB_o) + sum(dataBA_o) + sum(duplTags_o)), sep, + float(len(dataAB)) / (len(ab) + len(ba)), sep, float(sum(dataAB_o)) / (sum(ab_o) + sum(ba_o)))) + output_file.write("SSCS ba{}{}{}{}{}{:.3f}{}{:.3f}{}{:.3f}{}{:.3f}\n".format( + sep, len(dataBA), sep, sum(dataBA_o), sep, float(len(dataBA)) / (len(dataBA) + len(dataBA) + len(duplTags)), + sep, float(sum(dataBA_o)) / (sum(dataBA_o) + sum(dataBA_o) + sum(duplTags_o)), sep, float(len(dataBA)) / (len(ba) + len(ba)), + sep, float(sum(dataBA_o)) / (sum(ba_o) + sum(ba_o)))) + output_file.write("DCS (total){}{} ({}){}{} ({}){}{:.3f}{}{:.3f} ({:.3f}){}{:.3f}{}{:.3f} ({:.3f})\n".format( + sep, len(duplTags), len(duplTags_double), sep, sum(duplTags_o), sum(duplTags_double_o), sep, + float(len(duplTags)) / (len(dataAB) + len(dataBA) + len(duplTags)), sep, + float(len(duplTags)) / (len(ab) + len(ba)), float(len(duplTags_double)) / (len(ab) + len(ba)), sep, + float(sum(duplTags_o)) / (sum(dataAB_o) + sum(dataBA_o) + sum(duplTags_o)), sep, + float(sum(duplTags_o)) / (sum(ab_o) + sum(ba_o)), float(sum(duplTags_double_o)) / (sum(ab_o) + sum(ba_o)))) + output_file.write("total nr. of tags{}{}{}{}{}{}{}{}{}{}{}{}\n".format( + sep, (len(dataAB) + len(dataBA) + len(duplTags)), sep, (sum(dataAB_o) + sum(dataBA_o) + sum(duplTags_o)), sep, + (len(dataAB) + len(dataBA) + len(duplTags)), sep, (len(ab) + len(ba)), sep, + (sum(dataAB_o) + sum(dataBA_o) + sum(duplTags_o)), sep, (sum(ab_o) + sum(ba_o)))) # information for FS >= 3 - output_file.write("FS >= 3{}{}unique:{}total:\n".format(sep, sep, sep)) - output_file.write("nr./rel. freq of ab={}{}{}{:.3f}{}{:.3f}\n".format(sep, len(dataAB_FS3), sep, float(len(dataAB_FS3)) / (len(dataAB_FS3) + len(dataBA_FS3) + len(duplTags_FS3)), sep, float(len(dataAB_FS3)) / (len(ab_FS3) + len(ba_FS3)))) - output_file.write("nr./rel. freq of ba={}{}{}{:.3f}{}{:.3f}\n".format(sep, len(dataBA_FS3), sep, float(len(dataBA_FS3)) / (len(dataBA_FS3) + len(dataBA_FS3) + len(duplTags_FS3)), sep, float(len(dataBA_FS3)) / (len(ba_FS3) + len(ba_FS3)))) - output_file.write("nr./rel. freq of DCS (total)={}{} ({}){}{:.3f}{}{:.3f} ({:.3f})\n".format(sep, len(duplTags_FS3), duplTags_double_FS3, sep, float(len( duplTags_FS3)) / (len(dataBA_FS3) + len(duplTags_FS3)), sep, float(len(duplTags_FS3)) / (len(ab_FS3) + len(ba_FS3)), float(duplTags_double_FS3) / (len(ab_FS3) + len(ba_FS3)))) - output_file.write("length of dataset={}{}{}{}{}{}\n".format(sep, (len(dataAB_FS3) + len(dataBA_FS3) + len(duplTags_FS3)), sep, (len(dataAB_FS3) + len(dataBA_FS3) + len(duplTags_FS3)), sep, (len(ab_FS3) + len(ba_FS3)))) + output_file.write("\nFS >= 3{}nr. of tags{}nr. of PE reads{}rel. freq of tags{}{}rel. freq of PE reads:\n".format(sep, sep, sep, sep, sep)) + output_file.write("{}{}{}unique:{}total{}unique{}total:\n".format(sep, sep, sep, sep, sep, sep)) + output_file.write("SSCS ab{}{}{}{}{}{:.3f}{}{:.3f}{}{:.3f}{}{:.3f}\n".format( + sep, len(dataAB_FS3), sep, sum(dataAB_FS3_o), sep, + float(len(dataAB_FS3)) / (len(dataAB_FS3) + len(dataBA_FS3) + len(duplTags_FS3)), sep, + float(len(dataAB_FS3)) / (len(dataBA_FS3) + len(dataBA_FS3) + duplTags_double_FS3), + sep, float(sum(dataAB_FS3_o)) / (sum(dataAB_FS3_o) + sum(dataBA_FS3_o) + sum(duplTags_FS3_o)), + sep, float(sum(dataAB_FS3_o)) / (sum(dataBA_FS3_o) + sum(dataBA_FS3_o) + duplTags_double_FS3_o))) + output_file.write("SSCS ba{}{}{}{}{}{:.3f}{}{:.3f}{}{:.3f}{}{:.3f}\n".format( + sep, len(dataBA_FS3), sep, sum(dataBA_FS3_o), sep, + float(len(dataBA_FS3)) / (len(dataBA_FS3) + len(dataBA_FS3) + len(duplTags_FS3)), + sep, float(len(dataBA_FS3)) / (len(dataBA_FS3) + len(dataBA_FS3) + duplTags_double_FS3), + sep, float(sum(dataBA_FS3_o)) / (sum(dataBA_FS3_o) + sum(dataBA_FS3_o) + sum(duplTags_FS3_o)), + sep, float(sum(dataBA_FS3_o)) / (sum(dataBA_FS3_o) + sum(dataBA_FS3_o) + duplTags_double_FS3_o))) + output_file.write("DCS (total){}{} ({}){}{} ({}){}{:.3f}{}{:.3f} ({:.3f}){}{:.3f}{}{:.3f} ({:.3f})\n".format( + sep, len(duplTags_FS3), duplTags_double_FS3, sep, sum(duplTags_FS3_o), duplTags_double_FS3_o, sep, + float(len(duplTags_FS3)) / (len(dataAB_FS3) + len(dataBA_FS3) + len(duplTags_FS3)), sep, + float(len(duplTags_FS3)) / (len(dataAB_FS3) + len(dataBA_FS3) + duplTags_double_FS3), + float(duplTags_double_FS3) / (len(dataAB_FS3) + len(dataBA_FS3) + duplTags_double_FS3), + sep, float(sum(duplTags_FS3_o)) / (sum(dataAB_FS3_o) + sum(dataBA_FS3_o) + sum(duplTags_FS3_o)), sep, + float(sum(duplTags_FS3_o)) / (sum(dataAB_FS3_o) + sum(dataBA_FS3_o) + duplTags_double_FS3_o), + float(duplTags_double_FS3_o) / (sum(dataAB_FS3_o) + sum(dataBA_FS3_o) + duplTags_double_FS3_o))) + output_file.write("total nr. of tags{}{}{}{}{}{}{}{}{}{}{}{}\n".format( + sep, (len(dataAB_FS3) + len(dataBA_FS3) + len(duplTags_FS3)), sep, (sum(dataAB_FS3_o) + sum(dataBA_FS3_o) + sum(duplTags_FS3_o)), + sep, (len(dataAB_FS3) + len(dataBA_FS3) + len(duplTags_FS3)), sep, (len(dataAB_FS3) + len(dataBA_FS3) + duplTags_double_FS3), + sep, (sum(dataAB_FS3_o) + sum(dataBA_FS3_o) + sum(duplTags_FS3_o)), sep, (sum(dataAB_FS3_o) + sum(dataBA_FS3_o) + duplTags_double_FS3_o))) output_file.write("\nValues from family size distribution\n") output_file.write("{}duplex{}ab{}ba{}sum\n".format(sep, sep, sep, sep))
--- a/fsd.xml Mon Oct 08 05:50:18 2018 -0400 +++ b/fsd.xml Tue Apr 02 05:10:09 2019 -0400 @@ -24,19 +24,19 @@ </outputs> <tests> <test> - <param name="file1" value="Test_data.tabular"/> - <param name="file2" value="Test_data2.tabular"/> - <param name="file3" value="Test_data3.tabular"/> - <param name="file4" value="Test_data4.tabular"/> - <output name="output_pdf" file="output_file.pdf" lines_diff="285"/> - <output name="output_tabular" file="output_file.tabular"/> + <param name="file1" value="fsd_data1.tab"/> + <param name="file2" value="fsd_data2.tab"/> + <param name="file3" value="fsd_data3.tab"/> + <param name="file4" value="fsd_data4.tab"/> + <output name="output_pdf" file="fsd_output1.pdf" lines_diff="285"/> + <output name="output_tabular" file="fsd_output1.tab"/> </test> <test> - <param name="file1" value="Test_data.tabular"/> - <param name="file2" value="Test_data2.tabular"/> - <param name="file3" value="Test_data3.tabular"/> - <output name="output_pdf" file="output_file2.pdf" lines_diff="285"/> - <output name="output_tabular" file="output_file2.tabular"/> + <param name="file1" value="fsd_data1.tab"/> + <param name="file2" value="fsd_data2.tab"/> + <param name="file3" value="fsd_data3.tab"/> + <output name="output_pdf" file="fsd_output2.pdf" lines_diff="285"/> + <output name="output_tabular" file="fsd_output2.tab"/> </test> </tests> <help><![CDATA[
--- a/test-data/Test_data.tabular Mon Oct 08 05:50:18 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,112 +0,0 @@ -1 AAAAAAAAAAAAAACCAAAACTTC ba -1 AAAAAAAAAAAAACCAGGCGTCGA ba -1 AAAAAAAAAAAAAGCTCCACGTTG ba -1 AAAAAAAAAAAAATCGTGGTTTGT ba -1 AAAAAAAAAAAAATTCACCCTTGT ba -7 AAAAAAAAAAAACACACTTAACTT ba -1 AAAAAAAAAAAACAGTGTTGAGAC ba -4 AAAAAAAAAAAACCGCTCCTCACA ba -1 AAAAAAAAAAAAGGCAACACAGAA ab -2 AAAAAAAAAAAATCTTTCTTTGAG ab -1 AAAAAAAAAAAATTGGGTTCCTTA ab -1 AAAAAAAAAAAGAGTCGCACCCAG ba -4 AAAAAAAAAAAGATCGTGGTTTGT ba -1 AAAAAAAAAAAGCGCAACACAGAA ab -3 AAAAAAAAAAAGGGCAACACAGAA ab -1 AAAAAAAAAAAGTAGCCCTAAACG ab -1 AAAAAAAAAAAGTCTTTCTTTGAG ab -1 AAAAAAAAAAATATCATAGACTCT ab -6 AAAAAAAAAAATATTCACCCTTGT ba -1 AAAAAAAAAAATATTCGAAAGTTA ba -3 AAAAAAAAAAATCACACTTAACTT ba -1 AAAAAAAAAAATCCGCTCCTCACA ba -1 AAAAAAAAAAATTAACTAAACTTA ab -1 AAAAAAAAAACAAATTCTATTATT ab -1 AAAAAAAAAACTCCCAGATTTTTT ab -1 AAAAAAAAAACTTCTGCTTGGCGG ba -11 AAAAAAAAAAGAATCGTGGTTTGT ba -5 AAAAAAAAAAGATAGCCCTAAACG ab -1 AAAAAAAAAAGCAATAATGCCAGT ab -2 AAAAAAAAAAGTACCGCACTCTCA ba -1 AAAAAAAAAAGTTCTTTCTTTGAG ab -1 AAAAAAAAAATAACTTCAATAATG ba -2 AAAAAAAAAATAATCATAGACTCT ab -1 AAAAAAAAAATAGTCTCACATTTA ab -1 AAAAAAAAAATATAACCTTTGGCG ab -3 AAAAAAAAACAAAATTCTATTATT ab -1 AAAAAAAAACAAGTACGCGGCATT ab -1 AAAAAAAAACAAGTACGCGGTATT ab -1 AAAAAAAAACAATATCGAATTAAC ab -3 AAAAAAAAACACGGTGAGACAAGG ba -1 AAAAAAAAACACGTTTCTCCCCTT ba -1 AAAAAAAAACATATCGTCCCGAGC ba -1 AAAAAAAAACCTACCTGAGGCCCC ab -3 AAAAAAAAACCTTATTACAGCGGA ab -1 AAAAAAAAACGATTCTCTGTATCT ba -1 AAAAAAAAACGTACCGCACTCTCA ba -4 AAAAAAAAACTACCCAGATTTTTT ba -1 AAAAAAAAACTAGATGAGACGACC ba -4 AAAAAAAAACTGTCTGCTTGGCGG ba -1 AAAAAAAAAGAAGTTTAATTTTAA ab -1 AAAAAAAAAGAATGCCTAAGACGA ba -6 AAAAAAAAAGACCGGCCTTAGACA ba -1 AAAAAAAAAGATATCGTGGTTTGT ba -1 AAAAAAAAAGCAATACTCAAGCTG ba -6 AAAAAAAAAGCAATGTCTAAGCCT ba -1 AAAAAAAAAGCACTGTCTAAGCCT ab -2 AAAAAAAAAGCTAATAATGCCAGT ab -1 AAAAAAAAAGTTTCGTGAAGGTCC ba -1 AAAAAAAAATAAAGGTCCGAATCT ab -1 AAAAAAAAATAAATGAGAGTGTAA ba -8 AAAAAAAAATAAGTCTCACATTTA ab -1 AAAAAAAAATAATAACCTCTGGCG ab -10 AAAAAAAAATAATAACCTTTGGCG ab -1 AAAAAAAAATAATCCCCTTTGTCG ab -6 AAAAAAAAATACGCAAACGCTGAG ab -4 AAAAAAAAATAGATCATAGACTCT ab -10 AAAAAAAAATAGATCATAGACTCT ba -10 AAAAAAAAATAGTAGGATTTCATG ba -7 AAAAAAAAATATGAATACCCTCGT ba -1 AAAAAAAAATATGCCACTTGATCC ba -1 AAAAAAAAATATTCTGCCACTTGA ba -3 AAAAAAAAATCAAACCAAGAGGAC ba -1 AAAAAAAAATCAGTACCCCTAAAC ab -12 AAAAAAAAATCCTAGTTAATGAAG ba -1 AAAAAAAAATCGATTCTTTATGCG ab -1 AAAAAAAAATGTCTGAAAATATCT ab -4 AAAAAAAAATGTCTGAAAATATCT ba -1 AAAAAAAAATTTCCGCAGACCGTT ba -8 AAAAAAAAATTTGGGCTACTACAA ba -1 AAAAAAAACAAAATTAGAACCCTT ab -1 AAAAAAAACAAACCGCTCCTCACA ba -5 AAAAAAAACAACGTACGCGGTATT ab -4 AAAAAAAACAATATCGTTGATATG ba -4 AAAAAAAACAATCACGTTAATAGG ab -1 AAAAAAAACAGAATCGTGGTTTGT ba -1 AAAAAAAACCAAATCGTTGATATG ba -9 AAAAAAAACCAAGTCCAGGCATCT ba -2 AAAAAAAACCACGGTGAGACAAGG ba -1 AAAAAAAACCGCCCAACTGCCGGT ab -5 AAAAAAAACCTCTCAACCCCAAAT ba -7 AAAAAAAACCTCTTGCGATGTTGT ab -1 AAAAAAAACCTCTTGCGCTGTTGT ab -1 AAAAAAAACCTCTTGTGATGTTGT ab -12 AAAAAAAACCTGAGCAATGGTTCC ab -3 AAAAAAAACCTTGACCCTCACATG ba -6 AAAAAAAACCTTGCACTCGTCCTA ba -9 AAAAAAAACGAAATAAAAAAACCT ba -1 AAAAAAAACGACCGGCCTTAGACA ba -4 AAAAAAAACGCCACCACCCCCTTT ab -12 AAAAAAAACGCCACGGGCACTATT ba -13 AAAAAAAACGTATCAGTAGATCCT ab -1 AAAAAAAACTAGTAGGATTTCATG ba -3 AAAAAAAACTATAGAAAATCCATT ba -1 AAAAAAAACTATTCTATTTCCGAT ba -13 AAAAAAAACTGATCTGCTTGGCGG ba -8 AAAAAAAACTTGCGAATAGCATCG ba -4 AAAAAAAACTTGTTATCAAAACGT ab -1 AAAAAAAAGAAAAGTTCAACACGC ba -1 AAAAAAAAGAAGTTCGCCCTCCGA ab -13 AAAAAAAAGAGAGTTTAGTCATGG ab -1 AAAAAAAAGAGAGTTTAGTCATGG ba -1 AAAAAAAAGAGAGTTTAGTCCTGG ab \ No newline at end of file
--- a/test-data/Test_data2.tabular Mon Oct 08 05:50:18 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,112 +0,0 @@ -1 AAAAAAAAAAAAAACCAAAACTTC ba -1 AAAAAAAAAAAAACCAGGCGTCGA ba -1 AAAAAAAAAAAAAGCTCCACGTTG ba -1 AAAAAAAAAAAAATCGTGGTTTGT ba -1 AAAAAAAAAAAAATTCACCCTTGT ba -7 AAAAAAAAAAAACACACTTAACTT ba -1 AAAAAAAAAAAACAGTGTTGAGAC ba -4 AAAAAAAAAAAACCGCTCCTCACA ba -1 AAAAAAAAAAAAGGCAACACAGAA ab -2 AAAAAAAAAAAATCTTTCTTTGAG ab -1 AAAAAAAAAAAATTGGGTTCCTTA ab -1 AAAAAAAAAAAGAGTCGCACCCAG ba -4 AAAAAAAAAAAGATCGTGGTTTGT ba -1 AAAAAAAAAAAGCGCAACACAGAA ab -3 AAAAAAAAAAAGGGCAACACAGAA ab -1 AAAAAAAAAAAGTAGCCCTAAACG ab -1 AAAAAAAAAAAGTCTTTCTTTGAG ab -1 AAAAAAAAAAATATCATAGACTCT ab -6 AAAAAAAAAAATATTCACCCTTGT ba -1 AAAAAAAAAAATATTCGAAAGTTA ba -3 AAAAAAAAAAATCACACTTAACTT ba -1 AAAAAAAAAAATCCGCTCCTCACA ba -1 AAAAAAAAAAATTAACTAAACTTA ab -1 AAAAAAAAAACAAATTCTATTATT ab -1 AAAAAAAAAACTCCCAGATTTTTT ab -1 AAAAAAAAAACTTCTGCTTGGCGG ba -11 AAAAAAAAAAGAATCGTGGTTTGT ba -5 AAAAAAAAAAGATAGCCCTAAACG ab -1 AAAAAAAAAAGCAATAATGCCAGT ab -2 AAAAAAAAAAGTACCGCACTCTCA ba -1 AAAAAAAAAAGTTCTTTCTTTGAG ab -1 AAAAAAAAAATAACTTCAATAATG ba -2 AAAAAAAAAATAATCATAGACTCT ab -1 AAAAAAAAAATAGTCTCACATTTA ab -1 AAAAAAAAAATATAACCTTTGGCG ab -3 AAAAAAAAACAAAATTCTATTATT ab -1 AAAAAAAAACAAGTACGCGGCATT ab -1 AAAAAAAAACAAGTACGCGGTATT ab -1 AAAAAAAAACAATATCGAATTAAC ab -3 AAAAAAAAACACGGTGAGACAAGG ba -1 AAAAAAAAACACGTTTCTCCCCTT ba -1 AAAAAAAAACATATCGTCCCGAGC ba -1 AAAAAAAAACCTACCTGAGGCCCC ab -3 AAAAAAAAACCTTATTACAGCGGA ab -1 AAAAAAAAACGATTCTCTGTATCT ba -1 AAAAAAAAACGTACCGCACTCTCA ba -4 AAAAAAAAACTACCCAGATTTTTT ba -1 AAAAAAAAACTAGATGAGACGACC ba -4 AAAAAAAAACTGTCTGCTTGGCGG ba -1 AAAAAAAAAGAAGTTTAATTTTAA ab -1 AAAAAAAAAGAATGCCTAAGACGA ba -6 AAAAAAAAAGACCGGCCTTAGACA ba -1 AAAAAAAAAGATATCGTGGTTTGT ba -1 AAAAAAAAAGCAATACTCAAGCTG ba -6 AAAAAAAAAGCAATGTCTAAGCCT ba -1 AAAAAAAAAGCACTGTCTAAGCCT ab -2 AAAAAAAAAGCTAATAATGCCAGT ab -1 AAAAAAAAAGTTTCGTGAAGGTCC ba -1 AAAAAAAAATAAAGGTCCGAATCT ab -1 AAAAAAAAATAAATGAGAGTGTAA ba -8 AAAAAAAAATAAGTCTCACATTTA ab -1 AAAAAAAAATAATAACCTCTGGCG ab -10 AAAAAAAAATAATAACCTTTGGCG ab -1 AAAAAAAAATAATCCCCTTTGTCG ab -6 AAAAAAAAATACGCAAACGCTGAG ab -4 AAAAAAAAATAGATCATAGACTCT ab -10 AAAAAAAAATAGATCATAGACTCT ba -10 AAAAAAAAATAGTAGGATTTCATG ba -7 AAAAAAAAATATGAATACCCTCGT ba -1 AAAAAAAAATATGCCACTTGATCC ba -1 AAAAAAAAATATTCTGCCACTTGA ba -3 AAAAAAAAATCAAACCAAGAGGAC ba -1 AAAAAAAAATCAGTACCCCTAAAC ab -12 AAAAAAAAATCCTAGTTAATGAAG ba -1 AAAAAAAAATCGATTCTTTATGCG ab -1 AAAAAAAAATGTCTGAAAATATCT ab -4 AAAAAAAAATGTCTGAAAATATCT ba -1 AAAAAAAAATTTCCGCAGACCGTT ba -8 AAAAAAAAATTTGGGCTACTACAA ba -1 AAAAAAAACAAAATTAGAACCCTT ab -1 AAAAAAAACAAACCGCTCCTCACA ba -5 AAAAAAAACAACGTACGCGGTATT ab -4 AAAAAAAACAATATCGTTGATATG ba -4 AAAAAAAACAATCACGTTAATAGG ab -1 AAAAAAAACAGAATCGTGGTTTGT ba -1 AAAAAAAACCAAATCGTTGATATG ba -9 AAAAAAAACCAAGTCCAGGCATCT ba -2 AAAAAAAACCACGGTGAGACAAGG ba -1 AAAAAAAACCGCCCAACTGCCGGT ab -5 AAAAAAAACCTCTCAACCCCAAAT ba -7 AAAAAAAACCTCTTGCGATGTTGT ab -1 AAAAAAAACCTCTTGCGCTGTTGT ab -1 AAAAAAAACCTCTTGTGATGTTGT ab -12 AAAAAAAACCTGAGCAATGGTTCC ab -3 AAAAAAAACCTTGACCCTCACATG ba -6 AAAAAAAACCTTGCACTCGTCCTA ba -9 AAAAAAAACGAAATAAAAAAACCT ba -1 AAAAAAAACGACCGGCCTTAGACA ba -4 AAAAAAAACGCCACCACCCCCTTT ab -12 AAAAAAAACGCCACGGGCACTATT ba -13 AAAAAAAACGTATCAGTAGATCCT ab -1 AAAAAAAACTAGTAGGATTTCATG ba -3 AAAAAAAACTATAGAAAATCCATT ba -1 AAAAAAAACTATTCTATTTCCGAT ba -13 AAAAAAAACTGATCTGCTTGGCGG ba -8 AAAAAAAACTTGCGAATAGCATCG ba -4 AAAAAAAACTTGTTATCAAAACGT ab -1 AAAAAAAAGAAAAGTTCAACACGC ba -1 AAAAAAAAGAAGTTCGCCCTCCGA ab -13 AAAAAAAAGAGAGTTTAGTCATGG ab -1 AAAAAAAAGAGAGTTTAGTCATGG ba -1 AAAAAAAAGAGAGTTTAGTCCTGG ab \ No newline at end of file
--- a/test-data/Test_data3.tabular Mon Oct 08 05:50:18 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,112 +0,0 @@ -1 AAAAAAAAAAAAAACCAAAACTTC ba -1 AAAAAAAAAAAAACCAGGCGTCGA ba -1 AAAAAAAAAAAAAGCTCCACGTTG ba -1 AAAAAAAAAAAAATCGTGGTTTGT ba -1 AAAAAAAAAAAAATTCACCCTTGT ba -7 AAAAAAAAAAAACACACTTAACTT ba -1 AAAAAAAAAAAACAGTGTTGAGAC ba -4 AAAAAAAAAAAACCGCTCCTCACA ba -1 AAAAAAAAAAAAGGCAACACAGAA ab -2 AAAAAAAAAAAATCTTTCTTTGAG ab -1 AAAAAAAAAAAATTGGGTTCCTTA ab -1 AAAAAAAAAAAGAGTCGCACCCAG ba -4 AAAAAAAAAAAGATCGTGGTTTGT ba -1 AAAAAAAAAAAGCGCAACACAGAA ab -3 AAAAAAAAAAAGGGCAACACAGAA ab -1 AAAAAAAAAAAGTAGCCCTAAACG ab -1 AAAAAAAAAAAGTCTTTCTTTGAG ab -1 AAAAAAAAAAATATCATAGACTCT ab -6 AAAAAAAAAAATATTCACCCTTGT ba -1 AAAAAAAAAAATATTCGAAAGTTA ba -3 AAAAAAAAAAATCACACTTAACTT ba -1 AAAAAAAAAAATCCGCTCCTCACA ba -1 AAAAAAAAAAATTAACTAAACTTA ab -1 AAAAAAAAAACAAATTCTATTATT ab -1 AAAAAAAAAACTCCCAGATTTTTT ab -1 AAAAAAAAAACTTCTGCTTGGCGG ba -11 AAAAAAAAAAGAATCGTGGTTTGT ba -5 AAAAAAAAAAGATAGCCCTAAACG ab -1 AAAAAAAAAAGCAATAATGCCAGT ab -2 AAAAAAAAAAGTACCGCACTCTCA ba -1 AAAAAAAAAAGTTCTTTCTTTGAG ab -1 AAAAAAAAAATAACTTCAATAATG ba -2 AAAAAAAAAATAATCATAGACTCT ab -1 AAAAAAAAAATAGTCTCACATTTA ab -1 AAAAAAAAAATATAACCTTTGGCG ab -3 AAAAAAAAACAAAATTCTATTATT ab -1 AAAAAAAAACAAGTACGCGGCATT ab -1 AAAAAAAAACAAGTACGCGGTATT ab -1 AAAAAAAAACAATATCGAATTAAC ab -3 AAAAAAAAACACGGTGAGACAAGG ba -1 AAAAAAAAACACGTTTCTCCCCTT ba -1 AAAAAAAAACATATCGTCCCGAGC ba -1 AAAAAAAAACCTACCTGAGGCCCC ab -3 AAAAAAAAACCTTATTACAGCGGA ab -1 AAAAAAAAACGATTCTCTGTATCT ba -1 AAAAAAAAACGTACCGCACTCTCA ba -4 AAAAAAAAACTACCCAGATTTTTT ba -1 AAAAAAAAACTAGATGAGACGACC ba -4 AAAAAAAAACTGTCTGCTTGGCGG ba -1 AAAAAAAAAGAAGTTTAATTTTAA ab -1 AAAAAAAAAGAATGCCTAAGACGA ba -6 AAAAAAAAAGACCGGCCTTAGACA ba -1 AAAAAAAAAGATATCGTGGTTTGT ba -1 AAAAAAAAAGCAATACTCAAGCTG ba -6 AAAAAAAAAGCAATGTCTAAGCCT ba -1 AAAAAAAAAGCACTGTCTAAGCCT ab -2 AAAAAAAAAGCTAATAATGCCAGT ab -1 AAAAAAAAAGTTTCGTGAAGGTCC ba -1 AAAAAAAAATAAAGGTCCGAATCT ab -1 AAAAAAAAATAAATGAGAGTGTAA ba -8 AAAAAAAAATAAGTCTCACATTTA ab -1 AAAAAAAAATAATAACCTCTGGCG ab -10 AAAAAAAAATAATAACCTTTGGCG ab -1 AAAAAAAAATAATCCCCTTTGTCG ab -6 AAAAAAAAATACGCAAACGCTGAG ab -4 AAAAAAAAATAGATCATAGACTCT ab -10 AAAAAAAAATAGATCATAGACTCT ba -10 AAAAAAAAATAGTAGGATTTCATG ba -7 AAAAAAAAATATGAATACCCTCGT ba -1 AAAAAAAAATATGCCACTTGATCC ba -1 AAAAAAAAATATTCTGCCACTTGA ba -3 AAAAAAAAATCAAACCAAGAGGAC ba -1 AAAAAAAAATCAGTACCCCTAAAC ab -12 AAAAAAAAATCCTAGTTAATGAAG ba -1 AAAAAAAAATCGATTCTTTATGCG ab -1 AAAAAAAAATGTCTGAAAATATCT ab -4 AAAAAAAAATGTCTGAAAATATCT ba -1 AAAAAAAAATTTCCGCAGACCGTT ba -8 AAAAAAAAATTTGGGCTACTACAA ba -1 AAAAAAAACAAAATTAGAACCCTT ab -1 AAAAAAAACAAACCGCTCCTCACA ba -5 AAAAAAAACAACGTACGCGGTATT ab -4 AAAAAAAACAATATCGTTGATATG ba -4 AAAAAAAACAATCACGTTAATAGG ab -1 AAAAAAAACAGAATCGTGGTTTGT ba -1 AAAAAAAACCAAATCGTTGATATG ba -9 AAAAAAAACCAAGTCCAGGCATCT ba -2 AAAAAAAACCACGGTGAGACAAGG ba -1 AAAAAAAACCGCCCAACTGCCGGT ab -5 AAAAAAAACCTCTCAACCCCAAAT ba -7 AAAAAAAACCTCTTGCGATGTTGT ab -1 AAAAAAAACCTCTTGCGCTGTTGT ab -1 AAAAAAAACCTCTTGTGATGTTGT ab -12 AAAAAAAACCTGAGCAATGGTTCC ab -3 AAAAAAAACCTTGACCCTCACATG ba -6 AAAAAAAACCTTGCACTCGTCCTA ba -9 AAAAAAAACGAAATAAAAAAACCT ba -1 AAAAAAAACGACCGGCCTTAGACA ba -4 AAAAAAAACGCCACCACCCCCTTT ab -12 AAAAAAAACGCCACGGGCACTATT ba -13 AAAAAAAACGTATCAGTAGATCCT ab -1 AAAAAAAACTAGTAGGATTTCATG ba -3 AAAAAAAACTATAGAAAATCCATT ba -1 AAAAAAAACTATTCTATTTCCGAT ba -13 AAAAAAAACTGATCTGCTTGGCGG ba -8 AAAAAAAACTTGCGAATAGCATCG ba -4 AAAAAAAACTTGTTATCAAAACGT ab -1 AAAAAAAAGAAAAGTTCAACACGC ba -1 AAAAAAAAGAAGTTCGCCCTCCGA ab -13 AAAAAAAAGAGAGTTTAGTCATGG ab -1 AAAAAAAAGAGAGTTTAGTCATGG ba -1 AAAAAAAAGAGAGTTTAGTCCTGG ab \ No newline at end of file
--- a/test-data/Test_data4.tabular Mon Oct 08 05:50:18 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,112 +0,0 @@ -1 AAAAAAAAAAAAAACCAAAACTTC ba -1 AAAAAAAAAAAAACCAGGCGTCGA ba -1 AAAAAAAAAAAAAGCTCCACGTTG ba -1 AAAAAAAAAAAAATCGTGGTTTGT ba -1 AAAAAAAAAAAAATTCACCCTTGT ba -7 AAAAAAAAAAAACACACTTAACTT ba -1 AAAAAAAAAAAACAGTGTTGAGAC ba -4 AAAAAAAAAAAACCGCTCCTCACA ba -1 AAAAAAAAAAAAGGCAACACAGAA ab -2 AAAAAAAAAAAATCTTTCTTTGAG ab -1 AAAAAAAAAAAATTGGGTTCCTTA ab -1 AAAAAAAAAAAGAGTCGCACCCAG ba -4 AAAAAAAAAAAGATCGTGGTTTGT ba -1 AAAAAAAAAAAGCGCAACACAGAA ab -3 AAAAAAAAAAAGGGCAACACAGAA ab -1 AAAAAAAAAAAGTAGCCCTAAACG ab -1 AAAAAAAAAAAGTCTTTCTTTGAG ab -1 AAAAAAAAAAATATCATAGACTCT ab -6 AAAAAAAAAAATATTCACCCTTGT ba -1 AAAAAAAAAAATATTCGAAAGTTA ba -3 AAAAAAAAAAATCACACTTAACTT ba -1 AAAAAAAAAAATCCGCTCCTCACA ba -1 AAAAAAAAAAATTAACTAAACTTA ab -1 AAAAAAAAAACAAATTCTATTATT ab -1 AAAAAAAAAACTCCCAGATTTTTT ab -1 AAAAAAAAAACTTCTGCTTGGCGG ba -11 AAAAAAAAAAGAATCGTGGTTTGT ba -5 AAAAAAAAAAGATAGCCCTAAACG ab -1 AAAAAAAAAAGCAATAATGCCAGT ab -2 AAAAAAAAAAGTACCGCACTCTCA ba -1 AAAAAAAAAAGTTCTTTCTTTGAG ab -1 AAAAAAAAAATAACTTCAATAATG ba -2 AAAAAAAAAATAATCATAGACTCT ab -1 AAAAAAAAAATAGTCTCACATTTA ab -1 AAAAAAAAAATATAACCTTTGGCG ab -3 AAAAAAAAACAAAATTCTATTATT ab -1 AAAAAAAAACAAGTACGCGGCATT ab -1 AAAAAAAAACAAGTACGCGGTATT ab -1 AAAAAAAAACAATATCGAATTAAC ab -3 AAAAAAAAACACGGTGAGACAAGG ba -1 AAAAAAAAACACGTTTCTCCCCTT ba -1 AAAAAAAAACATATCGTCCCGAGC ba -1 AAAAAAAAACCTACCTGAGGCCCC ab -3 AAAAAAAAACCTTATTACAGCGGA ab -1 AAAAAAAAACGATTCTCTGTATCT ba -1 AAAAAAAAACGTACCGCACTCTCA ba -4 AAAAAAAAACTACCCAGATTTTTT ba -1 AAAAAAAAACTAGATGAGACGACC ba -4 AAAAAAAAACTGTCTGCTTGGCGG ba -1 AAAAAAAAAGAAGTTTAATTTTAA ab -1 AAAAAAAAAGAATGCCTAAGACGA ba -6 AAAAAAAAAGACCGGCCTTAGACA ba -1 AAAAAAAAAGATATCGTGGTTTGT ba -1 AAAAAAAAAGCAATACTCAAGCTG ba -6 AAAAAAAAAGCAATGTCTAAGCCT ba -1 AAAAAAAAAGCACTGTCTAAGCCT ab -2 AAAAAAAAAGCTAATAATGCCAGT ab -1 AAAAAAAAAGTTTCGTGAAGGTCC ba -1 AAAAAAAAATAAAGGTCCGAATCT ab -1 AAAAAAAAATAAATGAGAGTGTAA ba -8 AAAAAAAAATAAGTCTCACATTTA ab -1 AAAAAAAAATAATAACCTCTGGCG ab -10 AAAAAAAAATAATAACCTTTGGCG ab -1 AAAAAAAAATAATCCCCTTTGTCG ab -6 AAAAAAAAATACGCAAACGCTGAG ab -4 AAAAAAAAATAGATCATAGACTCT ab -10 AAAAAAAAATAGATCATAGACTCT ba -10 AAAAAAAAATAGTAGGATTTCATG ba -7 AAAAAAAAATATGAATACCCTCGT ba -1 AAAAAAAAATATGCCACTTGATCC ba -1 AAAAAAAAATATTCTGCCACTTGA ba -3 AAAAAAAAATCAAACCAAGAGGAC ba -1 AAAAAAAAATCAGTACCCCTAAAC ab -12 AAAAAAAAATCCTAGTTAATGAAG ba -1 AAAAAAAAATCGATTCTTTATGCG ab -1 AAAAAAAAATGTCTGAAAATATCT ab -4 AAAAAAAAATGTCTGAAAATATCT ba -1 AAAAAAAAATTTCCGCAGACCGTT ba -8 AAAAAAAAATTTGGGCTACTACAA ba -1 AAAAAAAACAAAATTAGAACCCTT ab -1 AAAAAAAACAAACCGCTCCTCACA ba -5 AAAAAAAACAACGTACGCGGTATT ab -4 AAAAAAAACAATATCGTTGATATG ba -4 AAAAAAAACAATCACGTTAATAGG ab -1 AAAAAAAACAGAATCGTGGTTTGT ba -1 AAAAAAAACCAAATCGTTGATATG ba -9 AAAAAAAACCAAGTCCAGGCATCT ba -2 AAAAAAAACCACGGTGAGACAAGG ba -1 AAAAAAAACCGCCCAACTGCCGGT ab -5 AAAAAAAACCTCTCAACCCCAAAT ba -7 AAAAAAAACCTCTTGCGATGTTGT ab -1 AAAAAAAACCTCTTGCGCTGTTGT ab -1 AAAAAAAACCTCTTGTGATGTTGT ab -12 AAAAAAAACCTGAGCAATGGTTCC ab -3 AAAAAAAACCTTGACCCTCACATG ba -6 AAAAAAAACCTTGCACTCGTCCTA ba -9 AAAAAAAACGAAATAAAAAAACCT ba -1 AAAAAAAACGACCGGCCTTAGACA ba -4 AAAAAAAACGCCACCACCCCCTTT ab -12 AAAAAAAACGCCACGGGCACTATT ba -13 AAAAAAAACGTATCAGTAGATCCT ab -1 AAAAAAAACTAGTAGGATTTCATG ba -3 AAAAAAAACTATAGAAAATCCATT ba -1 AAAAAAAACTATTCTATTTCCGAT ba -13 AAAAAAAACTGATCTGCTTGGCGG ba -8 AAAAAAAACTTGCGAATAGCATCG ba -4 AAAAAAAACTTGTTATCAAAACGT ab -1 AAAAAAAAGAAAAGTTCAACACGC ba -1 AAAAAAAAGAAGTTCGCCCTCCGA ab -13 AAAAAAAAGAGAGTTTAGTCATGG ab -1 AAAAAAAAGAGAGTTTAGTCATGG ba -1 AAAAAAAAGAGAGTTTAGTCCTGG ab \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/fsd_data1.tab Tue Apr 02 05:10:09 2019 -0400 @@ -0,0 +1,112 @@ +1 AAAAAAAAAAAAAACCAAAACTTC ba +1 AAAAAAAAAAAAACCAGGCGTCGA ba +1 AAAAAAAAAAAAAGCTCCACGTTG ba +1 AAAAAAAAAAAAATCGTGGTTTGT ba +1 AAAAAAAAAAAAATTCACCCTTGT ba +7 AAAAAAAAAAAACACACTTAACTT ba +1 AAAAAAAAAAAACAGTGTTGAGAC ba +4 AAAAAAAAAAAACCGCTCCTCACA ba +1 AAAAAAAAAAAAGGCAACACAGAA ab +2 AAAAAAAAAAAATCTTTCTTTGAG ab +1 AAAAAAAAAAAATTGGGTTCCTTA ab +1 AAAAAAAAAAAGAGTCGCACCCAG ba +21 AAAAAAAAAAAGATCGTGGTTTGT ba +1 AAAAAAAAAAAGCGCAACACAGAA ab +3 AAAAAAAAAAAGGGCAACACAGAA ab +1 AAAAAAAAAAAGTAGCCCTAAACG ab +1 AAAAAAAAAAAGTCTTTCTTTGAG ab +1 AAAAAAAAAAATATCATAGACTCT ab +6 AAAAAAAAAAATATTCACCCTTGT ba +1 AAAAAAAAAAATATTCGAAAGTTA ba +3 AAAAAAAAAAATCACACTTAACTT ba +1 AAAAAAAAAAATCCGCTCCTCACA ba +1 AAAAAAAAAAATTAACTAAACTTA ab +1 AAAAAAAAAACAAATTCTATTATT ab +1 AAAAAAAAAACTCCCAGATTTTTT ab +1 AAAAAAAAAACTTCTGCTTGGCGG ba +11 AAAAAAAAAAGAATCGTGGTTTGT ba +5 AAAAAAAAAAGATAGCCCTAAACG ab +1 AAAAAAAAAAGCAATAATGCCAGT ab +2 AAAAAAAAAAGTACCGCACTCTCA ba +1 AAAAAAAAAAGTTCTTTCTTTGAG ab +1 AAAAAAAAAATAACTTCAATAATG ba +2 AAAAAAAAAATAATCATAGACTCT ab +1 AAAAAAAAAATAGTCTCACATTTA ab +1 AAAAAAAAAATATAACCTTTGGCG ab +3 AAAAAAAAACAAAATTCTATTATT ab +1 AAAAAAAAACAAGTACGCGGCATT ab +1 AAAAAAAAACAAGTACGCGGTATT ab +1 AAAAAAAAACAATATCGAATTAAC ab +3 AAAAAAAAACACGGTGAGACAAGG ba +1 AAAAAAAAACACGTTTCTCCCCTT ba +1 AAAAAAAAACATATCGTCCCGAGC ba +1 AAAAAAAAACCTACCTGAGGCCCC ab +3 AAAAAAAAACCTTATTACAGCGGA ab +1 AAAAAAAAACGATTCTCTGTATCT ba +1 AAAAAAAAACGTACCGCACTCTCA ba +4 AAAAAAAAACTACCCAGATTTTTT ba +1 AAAAAAAAACTAGATGAGACGACC ba +4 AAAAAAAAACTGTCTGCTTGGCGG ba +1 AAAAAAAAAGAAGTTTAATTTTAA ab +1 AAAAAAAAAGAATGCCTAAGACGA ba +6 AAAAAAAAAGACCGGCCTTAGACA ba +1 AAAAAAAAAGATATCGTGGTTTGT ba +1 AAAAAAAAAGCAATACTCAAGCTG ba +6 AAAAAAAAAGCAATGTCTAAGCCT ba +1 AAAAAAAAAGCACTGTCTAAGCCT ab +2 AAAAAAAAAGCTAATAATGCCAGT ab +1 AAAAAAAAAGTTTCGTGAAGGTCC ba +1 AAAAAAAAATAAAGGTCCGAATCT ab +1 AAAAAAAAATAAATGAGAGTGTAA ba +8 AAAAAAAAATAAGTCTCACATTTA ab +1 AAAAAAAAATAATAACCTCTGGCG ab +10 AAAAAAAAATAATAACCTTTGGCG ab +1 AAAAAAAAATAATCCCCTTTGTCG ab +6 AAAAAAAAATACGCAAACGCTGAG ab +4 AAAAAAAAATAGATCATAGACTCT ab +10 AAAAAAAAATAGATCATAGACTCT ba +10 AAAAAAAAATAGTAGGATTTCATG ba +7 AAAAAAAAATATGAATACCCTCGT ba +1 AAAAAAAAATATGCCACTTGATCC ba +1 AAAAAAAAATATTCTGCCACTTGA ba +3 AAAAAAAAATCAAACCAAGAGGAC ba +1 AAAAAAAAATCAGTACCCCTAAAC ab +12 AAAAAAAAATCCTAGTTAATGAAG ba +1 AAAAAAAAATCGATTCTTTATGCG ab +1 AAAAAAAAATGTCTGAAAATATCT ab +4 AAAAAAAAATGTCTGAAAATATCT ba +1 AAAAAAAAATTTCCGCAGACCGTT ba +8 AAAAAAAAATTTGGGCTACTACAA ba +1 AAAAAAAACAAAATTAGAACCCTT ab +1 AAAAAAAACAAACCGCTCCTCACA ba +5 AAAAAAAACAACGTACGCGGTATT ab +4 AAAAAAAACAATATCGTTGATATG ba +4 AAAAAAAACAATCACGTTAATAGG ab +1 AAAAAAAACAGAATCGTGGTTTGT ba +1 AAAAAAAACCAAATCGTTGATATG ba +9 AAAAAAAACCAAGTCCAGGCATCT ba +2 AAAAAAAACCACGGTGAGACAAGG ba +1 AAAAAAAACCGCCCAACTGCCGGT ab +5 AAAAAAAACCTCTCAACCCCAAAT ba +7 AAAAAAAACCTCTTGCGATGTTGT ab +1 AAAAAAAACCTCTTGCGCTGTTGT ab +1 AAAAAAAACCTCTTGTGATGTTGT ab +12 AAAAAAAACCTGAGCAATGGTTCC ab +3 AAAAAAAACCTTGACCCTCACATG ba +6 AAAAAAAACCTTGCACTCGTCCTA ba +9 AAAAAAAACGAAATAAAAAAACCT ba +1 AAAAAAAACGACCGGCCTTAGACA ba +4 AAAAAAAACGCCACCACCCCCTTT ab +12 AAAAAAAACGCCACGGGCACTATT ba +13 AAAAAAAACGTATCAGTAGATCCT ab +1 AAAAAAAACTAGTAGGATTTCATG ba +3 AAAAAAAACTATAGAAAATCCATT ba +1 AAAAAAAACTATTCTATTTCCGAT ba +13 AAAAAAAACTGATCTGCTTGGCGG ba +8 AAAAAAAACTTGCGAATAGCATCG ba +4 AAAAAAAACTTGTTATCAAAACGT ab +1 AAAAAAAAGAAAAGTTCAACACGC ba +1 AAAAAAAAGAAGTTCGCCCTCCGA ab +13 AAAAAAAAGAGAGTTTAGTCATGG ab +1 AAAAAAAAGAGAGTTTAGTCATGG ba +1 AAAAAAAAGAGAGTTTAGTCCTGG ab \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/fsd_data2.tab Tue Apr 02 05:10:09 2019 -0400 @@ -0,0 +1,112 @@ +1 AAAAAAAAAAAAAACCAAAACTTC ba +1 AAAAAAAAAAAAACCAGGCGTCGA ba +1 AAAAAAAAAAAAAGCTCCACGTTG ba +1 AAAAAAAAAAAAATCGTGGTTTGT ba +1 AAAAAAAAAAAAATTCACCCTTGT ba +7 AAAAAAAAAAAACACACTTAACTT ba +1 AAAAAAAAAAAACAGTGTTGAGAC ba +4 AAAAAAAAAAAACCGCTCCTCACA ba +1 AAAAAAAAAAAAGGCAACACAGAA ab +2 AAAAAAAAAAAATCTTTCTTTGAG ab +1 AAAAAAAAAAAATTGGGTTCCTTA ab +1 AAAAAAAAAAAGAGTCGCACCCAG ba +21 AAAAAAAAAAAGATCGTGGTTTGT ba +1 AAAAAAAAAAAGCGCAACACAGAA ab +3 AAAAAAAAAAAGGGCAACACAGAA ab +1 AAAAAAAAAAAGTAGCCCTAAACG ab +1 AAAAAAAAAAAGTCTTTCTTTGAG ab +1 AAAAAAAAAAATATCATAGACTCT ab +6 AAAAAAAAAAATATTCACCCTTGT ba +1 AAAAAAAAAAATATTCGAAAGTTA ba +3 AAAAAAAAAAATCACACTTAACTT ba +1 AAAAAAAAAAATCCGCTCCTCACA ba +1 AAAAAAAAAAATTAACTAAACTTA ab +1 AAAAAAAAAACAAATTCTATTATT ab +1 AAAAAAAAAACTCCCAGATTTTTT ab +1 AAAAAAAAAACTTCTGCTTGGCGG ba +11 AAAAAAAAAAGAATCGTGGTTTGT ba +5 AAAAAAAAAAGATAGCCCTAAACG ab +1 AAAAAAAAAAGCAATAATGCCAGT ab +2 AAAAAAAAAAGTACCGCACTCTCA ba +1 AAAAAAAAAAGTTCTTTCTTTGAG ab +1 AAAAAAAAAATAACTTCAATAATG ba +2 AAAAAAAAAATAATCATAGACTCT ab +1 AAAAAAAAAATAGTCTCACATTTA ab +1 AAAAAAAAAATATAACCTTTGGCG ab +3 AAAAAAAAACAAAATTCTATTATT ab +1 AAAAAAAAACAAGTACGCGGCATT ab +1 AAAAAAAAACAAGTACGCGGTATT ab +1 AAAAAAAAACAATATCGAATTAAC ab +3 AAAAAAAAACACGGTGAGACAAGG ba +1 AAAAAAAAACACGTTTCTCCCCTT ba +1 AAAAAAAAACATATCGTCCCGAGC ba +1 AAAAAAAAACCTACCTGAGGCCCC ab +3 AAAAAAAAACCTTATTACAGCGGA ab +1 AAAAAAAAACGATTCTCTGTATCT ba +1 AAAAAAAAACGTACCGCACTCTCA ba +4 AAAAAAAAACTACCCAGATTTTTT ba +1 AAAAAAAAACTAGATGAGACGACC ba +4 AAAAAAAAACTGTCTGCTTGGCGG ba +1 AAAAAAAAAGAAGTTTAATTTTAA ab +1 AAAAAAAAAGAATGCCTAAGACGA ba +6 AAAAAAAAAGACCGGCCTTAGACA ba +1 AAAAAAAAAGATATCGTGGTTTGT ba +1 AAAAAAAAAGCAATACTCAAGCTG ba +6 AAAAAAAAAGCAATGTCTAAGCCT ba +1 AAAAAAAAAGCACTGTCTAAGCCT ab +2 AAAAAAAAAGCTAATAATGCCAGT ab +1 AAAAAAAAAGTTTCGTGAAGGTCC ba +1 AAAAAAAAATAAAGGTCCGAATCT ab +1 AAAAAAAAATAAATGAGAGTGTAA ba +8 AAAAAAAAATAAGTCTCACATTTA ab +1 AAAAAAAAATAATAACCTCTGGCG ab +10 AAAAAAAAATAATAACCTTTGGCG ab +1 AAAAAAAAATAATCCCCTTTGTCG ab +6 AAAAAAAAATACGCAAACGCTGAG ab +4 AAAAAAAAATAGATCATAGACTCT ab +10 AAAAAAAAATAGATCATAGACTCT ba +10 AAAAAAAAATAGTAGGATTTCATG ba +7 AAAAAAAAATATGAATACCCTCGT ba +1 AAAAAAAAATATGCCACTTGATCC ba +1 AAAAAAAAATATTCTGCCACTTGA ba +3 AAAAAAAAATCAAACCAAGAGGAC ba +1 AAAAAAAAATCAGTACCCCTAAAC ab +12 AAAAAAAAATCCTAGTTAATGAAG ba +1 AAAAAAAAATCGATTCTTTATGCG ab +1 AAAAAAAAATGTCTGAAAATATCT ab +4 AAAAAAAAATGTCTGAAAATATCT ba +1 AAAAAAAAATTTCCGCAGACCGTT ba +8 AAAAAAAAATTTGGGCTACTACAA ba +1 AAAAAAAACAAAATTAGAACCCTT ab +1 AAAAAAAACAAACCGCTCCTCACA ba +5 AAAAAAAACAACGTACGCGGTATT ab +4 AAAAAAAACAATATCGTTGATATG ba +4 AAAAAAAACAATCACGTTAATAGG ab +1 AAAAAAAACAGAATCGTGGTTTGT ba +1 AAAAAAAACCAAATCGTTGATATG ba +9 AAAAAAAACCAAGTCCAGGCATCT ba +2 AAAAAAAACCACGGTGAGACAAGG ba +1 AAAAAAAACCGCCCAACTGCCGGT ab +5 AAAAAAAACCTCTCAACCCCAAAT ba +7 AAAAAAAACCTCTTGCGATGTTGT ab +1 AAAAAAAACCTCTTGCGCTGTTGT ab +1 AAAAAAAACCTCTTGTGATGTTGT ab +12 AAAAAAAACCTGAGCAATGGTTCC ab +3 AAAAAAAACCTTGACCCTCACATG ba +6 AAAAAAAACCTTGCACTCGTCCTA ba +9 AAAAAAAACGAAATAAAAAAACCT ba +1 AAAAAAAACGACCGGCCTTAGACA ba +4 AAAAAAAACGCCACCACCCCCTTT ab +12 AAAAAAAACGCCACGGGCACTATT ba +13 AAAAAAAACGTATCAGTAGATCCT ab +1 AAAAAAAACTAGTAGGATTTCATG ba +3 AAAAAAAACTATAGAAAATCCATT ba +1 AAAAAAAACTATTCTATTTCCGAT ba +13 AAAAAAAACTGATCTGCTTGGCGG ba +8 AAAAAAAACTTGCGAATAGCATCG ba +4 AAAAAAAACTTGTTATCAAAACGT ab +1 AAAAAAAAGAAAAGTTCAACACGC ba +1 AAAAAAAAGAAGTTCGCCCTCCGA ab +13 AAAAAAAAGAGAGTTTAGTCATGG ab +1 AAAAAAAAGAGAGTTTAGTCATGG ba +1 AAAAAAAAGAGAGTTTAGTCCTGG ab \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/fsd_data3.tab Tue Apr 02 05:10:09 2019 -0400 @@ -0,0 +1,112 @@ +1 AAAAAAAAAAAAAACCAAAACTTC ba +1 AAAAAAAAAAAAACCAGGCGTCGA ba +1 AAAAAAAAAAAAAGCTCCACGTTG ba +1 AAAAAAAAAAAAATCGTGGTTTGT ba +1 AAAAAAAAAAAAATTCACCCTTGT ba +7 AAAAAAAAAAAACACACTTAACTT ba +1 AAAAAAAAAAAACAGTGTTGAGAC ba +4 AAAAAAAAAAAACCGCTCCTCACA ba +1 AAAAAAAAAAAAGGCAACACAGAA ab +2 AAAAAAAAAAAATCTTTCTTTGAG ab +1 AAAAAAAAAAAATTGGGTTCCTTA ab +1 AAAAAAAAAAAGAGTCGCACCCAG ba +21 AAAAAAAAAAAGATCGTGGTTTGT ba +1 AAAAAAAAAAAGCGCAACACAGAA ab +3 AAAAAAAAAAAGGGCAACACAGAA ab +1 AAAAAAAAAAAGTAGCCCTAAACG ab +1 AAAAAAAAAAAGTCTTTCTTTGAG ab +1 AAAAAAAAAAATATCATAGACTCT ab +6 AAAAAAAAAAATATTCACCCTTGT ba +1 AAAAAAAAAAATATTCGAAAGTTA ba +3 AAAAAAAAAAATCACACTTAACTT ba +1 AAAAAAAAAAATCCGCTCCTCACA ba +1 AAAAAAAAAAATTAACTAAACTTA ab +1 AAAAAAAAAACAAATTCTATTATT ab +1 AAAAAAAAAACTCCCAGATTTTTT ab +1 AAAAAAAAAACTTCTGCTTGGCGG ba +11 AAAAAAAAAAGAATCGTGGTTTGT ba +5 AAAAAAAAAAGATAGCCCTAAACG ab +1 AAAAAAAAAAGCAATAATGCCAGT ab +2 AAAAAAAAAAGTACCGCACTCTCA ba +1 AAAAAAAAAAGTTCTTTCTTTGAG ab +1 AAAAAAAAAATAACTTCAATAATG ba +2 AAAAAAAAAATAATCATAGACTCT ab +1 AAAAAAAAAATAGTCTCACATTTA ab +1 AAAAAAAAAATATAACCTTTGGCG ab +3 AAAAAAAAACAAAATTCTATTATT ab +1 AAAAAAAAACAAGTACGCGGCATT ab +1 AAAAAAAAACAAGTACGCGGTATT ab +1 AAAAAAAAACAATATCGAATTAAC ab +3 AAAAAAAAACACGGTGAGACAAGG ba +1 AAAAAAAAACACGTTTCTCCCCTT ba +1 AAAAAAAAACATATCGTCCCGAGC ba +1 AAAAAAAAACCTACCTGAGGCCCC ab +3 AAAAAAAAACCTTATTACAGCGGA ab +1 AAAAAAAAACGATTCTCTGTATCT ba +1 AAAAAAAAACGTACCGCACTCTCA ba +4 AAAAAAAAACTACCCAGATTTTTT ba +1 AAAAAAAAACTAGATGAGACGACC ba +4 AAAAAAAAACTGTCTGCTTGGCGG ba +1 AAAAAAAAAGAAGTTTAATTTTAA ab +1 AAAAAAAAAGAATGCCTAAGACGA ba +6 AAAAAAAAAGACCGGCCTTAGACA ba +1 AAAAAAAAAGATATCGTGGTTTGT ba +1 AAAAAAAAAGCAATACTCAAGCTG ba +6 AAAAAAAAAGCAATGTCTAAGCCT ba +1 AAAAAAAAAGCACTGTCTAAGCCT ab +2 AAAAAAAAAGCTAATAATGCCAGT ab +1 AAAAAAAAAGTTTCGTGAAGGTCC ba +1 AAAAAAAAATAAAGGTCCGAATCT ab +1 AAAAAAAAATAAATGAGAGTGTAA ba +8 AAAAAAAAATAAGTCTCACATTTA ab +1 AAAAAAAAATAATAACCTCTGGCG ab +10 AAAAAAAAATAATAACCTTTGGCG ab +1 AAAAAAAAATAATCCCCTTTGTCG ab +6 AAAAAAAAATACGCAAACGCTGAG ab +4 AAAAAAAAATAGATCATAGACTCT ab +10 AAAAAAAAATAGATCATAGACTCT ba +10 AAAAAAAAATAGTAGGATTTCATG ba +7 AAAAAAAAATATGAATACCCTCGT ba +1 AAAAAAAAATATGCCACTTGATCC ba +1 AAAAAAAAATATTCTGCCACTTGA ba +3 AAAAAAAAATCAAACCAAGAGGAC ba +1 AAAAAAAAATCAGTACCCCTAAAC ab +12 AAAAAAAAATCCTAGTTAATGAAG ba +1 AAAAAAAAATCGATTCTTTATGCG ab +1 AAAAAAAAATGTCTGAAAATATCT ab +4 AAAAAAAAATGTCTGAAAATATCT ba +1 AAAAAAAAATTTCCGCAGACCGTT ba +8 AAAAAAAAATTTGGGCTACTACAA ba +1 AAAAAAAACAAAATTAGAACCCTT ab +1 AAAAAAAACAAACCGCTCCTCACA ba +5 AAAAAAAACAACGTACGCGGTATT ab +4 AAAAAAAACAATATCGTTGATATG ba +4 AAAAAAAACAATCACGTTAATAGG ab +1 AAAAAAAACAGAATCGTGGTTTGT ba +1 AAAAAAAACCAAATCGTTGATATG ba +9 AAAAAAAACCAAGTCCAGGCATCT ba +2 AAAAAAAACCACGGTGAGACAAGG ba +1 AAAAAAAACCGCCCAACTGCCGGT ab +5 AAAAAAAACCTCTCAACCCCAAAT ba +7 AAAAAAAACCTCTTGCGATGTTGT ab +1 AAAAAAAACCTCTTGCGCTGTTGT ab +1 AAAAAAAACCTCTTGTGATGTTGT ab +12 AAAAAAAACCTGAGCAATGGTTCC ab +3 AAAAAAAACCTTGACCCTCACATG ba +6 AAAAAAAACCTTGCACTCGTCCTA ba +9 AAAAAAAACGAAATAAAAAAACCT ba +1 AAAAAAAACGACCGGCCTTAGACA ba +4 AAAAAAAACGCCACCACCCCCTTT ab +12 AAAAAAAACGCCACGGGCACTATT ba +13 AAAAAAAACGTATCAGTAGATCCT ab +1 AAAAAAAACTAGTAGGATTTCATG ba +3 AAAAAAAACTATAGAAAATCCATT ba +1 AAAAAAAACTATTCTATTTCCGAT ba +13 AAAAAAAACTGATCTGCTTGGCGG ba +8 AAAAAAAACTTGCGAATAGCATCG ba +4 AAAAAAAACTTGTTATCAAAACGT ab +1 AAAAAAAAGAAAAGTTCAACACGC ba +1 AAAAAAAAGAAGTTCGCCCTCCGA ab +13 AAAAAAAAGAGAGTTTAGTCATGG ab +1 AAAAAAAAGAGAGTTTAGTCATGG ba +1 AAAAAAAAGAGAGTTTAGTCCTGG ab \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/fsd_data4.tab Tue Apr 02 05:10:09 2019 -0400 @@ -0,0 +1,112 @@ +1 AAAAAAAAAAAAAACCAAAACTTC ba +1 AAAAAAAAAAAAACCAGGCGTCGA ba +1 AAAAAAAAAAAAAGCTCCACGTTG ba +1 AAAAAAAAAAAAATCGTGGTTTGT ba +1 AAAAAAAAAAAAATTCACCCTTGT ba +7 AAAAAAAAAAAACACACTTAACTT ba +1 AAAAAAAAAAAACAGTGTTGAGAC ba +4 AAAAAAAAAAAACCGCTCCTCACA ba +1 AAAAAAAAAAAAGGCAACACAGAA ab +2 AAAAAAAAAAAATCTTTCTTTGAG ab +1 AAAAAAAAAAAATTGGGTTCCTTA ab +1 AAAAAAAAAAAGAGTCGCACCCAG ba +21 AAAAAAAAAAAGATCGTGGTTTGT ba +1 AAAAAAAAAAAGCGCAACACAGAA ab +3 AAAAAAAAAAAGGGCAACACAGAA ab +1 AAAAAAAAAAAGTAGCCCTAAACG ab +1 AAAAAAAAAAAGTCTTTCTTTGAG ab +1 AAAAAAAAAAATATCATAGACTCT ab +6 AAAAAAAAAAATATTCACCCTTGT ba +1 AAAAAAAAAAATATTCGAAAGTTA ba +3 AAAAAAAAAAATCACACTTAACTT ba +1 AAAAAAAAAAATCCGCTCCTCACA ba +1 AAAAAAAAAAATTAACTAAACTTA ab +1 AAAAAAAAAACAAATTCTATTATT ab +1 AAAAAAAAAACTCCCAGATTTTTT ab +1 AAAAAAAAAACTTCTGCTTGGCGG ba +11 AAAAAAAAAAGAATCGTGGTTTGT ba +5 AAAAAAAAAAGATAGCCCTAAACG ab +1 AAAAAAAAAAGCAATAATGCCAGT ab +2 AAAAAAAAAAGTACCGCACTCTCA ba +1 AAAAAAAAAAGTTCTTTCTTTGAG ab +1 AAAAAAAAAATAACTTCAATAATG ba +2 AAAAAAAAAATAATCATAGACTCT ab +1 AAAAAAAAAATAGTCTCACATTTA ab +1 AAAAAAAAAATATAACCTTTGGCG ab +3 AAAAAAAAACAAAATTCTATTATT ab +1 AAAAAAAAACAAGTACGCGGCATT ab +1 AAAAAAAAACAAGTACGCGGTATT ab +1 AAAAAAAAACAATATCGAATTAAC ab +3 AAAAAAAAACACGGTGAGACAAGG ba +1 AAAAAAAAACACGTTTCTCCCCTT ba +1 AAAAAAAAACATATCGTCCCGAGC ba +1 AAAAAAAAACCTACCTGAGGCCCC ab +3 AAAAAAAAACCTTATTACAGCGGA ab +1 AAAAAAAAACGATTCTCTGTATCT ba +1 AAAAAAAAACGTACCGCACTCTCA ba +4 AAAAAAAAACTACCCAGATTTTTT ba +1 AAAAAAAAACTAGATGAGACGACC ba +4 AAAAAAAAACTGTCTGCTTGGCGG ba +1 AAAAAAAAAGAAGTTTAATTTTAA ab +1 AAAAAAAAAGAATGCCTAAGACGA ba +6 AAAAAAAAAGACCGGCCTTAGACA ba +1 AAAAAAAAAGATATCGTGGTTTGT ba +1 AAAAAAAAAGCAATACTCAAGCTG ba +6 AAAAAAAAAGCAATGTCTAAGCCT ba +1 AAAAAAAAAGCACTGTCTAAGCCT ab +2 AAAAAAAAAGCTAATAATGCCAGT ab +1 AAAAAAAAAGTTTCGTGAAGGTCC ba +1 AAAAAAAAATAAAGGTCCGAATCT ab +1 AAAAAAAAATAAATGAGAGTGTAA ba +8 AAAAAAAAATAAGTCTCACATTTA ab +1 AAAAAAAAATAATAACCTCTGGCG ab +10 AAAAAAAAATAATAACCTTTGGCG ab +1 AAAAAAAAATAATCCCCTTTGTCG ab +6 AAAAAAAAATACGCAAACGCTGAG ab +4 AAAAAAAAATAGATCATAGACTCT ab +10 AAAAAAAAATAGATCATAGACTCT ba +10 AAAAAAAAATAGTAGGATTTCATG ba +7 AAAAAAAAATATGAATACCCTCGT ba +1 AAAAAAAAATATGCCACTTGATCC ba +1 AAAAAAAAATATTCTGCCACTTGA ba +3 AAAAAAAAATCAAACCAAGAGGAC ba +1 AAAAAAAAATCAGTACCCCTAAAC ab +12 AAAAAAAAATCCTAGTTAATGAAG ba +1 AAAAAAAAATCGATTCTTTATGCG ab +1 AAAAAAAAATGTCTGAAAATATCT ab +4 AAAAAAAAATGTCTGAAAATATCT ba +1 AAAAAAAAATTTCCGCAGACCGTT ba +8 AAAAAAAAATTTGGGCTACTACAA ba +1 AAAAAAAACAAAATTAGAACCCTT ab +1 AAAAAAAACAAACCGCTCCTCACA ba +5 AAAAAAAACAACGTACGCGGTATT ab +4 AAAAAAAACAATATCGTTGATATG ba +4 AAAAAAAACAATCACGTTAATAGG ab +1 AAAAAAAACAGAATCGTGGTTTGT ba +1 AAAAAAAACCAAATCGTTGATATG ba +9 AAAAAAAACCAAGTCCAGGCATCT ba +2 AAAAAAAACCACGGTGAGACAAGG ba +1 AAAAAAAACCGCCCAACTGCCGGT ab +5 AAAAAAAACCTCTCAACCCCAAAT ba +7 AAAAAAAACCTCTTGCGATGTTGT ab +1 AAAAAAAACCTCTTGCGCTGTTGT ab +1 AAAAAAAACCTCTTGTGATGTTGT ab +12 AAAAAAAACCTGAGCAATGGTTCC ab +3 AAAAAAAACCTTGACCCTCACATG ba +6 AAAAAAAACCTTGCACTCGTCCTA ba +9 AAAAAAAACGAAATAAAAAAACCT ba +1 AAAAAAAACGACCGGCCTTAGACA ba +4 AAAAAAAACGCCACCACCCCCTTT ab +12 AAAAAAAACGCCACGGGCACTATT ba +13 AAAAAAAACGTATCAGTAGATCCT ab +1 AAAAAAAACTAGTAGGATTTCATG ba +3 AAAAAAAACTATAGAAAATCCATT ba +1 AAAAAAAACTATTCTATTTCCGAT ba +13 AAAAAAAACTGATCTGCTTGGCGG ba +8 AAAAAAAACTTGCGAATAGCATCG ba +4 AAAAAAAACTTGTTATCAAAACGT ab +1 AAAAAAAAGAAAAGTTCAACACGC ba +1 AAAAAAAAGAAGTTCGCCCTCCGA ab +13 AAAAAAAAGAGAGTTTAGTCATGG ab +1 AAAAAAAAGAGAGTTTAGTCATGG ba +1 AAAAAAAAGAGAGTTTAGTCCTGG ab \ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/fsd_output1.tab Tue Apr 02 05:10:09 2019 -0400 @@ -0,0 +1,224 @@ +Values from family size distribution with all datasets + +Family size fsd_data1.tab fsd_data2.tab fsd_data3.tab fsd_data4.tab +FS=1 63 63 63 63 +FS=2 5 5 5 5 +FS=3 8 8 8 8 +FS=4 9 9 9 9 +FS=5 3 3 3 3 +FS=6 5 5 5 5 +FS=7 3 3 3 3 +FS=8 3 3 3 3 +FS=9 2 2 2 2 +FS=10 3 3 3 3 +FS=11 1 1 1 1 +FS=12 3 3 3 3 +FS=13 3 3 3 3 +FS=14 0 0 0 0 +FS=15 0 0 0 0 +FS=16 0 0 0 0 +FS=17 0 0 0 0 +FS=18 0 0 0 0 +FS=19 0 0 0 0 +FS=20 0 0 0 0 +FS>20 1 1 1 1 +sum 112 112 112 112 +Dataset: fsd_data1.tab +max. family size: 21 +absolute frequency: 1 +relative frequency: 0.009 + + singletons: family size > 20: + nr. of tags rel. freq of tags rel.freq of PE reads nr. of tags rel. freq of tags nr. of PE reads rel. freq of PE reads total nr. of tags total nr. of PE reads +fsd_data1.tab 63 0.562 0.167 1 0.009 21 0.056 112 378 + +The unique frequencies were calculated from the dataset where the tags occured only once (=ab without DCS, ba without DCS) +Whereas the total frequencies were calculated from the whole dataset (=including the DCS). + +FS >= 1 nr. of tags nr. of PE reads rel. freq of tags rel. freq of PE reads: + unique: total unique total: +SSCS ab 47 123 0.431 0.339 0.420 0.325 +SSCS ba 59 222 0.488 0.481 0.476 0.468 +DCS (total) 3 (6) 18 (33) 0.028 0.027 (0.054) 0.050 0.048 (0.087) +total nr. of tags 109 363 109 112 363 378 + +FS >= 3 nr. of tags nr. of PE reads rel. freq of tags rel. freq of PE reads: + unique: total unique total: +SSCS ab 14 87 0.341 0.259 0.313 0.224 +SSCS ba 26 187 0.491 0.481 0.495 0.482 +DCS (total) 1 (2) 4 (14) 0.024 0.024 (0.048) 0.014 0.014 (0.049) +total nr. of tags 41 278 41 42 278 288 + +Values from family size distribution + duplex ab ba sum +FS=1 2 30 31 63 +FS=2 0 3 2 5 +FS=3 0 3 5 8 +FS=4 2 3 4 9 +FS=5 0 2 1 3 +FS=6 0 1 4 5 +FS=7 0 1 2 3 +FS=8 0 1 2 3 +FS=9 0 0 2 2 +FS=10 1 1 1 3 +FS=11 0 0 1 1 +FS=12 0 1 2 3 +FS=13 1 1 1 3 +FS=14 0 0 0 0 +FS=15 0 0 0 0 +FS=16 0 0 0 0 +FS=17 0 0 0 0 +FS=18 0 0 0 0 +FS=19 0 0 0 0 +FS=20 0 0 0 0 +FS>20 0 0 1 1 + +Dataset: fsd_data2.tab +max. family size: 21 +absolute frequency: 1 +relative frequency: 0.009 + + singletons: family size > 20: + nr. of tags rel. freq of tags rel.freq of PE reads nr. of tags rel. freq of tags nr. of PE reads rel. freq of PE reads total nr. of tags total nr. of PE reads +fsd_data2.tab 63 0.562 0.167 1 0.009 21 0.056 112 378 + +The unique frequencies were calculated from the dataset where the tags occured only once (=ab without DCS, ba without DCS) +Whereas the total frequencies were calculated from the whole dataset (=including the DCS). + +FS >= 1 nr. of tags nr. of PE reads rel. freq of tags rel. freq of PE reads: + unique: total unique total: +SSCS ab 47 123 0.431 0.339 0.420 0.325 +SSCS ba 59 222 0.488 0.481 0.476 0.468 +DCS (total) 3 (6) 18 (33) 0.028 0.027 (0.054) 0.050 0.048 (0.087) +total nr. of tags 109 363 109 112 363 378 + +FS >= 3 nr. of tags nr. of PE reads rel. freq of tags rel. freq of PE reads: + unique: total unique total: +SSCS ab 14 87 0.341 0.259 0.313 0.224 +SSCS ba 26 187 0.491 0.481 0.495 0.482 +DCS (total) 1 (2) 4 (14) 0.024 0.024 (0.048) 0.014 0.014 (0.049) +total nr. of tags 41 278 41 42 278 288 + +Values from family size distribution + duplex ab ba sum +FS=1 2 30 31 63 +FS=2 0 3 2 5 +FS=3 0 3 5 8 +FS=4 2 3 4 9 +FS=5 0 2 1 3 +FS=6 0 1 4 5 +FS=7 0 1 2 3 +FS=8 0 1 2 3 +FS=9 0 0 2 2 +FS=10 1 1 1 3 +FS=11 0 0 1 1 +FS=12 0 1 2 3 +FS=13 1 1 1 3 +FS=14 0 0 0 0 +FS=15 0 0 0 0 +FS=16 0 0 0 0 +FS=17 0 0 0 0 +FS=18 0 0 0 0 +FS=19 0 0 0 0 +FS=20 0 0 0 0 +FS>20 0 0 1 1 + +Dataset: fsd_data3.tab +max. family size: 21 +absolute frequency: 1 +relative frequency: 0.009 + + singletons: family size > 20: + nr. of tags rel. freq of tags rel.freq of PE reads nr. of tags rel. freq of tags nr. of PE reads rel. freq of PE reads total nr. of tags total nr. of PE reads +fsd_data3.tab 63 0.562 0.167 1 0.009 21 0.056 112 378 + +The unique frequencies were calculated from the dataset where the tags occured only once (=ab without DCS, ba without DCS) +Whereas the total frequencies were calculated from the whole dataset (=including the DCS). + +FS >= 1 nr. of tags nr. of PE reads rel. freq of tags rel. freq of PE reads: + unique: total unique total: +SSCS ab 47 123 0.431 0.339 0.420 0.325 +SSCS ba 59 222 0.488 0.481 0.476 0.468 +DCS (total) 3 (6) 18 (33) 0.028 0.027 (0.054) 0.050 0.048 (0.087) +total nr. of tags 109 363 109 112 363 378 + +FS >= 3 nr. of tags nr. of PE reads rel. freq of tags rel. freq of PE reads: + unique: total unique total: +SSCS ab 14 87 0.341 0.259 0.313 0.224 +SSCS ba 26 187 0.491 0.481 0.495 0.482 +DCS (total) 1 (2) 4 (14) 0.024 0.024 (0.048) 0.014 0.014 (0.049) +total nr. of tags 41 278 41 42 278 288 + +Values from family size distribution + duplex ab ba sum +FS=1 2 30 31 63 +FS=2 0 3 2 5 +FS=3 0 3 5 8 +FS=4 2 3 4 9 +FS=5 0 2 1 3 +FS=6 0 1 4 5 +FS=7 0 1 2 3 +FS=8 0 1 2 3 +FS=9 0 0 2 2 +FS=10 1 1 1 3 +FS=11 0 0 1 1 +FS=12 0 1 2 3 +FS=13 1 1 1 3 +FS=14 0 0 0 0 +FS=15 0 0 0 0 +FS=16 0 0 0 0 +FS=17 0 0 0 0 +FS=18 0 0 0 0 +FS=19 0 0 0 0 +FS=20 0 0 0 0 +FS>20 0 0 1 1 + +Dataset: fsd_data4.tab +max. family size: 21 +absolute frequency: 1 +relative frequency: 0.009 + + singletons: family size > 20: + nr. of tags rel. freq of tags rel.freq of PE reads nr. of tags rel. freq of tags nr. of PE reads rel. freq of PE reads total nr. of tags total nr. of PE reads +fsd_data4.tab 63 0.562 0.167 1 0.009 21 0.056 112 378 + +The unique frequencies were calculated from the dataset where the tags occured only once (=ab without DCS, ba without DCS) +Whereas the total frequencies were calculated from the whole dataset (=including the DCS). + +FS >= 1 nr. of tags nr. of PE reads rel. freq of tags rel. freq of PE reads: + unique: total unique total: +SSCS ab 47 123 0.431 0.339 0.420 0.325 +SSCS ba 59 222 0.488 0.481 0.476 0.468 +DCS (total) 3 (6) 18 (33) 0.028 0.027 (0.054) 0.050 0.048 (0.087) +total nr. of tags 109 363 109 112 363 378 + +FS >= 3 nr. of tags nr. of PE reads rel. freq of tags rel. freq of PE reads: + unique: total unique total: +SSCS ab 14 87 0.341 0.259 0.313 0.224 +SSCS ba 26 187 0.491 0.481 0.495 0.482 +DCS (total) 1 (2) 4 (14) 0.024 0.024 (0.048) 0.014 0.014 (0.049) +total nr. of tags 41 278 41 42 278 288 + +Values from family size distribution + duplex ab ba sum +FS=1 2 30 31 63 +FS=2 0 3 2 5 +FS=3 0 3 5 8 +FS=4 2 3 4 9 +FS=5 0 2 1 3 +FS=6 0 1 4 5 +FS=7 0 1 2 3 +FS=8 0 1 2 3 +FS=9 0 0 2 2 +FS=10 1 1 1 3 +FS=11 0 0 1 1 +FS=12 0 1 2 3 +FS=13 1 1 1 3 +FS=14 0 0 0 0 +FS=15 0 0 0 0 +FS=16 0 0 0 0 +FS=17 0 0 0 0 +FS=18 0 0 0 0 +FS=19 0 0 0 0 +FS=20 0 0 0 0 +FS>20 0 0 1 1
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/test-data/fsd_output2.tab Tue Apr 02 05:10:09 2019 -0400 @@ -0,0 +1,174 @@ +Values from family size distribution with all datasets + +Family size fsd_data1.tab fsd_data2.tab fsd_data3.tab +FS=1 63 63 63 +FS=2 5 5 5 +FS=3 8 8 8 +FS=4 9 9 9 +FS=5 3 3 3 +FS=6 5 5 5 +FS=7 3 3 3 +FS=8 3 3 3 +FS=9 2 2 2 +FS=10 3 3 3 +FS=11 1 1 1 +FS=12 3 3 3 +FS=13 3 3 3 +FS=14 0 0 0 +FS=15 0 0 0 +FS=16 0 0 0 +FS=17 0 0 0 +FS=18 0 0 0 +FS=19 0 0 0 +FS=20 0 0 0 +FS>20 1 1 1 +sum 112 112 112 +Dataset: fsd_data1.tab +max. family size: 21 +absolute frequency: 1 +relative frequency: 0.009 + + singletons: family size > 20: + nr. of tags rel. freq of tags rel.freq of PE reads nr. of tags rel. freq of tags nr. of PE reads rel. freq of PE reads total nr. of tags total nr. of PE reads +fsd_data1.tab 63 0.562 0.167 1 0.009 21 0.056 112 378 + +The unique frequencies were calculated from the dataset where the tags occured only once (=ab without DCS, ba without DCS) +Whereas the total frequencies were calculated from the whole dataset (=including the DCS). + +FS >= 1 nr. of tags nr. of PE reads rel. freq of tags rel. freq of PE reads: + unique: total unique total: +SSCS ab 47 123 0.431 0.339 0.420 0.325 +SSCS ba 59 222 0.488 0.481 0.476 0.468 +DCS (total) 3 (6) 18 (33) 0.028 0.027 (0.054) 0.050 0.048 (0.087) +total nr. of tags 109 363 109 112 363 378 + +FS >= 3 nr. of tags nr. of PE reads rel. freq of tags rel. freq of PE reads: + unique: total unique total: +SSCS ab 14 87 0.341 0.259 0.313 0.224 +SSCS ba 26 187 0.491 0.481 0.495 0.482 +DCS (total) 1 (2) 4 (14) 0.024 0.024 (0.048) 0.014 0.014 (0.049) +total nr. of tags 41 278 41 42 278 288 + +Values from family size distribution + duplex ab ba sum +FS=1 2 30 31 63 +FS=2 0 3 2 5 +FS=3 0 3 5 8 +FS=4 2 3 4 9 +FS=5 0 2 1 3 +FS=6 0 1 4 5 +FS=7 0 1 2 3 +FS=8 0 1 2 3 +FS=9 0 0 2 2 +FS=10 1 1 1 3 +FS=11 0 0 1 1 +FS=12 0 1 2 3 +FS=13 1 1 1 3 +FS=14 0 0 0 0 +FS=15 0 0 0 0 +FS=16 0 0 0 0 +FS=17 0 0 0 0 +FS=18 0 0 0 0 +FS=19 0 0 0 0 +FS=20 0 0 0 0 +FS>20 0 0 1 1 + +Dataset: fsd_data2.tab +max. family size: 21 +absolute frequency: 1 +relative frequency: 0.009 + + singletons: family size > 20: + nr. of tags rel. freq of tags rel.freq of PE reads nr. of tags rel. freq of tags nr. of PE reads rel. freq of PE reads total nr. of tags total nr. of PE reads +fsd_data2.tab 63 0.562 0.167 1 0.009 21 0.056 112 378 + +The unique frequencies were calculated from the dataset where the tags occured only once (=ab without DCS, ba without DCS) +Whereas the total frequencies were calculated from the whole dataset (=including the DCS). + +FS >= 1 nr. of tags nr. of PE reads rel. freq of tags rel. freq of PE reads: + unique: total unique total: +SSCS ab 47 123 0.431 0.339 0.420 0.325 +SSCS ba 59 222 0.488 0.481 0.476 0.468 +DCS (total) 3 (6) 18 (33) 0.028 0.027 (0.054) 0.050 0.048 (0.087) +total nr. of tags 109 363 109 112 363 378 + +FS >= 3 nr. of tags nr. of PE reads rel. freq of tags rel. freq of PE reads: + unique: total unique total: +SSCS ab 14 87 0.341 0.259 0.313 0.224 +SSCS ba 26 187 0.491 0.481 0.495 0.482 +DCS (total) 1 (2) 4 (14) 0.024 0.024 (0.048) 0.014 0.014 (0.049) +total nr. of tags 41 278 41 42 278 288 + +Values from family size distribution + duplex ab ba sum +FS=1 2 30 31 63 +FS=2 0 3 2 5 +FS=3 0 3 5 8 +FS=4 2 3 4 9 +FS=5 0 2 1 3 +FS=6 0 1 4 5 +FS=7 0 1 2 3 +FS=8 0 1 2 3 +FS=9 0 0 2 2 +FS=10 1 1 1 3 +FS=11 0 0 1 1 +FS=12 0 1 2 3 +FS=13 1 1 1 3 +FS=14 0 0 0 0 +FS=15 0 0 0 0 +FS=16 0 0 0 0 +FS=17 0 0 0 0 +FS=18 0 0 0 0 +FS=19 0 0 0 0 +FS=20 0 0 0 0 +FS>20 0 0 1 1 + +Dataset: fsd_data3.tab +max. family size: 21 +absolute frequency: 1 +relative frequency: 0.009 + + singletons: family size > 20: + nr. of tags rel. freq of tags rel.freq of PE reads nr. of tags rel. freq of tags nr. of PE reads rel. freq of PE reads total nr. of tags total nr. of PE reads +fsd_data3.tab 63 0.562 0.167 1 0.009 21 0.056 112 378 + +The unique frequencies were calculated from the dataset where the tags occured only once (=ab without DCS, ba without DCS) +Whereas the total frequencies were calculated from the whole dataset (=including the DCS). + +FS >= 1 nr. of tags nr. of PE reads rel. freq of tags rel. freq of PE reads: + unique: total unique total: +SSCS ab 47 123 0.431 0.339 0.420 0.325 +SSCS ba 59 222 0.488 0.481 0.476 0.468 +DCS (total) 3 (6) 18 (33) 0.028 0.027 (0.054) 0.050 0.048 (0.087) +total nr. of tags 109 363 109 112 363 378 + +FS >= 3 nr. of tags nr. of PE reads rel. freq of tags rel. freq of PE reads: + unique: total unique total: +SSCS ab 14 87 0.341 0.259 0.313 0.224 +SSCS ba 26 187 0.491 0.481 0.495 0.482 +DCS (total) 1 (2) 4 (14) 0.024 0.024 (0.048) 0.014 0.014 (0.049) +total nr. of tags 41 278 41 42 278 288 + +Values from family size distribution + duplex ab ba sum +FS=1 2 30 31 63 +FS=2 0 3 2 5 +FS=3 0 3 5 8 +FS=4 2 3 4 9 +FS=5 0 2 1 3 +FS=6 0 1 4 5 +FS=7 0 1 2 3 +FS=8 0 1 2 3 +FS=9 0 0 2 2 +FS=10 1 1 1 3 +FS=11 0 0 1 1 +FS=12 0 1 2 3 +FS=13 1 1 1 3 +FS=14 0 0 0 0 +FS=15 0 0 0 0 +FS=16 0 0 0 0 +FS=17 0 0 0 0 +FS=18 0 0 0 0 +FS=19 0 0 0 0 +FS=20 0 0 0 0 +FS>20 0 0 1 1
--- a/test-data/output_file.tabular Mon Oct 08 05:50:18 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,167 +0,0 @@ -Values from family size distribution with all datasets - -Family size Test_data Test_data2 Test_data3 Test_data4 -FS=1 63 63 63 63 -FS=2 5 5 5 5 -FS=3 8 8 8 8 -FS=4 10 10 10 10 -FS=5 3 3 3 3 -FS=6 5 5 5 5 -FS=7 3 3 3 3 -FS=8 3 3 3 3 -FS=9 2 2 2 2 -FS=10 3 3 3 3 -FS=11 1 1 1 1 -FS=12 6 6 6 6 -sum 112 112 112 112 -Dataset: Test_data -max. family size: 13 -absolute frequency: 3 -relative frequency: 0.027 - - singletons: family size > 20: - absolute nr. rel. freq absolute nr. rel. freq total length -Test_data 63 0.562 6 0.054 112 - -The unique frequencies were calculated from the dataset where the tags occured only once (=ab without DCS, ba without DCS) -Whereas the total frequencies were calculated from the whole dataset (=including the DCS). - -FS >= 1 unique: total: -nr./rel. freq of ab= 47 0.431 0.420 -nr./rel. freq of ba= 59 0.488 0.476 -nr./rel. freq of DCS (total)= 3 (6) 0.028 0.027 (0.054) -length of dataset= 109 109 112 -FS >= 3 unique: total: -nr./rel. freq of ab= 14 0.341 0.318 -nr./rel. freq of ba= 26 0.491 0.464 -nr./rel. freq of DCS (total)= 1 (2) 0.037 0.023 (0.045) -length of dataset= 41 41 44 - -Values from family size distribution - duplex ab ba sum -FS=1 2 30 31 63 -FS=2 0 3 2 5 -FS=3 0 3 5 8 -FS=4 2 3 5 10 -FS=5 0 2 1 3 -FS=6 0 1 4 5 -FS=7 0 1 2 3 -FS=8 0 1 2 3 -FS=9 0 0 2 2 -FS=10 1 1 1 3 -FS=11 0 0 1 1 -FS=12 1 2 3 6 - -Dataset: Test_data2 -max. family size: 13 -absolute frequency: 3 -relative frequency: 0.027 - - singletons: family size > 20: - absolute nr. rel. freq absolute nr. rel. freq total length -Test_data2 63 0.562 6 0.054 112 - -The unique frequencies were calculated from the dataset where the tags occured only once (=ab without DCS, ba without DCS) -Whereas the total frequencies were calculated from the whole dataset (=including the DCS). - -FS >= 1 unique: total: -nr./rel. freq of ab= 47 0.431 0.420 -nr./rel. freq of ba= 59 0.488 0.476 -nr./rel. freq of DCS (total)= 3 (6) 0.028 0.027 (0.054) -length of dataset= 109 109 112 -FS >= 3 unique: total: -nr./rel. freq of ab= 14 0.341 0.318 -nr./rel. freq of ba= 26 0.491 0.464 -nr./rel. freq of DCS (total)= 1 (2) 0.037 0.023 (0.045) -length of dataset= 41 41 44 - -Values from family size distribution - duplex ab ba sum -FS=1 2 30 31 63 -FS=2 0 3 2 5 -FS=3 0 3 5 8 -FS=4 2 3 5 10 -FS=5 0 2 1 3 -FS=6 0 1 4 5 -FS=7 0 1 2 3 -FS=8 0 1 2 3 -FS=9 0 0 2 2 -FS=10 1 1 1 3 -FS=11 0 0 1 1 -FS=12 1 2 3 6 - -Dataset: Test_data3 -max. family size: 13 -absolute frequency: 3 -relative frequency: 0.027 - - singletons: family size > 20: - absolute nr. rel. freq absolute nr. rel. freq total length -Test_data3 63 0.562 6 0.054 112 - -The unique frequencies were calculated from the dataset where the tags occured only once (=ab without DCS, ba without DCS) -Whereas the total frequencies were calculated from the whole dataset (=including the DCS). - -FS >= 1 unique: total: -nr./rel. freq of ab= 47 0.431 0.420 -nr./rel. freq of ba= 59 0.488 0.476 -nr./rel. freq of DCS (total)= 3 (6) 0.028 0.027 (0.054) -length of dataset= 109 109 112 -FS >= 3 unique: total: -nr./rel. freq of ab= 14 0.341 0.318 -nr./rel. freq of ba= 26 0.491 0.464 -nr./rel. freq of DCS (total)= 1 (2) 0.037 0.023 (0.045) -length of dataset= 41 41 44 - -Values from family size distribution - duplex ab ba sum -FS=1 2 30 31 63 -FS=2 0 3 2 5 -FS=3 0 3 5 8 -FS=4 2 3 5 10 -FS=5 0 2 1 3 -FS=6 0 1 4 5 -FS=7 0 1 2 3 -FS=8 0 1 2 3 -FS=9 0 0 2 2 -FS=10 1 1 1 3 -FS=11 0 0 1 1 -FS=12 1 2 3 6 - -Dataset: Test_data4 -max. family size: 13 -absolute frequency: 3 -relative frequency: 0.027 - - singletons: family size > 20: - absolute nr. rel. freq absolute nr. rel. freq total length -Test_data4 63 0.562 6 0.054 112 - -The unique frequencies were calculated from the dataset where the tags occured only once (=ab without DCS, ba without DCS) -Whereas the total frequencies were calculated from the whole dataset (=including the DCS). - -FS >= 1 unique: total: -nr./rel. freq of ab= 47 0.431 0.420 -nr./rel. freq of ba= 59 0.488 0.476 -nr./rel. freq of DCS (total)= 3 (6) 0.028 0.027 (0.054) -length of dataset= 109 109 112 -FS >= 3 unique: total: -nr./rel. freq of ab= 14 0.341 0.318 -nr./rel. freq of ba= 26 0.491 0.464 -nr./rel. freq of DCS (total)= 1 (2) 0.037 0.023 (0.045) -length of dataset= 41 41 44 - -Values from family size distribution - duplex ab ba sum -FS=1 2 30 31 63 -FS=2 0 3 2 5 -FS=3 0 3 5 8 -FS=4 2 3 5 10 -FS=5 0 2 1 3 -FS=6 0 1 4 5 -FS=7 0 1 2 3 -FS=8 0 1 2 3 -FS=9 0 0 2 2 -FS=10 1 1 1 3 -FS=11 0 0 1 1 -FS=12 1 2 3 6
--- a/test-data/output_file2.tabular Mon Oct 08 05:50:18 2018 -0400 +++ /dev/null Thu Jan 01 00:00:00 1970 +0000 @@ -1,129 +0,0 @@ -Values from family size distribution with all datasets - -Family size Test_data Test_data2 Test_data3 -FS=1 63 63 63 -FS=2 5 5 5 -FS=3 8 8 8 -FS=4 10 10 10 -FS=5 3 3 3 -FS=6 5 5 5 -FS=7 3 3 3 -FS=8 3 3 3 -FS=9 2 2 2 -FS=10 3 3 3 -FS=11 1 1 1 -FS=12 6 6 6 -sum 112 112 112 -Dataset: Test_data -max. family size: 13 -absolute frequency: 3 -relative frequency: 0.027 - - singletons: family size > 20: - absolute nr. rel. freq absolute nr. rel. freq total length -Test_data 63 0.562 6 0.054 112 - -The unique frequencies were calculated from the dataset where the tags occured only once (=ab without DCS, ba without DCS) -Whereas the total frequencies were calculated from the whole dataset (=including the DCS). - -FS >= 1 unique: total: -nr./rel. freq of ab= 47 0.431 0.420 -nr./rel. freq of ba= 59 0.488 0.476 -nr./rel. freq of DCS (total)= 3 (6) 0.028 0.027 (0.054) -length of dataset= 109 109 112 -FS >= 3 unique: total: -nr./rel. freq of ab= 14 0.341 0.318 -nr./rel. freq of ba= 26 0.491 0.464 -nr./rel. freq of DCS (total)= 1 (2) 0.037 0.023 (0.045) -length of dataset= 41 41 44 - -Values from family size distribution - duplex ab ba sum -FS=1 2 30 31 63 -FS=2 0 3 2 5 -FS=3 0 3 5 8 -FS=4 2 3 5 10 -FS=5 0 2 1 3 -FS=6 0 1 4 5 -FS=7 0 1 2 3 -FS=8 0 1 2 3 -FS=9 0 0 2 2 -FS=10 1 1 1 3 -FS=11 0 0 1 1 -FS=12 1 2 3 6 - -Dataset: Test_data2 -max. family size: 13 -absolute frequency: 3 -relative frequency: 0.027 - - singletons: family size > 20: - absolute nr. rel. freq absolute nr. rel. freq total length -Test_data2 63 0.562 6 0.054 112 - -The unique frequencies were calculated from the dataset where the tags occured only once (=ab without DCS, ba without DCS) -Whereas the total frequencies were calculated from the whole dataset (=including the DCS). - -FS >= 1 unique: total: -nr./rel. freq of ab= 47 0.431 0.420 -nr./rel. freq of ba= 59 0.488 0.476 -nr./rel. freq of DCS (total)= 3 (6) 0.028 0.027 (0.054) -length of dataset= 109 109 112 -FS >= 3 unique: total: -nr./rel. freq of ab= 14 0.341 0.318 -nr./rel. freq of ba= 26 0.491 0.464 -nr./rel. freq of DCS (total)= 1 (2) 0.037 0.023 (0.045) -length of dataset= 41 41 44 - -Values from family size distribution - duplex ab ba sum -FS=1 2 30 31 63 -FS=2 0 3 2 5 -FS=3 0 3 5 8 -FS=4 2 3 5 10 -FS=5 0 2 1 3 -FS=6 0 1 4 5 -FS=7 0 1 2 3 -FS=8 0 1 2 3 -FS=9 0 0 2 2 -FS=10 1 1 1 3 -FS=11 0 0 1 1 -FS=12 1 2 3 6 - -Dataset: Test_data3 -max. family size: 13 -absolute frequency: 3 -relative frequency: 0.027 - - singletons: family size > 20: - absolute nr. rel. freq absolute nr. rel. freq total length -Test_data3 63 0.562 6 0.054 112 - -The unique frequencies were calculated from the dataset where the tags occured only once (=ab without DCS, ba without DCS) -Whereas the total frequencies were calculated from the whole dataset (=including the DCS). - -FS >= 1 unique: total: -nr./rel. freq of ab= 47 0.431 0.420 -nr./rel. freq of ba= 59 0.488 0.476 -nr./rel. freq of DCS (total)= 3 (6) 0.028 0.027 (0.054) -length of dataset= 109 109 112 -FS >= 3 unique: total: -nr./rel. freq of ab= 14 0.341 0.318 -nr./rel. freq of ba= 26 0.491 0.464 -nr./rel. freq of DCS (total)= 1 (2) 0.037 0.023 (0.045) -length of dataset= 41 41 44 - -Values from family size distribution - duplex ab ba sum -FS=1 2 30 31 63 -FS=2 0 3 2 5 -FS=3 0 3 5 8 -FS=4 2 3 5 10 -FS=5 0 2 1 3 -FS=6 0 1 4 5 -FS=7 0 1 2 3 -FS=8 0 1 2 3 -FS=9 0 0 2 2 -FS=10 1 1 1 3 -FS=11 0 0 1 1 -FS=12 1 2 3 6