Mercurial > repos > mheinzl > hd
comparison hd.py @ 21:9919024d7778 draft
planemo upload for repository https://github.com/monikaheinzl/duplexanalysis_galaxy/tree/master/tools/hd commit b8a2f7b7615b2bcd3b602027af31f4e677da94f6
author | mheinzl |
---|---|
date | Fri, 14 Dec 2018 05:03:24 -0500 |
parents | b084b6a8e3ac |
children | 7e570ba56b83 |
comparison
Legend: equal, deleted, inserted, replaced
20:b084b6a8e3ac | 21:9919024d7778 |
---|---|
74 plt.xlim((0, maximumXFS + 1)) | 74 plt.xlim((0, maximumXFS + 1)) |
75 if len(numpy.concatenate(familySizeList1)) != 0: | 75 if len(numpy.concatenate(familySizeList1)) != 0: |
76 plt.ylim((0, max(numpy.bincount(numpy.concatenate(familySizeList1))) * 1.1)) | 76 plt.ylim((0, max(numpy.bincount(numpy.concatenate(familySizeList1))) * 1.1)) |
77 | 77 |
78 plt.ylim((0, maximumY * 1.2)) | 78 plt.ylim((0, maximumY * 1.2)) |
79 legend = "\nmax. family size: \nabsolute frequency: \nrelative frequency: " | 79 legend = "\nfamily size: \nabsolute frequency: \nrelative frequency: " |
80 plt.text(0.15, -0.08, legend, size=12, transform=plt.gcf().transFigure) | 80 plt.text(0.15, -0.08, legend, size=12, transform=plt.gcf().transFigure) |
81 | 81 |
82 count = numpy.bincount(originalCounts) # original counts | 82 count = numpy.bincount(originalCounts) # original counts |
83 legend1 = "{}\n{}\n{:.5f}".format(max(originalCounts), count[len(count) - 1], float(count[len(count) - 1]) / sum(count)) | 83 if max(originalCounts) >= 20: |
84 max_count = ">= 20" | |
85 else: | |
86 max_count = max(originalCounts) | |
87 legend1 = "{}\n{}\n{:.5f}".format(max_count, count[len(count) - 1], float(count[len(count) - 1]) / sum(count)) | |
84 plt.text(0.5, -0.08, legend1, size=12, transform=plt.gcf().transFigure) | 88 plt.text(0.5, -0.08, legend1, size=12, transform=plt.gcf().transFigure) |
85 legend3 = "singletons\n{:,}\n{:.5f}".format(int(counts[0][len(counts[0]) - 1][1]), float(counts[0][len(counts[0]) - 1][1]) / sum(counts[0][len(counts[0]) - 1])) | 89 legend3 = "singletons\n{:,}\n{:.5f}".format(int(counts[0][len(counts[0]) - 1][1]), float(counts[0][len(counts[0]) - 1][1]) / sum(counts[0][len(counts[0]) - 1])) |
86 plt.text(0.7, -0.08, legend3, transform=plt.gcf().transFigure, size=12) | 90 plt.text(0.7, -0.08, legend3, transform=plt.gcf().transFigure, size=12) |
87 plt.grid(b=True, which='major', color='#424242', linestyle=':') | 91 plt.grid(b=True, which='major', color='#424242', linestyle=':') |
88 | 92 |
957 # FSD | 961 # FSD |
958 createFileFSD2(summary5, sumCol5, overallSum5, output_file, | 962 createFileFSD2(summary5, sumCol5, overallSum5, output_file, |
959 "Family size distribution separated by Hamming distance", sep, | 963 "Family size distribution separated by Hamming distance", sep, |
960 diff=False) | 964 diff=False) |
961 | 965 |
962 count = numpy.bincount(quant) | |
963 # output_file.write("{}{}\n".format(sep, name1)) | 966 # output_file.write("{}{}\n".format(sep, name1)) |
964 output_file.write("\n") | 967 output_file.write("\n") |
965 output_file.write("max. family size:{}{}\n".format(sep, max(quant))) | 968 max_fs = numpy.bincount(integers[result]) |
966 output_file.write("absolute frequency:{}{}\n".format(sep, count[len(count) - 1])) | 969 output_file.write("max. family size in sample:{}{}\n".format(sep, max(integers[result]))) |
970 output_file.write("absolute frequency:{}{}\n".format(sep, max_fs[len(max_fs) - 1])) | |
967 output_file.write( | 971 output_file.write( |
968 "relative frequency:{}{}\n\n".format(sep, float(count[len(count) - 1]) / sum(count))) | 972 "relative frequency:{}{}\n\n".format(sep, float(max_fs[len(max_fs) - 1]) / sum(max_fs))) |
969 | 973 |
970 # HD within tags | 974 # HD within tags |
971 output_file.write( | 975 output_file.write( |
972 "The hamming distances were calculated by comparing each half of all tags against the tag(s) with the minimum Hamming distance per half.\n" | 976 "The hamming distances were calculated by comparing each half of all tags against the tag(s) with the minimum Hamming distance per half.\n" |
973 "It is possible that one tag can have the minimum HD from multiple tags, so the sample size in this calculation differs from the sample size entered by the user.\n") | 977 "It is possible that one tag can have the minimum HD from multiple tags, so the sample size in this calculation differs from the sample size entered by the user.\n") |