Mercurial > repos > mheinzl > hd
comparison hd.py @ 11:7adc48c8a03d draft
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 8d9bdadb5e154dadc455720c99700afbd9aafae9
author | mheinzl |
---|---|
date | Tue, 15 May 2018 13:31:02 -0400 |
parents | 69aa17354a6e |
children | 5b0a95f205ad |
comparison
equal
deleted
inserted
replaced
10:69aa17354a6e | 11:7adc48c8a03d |
---|---|
28 import sys | 28 import sys |
29 import os | 29 import os |
30 from matplotlib.backends.backend_pdf import PdfPages | 30 from matplotlib.backends.backend_pdf import PdfPages |
31 from collections import Counter | 31 from collections import Counter |
32 | 32 |
33 def plotFSDwithHD2(familySizeList1,maximumXFS,minimumXFS, quant, | 33 def plotFSDwithHD2(familySizeList1,maximumXFS,minimumXFS, originalCounts, |
34 title_file1, subtitle, pdf, relative=False, diff = True): | 34 title_file1, subtitle, pdf, relative=False, diff = True): |
35 if diff is False: | 35 if diff is False: |
36 colors = ["#e6194b", "#3cb44b", "#ffe119", "#0082c8", "#f58231", "#911eb4"] | 36 colors = ["#e6194b", "#3cb44b", "#ffe119", "#0082c8", "#f58231", "#911eb4"] |
37 labels = ["HD=1", "HD=2", "HD=3", "HD=4", "HD=5-8","HD>8"] | 37 labels = ["HD=1", "HD=2", "HD=3", "HD=4", "HD=5-8","HD>8"] |
38 else: | 38 else: |
76 | 76 |
77 plt.ylim((0, maximumY * 1.2)) | 77 plt.ylim((0, maximumY * 1.2)) |
78 legend = "\nmax. family size: \nabsolute frequency: \nrelative frequency: " | 78 legend = "\nmax. family size: \nabsolute frequency: \nrelative frequency: " |
79 plt.text(0.15, -0.08, legend, size=12, transform=plt.gcf().transFigure) | 79 plt.text(0.15, -0.08, legend, size=12, transform=plt.gcf().transFigure) |
80 | 80 |
81 count = numpy.bincount(quant) # original counts | 81 count = numpy.bincount(originalCounts) # original counts |
82 legend1 = "{}\n{}\n{:.5f}" \ | 82 legend1 = "{}\n{}\n{:.5f}" \ |
83 .format(max(quant), count[len(count) - 1], float(count[len(count) - 1]) / sum(count)) | 83 .format(max(originalCounts), count[len(count) - 1], float(count[len(count) - 1]) / sum(count)) |
84 plt.text(0.5, -0.08, legend1, size=12, transform=plt.gcf().transFigure) | 84 plt.text(0.5, -0.08, legend1, size=12, transform=plt.gcf().transFigure) |
85 legend3 = "singletons\n{:,}\n{:.5f}".format(int(counts[0][len(counts[0]) - 1][1]), | 85 legend3 = "singletons\n{:,}\n{:.5f}".format(int(counts[0][len(counts[0]) - 1][1]), |
86 float(counts[0][len(counts[0]) - 1][1]) / sum( | 86 float(counts[0][len(counts[0]) - 1][1]) / sum( |
87 counts[0][len(counts[0]) - 1])) | 87 counts[0][len(counts[0]) - 1])) |
88 plt.text(0.7, -0.08, legend3, transform=plt.gcf().transFigure, size=12) | 88 plt.text(0.7, -0.08, legend3, transform=plt.gcf().transFigure, size=12) |
506 difference1_zeros = abs(d - d2) | 506 difference1_zeros = abs(d - d2) |
507 diff11_zeros.append(difference1_zeros) | 507 diff11_zeros.append(difference1_zeros) |
508 i += 1 | 508 i += 1 |
509 | 509 |
510 #print(i) | 510 #print(i) |
511 diff11 = [st for st in diff11 if st != 999] | 511 #diff11 = [st for st in diff11 if st != 999] |
512 ham1 = [st for st in ham1 if st != 999] | 512 #ham1 = [st for st in ham1 if st != 999] |
513 ham2 = [st for st in ham2 if st != 999] | 513 #ham2 = [st for st in ham2 if st != 999] |
514 min_valueList = [st for st in min_valueList if st != 999] | 514 #min_valueList = [st for st in min_valueList if st != 999] |
515 min_tagsList = [st for st in min_tagsList if st != 999] | 515 #min_tagsList = [st for st in min_tagsList if st != 999] |
516 relativeDiffList = [st for st in relativeDiffList if st != 999] | 516 #relativeDiffList = [st for st in relativeDiffList if st != 999] |
517 diff11_zeros = [st for st in diff11_zeros if st != 999] | 517 #diff11_zeros = [st for st in diff11_zeros if st != 999] |
518 min_tagsList_zeros = [st for st in min_tagsList_zeros if st != 999] | 518 #min_tagsList_zeros = [st for st in min_tagsList_zeros if st != 999] |
519 | 519 |
520 return ([diff11, ham1, ham2, min_valueList, min_tagsList, relativeDiffList, diff11_zeros, min_tagsList_zeros]) | 520 return ([diff11, ham1, ham2, min_valueList, min_tagsList, relativeDiffList, diff11_zeros, min_tagsList_zeros]) |
521 | 521 |
522 def readFileReferenceFree(file): | 522 def readFileReferenceFree(file): |
523 with open(file, 'r') as dest_f: | 523 with open(file, 'r') as dest_f: |
524 data_array = numpy.genfromtxt(dest_f, skip_header=0, delimiter='\t', comments='#', dtype='string') | 524 data_array = numpy.genfromtxt(dest_f, skip_header=0, delimiter='\t', comments='#', dtype='string') |
525 integers = numpy.array(data_array[:, 0]).astype(int) | 525 integers = numpy.array(data_array[:, 0]).astype(int) |
526 return(integers, data_array) | 526 return(integers, data_array) |
527 | 527 |
528 def hammingDistanceWithFS(quant, ham): | 528 def hammingDistanceWithFS(fs, ham): |
529 quant = numpy.asarray(quant) | 529 fs = numpy.asarray(fs) |
530 maximum = max(ham) | 530 maximum = max(ham) |
531 minimum = min(ham) | 531 minimum = min(ham) |
532 ham = numpy.asarray(ham) | 532 ham = numpy.asarray(ham) |
533 | 533 |
534 singletons = numpy.where(quant == 1)[0] | 534 singletons = numpy.where(fs == 1)[0] |
535 data = ham[singletons] | 535 data = ham[singletons] |
536 | 536 |
537 hd2 = numpy.where(quant == 2)[0] | 537 hd2 = numpy.where(fs == 2)[0] |
538 data2 = ham[hd2] | 538 data2 = ham[hd2] |
539 | 539 |
540 hd3 = numpy.where(quant == 3)[0] | 540 hd3 = numpy.where(fs == 3)[0] |
541 data3 = ham[hd3] | 541 data3 = ham[hd3] |
542 | 542 |
543 hd4 = numpy.where(quant == 4)[0] | 543 hd4 = numpy.where(fs == 4)[0] |
544 data4 = ham[hd4] | 544 data4 = ham[hd4] |
545 | 545 |
546 hd5 = numpy.where((quant >= 5) & (quant <= 10))[0] | 546 hd5 = numpy.where((fs >= 5) & (fs <= 10))[0] |
547 data5 = ham[hd5] | 547 data5 = ham[hd5] |
548 | 548 |
549 hd6 = numpy.where(quant > 10)[0] | 549 hd6 = numpy.where(fs > 10)[0] |
550 data6 = ham[hd6] | 550 data6 = ham[hd6] |
551 | 551 |
552 list1 = [data, data2, data3, data4, data5, data6] | 552 list1 = [data, data2, data3, data4, data5, data6] |
553 return(list1, maximum, minimum) | 553 return(list1, maximum, minimum) |
554 | 554 |
882 lenTags=lenTags,xlabel="Hamming distance") | 882 lenTags=lenTags,xlabel="Hamming distance") |
883 | 883 |
884 ########################## Plot FSD with separation after HD ############################################### | 884 ########################## Plot FSD with separation after HD ############################################### |
885 ######################################################################################################################## | 885 ######################################################################################################################## |
886 plotFSDwithHD2(familySizeList1, maximumXFS, minimumXFS, | 886 plotFSDwithHD2(familySizeList1, maximumXFS, minimumXFS, |
887 quant=quant, subtitle="Family size distribution separated by Hamming distance", | 887 originalCounts=quant, subtitle="Family size distribution separated by Hamming distance", |
888 pdf=pdf,relative=False, title_file1=name_file, diff=False) | 888 pdf=pdf,relative=False, title_file1=name_file, diff=False) |
889 | 889 |
890 ########################## Plot difference between HD's separated after FSD ########################################## | 890 ########################## Plot difference between HD's separated after FSD ########################################## |
891 ######################################################################################################################## | 891 ######################################################################################################################## |
892 plotHDwithFSD(listDifference1, maximumXDifference, minimumXDifference, pdf=pdf, | 892 plotHDwithFSD(listDifference1, maximumXDifference, minimumXDifference, pdf=pdf, |
901 | 901 |
902 #################### Plot FSD separated after difference between HD's ##################################### | 902 #################### Plot FSD separated after difference between HD's ##################################### |
903 ######################################################################################################################## | 903 ######################################################################################################################## |
904 plotFSDwithHD2(familySizeList1_diff, maximumXFS_diff, minimumXFS_diff, | 904 plotFSDwithHD2(familySizeList1_diff, maximumXFS_diff, minimumXFS_diff, |
905 subtitle="Family size distribution separated by delta Hamming distances within the tags", | 905 subtitle="Family size distribution separated by delta Hamming distances within the tags", |
906 pdf=pdf,relative=False, diff=True, title_file1=name_file, quant=quant) | 906 pdf=pdf,relative=False, diff=True, title_file1=name_file, originalCounts=quant) |
907 | 907 |
908 plotFSDwithHD2(familySizeList1_reldiff, maximumXFS_reldiff, minimumXFS_reldiff, quant=quant, pdf=pdf, | 908 plotFSDwithHD2(familySizeList1_reldiff, maximumXFS_reldiff, minimumXFS_reldiff, originalCounts=quant, pdf=pdf, |
909 subtitle="Family size distribution separated by delta Hamming distances within the tags", | 909 subtitle="Family size distribution separated by delta Hamming distances within the tags", |
910 relative=True, diff=True, title_file1=name_file) | 910 relative=True, diff=True, title_file1=name_file) |
911 | 911 |
912 | 912 |
913 # plots for chimeric reads | 913 # plots for chimeric reads |
917 subtitle="Hamming distance of the non-identical half of chimeras", | 917 subtitle="Hamming distance of the non-identical half of chimeras", |
918 title_file1=name_file, lenTags=lenTags,xlabel="Hamming distance", relative=False) | 918 title_file1=name_file, lenTags=lenTags,xlabel="Hamming distance", relative=False) |
919 | 919 |
920 ## FSD | 920 ## FSD |
921 plotFSDwithHD2(familySizeList1_diff_zeros, maximumXFS_diff_zeros, minimumXFS_diff_zeros, | 921 plotFSDwithHD2(familySizeList1_diff_zeros, maximumXFS_diff_zeros, minimumXFS_diff_zeros, |
922 quant=quant, pdf=pdf, | 922 originalCounts=quant, pdf=pdf, |
923 subtitle="Family size distribution separated by Hamming distance of the non-identical half of chimeras", | 923 subtitle="Family size distribution separated by Hamming distance of the non-identical half of chimeras", |
924 relative=False, diff=False, title_file1=name_file) | 924 relative=False, diff=False, title_file1=name_file) |
925 | 925 |
926 ### print all data to a CSV file | 926 ### print all data to a CSV file |
927 #### HD #### | 927 #### HD #### |