Mercurial > repos > mheinzl > hd
comparison hd.py @ 10:69aa17354a6e draft
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit f01678e9bfead9f9e1b54dd9ecf7141f057dd9de
author | mheinzl |
---|---|
date | Tue, 15 May 2018 11:27:27 -0400 |
parents | c81bc96bea1c |
children | 7adc48c8a03d |
comparison
legend: equal, deleted, inserted, replaced
9:c81bc96bea1c | 10:69aa17354a6e |
---|---|
22 import matplotlib.pyplot as plt | 22 import matplotlib.pyplot as plt |
23 import os.path | 23 import os.path |
24 import cPickle as pickle | 24 import cPickle as pickle |
25 from multiprocessing.pool import Pool | 25 from multiprocessing.pool import Pool |
26 from functools import partial | 26 from functools import partial |
27 #from HDAnalysis_plots.plot_HDwithFSD import plotHDwithFSD | |
28 #from HDAnalysis_plots.plot_FSDwithHD2 import plotFSDwithHD2 | |
29 #from HDAnalysis_plots.plot_HDwithinSeq_Sum2 import plotHDwithinSeq_Sum2 | |
30 #from HDAnalysis_plots.table_HD import createTableHD, createFileHD, createTableHDwithTags, createFileHDwithinTag | |
31 #from HDAnalysis_plots.table_FSD import createTableFSD2, createFileFSD2 | |
32 import argparse | 27 import argparse |
33 import sys | 28 import sys |
34 import os | 29 import os |
35 from matplotlib.backends.backend_pdf import PdfPages | 30 from matplotlib.backends.backend_pdf import PdfPages |
36 from collections import Counter | 31 from collections import Counter |
166 else: | 161 else: |
167 range1 = range(minimumX, maximumX + 2) | 162 range1 = range(minimumX, maximumX + 2) |
168 | 163 |
169 counts = plt.hist(ham, align="left", rwidth=0.8, stacked=False, | 164 counts = plt.hist(ham, align="left", rwidth=0.8, stacked=False, |
170 label=[ "HD a", "HD b","HD a+b"], | 165 label=[ "HD a", "HD b","HD a+b"], |
171 bins=range1, color=["#585858", "#58ACFA", "#FA5858"], edgecolor='black', linewidth=1) | 166 bins=range1, color=[ "#58ACFA", "#FA5858","#585858"], edgecolor='black', linewidth=1) |
172 plt.legend(loc='upper right', fontsize=14, frameon=True, bbox_to_anchor=(1.55, 1)) | 167 plt.legend(loc='upper right', fontsize=14, frameon=True, bbox_to_anchor=(1.55, 1)) |
173 plt.suptitle('Hamming distances within tags', fontsize=14) | 168 plt.suptitle('Hamming distances within tags', fontsize=14) |
174 #plt.title(title_file1, fontsize=12) | 169 #plt.title(title_file1, fontsize=12) |
175 plt.xlabel("Hamming Distance", fontsize=14) | 170 plt.xlabel("Hamming Distance", fontsize=14) |
176 plt.ylabel("Absolute Frequency", fontsize=14) | 171 plt.ylabel("Absolute Frequency", fontsize=14) |
691 print("subset_tag is smaller or equal zero.") | 686 print("subset_tag is smaller or equal zero.") |
692 exit(5) | 687 exit(5) |
693 | 688 |
694 ### PLOT ### | 689 ### PLOT ### |
695 plt.rcParams['axes.facecolor'] = "E0E0E0" # grey background color | 690 plt.rcParams['axes.facecolor'] = "E0E0E0" # grey background color |
696 plt.rcParams['xtick.labelsize'] = 12 | 691 plt.rcParams['xtick.labelsize'] = 14 |
697 plt.rcParams['ytick.labelsize'] = 12 | 692 plt.rcParams['ytick.labelsize'] = 14 |
698 plt.rcParams['patch.edgecolor'] = "#000000" | 693 plt.rcParams['patch.edgecolor'] = "#000000" |
699 plt.rc('figure', figsize=(11.69, 8.27)) # A4 format | 694 plt.rc('figure', figsize=(11.69, 8.27)) # A4 format |
700 | 695 |
701 if file2 != str(None): | 696 if file2 != str(None): |
702 files = [file1, file2] | 697 files = [file1, file2] |
709 files = [file1] | 704 files = [file1] |
710 name1 = name1.split(".tabular")[0] | 705 name1 = name1.split(".tabular")[0] |
711 names = [name1] | 706 names = [name1] |
712 pdf_files = [title_savedFile_pdf] | 707 pdf_files = [title_savedFile_pdf] |
713 csv_files = [title_savedFile_csv] | 708 csv_files = [title_savedFile_csv] |
714 | |
715 print(type(onlyDuplicates)) | |
716 print(onlyDuplicates) | |
717 | 709 |
718 for f, name_file, pdf_f, csv_f in zip(files, names, pdf_files, csv_files): | 710 for f, name_file, pdf_f, csv_f in zip(files, names, pdf_files, csv_files): |
719 with open(csv_f, "w") as output_file, PdfPages(pdf_f) as pdf: | 711 with open(csv_f, "w") as output_file, PdfPages(pdf_f) as pdf: |
720 print("dataset: ", name_file) | 712 print("dataset: ", name_file) |
721 integers, data_array = readFileReferenceFree(f) | 713 integers, data_array = readFileReferenceFree(f) |
803 # for h, tag in zip(ham, result1): | 795 # for h, tag in zip(ham, result1): |
804 # output_file1.write("{}\t{}\n".format(tag, h)) | 796 # output_file1.write("{}\t{}\n".format(tag, h)) |
805 | 797 |
806 # HD analysis for chimeric reads | 798 # HD analysis for chimeric reads |
807 proc_pool_b = Pool(nproc) | 799 proc_pool_b = Pool(nproc) |
808 print(chunks_sample) | |
809 print(result2) | |
810 print(data_array) | |
811 diff_list_a = proc_pool_b.map(partial(hamming_difference, array2=result2, mate_b=False), chunks_sample) | 800 diff_list_a = proc_pool_b.map(partial(hamming_difference, array2=result2, mate_b=False), chunks_sample) |
812 diff_list_b = proc_pool_b.map(partial(hamming_difference, array2=result2, mate_b=True), chunks_sample) | 801 diff_list_b = proc_pool_b.map(partial(hamming_difference, array2=result2, mate_b=True), chunks_sample) |
813 proc_pool_b.close() | 802 proc_pool_b.close() |
814 proc_pool_b.join() | 803 proc_pool_b.join() |
815 diff = numpy.concatenate((numpy.concatenate([item[0] for item in diff_list_a]), | 804 diff = numpy.concatenate((numpy.concatenate([item[0] for item in diff_list_a]), |
901 ########################## Plot difference between HD's separated after FSD ########################################## | 890 ########################## Plot difference between HD's separated after FSD ########################################## |
902 ######################################################################################################################## | 891 ######################################################################################################################## |
903 plotHDwithFSD(listDifference1, maximumXDifference, minimumXDifference, pdf=pdf, | 892 plotHDwithFSD(listDifference1, maximumXDifference, minimumXDifference, pdf=pdf, |
904 subtitle="Delta Hamming distance within tags", | 893 subtitle="Delta Hamming distance within tags", |
905 title_file1=name_file, lenTags=lenTags, | 894 title_file1=name_file, lenTags=lenTags, |
906 xlabel="abs delta Hamming distance", relative=False) | 895 xlabel="absolute delta Hamming distance", relative=False) |
907 | 896 |
908 plotHDwithFSD(listRelDifference1, maximumXRelDifference, minimumXRelDifference, pdf=pdf, | 897 plotHDwithFSD(listRelDifference1, maximumXRelDifference, minimumXRelDifference, pdf=pdf, |
909 subtitle="Relative delta Hamming distances within tags", | 898 subtitle="Relative delta Hamming distances within tags", |
910 title_file1=name_file, lenTags=lenTags, | 899 title_file1=name_file, lenTags=lenTags, |
911 xlabel="rel delta Hamming distance", relative=True) | 900 xlabel="relative delta Hamming distance", relative=True) |
912 | 901 |
913 #################### Plot FSD separated after difference between HD's ##################################### | 902 #################### Plot FSD separated after difference between HD's ##################################### |
914 ######################################################################################################################## | 903 ######################################################################################################################## |
915 plotFSDwithHD2(familySizeList1_diff, maximumXFS_diff, minimumXFS_diff, | 904 plotFSDwithHD2(familySizeList1_diff, maximumXFS_diff, minimumXFS_diff, |
916 subtitle="Family size distribution with delta Hamming distances within the tags", | 905 subtitle="Family size distribution separated by delta Hamming distances within the tags", |
917 pdf=pdf,relative=False, diff=True, title_file1=name_file, quant=quant) | 906 pdf=pdf,relative=False, diff=True, title_file1=name_file, quant=quant) |
918 | 907 |
919 plotFSDwithHD2(familySizeList1_reldiff, maximumXFS_reldiff, minimumXFS_reldiff, quant=quant, pdf=pdf, | 908 plotFSDwithHD2(familySizeList1_reldiff, maximumXFS_reldiff, minimumXFS_reldiff, quant=quant, pdf=pdf, |
920 subtitle="Family size distribution with delta Hamming distances within the tags", | 909 subtitle="Family size distribution separated by delta Hamming distances within the tags", |
921 relative=True, diff=True, title_file1=name_file) | 910 relative=True, diff=True, title_file1=name_file) |
922 | 911 |
923 | 912 |
924 # plots for chimeric reads | 913 # plots for chimeric reads |
925 if len(minHD_tags_zeros) != 0: | 914 if len(minHD_tags_zeros) != 0: |
929 title_file1=name_file, lenTags=lenTags,xlabel="Hamming distance", relative=False) | 918 title_file1=name_file, lenTags=lenTags,xlabel="Hamming distance", relative=False) |
930 | 919 |
931 ## FSD | 920 ## FSD |
932 plotFSDwithHD2(familySizeList1_diff_zeros, maximumXFS_diff_zeros, minimumXFS_diff_zeros, | 921 plotFSDwithHD2(familySizeList1_diff_zeros, maximumXFS_diff_zeros, minimumXFS_diff_zeros, |
933 quant=quant, pdf=pdf, | 922 quant=quant, pdf=pdf, |
934 subtitle="Family size distribution with Hamming distance from the non-identical half of chimeras", | 923 subtitle="Family size distribution separated by Hamming distance of the non-identical half of chimeras", |
935 relative=False, diff=False, title_file1=name_file) | 924 relative=False, diff=False, title_file1=name_file) |
936 | 925 |
937 ### print all data to a CSV file | 926 ### print all data to a CSV file |
938 #### HD #### | 927 #### HD #### |
939 summary, sumCol = createTableHD(list1, "HD=") | 928 summary, sumCol = createTableHD(list1, "HD=") |