Mercurial > repos > mheinzl > hd
diff hd.py @ 10:69aa17354a6e draft
planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit f01678e9bfead9f9e1b54dd9ecf7141f057dd9de
author | mheinzl |
---|---|
date | Tue, 15 May 2018 11:27:27 -0400 |
parents | c81bc96bea1c |
children | 7adc48c8a03d |
line wrap: on
line diff
--- a/hd.py Tue May 15 11:19:07 2018 -0400
+++ b/hd.py Tue May 15 11:27:27 2018 -0400
@@ -24,11 +24,6 @@
 import cPickle as pickle
 from multiprocessing.pool import Pool
 from functools import partial
-#from HDAnalysis_plots.plot_HDwithFSD import plotHDwithFSD
-#from HDAnalysis_plots.plot_FSDwithHD2 import plotFSDwithHD2
-#from HDAnalysis_plots.plot_HDwithinSeq_Sum2 import plotHDwithinSeq_Sum2
-#from HDAnalysis_plots.table_HD import createTableHD, createFileHD, createTableHDwithTags, createFileHDwithinTag
-#from HDAnalysis_plots.table_FSD import createTableFSD2, createFileFSD2
 import argparse
 import sys
 import os
@@ -168,7 +163,7 @@
 counts = plt.hist(ham, align="left", rwidth=0.8, stacked=False, label=[ "HD a", "HD b","HD a+b"],
- bins=range1, color=["#585858", "#58ACFA", "#FA5858"], edgecolor='black', linewidth=1)
+ bins=range1, color=[ "#58ACFA", "#FA5858","#585858"], edgecolor='black', linewidth=1)
 plt.legend(loc='upper right', fontsize=14, frameon=True, bbox_to_anchor=(1.55, 1))
 plt.suptitle('Hamming distances within tags', fontsize=14)
 #plt.title(title_file1, fontsize=12)
@@ -693,8 +688,8 @@
 ### PLOT ###
 plt.rcParams['axes.facecolor'] = "E0E0E0" # grey background color
- plt.rcParams['xtick.labelsize'] = 12
- plt.rcParams['ytick.labelsize'] = 12
+ plt.rcParams['xtick.labelsize'] = 14
+ plt.rcParams['ytick.labelsize'] = 14
 plt.rcParams['patch.edgecolor'] = "#000000"
 plt.rc('figure', figsize=(11.69, 8.27)) # A4 format
@@ -712,9 +707,6 @@
 pdf_files = [title_savedFile_pdf]
 csv_files = [title_savedFile_csv]
- print(type(onlyDuplicates))
- print(onlyDuplicates)
-
 for f, name_file, pdf_f, csv_f in zip(files, names, pdf_files, csv_files):
 with open(csv_f, "w") as output_file, PdfPages(pdf_f) as pdf:
 print("dataset: ", name_file)
@@ -805,9 +797,6 @@
 # HD analysis for chimeric reads
 proc_pool_b = Pool(nproc)
- print(chunks_sample)
- print(result2)
- print(data_array)
 diff_list_a = proc_pool_b.map(partial(hamming_difference, array2=result2, mate_b=False), chunks_sample)
 diff_list_b = proc_pool_b.map(partial(hamming_difference, array2=result2, mate_b=True), chunks_sample)
 proc_pool_b.close()
@@ -903,21 +892,21 @@
 plotHDwithFSD(listDifference1, maximumXDifference, minimumXDifference, pdf=pdf, subtitle="Delta Hamming distance within tags", title_file1=name_file, lenTags=lenTags,
- xlabel="abs delta Hamming distance", relative=False)
+ xlabel="absolute delta Hamming distance", relative=False)
 plotHDwithFSD(listRelDifference1, maximumXRelDifference, minimumXRelDifference, pdf=pdf, subtitle="Relative delta Hamming distances within tags", title_file1=name_file, lenTags=lenTags,
- xlabel="rel delta Hamming distance", relative=True)
+ xlabel="relative delta Hamming distance", relative=True)
 #################### Plot FSD separated after difference between HD's #####################################
 ########################################################################################################################
 plotFSDwithHD2(familySizeList1_diff, maximumXFS_diff, minimumXFS_diff,
- subtitle="Family size distribution with delta Hamming distances within the tags",
+ subtitle="Family size distribution separated by delta Hamming distances within the tags",
 pdf=pdf,relative=False, diff=True, title_file1=name_file, quant=quant)
 plotFSDwithHD2(familySizeList1_reldiff, maximumXFS_reldiff, minimumXFS_reldiff, quant=quant, pdf=pdf,
- subtitle="Family size distribution with delta Hamming distances within the tags",
+ subtitle="Family size distribution separated by delta Hamming distances within the tags",
 relative=True, diff=True, title_file1=name_file)
@@ -931,7 +920,7 @@
 ## FSD
 plotFSDwithHD2(familySizeList1_diff_zeros, maximumXFS_diff_zeros, minimumXFS_diff_zeros, quant=quant, pdf=pdf,
- subtitle="Family size distribution with Hamming distance from the non-identical half of chimeras",
+ subtitle="Family size distribution separated by Hamming distance of the non-identical half of chimeras",
 relative=False, diff=False, title_file1=name_file)
### print all data to a CSV file