diff hd.py @ 11:7adc48c8a03d draft

planemo upload for repository https://github.com/monikaheinzl/galaxyProject/tree/master/tools/hd commit 8d9bdadb5e154dadc455720c99700afbd9aafae9
author mheinzl
date Tue, 15 May 2018 13:31:02 -0400
parents 69aa17354a6e
children 5b0a95f205ad
line wrap: on
line diff
--- a/hd.py	Tue May 15 11:27:27 2018 -0400
+++ b/hd.py	Tue May 15 13:31:02 2018 -0400
@@ -30,7 +30,7 @@
 from matplotlib.backends.backend_pdf import PdfPages
 from collections import Counter
 
-def plotFSDwithHD2(familySizeList1,maximumXFS,minimumXFS, quant,
+def plotFSDwithHD2(familySizeList1,maximumXFS,minimumXFS, originalCounts,
                    title_file1, subtitle, pdf, relative=False, diff = True):
     if diff is False:
         colors = ["#e6194b", "#3cb44b", "#ffe119", "#0082c8", "#f58231", "#911eb4"]
@@ -78,9 +78,9 @@
     legend = "\nmax. family size: \nabsolute frequency: \nrelative frequency: "
     plt.text(0.15, -0.08, legend, size=12, transform=plt.gcf().transFigure)
 
-    count = numpy.bincount(quant)  # original counts
+    count = numpy.bincount(originalCounts)  # original counts
     legend1 = "{}\n{}\n{:.5f}" \
-        .format(max(quant), count[len(count) - 1], float(count[len(count) - 1]) / sum(count))
+        .format(max(originalCounts), count[len(count) - 1], float(count[len(count) - 1]) / sum(count))
     plt.text(0.5, -0.08, legend1, size=12, transform=plt.gcf().transFigure)
     legend3 = "singletons\n{:,}\n{:.5f}".format(int(counts[0][len(counts[0]) - 1][1]),
                                                 float(counts[0][len(counts[0]) - 1][1]) / sum(
@@ -508,14 +508,14 @@
         i += 1
             
         #print(i)
-    diff11 = [st for st in diff11 if st != 999]
-    ham1 = [st for st in ham1 if st != 999]
-    ham2 = [st for st in ham2 if st != 999]
-    min_valueList = [st for st in min_valueList if st != 999]
-    min_tagsList = [st for st in min_tagsList if st != 999]
-    relativeDiffList = [st for st in relativeDiffList if st != 999]
-    diff11_zeros = [st for st in diff11_zeros if st != 999]
-    min_tagsList_zeros = [st for st in min_tagsList_zeros if st != 999]
+    #diff11 = [st for st in diff11 if st != 999]
+    #ham1 = [st for st in ham1 if st != 999]
+    #ham2 = [st for st in ham2 if st != 999]
+    #min_valueList = [st for st in min_valueList if st != 999]
+    #min_tagsList = [st for st in min_tagsList if st != 999]
+    #relativeDiffList = [st for st in relativeDiffList if st != 999]
+    #diff11_zeros = [st for st in diff11_zeros if st != 999]
+    #min_tagsList_zeros = [st for st in min_tagsList_zeros if st != 999]
 
     return ([diff11, ham1, ham2, min_valueList, min_tagsList, relativeDiffList, diff11_zeros, min_tagsList_zeros])
 
@@ -525,28 +525,28 @@
         integers = numpy.array(data_array[:, 0]).astype(int)
         return(integers, data_array)
 
-def hammingDistanceWithFS(quant, ham):
-    quant = numpy.asarray(quant)
+def hammingDistanceWithFS(fs, ham):
+    fs = numpy.asarray(fs)
     maximum = max(ham)
     minimum = min(ham)
     ham = numpy.asarray(ham)
 
-    singletons = numpy.where(quant == 1)[0]
+    singletons = numpy.where(fs == 1)[0]
     data = ham[singletons]
 
-    hd2 = numpy.where(quant == 2)[0]
+    hd2 = numpy.where(fs == 2)[0]
     data2 = ham[hd2]
 
-    hd3 = numpy.where(quant == 3)[0]
+    hd3 = numpy.where(fs == 3)[0]
     data3 = ham[hd3]
 
-    hd4 = numpy.where(quant == 4)[0]
+    hd4 = numpy.where(fs == 4)[0]
     data4 = ham[hd4]
 
-    hd5 = numpy.where((quant >= 5) & (quant <= 10))[0]
+    hd5 = numpy.where((fs >= 5) & (fs <= 10))[0]
     data5 = ham[hd5]
 
-    hd6 = numpy.where(quant > 10)[0]
+    hd6 = numpy.where(fs > 10)[0]
     data6 = ham[hd6]
 
     list1 = [data, data2, data3, data4, data5, data6]
@@ -884,7 +884,7 @@
             ##########################       Plot FSD with separation after HD       ###############################################
             ########################################################################################################################
             plotFSDwithHD2(familySizeList1, maximumXFS, minimumXFS,
-                           quant=quant, subtitle="Family size distribution separated by Hamming distance",
+                           originalCounts=quant, subtitle="Family size distribution separated by Hamming distance",
                            pdf=pdf,relative=False, title_file1=name_file, diff=False)
 
             ##########################       Plot difference between HD's separated after FSD       ##########################################
@@ -903,9 +903,9 @@
             ########################################################################################################################
             plotFSDwithHD2(familySizeList1_diff, maximumXFS_diff, minimumXFS_diff,
                            subtitle="Family size distribution separated by delta Hamming distances within the tags",
-                           pdf=pdf,relative=False, diff=True, title_file1=name_file, quant=quant)
+                           pdf=pdf,relative=False, diff=True, title_file1=name_file, originalCounts=quant)
 
-            plotFSDwithHD2(familySizeList1_reldiff, maximumXFS_reldiff, minimumXFS_reldiff, quant=quant, pdf=pdf,
+            plotFSDwithHD2(familySizeList1_reldiff, maximumXFS_reldiff, minimumXFS_reldiff, originalCounts=quant, pdf=pdf,
                            subtitle="Family size distribution separated by delta Hamming distances within the tags",
                            relative=True, diff=True, title_file1=name_file)
 
@@ -919,7 +919,7 @@
 
                 ## FSD
                 plotFSDwithHD2(familySizeList1_diff_zeros, maximumXFS_diff_zeros, minimumXFS_diff_zeros,
-                               quant=quant, pdf=pdf,
+                               originalCounts=quant, pdf=pdf,
                                subtitle="Family size distribution separated by Hamming distance of the non-identical half of chimeras",
                                relative=False, diff=False, title_file1=name_file)