diff readmap.py @ 2:ebfc73c72652 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit c9ca5272028c778ad137601e75ddbea3459d9a05-dirty
author mvdbeek
date Wed, 03 Feb 2016 09:20:34 -0500
parents ac7d8e55bb67
children 9b62e6b0d219
line wrap: on
line diff
--- a/readmap.py	Thu Nov 26 11:56:07 2015 -0500
+++ b/readmap.py	Wed Feb 03 09:20:34 2016 -0500
@@ -66,6 +66,35 @@
       filtered_list.append(line) 
   return filtered_list
 
+
+def listify_plottable_item(item):
+  """
+  Break one tab-separated plottable entry such as 'FBti0020401\t78\t-1.0\tR'
+  into the tuple (gene, coordinate, count, orientation); all fields stay strings.
+  """
+  fields = item.split("\t")
+  gene, coordinate, count, orientation = fields  # raises ValueError unless exactly 4 fields
+  return (gene, coordinate, count, orientation)
+
+def lookup_gene_length(gene, readDict):  # NOTE(review): `gene` is accepted but never used below -- confirm intent
+  return readDict[readDict.keys()[0]].instanceDict["size"]  # "size" entry of the first key; keys()[0] needs a list (Python 2 dict)
+
+def handle_start_stop_coordinates(plottable, readDict):
+  """
+  Pad `plottable` (list of 'gene\tcoordinate\tcount\torientation' strings) IN PLACE with a count-0
+  entry at coordinate 0 and at the gene length when absent; returns the same list. Empty input is left as is.
+  """
+  if not plottable:  # no entry to take the gene name from, so nothing can be padded
+    return plottable
+  gene, first_coordinate = listify_plottable_item(plottable[0])[:2]
+  if first_coordinate != "0":
+    plottable.insert(0, "\t".join([gene, "0", "0", "F"]))  # in place, so the caller's list changes
+  gene_length = str(lookup_gene_length(gene, readDict))
+  last_coordinate = listify_plottable_item(plottable[-1])[1]  # end check uses the LAST entry
+  if last_coordinate != gene_length:
+    plottable.append("\t".join([gene, gene_length, "0", "F"]))
+  return plottable
+
 def write_readplot_dataframe(readDict, readmap_file):
   listoflines = []
   with open(readmap_file, 'w') as readmap:
@@ -77,6 +106,7 @@
         dict=readDict[sample].instanceDict
       for gene in dict.keys():
         plottable = dict[gene].readplot()
+        handle_start_stop_coordinates(plottable, readDict)
         for line in plottable:
           #print >>readmap, "%s\t%s" % (line, sample)
           listoflines.append ("%s\t%s" % (line, sample))