comparison readmap.py @ 2:ebfc73c72652 draft

planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit c9ca5272028c778ad137601e75ddbea3459d9a05-dirty
author mvdbeek
date Wed, 03 Feb 2016 09:20:34 -0500
parents ac7d8e55bb67
children 9b62e6b0d219
comparison
equal deleted inserted replaced
1:e4874d1ae69d 2:ebfc73c72652
64 fields= line.split("\t") 64 fields= line.split("\t")
65 if Dict[fields[0]] != 0: 65 if Dict[fields[0]] != 0:
66 filtered_list.append(line) 66 filtered_list.append(line)
67 return filtered_list 67 return filtered_list
68 68
69
def listify_plottable_item(item):
    """
    Split one plottable line into its four tab-separated fields.

    item is a single string such as 'FBti0020401<TAB>78<TAB>-1.0<TAB>R'.
    Returns the tuple (gene, coordinate, count, orientation); every field
    is returned as a string, no numeric conversion is performed.
    Raises ValueError when the line does not hold exactly four fields.
    """
    fields = item.split("\t")
    # Unpacking enforces the four-column layout.
    gene, coordinate, count, orientation = fields
    return (gene, coordinate, count, orientation)
78
def lookup_gene_length(gene, readDict):
    """
    Return the "size" entry of the first sample's instanceDict.

    gene is accepted for interface compatibility but is not used.
    readDict maps sample names to objects exposing an ``instanceDict``
    attribute. Raises IndexError when readDict is empty.
    """
    # list(readDict)[0] picks the same key as readDict.keys()[0] but also
    # works when keys() returns a non-indexable view.
    first_sample = list(readDict)[0]
    # NOTE(review): other code in this file indexes instanceDict by gene
    # name, so the literal "size" key looks suspicious -- confirm whether
    # the length should instead come from the entry for `gene`.
    return readDict[first_sample].instanceDict["size"]
81
def handle_start_stop_coordinates(plottable, readDict):
    """
    To ensure that the plot area always includes the correct start and end
    coordinates, add an entry of count 0 at the start [coordinate 0] and at
    the end [gene length, as given by lookup_gene_length] if these do not
    exist.

    plottable is a list of tab-separated lines (gene, coordinate, count,
    orientation). The list is padded IN PLACE, so a caller that ignores the
    return value still sees the added lines; the same list is also returned
    for convenience. An empty list is returned unchanged because there is
    no line to read the gene name from.
    """
    if not plottable:
        return plottable
    gene, first_coordinate, _count, _orientation = listify_plottable_item(plottable[0])
    if first_coordinate != "0":
        # insert() rather than "line + list": a str cannot be concatenated
        # to a list.
        plottable.insert(0, "\t".join([gene, "0", "0", "F"]))
    gene_length = str(lookup_gene_length(gene, readDict))
    # The end check must look at the LAST line, not the first one.
    last_coordinate = listify_plottable_item(plottable[-1])[1]
    if last_coordinate != gene_length:
        plottable.append("\t".join([gene, gene_length, "0", "F"]))
    return plottable
97
69 def write_readplot_dataframe(readDict, readmap_file): 98 def write_readplot_dataframe(readDict, readmap_file):
70 listoflines = [] 99 listoflines = []
71 with open(readmap_file, 'w') as readmap: 100 with open(readmap_file, 'w') as readmap:
72 print >>readmap, "gene\tcoord\tcount\tpolarity\tsample" 101 print >>readmap, "gene\tcoord\tcount\tpolarity\tsample"
73 for sample in readDict.keys(): 102 for sample in readDict.keys():
75 dict=readDict[sample] 104 dict=readDict[sample]
76 else: 105 else:
77 dict=readDict[sample].instanceDict 106 dict=readDict[sample].instanceDict
78 for gene in dict.keys(): 107 for gene in dict.keys():
79 plottable = dict[gene].readplot() 108 plottable = dict[gene].readplot()
109 handle_start_stop_coordinates(plottable, readDict)
80 for line in plottable: 110 for line in plottable:
81 #print >>readmap, "%s\t%s" % (line, sample) 111 #print >>readmap, "%s\t%s" % (line, sample)
82 listoflines.append ("%s\t%s" % (line, sample)) 112 listoflines.append ("%s\t%s" % (line, sample))
83 listoflines = dataframe_sanityzer(listoflines) 113 listoflines = dataframe_sanityzer(listoflines)
84 for line in listoflines: 114 for line in listoflines: