Mercurial > repos > drosofff > msp_sr_readmap_and_size_histograms
comparison readmap.py @ 2:ebfc73c72652 draft
planemo upload for repository https://github.com/ARTbio/tools-artbio/tree/master/tools/msp_sr_readmap_and_size_histograms commit c9ca5272028c778ad137601e75ddbea3459d9a05-dirty
author | mvdbeek |
---|---|
date | Wed, 03 Feb 2016 09:20:34 -0500 |
parents | ac7d8e55bb67 |
children | 9b62e6b0d219 |
comparison
equal
deleted
inserted
replaced
1:e4874d1ae69d | 2:ebfc73c72652 |
---|---|
64 fields= line.split("\t") | 64 fields= line.split("\t") |
65 if Dict[fields[0]] != 0: | 65 if Dict[fields[0]] != 0: |
66 filtered_list.append(line) | 66 filtered_list.append(line) |
67 return filtered_list | 67 return filtered_list |
68 | 68 |
69 | |
70 def listify_plottable_item(item): | |
71 """ | |
72 plottable is a list of strings: | |
73 'FBti0020401\t78\t-1.0\tR' | |
74 split on tab and return gene, coordinate, count and orientation | |
75 """ | |
76 gene, coordinate, count, orientation = item.split("\t") | |
77 return gene, coordinate, count, orientation | |
78 | |
79 def lookup_gene_length(gene, readDict): | |
80 return readDict[readDict.keys()[0]].instanceDict["size"] | |
81 | |
82 def handle_start_stop_coordinates(plottable, readDict): | |
83 """ | |
84 To ensure that the plot area always includes the correct start and end coordinates, | |
85 we add an entry at start [coordinate 0] and end [last coordinate] of count 0, if these do not exist. | |
86 """ | |
87 first_line = plottable[0] | |
88 last_line = plottable[-1] | |
89 gene, coordinate, count, orientation = listify_plottable_item(first_line) | |
90 if not coordinate == "0": | |
91 new_line = "\t".join([gene, "0", "0", "F"]) | |
92 plottable = new_line + plottable | |
93 gene_length = str(lookup_gene_length(gene, readDict)) | |
94 if not coordinate == gene_length: | |
95 last_line = "\t".join([gene, gene_length, "0", "F"]) | |
96 plottable = plottable + last_line | |
97 | |
69 def write_readplot_dataframe(readDict, readmap_file): | 98 def write_readplot_dataframe(readDict, readmap_file): |
70 listoflines = [] | 99 listoflines = [] |
71 with open(readmap_file, 'w') as readmap: | 100 with open(readmap_file, 'w') as readmap: |
72 print >>readmap, "gene\tcoord\tcount\tpolarity\tsample" | 101 print >>readmap, "gene\tcoord\tcount\tpolarity\tsample" |
73 for sample in readDict.keys(): | 102 for sample in readDict.keys(): |
75 dict=readDict[sample] | 104 dict=readDict[sample] |
76 else: | 105 else: |
77 dict=readDict[sample].instanceDict | 106 dict=readDict[sample].instanceDict |
78 for gene in dict.keys(): | 107 for gene in dict.keys(): |
79 plottable = dict[gene].readplot() | 108 plottable = dict[gene].readplot() |
109 handle_start_stop_coordinates(plottable, readDict) | |
80 for line in plottable: | 110 for line in plottable: |
81 #print >>readmap, "%s\t%s" % (line, sample) | 111 #print >>readmap, "%s\t%s" % (line, sample) |
82 listoflines.append ("%s\t%s" % (line, sample)) | 112 listoflines.append ("%s\t%s" % (line, sample)) |
83 listoflines = dataframe_sanityzer(listoflines) | 113 listoflines = dataframe_sanityzer(listoflines) |
84 for line in listoflines: | 114 for line in listoflines: |