Mercurial > repos > rnateam > graphclust_motif_finder_plot
diff MotifFinderPlot.py @ 0:edcf58ab7552 draft
planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/master/tools/GraphClust/Plotting commit 057c2fd398055dc86eb2c00d8a74f301d5c231d9-dirty
author | rnateam |
---|---|
date | Wed, 22 Feb 2017 16:53:29 -0500 |
parents | |
children | adf18db4c14a |
line wrap: on
line diff
--- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/MotifFinderPlot.py Wed Feb 22 16:53:29 2017 -0500 @@ -0,0 +1,60 @@ +#!/usr/bin/env python + +import matplotlib +matplotlib.use('Agg') +from matplotlib import pyplot as plt +import matplotlib.patches as mpatches +from collections import defaultdict +import glob +import pandas as pd +import itertools +import seaborn as sns + + +def plot_bar(ranges, colors, orig_names, cluster_nums): + fig, ax = plt.subplots() + for i, k in enumerate(sorted(ranges.keys())): + ax.broken_barh(ranges[k], (i-0.25, 0.5), facecolors=colors[k]) + + ax.set_xlim(0) + ax.set_xlabel('position in sequence') + ax.set_yticklabels(['']+[k+'-'+orig_names[k] for k in sorted(ranges.keys())]) + ax.grid(True) + fig.suptitle('Structure motif prediction\nRegions with same color are prediticted to have similar structures') + # Add the legend + patches = [mpatches.Patch(color=cluster_nums[lab], label=lab) for lab in sorted(cluster_nums)] + ax.legend(handles=patches, loc='best') # , bbox_to_anchor=(1, 0.5), loc='center left') + plt.savefig("motif_plot.png", bbox_inches='tight') + + +def parse_clusters(): + currentdir_files = sorted(list(glob.glob('*'))) + print ("currentdir_files are: ", currentdir_files) + print ("RESULTS_files are: ", sorted(list(glob.glob('RESULTS/*')))) + + cluster_files = sorted(list(glob.glob('RESULTS/*.cluster.all'))) + if len(cluster_files) == 0: + raise RuntimeError('Expected cluster.all search path is empty:{}'.format(cluster_files)) + palette = itertools.cycle(sns.color_palette("Set2", len(cluster_files))) + + + ranges = defaultdict(list) + colors = defaultdict(list) + orig_names = defaultdict(list) + cluster_nums = defaultdict(list) + for cluster_file in cluster_files: + cluster_color = next(palette) + df_cluster = pd.read_csv(cluster_file, sep='\s+', header=None) + for irow, row in df_cluster.iterrows(): + seq, start, end, strand = row[0].split("#") + ranges[seq].append((int(start), int(end)-int(start)+1)) + colors[seq].append(cluster_color) + assert row[1] == 'RESULT' + cluster_nums['cluster-{}'.format(row[2])] = cluster_color + assert row[9] == 'ORIGHEAD' + orig_names[seq] = row[10] + return ranges, colors, orig_names, cluster_nums + + +my_ranges, my_colors, my_orig_names, my_cluster_nums = parse_clusters() +plot_bar(my_ranges, my_colors, my_orig_names, my_cluster_nums)