Mercurial > repos > rnateam > graphclust_motif_finder_plot
comparison MotifFinderPlot.py @ 0:edcf58ab7552 draft
planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/master/tools/GraphClust/Plotting commit 057c2fd398055dc86eb2c00d8a74f301d5c231d9-dirty
| author | rnateam | 
|---|---|
| date | Wed, 22 Feb 2017 16:53:29 -0500 | 
| parents | |
| children | adf18db4c14a | 
   comparison
  equal
  deleted
  inserted
  replaced
| -1:000000000000 | 0:edcf58ab7552 | 
|---|---|
| 1 #!/usr/bin/env python | |
| 2 | |
| 3 import matplotlib | |
| 4 matplotlib.use('Agg') | |
| 5 from matplotlib import pyplot as plt | |
| 6 import matplotlib.patches as mpatches | |
| 7 from collections import defaultdict | |
| 8 import glob | |
| 9 import pandas as pd | |
| 10 import itertools | |
| 11 import seaborn as sns | |
| 12 | |
| 13 | |
def plot_bar(ranges, colors, orig_names, cluster_nums):
    """Draw one horizontal bar row per sequence and save it to ``motif_plot.png``.

    Args:
        ranges: mapping of sequence id to a list of ``(start, width)`` tuples,
            handed unchanged to ``Axes.broken_barh``.
        colors: mapping of sequence id to the face colors of its segments,
            in the same order as the entries of ``ranges``.
        orig_names: mapping of sequence id to the name appended to the id in
            the y-axis label (``<id>-<name>``).
        cluster_nums: mapping of legend label to the color shown for it.

    Side effects:
        Writes ``motif_plot.png`` into the current working directory.
    """
    seq_ids = sorted(ranges)

    fig, ax = plt.subplots()
    for row, seq_id in enumerate(seq_ids):
        # Each sequence gets a band of height 0.5 centred on its row index.
        ax.broken_barh(ranges[seq_id], (row - 0.25, 0.5),
                       facecolors=colors[seq_id])

    ax.set_xlim(left=0)
    ax.set_xlabel('position in sequence')
    # Pin the ticks to the bar rows before labelling them. Labelling the
    # automatically chosen ticks only lines up when the locator happens to
    # pick integer positions starting one below the first row.
    ax.set_yticks(list(range(len(seq_ids))))
    # format() instead of '+' so a name that was parsed as a number still works.
    ax.set_yticklabels(['{}-{}'.format(seq_id, orig_names[seq_id])
                        for seq_id in seq_ids])
    ax.grid(True)
    fig.suptitle('Structure motif prediction\nRegions with same color are prediticted to have similar structures')

    # Add the legend: one colored patch per cluster label.
    patches = [mpatches.Patch(color=cluster_nums[lab], label=lab)
               for lab in sorted(cluster_nums)]
    ax.legend(handles=patches, loc='best')
    fig.savefig("motif_plot.png", bbox_inches='tight')
    # Release the figure so repeated calls do not accumulate open figures.
    plt.close(fig)
| 28 | |
| 29 | |
def parse_clusters():
    """Collect motif positions from every ``RESULTS/*.cluster.all`` file.

    Each file is read as a whitespace-separated table without a header. The
    columns used are:

    * column 0  -- ``<seq>#<start>#<end>#<strand>``
    * column 1  -- the literal marker ``RESULT``
    * column 2  -- the cluster identifier
    * column 9  -- the literal marker ``ORIGHEAD``
    * column 10 -- the original name of the sequence

    All rows of one file share one color from the seaborn ``Set2`` palette.

    Returns:
        A 4-tuple ``(ranges, colors, orig_names, cluster_nums)``:

        * ``ranges``: ``defaultdict(list)`` of sequence id to a list of
          ``(start, length)`` tuples, with ``length = end - start + 1``.
        * ``colors``: ``defaultdict(list)`` of sequence id to the color of
          each tuple in ``ranges``, in the same order.
        * ``orig_names``: dict of sequence id to the value of column 10.
        * ``cluster_nums``: dict of ``'cluster-<id>'`` to the file's color.

    Raises:
        RuntimeError: if no ``RESULTS/*.cluster.all`` file exists.
        ValueError: if a row lacks the ``RESULT`` / ``ORIGHEAD`` markers or
            its first column does not have exactly four ``#``-separated
            fields.
    """
    # Directory listings are printed so a failed run can be diagnosed from
    # the captured output.
    currentdir_files = sorted(glob.glob('*'))
    print("currentdir_files are: ", currentdir_files)
    print("RESULTS_files are: ", sorted(glob.glob('RESULTS/*')))

    cluster_files = sorted(glob.glob('RESULTS/*.cluster.all'))
    if not cluster_files:
        raise RuntimeError('Expected cluster.all search path is empty:{}'.format(cluster_files))

    # One palette entry per file, so pairing them up assigns each file its
    # own color in sorted-file order.
    palette = sns.color_palette("Set2", len(cluster_files))

    ranges = defaultdict(list)
    colors = defaultdict(list)
    orig_names = {}
    cluster_nums = {}
    for cluster_file, cluster_color in zip(cluster_files, palette):
        # Raw string: '\s' is a regex class, not a Python string escape.
        df_cluster = pd.read_csv(cluster_file, sep=r'\s+', header=None)
        for _, row in df_cluster.iterrows():
            # Validate with real exceptions; asserts vanish under ``python -O``.
            if row[1] != 'RESULT':
                raise ValueError(
                    "{}: expected 'RESULT' in column 1, got {!r}".format(
                        cluster_file, row[1]))
            if row[9] != 'ORIGHEAD':
                raise ValueError(
                    "{}: expected 'ORIGHEAD' in column 9, got {!r}".format(
                        cluster_file, row[9]))

            seq, start, end, _strand = row[0].split("#")
            start, end = int(start), int(end)
            # broken_barh wants (xmin, width); end is used as inclusive here.
            ranges[seq].append((start, end - start + 1))
            colors[seq].append(cluster_color)
            cluster_nums['cluster-{}'.format(row[2])] = cluster_color
            orig_names[seq] = row[10]
    return ranges, colors, orig_names, cluster_nums
| 57 | |
| 58 | |
# Script entry: parse the GraphClust result files found under RESULTS/ and
# render them; the four parsed mappings are forwarded to plot_bar in order.
parsed_clusters = parse_clusters()
plot_bar(*parsed_clusters)
