annotate MotifFinderPlot.py @ 0:edcf58ab7552 draft

planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/master/tools/GraphClust/Plotting commit 057c2fd398055dc86eb2c00d8a74f301d5c231d9-dirty
author rnateam
date Wed, 22 Feb 2017 16:53:29 -0500
parents
children adf18db4c14a
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
rev   line source
0
edcf58ab7552 planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/master/tools/GraphClust/Plotting commit 057c2fd398055dc86eb2c00d8a74f301d5c231d9-dirty
rnateam
parents:
diff changeset
1 #!/usr/bin/env python
edcf58ab7552 planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/master/tools/GraphClust/Plotting commit 057c2fd398055dc86eb2c00d8a74f301d5c231d9-dirty
rnateam
parents:
diff changeset
2
edcf58ab7552 planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/master/tools/GraphClust/Plotting commit 057c2fd398055dc86eb2c00d8a74f301d5c231d9-dirty
rnateam
parents:
diff changeset
3 import matplotlib
edcf58ab7552 planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/master/tools/GraphClust/Plotting commit 057c2fd398055dc86eb2c00d8a74f301d5c231d9-dirty
rnateam
parents:
diff changeset
4 matplotlib.use('Agg')
edcf58ab7552 planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/master/tools/GraphClust/Plotting commit 057c2fd398055dc86eb2c00d8a74f301d5c231d9-dirty
rnateam
parents:
diff changeset
5 from matplotlib import pyplot as plt
edcf58ab7552 planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/master/tools/GraphClust/Plotting commit 057c2fd398055dc86eb2c00d8a74f301d5c231d9-dirty
rnateam
parents:
diff changeset
6 import matplotlib.patches as mpatches
edcf58ab7552 planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/master/tools/GraphClust/Plotting commit 057c2fd398055dc86eb2c00d8a74f301d5c231d9-dirty
rnateam
parents:
diff changeset
7 from collections import defaultdict
edcf58ab7552 planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/master/tools/GraphClust/Plotting commit 057c2fd398055dc86eb2c00d8a74f301d5c231d9-dirty
rnateam
parents:
diff changeset
8 import glob
edcf58ab7552 planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/master/tools/GraphClust/Plotting commit 057c2fd398055dc86eb2c00d8a74f301d5c231d9-dirty
rnateam
parents:
diff changeset
9 import pandas as pd
edcf58ab7552 planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/master/tools/GraphClust/Plotting commit 057c2fd398055dc86eb2c00d8a74f301d5c231d9-dirty
rnateam
parents:
diff changeset
10 import itertools
edcf58ab7552 planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/master/tools/GraphClust/Plotting commit 057c2fd398055dc86eb2c00d8a74f301d5c231d9-dirty
rnateam
parents:
diff changeset
11 import seaborn as sns
edcf58ab7552 planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/master/tools/GraphClust/Plotting commit 057c2fd398055dc86eb2c00d8a74f301d5c231d9-dirty
rnateam
parents:
diff changeset
12
edcf58ab7552 planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/master/tools/GraphClust/Plotting commit 057c2fd398055dc86eb2c00d8a74f301d5c231d9-dirty
rnateam
parents:
diff changeset
13
edcf58ab7552 planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/master/tools/GraphClust/Plotting commit 057c2fd398055dc86eb2c00d8a74f301d5c231d9-dirty
rnateam
parents:
diff changeset
def plot_bar(ranges, colors, orig_names, cluster_nums):
    """Draw one horizontal broken bar per sequence and save it as a PNG.

    Args:
        ranges: mapping of sequence id -> list of ``(start, width)`` tuples,
            handed unchanged to ``Axes.broken_barh``.
        colors: mapping of sequence id -> list of face colors, used for the
            bars of the same sequence id.
        orig_names: mapping of sequence id -> original name. Ids and names
            are joined with ``'-'`` to build the row label, so both must be
            strings.
        cluster_nums: mapping of legend label -> color of that legend entry.

    Side effects:
        Writes ``motif_plot.png`` to the current working directory.
    """
    seq_ids = sorted(ranges)

    fig, ax = plt.subplots()
    for row, seq_id in enumerate(seq_ids):
        # Each bar is 0.5 high and centred on its integer row index.
        ax.broken_barh(ranges[seq_id], (row - 0.25, 0.5),
                       facecolors=colors[seq_id])

    ax.set_xlim(0)
    ax.set_xlabel('position in sequence')
    # Pin one tick to the centre of every row before labelling. Labelling
    # whatever ticks the automatic locator happens to pick attaches names to
    # the wrong bars as soon as it does not choose exactly one tick per
    # integer.
    ax.set_yticks(list(range(len(seq_ids))))
    ax.set_yticklabels([seq_id + '-' + orig_names[seq_id]
                        for seq_id in seq_ids])
    ax.grid(True)
    fig.suptitle('Structure motif prediction\nRegions with same color are prediticted to have similar structures')

    # Add the legend
    patches = [mpatches.Patch(color=cluster_nums[lab], label=lab)
               for lab in sorted(cluster_nums)]
    ax.legend(handles=patches, loc='best')

    fig.savefig("motif_plot.png", bbox_inches='tight')
    # pyplot keeps every figure alive until it is closed explicitly.
    plt.close(fig)
edcf58ab7552 planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/master/tools/GraphClust/Plotting commit 057c2fd398055dc86eb2c00d8a74f301d5c231d9-dirty
rnateam
parents:
diff changeset
28
edcf58ab7552 planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/master/tools/GraphClust/Plotting commit 057c2fd398055dc86eb2c00d8a74f301d5c231d9-dirty
rnateam
parents:
diff changeset
29
edcf58ab7552 planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/master/tools/GraphClust/Plotting commit 057c2fd398055dc86eb2c00d8a74f301d5c231d9-dirty
rnateam
parents:
diff changeset
def parse_clusters():
    """Collect motif locations from every ``RESULTS/*.cluster.all`` file.

    Each file is read as a whitespace-separated table without a header row.
    The columns used are:

    * 0  -- ``<seq>#<start>#<end>#<strand>``
    * 1  -- the literal marker ``RESULT``
    * 2  -- cluster identifier (used for the legend label ``cluster-<id>``)
    * 9  -- the literal marker ``ORIGHEAD``
    * 10 -- original name of the sequence

    All rows of one file share one color.

    Returns:
        A tuple ``(ranges, colors, orig_names, cluster_nums)`` of
        ``defaultdict`` objects:
        ``ranges[seq]`` is a list of ``(start, width)`` tuples,
        ``colors[seq]`` is the list of colors parallel to ``ranges[seq]``,
        ``orig_names[seq]`` is the original name as a string, and
        ``cluster_nums['cluster-<id>']`` is the color of that cluster.

    Raises:
        RuntimeError: if no file matches ``RESULTS/*.cluster.all``.
        ValueError: if a file has fewer than 11 columns or a row does not
            carry the ``RESULT`` / ``ORIGHEAD`` markers.
    """
    cluster_pattern = 'RESULTS/*.cluster.all'

    # Diagnostic listing of the working directory, kept on stdout.
    currentdir_files = sorted(glob.glob('*'))
    print("currentdir_files are: ", currentdir_files)
    print("RESULTS_files are: ", sorted(glob.glob('RESULTS/*')))

    cluster_files = sorted(glob.glob(cluster_pattern))
    if not cluster_files:
        # Report the pattern that was searched; the (empty) match list
        # carries no information.
        raise RuntimeError(
            'Expected cluster.all search path is empty:{}'.format(cluster_pattern))
    # One color per cluster file.
    palette = sns.color_palette("Set2", len(cluster_files))

    ranges = defaultdict(list)
    colors = defaultdict(list)
    orig_names = defaultdict(list)
    cluster_nums = defaultdict(list)
    for cluster_file, cluster_color in zip(cluster_files, palette):
        df_cluster = pd.read_csv(cluster_file, sep=r'\s+', header=None)
        if df_cluster.shape[1] < 11:
            raise ValueError('{}: expected at least 11 columns, found {}'.format(
                cluster_file, df_cluster.shape[1]))
        for row in df_cluster.itertuples(index=False, name=None):
            # Validate before touching any result so that a malformed row
            # cannot leave half-recorded data behind; unlike ``assert`` this
            # check also survives ``python -O``.
            if row[1] != 'RESULT' or row[9] != 'ORIGHEAD':
                raise ValueError(
                    '{}: unexpected row layout, markers were {!r} and {!r}'.format(
                        cluster_file, row[1], row[9]))
            seq, start, end, strand = row[0].split("#")
            start = int(start)
            # Width is inclusive of both end points.
            ranges[seq].append((start, int(end) - start + 1))
            colors[seq].append(cluster_color)
            cluster_nums['cluster-{}'.format(row[2])] = cluster_color
            # Names made only of digits are parsed as numbers by pandas;
            # keep them as text because they are concatenated into labels.
            orig_names[seq] = str(row[10])
    return ranges, colors, orig_names, cluster_nums
edcf58ab7552 planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/master/tools/GraphClust/Plotting commit 057c2fd398055dc86eb2c00d8a74f301d5c231d9-dirty
rnateam
parents:
diff changeset
57
edcf58ab7552 planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/master/tools/GraphClust/Plotting commit 057c2fd398055dc86eb2c00d8a74f301d5c231d9-dirty
rnateam
parents:
diff changeset
58
edcf58ab7552 planemo upload for repository https://github.com/eteriSokhoyan/galaxytools/tree/master/tools/GraphClust/Plotting commit 057c2fd398055dc86eb2c00d8a74f301d5c231d9-dirty
rnateam
parents:
diff changeset
# Script body: parse the cluster files under RESULTS/ and render the motif
# plot from the collected data.
(my_ranges,
 my_colors,
 my_orig_names,
 my_cluster_nums) = parse_clusters()
plot_bar(
    ranges=my_ranges,
    colors=my_colors,
    orig_names=my_orig_names,
    cluster_nums=my_cluster_nums
)