diff ramachandran_auto_protein.py @ 7:ffd6f8d159e1 draft

"planemo upload for repository https://github.com/galaxycomputationalchemistry/galaxy-tools-compchem/ commit 1b23e024af45cc0999d9142d07de6897d4189ec2"
author chemteam
date Mon, 24 Aug 2020 16:14:05 -0400
parents
children
line wrap: on
line diff
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ramachandran_auto_protein.py	Mon Aug 24 16:14:05 2020 -0400
@@ -0,0 +1,163 @@
+#!/usr/bin/env python
+
+import argparse
+import base64
+import importlib
+import sys
+
+import MDAnalysis as mda
+from MDAnalysis.analysis.dihedrals import Ramachandran
+
+import h5py
+
+from jinja2 import Environment, FileSystemLoader
+
+import matplotlib
+import matplotlib.pyplot as plt
+import matplotlib.ticker as ticker
+
+import numpy as np
+import numpy.linalg
+
+import seaborn as sns
+
+
+matplotlib.use('Agg')  # noqa
+
+
+def parse_command_line(argv):
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--itraj', help='input traj')
+    parser.add_argument('--istr', help='input str')
+    parser.add_argument('--itrajext', help='input traj ext')
+    parser.add_argument('--istrext', help='input str ext')
+    parser.add_argument('--isegid1', help='segid 1')
+    parser.add_argument('--iresid1', help='resid start')
+    parser.add_argument('--iresid2', help='resid end')
+    parser.add_argument('--iresname', help='resname e.g. ALA')
+    parser.add_argument('--igroupby', help='groupby names or ids')
+    parser.add_argument('--itemplatepath', help='template path')
+    parser.add_argument('--o_plot1', help='MDA Ramachandran plot')
+    parser.add_argument('--o_plot2', help='Seaborn Ramachandran plot')
+    parser.add_argument('--o_data1', help='Timeseries in HDF5 format')
+    parser.add_argument('--o_html1', help='Html overview output of all plots')
+    return parser.parse_args()
+
+
+args = parse_command_line(sys.argv)
+
+currentpath = "."
+if args.itemplatepath is not None:
+    currentpath = args.itemplatepath
+
+
+u = mda.Universe(args.istr, args.itraj,
+                 topology_format=args.istrext, format=args.itrajext)
+selection = "(segid %s)" % \
+    (args.isegid1)
+
+if args.iresname is not None:
+    selection = "(segid %s and resname %s)" % \
+        (args.isegid1, args.iresname)
+
+if args.iresid1 is not None and args.iresid2 is not None:
+    assert(int(args.iresid1) > 0), "ResID numbering starts at 1 for this tool."
+    assert(int(args.iresid2) > 0), "ResID numbering starts at 1 for this tool."
+    assert(int(args.iresid2) > int(args.iresid1)
+           ), "ResID2 must be at least ResID1+1"
+    selection = "(segid %s and resid %s-%s)" % \
+        (args.isegid1, int(args.iresid1), int(args.iresid2))
+    if args.iresname is not None:
+        selection = "(segid %s and resid %s-%s and resname %s)" % \
+            (args.isegid1, int(args.iresid1), int(args.iresid2), args.iresname)
+
+r = u.select_atoms(selection)
+
+assert(r != u.select_atoms('name thiscannotpossiblyexist')
+       ), \
+               """The selection you specified returns an empty result.
+               Check segment names and residue ID's. Also check the
+                structure and trajectory file selected are the correct ones"""
+
+if args.igroupby is not None:
+    group_selections = {}  # dictionary of selections
+    if args.igroupby == 'name':
+        groupby = sorted(list(set(r.resnames)))
+        for e in groupby:
+            s = r & u.select_atoms("resname %s" % e)
+            this_sel = "%s and resname %s" % (selection, e)
+            group_selections[this_sel] = s
+    elif args.igroupby == 'id':
+        groupby = sorted(list(set(r.resids)))
+        for e in groupby:
+            s = r & u.select_atoms("resid %s" % e)
+            this_sel = "%s and resid %s" % (selection, e)
+            group_selections[this_sel] = s
+    else:
+        assert False, ("Invalid argument for igroupby. "
+                       "Only name and id are valid options.")
+
+
+def ramachandran_plot(atomgroup, selection, outputfile1, outputfile2,
+                      image_format='png'):
+    # plot standard mdanalysis and seaborn 2D with kde
+    R = Ramachandran(atomgroup).run()
+    fig, ax = plt.subplots(figsize=plt.figaspect(1))
+    R.plot(ax=ax, color='k', marker='.', ref=True)
+
+    a = R.angles.reshape(np.prod(R.angles.shape[:2]), 2)
+    # open hdf file
+    with h5py.File(args.o_data1, 'a') as f:
+        setname = "%s" % (selection)
+        f["/" + setname + "/ramachandran/phi"] = a[:, 0]
+        f["/" + setname + "/ramachandran/psi"] = a[:, 1]
+    plt.tight_layout()
+    # svg is better but sticking with png for now
+    plt.savefig(outputfile1, format=image_format)
+
+    sns.reset_defaults()
+    importlib.reload(plt)
+    importlib.reload(sns)
+    with sns.axes_style("white"):
+        h = sns.jointplot(x=a[:, 0], y=a[:, 1],
+                          kind="kde", space=0)
+        h.set_axis_labels(r'$\phi$ (deg)', r'$\psi$ (deg)')
+        h.ax_joint.set_xlim(-180, 180)
+        h.ax_joint.set_ylim(-180, 180)
+        h.ax_joint.xaxis.set_major_locator(ticker.MultipleLocator(60))
+        h.ax_joint.yaxis.set_major_locator(ticker.MultipleLocator(60))
+        plt.savefig(outputfile2, format=image_format, bbox_inches='tight')
+
+
+def get_base64_encoded_image(image_path):
+    """  encode image to string for use in html later"""
+    with open(image_path, "rb") as img_file:
+        return base64.b64encode(img_file.read()).decode('utf-8')
+
+
+plots = []
+if args.igroupby is not None:
+    for k, v in group_selections.items():
+        print(k, v)
+        try:
+            ramachandran_plot(v, str(k), "ramachandran1" +
+                              str(k), "ramachandran2" + str(k))
+            plots.append({'Name': "%s" % (k), 'plot1':
+                         get_base64_encoded_image("ramachandran1" + str(k)),
+                         'plot2': get_base64_encoded_image("ramachandran2"
+                          + str(k))})
+        except Exception as einstance:
+            print(type(einstance))
+            print(einstance.args)
+            print(einstance)
+
+ramachandran_plot(r, selection, args.o_plot1, args.o_plot2)
+plots.insert(0, {'Name': selection, 'plot1': get_base64_encoded_image(
+    args.o_plot1), 'plot2': get_base64_encoded_image(args.o_plot2)})
+
+template_environment = Environment(loader=FileSystemLoader(
+    currentpath), lstrip_blocks=True, trim_blocks=True)
+template = template_environment.get_template(
+    'ramachandran_auto_protein_html.j2')
+with open(args.o_html1, 'w+') as f:
+    f.write(template.render(title="Ramachandran Plots", plots=plots))