Mercurial > repos > chemteam > mdanalysis_ramachandran_protein
view extract_rmsd.py @ 2:060b9d9ec8cf draft default tip
"planemo upload for repository https://github.com/galaxycomputationalchemistry/galaxy-tools-compchem/ commit f1c3c88c7395f2e84cbc533199406aadb79c5c07"
author | chemteam |
---|---|
date | Fri, 13 Nov 2020 19:42:10 +0000 |
parents | 631b2a62d500 |
children |
line wrap: on
line source
import argparse
import json

import MDAnalysis as m
from MDAnalysis.analysis import rms

import numpy as np


def _read_path_list(list_file):
    """Return the non-blank, whitespace-stripped lines of *list_file*."""
    with open(list_file) as handle:
        return [line.strip() for line in handle if line.strip()]


def calc_rmsd(str_files, traj_files, str_format, traj_format,
              filepath_out, group, start, end, step):
    """
    Calculate the frame-by-frame RMSD between every pair of trajectories
    and write the result to a JSON file.

    The coordinates are compared as loaded: no alignment, centering or
    superposition is performed here, users should do this themselves.

    str_files: text file with filepaths for structures, one on each line
    traj_files: text file with filepaths for trajectories, one on each
        line; line i is loaded together with line i of str_files
    str_format: structure (topology) format passed to MDAnalysis
    traj_format: trajectory format passed to MDAnalysis
    filepath_out: JSON file where the results should be saved to, as a
        nested list indexed [trajectory 1][trajectory 2][frame]
    group: atoms for which RMSD should be calculated; use the MDAnalysis
        selection language (None selects all atoms)
    start: first trajectory frame to calculate RMSD
    end: frame at which to stop; used as a Python slice bound, so this
        frame itself is not included
    step: how frequently frames are sampled between start and end;
        obviously, the larger the step, the quicker the script finishes

    start, end and step may be None, in which case the usual slice
    defaults apply (all frames).

    Raises IOError if the two list files are empty or contain different
    numbers of paths, and ValueError if the selection is empty or the
    trajectories do not all yield the same number of frames and atoms.
    """
    # open list of files
    str_file_list = _read_path_list(str_files)
    traj_file_list = _read_path_list(traj_files)

    # Compare the parsed lists: counting lines on the file handles after
    # they have been read() always gives zero for both.
    if len(str_file_list) != len(traj_file_list):
        raise IOError('Number of structure and trajectory files unequal.')

    no_t = len(traj_file_list)
    if no_t == 0:
        raise IOError('No structure/trajectory files were provided.')

    # hard to find array size before loading files
    universe_coordinate_data = []

    for str_path, traj_path in zip(str_file_list, traj_file_list):
        # We no longer align here, users should do this themselves.
        u = m.Universe(str_path, traj_path,
                       format=traj_format, topology_format=str_format)
        u.transfer_to_memory()
        atoms = u.atoms if group is None else u.select_atoms(group)
        if atoms.n_atoms == 0:
            raise ValueError(
                'Selection {!r} matches no atoms in {}.'.format(
                    group, str_path))
        # NOTE(review): relies on the in-memory coordinate array being
        # laid out as (frame, atom, xyz), the layout produced by
        # transfer_to_memory() - confirm for the MDAnalysis version used.
        frames = u.trajectory.coordinate_array[start:end:step]
        # Keep only the selected atoms; going back to `.universe` here
        # would discard the selection and use every atom.
        universe_coordinate_data.append(frames[:, atoms.indices])

    # All trajectories must stack into one regular 4D array.
    shapes = {coords.shape for coords in universe_coordinate_data}
    if len(shapes) > 1:
        raise ValueError(
            'Trajectories differ in number of frames or selected atoms: '
            '{}'.format(sorted(shapes)))

    universe_coordinate_data = np.array(universe_coordinate_data)

    print("All trajs loaded by MDAnalysis")

    n_frames = universe_coordinate_data.shape[1]
    data = np.zeros((no_t, no_t, n_frames))

    # calculate differences; the matrix is symmetric with a zero
    # diagonal, so only the lower triangle is computed and mirrored
    for traj1 in range(no_t):
        print("Calculating differences for traj {}".format(traj1))
        for traj2 in range(traj1):
            for frame in range(n_frames):
                r = rms.rmsd(universe_coordinate_data[traj1, frame],
                             universe_coordinate_data[traj2, frame])
                data[traj1, traj2, frame] = r
                data[traj2, traj1, frame] = r

    with open(filepath_out, 'w') as f:
        json.dump(data.tolist(), f, indent=4, sort_keys=True)

    print("Done!")


def main():
    """Parse the command-line arguments and run calc_rmsd with them."""
    parser = argparse.ArgumentParser()
    parser.add_argument('--trajs', required=True,
                        help='File containing trajectory filepaths.')
    parser.add_argument("--strs",
                        help='File containing structure filepaths.')
    parser.add_argument('--traj-format', required=True,
                        help='Trajectory format.')
    parser.add_argument("--str-format", help='Structure format.')
    parser.add_argument('-o', '--outfile',
                        help="Path to the output JSON file")
    parser.add_argument('--group', help="Atoms for which RMSD should be "
                        "calculated in MDAnalysis selection language")
    parser.add_argument('--start', type=int,
                        help="First trajectory frame to calculate RMSD")
    parser.add_argument('--end', type=int,
                        help="Last trajectory frame to calculate RMSD")
    parser.add_argument('--step', type=int,
                        help="Frame sampling frequency for RMSD calculation")
    args = parser.parse_args()

    calc_rmsd(args.strs, args.trajs, args.str_format,
              args.traj_format, args.outfile,
              args.group, args.start, args.end, args.step)


if __name__ == "__main__":
    main()