# HG changeset patch # User galaxy-australia # Date 1646103185 0 # Node ID 6c92e000d6840a810c26c7202cd764456093eea0 # Parent 7ae9d78b06f558e5adbe3216f762a76cfc3ec0cc "planemo upload for repository https://github.com/usegalaxy-au/galaxy-local-tools commit a510e97ebd604a5e30b1f16e5031f62074f23e86" diff -r 7ae9d78b06f5 -r 6c92e000d684 README.md --- a/README.md Fri Jan 28 04:56:29 2022 +0000 +++ b/README.md Tue Mar 01 02:53:05 2022 +0000 @@ -3,9 +3,9 @@ ## Overview -Alphafold requires a customised compute environment to run. The machine needs a GPU, and access to a 2.2 Tb reference data store. +Alphafold requires a customised compute environment to run. The machine needs a GPU, and access to a 2.2 Tb reference data store. -This document is designed to provide details on the compute environment required for Alphafold operation, and the Galaxy job destination settings to run the wrapper. +This document is designed to provide details on the compute environment required for Alphafold operation, and the Galaxy job destination settings to run the wrapper. For full details on Alphafold requirements, see https://github.com/deepmind/alphafold. @@ -13,11 +13,11 @@ ### HARDWARE -The machine is recommended to have the following specs: +The machine is recommended to have the following specs: - 12 cores - 80 Gb RAM - 2.5 Tb storage -- A fast Nvidia GPU. +- A fast Nvidia GPU. As a minimum, the Nvidia GPU must have 8Gb RAM. It also requires ***unified memory*** to be switched on.
Unified memory is usually enabled by default, but some HPC systems will turn it off so the GPU can be shared between multiple jobs concurrently. @@ -31,7 +31,7 @@ - [Singularity](https://sylabs.io/guides/3.0/user-guide/installation.html) - [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html) -As Alphafold uses an Nvidia GPU, the NVIDIA Container Toolkit is needed. This makes the GPU available inside the running singularity container. +As Alphafold uses an Nvidia GPU, the NVIDIA Container Toolkit is needed. This makes the GPU available inside the running singularity container. To check that everything has been set up correctly, run the following @@ -79,7 +79,7 @@ bash scripts/download_all_data.sh /data/alphafold_databases ``` -This will install the reference data to `/data/alphafold_databases`. To check this has worked, ensure the final folder structure is as follows: +This will install the reference data to `/data/alphafold_databases`. To check this has worked, ensure the final folder structure is as follows: ``` data/alphafold_databases @@ -128,9 +128,9 @@ ### JOB DESTINATION -Alphafold needs a custom singularity job destination to run. +Alphafold needs a custom singularity job destination to run. The destination needs to be configured for singularity, and some -extra singularity params need to be set as seen below. +extra singularity params need to be set as seen below. Specify the job runner. For example, a local runner @@ -154,4 +154,4 @@ ### Closing -If you are experiencing technical issues, feel free to write to help@genome.edu.au. We may be able to provide comment on setting up Alphafold on your compute environment. +If you are experiencing technical issues, feel free to write to help@genome.edu.au. We may be able to provide advice on setting up Alphafold on your compute environment. 
diff -r 7ae9d78b06f5 -r 6c92e000d684 README.rst --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/README.rst Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,164 @@ +Alphafold compute setup +======================= + +Overview +-------- + +Alphafold requires a customised compute environment to run. The machine +needs a GPU, and access to a 2.2 Tb reference data store. + +This document is designed to provide details on the compute environment +required for Alphafold operation, and the Galaxy job destination +settings to run the wrapper. + +For full details on Alphafold requirements, see +https://github.com/deepmind/alphafold. + +HARDWARE +~~~~~~~~ + +The machine is recommended to have the following specs: - 12 cores - 80 +Gb RAM - 2.5 Tb storage - A fast Nvidia GPU. + +As a minimum, the Nvidia GPU must have 8Gb RAM. It also requires +**unified memory** to be switched on. Unified memory is usually enabled +by default, but some HPC systems will turn it off so the GPU can be +shared between multiple jobs concurrently. + +ENVIRONMENT +~~~~~~~~~~~ + +This wrapper runs Alphafold as a singularity container. The following +software is needed: + +- `Singularity <https://sylabs.io/guides/3.0/user-guide/installation.html>`_ +- `NVIDIA Container + Toolkit <https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html>`_ + +As Alphafold uses an Nvidia GPU, the NVIDIA Container Toolkit is needed. +This makes the GPU available inside the running singularity container. + +To check that everything has been set up correctly, run the following + +:: + + singularity run --nv docker://nvidia/cuda:11.0-base nvidia-smi + +If you can see something similar to this output (details depend on your +GPU), it has been set up correctly. + +:: + + +-----------------------------------------------------------------------------+ + | NVIDIA-SMI 470.57.02 Driver Version: 470.57.02 CUDA Version: 11.4 | + |-------------------------------+----------------------+----------------------+ + | GPU Name Persistence-M| Bus-Id Disp.A | Volatile Uncorr. ECC | + | Fan Temp Perf Pwr:Usage/Cap| Memory-Usage | GPU-Util Compute M. 
| + | | | MIG M. | + |===============================+======================+======================| + | 0 Tesla T4 Off | 00000000:00:05.0 Off | 0 | + | N/A 49C P0 28W / 70W | 0MiB / 15109MiB | 0% Default | + | | | N/A | + +-------------------------------+----------------------+----------------------+ + + +-----------------------------------------------------------------------------+ + | Processes: | + | GPU GI CI PID Type Process name GPU Memory | + | ID ID Usage | + |=============================================================================| + | No running processes found | + +-----------------------------------------------------------------------------+ + +REFERENCE DATA +~~~~~~~~~~~~~~ + +Alphafold needs reference data to run. The wrapper expects this data to +be present at ``/data/alphafold_databases``. To download, run the +following shell script command in the tool directory. + +:: + + # make folders if needed + mkdir /data /data/alphafold_databases + + # download ref data + bash scripts/download_all_data.sh /data/alphafold_databases + +This will install the reference data to ``/data/alphafold_databases``. 
+To check this has worked, ensure the final folder structure is as +follows: + +:: + + data/alphafold_databases + ├── bfd + │   ├── bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt_a3m.ffdata + │   ├── bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt_a3m.ffindex + │   ├── bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt_cs219.ffdata + │   ├── bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt_cs219.ffindex + │   ├── bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt_hhm.ffdata + │   └── bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt_hhm.ffindex + ├── mgnify + │   └── mgy_clusters_2018_12.fa + ├── params + │   ├── LICENSE + │   ├── params_model_1.npz + │   ├── params_model_1_ptm.npz + │   ├── params_model_2.npz + │   ├── params_model_2_ptm.npz + │   ├── params_model_3.npz + │   ├── params_model_3_ptm.npz + │   ├── params_model_4.npz + │   ├── params_model_4_ptm.npz + │   ├── params_model_5.npz + │   └── params_model_5_ptm.npz + ├── pdb70 + │   ├── md5sum + │   ├── pdb70_a3m.ffdata + │   ├── pdb70_a3m.ffindex + │   ├── pdb70_clu.tsv + │   ├── pdb70_cs219.ffdata + │   ├── pdb70_cs219.ffindex + │   ├── pdb70_hhm.ffdata + │   ├── pdb70_hhm.ffindex + │   └── pdb_filter.dat + ├── pdb_mmcif + │   ├── mmcif_files + │   └── obsolete.dat + ├── uniclust30 + │   └── uniclust30_2018_08 + └── uniref90 + └── uniref90.fasta + +JOB DESTINATION +~~~~~~~~~~~~~~~ + +Alphafold needs a custom singularity job destination to run. The +destination needs to be configured for singularity, and some extra +singularity params need to be set as seen below. + +Specify the job runner. For example, a local runner + +:: + + + +Customise the job destination with required singularity settings. The +settings below are mandatory, but you may include other settings as +needed. 
+ +:: + + + 'none' + true + --nv + "$job_directory:ro,$tool_directory:ro,$job_directory/outputs:rw,$working_directory:rw,/data/alphafold_databases:/data:ro" + + +Closing +~~~~~~~ + +If you are experiencing technical issues, feel free to write to +help@genome.edu.au. We may be able to provide advice on setting up +Alphafold on your compute environment. diff -r 7ae9d78b06f5 -r 6c92e000d684 alphafold.fasta --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/alphafold.fasta Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,4 @@ +>AAB30827.1 thyroid-stimulating hormone alpha subunit Homo sapiens +MDYYRKYAAIFLVTLSVFLHVLHSAPDVQDCPECTLQENPFFSQPGAPILQCMGCCFSRA +YPTPLRSKKTMLVQKNVTSESTCCVAKSYNRVTVMGGFKVENHTACHCSTCYYHKS + diff -r 7ae9d78b06f5 -r 6c92e000d684 alphafold.xml --- a/alphafold.xml Fri Jan 28 04:56:29 2022 +0000 +++ b/alphafold.xml Tue Mar 01 02:53:05 2022 +0000 @@ -1,64 +1,68 @@ Alphafold v2.0: AI-guided 3D structure prediction of proteins - 2.0.0 - 0 + 2.0.0 + 0 - topic_0082 + topic_0082 - operation_0474 + operation_0474 + + alphafold_2.0 + - neoformit/alphafold-galaxy@sha256:6adf7f07062b307d08c11130c39a28abc7c290b23f6c347b09c2c649c054c338 + neoformit/alphafold:latest input.fasta && - #end if +#elif $fasta_or_text.input_mode == 'textbox': + echo '$fasta_or_text.fasta_text' > input.fasta && +#end if - python3 '$__tool_directory__/validate_fasta.py' input.fasta && +python3 '$__tool_directory__/validate_fasta.py' input.fasta && - ## env vars ------------------------------- - export TF_FORCE_UNIFIED_MEMORY=1 && - export XLA_PYTHON_CLIENT_MEM_FRACTION=4.0 && - export DATE=`date +"%Y-%m-%d"` && +## env vars ------------------------------- +export TF_FORCE_UNIFIED_MEMORY=1 && +export XLA_PYTHON_CLIENT_MEM_FRACTION=4.0 && +export DATE=`date +"%Y-%m-%d"` && - ## run alphafold ------------------------- - ln -s /app/alphafold/alphafold alphafold && - python /app/alphafold/run_alphafold.py - --fasta_paths alphafold.fasta - --output_dir output - --data_dir /data ## location of the 
alphafold databases on pulsar node --> could this maybe a env var? $ALPHAFOLD_DB --> \${ALPHAFOLD_DB:-/data} - --uniref90_database_path /data/uniref90/uniref90.fasta - --mgnify_database_path /data/mgnify/mgy_clusters_2018_12.fa - --pdb70_database_path /data/pdb70/pdb70 - --template_mmcif_dir /data/pdb_mmcif/mmcif_files - --obsolete_pdbs_path /data/pdb_mmcif/obsolete.dat - --max_template_date=\$DATE - --bfd_database_path /data/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt - --uniclust30_database_path /data/uniclust30/uniclust30_2018_08/uniclust30_2018_08 - && +## run alphafold ------------------------- +python /app/alphafold/run_alphafold.py +--fasta_paths alphafold.fasta +--output_dir output +--data_dir \${ALPHAFOLD_DB:-/data} +--uniref90_database_path \${ALPHAFOLD_DB:-/data}/uniref90/uniref90.fasta +--mgnify_database_path \${ALPHAFOLD_DB:-/data}/mgnify/mgy_clusters_2018_12.fa +--pdb70_database_path \${ALPHAFOLD_DB:-/data}/pdb70/pdb70 +--template_mmcif_dir \${ALPHAFOLD_DB:-/data}/pdb_mmcif/mmcif_files +--obsolete_pdbs_path \${ALPHAFOLD_DB:-/data}/pdb_mmcif/obsolete.dat +--max_template_date=\$DATE +--bfd_database_path \${ALPHAFOLD_DB:-/data}/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt +--uniclust30_database_path \${ALPHAFOLD_DB:-/data}/uniclust30/uniclust30_2018_08/uniclust30_2018_08 +--use_gpu_relax=True +&& - ## for dry run testing - ## cp -r '$__tool_directory__/output' . && +## Uncomment for "dummy run" - skip alphafold run and read output from test-data +## cp -r '$__tool_directory__/output' . 
&& - ## generate extra outputs ----------------- - ## plddts - python3 '$__tool_directory__/gen_extra_outputs.py' output/alphafold $output_plddts && +## Generate additional outputs ------------ +python3 '$__tool_directory__/gen_extra_outputs.py' output/alphafold $output_plddts && - ## html - mkdir -p '${ html.files_path }' && - cp '$__tool_directory__/alphafold.html' ${html} && - cp output/alphafold/ranked_*.pdb '${html.files_path}' && +## HTML output +mkdir -p '${ html.files_path }' && +cp '$__tool_directory__/alphafold.html' '${html}' && +cp output/alphafold/ranked_*.pdb '${html.files_path}' - ## For some reason the working directory ends up being one level too deep! - mv working/* . ]]> diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/Dockerfile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/Dockerfile Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,26 @@ +FROM clairemcwhite/alphafold + +ARG CUDA=11.2 +ARG CUDA_FULL=11.2.2 + +# Copy in updated alphafold repo (last commit 05/11/2021) +# https://github.com/deepmind/alphafold/tree/be37a41d6f83e4145bd4912cbe8bf6a24af80c29 +RUN rm -rf /app/alphafold/alphafold +COPY alphafold /app/alphafold/ + +RUN conda update -qy conda \ + && conda install -y -c conda-forge \ + openmm=7.5.1 \ + cudatoolkit==${CUDA_FULL} \ + pdbfixer \ + pip \ + python=3.7 + +RUN wget -q -P /app/alphafold/alphafold/common/ \ + https://git.scicore.unibas.ch/schwede/openstructure/-/raw/7102c63615b64735c4941278d92b554ec94415f8/modules/mol/alg/src/stereo_chemical_props.txt + +# Fix correct jax version for Cuda 11.2: https://github.com/google/jax/issues/5668 +RUN pip3 install --upgrade pip \ + && pip3 install -r /app/alphafold/requirements.txt \ + && pip3 install --upgrade jax jaxlib==0.1.61+cuda112 -f \ + https://storage.googleapis.com/jax-releases/jax_releases.html diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/README.md --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/README.md Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,12 @@ +# What is this? 
+ +These are `alphafold` git repos copied from: + - the `clairemcwhite/alphafold` docker container (originates from a fork/branch https://github.com/deisseroth-lab/alphafold/tree/cudnn-runtime) + - The upstream https://github.com/deepmind/alphafold + +### Diffs +- According to [the closed pull request](https://github.com/deepmind/alphafold/pull/36), the main diff is updates to Dockerfile Cuda deps in the fork +- These issues have since been resolved in the upstream +- Can probably copy the new repo into the image in a new Dockerfile `FROM clairemcwhite/alphafold` +- And hope that alphafold on pulsar can work with the new container! + (There were lots of dependency issues...) diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/.dockerignore --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/.dockerignore Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,3 @@ +.dockerignore +docker/Dockerfile +README.md diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/CONTRIBUTING.md --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/CONTRIBUTING.md Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,23 @@ +# How to Contribute + +We welcome small patches related to bug fixes and documentation, but we do not +plan to make any major changes to this repository. + +## Contributor License Agreement + +Contributions to this project must be accompanied by a Contributor License +Agreement. You (or your employer) retain the copyright to your contribution, +this simply gives us permission to use and redistribute your contributions as +part of the project. Head over to <https://cla.developers.google.com/> to see +your current agreements on file or to sign a new one. + +You generally only need to submit a CLA once, so if you've already submitted one +(even if it was for a different project), you probably don't need to do it +again. + +## Code reviews + +All submissions, including submissions by project members, require review. We +use GitHub pull requests for this purpose. 
Consult +[GitHub Help](https://help.github.com/articles/about-pull-requests/) for more +information on using pull requests. diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/LICENSE --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/LICENSE Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). 
+ + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. 
Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative 
Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/README.md --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/README.md Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,666 @@ +![header](imgs/header.jpg) + +# AlphaFold + +This package provides an implementation of the inference pipeline of AlphaFold +v2.0. This is a completely new model that was entered in CASP14 and published in +Nature. For simplicity, we refer to this model as AlphaFold throughout the rest +of this document. + +We also provide an implementation of AlphaFold-Multimer. This represents a work +in progress and AlphaFold-Multimer isn't expected to be as stable as our monomer +AlphaFold system. +[Read the guide](#updating-existing-alphafold-installation-to-include-alphafold-multimers) +for how to upgrade and update code. 
+ +Any publication that discloses findings arising from using this source code or the model parameters should [cite](#citing-this-work) the +[AlphaFold paper](https://doi.org/10.1038/s41586-021-03819-2) and, if +applicable, the [AlphaFold-Multimer paper](https://www.biorxiv.org/content/10.1101/2021.10.04.463034v1). + +Please also refer to the +[Supplementary Information](https://static-content.springer.com/esm/art%3A10.1038%2Fs41586-021-03819-2/MediaObjects/41586_2021_3819_MOESM1_ESM.pdf) +for a detailed description of the method. + +**You can use a slightly simplified version of AlphaFold with +[this Colab +notebook](https://colab.research.google.com/github/deepmind/alphafold/blob/main/notebooks/AlphaFold.ipynb)** +or community-supported versions (see below). + +![CASP14 predictions](imgs/casp14_predictions.gif) + +## First time setup + +The following steps are required in order to run AlphaFold: + +1. Install [Docker](https://www.docker.com/). + * Install + [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html) + for GPU support. + * Setup running + [Docker as a non-root user](https://docs.docker.com/engine/install/linux-postinstall/#manage-docker-as-a-non-root-user). +1. Download genetic databases (see below). +1. Download model parameters (see below). +1. Check that AlphaFold will be able to use a GPU by running: + + ```bash + docker run --rm --gpus all nvidia/cuda:11.0-base nvidia-smi + ``` + + The output of this command should show a list of your GPUs. If it doesn't, + check if you followed all steps correctly when setting up the + [NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/install-guide.html) + or take a look at the following + [NVIDIA Docker issue](https://github.com/NVIDIA/nvidia-docker/issues/1447#issuecomment-801479573). 
+ +If you wish to run AlphaFold using Singularity (a common containerization platform on HPC systems) we recommend using some of the +third party Singularity setups as linked in +https://github.com/deepmind/alphafold/issues/10 or +https://github.com/deepmind/alphafold/issues/24. + +### Genetic databases + +This step requires `aria2c` to be installed on your machine. + +AlphaFold needs multiple genetic (sequence) databases to run: + +* [BFD](https://bfd.mmseqs.com/), +* [MGnify](https://www.ebi.ac.uk/metagenomics/), +* [PDB70](http://wwwuser.gwdg.de/~compbiol/data/hhsuite/databases/hhsuite_dbs/), +* [PDB](https://www.rcsb.org/) (structures in the mmCIF format), +* [PDB seqres](https://www.rcsb.org/) – only for AlphaFold-Multimer, +* [Uniclust30](https://uniclust.mmseqs.com/), +* [UniProt](https://www.uniprot.org/uniprot/) – only for AlphaFold-Multimer, +* [UniRef90](https://www.uniprot.org/help/uniref). + +We provide a script `scripts/download_all_data.sh` that can be used to download +and set up all of these databases: + +* Default: + + ```bash + scripts/download_all_data.sh <DOWNLOAD_DIR> + ``` + + will download the full databases. + +* With `reduced_dbs`: + + ```bash + scripts/download_all_data.sh <DOWNLOAD_DIR> reduced_dbs + ``` + + will download a reduced version of the databases to be used with the + `reduced_dbs` database preset. + +:ledger: **Note: The download directory `<DOWNLOAD_DIR>` should _not_ be a +subdirectory in the AlphaFold repository directory.** If it is, the Docker build +will be slow as the large databases will be copied during the image creation. + +We don't provide exactly the database versions used in CASP14 – see the [note on +reproducibility](#note-on-reproducibility). Some of the databases are mirrored +for speed, see [mirrored databases](#mirrored-databases). + +:ledger: **Note: The total download size for the full databases is around 415 GB +and the total size when unzipped is 2.2 TB. Please make sure you have a large +enough hard drive space, bandwidth and time to download. 
We recommend using an +SSD for better genetic search performance.** + +The `download_all_data.sh` script will also download the model parameter files. +Once the script has finished, you should have the following directory structure: + +``` +$DOWNLOAD_DIR/ # Total: ~ 2.2 TB (download: 438 GB) + bfd/ # ~ 1.7 TB (download: 271.6 GB) + # 6 files. + mgnify/ # ~ 64 GB (download: 32.9 GB) + mgy_clusters_2018_12.fa + params/ # ~ 3.5 GB (download: 3.5 GB) + # 5 CASP14 models, + # 5 pTM models, + # 5 AlphaFold-Multimer models, + # LICENSE, + # = 16 files. + pdb70/ # ~ 56 GB (download: 19.5 GB) + # 9 files. + pdb_mmcif/ # ~ 206 GB (download: 46 GB) + mmcif_files/ + # About 180,000 .cif files. + obsolete.dat + pdb_seqres/ # ~ 0.2 GB (download: 0.2 GB) + pdb_seqres.txt + small_bfd/ # ~ 17 GB (download: 9.6 GB) + bfd-first_non_consensus_sequences.fasta + uniclust30/ # ~ 86 GB (download: 24.9 GB) + uniclust30_2018_08/ + # 13 files. + uniprot/ # ~ 98.3 GB (download: 49 GB) + uniprot.fasta + uniref90/ # ~ 58 GB (download: 29.7 GB) + uniref90.fasta +``` + +`bfd/` is only downloaded if you download the full databases, and `small_bfd/` +is only downloaded if you download the reduced databases. + +### Model parameters + +While the AlphaFold code is licensed under the Apache 2.0 License, the AlphaFold +parameters are made available for non-commercial use only under the terms of the +CC BY-NC 4.0 license. Please see the [Disclaimer](#license-and-disclaimer) below +for more detail. + +The AlphaFold parameters are available from +https://storage.googleapis.com/alphafold/alphafold_params_2021-10-27.tar, and +are downloaded as part of the `scripts/download_all_data.sh` script. This script +will download parameters for: + +* 5 models which were used during CASP14, and were extensively validated for + structure prediction quality (see Jumper et al. 2021, Suppl. Methods 1.12 + for details). 
+* 5 pTM models, which were fine-tuned to produce pTM (predicted TM-score) and + predicted aligned error (PAE) values alongside their structure predictions + (see Jumper et al. 2021, Suppl. Methods 1.9.7 for details). +* 5 AlphaFold-Multimer models that produce pTM and PAE values alongside their + structure predictions. + +### Updating existing AlphaFold installation to include AlphaFold-Multimers + +If you have AlphaFold v2.0.0 or v2.0.1 you can either reinstall AlphaFold fully +from scratch (remove everything and run the setup from scratch) or you can do an +incremental update that will be significantly faster but will require a bit more +work. Make sure you follow these steps in the exact order they are listed below: + +1. **Update the code.** + * Go to the directory with the cloned AlphaFold repository and run + `git fetch origin main` to get all code updates. +1. **Download the UniProt and PDB seqres databases.** + * Run `scripts/download_uniprot.sh <DOWNLOAD_DIR>`. + * Remove `<DOWNLOAD_DIR>/pdb_mmcif`. PDB SeqRes and PDB must be + from exactly the same date. Failure to do this step will result in + potential errors when searching for templates when running + AlphaFold-Multimer. + * Run `scripts/download_pdb_mmcif.sh <DOWNLOAD_DIR>`. + * Run `scripts/download_pdb_seqres.sh <DOWNLOAD_DIR>`. +1. **Update the model parameters.** + * Remove the old model parameters in `<DOWNLOAD_DIR>/params`. + * Download new model parameters using + `scripts/download_alphafold_params.sh <DOWNLOAD_DIR>`. +1. **Follow [Running AlphaFold](#running-alphafold).** + +#### API changes between v2.0.0 and v2.1.0 + +We tried to keep the API as backwards compatible as possible, but we had to +change the following: + +* The `RunModel.predict()` now needs a `random_seed` argument as MSA sampling + happens inside the Multimer model. +* The `preset` flag in `run_alphafold.py` and `run_docker.py` was split into + `db_preset` and `model_preset`. +* The models to use are not specified using `model_names` but rather using the + `model_preset` flag. 
If you want to customize which models are used for each + preset, you will have to modify the `MODEL_PRESETS` dictionary in + `alphafold/model/config.py`. +* Setting the `data_dir` flag is now needed when using `run_docker.py`. + + +## Running AlphaFold + +**The simplest way to run AlphaFold is using the provided Docker script.** This +was tested on Google Cloud with a machine using the `nvidia-gpu-cloud-image` +with 12 vCPUs, 85 GB of RAM, a 100 GB boot disk, the databases on an additional +3 TB disk, and an A100 GPU. + +1. Clone this repository and `cd` into it. + + ```bash + git clone https://github.com/deepmind/alphafold.git + ``` + +1. Build the Docker image: + + ```bash + docker build -f docker/Dockerfile -t alphafold . + ``` + +1. Install the `run_docker.py` dependencies. Note: You may optionally wish to + create a + [Python Virtual Environment](https://docs.python.org/3/tutorial/venv.html) + to prevent conflicts with your system's Python environment. + + ```bash + pip3 install -r docker/requirements.txt + ``` + +1. Run `run_docker.py` pointing to a FASTA file containing the protein + sequence(s) for which you wish to predict the structure. If you are + predicting the structure of a protein that is already in PDB and you wish to + avoid using it as a template, then `max_template_date` must be set to be + before the release date of the structure. You must also provide the path to + the directory containing the downloaded databases. For example, for the + T1050 CASP14 target: + + ```bash + python3 docker/run_docker.py \ + --fasta_paths=T1050.fasta \ + --max_template_date=2020-05-14 \ + --data_dir=$DOWNLOAD_DIR + ``` + + By default, AlphaFold will attempt to use all visible GPU devices. To use a + subset, specify a comma-separated list of GPU UUID(s) or index(es) using the + `--gpu_devices` flag. See + [GPU enumeration](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/user-guide.html#gpu-enumeration) + for more details. + +1. 
You can control which AlphaFold model to run by adding the + `--model_preset=` flag. We provide the following models: + + * **monomer**: This is the original model used at CASP14 with no ensembling. + + * **monomer\_casp14**: This is the original model used at CASP14 with + `num_ensemble=8`, matching our CASP14 configuration. This is largely + provided for reproducibility as it is 8x more computationally + expensive for limited accuracy gain (+0.1 average GDT gain on CASP14 + domains). + + * **monomer\_ptm**: This is the original CASP14 model fine tuned with the + pTM head, providing a pairwise confidence measure. It is slightly less + accurate than the normal monomer model. + + * **multimer**: This is the [AlphaFold-Multimer](#citing-this-work) model. + To use this model, provide a multi-sequence FASTA file. In addition, the + UniProt database should have been downloaded. + +1. You can control MSA speed/quality tradeoff by adding + `--db_preset=reduced_dbs` or `--db_preset=full_dbs` to the run command. We + provide the following presets: + + * **reduced\_dbs**: This preset is optimized for speed and lower hardware + requirements. It runs with a reduced version of the BFD database. + It requires 8 CPU cores (vCPUs), 8 GB of RAM, and 600 GB of disk space. + + * **full\_dbs**: This runs with all genetic databases used at CASP14. 
+ + Running the command above with the `monomer` model preset and the + `reduced_dbs` data preset would look like this: + + ```bash + python3 docker/run_docker.py \ + --fasta_paths=T1050.fasta \ + --max_template_date=2020-05-14 \ + --model_preset=monomer \ + --db_preset=reduced_dbs \ + --data_dir=$DOWNLOAD_DIR + ``` + +### Running AlphaFold-Multimer + +All steps are the same as when running the monomer system, but you will have to + +* provide an input fasta with multiple sequences, +* set `--model_preset=multimer`, +* optionally set the `--is_prokaryote_list` flag with booleans that determine + whether all input sequences in the given fasta file are prokaryotic. If that + is not the case or the origin is unknown, set to `false` for that fasta. + +An example that folds a protein complex `multimer.fasta` that is prokaryotic: + +```bash +python3 docker/run_docker.py \ + --fasta_paths=multimer.fasta \ + --is_prokaryote_list=true \ + --max_template_date=2020-05-14 \ + --model_preset=multimer \ + --data_dir=$DOWNLOAD_DIR +``` + +### Examples + +Below are examples of how to use AlphaFold in different scenarios. + +#### Folding a monomer + +Say we have a monomer with the sequence `<SEQUENCE>`. The input fasta should be: + +```fasta +>sequence_name +<SEQUENCE> +``` + +Then run the following command: + +```bash +python3 docker/run_docker.py \ + --fasta_paths=monomer.fasta \ + --max_template_date=2021-11-01 \ + --model_preset=monomer \ + --data_dir=$DOWNLOAD_DIR +``` + +#### Folding a homomer + +Say we have a homomer from a prokaryote with 3 copies of the same sequence +`<SEQUENCE>`. The input fasta should be: + +```fasta +>sequence_1 +<SEQUENCE> +>sequence_2 +<SEQUENCE> +>sequence_3 +<SEQUENCE> +``` + +Then run the following command: + +```bash +python3 docker/run_docker.py \ + --fasta_paths=homomer.fasta \ + --is_prokaryote_list=true \ + --max_template_date=2021-11-01 \ + --model_preset=multimer \ + --data_dir=$DOWNLOAD_DIR +``` + +#### Folding a heteromer + +Say we have a heteromer A2B3 of unknown origin, i.e. 
with 2 copies of +`<SEQUENCE A>` and 3 copies of `<SEQUENCE B>`. The input fasta should be: + +```fasta +>sequence_1 +<SEQUENCE A> +>sequence_2 +<SEQUENCE A> +>sequence_3 +<SEQUENCE B> +>sequence_4 +<SEQUENCE B> +>sequence_5 +<SEQUENCE B> +``` + +Then run the following command: + +```bash +python3 docker/run_docker.py \ + --fasta_paths=heteromer.fasta \ + --is_prokaryote_list=false \ + --max_template_date=2021-11-01 \ + --model_preset=multimer \ + --data_dir=$DOWNLOAD_DIR +``` + +#### Folding multiple monomers one after another + +Say we have two monomers, `monomer1.fasta` and `monomer2.fasta`. + +We can fold both sequentially by using the following command: + +```bash +python3 docker/run_docker.py \ + --fasta_paths=monomer1.fasta,monomer2.fasta \ + --max_template_date=2021-11-01 \ + --model_preset=monomer \ + --data_dir=$DOWNLOAD_DIR +``` + +#### Folding multiple multimers one after another + +Say we have two multimers, `multimer1.fasta` and `multimer2.fasta`. Both are +from a prokaryotic organism. + +We can fold both sequentially by using the following command: + +```bash +python3 docker/run_docker.py \ + --fasta_paths=multimer1.fasta,multimer2.fasta \ + --is_prokaryote_list=true,true \ + --max_template_date=2021-11-01 \ + --model_preset=multimer \ + --data_dir=$DOWNLOAD_DIR +``` + +### AlphaFold output + +The outputs will be saved in a subdirectory of the directory provided via the +`--output_dir` flag of `run_docker.py` (defaults to `/tmp/alphafold/`). The +outputs include the computed MSAs, unrelaxed structures, relaxed structures, +ranked structures, raw model outputs, prediction metadata, and section timings. 
+The `--output_dir` directory will have the following structure: + +``` +/ + features.pkl + ranked_{0,1,2,3,4}.pdb + ranking_debug.json + relaxed_model_{1,2,3,4,5}.pdb + result_model_{1,2,3,4,5}.pkl + timings.json + unrelaxed_model_{1,2,3,4,5}.pdb + msas/ + bfd_uniclust_hits.a3m + mgnify_hits.sto + uniref90_hits.sto +``` + +The contents of each output file are as follows: + +* `features.pkl` – A `pickle` file containing the input feature NumPy arrays + used by the models to produce the structures. +* `unrelaxed_model_*.pdb` – A PDB format text file containing the predicted + structure, exactly as outputted by the model. +* `relaxed_model_*.pdb` – A PDB format text file containing the predicted + structure, after performing an Amber relaxation procedure on the unrelaxed + structure prediction (see Jumper et al. 2021, Suppl. Methods 1.8.6 for + details). +* `ranked_*.pdb` – A PDB format text file containing the relaxed predicted + structures, after reordering by model confidence. Here `ranked_0.pdb` should + contain the prediction with the highest confidence, and `ranked_4.pdb` the + prediction with the lowest confidence. To rank model confidence, we use + predicted LDDT (pLDDT) scores (see Jumper et al. 2021, Suppl. Methods 1.9.6 + for details). +* `ranking_debug.json` – A JSON format text file containing the pLDDT values + used to perform the model ranking, and a mapping back to the original model + names. +* `timings.json` – A JSON format text file containing the times taken to run + each section of the AlphaFold pipeline. +* `msas/` - A directory containing the files describing the various genetic + tool hits that were used to construct the input MSA. +* `result_model_*.pkl` – A `pickle` file containing a nested dictionary of the + various NumPy arrays directly produced by the model. 
In addition to the + output of the structure module, this includes auxiliary outputs such as: + + * Distograms (`distogram/logits` contains a NumPy array of shape [N_res, + N_res, N_bins] and `distogram/bin_edges` contains the definition of the + bins). + * Per-residue pLDDT scores (`plddt` contains a NumPy array of shape + [N_res] with the range of possible values from `0` to `100`, where `100` + means most confident). This can serve to identify sequence regions + predicted with high confidence or as an overall per-target confidence + score when averaged across residues. + * Present only if using pTM models: predicted TM-score (`ptm` field + contains a scalar). As a predictor of a global superposition metric, + this score is designed to also assess whether the model is confident in + the overall domain packing. + * Present only if using pTM models: predicted pairwise aligned errors + (`predicted_aligned_error` contains a NumPy array of shape [N_res, + N_res] with the range of possible values from `0` to + `max_predicted_aligned_error`, where `0` means most confident). This can + serve for a visualisation of domain packing confidence within the + structure. + +The pLDDT confidence measure is stored in the B-factor field of the output PDB +files (although unlike a B-factor, higher pLDDT is better, so care must be taken +when using for tasks such as molecular replacement). + +This code has been tested to match mean top-1 accuracy on a CASP14 test set with +pLDDT ranking over 5 model predictions (some CASP targets were run with earlier +versions of AlphaFold and some had manual interventions; see our forthcoming +publication for details). Some targets such as T1064 may also have high +individual run variance over random seeds. + +## Inferencing many proteins + +The provided inference script is optimized for predicting the structure of a +single protein, and it will compile the neural network to be specialized to +exactly the size of the sequence, MSA, and templates. 
For large proteins, the +compile time is a negligible fraction of the runtime, but it may become more +significant for small proteins or if the multi-sequence alignments are already +precomputed. In the bulk inference case, it may make sense to use our +`make_fixed_size` function to pad the inputs to a uniform size, thereby reducing +the number of compilations required. + +We do not provide a bulk inference script, but it should be straightforward to +develop on top of the `RunModel.predict` method with a parallel system for +precomputing multi-sequence alignments. Alternatively, this script can be run +repeatedly with only moderate overhead. + +## Note on CASP14 reproducibility + +AlphaFold's output for a small number of proteins has high inter-run variance, +and may be affected by changes in the input data. The CASP14 target T1064 is a +notable example; the large number of SARS-CoV-2-related sequences recently +deposited changes its MSA significantly. This variability is somewhat mitigated +by the model selection process; running 5 models and taking the most confident. + +To reproduce the results of our CASP14 system as closely as possible you must +use the same database versions we used in CASP. These may not match the default +versions downloaded by our scripts. 
+ +For genetics: + +* UniRef90: + [v2020_01](https://ftp.uniprot.org/pub/databases/uniprot/previous_releases/release-2020_01/uniref/) +* MGnify: + [v2018_12](http://ftp.ebi.ac.uk/pub/databases/metagenomics/peptide_database/2018_12/) +* Uniclust30: [v2018_08](http://wwwuser.gwdg.de/~compbiol/uniclust/2018_08/) +* BFD: [only version available](https://bfd.mmseqs.com/) + +For templates: + +* PDB: (downloaded 2020-05-14) +* PDB70: [2020-05-13](http://wwwuser.gwdg.de/~compbiol/data/hhsuite/databases/hhsuite_dbs/old-releases/pdb70_from_mmcif_200513.tar.gz) + +An alternative for templates is to use the latest PDB and PDB70, but pass the +flag `--max_template_date=2020-05-14`, which restricts templates only to +structures that were available at the start of CASP14. + +## Citing this work + +If you use the code or data in this package, please cite: + +```bibtex +@Article{AlphaFold2021, + author = {Jumper, John and Evans, Richard and Pritzel, Alexander and Green, Tim and Figurnov, Michael and Ronneberger, Olaf and Tunyasuvunakool, Kathryn and Bates, Russ and {\v{Z}}{\'\i}dek, Augustin and Potapenko, Anna and Bridgland, Alex and Meyer, Clemens and Kohl, Simon A A and Ballard, Andrew J and Cowie, Andrew and Romera-Paredes, Bernardino and Nikolov, Stanislav and Jain, Rishub and Adler, Jonas and Back, Trevor and Petersen, Stig and Reiman, David and Clancy, Ellen and Zielinski, Michal and Steinegger, Martin and Pacholska, Michalina and Berghammer, Tamas and Bodenstein, Sebastian and Silver, David and Vinyals, Oriol and Senior, Andrew W and Kavukcuoglu, Koray and Kohli, Pushmeet and Hassabis, Demis}, + journal = {Nature}, + title = {Highly accurate protein structure prediction with {AlphaFold}}, + year = {2021}, + volume = {596}, + number = {7873}, + pages = {583--589}, + doi = {10.1038/s41586-021-03819-2} +} +``` + +In addition, if you use the AlphaFold-Multimer mode, please cite: + +```bibtex +@article {AlphaFold-Multimer2021, + author = {Evans, Richard and 
O{\textquoteright}Neill, Michael and Pritzel, Alexander and Antropova, Natasha and Senior, Andrew and Green, Tim and {\v{Z}}{\'\i}dek, Augustin and Bates, Russ and Blackwell, Sam and Yim, Jason and Ronneberger, Olaf and Bodenstein, Sebastian and Zielinski, Michal and Bridgland, Alex and Potapenko, Anna and Cowie, Andrew and Tunyasuvunakool, Kathryn and Jain, Rishub and Clancy, Ellen and Kohli, Pushmeet and Jumper, John and Hassabis, Demis}, + journal = {bioRxiv}, + title = {Protein complex prediction with AlphaFold-Multimer}, + year = {2021}, + elocation-id = {2021.10.04.463034}, + doi = {10.1101/2021.10.04.463034}, + URL = {https://www.biorxiv.org/content/early/2021/10/04/2021.10.04.463034}, + eprint = {https://www.biorxiv.org/content/early/2021/10/04/2021.10.04.463034.full.pdf}, +} +``` + +## Community contributions + +Colab notebooks provided by the community (please note that these notebooks may +vary from our full AlphaFold system and we did not validate their accuracy): + +* The [ColabFold AlphaFold2 notebook](https://colab.research.google.com/github/sokrypton/ColabFold/blob/main/AlphaFold2.ipynb) + by Martin Steinegger, Sergey Ovchinnikov and Milot Mirdita, which uses an + API hosted at the Södinglab based on the MMseqs2 server [(Mirdita et al. + 2019, Bioinformatics)](https://academic.oup.com/bioinformatics/article/35/16/2856/5280135) + for the multiple sequence alignment creation. 
+ +## Acknowledgements + +AlphaFold communicates with and/or references the following separate libraries +and packages: + +* [Abseil](https://github.com/abseil/abseil-py) +* [Biopython](https://biopython.org) +* [Chex](https://github.com/deepmind/chex) +* [Colab](https://research.google.com/colaboratory/) +* [Docker](https://www.docker.com) +* [HH Suite](https://github.com/soedinglab/hh-suite) +* [HMMER Suite](http://eddylab.org/software/hmmer) +* [Haiku](https://github.com/deepmind/dm-haiku) +* [Immutabledict](https://github.com/corenting/immutabledict) +* [JAX](https://github.com/google/jax/) +* [Kalign](https://msa.sbc.su.se/cgi-bin/msa.cgi) +* [matplotlib](https://matplotlib.org/) +* [ML Collections](https://github.com/google/ml_collections) +* [NumPy](https://numpy.org) +* [OpenMM](https://github.com/openmm/openmm) +* [OpenStructure](https://openstructure.org) +* [pandas](https://pandas.pydata.org/) +* [pymol3d](https://github.com/avirshup/py3dmol) +* [SciPy](https://scipy.org) +* [Sonnet](https://github.com/deepmind/sonnet) +* [TensorFlow](https://github.com/tensorflow/tensorflow) +* [Tree](https://github.com/deepmind/tree) +* [tqdm](https://github.com/tqdm/tqdm) + +We thank all their contributors and maintainers! + +## License and Disclaimer + +This is not an officially supported Google product. + +Copyright 2021 DeepMind Technologies Limited. + +### AlphaFold Code License + +Licensed under the Apache License, Version 2.0 (the "License"); you may not use +this file except in compliance with the License. You may obtain a copy of the +License at https://www.apache.org/licenses/LICENSE-2.0. + +Unless required by applicable law or agreed to in writing, software distributed +under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +CONDITIONS OF ANY KIND, either express or implied. See the License for the +specific language governing permissions and limitations under the License. 
+ +### Model Parameters License + +The AlphaFold parameters are made available for non-commercial use only, under +the terms of the Creative Commons Attribution-NonCommercial 4.0 International +(CC BY-NC 4.0) license. You can find details at: +https://creativecommons.org/licenses/by-nc/4.0/legalcode + +### Third-party software + +Use of the third-party software, libraries or code referred to in the +[Acknowledgements](#acknowledgements) section above may be governed by separate +terms and conditions or license provisions. Your use of the third-party +software, libraries or code is subject to any such terms and you should check +that you can comply with any applicable restrictions or terms and conditions +before use. + +### Mirrored Databases + +The following databases have been mirrored by DeepMind, and are available with reference to the following: + +* [BFD](https://bfd.mmseqs.com/) (unmodified), by Steinegger M. and Söding J., available under a [Creative Commons Attribution-ShareAlike 4.0 International License](http://creativecommons.org/licenses/by-sa/4.0/). + +* [BFD](https://bfd.mmseqs.com/) (modified), by Steinegger M. and Söding J., modified by DeepMind, available under a [Creative Commons Attribution-ShareAlike 4.0 International License](http://creativecommons.org/licenses/by-sa/4.0/). See the Methods section of the [AlphaFold proteome paper](https://www.nature.com/articles/s41586-021-03828-1) for details. + +* [Uniclust30: v2018_08](http://wwwuser.gwdg.de/~compbiol/uniclust/2018_08/) (unmodified), by Mirdita M. et al., available under a [Creative Commons Attribution-ShareAlike 4.0 International License](http://creativecommons.org/licenses/by-sa/4.0/). 
+ +* [MGnify: v2018_12](http://ftp.ebi.ac.uk/pub/databases/metagenomics/peptide_database/current_release/README.txt) (unmodified), by Mitchell AL et al., available free of all copyright restrictions and made fully and freely available for both non-commercial and commercial use under [CC0 1.0 Universal (CC0 1.0) Public Domain Dedication](https://creativecommons.org/publicdomain/zero/1.0/). diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/__init__.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/__init__.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,14 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""An implementation of the inference pipeline of AlphaFold v2.0.""" diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/common/__init__.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/common/__init__.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,14 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Common data types and constants used within Alphafold.""" diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/common/confidence.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/common/confidence.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,168 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Functions for processing confidence metrics.""" + +from typing import Dict, Optional, Tuple +import numpy as np +import scipy.special + + +def compute_plddt(logits: np.ndarray) -> np.ndarray: + """Computes per-residue pLDDT from logits. + + Args: + logits: [num_res, num_bins] output from the PredictedLDDTHead. + + Returns: + plddt: [num_res] per-residue pLDDT. 
+ """ + num_bins = logits.shape[-1] + bin_width = 1.0 / num_bins + bin_centers = np.arange(start=0.5 * bin_width, stop=1.0, step=bin_width) + probs = scipy.special.softmax(logits, axis=-1) + predicted_lddt_ca = np.sum(probs * bin_centers[None, :], axis=-1) + return predicted_lddt_ca * 100 + + +def _calculate_bin_centers(breaks: np.ndarray): + """Gets the bin centers from the bin edges. + + Args: + breaks: [num_bins - 1] the error bin edges. + + Returns: + bin_centers: [num_bins] the error bin centers. + """ + step = (breaks[1] - breaks[0]) + + # Add half-step to get the center + bin_centers = breaks + step / 2 + # Add a catch-all bin at the end. + bin_centers = np.concatenate([bin_centers, [bin_centers[-1] + step]], + axis=0) + return bin_centers + + +def _calculate_expected_aligned_error( + alignment_confidence_breaks: np.ndarray, + aligned_distance_error_probs: np.ndarray) -> Tuple[np.ndarray, np.ndarray]: + """Calculates expected aligned distance errors for every pair of residues. + + Args: + alignment_confidence_breaks: [num_bins - 1] the error bin edges. + aligned_distance_error_probs: [num_res, num_res, num_bins] the predicted + probs for each error bin, for each pair of residues. + + Returns: + predicted_aligned_error: [num_res, num_res] the expected aligned distance + error for each pair of residues. + max_predicted_aligned_error: The maximum predicted error possible. + """ + bin_centers = _calculate_bin_centers(alignment_confidence_breaks) + + # Tuple of expected aligned distance error and max possible error. + return (np.sum(aligned_distance_error_probs * bin_centers, axis=-1), + np.asarray(bin_centers[-1])) + + +def compute_predicted_aligned_error( + logits: np.ndarray, + breaks: np.ndarray) -> Dict[str, np.ndarray]: + """Computes aligned confidence metrics from logits. + + Args: + logits: [num_res, num_res, num_bins] the logits output from + PredictedAlignedErrorHead. + breaks: [num_bins - 1] the error bin edges. 
+ + Returns: + aligned_confidence_probs: [num_res, num_res, num_bins] the predicted + aligned error probabilities over bins for each residue pair. + predicted_aligned_error: [num_res, num_res] the expected aligned distance + error for each pair of residues. + max_predicted_aligned_error: The maximum predicted error possible. + """ + aligned_confidence_probs = scipy.special.softmax( + logits, + axis=-1) + predicted_aligned_error, max_predicted_aligned_error = ( + _calculate_expected_aligned_error( + alignment_confidence_breaks=breaks, + aligned_distance_error_probs=aligned_confidence_probs)) + return { + 'aligned_confidence_probs': aligned_confidence_probs, + 'predicted_aligned_error': predicted_aligned_error, + 'max_predicted_aligned_error': max_predicted_aligned_error, + } + + +def predicted_tm_score( + logits: np.ndarray, + breaks: np.ndarray, + residue_weights: Optional[np.ndarray] = None, + asym_id: Optional[np.ndarray] = None, + interface: bool = False) -> np.ndarray: + """Computes predicted TM alignment or predicted interface TM alignment score. + + Args: + logits: [num_res, num_res, num_bins] the logits output from + PredictedAlignedErrorHead. + breaks: [num_bins] the error bins. + residue_weights: [num_res] the per residue weights to use for the + expectation. + asym_id: [num_res] the asymmetric unit ID - the chain ID. Only needed for + ipTM calculation, i.e. when interface=True. + interface: If True, interface predicted TM score is computed. + + Returns: + ptm_score: The predicted TM alignment or the predicted iTM score. + """ + + # residue_weights has to be in [0, 1], but can be floating-point, i.e. the + # exp. resolved head's probability. + if residue_weights is None: + residue_weights = np.ones(logits.shape[0]) + + bin_centers = _calculate_bin_centers(breaks) + + num_res = int(np.sum(residue_weights)) + # Clip num_res to avoid negative/undefined d0. + clipped_num_res = max(num_res, 19) + + # Compute d_0(num_res) as defined by TM-score, eqn. 
(5) in Yang & Skolnick + # "Scoring function for automated assessment of protein structure template + # quality", 2004: http://zhanglab.ccmb.med.umich.edu/papers/2004_3.pdf + d0 = 1.24 * (clipped_num_res - 15) ** (1./3) - 1.8 + + # Convert logits to probs. + probs = scipy.special.softmax(logits, axis=-1) + + # TM-Score term for every bin. + tm_per_bin = 1. / (1 + np.square(bin_centers) / np.square(d0)) + # E_distances tm(distance). + predicted_tm_term = np.sum(probs * tm_per_bin, axis=-1) + + pair_mask = np.ones(shape=(num_res, num_res), dtype=bool) + if interface: + pair_mask *= asym_id[:, None] != asym_id[None, :] + + predicted_tm_term *= pair_mask + + pair_residue_weights = pair_mask * ( + residue_weights[None, :] * residue_weights[:, None]) + normed_residue_mask = pair_residue_weights / (1e-8 + np.sum( + pair_residue_weights, axis=-1, keepdims=True)) + per_alignment = np.sum(predicted_tm_term * normed_residue_mask, axis=-1) + return np.asarray(per_alignment[(per_alignment * residue_weights).argmax()]) diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/common/protein.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/common/protein.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,278 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Protein data type.""" +import dataclasses +import io +from typing import Any, Mapping, Optional +from alphafold.common import residue_constants +from Bio.PDB import PDBParser +import numpy as np + +FeatureDict = Mapping[str, np.ndarray] +ModelOutput = Mapping[str, Any] # Is a nested dict. + +# Complete sequence of chain IDs supported by the PDB format. +PDB_CHAIN_IDS = 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789' +PDB_MAX_CHAINS = len(PDB_CHAIN_IDS) # := 62. + + +@dataclasses.dataclass(frozen=True) +class Protein: + """Protein structure representation.""" + + # Cartesian coordinates of atoms in angstroms. The atom types correspond to + # residue_constants.atom_types, i.e. the first three are N, CA, CB. + atom_positions: np.ndarray # [num_res, num_atom_type, 3] + + # Amino-acid type for each residue represented as an integer between 0 and + # 20, where 20 is 'X'. + aatype: np.ndarray # [num_res] + + # Binary float mask to indicate presence of a particular atom. 1.0 if an atom + # is present and 0.0 if not. This should be used for loss masking. + atom_mask: np.ndarray # [num_res, num_atom_type] + + # Residue index as used in PDB. It is not necessarily continuous or 0-indexed. + residue_index: np.ndarray # [num_res] + + # 0-indexed number corresponding to the chain in the protein that this residue + # belongs to. + chain_index: np.ndarray # [num_res] + + # B-factors, or temperature factors, of each residue (in sq. angstroms units), + # representing the displacement of the residue from its ground truth mean + # value. + b_factors: np.ndarray # [num_res, num_atom_type] + + def __post_init__(self): + if len(np.unique(self.chain_index)) > PDB_MAX_CHAINS: + raise ValueError( + f'Cannot build an instance with more than {PDB_MAX_CHAINS} chains ' + 'because these cannot be written to PDB format.') + + +def from_pdb_string(pdb_str: str, chain_id: Optional[str] = None) -> Protein: + """Takes a PDB string and constructs a Protein object. 
+ + WARNING: All non-standard residue types will be converted into UNK. All + non-standard atoms will be ignored. + + Args: + pdb_str: The contents of the pdb file + chain_id: If chain_id is specified (e.g. A), then only that chain + is parsed. Otherwise all chains are parsed. + + Returns: + A new `Protein` parsed from the pdb contents. + """ + pdb_fh = io.StringIO(pdb_str) + parser = PDBParser(QUIET=True) + structure = parser.get_structure('none', pdb_fh) + models = list(structure.get_models()) + if len(models) != 1: + raise ValueError( + f'Only single model PDBs are supported. Found {len(models)} models.') + model = models[0] + + atom_positions = [] + aatype = [] + atom_mask = [] + residue_index = [] + chain_ids = [] + b_factors = [] + + for chain in model: + if chain_id is not None and chain.id != chain_id: + continue + for res in chain: + if res.id[2] != ' ': + raise ValueError( + f'PDB contains an insertion code at chain {chain.id} and residue ' + f'index {res.id[1]}. These are not supported.') + res_shortname = residue_constants.restype_3to1.get(res.resname, 'X') + restype_idx = residue_constants.restype_order.get( + res_shortname, residue_constants.restype_num) + pos = np.zeros((residue_constants.atom_type_num, 3)) + mask = np.zeros((residue_constants.atom_type_num,)) + res_b_factors = np.zeros((residue_constants.atom_type_num,)) + for atom in res: + if atom.name not in residue_constants.atom_types: + continue + pos[residue_constants.atom_order[atom.name]] = atom.coord + mask[residue_constants.atom_order[atom.name]] = 1. + res_b_factors[residue_constants.atom_order[atom.name]] = atom.bfactor + if np.sum(mask) < 0.5: + # If no known atom positions are reported for the residue then skip it. + continue + aatype.append(restype_idx) + atom_positions.append(pos) + atom_mask.append(mask) + residue_index.append(res.id[1]) + chain_ids.append(chain.id) + b_factors.append(res_b_factors) + + # Chain IDs are usually characters so map these to ints. 
+ unique_chain_ids = np.unique(chain_ids) + chain_id_mapping = {cid: n for n, cid in enumerate(unique_chain_ids)} + chain_index = np.array([chain_id_mapping[cid] for cid in chain_ids]) + + return Protein( + atom_positions=np.array(atom_positions), + atom_mask=np.array(atom_mask), + aatype=np.array(aatype), + residue_index=np.array(residue_index), + chain_index=chain_index, + b_factors=np.array(b_factors)) + + +def _chain_end(atom_index, end_resname, chain_name, residue_index) -> str: + chain_end = 'TER' + return (f'{chain_end:<6}{atom_index:>5} {end_resname:>3} ' + f'{chain_name:>1}{residue_index:>4}') + + +def to_pdb(prot: Protein) -> str: + """Converts a `Protein` instance to a PDB string. + + Args: + prot: The protein to convert to PDB. + + Returns: + PDB string. + """ + restypes = residue_constants.restypes + ['X'] + res_1to3 = lambda r: residue_constants.restype_1to3.get(restypes[r], 'UNK') + atom_types = residue_constants.atom_types + + pdb_lines = [] + + atom_mask = prot.atom_mask + aatype = prot.aatype + atom_positions = prot.atom_positions + residue_index = prot.residue_index.astype(np.int32) + chain_index = prot.chain_index.astype(np.int32) + b_factors = prot.b_factors + + if np.any(aatype > residue_constants.restype_num): + raise ValueError('Invalid aatypes.') + + # Construct a mapping from chain integer indices to chain ID strings. + chain_ids = {} + for i in np.unique(chain_index): # np.unique gives sorted output. + if i >= PDB_MAX_CHAINS: + raise ValueError( + f'The PDB format supports at most {PDB_MAX_CHAINS} chains.') + chain_ids[i] = PDB_CHAIN_IDS[i] + + pdb_lines.append('MODEL 1') + atom_index = 1 + last_chain_index = chain_index[0] + # Add all atom sites. + for i in range(aatype.shape[0]): + # Close the previous chain if in a multichain PDB. 
+ if last_chain_index != chain_index[i]: + pdb_lines.append(_chain_end( + atom_index, res_1to3(aatype[i - 1]), chain_ids[chain_index[i - 1]], + residue_index[i - 1])) + last_chain_index = chain_index[i] + atom_index += 1 # Atom index increases at the TER symbol. + + res_name_3 = res_1to3(aatype[i]) + for atom_name, pos, mask, b_factor in zip( + atom_types, atom_positions[i], atom_mask[i], b_factors[i]): + if mask < 0.5: + continue + + record_type = 'ATOM' + name = atom_name if len(atom_name) == 4 else f' {atom_name}' + alt_loc = '' + insertion_code = '' + occupancy = 1.00 + element = atom_name[0] # Protein supports only C, N, O, S, this works. + charge = '' + # PDB is a columnar format, every space matters here! + atom_line = (f'{record_type:<6}{atom_index:>5} {name:<4}{alt_loc:>1}' + f'{res_name_3:>3} {chain_ids[chain_index[i]]:>1}' + f'{residue_index[i]:>4}{insertion_code:>1} ' + f'{pos[0]:>8.3f}{pos[1]:>8.3f}{pos[2]:>8.3f}' + f'{occupancy:>6.2f}{b_factor:>6.2f} ' + f'{element:>2}{charge:>2}') + pdb_lines.append(atom_line) + atom_index += 1 + + # Close the final chain. + pdb_lines.append(_chain_end(atom_index, res_1to3(aatype[-1]), + chain_ids[chain_index[-1]], residue_index[-1])) + pdb_lines.append('ENDMDL') + pdb_lines.append('END') + + # Pad all lines to 80 characters. + pdb_lines = [line.ljust(80) for line in pdb_lines] + return '\n'.join(pdb_lines) + '\n' # Add terminating newline. + + +def ideal_atom_mask(prot: Protein) -> np.ndarray: + """Computes an ideal atom mask. + + `Protein.atom_mask` typically is defined according to the atoms that are + reported in the PDB. This function computes a mask according to heavy atoms + that should be present in the given sequence of amino acids. + + Args: + prot: `Protein` whose fields are `numpy.ndarray` objects. + + Returns: + An ideal atom mask. 
+ """ + return residue_constants.STANDARD_ATOM_MASK[prot.aatype] + + +def from_prediction( + features: FeatureDict, + result: ModelOutput, + b_factors: Optional[np.ndarray] = None, + remove_leading_feature_dimension: bool = True) -> Protein: + """Assembles a protein from a prediction. + + Args: + features: Dictionary holding model inputs. + result: Dictionary holding model outputs. + b_factors: (Optional) B-factors to use for the protein. + remove_leading_feature_dimension: Whether to remove the leading dimension + of the `features` values. + + Returns: + A protein instance. + """ + fold_output = result['structure_module'] + + def _maybe_remove_leading_dim(arr: np.ndarray) -> np.ndarray: + return arr[0] if remove_leading_feature_dimension else arr + + if 'asym_id' in features: + chain_index = _maybe_remove_leading_dim(features['asym_id']) + else: + chain_index = np.zeros_like(_maybe_remove_leading_dim(features['aatype'])) + + if b_factors is None: + b_factors = np.zeros_like(fold_output['final_atom_mask']) + + return Protein( + aatype=_maybe_remove_leading_dim(features['aatype']), + atom_positions=fold_output['final_atom_positions'], + atom_mask=fold_output['final_atom_mask'], + residue_index=_maybe_remove_leading_dim(features['residue_index']) + 1, + chain_index=chain_index, + b_factors=b_factors) diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/common/protein_test.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/common/protein_test.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,114 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for protein.""" + +import os + +from absl.testing import absltest +from absl.testing import parameterized +from alphafold.common import protein +from alphafold.common import residue_constants +import numpy as np +# Internal import (7716). + +TEST_DATA_DIR = 'alphafold/common/testdata/' + + +class ProteinTest(parameterized.TestCase): + + def _check_shapes(self, prot, num_res): + """Check that the processed shapes are correct.""" + num_atoms = residue_constants.atom_type_num + self.assertEqual((num_res, num_atoms, 3), prot.atom_positions.shape) + self.assertEqual((num_res,), prot.aatype.shape) + self.assertEqual((num_res, num_atoms), prot.atom_mask.shape) + self.assertEqual((num_res,), prot.residue_index.shape) + self.assertEqual((num_res,), prot.chain_index.shape) + self.assertEqual((num_res, num_atoms), prot.b_factors.shape) + + @parameterized.named_parameters( + dict(testcase_name='chain_A', + pdb_file='2rbg.pdb', chain_id='A', num_res=282, num_chains=1), + dict(testcase_name='chain_B', + pdb_file='2rbg.pdb', chain_id='B', num_res=282, num_chains=1), + dict(testcase_name='multichain', + pdb_file='2rbg.pdb', chain_id=None, num_res=564, num_chains=2)) + def test_from_pdb_str(self, pdb_file, chain_id, num_res, num_chains): + pdb_file = os.path.join(absltest.get_default_test_srcdir(), TEST_DATA_DIR, + pdb_file) + with open(pdb_file) as f: + pdb_string = f.read() + prot = protein.from_pdb_string(pdb_string, chain_id) + self._check_shapes(prot, num_res) + self.assertGreaterEqual(prot.aatype.min(), 0) + # Allow equal since unknown restypes have index 
equal to restype_num. + self.assertLessEqual(prot.aatype.max(), residue_constants.restype_num) + self.assertLen(np.unique(prot.chain_index), num_chains) + + def test_to_pdb(self): + with open( + os.path.join(absltest.get_default_test_srcdir(), TEST_DATA_DIR, + '2rbg.pdb')) as f: + pdb_string = f.read() + prot = protein.from_pdb_string(pdb_string) + pdb_string_reconstr = protein.to_pdb(prot) + + for line in pdb_string_reconstr.splitlines(): + self.assertLen(line, 80) + + prot_reconstr = protein.from_pdb_string(pdb_string_reconstr) + + np.testing.assert_array_equal(prot_reconstr.aatype, prot.aatype) + np.testing.assert_array_almost_equal( + prot_reconstr.atom_positions, prot.atom_positions) + np.testing.assert_array_almost_equal( + prot_reconstr.atom_mask, prot.atom_mask) + np.testing.assert_array_equal( + prot_reconstr.residue_index, prot.residue_index) + np.testing.assert_array_equal( + prot_reconstr.chain_index, prot.chain_index) + np.testing.assert_array_almost_equal( + prot_reconstr.b_factors, prot.b_factors) + + def test_ideal_atom_mask(self): + with open( + os.path.join(absltest.get_default_test_srcdir(), TEST_DATA_DIR, + '2rbg.pdb')) as f: + pdb_string = f.read() + prot = protein.from_pdb_string(pdb_string) + ideal_mask = protein.ideal_atom_mask(prot) + non_ideal_residues = set([102] + list(range(127, 286))) + for i, (res, atom_mask) in enumerate( + zip(prot.residue_index, prot.atom_mask)): + if res in non_ideal_residues: + self.assertFalse(np.all(atom_mask == ideal_mask[i]), msg=f'{res}') + else: + self.assertTrue(np.all(atom_mask == ideal_mask[i]), msg=f'{res}') + + def test_too_many_chains(self): + num_res = protein.PDB_MAX_CHAINS + 1 + num_atom_type = residue_constants.atom_type_num + with self.assertRaises(ValueError): + _ = protein.Protein( + atom_positions=np.random.random([num_res, num_atom_type, 3]), + aatype=np.random.randint(0, 21, [num_res]), + atom_mask=np.random.randint(0, 2, [num_res]).astype(np.float32), + residue_index=np.arange(1, 
num_res+1), + chain_index=np.arange(num_res), + b_factors=np.random.uniform(1, 100, [num_res])) + + +if __name__ == '__main__': + absltest.main() diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/common/residue_constants.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/common/residue_constants.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,897 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Constants used in AlphaFold.""" + +import collections +import functools +import os +from typing import List, Mapping, Tuple + +import numpy as np +import tree + +# Internal import (35fd). + + +# Distance from one CA to next CA [trans configuration: omega = 180]. +ca_ca = 3.80209737096 + +# Format: The list for each AA type contains chi1, chi2, chi3, chi4 in +# this order (or a relevant subset from chi1 onwards). ALA and GLY don't have +# chi angles so their chi angle lists are empty. +chi_angles_atoms = { + 'ALA': [], + # Chi5 in arginine is always 0 +- 5 degrees, so ignore it. 
+ 'ARG': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD'], + ['CB', 'CG', 'CD', 'NE'], ['CG', 'CD', 'NE', 'CZ']], + 'ASN': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'OD1']], + 'ASP': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'OD1']], + 'CYS': [['N', 'CA', 'CB', 'SG']], + 'GLN': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD'], + ['CB', 'CG', 'CD', 'OE1']], + 'GLU': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD'], + ['CB', 'CG', 'CD', 'OE1']], + 'GLY': [], + 'HIS': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'ND1']], + 'ILE': [['N', 'CA', 'CB', 'CG1'], ['CA', 'CB', 'CG1', 'CD1']], + 'LEU': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD1']], + 'LYS': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD'], + ['CB', 'CG', 'CD', 'CE'], ['CG', 'CD', 'CE', 'NZ']], + 'MET': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'SD'], + ['CB', 'CG', 'SD', 'CE']], + 'PHE': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD1']], + 'PRO': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD']], + 'SER': [['N', 'CA', 'CB', 'OG']], + 'THR': [['N', 'CA', 'CB', 'OG1']], + 'TRP': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD1']], + 'TYR': [['N', 'CA', 'CB', 'CG'], ['CA', 'CB', 'CG', 'CD1']], + 'VAL': [['N', 'CA', 'CB', 'CG1']], +} + +# If chi angles given in fixed-length array, this matrix determines how to mask +# them for each AA type. The order is as per restype_order (see below). 
+chi_angles_mask = [ + [0.0, 0.0, 0.0, 0.0], # ALA + [1.0, 1.0, 1.0, 1.0], # ARG + [1.0, 1.0, 0.0, 0.0], # ASN + [1.0, 1.0, 0.0, 0.0], # ASP + [1.0, 0.0, 0.0, 0.0], # CYS + [1.0, 1.0, 1.0, 0.0], # GLN + [1.0, 1.0, 1.0, 0.0], # GLU + [0.0, 0.0, 0.0, 0.0], # GLY + [1.0, 1.0, 0.0, 0.0], # HIS + [1.0, 1.0, 0.0, 0.0], # ILE + [1.0, 1.0, 0.0, 0.0], # LEU + [1.0, 1.0, 1.0, 1.0], # LYS + [1.0, 1.0, 1.0, 0.0], # MET + [1.0, 1.0, 0.0, 0.0], # PHE + [1.0, 1.0, 0.0, 0.0], # PRO + [1.0, 0.0, 0.0, 0.0], # SER + [1.0, 0.0, 0.0, 0.0], # THR + [1.0, 1.0, 0.0, 0.0], # TRP + [1.0, 1.0, 0.0, 0.0], # TYR + [1.0, 0.0, 0.0, 0.0], # VAL +] + +# The following chi angles are pi periodic: they can be rotated by a multiple +# of pi without affecting the structure. +chi_pi_periodic = [ + [0.0, 0.0, 0.0, 0.0], # ALA + [0.0, 0.0, 0.0, 0.0], # ARG + [0.0, 0.0, 0.0, 0.0], # ASN + [0.0, 1.0, 0.0, 0.0], # ASP + [0.0, 0.0, 0.0, 0.0], # CYS + [0.0, 0.0, 0.0, 0.0], # GLN + [0.0, 0.0, 1.0, 0.0], # GLU + [0.0, 0.0, 0.0, 0.0], # GLY + [0.0, 0.0, 0.0, 0.0], # HIS + [0.0, 0.0, 0.0, 0.0], # ILE + [0.0, 0.0, 0.0, 0.0], # LEU + [0.0, 0.0, 0.0, 0.0], # LYS + [0.0, 0.0, 0.0, 0.0], # MET + [0.0, 1.0, 0.0, 0.0], # PHE + [0.0, 0.0, 0.0, 0.0], # PRO + [0.0, 0.0, 0.0, 0.0], # SER + [0.0, 0.0, 0.0, 0.0], # THR + [0.0, 0.0, 0.0, 0.0], # TRP + [0.0, 1.0, 0.0, 0.0], # TYR + [0.0, 0.0, 0.0, 0.0], # VAL + [0.0, 0.0, 0.0, 0.0], # UNK +] + +# Atoms positions relative to the 8 rigid groups, defined by the pre-omega, phi, +# psi and chi angles: +# 0: 'backbone group', +# 1: 'pre-omega-group', (empty) +# 2: 'phi-group', (currently empty, because it defines only hydrogens) +# 3: 'psi-group', +# 4,5,6,7: 'chi1,2,3,4-group' +# The atom positions are relative to the axis-end-atom of the corresponding +# rotation axis. 
The x-axis is in direction of the rotation axis, and the y-axis +# is defined such that the dihedral-angle-definiting atom (the last entry in +# chi_angles_atoms above) is in the xy-plane (with a positive y-coordinate). +# format: [atomname, group_idx, rel_position] +rigid_group_atom_positions = { + 'ALA': [ + ['N', 0, (-0.525, 1.363, 0.000)], + ['CA', 0, (0.000, 0.000, 0.000)], + ['C', 0, (1.526, -0.000, -0.000)], + ['CB', 0, (-0.529, -0.774, -1.205)], + ['O', 3, (0.627, 1.062, 0.000)], + ], + 'ARG': [ + ['N', 0, (-0.524, 1.362, -0.000)], + ['CA', 0, (0.000, 0.000, 0.000)], + ['C', 0, (1.525, -0.000, -0.000)], + ['CB', 0, (-0.524, -0.778, -1.209)], + ['O', 3, (0.626, 1.062, 0.000)], + ['CG', 4, (0.616, 1.390, -0.000)], + ['CD', 5, (0.564, 1.414, 0.000)], + ['NE', 6, (0.539, 1.357, -0.000)], + ['NH1', 7, (0.206, 2.301, 0.000)], + ['NH2', 7, (2.078, 0.978, -0.000)], + ['CZ', 7, (0.758, 1.093, -0.000)], + ], + 'ASN': [ + ['N', 0, (-0.536, 1.357, 0.000)], + ['CA', 0, (0.000, 0.000, 0.000)], + ['C', 0, (1.526, -0.000, -0.000)], + ['CB', 0, (-0.531, -0.787, -1.200)], + ['O', 3, (0.625, 1.062, 0.000)], + ['CG', 4, (0.584, 1.399, 0.000)], + ['ND2', 5, (0.593, -1.188, 0.001)], + ['OD1', 5, (0.633, 1.059, 0.000)], + ], + 'ASP': [ + ['N', 0, (-0.525, 1.362, -0.000)], + ['CA', 0, (0.000, 0.000, 0.000)], + ['C', 0, (1.527, 0.000, -0.000)], + ['CB', 0, (-0.526, -0.778, -1.208)], + ['O', 3, (0.626, 1.062, -0.000)], + ['CG', 4, (0.593, 1.398, -0.000)], + ['OD1', 5, (0.610, 1.091, 0.000)], + ['OD2', 5, (0.592, -1.101, -0.003)], + ], + 'CYS': [ + ['N', 0, (-0.522, 1.362, -0.000)], + ['CA', 0, (0.000, 0.000, 0.000)], + ['C', 0, (1.524, 0.000, 0.000)], + ['CB', 0, (-0.519, -0.773, -1.212)], + ['O', 3, (0.625, 1.062, -0.000)], + ['SG', 4, (0.728, 1.653, 0.000)], + ], + 'GLN': [ + ['N', 0, (-0.526, 1.361, -0.000)], + ['CA', 0, (0.000, 0.000, 0.000)], + ['C', 0, (1.526, 0.000, 0.000)], + ['CB', 0, (-0.525, -0.779, -1.207)], + ['O', 3, (0.626, 1.062, -0.000)], + ['CG', 4, (0.615, 1.393, 
0.000)], + ['CD', 5, (0.587, 1.399, -0.000)], + ['NE2', 6, (0.593, -1.189, -0.001)], + ['OE1', 6, (0.634, 1.060, 0.000)], + ], + 'GLU': [ + ['N', 0, (-0.528, 1.361, 0.000)], + ['CA', 0, (0.000, 0.000, 0.000)], + ['C', 0, (1.526, -0.000, -0.000)], + ['CB', 0, (-0.526, -0.781, -1.207)], + ['O', 3, (0.626, 1.062, 0.000)], + ['CG', 4, (0.615, 1.392, 0.000)], + ['CD', 5, (0.600, 1.397, 0.000)], + ['OE1', 6, (0.607, 1.095, -0.000)], + ['OE2', 6, (0.589, -1.104, -0.001)], + ], + 'GLY': [ + ['N', 0, (-0.572, 1.337, 0.000)], + ['CA', 0, (0.000, 0.000, 0.000)], + ['C', 0, (1.517, -0.000, -0.000)], + ['O', 3, (0.626, 1.062, -0.000)], + ], + 'HIS': [ + ['N', 0, (-0.527, 1.360, 0.000)], + ['CA', 0, (0.000, 0.000, 0.000)], + ['C', 0, (1.525, 0.000, 0.000)], + ['CB', 0, (-0.525, -0.778, -1.208)], + ['O', 3, (0.625, 1.063, 0.000)], + ['CG', 4, (0.600, 1.370, -0.000)], + ['CD2', 5, (0.889, -1.021, 0.003)], + ['ND1', 5, (0.744, 1.160, -0.000)], + ['CE1', 5, (2.030, 0.851, 0.002)], + ['NE2', 5, (2.145, -0.466, 0.004)], + ], + 'ILE': [ + ['N', 0, (-0.493, 1.373, -0.000)], + ['CA', 0, (0.000, 0.000, 0.000)], + ['C', 0, (1.527, -0.000, -0.000)], + ['CB', 0, (-0.536, -0.793, -1.213)], + ['O', 3, (0.627, 1.062, -0.000)], + ['CG1', 4, (0.534, 1.437, -0.000)], + ['CG2', 4, (0.540, -0.785, -1.199)], + ['CD1', 5, (0.619, 1.391, 0.000)], + ], + 'LEU': [ + ['N', 0, (-0.520, 1.363, 0.000)], + ['CA', 0, (0.000, 0.000, 0.000)], + ['C', 0, (1.525, -0.000, -0.000)], + ['CB', 0, (-0.522, -0.773, -1.214)], + ['O', 3, (0.625, 1.063, -0.000)], + ['CG', 4, (0.678, 1.371, 0.000)], + ['CD1', 5, (0.530, 1.430, -0.000)], + ['CD2', 5, (0.535, -0.774, 1.200)], + ], + 'LYS': [ + ['N', 0, (-0.526, 1.362, -0.000)], + ['CA', 0, (0.000, 0.000, 0.000)], + ['C', 0, (1.526, 0.000, 0.000)], + ['CB', 0, (-0.524, -0.778, -1.208)], + ['O', 3, (0.626, 1.062, -0.000)], + ['CG', 4, (0.619, 1.390, 0.000)], + ['CD', 5, (0.559, 1.417, 0.000)], + ['CE', 6, (0.560, 1.416, 0.000)], + ['NZ', 7, (0.554, 1.387, 0.000)], + ], + 'MET': 
[ + ['N', 0, (-0.521, 1.364, -0.000)], + ['CA', 0, (0.000, 0.000, 0.000)], + ['C', 0, (1.525, 0.000, 0.000)], + ['CB', 0, (-0.523, -0.776, -1.210)], + ['O', 3, (0.625, 1.062, -0.000)], + ['CG', 4, (0.613, 1.391, -0.000)], + ['SD', 5, (0.703, 1.695, 0.000)], + ['CE', 6, (0.320, 1.786, -0.000)], + ], + 'PHE': [ + ['N', 0, (-0.518, 1.363, 0.000)], + ['CA', 0, (0.000, 0.000, 0.000)], + ['C', 0, (1.524, 0.000, -0.000)], + ['CB', 0, (-0.525, -0.776, -1.212)], + ['O', 3, (0.626, 1.062, -0.000)], + ['CG', 4, (0.607, 1.377, 0.000)], + ['CD1', 5, (0.709, 1.195, -0.000)], + ['CD2', 5, (0.706, -1.196, 0.000)], + ['CE1', 5, (2.102, 1.198, -0.000)], + ['CE2', 5, (2.098, -1.201, -0.000)], + ['CZ', 5, (2.794, -0.003, -0.001)], + ], + 'PRO': [ + ['N', 0, (-0.566, 1.351, -0.000)], + ['CA', 0, (0.000, 0.000, 0.000)], + ['C', 0, (1.527, -0.000, 0.000)], + ['CB', 0, (-0.546, -0.611, -1.293)], + ['O', 3, (0.621, 1.066, 0.000)], + ['CG', 4, (0.382, 1.445, 0.0)], + # ['CD', 5, (0.427, 1.440, 0.0)], + ['CD', 5, (0.477, 1.424, 0.0)], # manually made angle 2 degrees larger + ], + 'SER': [ + ['N', 0, (-0.529, 1.360, -0.000)], + ['CA', 0, (0.000, 0.000, 0.000)], + ['C', 0, (1.525, -0.000, -0.000)], + ['CB', 0, (-0.518, -0.777, -1.211)], + ['O', 3, (0.626, 1.062, -0.000)], + ['OG', 4, (0.503, 1.325, 0.000)], + ], + 'THR': [ + ['N', 0, (-0.517, 1.364, 0.000)], + ['CA', 0, (0.000, 0.000, 0.000)], + ['C', 0, (1.526, 0.000, -0.000)], + ['CB', 0, (-0.516, -0.793, -1.215)], + ['O', 3, (0.626, 1.062, 0.000)], + ['CG2', 4, (0.550, -0.718, -1.228)], + ['OG1', 4, (0.472, 1.353, 0.000)], + ], + 'TRP': [ + ['N', 0, (-0.521, 1.363, 0.000)], + ['CA', 0, (0.000, 0.000, 0.000)], + ['C', 0, (1.525, -0.000, 0.000)], + ['CB', 0, (-0.523, -0.776, -1.212)], + ['O', 3, (0.627, 1.062, 0.000)], + ['CG', 4, (0.609, 1.370, -0.000)], + ['CD1', 5, (0.824, 1.091, 0.000)], + ['CD2', 5, (0.854, -1.148, -0.005)], + ['CE2', 5, (2.186, -0.678, -0.007)], + ['CE3', 5, (0.622, -2.530, -0.007)], + ['NE1', 5, (2.140, 0.690, 
-0.004)], + ['CH2', 5, (3.028, -2.890, -0.013)], + ['CZ2', 5, (3.283, -1.543, -0.011)], + ['CZ3', 5, (1.715, -3.389, -0.011)], + ], + 'TYR': [ + ['N', 0, (-0.522, 1.362, 0.000)], + ['CA', 0, (0.000, 0.000, 0.000)], + ['C', 0, (1.524, -0.000, -0.000)], + ['CB', 0, (-0.522, -0.776, -1.213)], + ['O', 3, (0.627, 1.062, -0.000)], + ['CG', 4, (0.607, 1.382, -0.000)], + ['CD1', 5, (0.716, 1.195, -0.000)], + ['CD2', 5, (0.713, -1.194, -0.001)], + ['CE1', 5, (2.107, 1.200, -0.002)], + ['CE2', 5, (2.104, -1.201, -0.003)], + ['OH', 5, (4.168, -0.002, -0.005)], + ['CZ', 5, (2.791, -0.001, -0.003)], + ], + 'VAL': [ + ['N', 0, (-0.494, 1.373, -0.000)], + ['CA', 0, (0.000, 0.000, 0.000)], + ['C', 0, (1.527, -0.000, -0.000)], + ['CB', 0, (-0.533, -0.795, -1.213)], + ['O', 3, (0.627, 1.062, -0.000)], + ['CG1', 4, (0.540, 1.429, -0.000)], + ['CG2', 4, (0.533, -0.776, 1.203)], + ], +} + +# A list of atoms (excluding hydrogen) for each AA type. PDB naming convention. +residue_atoms = { + 'ALA': ['C', 'CA', 'CB', 'N', 'O'], + 'ARG': ['C', 'CA', 'CB', 'CG', 'CD', 'CZ', 'N', 'NE', 'O', 'NH1', 'NH2'], + 'ASP': ['C', 'CA', 'CB', 'CG', 'N', 'O', 'OD1', 'OD2'], + 'ASN': ['C', 'CA', 'CB', 'CG', 'N', 'ND2', 'O', 'OD1'], + 'CYS': ['C', 'CA', 'CB', 'N', 'O', 'SG'], + 'GLU': ['C', 'CA', 'CB', 'CG', 'CD', 'N', 'O', 'OE1', 'OE2'], + 'GLN': ['C', 'CA', 'CB', 'CG', 'CD', 'N', 'NE2', 'O', 'OE1'], + 'GLY': ['C', 'CA', 'N', 'O'], + 'HIS': ['C', 'CA', 'CB', 'CG', 'CD2', 'CE1', 'N', 'ND1', 'NE2', 'O'], + 'ILE': ['C', 'CA', 'CB', 'CG1', 'CG2', 'CD1', 'N', 'O'], + 'LEU': ['C', 'CA', 'CB', 'CG', 'CD1', 'CD2', 'N', 'O'], + 'LYS': ['C', 'CA', 'CB', 'CG', 'CD', 'CE', 'N', 'NZ', 'O'], + 'MET': ['C', 'CA', 'CB', 'CG', 'CE', 'N', 'O', 'SD'], + 'PHE': ['C', 'CA', 'CB', 'CG', 'CD1', 'CD2', 'CE1', 'CE2', 'CZ', 'N', 'O'], + 'PRO': ['C', 'CA', 'CB', 'CG', 'CD', 'N', 'O'], + 'SER': ['C', 'CA', 'CB', 'N', 'O', 'OG'], + 'THR': ['C', 'CA', 'CB', 'CG2', 'N', 'O', 'OG1'], + 'TRP': ['C', 'CA', 'CB', 'CG', 'CD1', 'CD2', 'CE2', 
'CE3', 'CZ2', 'CZ3', + 'CH2', 'N', 'NE1', 'O'], + 'TYR': ['C', 'CA', 'CB', 'CG', 'CD1', 'CD2', 'CE1', 'CE2', 'CZ', 'N', 'O', + 'OH'], + 'VAL': ['C', 'CA', 'CB', 'CG1', 'CG2', 'N', 'O'] +} + +# Naming swaps for ambiguous atom names. +# Due to symmetries in the amino acids the naming of atoms is ambiguous in +# 4 of the 20 amino acids. +# (The LDDT paper lists 7 amino acids as ambiguous, but the naming ambiguities +# in LEU, VAL and ARG can be resolved by using the 3d constellations of +# the 'ambiguous' atoms and their neighbours) +residue_atom_renaming_swaps = { + 'ASP': {'OD1': 'OD2'}, + 'GLU': {'OE1': 'OE2'}, + 'PHE': {'CD1': 'CD2', 'CE1': 'CE2'}, + 'TYR': {'CD1': 'CD2', 'CE1': 'CE2'}, +} + +# Van der Waals radii [Angstroem] of the atoms (from Wikipedia) +van_der_waals_radius = { + 'C': 1.7, + 'N': 1.55, + 'O': 1.52, + 'S': 1.8, +} + +Bond = collections.namedtuple( + 'Bond', ['atom1_name', 'atom2_name', 'length', 'stddev']) +BondAngle = collections.namedtuple( + 'BondAngle', + ['atom1_name', 'atom2_name', 'atom3name', 'angle_rad', 'stddev']) + + +@functools.lru_cache(maxsize=None) +def load_stereo_chemical_props() -> Tuple[Mapping[str, List[Bond]], + Mapping[str, List[Bond]], + Mapping[str, List[BondAngle]]]: + """Load stereo_chemical_props.txt into a nice structure. + + Load literature values for bond lengths and bond angles and translate + bond angles into the length of the opposite edge of the triangle + ("residue_virtual_bonds"). + + Returns: + residue_bonds: Dict that maps resname -> list of Bond tuples. + residue_virtual_bonds: Dict that maps resname -> list of Bond tuples. + residue_bond_angles: Dict that maps resname -> list of BondAngle tuples. + """ + stereo_chemical_props_path = os.path.join( + os.path.dirname(os.path.abspath(__file__)), 'stereo_chemical_props.txt' + ) + with open(stereo_chemical_props_path, 'rt') as f: + stereo_chemical_props = f.read() + lines_iter = iter(stereo_chemical_props.splitlines()) + # Load bond lengths. 
+ residue_bonds = {} + next(lines_iter) # Skip header line. + for line in lines_iter: + if line.strip() == '-': + break + bond, resname, length, stddev = line.split() + atom1, atom2 = bond.split('-') + if resname not in residue_bonds: + residue_bonds[resname] = [] + residue_bonds[resname].append( + Bond(atom1, atom2, float(length), float(stddev))) + residue_bonds['UNK'] = [] + + # Load bond angles. + residue_bond_angles = {} + next(lines_iter) # Skip empty line. + next(lines_iter) # Skip header line. + for line in lines_iter: + if line.strip() == '-': + break + bond, resname, angle_degree, stddev_degree = line.split() + atom1, atom2, atom3 = bond.split('-') + if resname not in residue_bond_angles: + residue_bond_angles[resname] = [] + residue_bond_angles[resname].append( + BondAngle(atom1, atom2, atom3, + float(angle_degree) / 180. * np.pi, + float(stddev_degree) / 180. * np.pi)) + residue_bond_angles['UNK'] = [] + + def make_bond_key(atom1_name, atom2_name): + """Unique key to lookup bonds.""" + return '-'.join(sorted([atom1_name, atom2_name])) + + # Translate bond angles into distances ("virtual bonds"). + residue_virtual_bonds = {} + for resname, bond_angles in residue_bond_angles.items(): + # Create a fast lookup dict for bond lengths. + bond_cache = {} + for b in residue_bonds[resname]: + bond_cache[make_bond_key(b.atom1_name, b.atom2_name)] = b + residue_virtual_bonds[resname] = [] + for ba in bond_angles: + bond1 = bond_cache[make_bond_key(ba.atom1_name, ba.atom2_name)] + bond2 = bond_cache[make_bond_key(ba.atom2_name, ba.atom3name)] + + # Compute distance between atom1 and atom3 using the law of cosines + # c^2 = a^2 + b^2 - 2ab*cos(gamma). + gamma = ba.angle_rad + length = np.sqrt(bond1.length**2 + bond2.length**2 + - 2 * bond1.length * bond2.length * np.cos(gamma)) + + # Propagation of uncertainty assuming uncorrelated errors. 
+ dl_outer = 0.5 / length + dl_dgamma = (2 * bond1.length * bond2.length * np.sin(gamma)) * dl_outer + dl_db1 = (2 * bond1.length - 2 * bond2.length * np.cos(gamma)) * dl_outer + dl_db2 = (2 * bond2.length - 2 * bond1.length * np.cos(gamma)) * dl_outer + stddev = np.sqrt((dl_dgamma * ba.stddev)**2 + + (dl_db1 * bond1.stddev)**2 + + (dl_db2 * bond2.stddev)**2) + residue_virtual_bonds[resname].append( + Bond(ba.atom1_name, ba.atom3name, length, stddev)) + + return (residue_bonds, + residue_virtual_bonds, + residue_bond_angles) + + +# Between-residue bond lengths for general bonds (first element) and for Proline +# (second element). +between_res_bond_length_c_n = [1.329, 1.341] +between_res_bond_length_stddev_c_n = [0.014, 0.016] + +# Between-residue cos_angles. +between_res_cos_angles_c_n_ca = [-0.5203, 0.0353] # degrees: 121.352 +- 2.315 +between_res_cos_angles_ca_c_n = [-0.4473, 0.0311] # degrees: 116.568 +- 1.995 + +# This mapping is used when we need to store atom data in a format that requires +# fixed atom data size for every residue (e.g. a numpy array). +atom_types = [ + 'N', 'CA', 'C', 'CB', 'O', 'CG', 'CG1', 'CG2', 'OG', 'OG1', 'SG', 'CD', + 'CD1', 'CD2', 'ND1', 'ND2', 'OD1', 'OD2', 'SD', 'CE', 'CE1', 'CE2', 'CE3', + 'NE', 'NE1', 'NE2', 'OE1', 'OE2', 'CH2', 'NH1', 'NH2', 'OH', 'CZ', 'CZ2', + 'CZ3', 'NZ', 'OXT' +] +atom_order = {atom_type: i for i, atom_type in enumerate(atom_types)} +atom_type_num = len(atom_types) # := 37. 
# A compact atom encoding with 14 columns
# Each residue lists its heavy-atom names in a fixed slot order and is padded
# with '' up to 14 entries (TRP is the only residue that fills all 14 slots).
# pylint: disable=line-too-long
# pylint: disable=bad-whitespace
restype_name_to_atom14_names = {
    'ALA': ['N', 'CA', 'C', 'O', 'CB', '', '', '', '', '', '', '', '', ''],
    'ARG': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD', 'NE', 'CZ', 'NH1', 'NH2', '', '', ''],
    'ASN': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'OD1', 'ND2', '', '', '', '', '', ''],
    'ASP': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'OD1', 'OD2', '', '', '', '', '', ''],
    'CYS': ['N', 'CA', 'C', 'O', 'CB', 'SG', '', '', '', '', '', '', '', ''],
    'GLN': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD', 'OE1', 'NE2', '', '', '', '', ''],
    'GLU': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD', 'OE1', 'OE2', '', '', '', '', ''],
    'GLY': ['N', 'CA', 'C', 'O', '', '', '', '', '', '', '', '', '', ''],
    'HIS': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'ND1', 'CD2', 'CE1', 'NE2', '', '', '', ''],
    'ILE': ['N', 'CA', 'C', 'O', 'CB', 'CG1', 'CG2', 'CD1', '', '', '', '', '', ''],
    'LEU': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD1', 'CD2', '', '', '', '', '', ''],
    'LYS': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD', 'CE', 'NZ', '', '', '', '', ''],
    'MET': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'SD', 'CE', '', '', '', '', '', ''],
    'PHE': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD1', 'CD2', 'CE1', 'CE2', 'CZ', '', '', ''],
    'PRO': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD', '', '', '', '', '', '', ''],
    'SER': ['N', 'CA', 'C', 'O', 'CB', 'OG', '', '', '', '', '', '', '', ''],
    'THR': ['N', 'CA', 'C', 'O', 'CB', 'OG1', 'CG2', '', '', '', '', '', '', ''],
    'TRP': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD1', 'CD2', 'NE1', 'CE2', 'CE3', 'CZ2', 'CZ3', 'CH2'],
    'TYR': ['N', 'CA', 'C', 'O', 'CB', 'CG', 'CD1', 'CD2', 'CE1', 'CE2', 'CZ', 'OH', '', ''],
    'VAL': ['N', 'CA', 'C', 'O', 'CB', 'CG1', 'CG2', '', '', '', '', '', '', ''],
    'UNK': ['', '', '', '', '', '', '', '', '', '', '', '', '', ''],

}
# pylint: enable=line-too-long
# pylint: enable=bad-whitespace


# This is the standard residue order when coding AA type as a number.
# Reproduce it by taking 3-letter AA codes and sorting them alphabetically.
restypes = [
    'A', 'R', 'N', 'D', 'C', 'Q', 'E', 'G', 'H', 'I', 'L', 'K', 'M', 'F', 'P',
    'S', 'T', 'W', 'Y', 'V'
]
restype_order = {restype: i for i, restype in enumerate(restypes)}
restype_num = len(restypes)  # := 20.
unk_restype_index = restype_num  # Catch-all index for unknown restypes.

restypes_with_x = restypes + ['X']
restype_order_with_x = {restype: i for i, restype in enumerate(restypes_with_x)}


def sequence_to_onehot(
    sequence: str,
    mapping: Mapping[str, int],
    map_unknown_to_x: bool = False) -> np.ndarray:
  """Maps the given sequence into a one-hot encoded matrix.

  Args:
    sequence: An amino acid sequence.
    mapping: A dictionary mapping amino acids to integers.
    map_unknown_to_x: If True, any amino acid that is not in the mapping will be
      mapped to the unknown amino acid 'X'. If the mapping doesn't contain
      amino acid 'X', an error will be thrown. If False, any amino acid not in
      the mapping will throw an error.

  Returns:
    A numpy array of shape (seq_len, num_unique_aas) with one-hot encoding of
    the sequence (dtype int32).

  Raises:
    ValueError: If the mapping is empty, if the mapping doesn't contain values
      from 0 to num_unique_aas - 1 without any gaps, or if map_unknown_to_x is
      True and the sequence contains a character that is not an upper-case
      letter.
    KeyError: If map_unknown_to_x is False and the sequence contains a
      character that is not in the mapping, or if map_unknown_to_x is True and
      the mapping has no 'X' entry.
  """
  # An empty mapping used to surface as an unrelated "max() of empty sequence"
  # ValueError; keep the exception type but say what is actually wrong.
  if not mapping:
    raise ValueError('The mapping must not be empty.')

  num_entries = max(mapping.values()) + 1

  if sorted(set(mapping.values())) != list(range(num_entries)):
    raise ValueError('The mapping must have values from 0 to num_unique_aas-1 '
                     f'without any gaps. Got: {sorted(mapping.values())}')

  one_hot_arr = np.zeros((len(sequence), num_entries), dtype=np.int32)

  for aa_index, aa_type in enumerate(sequence):
    if map_unknown_to_x:
      if aa_type.isalpha() and aa_type.isupper():
        # mapping['X'] is evaluated eagerly, so a mapping without 'X' raises
        # KeyError here even when aa_type itself is a known key.
        aa_id = mapping.get(aa_type, mapping['X'])
      else:
        raise ValueError(f'Invalid character in the sequence: {aa_type}')
    else:
      aa_id = mapping[aa_type]
    one_hot_arr[aa_index, aa_id] = 1

  return one_hot_arr


restype_1to3 = {
    'A': 'ALA',
    'R': 'ARG',
    'N': 'ASN',
    'D': 'ASP',
    'C': 'CYS',
    'Q': 'GLN',
    'E': 'GLU',
    'G': 'GLY',
    'H': 'HIS',
    'I': 'ILE',
    'L': 'LEU',
    'K': 'LYS',
    'M': 'MET',
    'F': 'PHE',
    'P': 'PRO',
    'S': 'SER',
    'T': 'THR',
    'W': 'TRP',
    'Y': 'TYR',
    'V': 'VAL',
}


# NB: restype_3to1 differs from Bio.PDB.protein_letters_3to1 by being a simple
# 1-to-1 mapping of 3 letter names to one letter names. The latter contains
# many more, and less common, three letter names as keys and maps many of these
# to the same one letter name (including 'X' and 'U' which we don't use here).
restype_3to1 = {v: k for k, v in restype_1to3.items()}

# Define a restype name for all unknown residues.
unk_restype = 'UNK'

# Three-letter names in `restypes` order, with 'UNK' appended as index 20.
resnames = [restype_1to3[r] for r in restypes] + [unk_restype]
resname_to_idx = {resname: i for i, resname in enumerate(resnames)}


# The mapping here uses hhblits convention, so that B is mapped to D, J and O
# are mapped to X, U is mapped to C, and Z is mapped to E. Other than that the
# remaining 20 amino acids are kept in alphabetical order.
# There are 2 non-amino acid codes, X (representing any amino acid) and
# "-" representing a missing amino acid in an alignment. The id for these
# codes is put at the end (20 and 21) so that they can easily be ignored if
# desired.
# HHblits letter -> id. Ambiguous/rare letters share ids with a standard
# residue (B->D, U->C, Z->E) or with the catch-all X (J, O); '-' is the gap.
HHBLITS_AA_TO_ID = {
    'A': 0, 'B': 2, 'C': 1, 'D': 2, 'E': 3, 'F': 4, 'G': 5,
    'H': 6, 'I': 7, 'J': 20, 'K': 8, 'L': 9, 'M': 10, 'N': 11,
    'O': 20, 'P': 12, 'Q': 13, 'R': 14, 'S': 15, 'T': 16, 'U': 1,
    'V': 17, 'W': 18, 'X': 20, 'Y': 19, 'Z': 3, '-': 21,
}

# Partial inversion of HHBLITS_AA_TO_ID: one canonical letter per id.
ID_TO_HHBLITS_AA = {
    0: 'A',
    1: 'C',  # U shares this id.
    2: 'D',  # B shares this id.
    3: 'E',  # Z shares this id.
    4: 'F',
    5: 'G',
    6: 'H',
    7: 'I',
    8: 'K',
    9: 'L',
    10: 'M',
    11: 'N',
    12: 'P',
    13: 'Q',
    14: 'R',
    15: 'S',
    16: 'T',
    17: 'V',
    18: 'W',
    19: 'Y',
    20: 'X',  # J and O share this id.
    21: '-',
}

restypes_with_x_and_gap = restypes + ['X', '-']
# Entry i is the position, in restypes_with_x_and_gap, of the canonical letter
# for HHblits id i.
MAP_HHBLITS_AATYPE_TO_OUR_AATYPE = tuple(
    restypes_with_x_and_gap.index(ID_TO_HHBLITS_AA[hhblits_id])
    for hhblits_id in range(len(restypes_with_x_and_gap)))


def _make_standard_atom_mask() -> np.ndarray:
  """Returns [num_res_types, num_atom_types] mask array.

  Row r (in `restypes` order) has a 1 in every column whose atom type is
  listed in `residue_atoms` for that residue. One extra trailing row, left
  all zero, stands for the unknown residue type.
  """
  atom_mask = np.zeros([restype_num + 1, atom_type_num], dtype=np.int32)
  for row, one_letter in enumerate(restypes):
    three_letter = restype_1to3[one_letter]
    columns = [atom_order[name] for name in residue_atoms[three_letter]]
    atom_mask[row, columns] = 1
  return atom_mask


STANDARD_ATOM_MASK = _make_standard_atom_mask()


# A one hot representation for the first and second atoms defining the axis
# of rotation for each chi-angle in each residue.
def chi_angle_atom(atom_index: int) -> np.ndarray:
  """Define chi-angle rigid groups via one-hot representations.

  Args:
    atom_index: Position (within each 4-atom chi definition in
      `chi_angles_atoms`) of the atom to encode.

  Returns:
    Array of shape [len(restypes) + 1, atom_type_num, 4]: for every residue
    type (plus a trailing all-zero entry for residue `X`) and each of the four
    chi slots, a one-hot column over atom types.
  """
  chi_angles_index = {}
  one_hots = []

  for k, v in chi_angles_atoms.items():
    indices = [atom_types.index(s[atom_index]) for s in v]
    # NOTE(review): missing chi slots are padded with -1, and np.eye(n)[-1]
    # below selects the LAST identity row (a one-hot for the final atom type),
    # not an all-zero row. Presumably consumers mask these slots with
    # chi_angles_mask -- confirm before relying on the padded entries.
    indices.extend([-1]*(4-len(indices)))
    chi_angles_index[k] = indices

  for r in restypes:
    res3 = restype_1to3[r]
    one_hot = np.eye(atom_type_num)[chi_angles_index[res3]]
    one_hots.append(one_hot)

  one_hots.append(np.zeros([4, atom_type_num]))  # Add zeros for residue `X`.
  one_hot = np.stack(one_hots, axis=0)
  # [restype, chi, atom_type] -> [restype, atom_type, chi].
  one_hot = np.transpose(one_hot, [0, 2, 1])

  return one_hot

chi_atom_1_one_hot = chi_angle_atom(1)
chi_atom_2_one_hot = chi_angle_atom(2)

# An array like chi_angles_atoms but using indices rather than names.
chi_angles_atom_indices = [chi_angles_atoms[restype_1to3[r]] for r in restypes]
chi_angles_atom_indices = tree.map_structure(
    lambda atom_name: atom_order[atom_name], chi_angles_atom_indices)
# Pad every residue to four chi definitions with all-zero index quadruples.
chi_angles_atom_indices = np.array([
    chi_atoms + ([[0, 0, 0, 0]] * (4 - len(chi_atoms)))
    for chi_atoms in chi_angles_atom_indices])

# Mapping from (res_name, atom_name) pairs to the atom's chi group index
# and atom index within that group.
chi_groups_for_atom = collections.defaultdict(list)
for res_name, chi_angle_atoms_for_res in chi_angles_atoms.items():
  for chi_group_i, chi_group in enumerate(chi_angle_atoms_for_res):
    for atom_i, atom in enumerate(chi_group):
      chi_groups_for_atom[(res_name, atom)].append((chi_group_i, atom_i))
# Freeze into a plain dict so later lookups of unknown keys raise KeyError
# instead of silently inserting empty lists.
chi_groups_for_atom = dict(chi_groups_for_atom)


def _make_rigid_transformation_4x4(ex, ey, translation):
  """Create a rigid 4x4 transformation matrix from two axes and transl.

  Args:
    ex: 3-vector giving the direction of the frame's x axis (need not be
      normalized).
    ey: 3-vector that, together with ex, spans the frame's xy plane; only its
      component perpendicular to ex is used.
    translation: 3-vector placed in the last column.

  Returns:
    A 4x4 array whose first three columns are the orthonormal axes
    (ex, ey, ex x ey), whose fourth column is the translation, and whose
    bottom row is [0, 0, 0, 1].
  """
  # Normalize ex.
  ex_normalized = ex / np.linalg.norm(ex)

  # make ey perpendicular to ex
  ey_normalized = ey - np.dot(ey, ex_normalized) * ex_normalized
  ey_normalized /= np.linalg.norm(ey_normalized)

  # compute ez as cross product
  eznorm = np.cross(ex_normalized, ey_normalized)
  # Stack as rows, then transpose so the vectors become columns (3x4).
  m = np.stack([ex_normalized, ey_normalized, eznorm, translation]).transpose()
  m = np.concatenate([m, [[0., 0., 0., 1.]]], axis=0)
  return m


# create an array with (restype, atomtype) --> rigid_group_idx
# and an array with (restype, atomtype, coord) for the atom positions
# and compute affine transformation matrices (4,4) from one rigid group to the
# previous group
# NOTE: `np.int` was only an alias of the builtin `int`; it was deprecated in
# NumPy 1.20 and removed in 1.24, so the builtin is used directly (same dtype).
restype_atom37_to_rigid_group = np.zeros([21, 37], dtype=int)
restype_atom37_mask = np.zeros([21, 37], dtype=np.float32)
restype_atom37_rigid_group_positions = np.zeros([21, 37, 3], dtype=np.float32)
restype_atom14_to_rigid_group = np.zeros([21, 14], dtype=int)
restype_atom14_mask = np.zeros([21, 14], dtype=np.float32)
restype_atom14_rigid_group_positions = np.zeros([21, 14, 3], dtype=np.float32)
restype_rigid_group_default_frame = np.zeros([21, 8, 4, 4], dtype=np.float32)


def _make_rigid_group_constants():
  """Fill the arrays above.

  Mutates the module-level restype_atom37_* / restype_atom14_* arrays and
  restype_rigid_group_default_frame in place; returns nothing. Row 20 (the
  unknown residue) is never written and therefore stays all zero.
  """
  # Pass 1: per-atom rigid group index, mask and group-local position, in both
  # the atom37 and the atom14 layouts.
  for restype, restype_letter in enumerate(restypes):
    resname = restype_1to3[restype_letter]
    for atomname, group_idx, atom_position in rigid_group_atom_positions[
        resname]:
      atomtype = atom_order[atomname]
      restype_atom37_to_rigid_group[restype, atomtype] = group_idx
      restype_atom37_mask[restype, atomtype] = 1
      restype_atom37_rigid_group_positions[restype, atomtype, :] = atom_position

      atom14idx = restype_name_to_atom14_names[resname].index(atomname)
      restype_atom14_to_rigid_group[restype, atom14idx] = group_idx
      restype_atom14_mask[restype, atom14idx] = 1
      restype_atom14_rigid_group_positions[restype,
                                           atom14idx, :] = atom_position

  # Pass 2: default frame of each of the 8 rigid groups
  # (0 backbone, 1 pre-omega, 2 phi, 3 psi, 4..7 chi1..chi4).
  for restype, restype_letter in enumerate(restypes):
    resname = restype_1to3[restype_letter]
    atom_positions = {name: np.array(pos) for name, _, pos
                      in rigid_group_atom_positions[resname]}

    # backbone to backbone is the identity transform
    restype_rigid_group_default_frame[restype, 0, :, :] = np.eye(4)

    # pre-omega-frame to backbone (currently dummy identity matrix)
    restype_rigid_group_default_frame[restype, 1, :, :] = np.eye(4)

    # phi-frame to backbone
    mat = _make_rigid_transformation_4x4(
        ex=atom_positions['N'] - atom_positions['CA'],
        ey=np.array([1., 0., 0.]),
        translation=atom_positions['N'])
    restype_rigid_group_default_frame[restype, 2, :, :] = mat

    # psi-frame to backbone
    mat = _make_rigid_transformation_4x4(
        ex=atom_positions['C'] - atom_positions['CA'],
        ey=atom_positions['CA'] - atom_positions['N'],
        translation=atom_positions['C'])
    restype_rigid_group_default_frame[restype, 3, :, :] = mat

    # chi1-frame to backbone
    if chi_angles_mask[restype][0]:
      base_atom_names = chi_angles_atoms[resname][0]
      base_atom_positions = [atom_positions[name] for name in base_atom_names]
      mat = _make_rigid_transformation_4x4(
          ex=base_atom_positions[2] - base_atom_positions[1],
          ey=base_atom_positions[0] - base_atom_positions[1],
          translation=base_atom_positions[2])
      restype_rigid_group_default_frame[restype, 4, :, :] = mat

    # chi2-frame to chi1-frame
    # chi3-frame to chi2-frame
    # chi4-frame to chi3-frame
    # luckily all rotation axes for the next frame start at (0,0,0) of the
    # previous frame
    for chi_idx in range(1, 4):
      if chi_angles_mask[restype][chi_idx]:
        axis_end_atom_name = chi_angles_atoms[resname][chi_idx][2]
        axis_end_atom_position = atom_positions[axis_end_atom_name]
        mat = _make_rigid_transformation_4x4(
            ex=axis_end_atom_position,
            ey=np.array([-1., 0., 0.]),
            translation=axis_end_atom_position)
        restype_rigid_group_default_frame[restype, 4 + chi_idx, :, :] = mat


_make_rigid_group_constants()


def make_atom14_dists_bounds(overlap_tolerance=1.5,
                             bond_length_tolerance_factor=15):
  """compute upper and lower bounds for bonds to assess violations.

  Args:
    overlap_tolerance: Amount subtracted from the sum of the two van der Waals
      radii to get the lower distance bound for a non-bonded atom pair.
    bond_length_tolerance_factor: Number of standard deviations around the
      ideal length allowed for bonded (and virtually bonded) atom pairs.

  Returns:
    Dict with keys 'lower_bound', 'upper_bound' and 'stddev', each a float32
    array of shape (21, 14, 14) indexed by (restype, atom14 index, atom14
    index). Entries for empty atom14 slots, the diagonal, and restype 20 stay
    zero.
  """
  restype_atom14_bond_lower_bound = np.zeros([21, 14, 14], np.float32)
  restype_atom14_bond_upper_bound = np.zeros([21, 14, 14], np.float32)
  restype_atom14_bond_stddev = np.zeros([21, 14, 14], np.float32)
  residue_bonds, residue_virtual_bonds, _ = load_stereo_chemical_props()
  for restype, restype_letter in enumerate(restypes):
    resname = restype_1to3[restype_letter]
    atom_list = restype_name_to_atom14_names[resname]

    # create lower and upper bounds for clashes
    for atom1_idx, atom1_name in enumerate(atom_list):
      if not atom1_name:
        continue
      # Radii are looked up by the first character of the atom name.
      atom1_radius = van_der_waals_radius[atom1_name[0]]
      for atom2_idx, atom2_name in enumerate(atom_list):
        if (not atom2_name) or atom1_idx == atom2_idx:
          continue
        atom2_radius = van_der_waals_radius[atom2_name[0]]
        lower = atom1_radius + atom2_radius - overlap_tolerance
        upper = 1e10  # Effectively "no upper bound" for non-bonded pairs.
        restype_atom14_bond_lower_bound[restype, atom1_idx, atom2_idx] = lower
        restype_atom14_bond_lower_bound[restype, atom2_idx, atom1_idx] = lower
        restype_atom14_bond_upper_bound[restype, atom1_idx, atom2_idx] = upper
        restype_atom14_bond_upper_bound[restype, atom2_idx, atom1_idx] = upper

    # overwrite lower and upper bounds for bonds and angles
    for b in residue_bonds[resname] + residue_virtual_bonds[resname]:
      atom1_idx = atom_list.index(b.atom1_name)
      atom2_idx = atom_list.index(b.atom2_name)
      lower = b.length - bond_length_tolerance_factor * b.stddev
      upper = b.length + bond_length_tolerance_factor * b.stddev
      restype_atom14_bond_lower_bound[restype, atom1_idx, atom2_idx] = lower
      restype_atom14_bond_lower_bound[restype, atom2_idx, atom1_idx] = lower
      restype_atom14_bond_upper_bound[restype, atom1_idx, atom2_idx] = upper
      restype_atom14_bond_upper_bound[restype, atom2_idx, atom1_idx] = upper
      restype_atom14_bond_stddev[restype, atom1_idx, atom2_idx] = b.stddev
      restype_atom14_bond_stddev[restype, atom2_idx, atom1_idx] = b.stddev
  return {'lower_bound': restype_atom14_bond_lower_bound,  # shape (21,14,14)
          'upper_bound': restype_atom14_bond_upper_bound,  # shape (21,14,14)
          'stddev': restype_atom14_bond_stddev,  # shape (21,14,14)
         }


# Patch header of the next file in this changeset, kept verbatim as comments:
# diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/common/residue_constants_test.py
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/docker/alphafold/alphafold/common/residue_constants_test.py Tue Mar 01 02:53:05 2022 +0000
# @@ -0,0 +1,190 @@
# Copyright 2021 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+ +"""Test that residue_constants generates correct values.""" + +from absl.testing import absltest +from absl.testing import parameterized +from alphafold.common import residue_constants +import numpy as np + + +class ResidueConstantsTest(parameterized.TestCase): + + @parameterized.parameters( + ('ALA', 0), + ('CYS', 1), + ('HIS', 2), + ('MET', 3), + ('LYS', 4), + ('ARG', 4), + ) + def testChiAnglesAtoms(self, residue_name, chi_num): + chi_angles_atoms = residue_constants.chi_angles_atoms[residue_name] + self.assertLen(chi_angles_atoms, chi_num) + for chi_angle_atoms in chi_angles_atoms: + self.assertLen(chi_angle_atoms, 4) + + def testChiGroupsForAtom(self): + for k, chi_groups in residue_constants.chi_groups_for_atom.items(): + res_name, atom_name = k + for chi_group_i, atom_i in chi_groups: + self.assertEqual( + atom_name, + residue_constants.chi_angles_atoms[res_name][chi_group_i][atom_i]) + + @parameterized.parameters( + ('ALA', 5), ('ARG', 11), ('ASN', 8), ('ASP', 8), ('CYS', 6), ('GLN', 9), + ('GLU', 9), ('GLY', 4), ('HIS', 10), ('ILE', 8), ('LEU', 8), ('LYS', 9), + ('MET', 8), ('PHE', 11), ('PRO', 7), ('SER', 6), ('THR', 7), ('TRP', 14), + ('TYR', 12), ('VAL', 7) + ) + def testResidueAtoms(self, atom_name, num_residue_atoms): + residue_atoms = residue_constants.residue_atoms[atom_name] + self.assertLen(residue_atoms, num_residue_atoms) + + def testStandardAtomMask(self): + with self.subTest('Check shape'): + self.assertEqual(residue_constants.STANDARD_ATOM_MASK.shape, (21, 37,)) + + with self.subTest('Check values'): + str_to_row = lambda s: [c == '1' for c in s] # More clear/concise. + np.testing.assert_array_equal( + residue_constants.STANDARD_ATOM_MASK, + np.array([ + # NB This was defined by c+p but looks sane. 
+ str_to_row('11111 '), # ALA + str_to_row('111111 1 1 11 1 '), # ARG + str_to_row('111111 11 '), # ASP + str_to_row('111111 11 '), # ASN + str_to_row('11111 1 '), # CYS + str_to_row('111111 1 11 '), # GLU + str_to_row('111111 1 11 '), # GLN + str_to_row('111 1 '), # GLY + str_to_row('111111 11 1 1 '), # HIS + str_to_row('11111 11 1 '), # ILE + str_to_row('111111 11 '), # LEU + str_to_row('111111 1 1 1 '), # LYS + str_to_row('111111 11 '), # MET + str_to_row('111111 11 11 1 '), # PHE + str_to_row('111111 1 '), # PRO + str_to_row('11111 1 '), # SER + str_to_row('11111 1 1 '), # THR + str_to_row('111111 11 11 1 1 11 '), # TRP + str_to_row('111111 11 11 11 '), # TYR + str_to_row('11111 11 '), # VAL + str_to_row(' '), # UNK + ])) + + with self.subTest('Check row totals'): + # Check each row has the right number of atoms. + for row, restype in enumerate(residue_constants.restypes): # A, R, ... + long_restype = residue_constants.restype_1to3[restype] # ALA, ARG, ... + atoms_names = residue_constants.residue_atoms[ + long_restype] # ['C', 'CA', 'CB', 'N', 'O'], ... 
+ self.assertLen(atoms_names, + residue_constants.STANDARD_ATOM_MASK[row, :].sum(), + long_restype) + + def testAtomTypes(self): + self.assertEqual(residue_constants.atom_type_num, 37) + + self.assertEqual(residue_constants.atom_types[0], 'N') + self.assertEqual(residue_constants.atom_types[1], 'CA') + self.assertEqual(residue_constants.atom_types[2], 'C') + self.assertEqual(residue_constants.atom_types[3], 'CB') + self.assertEqual(residue_constants.atom_types[4], 'O') + + self.assertEqual(residue_constants.atom_order['N'], 0) + self.assertEqual(residue_constants.atom_order['CA'], 1) + self.assertEqual(residue_constants.atom_order['C'], 2) + self.assertEqual(residue_constants.atom_order['CB'], 3) + self.assertEqual(residue_constants.atom_order['O'], 4) + self.assertEqual(residue_constants.atom_type_num, 37) + + def testRestypes(self): + three_letter_restypes = [ + residue_constants.restype_1to3[r] for r in residue_constants.restypes] + for restype, exp_restype in zip( + three_letter_restypes, sorted(residue_constants.restype_1to3.values())): + self.assertEqual(restype, exp_restype) + self.assertEqual(residue_constants.restype_num, 20) + + def testSequenceToOneHotHHBlits(self): + one_hot = residue_constants.sequence_to_onehot( + 'ABCDEFGHIJKLMNOPQRSTUVWXYZ-', residue_constants.HHBLITS_AA_TO_ID) + exp_one_hot = np.array( + [[1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0], + [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0], + [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1]]) + np.testing.assert_array_equal(one_hot, exp_one_hot) + + def testSequenceToOneHotStandard(self): + one_hot = residue_constants.sequence_to_onehot( + 'ARNDCQEGHILKMFPSTWYV', residue_constants.restype_order) + np.testing.assert_array_equal(one_hot, np.eye(20)) + + def testSequenceToOneHotUnknownMapping(self): + seq = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' + expected_out = np.zeros([26, 21]) + for row, position in enumerate( + [0, 20, 4, 3, 6, 13, 7, 8, 9, 20, 11, 10, 12, 2, 20, 14, 5, 1, 15, 16, + 20, 19, 17, 20, 18, 20]): + expected_out[row, position] = 1 + aa_types = residue_constants.sequence_to_onehot( + sequence=seq, + mapping=residue_constants.restype_order_with_x, + map_unknown_to_x=True) + self.assertTrue((aa_types == expected_out).all()) + + 
@parameterized.named_parameters( + ('lowercase', 'aaa'), # Insertions in A3M. + ('gaps', '---'), # Gaps in A3M. + ('dots', '...'), # Gaps in A3M. + ('metadata', '>TEST'), # FASTA metadata line. + ) + def testSequenceToOneHotUnknownMappingError(self, seq): + with self.assertRaises(ValueError): + residue_constants.sequence_to_onehot( + sequence=seq, + mapping=residue_constants.restype_order_with_x, + map_unknown_to_x=True) + + +if __name__ == '__main__': + absltest.main() diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/common/testdata/2rbg.pdb --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/common/testdata/2rbg.pdb Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,2784 @@ +HEADER STRUCTURAL GENOMICS, UNKNOWN FUNCTION 19-SEP-07 2RBG +TITLE CRYSTAL STRUCTURE OF HYPOTHETICAL PROTEIN(ST0493) FROM +TITLE 2 SULFOLOBUS TOKODAII +COMPND MOL_ID: 1; +COMPND 2 MOLECULE: PUTATIVE UNCHARACTERIZED PROTEIN ST0493; +COMPND 3 CHAIN: A, B; +COMPND 4 ENGINEERED: YES +SOURCE MOL_ID: 1; +SOURCE 2 ORGANISM_SCIENTIFIC: SULFOLOBUS TOKODAII; +SOURCE 3 ORGANISM_TAXID: 111955; +SOURCE 4 STRAIN: STRAIN 7; +SOURCE 5 EXPRESSION_SYSTEM: ESCHERICHIA COLI; +SOURCE 6 EXPRESSION_SYSTEM_TAXID: 562; +SOURCE 7 EXPRESSION_SYSTEM_STRAIN: ROSETTA834(DE3); +SOURCE 8 EXPRESSION_SYSTEM_VECTOR_TYPE: PLASMID; +SOURCE 9 EXPRESSION_SYSTEM_PLASMID: PET-21A +KEYWDS HYPOTHETICAL PROTEIN, STRUCTURAL GENOMICS, UNKNOWN FUNCTION, +KEYWDS 2 NPPSFA, NATIONAL PROJECT ON PROTEIN STRUCTURAL AND +KEYWDS 3 FUNCTIONAL ANALYSES, RIKEN STRUCTURAL GENOMICS/PROTEOMICS +KEYWDS 4 INITIATIVE, RSGI +EXPDTA X-RAY DIFFRACTION +AUTHOR J.JEYAKANTHAN,S.KURAMITSU,S.YOKOYAMA,RIKEN STRUCTURAL +AUTHOR 2 GENOMICS/PROTEOMICS INITIATIVE (RSGI) +REVDAT 2 24-FEB-09 2RBG 1 VERSN +REVDAT 1 30-SEP-08 2RBG 0 +JRNL AUTH J.JEYAKANTHAN,S.KURAMITSU,S.YOKOYAMA +JRNL TITL CRYSTAL STRUCTURE OF HYPOTHETICAL PROTEIN(ST0493) +JRNL TITL 2 FROM SULFOLOBUS TOKODAII +JRNL REF TO BE PUBLISHED +JRNL REFN +REMARK 1 +REMARK 2 +REMARK 
2 RESOLUTION. 1.75 ANGSTROMS. +REMARK 3 +REMARK 3 REFINEMENT. +REMARK 3 PROGRAM : CNS 1.1 +REMARK 3 AUTHORS : BRUNGER,ADAMS,CLORE,DELANO,GROS,GROSSE- +REMARK 3 : KUNSTLEVE,JIANG,KUSZEWSKI,NILGES, PANNU, +REMARK 3 : READ,RICE,SIMONSON,WARREN +REMARK 3 +REMARK 3 REFINEMENT TARGET : ENGH & HUBER +REMARK 3 +REMARK 3 DATA USED IN REFINEMENT. +REMARK 3 RESOLUTION RANGE HIGH (ANGSTROMS) : 1.75 +REMARK 3 RESOLUTION RANGE LOW (ANGSTROMS) : 33.49 +REMARK 3 DATA CUTOFF (SIGMA(F)) : 0.000 +REMARK 3 DATA CUTOFF HIGH (ABS(F)) : 2067291.840 +REMARK 3 DATA CUTOFF LOW (ABS(F)) : 0.0000 +REMARK 3 COMPLETENESS (WORKING+TEST) (%) : 99.3 +REMARK 3 NUMBER OF REFLECTIONS : 25029 +REMARK 3 +REMARK 3 FIT TO DATA USED IN REFINEMENT. +REMARK 3 CROSS-VALIDATION METHOD : THROUGHOUT +REMARK 3 FREE R VALUE TEST SET SELECTION : RANDOM +REMARK 3 R VALUE (WORKING SET) : 0.173 +REMARK 3 FREE R VALUE : 0.196 +REMARK 3 FREE R VALUE TEST SET SIZE (%) : 4.900 +REMARK 3 FREE R VALUE TEST SET COUNT : 1216 +REMARK 3 ESTIMATED ERROR OF FREE R VALUE : 0.006 +REMARK 3 +REMARK 3 FIT IN THE HIGHEST RESOLUTION BIN. +REMARK 3 TOTAL NUMBER OF BINS USED : 8 +REMARK 3 BIN RESOLUTION RANGE HIGH (A) : 1.75 +REMARK 3 BIN RESOLUTION RANGE LOW (A) : 1.83 +REMARK 3 BIN COMPLETENESS (WORKING+TEST) (%) : 96.80 +REMARK 3 REFLECTIONS IN BIN (WORKING SET) : 2906 +REMARK 3 BIN R VALUE (WORKING SET) : 0.1980 +REMARK 3 BIN FREE R VALUE : 0.2420 +REMARK 3 BIN FREE R VALUE TEST SET SIZE (%) : 5.10 +REMARK 3 BIN FREE R VALUE TEST SET COUNT : 156 +REMARK 3 ESTIMATED ERROR OF BIN FREE R VALUE : 0.019 +REMARK 3 +REMARK 3 NUMBER OF NON-HYDROGEN ATOMS USED IN REFINEMENT. +REMARK 3 PROTEIN ATOMS : 2060 +REMARK 3 NUCLEIC ACID ATOMS : 0 +REMARK 3 HETEROGEN ATOMS : 5 +REMARK 3 SOLVENT ATOMS : 316 +REMARK 3 +REMARK 3 B VALUES. +REMARK 3 FROM WILSON PLOT (A**2) : 13.30 +REMARK 3 MEAN B VALUE (OVERALL, A**2) : 16.90 +REMARK 3 OVERALL ANISOTROPIC B VALUE. 
+REMARK 3 B11 (A**2) : 2.81000 +REMARK 3 B22 (A**2) : -1.00000 +REMARK 3 B33 (A**2) : -1.81000 +REMARK 3 B12 (A**2) : 0.00000 +REMARK 3 B13 (A**2) : -1.31000 +REMARK 3 B23 (A**2) : 0.00000 +REMARK 3 +REMARK 3 ESTIMATED COORDINATE ERROR. +REMARK 3 ESD FROM LUZZATI PLOT (A) : 0.16 +REMARK 3 ESD FROM SIGMAA (A) : 0.06 +REMARK 3 LOW RESOLUTION CUTOFF (A) : 5.00 +REMARK 3 +REMARK 3 CROSS-VALIDATED ESTIMATED COORDINATE ERROR. +REMARK 3 ESD FROM C-V LUZZATI PLOT (A) : 0.19 +REMARK 3 ESD FROM C-V SIGMAA (A) : 0.14 +REMARK 3 +REMARK 3 RMS DEVIATIONS FROM IDEAL VALUES. +REMARK 3 BOND LENGTHS (A) : 0.005 +REMARK 3 BOND ANGLES (DEGREES) : 1.10 +REMARK 3 DIHEDRAL ANGLES (DEGREES) : 22.00 +REMARK 3 IMPROPER ANGLES (DEGREES) : 0.70 +REMARK 3 +REMARK 3 ISOTROPIC THERMAL MODEL : RESTRAINED +REMARK 3 +REMARK 3 ISOTROPIC THERMAL FACTOR RESTRAINTS. RMS SIGMA +REMARK 3 MAIN-CHAIN BOND (A**2) : NULL ; NULL +REMARK 3 MAIN-CHAIN ANGLE (A**2) : NULL ; NULL +REMARK 3 SIDE-CHAIN BOND (A**2) : NULL ; NULL +REMARK 3 SIDE-CHAIN ANGLE (A**2) : NULL ; NULL +REMARK 3 +REMARK 3 BULK SOLVENT MODELING. +REMARK 3 METHOD USED : FLAT MODEL +REMARK 3 KSOL : 0.37 +REMARK 3 BSOL : 51.20 +REMARK 3 +REMARK 3 NCS MODEL : NULL +REMARK 3 +REMARK 3 NCS RESTRAINTS. RMS SIGMA/WEIGHT +REMARK 3 GROUP 1 POSITIONAL (A) : NULL ; NULL +REMARK 3 GROUP 1 B-FACTOR (A**2) : NULL ; NULL +REMARK 3 +REMARK 3 PARAMETER FILE 1 : PROTEIN_REP.PARAM +REMARK 3 PARAMETER FILE 2 : LIGAND.PARAM +REMARK 3 PARAMETER FILE 3 : ION.PARAM +REMARK 3 PARAMETER FILE 5 : WATER_REP.PARAM +REMARK 3 PARAMETER FILE 6 : NULL +REMARK 3 TOPOLOGY FILE 1 : PROTEIN.TOP +REMARK 3 TOPOLOGY FILE 2 : LIGAND.TOP +REMARK 3 TOPOLOGY FILE 3 : ION.TOP +REMARK 3 TOPOLOGY FILE 5 : WATER_PROTIN.TOP +REMARK 3 TOPOLOGY FILE 6 : NULL +REMARK 3 +REMARK 3 OTHER REFINEMENT REMARKS: NULL +REMARK 4 +REMARK 4 2RBG COMPLIES WITH FORMAT V. 3.15, 01-DEC-08 +REMARK 100 +REMARK 100 THIS ENTRY HAS BEEN PROCESSED BY PDBJ ON 27-SEP-07. +REMARK 100 THE RCSB ID CODE IS RCSB044658. 
+REMARK 200 +REMARK 200 EXPERIMENTAL DETAILS +REMARK 200 EXPERIMENT TYPE : X-RAY DIFFRACTION +REMARK 200 DATE OF DATA COLLECTION : 16-JUN-07 +REMARK 200 TEMPERATURE (KELVIN) : 100 +REMARK 200 PH : 7.5 +REMARK 200 NUMBER OF CRYSTALS USED : 1 +REMARK 200 +REMARK 200 SYNCHROTRON (Y/N) : Y +REMARK 200 RADIATION SOURCE : SPRING-8 +REMARK 200 BEAMLINE : BL26B2 +REMARK 200 X-RAY GENERATOR MODEL : NULL +REMARK 200 MONOCHROMATIC OR LAUE (M/L) : M +REMARK 200 WAVELENGTH OR RANGE (A) : 0.97899, 0.9, 0.97931 +REMARK 200 MONOCHROMATOR : SI-1 1 1 DOUBLE CRYSTAL +REMARK 200 MONOCHROMATOR +REMARK 200 OPTICS : RH COATED BENT-CYRINDRICAL +REMARK 200 MIRROR +REMARK 200 +REMARK 200 DETECTOR TYPE : CCD +REMARK 200 DETECTOR MANUFACTURER : MARMOSAIC 225 MM CCD +REMARK 200 INTENSITY-INTEGRATION SOFTWARE : HKL-2000 +REMARK 200 DATA SCALING SOFTWARE : SCALEPACK +REMARK 200 +REMARK 200 NUMBER OF UNIQUE REFLECTIONS : 25105 +REMARK 200 RESOLUTION RANGE HIGH (A) : 1.750 +REMARK 200 RESOLUTION RANGE LOW (A) : 50.000 +REMARK 200 REJECTION CRITERIA (SIGMA(I)) : NULL +REMARK 200 +REMARK 200 OVERALL. +REMARK 200 COMPLETENESS FOR RANGE (%) : 99.6 +REMARK 200 DATA REDUNDANCY : NULL +REMARK 200 R MERGE (I) : 0.05900 +REMARK 200 R SYM (I) : 0.06300 +REMARK 200 FOR THE DATA SET : NULL +REMARK 200 +REMARK 200 IN THE HIGHEST RESOLUTION SHELL. 
+REMARK 200 HIGHEST RESOLUTION SHELL, RANGE HIGH (A) : 1.75 +REMARK 200 HIGHEST RESOLUTION SHELL, RANGE LOW (A) : 1.81 +REMARK 200 COMPLETENESS FOR SHELL (%) : 96.9 +REMARK 200 DATA REDUNDANCY IN SHELL : NULL +REMARK 200 R MERGE FOR SHELL (I) : 0.14300 +REMARK 200 R SYM FOR SHELL (I) : 0.13300 +REMARK 200 FOR SHELL : NULL +REMARK 200 +REMARK 200 DIFFRACTION PROTOCOL: MAD +REMARK 200 METHOD USED TO DETERMINE THE STRUCTURE: MAD +REMARK 200 SOFTWARE USED: SOLVE +REMARK 200 STARTING MODEL: NULL +REMARK 200 +REMARK 200 REMARK: NULL +REMARK 280 +REMARK 280 CRYSTAL +REMARK 280 SOLVENT CONTENT, VS (%): 41.69 +REMARK 280 MATTHEWS COEFFICIENT, VM (ANGSTROMS**3/DA): 2.11 +REMARK 280 +REMARK 280 CRYSTALLIZATION CONDITIONS: 30% PEG 4K, 0.2M AMMONIUM SULFATE, +REMARK 280 PH 7.5, MICROBATCH, TEMPERATURE 293K +REMARK 290 +REMARK 290 CRYSTALLOGRAPHIC SYMMETRY +REMARK 290 SYMMETRY OPERATORS FOR SPACE GROUP: P 1 21 1 +REMARK 290 +REMARK 290 SYMOP SYMMETRY +REMARK 290 NNNMMM OPERATOR +REMARK 290 1555 X,Y,Z +REMARK 290 2555 -X,Y+1/2,-Z +REMARK 290 +REMARK 290 WHERE NNN -> OPERATOR NUMBER +REMARK 290 MMM -> TRANSLATION VECTOR +REMARK 290 +REMARK 290 CRYSTALLOGRAPHIC SYMMETRY TRANSFORMATIONS +REMARK 290 THE FOLLOWING TRANSFORMATIONS OPERATE ON THE ATOM/HETATM +REMARK 290 RECORDS IN THIS ENTRY TO PRODUCE CRYSTALLOGRAPHICALLY +REMARK 290 RELATED MOLECULES. +REMARK 290 SMTRY1 1 1.000000 0.000000 0.000000 0.00000 +REMARK 290 SMTRY2 1 0.000000 1.000000 0.000000 0.00000 +REMARK 290 SMTRY3 1 0.000000 0.000000 1.000000 0.00000 +REMARK 290 SMTRY1 2 -1.000000 0.000000 0.000000 0.00000 +REMARK 290 SMTRY2 2 0.000000 1.000000 0.000000 32.59200 +REMARK 290 SMTRY3 2 0.000000 0.000000 -1.000000 0.00000 +REMARK 290 +REMARK 290 REMARK: NULL +REMARK 300 +REMARK 300 BIOMOLECULE: 1, 2, 3 +REMARK 300 SEE REMARK 350 FOR THE AUTHOR PROVIDED AND/OR PROGRAM +REMARK 300 GENERATED ASSEMBLY INFORMATION FOR THE STRUCTURE IN +REMARK 300 THIS ENTRY. 
THE REMARK MAY ALSO PROVIDE INFORMATION ON +REMARK 300 BURIED SURFACE AREA. +REMARK 350 +REMARK 350 COORDINATES FOR A COMPLETE MULTIMER REPRESENTING THE KNOWN +REMARK 350 BIOLOGICALLY SIGNIFICANT OLIGOMERIZATION STATE OF THE +REMARK 350 MOLECULE CAN BE GENERATED BY APPLYING BIOMT TRANSFORMATIONS +REMARK 350 GIVEN BELOW. BOTH NON-CRYSTALLOGRAPHIC AND +REMARK 350 CRYSTALLOGRAPHIC OPERATIONS ARE GIVEN. +REMARK 350 +REMARK 350 BIOMOLECULE: 1 +REMARK 350 AUTHOR DETERMINED BIOLOGICAL UNIT: DIMERIC +REMARK 350 APPLY THE FOLLOWING TO CHAINS: A, B +REMARK 350 BIOMT1 1 1.000000 0.000000 0.000000 0.00000 +REMARK 350 BIOMT2 1 0.000000 1.000000 0.000000 0.00000 +REMARK 350 BIOMT3 1 0.000000 0.000000 1.000000 0.00000 +REMARK 350 +REMARK 350 BIOMOLECULE: 2 +REMARK 350 SOFTWARE DETERMINED QUATERNARY STRUCTURE: MONOMERIC +REMARK 350 SOFTWARE USED: PISA +REMARK 350 APPLY THE FOLLOWING TO CHAINS: A +REMARK 350 BIOMT1 1 1.000000 0.000000 0.000000 0.00000 +REMARK 350 BIOMT2 1 0.000000 1.000000 0.000000 0.00000 +REMARK 350 BIOMT3 1 0.000000 0.000000 1.000000 0.00000 +REMARK 350 +REMARK 350 BIOMOLECULE: 3 +REMARK 350 SOFTWARE DETERMINED QUATERNARY STRUCTURE: MONOMERIC +REMARK 350 SOFTWARE USED: PISA +REMARK 350 APPLY THE FOLLOWING TO CHAINS: B +REMARK 350 BIOMT1 1 1.000000 0.000000 0.000000 0.00000 +REMARK 350 BIOMT2 1 0.000000 1.000000 0.000000 0.00000 +REMARK 350 BIOMT3 1 0.000000 0.000000 1.000000 0.00000 +REMARK 465 +REMARK 465 MISSING RESIDUES +REMARK 465 THE FOLLOWING RESIDUES WERE NOT LOCATED IN THE +REMARK 465 EXPERIMENT. (M=MODEL NUMBER; RES=RESIDUE NAME; C=CHAIN +REMARK 465 IDENTIFIER; SSSEQ=SEQUENCE NUMBER; I=INSERTION CODE.) 
+REMARK 465 +REMARK 465 M RES C SSSEQI +REMARK 465 MSE A 1 +REMARK 465 PRO A 2 +REMARK 465 MSE B 1 +REMARK 465 PRO B 2 +REMARK 500 +REMARK 500 GEOMETRY AND STEREOCHEMISTRY +REMARK 500 SUBTOPIC: TORSION ANGLES +REMARK 500 +REMARK 500 TORSION ANGLES OUTSIDE THE EXPECTED RAMACHANDRAN REGIONS: +REMARK 500 (M=MODEL NUMBER; RES=RESIDUE NAME; C=CHAIN IDENTIFIER; +REMARK 500 SSEQ=SEQUENCE NUMBER; I=INSERTION CODE). +REMARK 500 +REMARK 500 STANDARD TABLE: +REMARK 500 FORMAT:(10X,I3,1X,A3,1X,A1,I4,A1,4X,F7.2,3X,F7.2) +REMARK 500 +REMARK 500 EXPECTED VALUES: GJ KLEYWEGT AND TA JONES (1996). PHI/PSI- +REMARK 500 CHOLOGY: RAMACHANDRAN REVISITED. STRUCTURE 4, 1395 - 1400 +REMARK 500 +REMARK 500 M RES CSSEQI PSI PHI +REMARK 500 PHE A 121 76.88 -102.11 +REMARK 500 CYS A 122 -73.41 -165.90 +REMARK 500 CYS B 122 -70.28 -161.68 +REMARK 500 +REMARK 500 REMARK: NULL +REMARK 800 +REMARK 800 SITE +REMARK 800 SITE_IDENTIFIER: AC1 +REMARK 800 EVIDENCE_CODE: SOFTWARE +REMARK 800 SITE_DESCRIPTION: BINDING SITE FOR RESIDUE SO4 B 127 +REMARK 900 +REMARK 900 RELATED ENTRIES +REMARK 900 RELATED ID: STO001000493.1 RELATED DB: TARGETDB +DBREF 2RBG A 1 126 UNP Q975B5 Q975B5_SULTO 1 126 +DBREF 2RBG B 1 126 UNP Q975B5 Q975B5_SULTO 1 126 +SEQRES 1 A 126 MSE PRO TYR LYS ASN ILE LEU THR LEU ILE SER VAL ASN +SEQRES 2 A 126 ASN ASP ASN PHE GLU ASN TYR PHE ARG LYS ILE PHE LEU +SEQRES 3 A 126 ASP VAL ARG SER SER GLY SER LYS LYS THR THR ILE ASN +SEQRES 4 A 126 VAL PHE THR GLU ILE GLN TYR GLN GLU LEU VAL THR LEU +SEQRES 5 A 126 ILE ARG GLU ALA LEU LEU GLU ASN ILE ASP ILE GLY TYR +SEQRES 6 A 126 GLU LEU PHE LEU TRP LYS LYS ASN GLU VAL ASP ILE PHE +SEQRES 7 A 126 LEU LYS ASN LEU GLU LYS SER GLU VAL ASP GLY LEU LEU +SEQRES 8 A 126 VAL TYR CYS ASP ASP GLU ASN LYS VAL PHE MSE SER LYS +SEQRES 9 A 126 ILE VAL ASP ASN LEU PRO THR ALA ILE LYS ARG ASN LEU +SEQRES 10 A 126 ILE LYS ASP PHE CYS ARG LYS LEU SER +SEQRES 1 B 126 MSE PRO TYR LYS ASN ILE LEU THR LEU ILE SER VAL ASN +SEQRES 2 B 126 ASN ASP ASN PHE GLU ASN TYR 
PHE ARG LYS ILE PHE LEU +SEQRES 3 B 126 ASP VAL ARG SER SER GLY SER LYS LYS THR THR ILE ASN +SEQRES 4 B 126 VAL PHE THR GLU ILE GLN TYR GLN GLU LEU VAL THR LEU +SEQRES 5 B 126 ILE ARG GLU ALA LEU LEU GLU ASN ILE ASP ILE GLY TYR +SEQRES 6 B 126 GLU LEU PHE LEU TRP LYS LYS ASN GLU VAL ASP ILE PHE +SEQRES 7 B 126 LEU LYS ASN LEU GLU LYS SER GLU VAL ASP GLY LEU LEU +SEQRES 8 B 126 VAL TYR CYS ASP ASP GLU ASN LYS VAL PHE MSE SER LYS +SEQRES 9 B 126 ILE VAL ASP ASN LEU PRO THR ALA ILE LYS ARG ASN LEU +SEQRES 10 B 126 ILE LYS ASP PHE CYS ARG LYS LEU SER +MODRES 2RBG MSE A 102 MET SELENOMETHIONINE +MODRES 2RBG MSE B 102 MET SELENOMETHIONINE +HET MSE A 102 8 +HET MSE B 102 8 +HET SO4 B 127 5 +HETNAM MSE SELENOMETHIONINE +HETNAM SO4 SULFATE ION +FORMUL 1 MSE 2(C5 H11 N O2 SE) +FORMUL 3 SO4 O4 S 2- +FORMUL 4 HOH *316(H2 O) +HELIX 1 1 ASN A 13 ASP A 15 5 3 +HELIX 2 2 ASN A 16 GLY A 32 1 17 +HELIX 3 3 GLN A 45 ILE A 53 1 9 +HELIX 4 4 ILE A 53 ASN A 60 1 8 +HELIX 5 5 LYS A 71 ASN A 73 5 3 +HELIX 6 6 GLU A 74 GLU A 83 1 10 +HELIX 7 7 ASN A 98 ASN A 108 1 11 +HELIX 8 8 PRO A 110 ARG A 115 1 6 +HELIX 9 9 ASN B 13 ASP B 15 5 3 +HELIX 10 10 ASN B 16 GLY B 32 1 17 +HELIX 11 11 GLN B 45 ILE B 53 1 9 +HELIX 12 12 ILE B 53 GLU B 59 1 7 +HELIX 13 13 LYS B 71 ASN B 73 5 3 +HELIX 14 14 GLU B 74 LEU B 82 1 9 +HELIX 15 15 GLU B 83 SER B 85 5 3 +HELIX 16 16 ASN B 98 ASN B 108 1 11 +HELIX 17 17 PRO B 110 ASN B 116 1 7 +SHEET 1 A 5 GLY A 64 TRP A 70 0 +SHEET 2 A 5 LYS A 35 PHE A 41 1 N VAL A 40 O PHE A 68 +SHEET 3 A 5 ILE A 6 SER A 11 1 N THR A 8 O ASN A 39 +SHEET 4 A 5 GLY A 89 CYS A 94 1 O GLY A 89 N LEU A 7 +SHEET 5 A 5 LEU A 117 PHE A 121 1 O ILE A 118 N LEU A 90 +SHEET 1 B 5 GLY B 64 TRP B 70 0 +SHEET 2 B 5 LYS B 35 PHE B 41 1 N VAL B 40 O TRP B 70 +SHEET 3 B 5 ILE B 6 SER B 11 1 N THR B 8 O ASN B 39 +SHEET 4 B 5 GLY B 89 CYS B 94 1 O GLY B 89 N LEU B 7 +SHEET 5 B 5 LEU B 117 PHE B 121 1 O ILE B 118 N LEU B 90 +SSBOND 1 CYS A 94 CYS A 122 1555 1555 2.03 +SSBOND 2 CYS B 94 CYS B 122 1555 
1555 2.03 +LINK C PHE A 101 N MSE A 102 1555 1555 1.33 +LINK C MSE A 102 N SER A 103 1555 1555 1.33 +LINK C PHE B 101 N MSE B 102 1555 1555 1.33 +LINK C MSE B 102 N SER B 103 1555 1555 1.33 +SITE 1 AC1 5 GLU B 18 ARG B 22 GLU B 55 HOH B 217 +SITE 2 AC1 5 HOH B 234 +CRYST1 39.444 65.184 49.604 90.00 98.19 90.00 P 1 21 1 4 +ORIGX1 1.000000 0.000000 0.000000 0.00000 +ORIGX2 0.000000 1.000000 0.000000 0.00000 +ORIGX3 0.000000 0.000000 1.000000 0.00000 +SCALE1 0.025352 0.000000 0.003650 0.00000 +SCALE2 0.000000 0.015341 0.000000 0.00000 +SCALE3 0.000000 0.000000 0.020368 0.00000 +ATOM 1 N TYR A 3 33.471 9.062 24.101 1.00 24.34 N +ATOM 2 CA TYR A 3 32.068 8.798 23.671 1.00 22.76 C +ATOM 3 C TYR A 3 31.421 10.059 23.108 1.00 22.12 C +ATOM 4 O TYR A 3 31.551 11.144 23.678 1.00 23.86 O +ATOM 5 CB TYR A 3 31.252 8.265 24.852 1.00 22.59 C +ATOM 6 CG TYR A 3 31.720 6.909 25.338 1.00 23.54 C +ATOM 7 CD1 TYR A 3 32.254 6.746 26.616 1.00 23.82 C +ATOM 8 CD2 TYR A 3 31.647 5.792 24.508 1.00 23.93 C +ATOM 9 CE1 TYR A 3 32.705 5.500 27.055 1.00 25.31 C +ATOM 10 CE2 TYR A 3 32.095 4.544 24.936 1.00 22.68 C +ATOM 11 CZ TYR A 3 32.622 4.405 26.208 1.00 25.21 C +ATOM 12 OH TYR A 3 33.070 3.171 26.625 1.00 27.53 O +ATOM 13 N LYS A 4 30.720 9.903 21.989 1.00 18.90 N +ATOM 14 CA LYS A 4 30.060 11.019 21.317 1.00 18.65 C +ATOM 15 C LYS A 4 28.537 10.918 21.313 1.00 15.20 C +ATOM 16 O LYS A 4 27.850 11.932 21.232 1.00 13.13 O +ATOM 17 CB LYS A 4 30.555 11.114 19.870 1.00 21.41 C +ATOM 18 CG LYS A 4 32.064 11.283 19.734 1.00 32.01 C +ATOM 19 CD LYS A 4 32.527 12.652 20.213 1.00 36.58 C +ATOM 20 CE LYS A 4 32.002 13.760 19.311 1.00 39.57 C +ATOM 21 NZ LYS A 4 32.463 15.105 19.752 1.00 43.99 N +ATOM 22 N ASN A 5 28.009 9.699 21.374 1.00 13.77 N +ATOM 23 CA ASN A 5 26.560 9.508 21.373 1.00 13.94 C +ATOM 24 C ASN A 5 26.217 8.213 22.092 1.00 14.07 C +ATOM 25 O ASN A 5 26.368 7.121 21.548 1.00 13.93 O +ATOM 26 CB ASN A 5 26.022 9.489 19.936 1.00 15.07 C +ATOM 27 CG ASN A 5 24.503 9.457 19.879 1.00 
19.05 C +ATOM 28 OD1 ASN A 5 23.826 10.028 20.734 1.00 18.93 O +ATOM 29 ND2 ASN A 5 23.960 8.805 18.857 1.00 23.13 N +ATOM 30 N ILE A 6 25.749 8.359 23.324 1.00 12.56 N +ATOM 31 CA ILE A 6 25.398 7.232 24.174 1.00 10.81 C +ATOM 32 C ILE A 6 24.026 6.636 23.871 1.00 9.05 C +ATOM 33 O ILE A 6 23.032 7.360 23.784 1.00 10.03 O +ATOM 34 CB ILE A 6 25.409 7.661 25.661 1.00 10.42 C +ATOM 35 CG1 ILE A 6 26.761 8.291 26.015 1.00 14.05 C +ATOM 36 CG2 ILE A 6 25.114 6.465 26.555 1.00 10.54 C +ATOM 37 CD1 ILE A 6 27.942 7.352 25.864 1.00 13.83 C +ATOM 38 N LEU A 7 23.978 5.317 23.695 1.00 7.97 N +ATOM 39 CA LEU A 7 22.708 4.638 23.468 1.00 7.84 C +ATOM 40 C LEU A 7 22.167 4.341 24.862 1.00 6.49 C +ATOM 41 O LEU A 7 22.786 3.598 25.623 1.00 7.93 O +ATOM 42 CB LEU A 7 22.901 3.315 22.724 1.00 7.80 C +ATOM 43 CG LEU A 7 21.627 2.465 22.610 1.00 8.47 C +ATOM 44 CD1 LEU A 7 20.587 3.198 21.769 1.00 8.00 C +ATOM 45 CD2 LEU A 7 21.961 1.115 21.988 1.00 10.59 C +ATOM 46 N THR A 8 21.029 4.936 25.201 1.00 5.70 N +ATOM 47 CA THR A 8 20.419 4.719 26.508 1.00 6.42 C +ATOM 48 C THR A 8 19.137 3.917 26.352 1.00 6.87 C +ATOM 49 O THR A 8 18.243 4.298 25.595 1.00 7.76 O +ATOM 50 CB THR A 8 20.101 6.061 27.208 1.00 6.58 C +ATOM 51 OG1 THR A 8 21.328 6.729 27.538 1.00 7.53 O +ATOM 52 CG2 THR A 8 19.310 5.826 28.490 1.00 7.99 C +ATOM 53 N LEU A 9 19.067 2.792 27.057 1.00 7.67 N +ATOM 54 CA LEU A 9 17.898 1.930 27.012 1.00 8.24 C +ATOM 55 C LEU A 9 17.289 1.878 28.404 1.00 8.48 C +ATOM 56 O LEU A 9 18.000 1.681 29.391 1.00 7.88 O +ATOM 57 CB LEU A 9 18.293 0.514 26.583 1.00 9.90 C +ATOM 58 CG LEU A 9 19.140 0.391 25.315 1.00 11.56 C +ATOM 59 CD1 LEU A 9 19.413 -1.082 25.031 1.00 10.95 C +ATOM 60 CD2 LEU A 9 18.418 1.039 24.145 1.00 10.46 C +ATOM 61 N ILE A 10 15.976 2.056 28.484 1.00 7.53 N +ATOM 62 CA ILE A 10 15.301 2.010 29.771 1.00 7.34 C +ATOM 63 C ILE A 10 13.911 1.408 29.690 1.00 8.82 C +ATOM 64 O ILE A 10 13.146 1.683 28.767 1.00 10.17 O +ATOM 65 CB ILE A 10 15.190 3.420 30.412 1.00 8.96 
C +ATOM 66 CG1 ILE A 10 14.388 3.338 31.717 1.00 7.62 C +ATOM 67 CG2 ILE A 10 14.524 4.392 29.433 1.00 9.63 C +ATOM 68 CD1 ILE A 10 14.445 4.613 32.566 1.00 11.33 C +ATOM 69 N SER A 11 13.605 0.560 30.664 1.00 8.33 N +ATOM 70 CA SER A 11 12.297 -0.060 30.761 1.00 10.47 C +ATOM 71 C SER A 11 11.962 -0.145 32.245 1.00 9.11 C +ATOM 72 O SER A 11 12.520 -0.964 32.972 1.00 11.58 O +ATOM 73 CB SER A 11 12.300 -1.457 30.143 1.00 13.19 C +ATOM 74 OG SER A 11 10.990 -1.998 30.156 1.00 19.72 O +ATOM 75 N VAL A 12 11.067 0.730 32.687 1.00 11.21 N +ATOM 76 CA VAL A 12 10.643 0.770 34.081 1.00 11.41 C +ATOM 77 C VAL A 12 9.161 1.098 34.156 1.00 15.63 C +ATOM 78 O VAL A 12 8.563 1.528 33.170 1.00 16.75 O +ATOM 79 CB VAL A 12 11.402 1.858 34.886 1.00 11.30 C +ATOM 80 CG1 VAL A 12 12.884 1.530 34.945 1.00 8.11 C +ATOM 81 CG2 VAL A 12 11.178 3.230 34.255 1.00 12.03 C +ATOM 82 N ASN A 13 8.575 0.887 35.330 1.00 17.25 N +ATOM 83 CA ASN A 13 7.170 1.200 35.547 1.00 20.47 C +ATOM 84 C ASN A 13 7.075 2.724 35.563 1.00 19.38 C +ATOM 85 O ASN A 13 8.061 3.404 35.845 1.00 18.17 O +ATOM 86 CB ASN A 13 6.700 0.622 36.885 1.00 23.13 C +ATOM 87 CG ASN A 13 6.713 -0.895 36.900 1.00 31.36 C +ATOM 88 OD1 ASN A 13 6.035 -1.541 36.099 1.00 36.96 O +ATOM 89 ND2 ASN A 13 7.484 -1.472 37.817 1.00 34.18 N +ATOM 90 N ASN A 14 5.896 3.259 35.266 1.00 18.03 N +ATOM 91 CA ASN A 14 5.707 4.707 35.224 1.00 19.51 C +ATOM 92 C ASN A 14 6.148 5.468 36.472 1.00 20.09 C +ATOM 93 O ASN A 14 6.659 6.582 36.372 1.00 20.91 O +ATOM 94 CB ASN A 14 4.242 5.048 34.941 1.00 20.73 C +ATOM 95 CG ASN A 14 3.742 4.437 33.653 1.00 23.53 C +ATOM 96 OD1 ASN A 14 4.496 4.276 32.696 1.00 22.26 O +ATOM 97 ND2 ASN A 14 2.456 4.108 33.615 1.00 26.38 N +ATOM 98 N ASP A 15 5.954 4.876 37.645 1.00 20.00 N +ATOM 99 CA ASP A 15 6.319 5.543 38.890 1.00 23.11 C +ATOM 100 C ASP A 15 7.828 5.697 39.071 1.00 20.27 C +ATOM 101 O ASP A 15 8.275 6.420 39.958 1.00 21.58 O +ATOM 102 CB ASP A 15 5.736 4.783 40.086 1.00 23.65 C +ATOM 103 CG ASP A 15 
6.495 3.509 40.394 1.00 33.42 C +ATOM 104 OD1 ASP A 15 6.862 2.787 39.443 1.00 37.24 O +ATOM 105 OD2 ASP A 15 6.719 3.222 41.591 1.00 40.07 O +ATOM 106 N ASN A 16 8.607 5.025 38.228 1.00 17.28 N +ATOM 107 CA ASN A 16 10.063 5.089 38.322 1.00 16.22 C +ATOM 108 C ASN A 16 10.757 6.035 37.343 1.00 17.13 C +ATOM 109 O ASN A 16 11.960 6.258 37.458 1.00 15.71 O +ATOM 110 CB ASN A 16 10.670 3.691 38.150 1.00 18.31 C +ATOM 111 CG ASN A 16 10.692 2.896 39.440 1.00 21.25 C +ATOM 112 OD1 ASN A 16 11.056 3.416 40.495 1.00 23.56 O +ATOM 113 ND2 ASN A 16 10.323 1.623 39.357 1.00 19.07 N +ATOM 114 N PHE A 17 10.020 6.598 36.392 1.00 14.63 N +ATOM 115 CA PHE A 17 10.641 7.486 35.409 1.00 14.77 C +ATOM 116 C PHE A 17 11.409 8.670 35.984 1.00 14.87 C +ATOM 117 O PHE A 17 12.552 8.913 35.604 1.00 9.25 O +ATOM 118 CB PHE A 17 9.602 7.998 34.404 1.00 12.16 C +ATOM 119 CG PHE A 17 9.216 6.987 33.365 1.00 11.38 C +ATOM 120 CD1 PHE A 17 10.192 6.337 32.614 1.00 13.83 C +ATOM 121 CD2 PHE A 17 7.878 6.680 33.135 1.00 14.52 C +ATOM 122 CE1 PHE A 17 9.842 5.393 31.649 1.00 14.54 C +ATOM 123 CE2 PHE A 17 7.518 5.740 32.174 1.00 14.67 C +ATOM 124 CZ PHE A 17 8.500 5.095 31.429 1.00 14.46 C +ATOM 125 N GLU A 18 10.792 9.411 36.897 1.00 16.23 N +ATOM 126 CA GLU A 18 11.464 10.565 37.475 1.00 16.73 C +ATOM 127 C GLU A 18 12.805 10.207 38.106 1.00 16.00 C +ATOM 128 O GLU A 18 13.818 10.842 37.814 1.00 16.65 O +ATOM 129 CB GLU A 18 10.557 11.247 38.505 1.00 23.36 C +ATOM 130 CG GLU A 18 9.338 11.909 37.879 1.00 30.35 C +ATOM 131 CD GLU A 18 8.469 12.633 38.889 1.00 37.35 C +ATOM 132 OE1 GLU A 18 8.971 13.562 39.558 1.00 37.02 O +ATOM 133 OE2 GLU A 18 7.280 12.273 39.010 1.00 40.39 O +ATOM 134 N ASN A 19 12.816 9.184 38.954 1.00 16.87 N +ATOM 135 CA ASN A 19 14.049 8.770 39.618 1.00 15.97 C +ATOM 136 C ASN A 19 15.094 8.227 38.649 1.00 15.31 C +ATOM 137 O ASN A 19 16.278 8.557 38.756 1.00 13.61 O +ATOM 138 CB ASN A 19 13.761 7.713 40.690 1.00 19.94 C +ATOM 139 CG ASN A 19 12.921 8.251 41.831 1.00 
26.59 C +ATOM 140 OD1 ASN A 19 13.143 9.361 42.313 1.00 28.74 O +ATOM 141 ND2 ASN A 19 11.958 7.454 42.283 1.00 31.96 N +ATOM 142 N TYR A 20 14.667 7.395 37.705 1.00 9.62 N +ATOM 143 CA TYR A 20 15.612 6.830 36.750 1.00 8.42 C +ATOM 144 C TYR A 20 16.193 7.835 35.765 1.00 10.15 C +ATOM 145 O TYR A 20 17.354 7.718 35.390 1.00 8.97 O +ATOM 146 CB TYR A 20 14.988 5.667 35.975 1.00 8.90 C +ATOM 147 CG TYR A 20 15.099 4.331 36.683 1.00 11.47 C +ATOM 148 CD1 TYR A 20 14.377 4.074 37.848 1.00 11.36 C +ATOM 149 CD2 TYR A 20 15.916 3.319 36.178 1.00 9.86 C +ATOM 150 CE1 TYR A 20 14.461 2.838 38.488 1.00 10.09 C +ATOM 151 CE2 TYR A 20 16.008 2.080 36.808 1.00 11.95 C +ATOM 152 CZ TYR A 20 15.272 1.847 37.965 1.00 10.22 C +ATOM 153 OH TYR A 20 15.329 0.615 38.579 1.00 12.19 O +ATOM 154 N PHE A 21 15.407 8.817 35.331 1.00 10.83 N +ATOM 155 CA PHE A 21 15.961 9.786 34.396 1.00 10.37 C +ATOM 156 C PHE A 21 17.015 10.652 35.066 1.00 9.86 C +ATOM 157 O PHE A 21 17.893 11.207 34.403 1.00 10.68 O +ATOM 158 CB PHE A 21 14.863 10.640 33.760 1.00 10.02 C +ATOM 159 CG PHE A 21 14.380 10.090 32.448 1.00 9.94 C +ATOM 160 CD1 PHE A 21 13.536 8.984 32.413 1.00 10.87 C +ATOM 161 CD2 PHE A 21 14.844 10.618 31.247 1.00 11.58 C +ATOM 162 CE1 PHE A 21 13.166 8.405 31.199 1.00 10.52 C +ATOM 163 CE2 PHE A 21 14.479 10.046 30.021 1.00 12.43 C +ATOM 164 CZ PHE A 21 13.640 8.937 29.999 1.00 11.64 C +ATOM 165 N ARG A 22 16.937 10.756 36.386 1.00 10.63 N +ATOM 166 CA ARG A 22 17.930 11.519 37.121 1.00 12.46 C +ATOM 167 C ARG A 22 19.243 10.741 36.990 1.00 12.16 C +ATOM 168 O ARG A 22 20.314 11.326 36.831 1.00 12.50 O +ATOM 169 CB ARG A 22 17.521 11.653 38.592 1.00 12.81 C +ATOM 170 CG ARG A 22 18.512 12.441 39.436 1.00 17.97 C +ATOM 171 CD ARG A 22 18.033 12.635 40.873 1.00 15.56 C +ATOM 172 NE ARG A 22 16.944 13.605 40.993 1.00 15.48 N +ATOM 173 CZ ARG A 22 16.484 14.056 42.158 1.00 17.00 C +ATOM 174 NH1 ARG A 22 17.020 13.622 43.293 1.00 13.10 N +ATOM 175 NH2 ARG A 22 15.495 14.941 42.195 1.00 16.86 
N +ATOM 176 N LYS A 23 19.150 9.414 37.040 1.00 9.11 N +ATOM 177 CA LYS A 23 20.330 8.562 36.910 1.00 8.13 C +ATOM 178 C LYS A 23 20.899 8.647 35.497 1.00 8.65 C +ATOM 179 O LYS A 23 22.109 8.744 35.305 1.00 11.79 O +ATOM 180 CB LYS A 23 19.983 7.099 37.206 1.00 10.36 C +ATOM 181 CG LYS A 23 19.601 6.794 38.646 1.00 10.87 C +ATOM 182 CD LYS A 23 19.398 5.289 38.832 1.00 14.62 C +ATOM 183 CE LYS A 23 19.222 4.926 40.294 1.00 23.04 C +ATOM 184 NZ LYS A 23 20.438 5.264 41.088 1.00 16.09 N +ATOM 185 N ILE A 24 20.015 8.600 34.505 1.00 7.43 N +ATOM 186 CA ILE A 24 20.443 8.660 33.116 1.00 6.12 C +ATOM 187 C ILE A 24 21.374 9.834 32.842 1.00 8.47 C +ATOM 188 O ILE A 24 22.446 9.661 32.271 1.00 9.39 O +ATOM 189 CB ILE A 24 19.226 8.750 32.168 1.00 6.49 C +ATOM 190 CG1 ILE A 24 18.475 7.414 32.183 1.00 5.04 C +ATOM 191 CG2 ILE A 24 19.684 9.104 30.748 1.00 7.14 C +ATOM 192 CD1 ILE A 24 17.160 7.432 31.432 1.00 5.89 C +ATOM 193 N PHE A 25 20.976 11.031 33.254 1.00 8.27 N +ATOM 194 CA PHE A 25 21.814 12.192 32.991 1.00 10.11 C +ATOM 195 C PHE A 25 23.098 12.230 33.813 1.00 8.55 C +ATOM 196 O PHE A 25 24.106 12.772 33.361 1.00 9.67 O +ATOM 197 CB PHE A 25 20.985 13.470 33.142 1.00 9.31 C +ATOM 198 CG PHE A 25 20.000 13.667 32.016 1.00 11.97 C +ATOM 199 CD1 PHE A 25 20.452 13.926 30.721 1.00 13.37 C +ATOM 200 CD2 PHE A 25 18.635 13.523 32.230 1.00 12.47 C +ATOM 201 CE1 PHE A 25 19.556 14.034 29.657 1.00 12.22 C +ATOM 202 CE2 PHE A 25 17.728 13.627 31.173 1.00 15.03 C +ATOM 203 CZ PHE A 25 18.193 13.883 29.883 1.00 13.24 C +ATOM 204 N LEU A 26 23.077 11.647 35.008 1.00 8.53 N +ATOM 205 CA LEU A 26 24.284 11.592 35.825 1.00 10.27 C +ATOM 206 C LEU A 26 25.305 10.752 35.054 1.00 7.43 C +ATOM 207 O LEU A 26 26.474 11.116 34.935 1.00 8.43 O +ATOM 208 CB LEU A 26 24.005 10.915 37.172 1.00 12.37 C +ATOM 209 CG LEU A 26 23.874 11.773 38.432 1.00 23.05 C +ATOM 210 CD1 LEU A 26 22.666 12.653 38.319 1.00 28.50 C +ATOM 211 CD2 LEU A 26 23.748 10.880 39.654 1.00 23.45 C +ATOM 212 N ASP A 
27 24.847 9.626 34.519 1.00 8.78 N +ATOM 213 CA ASP A 27 25.724 8.732 33.779 1.00 6.87 C +ATOM 214 C ASP A 27 26.167 9.306 32.439 1.00 7.47 C +ATOM 215 O ASP A 27 27.331 9.171 32.059 1.00 8.28 O +ATOM 216 CB ASP A 27 25.053 7.370 33.581 1.00 10.81 C +ATOM 217 CG ASP A 27 24.911 6.601 34.882 1.00 11.54 C +ATOM 218 OD1 ASP A 27 25.857 6.645 35.699 1.00 9.76 O +ATOM 219 OD2 ASP A 27 23.868 5.947 35.086 1.00 10.25 O +ATOM 220 N VAL A 28 25.251 9.952 31.723 1.00 6.57 N +ATOM 221 CA VAL A 28 25.619 10.536 30.437 1.00 8.12 C +ATOM 222 C VAL A 28 26.681 11.616 30.644 1.00 10.25 C +ATOM 223 O VAL A 28 27.683 11.663 29.928 1.00 9.64 O +ATOM 224 CB VAL A 28 24.399 11.150 29.718 1.00 8.01 C +ATOM 225 CG1 VAL A 28 24.862 11.969 28.515 1.00 9.50 C +ATOM 226 CG2 VAL A 28 23.457 10.034 29.253 1.00 8.04 C +ATOM 227 N ARG A 29 26.475 12.475 31.636 1.00 10.05 N +ATOM 228 CA ARG A 29 27.444 13.536 31.898 1.00 11.15 C +ATOM 229 C ARG A 29 28.827 12.967 32.214 1.00 11.79 C +ATOM 230 O ARG A 29 29.835 13.455 31.704 1.00 12.01 O +ATOM 231 CB ARG A 29 26.970 14.422 33.053 1.00 9.99 C +ATOM 232 CG ARG A 29 25.831 15.367 32.695 1.00 10.18 C +ATOM 233 CD ARG A 29 25.445 16.189 33.912 1.00 10.25 C +ATOM 234 NE ARG A 29 24.425 17.192 33.628 1.00 14.64 N +ATOM 235 CZ ARG A 29 24.640 18.502 33.651 1.00 20.85 C +ATOM 236 NH1 ARG A 29 25.844 18.976 33.943 1.00 20.73 N +ATOM 237 NH2 ARG A 29 23.645 19.341 33.398 1.00 23.29 N +ATOM 238 N SER A 30 28.875 11.926 33.040 1.00 10.27 N +ATOM 239 CA SER A 30 30.149 11.310 33.406 1.00 10.98 C +ATOM 240 C SER A 30 30.842 10.609 32.239 1.00 13.07 C +ATOM 241 O SER A 30 32.064 10.454 32.245 1.00 12.79 O +ATOM 242 CB SER A 30 29.953 10.298 34.543 1.00 8.79 C +ATOM 243 OG SER A 30 29.665 10.953 35.765 1.00 12.96 O +ATOM 244 N SER A 31 30.067 10.189 31.243 1.00 12.07 N +ATOM 245 CA SER A 31 30.625 9.488 30.087 1.00 12.63 C +ATOM 246 C SER A 31 31.478 10.385 29.197 1.00 14.41 C +ATOM 247 O SER A 31 32.286 9.894 28.411 1.00 16.95 O +ATOM 248 CB SER A 31 29.507 8.879 
29.237 1.00 15.15 C +ATOM 249 OG SER A 31 28.857 9.877 28.469 1.00 12.95 O +ATOM 250 N GLY A 32 31.289 11.694 29.312 1.00 16.32 N +ATOM 251 CA GLY A 32 32.051 12.623 28.496 1.00 17.33 C +ATOM 252 C GLY A 32 31.281 13.013 27.251 1.00 17.70 C +ATOM 253 O GLY A 32 31.649 13.951 26.540 1.00 16.74 O +ATOM 254 N SER A 33 30.205 12.284 26.981 1.00 14.11 N +ATOM 255 CA SER A 33 29.375 12.562 25.818 1.00 12.55 C +ATOM 256 C SER A 33 28.436 13.717 26.128 1.00 16.51 C +ATOM 257 O SER A 33 28.044 13.919 27.281 1.00 17.01 O +ATOM 258 CB SER A 33 28.557 11.324 25.442 1.00 12.31 C +ATOM 259 OG SER A 33 27.756 11.569 24.299 1.00 11.59 O +ATOM 260 N LYS A 34 28.081 14.476 25.099 1.00 15.42 N +ATOM 261 CA LYS A 34 27.176 15.601 25.267 1.00 17.53 C +ATOM 262 C LYS A 34 25.871 15.259 24.559 1.00 17.01 C +ATOM 263 O LYS A 34 24.970 16.090 24.465 1.00 17.51 O +ATOM 264 CB LYS A 34 27.785 16.869 24.656 1.00 21.02 C +ATOM 265 CG LYS A 34 29.250 17.100 25.025 1.00 25.18 C +ATOM 266 CD LYS A 34 29.463 17.088 26.533 1.00 29.46 C +ATOM 267 CE LYS A 34 30.942 17.190 26.884 1.00 31.20 C +ATOM 268 NZ LYS A 34 31.184 17.073 28.353 1.00 29.05 N +ATOM 269 N LYS A 35 25.781 14.020 24.078 1.00 16.37 N +ATOM 270 CA LYS A 35 24.604 13.544 23.358 1.00 13.55 C +ATOM 271 C LYS A 35 24.222 12.119 23.748 1.00 10.82 C +ATOM 272 O LYS A 35 25.074 11.303 24.092 1.00 12.00 O +ATOM 273 CB LYS A 35 24.861 13.551 21.851 1.00 14.65 C +ATOM 274 CG LYS A 35 25.180 14.899 21.239 1.00 23.77 C +ATOM 275 CD LYS A 35 25.571 14.724 19.774 1.00 29.96 C +ATOM 276 CE LYS A 35 25.766 16.063 19.075 1.00 34.03 C +ATOM 277 NZ LYS A 35 24.495 16.835 18.986 1.00 39.83 N +ATOM 278 N THR A 36 22.932 11.825 23.676 1.00 11.15 N +ATOM 279 CA THR A 36 22.449 10.487 23.972 1.00 9.64 C +ATOM 280 C THR A 36 21.129 10.278 23.253 1.00 8.90 C +ATOM 281 O THR A 36 20.336 11.211 23.103 1.00 11.37 O +ATOM 282 CB THR A 36 22.235 10.255 25.494 1.00 9.30 C +ATOM 283 OG1 THR A 36 21.808 8.903 25.714 1.00 11.46 O +ATOM 284 CG2 THR A 36 21.178 11.205 
26.049 1.00 10.57 C +ATOM 285 N THR A 37 20.918 9.064 22.766 1.00 8.09 N +ATOM 286 CA THR A 37 19.669 8.733 22.098 1.00 8.90 C +ATOM 287 C THR A 37 18.999 7.773 23.072 1.00 9.34 C +ATOM 288 O THR A 37 19.467 6.652 23.292 1.00 10.35 O +ATOM 289 CB THR A 37 19.916 8.084 20.710 1.00 16.76 C +ATOM 290 OG1 THR A 37 18.661 7.702 20.136 1.00 18.76 O +ATOM 291 CG2 THR A 37 20.828 6.875 20.819 1.00 17.18 C +ATOM 292 N ILE A 38 17.924 8.254 23.685 1.00 8.42 N +ATOM 293 CA ILE A 38 17.186 7.508 24.697 1.00 9.46 C +ATOM 294 C ILE A 38 16.015 6.715 24.137 1.00 10.38 C +ATOM 295 O ILE A 38 15.143 7.264 23.462 1.00 11.66 O +ATOM 296 CB ILE A 38 16.668 8.472 25.778 1.00 9.91 C +ATOM 297 CG1 ILE A 38 17.829 9.320 26.300 1.00 12.94 C +ATOM 298 CG2 ILE A 38 16.015 7.697 26.913 1.00 9.08 C +ATOM 299 CD1 ILE A 38 17.408 10.432 27.235 1.00 11.43 C +ATOM 300 N ASN A 39 15.999 5.422 24.441 1.00 6.80 N +ATOM 301 CA ASN A 39 14.946 4.527 23.976 1.00 8.56 C +ATOM 302 C ASN A 39 14.206 3.962 25.172 1.00 8.17 C +ATOM 303 O ASN A 39 14.772 3.221 25.977 1.00 12.28 O +ATOM 304 CB ASN A 39 15.563 3.409 23.141 1.00 6.67 C +ATOM 305 CG ASN A 39 16.136 3.923 21.841 1.00 11.85 C +ATOM 306 OD1 ASN A 39 15.430 4.038 20.838 1.00 10.25 O +ATOM 307 ND2 ASN A 39 17.416 4.264 21.856 1.00 11.82 N +ATOM 308 N VAL A 40 12.932 4.318 25.272 1.00 9.81 N +ATOM 309 CA VAL A 40 12.091 3.905 26.380 1.00 10.60 C +ATOM 310 C VAL A 40 11.061 2.874 25.947 1.00 11.33 C +ATOM 311 O VAL A 40 10.274 3.117 25.035 1.00 13.32 O +ATOM 312 CB VAL A 40 11.351 5.120 26.969 1.00 10.53 C +ATOM 313 CG1 VAL A 40 10.654 4.734 28.265 1.00 9.46 C +ATOM 314 CG2 VAL A 40 12.328 6.266 27.186 1.00 10.11 C +ATOM 315 N PHE A 41 11.073 1.724 26.609 1.00 10.47 N +ATOM 316 CA PHE A 41 10.134 0.655 26.303 1.00 10.56 C +ATOM 317 C PHE A 41 9.024 0.767 27.336 1.00 14.51 C +ATOM 318 O PHE A 41 9.169 0.343 28.482 1.00 12.82 O +ATOM 319 CB PHE A 41 10.880 -0.674 26.364 1.00 11.18 C +ATOM 320 CG PHE A 41 12.024 -0.741 25.393 1.00 13.39 C +ATOM 321 CD1 PHE 
A 41 11.798 -1.046 24.052 1.00 11.41 C +ATOM 322 CD2 PHE A 41 13.314 -0.401 25.795 1.00 13.82 C +ATOM 323 CE1 PHE A 41 12.837 -1.005 23.125 1.00 12.58 C +ATOM 324 CE2 PHE A 41 14.361 -0.357 24.875 1.00 16.09 C +ATOM 325 CZ PHE A 41 14.120 -0.659 23.535 1.00 13.07 C +ATOM 326 N THR A 42 7.918 1.371 26.909 1.00 15.02 N +ATOM 327 CA THR A 42 6.788 1.623 27.790 1.00 14.54 C +ATOM 328 C THR A 42 5.495 1.721 26.988 1.00 15.87 C +ATOM 329 O THR A 42 5.521 1.803 25.764 1.00 14.97 O +ATOM 330 CB THR A 42 7.011 2.962 28.532 1.00 16.35 C +ATOM 331 OG1 THR A 42 5.902 3.242 29.391 1.00 16.32 O +ATOM 332 CG2 THR A 42 7.166 4.098 27.525 1.00 15.89 C +ATOM 333 N GLU A 43 4.366 1.718 27.689 1.00 18.33 N +ATOM 334 CA GLU A 43 3.063 1.834 27.041 1.00 22.06 C +ATOM 335 C GLU A 43 2.551 3.265 27.188 1.00 23.02 C +ATOM 336 O GLU A 43 1.500 3.621 26.656 1.00 22.29 O +ATOM 337 CB GLU A 43 2.065 0.859 27.673 1.00 21.32 C +ATOM 338 CG GLU A 43 2.461 -0.607 27.557 1.00 26.43 C +ATOM 339 CD GLU A 43 2.665 -1.048 26.118 1.00 31.13 C +ATOM 340 OE1 GLU A 43 1.763 -0.802 25.290 1.00 33.47 O +ATOM 341 OE2 GLU A 43 3.724 -1.642 25.815 1.00 32.35 O +ATOM 342 N ILE A 44 3.311 4.083 27.910 1.00 23.91 N +ATOM 343 CA ILE A 44 2.948 5.476 28.149 1.00 26.52 C +ATOM 344 C ILE A 44 3.168 6.328 26.894 1.00 27.74 C +ATOM 345 O ILE A 44 3.974 5.976 26.033 1.00 24.73 O +ATOM 346 CB ILE A 44 3.783 6.040 29.326 1.00 28.12 C +ATOM 347 CG1 ILE A 44 2.971 7.072 30.104 1.00 28.57 C +ATOM 348 CG2 ILE A 44 5.080 6.650 28.810 1.00 24.21 C +ATOM 349 CD1 ILE A 44 3.649 7.523 31.384 1.00 31.00 C +ATOM 350 N GLN A 45 2.447 7.444 26.787 1.00 29.19 N +ATOM 351 CA GLN A 45 2.580 8.334 25.633 1.00 30.61 C +ATOM 352 C GLN A 45 3.693 9.358 25.823 1.00 28.49 C +ATOM 353 O GLN A 45 4.030 9.722 26.950 1.00 29.37 O +ATOM 354 CB GLN A 45 1.258 9.058 25.363 1.00 36.95 C +ATOM 355 CG GLN A 45 0.165 8.172 24.788 1.00 48.09 C +ATOM 356 CD GLN A 45 0.496 7.672 23.394 1.00 54.95 C +ATOM 357 OE1 GLN A 45 0.715 8.463 22.477 1.00 59.19 O +ATOM 
358 NE2 GLN A 45 0.531 6.354 23.229 1.00 59.21 N +ATOM 359 N TYR A 46 4.248 9.823 24.708 1.00 24.28 N +ATOM 360 CA TYR A 46 5.337 10.793 24.713 1.00 24.71 C +ATOM 361 C TYR A 46 5.090 11.982 25.639 1.00 25.54 C +ATOM 362 O TYR A 46 5.881 12.250 26.541 1.00 22.81 O +ATOM 363 CB TYR A 46 5.583 11.314 23.296 1.00 23.11 C +ATOM 364 CG TYR A 46 6.881 12.075 23.142 1.00 27.98 C +ATOM 365 CD1 TYR A 46 8.087 11.399 22.962 1.00 31.13 C +ATOM 366 CD2 TYR A 46 6.910 13.468 23.200 1.00 28.93 C +ATOM 367 CE1 TYR A 46 9.291 12.088 22.845 1.00 32.93 C +ATOM 368 CE2 TYR A 46 8.113 14.169 23.086 1.00 32.03 C +ATOM 369 CZ TYR A 46 9.298 13.470 22.909 1.00 32.08 C +ATOM 370 OH TYR A 46 10.492 14.148 22.803 1.00 33.47 O +ATOM 371 N GLN A 47 3.994 12.697 25.406 1.00 24.63 N +ATOM 372 CA GLN A 47 3.667 13.870 26.208 1.00 25.17 C +ATOM 373 C GLN A 47 3.568 13.610 27.706 1.00 23.21 C +ATOM 374 O GLN A 47 3.976 14.450 28.507 1.00 24.07 O +ATOM 375 CB GLN A 47 2.370 14.508 25.706 1.00 28.35 C +ATOM 376 CG GLN A 47 2.495 15.121 24.321 1.00 36.99 C +ATOM 377 CD GLN A 47 3.718 16.012 24.190 1.00 43.34 C +ATOM 378 OE1 GLN A 47 3.944 16.904 25.011 1.00 46.34 O +ATOM 379 NE2 GLN A 47 4.514 15.776 23.152 1.00 45.64 N +ATOM 380 N GLU A 48 3.025 12.459 28.091 1.00 23.97 N +ATOM 381 CA GLU A 48 2.911 12.138 29.507 1.00 22.97 C +ATOM 382 C GLU A 48 4.296 11.901 30.099 1.00 22.51 C +ATOM 383 O GLU A 48 4.583 12.325 31.217 1.00 17.54 O +ATOM 384 CB GLU A 48 2.029 10.903 29.720 1.00 27.75 C +ATOM 385 CG GLU A 48 2.033 10.402 31.160 1.00 35.85 C +ATOM 386 CD GLU A 48 0.862 9.493 31.483 1.00 42.98 C +ATOM 387 OE1 GLU A 48 0.527 8.621 30.652 1.00 46.34 O +ATOM 388 OE2 GLU A 48 0.281 9.645 32.578 1.00 44.85 O +ATOM 389 N LEU A 49 5.157 11.228 29.342 1.00 18.62 N +ATOM 390 CA LEU A 49 6.510 10.961 29.811 1.00 18.78 C +ATOM 391 C LEU A 49 7.267 12.268 30.002 1.00 17.00 C +ATOM 392 O LEU A 49 7.848 12.511 31.058 1.00 16.05 O +ATOM 393 CB LEU A 49 7.269 10.084 28.811 1.00 16.29 C +ATOM 394 CG LEU A 49 8.755 
9.895 29.139 1.00 16.44 C +ATOM 395 CD1 LEU A 49 8.901 9.183 30.479 1.00 17.18 C +ATOM 396 CD2 LEU A 49 9.432 9.102 28.033 1.00 19.52 C +ATOM 397 N VAL A 50 7.262 13.102 28.967 1.00 16.87 N +ATOM 398 CA VAL A 50 7.953 14.385 29.010 1.00 15.59 C +ATOM 399 C VAL A 50 7.490 15.214 30.201 1.00 17.36 C +ATOM 400 O VAL A 50 8.260 15.984 30.771 1.00 15.94 O +ATOM 401 CB VAL A 50 7.715 15.185 27.712 1.00 20.49 C +ATOM 402 CG1 VAL A 50 8.450 16.511 27.775 1.00 22.89 C +ATOM 403 CG2 VAL A 50 8.186 14.373 26.511 1.00 20.90 C +ATOM 404 N THR A 51 6.222 15.061 30.568 1.00 16.58 N +ATOM 405 CA THR A 51 5.677 15.789 31.705 1.00 13.61 C +ATOM 406 C THR A 51 6.308 15.251 32.989 1.00 13.20 C +ATOM 407 O THR A 51 6.723 16.020 33.856 1.00 14.98 O +ATOM 408 CB THR A 51 4.147 15.633 31.774 1.00 16.56 C +ATOM 409 OG1 THR A 51 3.559 16.293 30.645 1.00 19.12 O +ATOM 410 CG2 THR A 51 3.597 16.237 33.060 1.00 17.52 C +ATOM 411 N LEU A 52 6.396 13.929 33.099 1.00 13.91 N +ATOM 412 CA LEU A 52 6.985 13.303 34.279 1.00 13.78 C +ATOM 413 C LEU A 52 8.467 13.623 34.464 1.00 16.52 C +ATOM 414 O LEU A 52 8.925 13.837 35.587 1.00 20.03 O +ATOM 415 CB LEU A 52 6.814 11.781 34.219 1.00 15.00 C +ATOM 416 CG LEU A 52 5.407 11.210 34.404 1.00 18.12 C +ATOM 417 CD1 LEU A 52 5.443 9.698 34.229 1.00 19.35 C +ATOM 418 CD2 LEU A 52 4.885 11.576 35.785 1.00 20.14 C +ATOM 419 N ILE A 53 9.220 13.657 33.371 1.00 13.70 N +ATOM 420 CA ILE A 53 10.653 13.921 33.466 1.00 15.34 C +ATOM 421 C ILE A 53 11.051 15.357 33.146 1.00 16.08 C +ATOM 422 O ILE A 53 12.228 15.643 32.926 1.00 12.68 O +ATOM 423 CB ILE A 53 11.451 12.979 32.540 1.00 13.70 C +ATOM 424 CG1 ILE A 53 11.137 13.288 31.076 1.00 12.89 C +ATOM 425 CG2 ILE A 53 11.112 11.530 32.859 1.00 15.48 C +ATOM 426 CD1 ILE A 53 11.973 12.498 30.092 1.00 17.37 C +ATOM 427 N ARG A 54 10.075 16.259 33.133 1.00 14.93 N +ATOM 428 CA ARG A 54 10.339 17.663 32.831 1.00 17.46 C +ATOM 429 C ARG A 54 11.521 18.227 33.615 1.00 15.05 C +ATOM 430 O ARG A 54 12.396 18.875 33.043 
1.00 13.17 O +ATOM 431 CB ARG A 54 9.100 18.515 33.120 1.00 21.33 C +ATOM 432 CG ARG A 54 9.292 19.993 32.805 1.00 27.86 C +ATOM 433 CD ARG A 54 8.119 20.838 33.282 1.00 38.40 C +ATOM 434 NE ARG A 54 7.921 20.738 34.727 1.00 45.55 N +ATOM 435 CZ ARG A 54 6.935 20.058 35.304 1.00 48.28 C +ATOM 436 NH1 ARG A 54 6.838 20.021 36.627 1.00 49.43 N +ATOM 437 NH2 ARG A 54 6.037 19.424 34.560 1.00 44.89 N +ATOM 438 N GLU A 55 11.542 17.982 34.922 1.00 14.40 N +ATOM 439 CA GLU A 55 12.616 18.484 35.777 1.00 18.96 C +ATOM 440 C GLU A 55 13.983 17.950 35.365 1.00 15.03 C +ATOM 441 O GLU A 55 14.967 18.691 35.337 1.00 13.65 O +ATOM 442 CB GLU A 55 12.335 18.123 37.240 1.00 20.18 C +ATOM 443 CG GLU A 55 13.348 18.673 38.231 1.00 27.21 C +ATOM 444 CD GLU A 55 13.525 20.176 38.117 1.00 30.48 C +ATOM 445 OE1 GLU A 55 12.515 20.882 37.911 1.00 32.00 O +ATOM 446 OE2 GLU A 55 14.673 20.653 38.246 1.00 30.82 O +ATOM 447 N ALA A 56 14.046 16.664 35.041 1.00 15.12 N +ATOM 448 CA ALA A 56 15.308 16.061 34.628 1.00 13.23 C +ATOM 449 C ALA A 56 15.794 16.704 33.334 1.00 12.47 C +ATOM 450 O ALA A 56 16.980 16.980 33.175 1.00 12.54 O +ATOM 451 CB ALA A 56 15.137 14.559 34.439 1.00 12.68 C +ATOM 452 N LEU A 57 14.873 16.938 32.404 1.00 8.74 N +ATOM 453 CA LEU A 57 15.230 17.554 31.136 1.00 7.77 C +ATOM 454 C LEU A 57 15.739 18.981 31.332 1.00 8.61 C +ATOM 455 O LEU A 57 16.717 19.390 30.700 1.00 9.52 O +ATOM 456 CB LEU A 57 14.023 17.558 30.189 1.00 10.14 C +ATOM 457 CG LEU A 57 13.440 16.175 29.888 1.00 9.25 C +ATOM 458 CD1 LEU A 57 12.287 16.312 28.914 1.00 10.72 C +ATOM 459 CD2 LEU A 57 14.518 15.277 29.300 1.00 10.29 C +ATOM 460 N LEU A 58 15.081 19.731 32.211 1.00 9.14 N +ATOM 461 CA LEU A 58 15.472 21.111 32.480 1.00 10.13 C +ATOM 462 C LEU A 58 16.850 21.191 33.132 1.00 9.44 C +ATOM 463 O LEU A 58 17.678 22.021 32.756 1.00 9.82 O +ATOM 464 CB LEU A 58 14.433 21.785 33.386 1.00 13.24 C +ATOM 465 CG LEU A 58 14.756 23.201 33.871 1.00 18.30 C +ATOM 466 CD1 LEU A 58 14.880 24.140 32.686 
1.00 21.51 C +ATOM 467 CD2 LEU A 58 13.657 23.679 34.814 1.00 25.65 C +ATOM 468 N GLU A 59 17.090 20.322 34.107 1.00 8.78 N +ATOM 469 CA GLU A 59 18.364 20.302 34.818 1.00 11.12 C +ATOM 470 C GLU A 59 19.538 19.936 33.920 1.00 11.69 C +ATOM 471 O GLU A 59 20.687 20.279 34.210 1.00 12.28 O +ATOM 472 CB GLU A 59 18.304 19.309 35.980 1.00 13.15 C +ATOM 473 CG GLU A 59 17.497 19.777 37.170 1.00 17.13 C +ATOM 474 CD GLU A 59 17.449 18.742 38.275 1.00 18.20 C +ATOM 475 OE1 GLU A 59 18.404 17.944 38.381 1.00 18.34 O +ATOM 476 OE2 GLU A 59 16.466 18.734 39.045 1.00 19.18 O +ATOM 477 N ASN A 60 19.249 19.245 32.826 1.00 9.78 N +ATOM 478 CA ASN A 60 20.295 18.811 31.914 1.00 10.61 C +ATOM 479 C ASN A 60 20.108 19.363 30.509 1.00 12.44 C +ATOM 480 O ASN A 60 20.324 18.670 29.515 1.00 10.50 O +ATOM 481 CB ASN A 60 20.327 17.286 31.914 1.00 11.88 C +ATOM 482 CG ASN A 60 20.659 16.731 33.279 1.00 11.93 C +ATOM 483 OD1 ASN A 60 21.817 16.741 33.693 1.00 13.53 O +ATOM 484 ND2 ASN A 60 19.640 16.277 34.007 1.00 9.60 N +ATOM 485 N ILE A 61 19.726 20.634 30.453 1.00 14.87 N +ATOM 486 CA ILE A 61 19.486 21.340 29.203 1.00 14.76 C +ATOM 487 C ILE A 61 20.688 21.322 28.253 1.00 15.03 C +ATOM 488 O ILE A 61 20.517 21.351 27.035 1.00 13.45 O +ATOM 489 CB ILE A 61 19.085 22.812 29.492 1.00 15.88 C +ATOM 490 CG1 ILE A 61 18.626 23.505 28.208 1.00 20.00 C +ATOM 491 CG2 ILE A 61 20.250 23.555 30.119 1.00 19.27 C +ATOM 492 CD1 ILE A 61 17.277 23.039 27.718 1.00 24.49 C +ATOM 493 N ASP A 62 21.900 21.274 28.803 1.00 13.67 N +ATOM 494 CA ASP A 62 23.103 21.264 27.972 1.00 14.54 C +ATOM 495 C ASP A 62 23.280 19.967 27.191 1.00 14.65 C +ATOM 496 O ASP A 62 23.929 19.953 26.146 1.00 18.15 O +ATOM 497 CB ASP A 62 24.359 21.499 28.819 1.00 17.19 C +ATOM 498 CG ASP A 62 24.426 22.899 29.397 1.00 23.32 C +ATOM 499 OD1 ASP A 62 23.613 23.757 28.991 1.00 22.32 O +ATOM 500 OD2 ASP A 62 25.304 23.141 30.253 1.00 23.74 O +ATOM 501 N ILE A 63 22.711 18.880 27.699 1.00 12.30 N +ATOM 502 CA ILE A 63 22.830 
17.585 27.038 1.00 12.18 C +ATOM 503 C ILE A 63 21.861 17.474 25.867 1.00 14.22 C +ATOM 504 O ILE A 63 20.675 17.746 26.010 1.00 18.15 O +ATOM 505 CB ILE A 63 22.543 16.420 28.018 1.00 13.92 C +ATOM 506 CG1 ILE A 63 23.548 16.441 29.172 1.00 17.28 C +ATOM 507 CG2 ILE A 63 22.620 15.091 27.280 1.00 14.00 C +ATOM 508 CD1 ILE A 63 24.995 16.287 28.735 1.00 17.06 C +ATOM 509 N GLY A 64 22.375 17.081 24.708 1.00 14.14 N +ATOM 510 CA GLY A 64 21.516 16.922 23.552 1.00 16.94 C +ATOM 511 C GLY A 64 20.961 15.515 23.593 1.00 18.30 C +ATOM 512 O GLY A 64 21.693 14.568 23.869 1.00 20.02 O +ATOM 513 N TYR A 65 19.673 15.357 23.331 1.00 18.65 N +ATOM 514 CA TYR A 65 19.100 14.024 23.372 1.00 18.31 C +ATOM 515 C TYR A 65 17.954 13.851 22.395 1.00 20.44 C +ATOM 516 O TYR A 65 17.351 14.821 21.934 1.00 19.03 O +ATOM 517 CB TYR A 65 18.598 13.718 24.790 1.00 22.93 C +ATOM 518 CG TYR A 65 17.282 14.393 25.118 1.00 22.93 C +ATOM 519 CD1 TYR A 65 16.071 13.842 24.693 1.00 27.91 C +ATOM 520 CD2 TYR A 65 17.249 15.608 25.797 1.00 24.79 C +ATOM 521 CE1 TYR A 65 14.862 14.486 24.929 1.00 26.74 C +ATOM 522 CE2 TYR A 65 16.042 16.264 26.040 1.00 26.83 C +ATOM 523 CZ TYR A 65 14.853 15.695 25.600 1.00 28.42 C +ATOM 524 OH TYR A 65 13.655 16.334 25.824 1.00 30.78 O +ATOM 525 N GLU A 66 17.679 12.593 22.080 1.00 19.35 N +ATOM 526 CA GLU A 66 16.576 12.221 21.212 1.00 18.13 C +ATOM 527 C GLU A 66 15.814 11.238 22.081 1.00 17.07 C +ATOM 528 O GLU A 66 16.422 10.477 22.836 1.00 13.89 O +ATOM 529 CB GLU A 66 17.066 11.523 19.944 1.00 22.10 C +ATOM 530 CG GLU A 66 17.781 12.427 18.959 1.00 31.96 C +ATOM 531 CD GLU A 66 18.028 11.743 17.627 1.00 38.40 C +ATOM 532 OE1 GLU A 66 18.679 10.676 17.616 1.00 42.11 O +ATOM 533 OE2 GLU A 66 17.569 12.272 16.592 1.00 43.89 O +ATOM 534 N LEU A 67 14.491 11.264 21.992 1.00 15.56 N +ATOM 535 CA LEU A 67 13.666 10.376 22.794 1.00 14.40 C +ATOM 536 C LEU A 67 12.745 9.543 21.909 1.00 14.70 C +ATOM 537 O LEU A 67 11.996 10.080 21.093 1.00 15.80 O +ATOM 538 CB LEU A 
67 12.839 11.203 23.785 1.00 16.90 C +ATOM 539 CG LEU A 67 11.914 10.470 24.757 1.00 19.24 C +ATOM 540 CD1 LEU A 67 12.727 9.532 25.637 1.00 21.66 C +ATOM 541 CD2 LEU A 67 11.172 11.489 25.610 1.00 20.68 C +ATOM 542 N PHE A 68 12.818 8.227 22.076 1.00 11.13 N +ATOM 543 CA PHE A 68 11.996 7.298 21.314 1.00 12.93 C +ATOM 544 C PHE A 68 11.285 6.355 22.272 1.00 12.63 C +ATOM 545 O PHE A 68 11.911 5.759 23.149 1.00 12.01 O +ATOM 546 CB PHE A 68 12.866 6.479 20.355 1.00 12.05 C +ATOM 547 CG PHE A 68 13.523 7.296 19.285 1.00 14.59 C +ATOM 548 CD1 PHE A 68 12.792 7.756 18.195 1.00 14.07 C +ATOM 549 CD2 PHE A 68 14.870 7.625 19.375 1.00 15.16 C +ATOM 550 CE1 PHE A 68 13.394 8.532 17.208 1.00 15.37 C +ATOM 551 CE2 PHE A 68 15.482 8.401 18.393 1.00 17.63 C +ATOM 552 CZ PHE A 68 14.744 8.856 17.308 1.00 18.22 C +ATOM 553 N LEU A 69 9.974 6.232 22.112 1.00 12.84 N +ATOM 554 CA LEU A 69 9.198 5.334 22.955 1.00 13.24 C +ATOM 555 C LEU A 69 8.765 4.137 22.123 1.00 14.32 C +ATOM 556 O LEU A 69 8.332 4.289 20.978 1.00 13.65 O +ATOM 557 CB LEU A 69 7.968 6.046 23.526 1.00 14.24 C +ATOM 558 CG LEU A 69 8.206 6.979 24.718 1.00 18.28 C +ATOM 559 CD1 LEU A 69 9.175 8.085 24.331 1.00 18.52 C +ATOM 560 CD2 LEU A 69 6.879 7.565 25.175 1.00 19.14 C +ATOM 561 N TRP A 70 8.900 2.949 22.702 1.00 12.36 N +ATOM 562 CA TRP A 70 8.536 1.716 22.025 1.00 13.82 C +ATOM 563 C TRP A 70 7.665 0.826 22.889 1.00 14.23 C +ATOM 564 O TRP A 70 7.958 0.612 24.063 1.00 14.03 O +ATOM 565 CB TRP A 70 9.783 0.908 21.663 1.00 10.85 C +ATOM 566 CG TRP A 70 10.830 1.673 20.944 1.00 10.51 C +ATOM 567 CD1 TRP A 70 12.000 2.158 21.461 1.00 11.33 C +ATOM 568 CD2 TRP A 70 10.815 2.036 19.565 1.00 9.50 C +ATOM 569 NE1 TRP A 70 12.718 2.801 20.477 1.00 10.64 N +ATOM 570 CE2 TRP A 70 12.012 2.740 19.305 1.00 9.79 C +ATOM 571 CE3 TRP A 70 9.905 1.834 18.520 1.00 12.09 C +ATOM 572 CZ2 TRP A 70 12.322 3.243 18.038 1.00 11.85 C +ATOM 573 CZ3 TRP A 70 10.215 2.336 17.259 1.00 12.80 C +ATOM 574 CH2 TRP A 70 11.414 3.031 17.031 
1.00 14.03 C +ATOM 575 N LYS A 71 6.585 0.311 22.315 1.00 16.48 N +ATOM 576 CA LYS A 71 5.751 -0.615 23.057 1.00 18.93 C +ATOM 577 C LYS A 71 6.589 -1.886 22.981 1.00 22.72 C +ATOM 578 O LYS A 71 7.369 -2.052 22.045 1.00 20.37 O +ATOM 579 CB LYS A 71 4.404 -0.808 22.362 1.00 21.31 C +ATOM 580 CG LYS A 71 3.515 0.422 22.417 1.00 26.79 C +ATOM 581 CD LYS A 71 2.153 0.147 21.800 1.00 34.56 C +ATOM 582 CE LYS A 71 1.226 1.341 21.964 1.00 38.54 C +ATOM 583 NZ LYS A 71 1.787 2.569 21.336 1.00 42.94 N +ATOM 584 N LYS A 72 6.453 -2.775 23.957 1.00 25.48 N +ATOM 585 CA LYS A 72 7.250 -3.995 23.956 1.00 28.37 C +ATOM 586 C LYS A 72 7.172 -4.780 22.646 1.00 25.79 C +ATOM 587 O LYS A 72 8.112 -5.485 22.282 1.00 26.09 O +ATOM 588 CB LYS A 72 6.847 -4.875 25.142 1.00 33.91 C +ATOM 589 CG LYS A 72 7.142 -4.215 26.484 1.00 41.92 C +ATOM 590 CD LYS A 72 6.760 -5.093 27.661 1.00 49.29 C +ATOM 591 CE LYS A 72 7.062 -4.389 28.976 1.00 53.30 C +ATOM 592 NZ LYS A 72 6.675 -5.210 30.154 1.00 56.26 N +ATOM 593 N ASN A 73 6.063 -4.638 21.929 1.00 22.90 N +ATOM 594 CA ASN A 73 5.882 -5.339 20.663 1.00 22.29 C +ATOM 595 C ASN A 73 6.590 -4.636 19.501 1.00 18.11 C +ATOM 596 O ASN A 73 6.606 -5.141 18.379 1.00 16.58 O +ATOM 597 CB ASN A 73 4.388 -5.471 20.351 1.00 26.17 C +ATOM 598 CG ASN A 73 3.713 -4.126 20.148 1.00 30.05 C +ATOM 599 OD1 ASN A 73 3.996 -3.417 19.182 1.00 34.23 O +ATOM 600 ND2 ASN A 73 2.815 -3.767 21.060 1.00 33.29 N +ATOM 601 N GLU A 74 7.181 -3.476 19.774 1.00 15.76 N +ATOM 602 CA GLU A 74 7.876 -2.716 18.737 1.00 12.60 C +ATOM 603 C GLU A 74 9.394 -2.799 18.865 1.00 10.48 C +ATOM 604 O GLU A 74 10.123 -2.059 18.200 1.00 9.42 O +ATOM 605 CB GLU A 74 7.441 -1.250 18.779 1.00 16.35 C +ATOM 606 CG GLU A 74 5.944 -1.042 18.607 1.00 17.92 C +ATOM 607 CD GLU A 74 5.549 0.420 18.673 1.00 20.34 C +ATOM 608 OE1 GLU A 74 5.999 1.117 19.606 1.00 16.46 O +ATOM 609 OE2 GLU A 74 4.782 0.874 17.800 1.00 19.67 O +ATOM 610 N VAL A 75 9.871 -3.700 19.715 1.00 9.06 N +ATOM 611 CA VAL A 75 
11.307 -3.853 19.904 1.00 10.52 C +ATOM 612 C VAL A 75 12.000 -4.150 18.577 1.00 10.22 C +ATOM 613 O VAL A 75 13.149 -3.753 18.366 1.00 11.77 O +ATOM 614 CB VAL A 75 11.630 -4.984 20.903 1.00 11.34 C +ATOM 615 CG1 VAL A 75 13.144 -5.106 21.081 1.00 15.60 C +ATOM 616 CG2 VAL A 75 10.972 -4.693 22.241 1.00 17.33 C +ATOM 617 N ASP A 76 11.312 -4.838 17.672 1.00 9.91 N +ATOM 618 CA ASP A 76 11.929 -5.147 16.387 1.00 12.34 C +ATOM 619 C ASP A 76 12.226 -3.892 15.563 1.00 9.21 C +ATOM 620 O ASP A 76 13.214 -3.852 14.831 1.00 9.36 O +ATOM 621 CB ASP A 76 11.070 -6.145 15.589 1.00 14.04 C +ATOM 622 CG ASP A 76 9.660 -5.646 15.307 1.00 17.54 C +ATOM 623 OD1 ASP A 76 9.238 -4.607 15.857 1.00 13.26 O +ATOM 624 OD2 ASP A 76 8.960 -6.325 14.525 1.00 15.98 O +ATOM 625 N ILE A 77 11.388 -2.865 15.690 1.00 7.60 N +ATOM 626 CA ILE A 77 11.612 -1.620 14.956 1.00 8.71 C +ATOM 627 C ILE A 77 12.832 -0.927 15.568 1.00 9.62 C +ATOM 628 O ILE A 77 13.683 -0.391 14.857 1.00 8.90 O +ATOM 629 CB ILE A 77 10.393 -0.676 15.051 1.00 10.93 C +ATOM 630 CG1 ILE A 77 9.149 -1.364 14.476 1.00 10.71 C +ATOM 631 CG2 ILE A 77 10.673 0.611 14.282 1.00 10.85 C +ATOM 632 CD1 ILE A 77 7.862 -0.594 14.705 1.00 12.30 C +ATOM 633 N PHE A 78 12.907 -0.944 16.894 1.00 7.81 N +ATOM 634 CA PHE A 78 14.037 -0.347 17.592 1.00 8.65 C +ATOM 635 C PHE A 78 15.349 -0.969 17.115 1.00 11.53 C +ATOM 636 O PHE A 78 16.296 -0.264 16.767 1.00 11.69 O +ATOM 637 CB PHE A 78 13.900 -0.555 19.101 1.00 8.63 C +ATOM 638 CG PHE A 78 15.210 -0.506 19.831 1.00 10.37 C +ATOM 639 CD1 PHE A 78 15.906 0.690 19.962 1.00 13.42 C +ATOM 640 CD2 PHE A 78 15.776 -1.673 20.335 1.00 11.53 C +ATOM 641 CE1 PHE A 78 17.155 0.722 20.581 1.00 15.39 C +ATOM 642 CE2 PHE A 78 17.025 -1.651 20.955 1.00 11.81 C +ATOM 643 CZ PHE A 78 17.713 -0.451 21.077 1.00 14.87 C +ATOM 644 N LEU A 79 15.400 -2.296 17.108 1.00 10.16 N +ATOM 645 CA LEU A 79 16.603 -3.000 16.689 1.00 9.69 C +ATOM 646 C LEU A 79 16.961 -2.733 15.231 1.00 10.98 C +ATOM 647 O LEU A 79 
18.139 -2.623 14.893 1.00 10.05 O +ATOM 648 CB LEU A 79 16.443 -4.500 16.940 1.00 11.59 C +ATOM 649 CG LEU A 79 16.470 -4.879 18.425 1.00 9.25 C +ATOM 650 CD1 LEU A 79 15.977 -6.304 18.620 1.00 13.42 C +ATOM 651 CD2 LEU A 79 17.888 -4.720 18.953 1.00 12.07 C +ATOM 652 N LYS A 80 15.954 -2.620 14.369 1.00 10.41 N +ATOM 653 CA LYS A 80 16.218 -2.349 12.958 1.00 10.15 C +ATOM 654 C LYS A 80 16.780 -0.939 12.799 1.00 10.76 C +ATOM 655 O LYS A 80 17.782 -0.733 12.114 1.00 10.37 O +ATOM 656 CB LYS A 80 14.940 -2.495 12.126 1.00 12.27 C +ATOM 657 CG LYS A 80 15.145 -2.238 10.633 1.00 16.44 C +ATOM 658 CD LYS A 80 16.171 -3.193 10.040 1.00 20.17 C +ATOM 659 CE LYS A 80 16.448 -2.877 8.575 1.00 24.66 C +ATOM 660 NZ LYS A 80 17.426 -3.837 7.977 1.00 25.43 N +ATOM 661 N ASN A 81 16.134 0.032 13.438 1.00 8.82 N +ATOM 662 CA ASN A 81 16.580 1.417 13.367 1.00 9.52 C +ATOM 663 C ASN A 81 17.985 1.579 13.938 1.00 9.21 C +ATOM 664 O ASN A 81 18.736 2.458 13.516 1.00 8.16 O +ATOM 665 CB ASN A 81 15.615 2.325 14.133 1.00 6.26 C +ATOM 666 CG ASN A 81 14.281 2.498 13.423 1.00 9.62 C +ATOM 667 OD1 ASN A 81 14.035 1.894 12.378 1.00 9.19 O +ATOM 668 ND2 ASN A 81 13.414 3.328 13.993 1.00 6.70 N +ATOM 669 N LEU A 82 18.331 0.736 14.904 1.00 8.29 N +ATOM 670 CA LEU A 82 19.650 0.796 15.531 1.00 9.25 C +ATOM 671 C LEU A 82 20.761 0.619 14.493 1.00 10.59 C +ATOM 672 O LEU A 82 21.870 1.130 14.661 1.00 9.88 O +ATOM 673 CB LEU A 82 19.762 -0.282 16.614 1.00 9.95 C +ATOM 674 CG LEU A 82 21.043 -0.292 17.456 1.00 14.09 C +ATOM 675 CD1 LEU A 82 21.245 1.066 18.111 1.00 13.71 C +ATOM 676 CD2 LEU A 82 20.950 -1.389 18.509 1.00 10.74 C +ATOM 677 N GLU A 83 20.458 -0.095 13.413 1.00 9.50 N +ATOM 678 CA GLU A 83 21.448 -0.317 12.362 1.00 11.88 C +ATOM 679 C GLU A 83 21.947 0.992 11.755 1.00 12.86 C +ATOM 680 O GLU A 83 23.071 1.058 11.257 1.00 13.04 O +ATOM 681 CB GLU A 83 20.865 -1.191 11.249 1.00 12.61 C +ATOM 682 CG GLU A 83 20.452 -2.577 11.705 1.00 14.42 C +ATOM 683 CD GLU A 83 19.991 -3.454 10.558 
1.00 18.48 C +ATOM 684 OE1 GLU A 83 19.859 -2.939 9.430 1.00 18.66 O +ATOM 685 OE2 GLU A 83 19.754 -4.658 10.787 1.00 22.59 O +ATOM 686 N LYS A 84 21.115 2.030 11.799 1.00 11.63 N +ATOM 687 CA LYS A 84 21.477 3.330 11.232 1.00 14.14 C +ATOM 688 C LYS A 84 21.896 4.374 12.268 1.00 16.24 C +ATOM 689 O LYS A 84 22.274 5.489 11.911 1.00 17.54 O +ATOM 690 CB LYS A 84 20.302 3.900 10.431 1.00 13.67 C +ATOM 691 CG LYS A 84 19.888 3.087 9.219 1.00 18.74 C +ATOM 692 CD LYS A 84 18.672 3.720 8.549 1.00 19.43 C +ATOM 693 CE LYS A 84 18.253 2.953 7.308 1.00 25.40 C +ATOM 694 NZ LYS A 84 19.315 2.959 6.266 1.00 30.14 N +ATOM 695 N SER A 85 21.823 4.016 13.544 1.00 12.83 N +ATOM 696 CA SER A 85 22.165 4.943 14.616 1.00 16.34 C +ATOM 697 C SER A 85 23.641 4.895 14.983 1.00 17.60 C +ATOM 698 O SER A 85 24.186 3.830 15.267 1.00 14.60 O +ATOM 699 CB SER A 85 21.316 4.638 15.855 1.00 18.24 C +ATOM 700 OG SER A 85 21.550 5.583 16.885 1.00 25.32 O +ATOM 701 N GLU A 86 24.281 6.058 14.976 1.00 16.65 N +ATOM 702 CA GLU A 86 25.691 6.148 15.318 1.00 20.54 C +ATOM 703 C GLU A 86 25.847 6.356 16.818 1.00 19.99 C +ATOM 704 O GLU A 86 25.795 7.484 17.308 1.00 23.78 O +ATOM 705 CB GLU A 86 26.349 7.301 14.555 1.00 25.96 C +ATOM 706 CG GLU A 86 27.754 7.656 15.035 1.00 39.12 C +ATOM 707 CD GLU A 86 28.656 6.445 15.179 1.00 45.75 C +ATOM 708 OE1 GLU A 86 28.753 5.654 14.216 1.00 50.19 O +ATOM 709 OE2 GLU A 86 29.275 6.290 16.256 1.00 50.06 O +ATOM 710 N VAL A 87 26.020 5.254 17.540 1.00 20.16 N +ATOM 711 CA VAL A 87 26.198 5.286 18.989 1.00 18.68 C +ATOM 712 C VAL A 87 27.523 4.612 19.338 1.00 18.77 C +ATOM 713 O VAL A 87 27.977 3.724 18.616 1.00 17.50 O +ATOM 714 CB VAL A 87 25.051 4.552 19.704 1.00 18.06 C +ATOM 715 CG1 VAL A 87 23.748 5.309 19.504 1.00 20.67 C +ATOM 716 CG2 VAL A 87 24.926 3.138 19.163 1.00 18.82 C +ATOM 717 N ASP A 88 28.144 5.031 20.439 1.00 16.11 N +ATOM 718 CA ASP A 88 29.428 4.461 20.846 1.00 16.31 C +ATOM 719 C ASP A 88 29.521 4.009 22.300 1.00 20.76 C +ATOM 720 O ASP A 
88 30.507 3.385 22.698 1.00 25.38 O +ATOM 721 CB ASP A 88 30.556 5.454 20.570 1.00 19.28 C +ATOM 722 CG ASP A 88 30.224 6.857 21.036 1.00 18.16 C +ATOM 723 OD1 ASP A 88 29.422 7.004 21.984 1.00 19.26 O +ATOM 724 OD2 ASP A 88 30.779 7.813 20.458 1.00 20.16 O +ATOM 725 N GLY A 89 28.514 4.344 23.094 1.00 14.98 N +ATOM 726 CA GLY A 89 28.505 3.944 24.492 1.00 11.49 C +ATOM 727 C GLY A 89 27.131 3.392 24.807 1.00 11.28 C +ATOM 728 O GLY A 89 26.179 3.676 24.081 1.00 11.07 O +ATOM 729 N LEU A 90 27.014 2.623 25.887 1.00 7.86 N +ATOM 730 CA LEU A 90 25.732 2.028 26.248 1.00 8.27 C +ATOM 731 C LEU A 90 25.364 2.179 27.720 1.00 6.84 C +ATOM 732 O LEU A 90 26.191 1.947 28.599 1.00 8.06 O +ATOM 733 CB LEU A 90 25.743 0.539 25.897 1.00 9.22 C +ATOM 734 CG LEU A 90 24.518 -0.287 26.296 1.00 7.17 C +ATOM 735 CD1 LEU A 90 23.307 0.167 25.493 1.00 7.45 C +ATOM 736 CD2 LEU A 90 24.793 -1.763 26.048 1.00 10.76 C +ATOM 737 N LEU A 91 24.115 2.563 27.969 1.00 6.67 N +ATOM 738 CA LEU A 91 23.578 2.701 29.323 1.00 5.88 C +ATOM 739 C LEU A 91 22.297 1.877 29.362 1.00 7.65 C +ATOM 740 O LEU A 91 21.460 1.981 28.461 1.00 7.92 O +ATOM 741 CB LEU A 91 23.271 4.165 29.649 1.00 6.06 C +ATOM 742 CG LEU A 91 24.490 5.069 29.862 1.00 7.72 C +ATOM 743 CD1 LEU A 91 24.037 6.516 30.030 1.00 9.04 C +ATOM 744 CD2 LEU A 91 25.261 4.604 31.098 1.00 11.40 C +ATOM 745 N VAL A 92 22.147 1.056 30.397 1.00 7.74 N +ATOM 746 CA VAL A 92 20.973 0.196 30.526 1.00 9.41 C +ATOM 747 C VAL A 92 20.280 0.360 31.876 1.00 8.85 C +ATOM 748 O VAL A 92 20.929 0.302 32.920 1.00 9.80 O +ATOM 749 CB VAL A 92 21.368 -1.292 30.351 1.00 9.33 C +ATOM 750 CG1 VAL A 92 20.167 -2.188 30.602 1.00 11.21 C +ATOM 751 CG2 VAL A 92 21.923 -1.520 28.949 1.00 9.68 C +ATOM 752 N TYR A 93 18.962 0.546 31.846 1.00 8.30 N +ATOM 753 CA TYR A 93 18.179 0.713 33.072 1.00 7.46 C +ATOM 754 C TYR A 93 16.867 -0.059 33.053 1.00 9.64 C +ATOM 755 O TYR A 93 16.169 -0.096 32.039 1.00 9.58 O +ATOM 756 CB TYR A 93 17.833 2.185 33.292 1.00 7.49 C +ATOM 
757 CG TYR A 93 19.013 3.115 33.250 1.00 7.32 C +ATOM 758 CD1 TYR A 93 19.727 3.425 34.408 1.00 10.22 C +ATOM 759 CD2 TYR A 93 19.428 3.677 32.045 1.00 8.50 C +ATOM 760 CE1 TYR A 93 20.827 4.275 34.363 1.00 7.50 C +ATOM 761 CE2 TYR A 93 20.519 4.520 31.989 1.00 10.65 C +ATOM 762 CZ TYR A 93 21.217 4.818 33.149 1.00 9.17 C +ATOM 763 OH TYR A 93 22.297 5.665 33.083 1.00 9.81 O +ATOM 764 N CYS A 94 16.525 -0.652 34.191 1.00 9.58 N +ATOM 765 CA CYS A 94 15.270 -1.383 34.321 1.00 11.58 C +ATOM 766 C CYS A 94 14.964 -1.598 35.795 1.00 11.27 C +ATOM 767 O CYS A 94 15.816 -1.357 36.656 1.00 12.83 O +ATOM 768 CB CYS A 94 15.356 -2.754 33.632 1.00 11.71 C +ATOM 769 SG CYS A 94 16.168 -4.070 34.608 1.00 12.06 S +ATOM 770 N ASP A 95 13.733 -2.008 36.085 1.00 13.29 N +ATOM 771 CA ASP A 95 13.353 -2.344 37.450 1.00 15.28 C +ATOM 772 C ASP A 95 13.033 -3.840 37.408 1.00 15.81 C +ATOM 773 O ASP A 95 13.032 -4.440 36.335 1.00 14.16 O +ATOM 774 CB ASP A 95 12.152 -1.522 37.960 1.00 14.63 C +ATOM 775 CG ASP A 95 11.055 -1.342 36.927 1.00 16.19 C +ATOM 776 OD1 ASP A 95 10.946 -2.160 35.993 1.00 16.16 O +ATOM 777 OD2 ASP A 95 10.279 -0.370 37.074 1.00 16.66 O +ATOM 778 N ASP A 96 12.781 -4.451 38.561 1.00 18.96 N +ATOM 779 CA ASP A 96 12.504 -5.884 38.602 1.00 20.56 C +ATOM 780 C ASP A 96 11.413 -6.363 37.654 1.00 20.25 C +ATOM 781 O ASP A 96 11.549 -7.411 37.027 1.00 19.79 O +ATOM 782 CB ASP A 96 12.154 -6.317 40.026 1.00 25.74 C +ATOM 783 CG ASP A 96 13.353 -6.310 40.945 1.00 27.79 C +ATOM 784 OD1 ASP A 96 14.408 -6.847 40.547 1.00 33.13 O +ATOM 785 OD2 ASP A 96 13.237 -5.779 42.067 1.00 35.26 O +ATOM 786 N GLU A 97 10.333 -5.599 37.556 1.00 20.92 N +ATOM 787 CA GLU A 97 9.216 -5.962 36.693 1.00 22.17 C +ATOM 788 C GLU A 97 9.593 -6.037 35.216 1.00 22.07 C +ATOM 789 O GLU A 97 8.908 -6.691 34.431 1.00 21.21 O +ATOM 790 CB GLU A 97 8.068 -4.964 36.869 1.00 25.44 C +ATOM 791 CG GLU A 97 7.371 -5.031 38.219 1.00 37.78 C +ATOM 792 CD GLU A 97 8.317 -4.805 39.384 1.00 43.22 C +ATOM 793 
OE1 GLU A 97 9.043 -3.786 39.372 1.00 45.28 O +ATOM 794 OE2 GLU A 97 8.330 -5.642 40.314 1.00 42.89 O +ATOM 795 N ASN A 98 10.685 -5.380 34.840 1.00 17.80 N +ATOM 796 CA ASN A 98 11.110 -5.371 33.443 1.00 15.94 C +ATOM 797 C ASN A 98 12.511 -5.926 33.204 1.00 16.06 C +ATOM 798 O ASN A 98 13.054 -5.792 32.104 1.00 13.18 O +ATOM 799 CB ASN A 98 11.031 -3.942 32.901 1.00 16.46 C +ATOM 800 CG ASN A 98 9.621 -3.391 32.918 1.00 19.73 C +ATOM 801 OD1 ASN A 98 8.775 -3.797 32.120 1.00 23.19 O +ATOM 802 ND2 ASN A 98 9.354 -2.468 33.837 1.00 17.02 N +ATOM 803 N LYS A 99 13.088 -6.561 34.218 1.00 13.73 N +ATOM 804 CA LYS A 99 14.437 -7.107 34.102 1.00 14.87 C +ATOM 805 C LYS A 99 14.580 -8.218 33.063 1.00 14.96 C +ATOM 806 O LYS A 99 15.552 -8.238 32.307 1.00 14.14 O +ATOM 807 CB LYS A 99 14.920 -7.605 35.468 1.00 16.79 C +ATOM 808 CG LYS A 99 16.342 -8.144 35.460 1.00 18.70 C +ATOM 809 CD LYS A 99 16.878 -8.325 36.875 1.00 25.73 C +ATOM 810 CE LYS A 99 16.023 -9.280 37.685 1.00 30.36 C +ATOM 811 NZ LYS A 99 16.496 -9.377 39.094 1.00 34.03 N +ATOM 812 N VAL A 100 13.628 -9.147 33.025 1.00 15.47 N +ATOM 813 CA VAL A 100 13.688 -10.233 32.049 1.00 14.52 C +ATOM 814 C VAL A 100 13.612 -9.647 30.641 1.00 13.62 C +ATOM 815 O VAL A 100 14.373 -10.028 29.752 1.00 13.40 O +ATOM 816 CB VAL A 100 12.520 -11.229 32.240 1.00 17.58 C +ATOM 817 CG1 VAL A 100 12.531 -12.268 31.124 1.00 14.99 C +ATOM 818 CG2 VAL A 100 12.641 -11.914 33.593 1.00 18.88 C +ATOM 819 N PHE A 101 12.694 -8.707 30.454 1.00 14.49 N +ATOM 820 CA PHE A 101 12.518 -8.053 29.166 1.00 16.41 C +ATOM 821 C PHE A 101 13.790 -7.329 28.728 1.00 16.42 C +ATOM 822 O PHE A 101 14.326 -7.593 27.650 1.00 13.50 O +ATOM 823 CB PHE A 101 11.368 -7.052 29.243 1.00 16.85 C +ATOM 824 CG PHE A 101 11.188 -6.238 27.995 1.00 21.20 C +ATOM 825 CD1 PHE A 101 10.807 -6.843 26.801 1.00 22.47 C +ATOM 826 CD2 PHE A 101 11.394 -4.864 28.013 1.00 24.04 C +ATOM 827 CE1 PHE A 101 10.633 -6.091 25.642 1.00 24.25 C +ATOM 828 CE2 PHE A 101 11.224 
-4.101 26.861 1.00 28.34 C +ATOM 829 CZ PHE A 101 10.842 -4.716 25.672 1.00 27.51 C +HETATM 830 N MSE A 102 14.271 -6.413 29.562 1.00 15.07 N +HETATM 831 CA MSE A 102 15.471 -5.656 29.229 1.00 15.73 C +HETATM 832 C MSE A 102 16.697 -6.542 29.030 1.00 15.29 C +HETATM 833 O MSE A 102 17.510 -6.291 28.138 1.00 14.86 O +HETATM 834 CB MSE A 102 15.761 -4.609 30.308 1.00 16.50 C +HETATM 835 CG MSE A 102 16.999 -3.766 30.031 1.00 12.98 C +HETATM 836 SE MSE A 102 16.938 -2.880 28.300 1.00 27.13 SE +HETATM 837 CE MSE A 102 15.668 -1.533 28.732 1.00 8.64 C +ATOM 838 N SER A 103 16.835 -7.578 29.852 1.00 17.48 N +ATOM 839 CA SER A 103 17.978 -8.478 29.733 1.00 17.27 C +ATOM 840 C SER A 103 18.018 -9.139 28.360 1.00 17.75 C +ATOM 841 O SER A 103 19.089 -9.324 27.783 1.00 18.72 O +ATOM 842 CB SER A 103 17.930 -9.555 30.822 1.00 17.33 C +ATOM 843 OG SER A 103 18.125 -8.986 32.103 1.00 22.22 O +ATOM 844 N LYS A 104 16.848 -9.489 27.836 1.00 18.40 N +ATOM 845 CA LYS A 104 16.772 -10.126 26.526 1.00 17.38 C +ATOM 846 C LYS A 104 17.196 -9.150 25.431 1.00 17.58 C +ATOM 847 O LYS A 104 17.929 -9.518 24.512 1.00 19.01 O +ATOM 848 CB LYS A 104 15.349 -10.623 26.261 1.00 18.69 C +ATOM 849 CG LYS A 104 15.172 -11.313 24.916 1.00 22.54 C +ATOM 850 CD LYS A 104 13.791 -11.952 24.792 1.00 23.95 C +ATOM 851 CE LYS A 104 12.674 -10.929 24.964 1.00 27.27 C +ATOM 852 NZ LYS A 104 11.319 -11.539 24.823 1.00 29.03 N +ATOM 853 N ILE A 105 16.734 -7.906 25.533 1.00 16.37 N +ATOM 854 CA ILE A 105 17.081 -6.884 24.551 1.00 16.07 C +ATOM 855 C ILE A 105 18.596 -6.688 24.526 1.00 14.20 C +ATOM 856 O ILE A 105 19.206 -6.640 23.458 1.00 15.34 O +ATOM 857 CB ILE A 105 16.404 -5.529 24.878 1.00 15.75 C +ATOM 858 CG1 ILE A 105 14.885 -5.662 24.757 1.00 18.48 C +ATOM 859 CG2 ILE A 105 16.901 -4.445 23.925 1.00 16.56 C +ATOM 860 CD1 ILE A 105 14.134 -4.377 25.044 1.00 22.69 C +ATOM 861 N VAL A 106 19.198 -6.572 25.706 1.00 11.65 N +ATOM 862 CA VAL A 106 20.641 -6.388 25.798 1.00 13.23 C +ATOM 863 C VAL A 106 
21.380 -7.546 25.132 1.00 17.19 C +ATOM 864 O VAL A 106 22.370 -7.336 24.431 1.00 15.73 O +ATOM 865 CB VAL A 106 21.103 -6.275 27.268 1.00 14.34 C +ATOM 866 CG1 VAL A 106 22.621 -6.188 27.332 1.00 14.51 C +ATOM 867 CG2 VAL A 106 20.482 -5.042 27.909 1.00 12.30 C +ATOM 868 N ASP A 107 20.894 -8.767 25.344 1.00 15.92 N +ATOM 869 CA ASP A 107 21.528 -9.939 24.748 1.00 19.28 C +ATOM 870 C ASP A 107 21.531 -9.877 23.224 1.00 18.25 C +ATOM 871 O ASP A 107 22.408 -10.453 22.581 1.00 19.40 O +ATOM 872 CB ASP A 107 20.820 -11.232 25.174 1.00 21.10 C +ATOM 873 CG ASP A 107 20.957 -11.522 26.654 1.00 24.44 C +ATOM 874 OD1 ASP A 107 22.031 -11.238 27.225 1.00 24.29 O +ATOM 875 OD2 ASP A 107 19.993 -12.057 27.244 1.00 26.54 O +ATOM 876 N ASN A 108 20.550 -9.187 22.650 1.00 16.67 N +ATOM 877 CA ASN A 108 20.448 -9.090 21.196 1.00 16.97 C +ATOM 878 C ASN A 108 21.076 -7.848 20.570 1.00 15.91 C +ATOM 879 O ASN A 108 20.954 -7.636 19.366 1.00 13.39 O +ATOM 880 CB ASN A 108 18.984 -9.189 20.759 1.00 19.30 C +ATOM 881 CG ASN A 108 18.415 -10.582 20.944 1.00 23.83 C +ATOM 882 OD1 ASN A 108 18.184 -11.032 22.068 1.00 26.90 O +ATOM 883 ND2 ASN A 108 18.194 -11.278 19.835 1.00 22.68 N +ATOM 884 N LEU A 109 21.741 -7.026 21.374 1.00 14.98 N +ATOM 885 CA LEU A 109 22.385 -5.828 20.840 1.00 14.18 C +ATOM 886 C LEU A 109 23.672 -6.221 20.135 1.00 16.48 C +ATOM 887 O LEU A 109 24.253 -7.265 20.431 1.00 14.47 O +ATOM 888 CB LEU A 109 22.727 -4.847 21.963 1.00 11.94 C +ATOM 889 CG LEU A 109 21.578 -4.194 22.728 1.00 9.62 C +ATOM 890 CD1 LEU A 109 22.146 -3.384 23.887 1.00 8.05 C +ATOM 891 CD2 LEU A 109 20.769 -3.304 21.795 1.00 8.93 C +ATOM 892 N PRO A 110 24.137 -5.388 19.190 1.00 16.03 N +ATOM 893 CA PRO A 110 25.377 -5.682 18.467 1.00 18.68 C +ATOM 894 C PRO A 110 26.539 -5.848 19.445 1.00 18.36 C +ATOM 895 O PRO A 110 26.588 -5.189 20.486 1.00 16.80 O +ATOM 896 CB PRO A 110 25.551 -4.459 17.572 1.00 17.68 C +ATOM 897 CG PRO A 110 24.137 -4.055 17.288 1.00 20.90 C +ATOM 898 CD PRO A 110 
23.488 -4.181 18.649 1.00 17.75 C +ATOM 899 N THR A 111 27.473 -6.725 19.102 1.00 17.87 N +ATOM 900 CA THR A 111 28.631 -6.987 19.946 1.00 19.17 C +ATOM 901 C THR A 111 29.378 -5.717 20.354 1.00 18.59 C +ATOM 902 O THR A 111 29.683 -5.518 21.529 1.00 15.29 O +ATOM 903 CB THR A 111 29.625 -7.918 19.230 1.00 18.45 C +ATOM 904 OG1 THR A 111 28.975 -9.154 18.913 1.00 26.33 O +ATOM 905 CG2 THR A 111 30.828 -8.193 20.116 1.00 22.32 C +ATOM 906 N ALA A 112 29.671 -4.864 19.377 1.00 16.37 N +ATOM 907 CA ALA A 112 30.402 -3.627 19.631 1.00 18.47 C +ATOM 908 C ALA A 112 29.717 -2.716 20.643 1.00 17.46 C +ATOM 909 O ALA A 112 30.381 -2.010 21.401 1.00 19.35 O +ATOM 910 CB ALA A 112 30.624 -2.875 18.321 1.00 18.49 C +ATOM 911 N ILE A 113 28.390 -2.731 20.658 1.00 12.89 N +ATOM 912 CA ILE A 113 27.644 -1.887 21.583 1.00 16.15 C +ATOM 913 C ILE A 113 27.695 -2.444 23.001 1.00 15.76 C +ATOM 914 O ILE A 113 27.925 -1.706 23.959 1.00 17.40 O +ATOM 915 CB ILE A 113 26.179 -1.734 21.130 1.00 15.62 C +ATOM 916 CG1 ILE A 113 26.143 -1.026 19.771 1.00 16.32 C +ATOM 917 CG2 ILE A 113 25.391 -0.936 22.162 1.00 15.49 C +ATOM 918 CD1 ILE A 113 24.753 -0.743 19.245 1.00 16.88 C +ATOM 919 N LYS A 114 27.491 -3.749 23.134 1.00 16.40 N +ATOM 920 CA LYS A 114 27.527 -4.383 24.446 1.00 17.88 C +ATOM 921 C LYS A 114 28.898 -4.255 25.099 1.00 18.65 C +ATOM 922 O LYS A 114 29.004 -4.206 26.323 1.00 19.90 O +ATOM 923 CB LYS A 114 27.149 -5.863 24.332 1.00 20.13 C +ATOM 924 CG LYS A 114 25.693 -6.097 23.967 1.00 23.14 C +ATOM 925 CD LYS A 114 25.324 -7.573 24.001 1.00 28.04 C +ATOM 926 CE LYS A 114 25.952 -8.340 22.854 1.00 31.86 C +ATOM 927 NZ LYS A 114 25.460 -9.747 22.805 1.00 36.71 N +ATOM 928 N ARG A 115 29.946 -4.185 24.285 1.00 19.13 N +ATOM 929 CA ARG A 115 31.299 -4.081 24.819 1.00 21.63 C +ATOM 930 C ARG A 115 31.620 -2.708 25.393 1.00 21.69 C +ATOM 931 O ARG A 115 32.625 -2.540 26.082 1.00 19.57 O +ATOM 932 CB ARG A 115 32.322 -4.458 23.745 1.00 29.14 C +ATOM 933 CG ARG A 115 32.066 -5.832 
23.150 1.00 40.81 C +ATOM 934 CD ARG A 115 33.338 -6.510 22.674 1.00 51.82 C +ATOM 935 NE ARG A 115 33.045 -7.808 22.070 1.00 60.88 N +ATOM 936 CZ ARG A 115 33.959 -8.734 21.800 1.00 65.54 C +ATOM 937 NH1 ARG A 115 35.235 -8.513 22.083 1.00 68.77 N +ATOM 938 NH2 ARG A 115 33.595 -9.882 21.242 1.00 67.23 N +ATOM 939 N ASN A 116 30.768 -1.727 25.116 1.00 16.60 N +ATOM 940 CA ASN A 116 30.983 -0.385 25.639 1.00 13.28 C +ATOM 941 C ASN A 116 29.921 -0.012 26.664 1.00 13.98 C +ATOM 942 O ASN A 116 29.466 1.131 26.721 1.00 11.77 O +ATOM 943 CB ASN A 116 31.011 0.644 24.508 1.00 17.96 C +ATOM 944 CG ASN A 116 32.280 0.560 23.682 1.00 23.95 C +ATOM 945 OD1 ASN A 116 32.393 -0.258 22.771 1.00 27.65 O +ATOM 946 ND2 ASN A 116 33.253 1.399 24.014 1.00 24.96 N +ATOM 947 N LEU A 117 29.532 -0.993 27.472 1.00 14.76 N +ATOM 948 CA LEU A 117 28.539 -0.790 28.520 1.00 16.01 C +ATOM 949 C LEU A 117 29.169 0.082 29.606 1.00 16.50 C +ATOM 950 O LEU A 117 30.149 -0.311 30.238 1.00 19.94 O +ATOM 951 CB LEU A 117 28.119 -2.142 29.100 1.00 13.89 C +ATOM 952 CG LEU A 117 27.156 -2.147 30.288 1.00 15.07 C +ATOM 953 CD1 LEU A 117 25.825 -1.532 29.881 1.00 13.69 C +ATOM 954 CD2 LEU A 117 26.961 -3.579 30.766 1.00 16.68 C +ATOM 955 N ILE A 118 28.601 1.266 29.813 1.00 11.74 N +ATOM 956 CA ILE A 118 29.109 2.215 30.801 1.00 13.36 C +ATOM 957 C ILE A 118 28.505 1.995 32.180 1.00 12.53 C +ATOM 958 O ILE A 118 29.206 2.033 33.191 1.00 12.37 O +ATOM 959 CB ILE A 118 28.810 3.664 30.367 1.00 14.81 C +ATOM 960 CG1 ILE A 118 29.505 3.958 29.037 1.00 17.20 C +ATOM 961 CG2 ILE A 118 29.273 4.644 31.445 1.00 16.33 C +ATOM 962 CD1 ILE A 118 29.137 5.297 28.442 1.00 15.95 C +ATOM 963 N LYS A 119 27.196 1.782 32.216 1.00 9.94 N +ATOM 964 CA LYS A 119 26.491 1.553 33.470 1.00 8.83 C +ATOM 965 C LYS A 119 25.264 0.711 33.185 1.00 12.00 C +ATOM 966 O LYS A 119 24.547 0.953 32.216 1.00 9.96 O +ATOM 967 CB LYS A 119 26.062 2.883 34.105 1.00 10.54 C +ATOM 968 CG LYS A 119 25.176 2.742 35.358 1.00 12.68 C +ATOM 969 
CD LYS A 119 25.904 2.066 36.516 1.00 12.97 C +ATOM 970 CE LYS A 119 25.005 1.904 37.747 1.00 9.78 C +ATOM 971 NZ LYS A 119 24.704 3.205 38.415 1.00 9.17 N +ATOM 972 N ASP A 120 25.043 -0.291 34.025 1.00 11.88 N +ATOM 973 CA ASP A 120 23.892 -1.164 33.887 1.00 12.15 C +ATOM 974 C ASP A 120 23.189 -1.204 35.238 1.00 13.35 C +ATOM 975 O ASP A 120 23.647 -1.875 36.158 1.00 13.72 O +ATOM 976 CB ASP A 120 24.332 -2.579 33.492 1.00 13.47 C +ATOM 977 CG ASP A 120 23.156 -3.506 33.216 1.00 18.79 C +ATOM 978 OD1 ASP A 120 22.239 -3.586 34.061 1.00 19.37 O +ATOM 979 OD2 ASP A 120 23.149 -4.162 32.155 1.00 26.90 O +ATOM 980 N PHE A 121 22.112 -0.440 35.374 1.00 9.12 N +ATOM 981 CA PHE A 121 21.345 -0.460 36.612 1.00 8.53 C +ATOM 982 C PHE A 121 20.158 -1.323 36.233 1.00 10.99 C +ATOM 983 O PHE A 121 19.062 -0.820 35.976 1.00 12.01 O +ATOM 984 CB PHE A 121 20.864 0.938 37.000 1.00 8.25 C +ATOM 985 CG PHE A 121 20.292 1.008 38.388 1.00 9.71 C +ATOM 986 CD1 PHE A 121 21.126 1.167 39.490 1.00 9.45 C +ATOM 987 CD2 PHE A 121 18.926 0.875 38.597 1.00 13.08 C +ATOM 988 CE1 PHE A 121 20.606 1.190 40.781 1.00 10.63 C +ATOM 989 CE2 PHE A 121 18.394 0.895 39.887 1.00 11.99 C +ATOM 990 CZ PHE A 121 19.238 1.054 40.981 1.00 12.89 C +ATOM 991 N CYS A 122 20.385 -2.631 36.182 1.00 12.06 N +ATOM 992 CA CYS A 122 19.331 -3.542 35.781 1.00 13.91 C +ATOM 993 C CYS A 122 19.612 -5.009 36.086 1.00 15.31 C +ATOM 994 O CYS A 122 19.032 -5.583 37.008 1.00 15.62 O +ATOM 995 CB CYS A 122 19.078 -3.364 34.280 1.00 13.84 C +ATOM 996 SG CYS A 122 17.857 -4.508 33.568 1.00 13.88 S +ATOM 997 N ARG A 123 20.511 -5.611 35.318 1.00 16.09 N +ATOM 998 CA ARG A 123 20.827 -7.025 35.485 1.00 18.40 C +ATOM 999 C ARG A 123 21.351 -7.484 36.846 1.00 17.94 C +ATOM 1000 O ARG A 123 21.097 -8.621 37.244 1.00 17.65 O +ATOM 1001 CB ARG A 123 21.772 -7.468 34.366 1.00 19.65 C +ATOM 1002 CG ARG A 123 21.072 -7.500 33.007 1.00 28.18 C +ATOM 1003 CD ARG A 123 22.004 -7.899 31.879 1.00 29.84 C +ATOM 1004 NE ARG A 123 23.045 
-6.901 31.658 1.00 32.33 N +ATOM 1005 CZ ARG A 123 24.006 -7.013 30.748 1.00 29.90 C +ATOM 1006 NH1 ARG A 123 24.060 -8.083 29.967 1.00 27.70 N +ATOM 1007 NH2 ARG A 123 24.913 -6.056 30.619 1.00 29.83 N +ATOM 1008 N LYS A 124 22.063 -6.621 37.567 1.00 13.84 N +ATOM 1009 CA LYS A 124 22.574 -7.010 38.881 1.00 12.49 C +ATOM 1010 C LYS A 124 21.558 -6.778 40.004 1.00 10.77 C +ATOM 1011 O LYS A 124 21.839 -7.074 41.164 1.00 12.23 O +ATOM 1012 CB LYS A 124 23.877 -6.264 39.210 1.00 15.02 C +ATOM 1013 CG LYS A 124 25.124 -6.785 38.485 1.00 17.86 C +ATOM 1014 CD LYS A 124 26.376 -6.035 38.942 1.00 21.21 C +ATOM 1015 CE LYS A 124 27.647 -6.566 38.282 1.00 24.54 C +ATOM 1016 NZ LYS A 124 28.878 -5.873 38.791 1.00 21.46 N +ATOM 1017 N LEU A 125 20.385 -6.246 39.670 1.00 9.74 N +ATOM 1018 CA LEU A 125 19.359 -6.009 40.684 1.00 11.32 C +ATOM 1019 C LEU A 125 18.803 -7.337 41.180 1.00 14.53 C +ATOM 1020 O LEU A 125 18.539 -8.239 40.388 1.00 14.63 O +ATOM 1021 CB LEU A 125 18.213 -5.171 40.115 1.00 11.72 C +ATOM 1022 CG LEU A 125 18.482 -3.693 39.848 1.00 13.13 C +ATOM 1023 CD1 LEU A 125 17.307 -3.105 39.084 1.00 13.14 C +ATOM 1024 CD2 LEU A 125 18.690 -2.955 41.168 1.00 13.06 C +ATOM 1025 N SER A 126 18.630 -7.454 42.493 1.00 13.97 N +ATOM 1026 CA SER A 126 18.095 -8.673 43.087 1.00 18.03 C +ATOM 1027 C SER A 126 16.570 -8.624 43.178 1.00 20.57 C +ATOM 1028 O SER A 126 15.954 -9.708 43.269 1.00 24.85 O +ATOM 1029 CB SER A 126 18.697 -8.899 44.479 1.00 18.07 C +ATOM 1030 OG SER A 126 18.407 -7.823 45.351 1.00 15.78 O +TER 1031 SER A 126 +ATOM 1032 N TYR B 3 24.874 -14.238 65.592 1.00 21.36 N +ATOM 1033 CA TYR B 3 24.778 -13.845 64.154 1.00 14.51 C +ATOM 1034 C TYR B 3 24.644 -15.062 63.250 1.00 13.36 C +ATOM 1035 O TYR B 3 25.275 -16.090 63.494 1.00 14.59 O +ATOM 1036 CB TYR B 3 26.025 -13.057 63.736 1.00 14.32 C +ATOM 1037 CG TYR B 3 26.252 -11.805 64.544 1.00 12.05 C +ATOM 1038 CD1 TYR B 3 27.202 -11.769 65.564 1.00 12.30 C +ATOM 1039 CD2 TYR B 3 25.492 -10.662 64.309 1.00 
10.81 C +ATOM 1040 CE1 TYR B 3 27.388 -10.616 66.331 1.00 14.06 C +ATOM 1041 CE2 TYR B 3 25.667 -9.512 65.069 1.00 13.41 C +ATOM 1042 CZ TYR B 3 26.614 -9.496 66.076 1.00 15.79 C +ATOM 1043 OH TYR B 3 26.781 -8.356 66.825 1.00 14.76 O +ATOM 1044 N LYS B 4 23.823 -14.950 62.210 1.00 11.22 N +ATOM 1045 CA LYS B 4 23.663 -16.057 61.274 1.00 13.45 C +ATOM 1046 C LYS B 4 23.831 -15.659 59.804 1.00 10.36 C +ATOM 1047 O LYS B 4 23.870 -16.522 58.933 1.00 10.86 O +ATOM 1048 CB LYS B 4 22.316 -16.761 61.471 1.00 16.58 C +ATOM 1049 CG LYS B 4 21.093 -15.942 61.128 1.00 16.48 C +ATOM 1050 CD LYS B 4 19.882 -16.859 61.028 1.00 21.17 C +ATOM 1051 CE LYS B 4 18.595 -16.085 60.822 1.00 27.98 C +ATOM 1052 NZ LYS B 4 18.247 -15.263 62.014 1.00 29.87 N +ATOM 1053 N ASN B 5 23.921 -14.359 59.523 1.00 8.93 N +ATOM 1054 CA ASN B 5 24.134 -13.899 58.150 1.00 8.93 C +ATOM 1055 C ASN B 5 24.842 -12.553 58.160 1.00 8.91 C +ATOM 1056 O ASN B 5 24.222 -11.500 58.322 1.00 10.65 O +ATOM 1057 CB ASN B 5 22.820 -13.791 57.372 1.00 11.88 C +ATOM 1058 CG ASN B 5 23.049 -13.491 55.898 1.00 13.66 C +ATOM 1059 OD1 ASN B 5 24.105 -13.815 55.347 1.00 19.21 O +ATOM 1060 ND2 ASN B 5 22.060 -12.887 55.250 1.00 22.41 N +ATOM 1061 N ILE B 6 26.152 -12.612 57.965 1.00 8.00 N +ATOM 1062 CA ILE B 6 27.003 -11.433 57.994 1.00 9.10 C +ATOM 1063 C ILE B 6 27.177 -10.713 56.666 1.00 8.01 C +ATOM 1064 O ILE B 6 27.518 -11.327 55.654 1.00 9.10 O +ATOM 1065 CB ILE B 6 28.416 -11.809 58.500 1.00 9.03 C +ATOM 1066 CG1 ILE B 6 28.320 -12.480 59.874 1.00 9.90 C +ATOM 1067 CG2 ILE B 6 29.303 -10.572 58.544 1.00 10.80 C +ATOM 1068 CD1 ILE B 6 27.770 -11.591 60.966 1.00 9.93 C +ATOM 1069 N LEU B 7 26.941 -9.405 56.678 1.00 8.14 N +ATOM 1070 CA LEU B 7 27.147 -8.591 55.489 1.00 6.09 C +ATOM 1071 C LEU B 7 28.588 -8.124 55.596 1.00 6.79 C +ATOM 1072 O LEU B 7 28.950 -7.434 56.551 1.00 7.97 O +ATOM 1073 CB LEU B 7 26.225 -7.367 55.478 1.00 7.81 C +ATOM 1074 CG LEU B 7 26.529 -6.349 54.369 1.00 8.36 C +ATOM 1075 CD1 LEU B 7 
26.304 -6.988 53.002 1.00 9.26 C +ATOM 1076 CD2 LEU B 7 25.638 -5.118 54.545 1.00 7.47 C +ATOM 1077 N THR B 8 29.417 -8.518 54.635 1.00 6.69 N +ATOM 1078 CA THR B 8 30.821 -8.123 54.639 1.00 7.81 C +ATOM 1079 C THR B 8 31.088 -7.181 53.468 1.00 6.38 C +ATOM 1080 O THR B 8 30.726 -7.481 52.328 1.00 6.46 O +ATOM 1081 CB THR B 8 31.759 -9.352 54.501 1.00 10.09 C +ATOM 1082 OG1 THR B 8 31.631 -10.194 55.656 1.00 7.20 O +ATOM 1083 CG2 THR B 8 33.214 -8.901 54.370 1.00 8.79 C +ATOM 1084 N LEU B 9 31.706 -6.039 53.757 1.00 4.35 N +ATOM 1085 CA LEU B 9 32.043 -5.062 52.729 1.00 5.78 C +ATOM 1086 C LEU B 9 33.558 -4.900 52.689 1.00 6.78 C +ATOM 1087 O LEU B 9 34.209 -4.773 53.731 1.00 6.68 O +ATOM 1088 CB LEU B 9 31.407 -3.701 53.039 1.00 6.10 C +ATOM 1089 CG LEU B 9 29.928 -3.657 53.430 1.00 6.69 C +ATOM 1090 CD1 LEU B 9 29.491 -2.202 53.589 1.00 7.98 C +ATOM 1091 CD2 LEU B 9 29.087 -4.351 52.367 1.00 8.90 C +ATOM 1092 N ILE B 10 34.125 -4.911 51.490 1.00 5.68 N +ATOM 1093 CA ILE B 10 35.562 -4.744 51.369 1.00 6.72 C +ATOM 1094 C ILE B 10 35.965 -4.002 50.110 1.00 7.62 C +ATOM 1095 O ILE B 10 35.402 -4.206 49.037 1.00 5.59 O +ATOM 1096 CB ILE B 10 36.308 -6.109 51.406 1.00 6.91 C +ATOM 1097 CG1 ILE B 10 37.820 -5.882 51.274 1.00 7.02 C +ATOM 1098 CG2 ILE B 10 35.802 -7.021 50.297 1.00 6.03 C +ATOM 1099 CD1 ILE B 10 38.662 -7.130 51.515 1.00 8.43 C +ATOM 1100 N SER B 11 36.934 -3.110 50.267 1.00 7.10 N +ATOM 1101 CA SER B 11 37.476 -2.363 49.148 1.00 6.62 C +ATOM 1102 C SER B 11 38.942 -2.164 49.466 1.00 8.26 C +ATOM 1103 O SER B 11 39.288 -1.400 50.368 1.00 7.92 O +ATOM 1104 CB SER B 11 36.790 -1.008 48.984 1.00 6.11 C +ATOM 1105 OG SER B 11 37.256 -0.365 47.808 1.00 9.34 O +ATOM 1106 N VAL B 12 39.792 -2.885 48.742 1.00 7.09 N +ATOM 1107 CA VAL B 12 41.239 -2.801 48.918 1.00 10.34 C +ATOM 1108 C VAL B 12 41.924 -2.927 47.557 1.00 12.03 C +ATOM 1109 O VAL B 12 41.310 -3.347 46.576 1.00 10.61 O +ATOM 1110 CB VAL B 12 41.782 -3.938 49.832 1.00 10.18 C +ATOM 1111 CG1 VAL B 12 
41.208 -3.817 51.237 1.00 7.55 C +ATOM 1112 CG2 VAL B 12 41.446 -5.301 49.234 1.00 11.44 C +ATOM 1113 N ASN B 13 43.197 -2.553 47.500 1.00 11.33 N +ATOM 1114 CA ASN B 13 43.973 -2.680 46.275 1.00 11.60 C +ATOM 1115 C ASN B 13 44.161 -4.169 46.021 1.00 10.16 C +ATOM 1116 O ASN B 13 44.092 -4.971 46.953 1.00 11.25 O +ATOM 1117 CB ASN B 13 45.327 -1.996 46.446 1.00 14.31 C +ATOM 1118 CG ASN B 13 45.205 -0.496 46.587 1.00 19.40 C +ATOM 1119 OD1 ASN B 13 46.076 0.158 47.157 1.00 27.47 O +ATOM 1120 ND2 ASN B 13 44.121 0.060 46.054 1.00 20.72 N +ATOM 1121 N ASN B 14 44.410 -4.534 44.769 1.00 10.10 N +ATOM 1122 CA ASN B 14 44.581 -5.938 44.400 1.00 11.06 C +ATOM 1123 C ASN B 14 45.570 -6.732 45.250 1.00 10.81 C +ATOM 1124 O ASN B 14 45.307 -7.888 45.580 1.00 10.95 O +ATOM 1125 CB ASN B 14 44.995 -6.057 42.930 1.00 10.61 C +ATOM 1126 CG ASN B 14 43.954 -5.496 41.982 1.00 15.18 C +ATOM 1127 OD1 ASN B 14 42.782 -5.372 42.330 1.00 14.69 O +ATOM 1128 ND2 ASN B 14 44.378 -5.164 40.769 1.00 19.56 N +ATOM 1129 N ASP B 15 46.702 -6.125 45.602 1.00 10.94 N +ATOM 1130 CA ASP B 15 47.705 -6.838 46.383 1.00 10.98 C +ATOM 1131 C ASP B 15 47.330 -7.096 47.836 1.00 10.19 C +ATOM 1132 O ASP B 15 48.071 -7.759 48.557 1.00 10.38 O +ATOM 1133 CB ASP B 15 49.065 -6.124 46.325 1.00 14.95 C +ATOM 1134 CG ASP B 15 49.000 -4.684 46.787 1.00 21.05 C +ATOM 1135 OD1 ASP B 15 48.229 -4.376 47.719 1.00 22.00 O +ATOM 1136 OD2 ASP B 15 49.746 -3.853 46.223 1.00 30.99 O +ATOM 1137 N ASN B 16 46.183 -6.581 48.267 1.00 11.48 N +ATOM 1138 CA ASN B 16 45.732 -6.790 49.641 1.00 7.76 C +ATOM 1139 C ASN B 16 44.579 -7.784 49.751 1.00 8.08 C +ATOM 1140 O ASN B 16 44.177 -8.141 50.855 1.00 8.44 O +ATOM 1141 CB ASN B 16 45.291 -5.466 50.279 1.00 10.28 C +ATOM 1142 CG ASN B 16 46.462 -4.627 50.763 1.00 15.33 C +ATOM 1143 OD1 ASN B 16 47.456 -5.156 51.263 1.00 15.34 O +ATOM 1144 ND2 ASN B 16 46.339 -3.308 50.640 1.00 12.76 N +ATOM 1145 N PHE B 17 44.050 -8.238 48.619 1.00 7.20 N +ATOM 1146 CA PHE B 17 42.923 -9.167 
48.655 1.00 8.00 C +ATOM 1147 C PHE B 17 43.147 -10.464 49.434 1.00 8.33 C +ATOM 1148 O PHE B 17 42.342 -10.807 50.296 1.00 7.68 O +ATOM 1149 CB PHE B 17 42.446 -9.503 47.237 1.00 9.52 C +ATOM 1150 CG PHE B 17 41.524 -8.472 46.637 1.00 8.82 C +ATOM 1151 CD1 PHE B 17 40.442 -7.979 47.362 1.00 9.39 C +ATOM 1152 CD2 PHE B 17 41.709 -8.030 45.330 1.00 9.79 C +ATOM 1153 CE1 PHE B 17 39.554 -7.063 46.793 1.00 11.35 C +ATOM 1154 CE2 PHE B 17 40.827 -7.116 44.753 1.00 9.80 C +ATOM 1155 CZ PHE B 17 39.746 -6.631 45.487 1.00 12.77 C +ATOM 1156 N GLU B 18 44.218 -11.194 49.136 1.00 8.33 N +ATOM 1157 CA GLU B 18 44.467 -12.456 49.833 1.00 9.72 C +ATOM 1158 C GLU B 18 44.568 -12.297 51.348 1.00 6.59 C +ATOM 1159 O GLU B 18 43.893 -13.004 52.094 1.00 7.10 O +ATOM 1160 CB GLU B 18 45.735 -13.128 49.290 1.00 9.13 C +ATOM 1161 CG GLU B 18 46.238 -14.336 50.100 1.00 9.85 C +ATOM 1162 CD GLU B 18 45.206 -15.448 50.289 1.00 8.42 C +ATOM 1163 OE1 GLU B 18 44.234 -15.524 49.511 1.00 9.56 O +ATOM 1164 OE2 GLU B 18 45.382 -16.271 51.218 1.00 8.00 O +ATOM 1165 N ASN B 19 45.401 -11.369 51.806 1.00 9.24 N +ATOM 1166 CA ASN B 19 45.557 -11.164 53.242 1.00 9.87 C +ATOM 1167 C ASN B 19 44.248 -10.732 53.898 1.00 8.85 C +ATOM 1168 O ASN B 19 43.897 -11.222 54.976 1.00 8.14 O +ATOM 1169 CB ASN B 19 46.644 -10.121 53.523 1.00 14.61 C +ATOM 1170 CG ASN B 19 48.006 -10.540 52.996 1.00 22.00 C +ATOM 1171 OD1 ASN B 19 48.394 -11.704 53.106 1.00 25.40 O +ATOM 1172 ND2 ASN B 19 48.743 -9.590 52.434 1.00 24.63 N +ATOM 1173 N TYR B 20 43.531 -9.813 53.257 1.00 8.18 N +ATOM 1174 CA TYR B 20 42.266 -9.340 53.804 1.00 8.95 C +ATOM 1175 C TYR B 20 41.182 -10.413 53.812 1.00 6.99 C +ATOM 1176 O TYR B 20 40.396 -10.486 54.756 1.00 6.01 O +ATOM 1177 CB TYR B 20 41.754 -8.112 53.038 1.00 7.13 C +ATOM 1178 CG TYR B 20 42.236 -6.785 53.592 1.00 10.34 C +ATOM 1179 CD1 TYR B 20 43.548 -6.365 53.404 1.00 10.74 C +ATOM 1180 CD2 TYR B 20 41.368 -5.940 54.287 1.00 8.51 C +ATOM 1181 CE1 TYR B 20 43.987 -5.135 53.886 1.00 
12.05 C +ATOM 1182 CE2 TYR B 20 41.797 -4.704 54.778 1.00 10.80 C +ATOM 1183 CZ TYR B 20 43.110 -4.309 54.569 1.00 8.47 C +ATOM 1184 OH TYR B 20 43.553 -3.083 55.017 1.00 8.55 O +ATOM 1185 N PHE B 21 41.125 -11.248 52.780 1.00 5.65 N +ATOM 1186 CA PHE B 21 40.097 -12.284 52.766 1.00 7.22 C +ATOM 1187 C PHE B 21 40.299 -13.326 53.856 1.00 6.90 C +ATOM 1188 O PHE B 21 39.324 -13.832 54.410 1.00 8.49 O +ATOM 1189 CB PHE B 21 39.983 -12.952 51.392 1.00 5.51 C +ATOM 1190 CG PHE B 21 38.875 -12.384 50.547 1.00 7.65 C +ATOM 1191 CD1 PHE B 21 39.053 -11.194 49.852 1.00 6.57 C +ATOM 1192 CD2 PHE B 21 37.629 -13.008 50.502 1.00 8.14 C +ATOM 1193 CE1 PHE B 21 38.005 -10.630 49.123 1.00 8.19 C +ATOM 1194 CE2 PHE B 21 36.577 -12.451 49.778 1.00 7.92 C +ATOM 1195 CZ PHE B 21 36.765 -11.262 49.089 1.00 7.63 C +ATOM 1196 N ARG B 22 41.545 -13.654 54.184 1.00 8.64 N +ATOM 1197 CA ARG B 22 41.753 -14.615 55.260 1.00 7.68 C +ATOM 1198 C ARG B 22 41.218 -13.986 56.546 1.00 9.23 C +ATOM 1199 O ARG B 22 40.626 -14.672 57.380 1.00 9.14 O +ATOM 1200 CB ARG B 22 43.234 -14.984 55.408 1.00 9.83 C +ATOM 1201 CG ARG B 22 43.737 -15.893 54.292 1.00 10.47 C +ATOM 1202 CD ARG B 22 45.028 -16.626 54.665 1.00 13.14 C +ATOM 1203 NE ARG B 22 46.093 -15.705 55.047 1.00 13.85 N +ATOM 1204 CZ ARG B 22 46.393 -15.381 56.301 1.00 16.65 C +ATOM 1205 NH1 ARG B 22 47.374 -14.525 56.546 1.00 19.74 N +ATOM 1206 NH2 ARG B 22 45.724 -15.926 57.310 1.00 16.23 N +ATOM 1207 N LYS B 23 41.398 -12.673 56.692 1.00 7.74 N +ATOM 1208 CA LYS B 23 40.906 -11.974 57.875 1.00 7.68 C +ATOM 1209 C LYS B 23 39.377 -11.989 57.872 1.00 5.61 C +ATOM 1210 O LYS B 23 38.747 -12.230 58.905 1.00 7.57 O +ATOM 1211 CB LYS B 23 41.417 -10.528 57.899 1.00 9.24 C +ATOM 1212 CG LYS B 23 41.097 -9.790 59.192 1.00 11.93 C +ATOM 1213 CD LYS B 23 41.651 -8.368 59.170 1.00 14.74 C +ATOM 1214 CE LYS B 23 41.414 -7.664 60.495 1.00 14.38 C +ATOM 1215 NZ LYS B 23 42.184 -8.300 61.608 1.00 14.94 N +ATOM 1216 N ILE B 24 38.780 -11.740 56.710 1.00 
5.67 N +ATOM 1217 CA ILE B 24 37.320 -11.754 56.607 1.00 7.29 C +ATOM 1218 C ILE B 24 36.753 -13.062 57.160 1.00 6.15 C +ATOM 1219 O ILE B 24 35.866 -13.060 58.012 1.00 5.44 O +ATOM 1220 CB ILE B 24 36.851 -11.614 55.145 1.00 7.36 C +ATOM 1221 CG1 ILE B 24 37.094 -10.182 54.655 1.00 8.85 C +ATOM 1222 CG2 ILE B 24 35.367 -11.981 55.032 1.00 6.64 C +ATOM 1223 CD1 ILE B 24 36.766 -9.965 53.187 1.00 10.23 C +ATOM 1224 N PHE B 25 37.273 -14.181 56.675 1.00 7.30 N +ATOM 1225 CA PHE B 25 36.786 -15.477 57.122 1.00 6.63 C +ATOM 1226 C PHE B 25 37.036 -15.736 58.600 1.00 6.31 C +ATOM 1227 O PHE B 25 36.211 -16.363 59.267 1.00 8.06 O +ATOM 1228 CB PHE B 25 37.379 -16.585 56.248 1.00 6.24 C +ATOM 1229 CG PHE B 25 36.784 -16.631 54.864 1.00 7.17 C +ATOM 1230 CD1 PHE B 25 35.424 -16.880 54.690 1.00 10.68 C +ATOM 1231 CD2 PHE B 25 37.574 -16.405 53.740 1.00 8.30 C +ATOM 1232 CE1 PHE B 25 34.854 -16.902 53.414 1.00 9.57 C +ATOM 1233 CE2 PHE B 25 37.018 -16.424 52.459 1.00 9.90 C +ATOM 1234 CZ PHE B 25 35.653 -16.674 52.295 1.00 9.39 C +ATOM 1235 N LEU B 26 38.160 -15.256 59.125 1.00 6.24 N +ATOM 1236 CA LEU B 26 38.429 -15.438 60.548 1.00 7.07 C +ATOM 1237 C LEU B 26 37.368 -14.669 61.341 1.00 8.61 C +ATOM 1238 O LEU B 26 36.790 -15.187 62.301 1.00 5.92 O +ATOM 1239 CB LEU B 26 39.826 -14.922 60.903 1.00 9.61 C +ATOM 1240 CG LEU B 26 40.979 -15.783 60.382 1.00 11.99 C +ATOM 1241 CD1 LEU B 26 42.318 -15.135 60.716 1.00 15.65 C +ATOM 1242 CD2 LEU B 26 40.888 -17.159 61.010 1.00 15.48 C +ATOM 1243 N ASP B 27 37.096 -13.436 60.929 1.00 8.13 N +ATOM 1244 CA ASP B 27 36.103 -12.634 61.626 1.00 7.07 C +ATOM 1245 C ASP B 27 34.694 -13.209 61.508 1.00 7.64 C +ATOM 1246 O ASP B 27 33.921 -13.169 62.470 1.00 7.91 O +ATOM 1247 CB ASP B 27 36.155 -11.187 61.136 1.00 7.16 C +ATOM 1248 CG ASP B 27 37.445 -10.494 61.533 1.00 11.83 C +ATOM 1249 OD1 ASP B 27 38.008 -10.859 62.590 1.00 13.94 O +ATOM 1250 OD2 ASP B 27 37.894 -9.586 60.805 1.00 9.25 O +ATOM 1251 N VAL B 28 34.356 -13.751 60.342 1.00 
7.89 N +ATOM 1252 CA VAL B 28 33.037 -14.357 60.160 1.00 7.06 C +ATOM 1253 C VAL B 28 32.926 -15.562 61.097 1.00 9.16 C +ATOM 1254 O VAL B 28 31.903 -15.760 61.754 1.00 7.46 O +ATOM 1255 CB VAL B 28 32.816 -14.809 58.693 1.00 6.73 C +ATOM 1256 CG1 VAL B 28 31.580 -15.694 58.589 1.00 7.73 C +ATOM 1257 CG2 VAL B 28 32.634 -13.586 57.804 1.00 7.11 C +ATOM 1258 N ARG B 29 33.987 -16.359 61.174 1.00 8.79 N +ATOM 1259 CA ARG B 29 33.973 -17.523 62.054 1.00 7.46 C +ATOM 1260 C ARG B 29 33.776 -17.098 63.509 1.00 8.15 C +ATOM 1261 O ARG B 29 33.002 -17.712 64.242 1.00 9.77 O +ATOM 1262 CB ARG B 29 35.277 -18.318 61.898 1.00 8.24 C +ATOM 1263 CG ARG B 29 35.368 -19.078 60.575 1.00 9.41 C +ATOM 1264 CD ARG B 29 36.749 -19.682 60.375 1.00 8.70 C +ATOM 1265 NE ARG B 29 36.866 -20.456 59.139 1.00 8.62 N +ATOM 1266 CZ ARG B 29 36.483 -21.722 59.003 1.00 8.78 C +ATOM 1267 NH1 ARG B 29 35.950 -22.369 60.030 1.00 9.82 N +ATOM 1268 NH2 ARG B 29 36.656 -22.349 57.843 1.00 7.70 N +ATOM 1269 N SER B 30 34.465 -16.037 63.918 1.00 8.47 N +ATOM 1270 CA SER B 30 34.359 -15.538 65.287 1.00 8.70 C +ATOM 1271 C SER B 30 32.961 -15.039 65.643 1.00 11.97 C +ATOM 1272 O SER B 30 32.552 -15.105 66.807 1.00 10.24 O +ATOM 1273 CB SER B 30 35.365 -14.405 65.520 1.00 10.41 C +ATOM 1274 OG SER B 30 36.696 -14.894 65.528 1.00 11.54 O +ATOM 1275 N SER B 31 32.233 -14.544 64.644 1.00 9.60 N +ATOM 1276 CA SER B 31 30.889 -14.011 64.854 1.00 10.45 C +ATOM 1277 C SER B 31 29.856 -15.081 65.192 1.00 14.38 C +ATOM 1278 O SER B 31 28.767 -14.765 65.677 1.00 12.30 O +ATOM 1279 CB SER B 31 30.422 -13.255 63.607 1.00 11.04 C +ATOM 1280 OG SER B 31 30.032 -14.161 62.589 1.00 12.20 O +ATOM 1281 N GLY B 32 30.193 -16.340 64.928 1.00 10.52 N +ATOM 1282 CA GLY B 32 29.270 -17.425 65.206 1.00 11.98 C +ATOM 1283 C GLY B 32 28.446 -17.773 63.980 1.00 13.86 C +ATOM 1284 O GLY B 32 27.744 -18.785 63.947 1.00 12.72 O +ATOM 1285 N SER B 33 28.535 -16.928 62.959 1.00 10.25 N +ATOM 1286 CA SER B 33 27.795 -17.154 61.730 1.00 
10.12 C +ATOM 1287 C SER B 33 28.492 -18.175 60.842 1.00 13.27 C +ATOM 1288 O SER B 33 29.716 -18.303 60.857 1.00 14.12 O +ATOM 1289 CB SER B 33 27.634 -15.844 60.953 1.00 11.43 C +ATOM 1290 OG SER B 33 26.877 -16.052 59.770 1.00 9.60 O +ATOM 1291 N LYS B 34 27.692 -18.904 60.075 1.00 12.90 N +ATOM 1292 CA LYS B 34 28.201 -19.903 59.151 1.00 16.28 C +ATOM 1293 C LYS B 34 28.040 -19.350 57.738 1.00 15.90 C +ATOM 1294 O LYS B 34 28.468 -19.973 56.766 1.00 17.22 O +ATOM 1295 CB LYS B 34 27.387 -21.195 59.265 1.00 20.08 C +ATOM 1296 CG LYS B 34 27.424 -21.879 60.621 1.00 26.09 C +ATOM 1297 CD LYS B 34 28.788 -22.475 60.911 1.00 34.94 C +ATOM 1298 CE LYS B 34 28.685 -23.580 61.956 1.00 40.70 C +ATOM 1299 NZ LYS B 34 27.976 -23.125 63.184 1.00 42.62 N +ATOM 1300 N LYS B 35 27.418 -18.177 57.635 1.00 15.00 N +ATOM 1301 CA LYS B 35 27.156 -17.559 56.339 1.00 11.11 C +ATOM 1302 C LYS B 35 27.487 -16.076 56.267 1.00 10.39 C +ATOM 1303 O LYS B 35 27.381 -15.347 57.255 1.00 11.24 O +ATOM 1304 CB LYS B 35 25.683 -17.740 55.977 1.00 15.77 C +ATOM 1305 CG LYS B 35 25.189 -19.176 56.068 1.00 20.51 C +ATOM 1306 CD LYS B 35 23.718 -19.269 55.693 1.00 27.36 C +ATOM 1307 CE LYS B 35 23.207 -20.698 55.805 1.00 30.29 C +ATOM 1308 NZ LYS B 35 21.767 -20.799 55.436 1.00 32.88 N +ATOM 1309 N THR B 36 27.878 -15.635 55.080 1.00 9.45 N +ATOM 1310 CA THR B 36 28.195 -14.235 54.860 1.00 6.93 C +ATOM 1311 C THR B 36 28.056 -13.890 53.388 1.00 9.04 C +ATOM 1312 O THR B 36 28.339 -14.711 52.515 1.00 8.55 O +ATOM 1313 CB THR B 36 29.639 -13.890 55.312 1.00 8.94 C +ATOM 1314 OG1 THR B 36 29.877 -12.488 55.108 1.00 7.64 O +ATOM 1315 CG2 THR B 36 30.668 -14.686 54.509 1.00 8.11 C +ATOM 1316 N THR B 37 27.575 -12.685 53.116 1.00 6.55 N +ATOM 1317 CA THR B 37 27.456 -12.220 51.747 1.00 7.22 C +ATOM 1318 C THR B 37 28.550 -11.157 51.653 1.00 8.96 C +ATOM 1319 O THR B 37 28.499 -10.132 52.332 1.00 10.00 O +ATOM 1320 CB THR B 37 26.045 -11.642 51.461 1.00 9.54 C +ATOM 1321 OG1 THR B 37 26.030 -11.052 
50.157 1.00 19.34 O +ATOM 1322 CG2 THR B 37 25.651 -10.610 52.505 1.00 10.53 C +ATOM 1323 N ILE B 38 29.562 -11.441 50.837 1.00 8.16 N +ATOM 1324 CA ILE B 38 30.718 -10.567 50.672 1.00 8.94 C +ATOM 1325 C ILE B 38 30.612 -9.660 49.451 1.00 10.05 C +ATOM 1326 O ILE B 38 30.499 -10.126 48.319 1.00 9.73 O +ATOM 1327 CB ILE B 38 32.004 -11.415 50.574 1.00 7.92 C +ATOM 1328 CG1 ILE B 38 32.062 -12.379 51.763 1.00 9.45 C +ATOM 1329 CG2 ILE B 38 33.238 -10.515 50.565 1.00 8.79 C +ATOM 1330 CD1 ILE B 38 33.220 -13.372 51.715 1.00 7.50 C +ATOM 1331 N ASN B 39 30.662 -8.357 49.696 1.00 6.83 N +ATOM 1332 CA ASN B 39 30.555 -7.375 48.632 1.00 7.60 C +ATOM 1333 C ASN B 39 31.892 -6.676 48.469 1.00 7.50 C +ATOM 1334 O ASN B 39 32.391 -6.020 49.387 1.00 7.74 O +ATOM 1335 CB ASN B 39 29.431 -6.397 48.972 1.00 6.86 C +ATOM 1336 CG ASN B 39 28.067 -7.076 48.974 1.00 9.10 C +ATOM 1337 OD1 ASN B 39 27.369 -7.091 47.962 1.00 8.69 O +ATOM 1338 ND2 ASN B 39 27.695 -7.661 50.108 1.00 7.52 N +ATOM 1339 N VAL B 40 32.458 -6.839 47.279 1.00 7.15 N +ATOM 1340 CA VAL B 40 33.767 -6.311 46.936 1.00 7.12 C +ATOM 1341 C VAL B 40 33.661 -5.146 45.967 1.00 5.32 C +ATOM 1342 O VAL B 40 33.176 -5.295 44.850 1.00 7.18 O +ATOM 1343 CB VAL B 40 34.619 -7.420 46.298 1.00 7.37 C +ATOM 1344 CG1 VAL B 40 36.071 -6.974 46.193 1.00 8.05 C +ATOM 1345 CG2 VAL B 40 34.495 -8.707 47.128 1.00 8.26 C +ATOM 1346 N PHE B 41 34.125 -3.985 46.407 1.00 6.40 N +ATOM 1347 CA PHE B 41 34.074 -2.781 45.595 1.00 7.08 C +ATOM 1348 C PHE B 41 35.436 -2.650 44.936 1.00 8.59 C +ATOM 1349 O PHE B 41 36.407 -2.182 45.533 1.00 9.38 O +ATOM 1350 CB PHE B 41 33.711 -1.605 46.501 1.00 7.78 C +ATOM 1351 CG PHE B 41 32.379 -1.790 47.188 1.00 6.03 C +ATOM 1352 CD1 PHE B 41 31.199 -1.394 46.565 1.00 7.43 C +ATOM 1353 CD2 PHE B 41 32.299 -2.440 48.419 1.00 9.83 C +ATOM 1354 CE1 PHE B 41 29.961 -1.645 47.155 1.00 7.05 C +ATOM 1355 CE2 PHE B 41 31.067 -2.696 49.017 1.00 6.67 C +ATOM 1356 CZ PHE B 41 29.894 -2.297 48.381 1.00 8.89 C +ATOM 
1357 N THR B 42 35.482 -3.107 43.690 1.00 7.50 N +ATOM 1358 CA THR B 42 36.710 -3.151 42.912 1.00 8.15 C +ATOM 1359 C THR B 42 36.402 -3.075 41.421 1.00 9.00 C +ATOM 1360 O THR B 42 35.268 -3.300 40.997 1.00 9.92 O +ATOM 1361 CB THR B 42 37.437 -4.487 43.196 1.00 8.93 C +ATOM 1362 OG1 THR B 42 38.652 -4.557 42.448 1.00 10.65 O +ATOM 1363 CG2 THR B 42 36.541 -5.666 42.806 1.00 11.87 C +ATOM 1364 N GLU B 43 37.422 -2.775 40.624 1.00 10.57 N +ATOM 1365 CA GLU B 43 37.249 -2.693 39.181 1.00 12.53 C +ATOM 1366 C GLU B 43 37.648 -3.989 38.483 1.00 14.07 C +ATOM 1367 O GLU B 43 37.383 -4.157 37.293 1.00 16.11 O +ATOM 1368 CB GLU B 43 38.075 -1.536 38.611 1.00 16.12 C +ATOM 1369 CG GLU B 43 37.726 -0.181 39.197 1.00 15.81 C +ATOM 1370 CD GLU B 43 36.268 0.189 38.986 1.00 23.61 C +ATOM 1371 OE1 GLU B 43 35.846 0.301 37.816 1.00 24.60 O +ATOM 1372 OE2 GLU B 43 35.545 0.365 39.989 1.00 18.58 O +ATOM 1373 N ILE B 44 38.278 -4.910 39.208 1.00 14.38 N +ATOM 1374 CA ILE B 44 38.701 -6.163 38.588 1.00 13.44 C +ATOM 1375 C ILE B 44 37.572 -7.170 38.400 1.00 15.13 C +ATOM 1376 O ILE B 44 36.524 -7.074 39.036 1.00 15.84 O +ATOM 1377 CB ILE B 44 39.841 -6.843 39.375 1.00 14.02 C +ATOM 1378 CG1 ILE B 44 39.340 -7.340 40.733 1.00 11.19 C +ATOM 1379 CG2 ILE B 44 40.994 -5.866 39.553 1.00 16.33 C +ATOM 1380 CD1 ILE B 44 40.359 -8.210 41.455 1.00 15.68 C +ATOM 1381 N GLN B 45 37.807 -8.134 37.514 1.00 16.76 N +ATOM 1382 CA GLN B 45 36.832 -9.174 37.202 1.00 19.24 C +ATOM 1383 C GLN B 45 36.789 -10.271 38.260 1.00 16.19 C +ATOM 1384 O GLN B 45 37.752 -10.479 38.996 1.00 14.16 O +ATOM 1385 CB GLN B 45 37.155 -9.808 35.845 1.00 24.48 C +ATOM 1386 CG GLN B 45 37.228 -8.825 34.687 1.00 36.32 C +ATOM 1387 CD GLN B 45 35.945 -8.039 34.503 1.00 44.19 C +ATOM 1388 OE1 GLN B 45 35.594 -7.194 35.328 1.00 48.12 O +ATOM 1389 NE2 GLN B 45 35.232 -8.317 33.416 1.00 48.87 N +ATOM 1390 N TYR B 46 35.667 -10.979 38.312 1.00 14.44 N +ATOM 1391 CA TYR B 46 35.469 -12.063 39.267 1.00 15.86 C +ATOM 1392 C TYR B 
46 36.586 -13.102 39.193 1.00 15.61 C +ATOM 1393 O TYR B 46 37.147 -13.498 40.215 1.00 13.79 O +ATOM 1394 CB TYR B 46 34.119 -12.740 39.004 1.00 18.12 C +ATOM 1395 CG TYR B 46 33.872 -13.978 39.836 1.00 21.54 C +ATOM 1396 CD1 TYR B 46 33.393 -13.885 41.142 1.00 25.49 C +ATOM 1397 CD2 TYR B 46 34.141 -15.242 39.324 1.00 25.07 C +ATOM 1398 CE1 TYR B 46 33.190 -15.030 41.917 1.00 27.73 C +ATOM 1399 CE2 TYR B 46 33.944 -16.387 40.088 1.00 28.82 C +ATOM 1400 CZ TYR B 46 33.470 -16.274 41.381 1.00 26.16 C +ATOM 1401 OH TYR B 46 33.292 -17.410 42.136 1.00 35.66 O +ATOM 1402 N GLN B 47 36.904 -13.545 37.980 1.00 14.94 N +ATOM 1403 CA GLN B 47 37.942 -14.550 37.785 1.00 16.78 C +ATOM 1404 C GLN B 47 39.307 -14.112 38.297 1.00 13.48 C +ATOM 1405 O GLN B 47 40.060 -14.923 38.834 1.00 14.27 O +ATOM 1406 CB GLN B 47 38.049 -14.931 36.306 1.00 22.36 C +ATOM 1407 CG GLN B 47 36.854 -15.712 35.777 1.00 35.45 C +ATOM 1408 CD GLN B 47 36.478 -16.881 36.668 1.00 40.51 C +ATOM 1409 OE1 GLN B 47 37.328 -17.687 37.049 1.00 47.75 O +ATOM 1410 NE2 GLN B 47 35.195 -16.982 37.001 1.00 44.08 N +ATOM 1411 N GLU B 48 39.635 -12.835 38.127 1.00 14.84 N +ATOM 1412 CA GLU B 48 40.919 -12.340 38.598 1.00 15.28 C +ATOM 1413 C GLU B 48 40.953 -12.332 40.122 1.00 11.23 C +ATOM 1414 O GLU B 48 41.966 -12.685 40.728 1.00 12.95 O +ATOM 1415 CB GLU B 48 41.194 -10.929 38.068 1.00 19.97 C +ATOM 1416 CG GLU B 48 42.401 -10.274 38.728 1.00 27.02 C +ATOM 1417 CD GLU B 48 42.936 -9.080 37.961 1.00 35.49 C +ATOM 1418 OE1 GLU B 48 42.132 -8.218 37.548 1.00 40.04 O +ATOM 1419 OE2 GLU B 48 44.169 -8.997 37.783 1.00 40.61 O +ATOM 1420 N LEU B 49 39.846 -11.926 40.736 1.00 11.02 N +ATOM 1421 CA LEU B 49 39.762 -11.885 42.193 1.00 11.31 C +ATOM 1422 C LEU B 49 39.961 -13.280 42.770 1.00 10.26 C +ATOM 1423 O LEU B 49 40.787 -13.481 43.656 1.00 9.70 O +ATOM 1424 CB LEU B 49 38.404 -11.336 42.644 1.00 10.72 C +ATOM 1425 CG LEU B 49 38.112 -11.444 44.146 1.00 11.65 C +ATOM 1426 CD1 LEU B 49 39.136 -10.645 44.939 1.00 
10.78 C +ATOM 1427 CD2 LEU B 49 36.704 -10.938 44.434 1.00 11.81 C +ATOM 1428 N VAL B 50 39.202 -14.244 42.261 1.00 10.32 N +ATOM 1429 CA VAL B 50 39.306 -15.613 42.748 1.00 10.38 C +ATOM 1430 C VAL B 50 40.726 -16.154 42.600 1.00 10.88 C +ATOM 1431 O VAL B 50 41.206 -16.896 43.455 1.00 13.41 O +ATOM 1432 CB VAL B 50 38.310 -16.535 42.016 1.00 13.49 C +ATOM 1433 CG1 VAL B 50 38.539 -17.985 42.420 1.00 17.72 C +ATOM 1434 CG2 VAL B 50 36.884 -16.117 42.361 1.00 15.41 C +ATOM 1435 N THR B 51 41.407 -15.783 41.523 1.00 11.07 N +ATOM 1436 CA THR B 51 42.776 -16.248 41.341 1.00 10.86 C +ATOM 1437 C THR B 51 43.654 -15.720 42.478 1.00 11.23 C +ATOM 1438 O THR B 51 44.450 -16.459 43.060 1.00 10.30 O +ATOM 1439 CB THR B 51 43.347 -15.788 39.984 1.00 12.39 C +ATOM 1440 OG1 THR B 51 42.631 -16.439 38.926 1.00 15.79 O +ATOM 1441 CG2 THR B 51 44.827 -16.145 39.877 1.00 18.33 C +ATOM 1442 N LEU B 52 43.487 -14.445 42.810 1.00 9.34 N +ATOM 1443 CA LEU B 52 44.268 -13.828 43.879 1.00 9.57 C +ATOM 1444 C LEU B 52 43.998 -14.404 45.271 1.00 11.04 C +ATOM 1445 O LEU B 52 44.922 -14.555 46.074 1.00 9.13 O +ATOM 1446 CB LEU B 52 44.007 -12.317 43.913 1.00 10.77 C +ATOM 1447 CG LEU B 52 44.517 -11.502 42.726 1.00 12.17 C +ATOM 1448 CD1 LEU B 52 43.986 -10.074 42.826 1.00 9.44 C +ATOM 1449 CD2 LEU B 52 46.042 -11.518 42.710 1.00 12.47 C +ATOM 1450 N ILE B 53 42.737 -14.722 45.555 1.00 9.03 N +ATOM 1451 CA ILE B 53 42.364 -15.239 46.870 1.00 7.73 C +ATOM 1452 C ILE B 53 42.152 -16.749 46.942 1.00 7.93 C +ATOM 1453 O ILE B 53 41.464 -17.244 47.836 1.00 8.40 O +ATOM 1454 CB ILE B 53 41.103 -14.523 47.406 1.00 5.70 C +ATOM 1455 CG1 ILE B 53 39.873 -14.863 46.556 1.00 6.12 C +ATOM 1456 CG2 ILE B 53 41.334 -13.011 47.401 1.00 9.05 C +ATOM 1457 CD1 ILE B 53 38.572 -14.312 47.139 1.00 8.25 C +ATOM 1458 N ARG B 54 42.758 -17.477 46.014 1.00 9.29 N +ATOM 1459 CA ARG B 54 42.631 -18.928 45.974 1.00 9.99 C +ATOM 1460 C ARG B 54 42.875 -19.572 47.327 1.00 8.02 C +ATOM 1461 O ARG B 54 42.100 -20.417 
47.756 1.00 9.00 O +ATOM 1462 CB ARG B 54 43.621 -19.514 44.970 1.00 10.84 C +ATOM 1463 CG ARG B 54 43.545 -21.034 44.825 1.00 15.54 C +ATOM 1464 CD ARG B 54 44.889 -21.574 44.350 1.00 25.54 C +ATOM 1465 NE ARG B 54 45.874 -21.548 45.431 1.00 35.40 N +ATOM 1466 CZ ARG B 54 45.931 -22.444 46.417 1.00 35.95 C +ATOM 1467 NH1 ARG B 54 46.855 -22.337 47.363 1.00 34.18 N +ATOM 1468 NH2 ARG B 54 45.084 -23.463 46.445 1.00 36.89 N +ATOM 1469 N GLU B 55 43.955 -19.179 48.001 1.00 7.65 N +ATOM 1470 CA GLU B 55 44.280 -19.755 49.306 1.00 8.27 C +ATOM 1471 C GLU B 55 43.230 -19.447 50.378 1.00 8.18 C +ATOM 1472 O GLU B 55 42.825 -20.334 51.128 1.00 7.81 O +ATOM 1473 CB GLU B 55 45.660 -19.268 49.765 1.00 7.59 C +ATOM 1474 CG GLU B 55 46.121 -19.813 51.110 1.00 9.10 C +ATOM 1475 CD GLU B 55 46.189 -21.334 51.151 1.00 11.01 C +ATOM 1476 OE1 GLU B 55 46.581 -21.942 50.133 1.00 11.02 O +ATOM 1477 OE2 GLU B 55 45.871 -21.924 52.210 1.00 14.73 O +ATOM 1478 N ALA B 56 42.780 -18.197 50.450 1.00 5.58 N +ATOM 1479 CA ALA B 56 41.766 -17.826 51.441 1.00 8.49 C +ATOM 1480 C ALA B 56 40.509 -18.678 51.253 1.00 6.38 C +ATOM 1481 O ALA B 56 39.930 -19.169 52.222 1.00 7.61 O +ATOM 1482 CB ALA B 56 41.421 -16.341 51.305 1.00 7.15 C +ATOM 1483 N LEU B 57 40.087 -18.854 50.003 1.00 6.97 N +ATOM 1484 CA LEU B 57 38.895 -19.654 49.723 1.00 6.21 C +ATOM 1485 C LEU B 57 39.125 -21.130 50.040 1.00 6.89 C +ATOM 1486 O LEU B 57 38.231 -21.810 50.550 1.00 7.94 O +ATOM 1487 CB LEU B 57 38.481 -19.495 48.256 1.00 6.42 C +ATOM 1488 CG LEU B 57 38.129 -18.065 47.838 1.00 5.28 C +ATOM 1489 CD1 LEU B 57 37.753 -18.044 46.360 1.00 9.60 C +ATOM 1490 CD2 LEU B 57 36.975 -17.546 48.688 1.00 8.97 C +ATOM 1491 N LEU B 58 40.328 -21.617 49.746 1.00 7.19 N +ATOM 1492 CA LEU B 58 40.682 -23.014 50.003 1.00 6.27 C +ATOM 1493 C LEU B 58 40.545 -23.373 51.479 1.00 7.53 C +ATOM 1494 O LEU B 58 40.129 -24.481 51.820 1.00 8.25 O +ATOM 1495 CB LEU B 58 42.121 -23.282 49.548 1.00 6.35 C +ATOM 1496 CG LEU B 58 42.717 -24.658 
49.858 1.00 7.35 C +ATOM 1497 CD1 LEU B 58 41.888 -25.747 49.182 1.00 7.89 C +ATOM 1498 CD2 LEU B 58 44.163 -24.708 49.380 1.00 10.98 C +ATOM 1499 N GLU B 59 40.896 -22.431 52.349 1.00 7.39 N +ATOM 1500 CA GLU B 59 40.833 -22.649 53.795 1.00 5.78 C +ATOM 1501 C GLU B 59 39.439 -22.459 54.377 1.00 7.88 C +ATOM 1502 O GLU B 59 39.225 -22.688 55.567 1.00 7.81 O +ATOM 1503 CB GLU B 59 41.794 -21.689 54.508 1.00 7.90 C +ATOM 1504 CG GLU B 59 43.269 -21.913 54.189 1.00 9.78 C +ATOM 1505 CD GLU B 59 44.161 -20.852 54.813 1.00 14.31 C +ATOM 1506 OE1 GLU B 59 43.781 -20.303 55.868 1.00 14.64 O +ATOM 1507 OE2 GLU B 59 45.246 -20.578 54.261 1.00 12.79 O +ATOM 1508 N ASN B 60 38.487 -22.056 53.543 1.00 7.83 N +ATOM 1509 CA ASN B 60 37.139 -21.803 54.034 1.00 6.84 C +ATOM 1510 C ASN B 60 36.024 -22.395 53.193 1.00 9.20 C +ATOM 1511 O ASN B 60 34.979 -21.777 52.989 1.00 7.74 O +ATOM 1512 CB ASN B 60 36.959 -20.295 54.187 1.00 7.58 C +ATOM 1513 CG ASN B 60 37.812 -19.737 55.302 1.00 7.35 C +ATOM 1514 OD1 ASN B 60 37.463 -19.854 56.474 1.00 8.74 O +ATOM 1515 ND2 ASN B 60 38.955 -19.151 54.947 1.00 7.82 N +ATOM 1516 N ILE B 61 36.254 -23.612 52.716 1.00 7.19 N +ATOM 1517 CA ILE B 61 35.274 -24.308 51.903 1.00 8.45 C +ATOM 1518 C ILE B 61 33.970 -24.537 52.664 1.00 6.57 C +ATOM 1519 O ILE B 61 32.893 -24.505 52.067 1.00 7.89 O +ATOM 1520 CB ILE B 61 35.847 -25.659 51.421 1.00 5.90 C +ATOM 1521 CG1 ILE B 61 36.984 -25.406 50.425 1.00 9.41 C +ATOM 1522 CG2 ILE B 61 34.754 -26.509 50.796 1.00 8.75 C +ATOM 1523 CD1 ILE B 61 37.798 -26.654 50.073 1.00 8.14 C +ATOM 1524 N ASP B 62 34.053 -24.753 53.977 1.00 7.51 N +ATOM 1525 CA ASP B 62 32.839 -24.992 54.752 1.00 7.54 C +ATOM 1526 C ASP B 62 32.080 -23.742 55.198 1.00 8.49 C +ATOM 1527 O ASP B 62 31.092 -23.839 55.927 1.00 9.19 O +ATOM 1528 CB ASP B 62 33.125 -25.913 55.954 1.00 8.95 C +ATOM 1529 CG ASP B 62 34.151 -25.345 56.931 1.00 11.12 C +ATOM 1530 OD1 ASP B 62 34.862 -24.373 56.603 1.00 11.35 O +ATOM 1531 OD2 ASP B 62 34.250 -25.905 
58.043 1.00 12.08 O +ATOM 1532 N ILE B 63 32.526 -22.570 54.754 1.00 6.73 N +ATOM 1533 CA ILE B 63 31.831 -21.333 55.099 1.00 7.47 C +ATOM 1534 C ILE B 63 30.831 -21.025 53.983 1.00 6.96 C +ATOM 1535 O ILE B 63 31.175 -21.080 52.801 1.00 8.12 O +ATOM 1536 CB ILE B 63 32.812 -20.143 55.247 1.00 7.47 C +ATOM 1537 CG1 ILE B 63 33.789 -20.415 56.397 1.00 8.97 C +ATOM 1538 CG2 ILE B 63 32.041 -18.852 55.484 1.00 9.26 C +ATOM 1539 CD1 ILE B 63 33.126 -20.594 57.752 1.00 13.96 C +ATOM 1540 N GLY B 64 29.594 -20.718 54.361 1.00 8.71 N +ATOM 1541 CA GLY B 64 28.574 -20.414 53.371 1.00 9.81 C +ATOM 1542 C GLY B 64 28.652 -18.974 52.900 1.00 11.60 C +ATOM 1543 O GLY B 64 27.928 -18.109 53.392 1.00 16.00 O +ATOM 1544 N TYR B 65 29.521 -18.716 51.931 1.00 9.38 N +ATOM 1545 CA TYR B 65 29.691 -17.360 51.422 1.00 9.58 C +ATOM 1546 C TYR B 65 29.271 -17.191 49.968 1.00 9.17 C +ATOM 1547 O TYR B 65 29.208 -18.152 49.198 1.00 10.42 O +ATOM 1548 CB TYR B 65 31.159 -16.940 51.560 1.00 8.00 C +ATOM 1549 CG TYR B 65 32.102 -17.727 50.671 1.00 8.44 C +ATOM 1550 CD1 TYR B 65 32.355 -17.325 49.358 1.00 8.81 C +ATOM 1551 CD2 TYR B 65 32.707 -18.900 51.130 1.00 7.86 C +ATOM 1552 CE1 TYR B 65 33.186 -18.072 48.522 1.00 9.11 C +ATOM 1553 CE2 TYR B 65 33.542 -19.655 50.300 1.00 7.54 C +ATOM 1554 CZ TYR B 65 33.773 -19.233 48.997 1.00 8.70 C +ATOM 1555 OH TYR B 65 34.584 -19.970 48.162 1.00 8.32 O +ATOM 1556 N GLU B 66 28.971 -15.950 49.609 1.00 9.95 N +ATOM 1557 CA GLU B 66 28.626 -15.599 48.244 1.00 10.88 C +ATOM 1558 C GLU B 66 29.390 -14.316 47.987 1.00 10.98 C +ATOM 1559 O GLU B 66 29.493 -13.463 48.869 1.00 11.96 O +ATOM 1560 CB GLU B 66 27.117 -15.390 48.066 1.00 16.40 C +ATOM 1561 CG GLU B 66 26.420 -14.541 49.113 1.00 22.87 C +ATOM 1562 CD GLU B 66 24.932 -14.377 48.814 1.00 29.66 C +ATOM 1563 OE1 GLU B 66 24.317 -15.340 48.309 1.00 28.07 O +ATOM 1564 OE2 GLU B 66 24.372 -13.294 49.089 1.00 28.91 O +ATOM 1565 N LEU B 67 29.958 -14.200 46.793 1.00 10.09 N +ATOM 1566 CA LEU B 67 30.736 
-13.023 46.438 1.00 9.45 C +ATOM 1567 C LEU B 67 30.013 -12.174 45.404 1.00 9.82 C +ATOM 1568 O LEU B 67 29.510 -12.684 44.405 1.00 9.58 O +ATOM 1569 CB LEU B 67 32.098 -13.436 45.864 1.00 10.31 C +ATOM 1570 CG LEU B 67 33.024 -14.359 46.663 1.00 12.61 C +ATOM 1571 CD1 LEU B 67 34.260 -14.652 45.824 1.00 14.54 C +ATOM 1572 CD2 LEU B 67 33.416 -13.716 47.986 1.00 16.56 C +ATOM 1573 N PHE B 68 29.965 -10.874 45.655 1.00 7.13 N +ATOM 1574 CA PHE B 68 29.349 -9.941 44.731 1.00 8.35 C +ATOM 1575 C PHE B 68 30.373 -8.842 44.496 1.00 8.15 C +ATOM 1576 O PHE B 68 30.893 -8.265 45.452 1.00 9.87 O +ATOM 1577 CB PHE B 68 28.079 -9.342 45.338 1.00 6.56 C +ATOM 1578 CG PHE B 68 26.940 -10.313 45.438 1.00 11.63 C +ATOM 1579 CD1 PHE B 68 26.211 -10.667 44.306 1.00 15.37 C +ATOM 1580 CD2 PHE B 68 26.596 -10.875 46.662 1.00 12.83 C +ATOM 1581 CE1 PHE B 68 25.152 -11.568 44.393 1.00 13.27 C +ATOM 1582 CE2 PHE B 68 25.539 -11.778 46.760 1.00 15.36 C +ATOM 1583 CZ PHE B 68 24.817 -12.124 45.626 1.00 16.24 C +ATOM 1584 N LEU B 69 30.692 -8.578 43.234 1.00 5.68 N +ATOM 1585 CA LEU B 69 31.650 -7.527 42.914 1.00 4.44 C +ATOM 1586 C LEU B 69 30.907 -6.317 42.384 1.00 6.39 C +ATOM 1587 O LEU B 69 29.952 -6.459 41.617 1.00 7.09 O +ATOM 1588 CB LEU B 69 32.661 -8.008 41.873 1.00 5.18 C +ATOM 1589 CG LEU B 69 33.798 -8.882 42.412 1.00 10.26 C +ATOM 1590 CD1 LEU B 69 33.221 -10.151 43.019 1.00 11.61 C +ATOM 1591 CD2 LEU B 69 34.768 -9.215 41.286 1.00 11.86 C +ATOM 1592 N TRP B 70 31.340 -5.130 42.800 1.00 6.32 N +ATOM 1593 CA TRP B 70 30.705 -3.894 42.360 1.00 6.17 C +ATOM 1594 C TRP B 70 31.718 -2.869 41.889 1.00 6.61 C +ATOM 1595 O TRP B 70 32.664 -2.547 42.611 1.00 7.06 O +ATOM 1596 CB TRP B 70 29.895 -3.267 43.501 1.00 7.19 C +ATOM 1597 CG TRP B 70 28.922 -4.198 44.142 1.00 8.42 C +ATOM 1598 CD1 TRP B 70 29.044 -4.803 45.358 1.00 9.62 C +ATOM 1599 CD2 TRP B 70 27.671 -4.631 43.600 1.00 10.62 C +ATOM 1600 NE1 TRP B 70 27.941 -5.588 45.610 1.00 8.49 N +ATOM 1601 CE2 TRP B 70 27.083 -5.499 
44.545 1.00 7.83 C +ATOM 1602 CE3 TRP B 70 26.989 -4.369 42.403 1.00 11.10 C +ATOM 1603 CZ2 TRP B 70 25.840 -6.108 44.333 1.00 10.76 C +ATOM 1604 CZ3 TRP B 70 25.756 -4.974 42.191 1.00 13.52 C +ATOM 1605 CH2 TRP B 70 25.194 -5.835 43.152 1.00 12.87 C +ATOM 1606 N LYS B 71 31.529 -2.358 40.679 1.00 7.61 N +ATOM 1607 CA LYS B 71 32.419 -1.326 40.172 1.00 6.82 C +ATOM 1608 C LYS B 71 32.048 -0.038 40.905 1.00 7.46 C +ATOM 1609 O LYS B 71 30.964 0.055 41.489 1.00 8.76 O +ATOM 1610 CB LYS B 71 32.262 -1.184 38.657 1.00 10.45 C +ATOM 1611 CG LYS B 71 32.688 -2.447 37.920 1.00 14.60 C +ATOM 1612 CD LYS B 71 32.776 -2.235 36.424 1.00 21.45 C +ATOM 1613 CE LYS B 71 33.170 -3.528 35.718 1.00 28.85 C +ATOM 1614 NZ LYS B 71 34.417 -4.119 36.279 1.00 29.47 N +ATOM 1615 N LYS B 72 32.934 0.952 40.881 1.00 10.20 N +ATOM 1616 CA LYS B 72 32.683 2.190 41.615 1.00 10.67 C +ATOM 1617 C LYS B 72 31.363 2.895 41.325 1.00 11.13 C +ATOM 1618 O LYS B 72 30.823 3.562 42.204 1.00 12.36 O +ATOM 1619 CB LYS B 72 33.845 3.175 41.423 1.00 12.82 C +ATOM 1620 CG LYS B 72 34.057 3.639 40.001 1.00 18.96 C +ATOM 1621 CD LYS B 72 35.183 4.664 39.913 1.00 28.19 C +ATOM 1622 CE LYS B 72 36.520 4.078 40.352 1.00 32.59 C +ATOM 1623 NZ LYS B 72 37.623 5.080 40.287 1.00 35.96 N +ATOM 1624 N ASN B 73 30.832 2.751 40.113 1.00 8.90 N +ATOM 1625 CA ASN B 73 29.574 3.414 39.788 1.00 9.56 C +ATOM 1626 C ASN B 73 28.354 2.527 40.037 1.00 10.10 C +ATOM 1627 O ASN B 73 27.233 2.896 39.693 1.00 10.34 O +ATOM 1628 CB ASN B 73 29.592 3.918 38.328 1.00 8.81 C +ATOM 1629 CG ASN B 73 29.639 2.794 37.304 1.00 10.55 C +ATOM 1630 OD1 ASN B 73 29.998 1.660 37.616 1.00 12.03 O +ATOM 1631 ND2 ASN B 73 29.290 3.117 36.060 1.00 9.29 N +ATOM 1632 N GLU B 74 28.569 1.373 40.666 1.00 9.00 N +ATOM 1633 CA GLU B 74 27.469 0.454 40.940 1.00 6.85 C +ATOM 1634 C GLU B 74 27.067 0.383 42.410 1.00 8.04 C +ATOM 1635 O GLU B 74 26.327 -0.515 42.811 1.00 7.33 O +ATOM 1636 CB GLU B 74 27.822 -0.950 40.445 1.00 9.08 C +ATOM 1637 CG GLU B 74 
28.150 -1.011 38.957 1.00 8.15 C +ATOM 1638 CD GLU B 74 28.526 -2.404 38.503 1.00 12.67 C +ATOM 1639 OE1 GLU B 74 29.315 -3.068 39.207 1.00 10.13 O +ATOM 1640 OE2 GLU B 74 28.042 -2.832 37.435 1.00 15.47 O +ATOM 1641 N VAL B 75 27.546 1.323 43.217 1.00 6.01 N +ATOM 1642 CA VAL B 75 27.194 1.309 44.628 1.00 6.91 C +ATOM 1643 C VAL B 75 25.682 1.445 44.799 1.00 6.67 C +ATOM 1644 O VAL B 75 25.110 0.892 45.736 1.00 8.26 O +ATOM 1645 CB VAL B 75 27.912 2.439 45.412 1.00 7.12 C +ATOM 1646 CG1 VAL B 75 27.420 2.463 46.858 1.00 5.38 C +ATOM 1647 CG2 VAL B 75 29.418 2.209 45.390 1.00 6.09 C +ATOM 1648 N ASP B 76 25.025 2.163 43.893 1.00 6.82 N +ATOM 1649 CA ASP B 76 23.583 2.319 44.020 1.00 7.89 C +ATOM 1650 C ASP B 76 22.834 1.000 43.830 1.00 7.12 C +ATOM 1651 O ASP B 76 21.776 0.803 44.422 1.00 8.14 O +ATOM 1652 CB ASP B 76 23.051 3.404 43.064 1.00 7.64 C +ATOM 1653 CG ASP B 76 23.365 3.132 41.602 1.00 11.13 C +ATOM 1654 OD1 ASP B 76 24.132 2.203 41.297 1.00 10.63 O +ATOM 1655 OD2 ASP B 76 22.837 3.878 40.751 1.00 13.20 O +ATOM 1656 N ILE B 77 23.387 0.092 43.028 1.00 6.11 N +ATOM 1657 CA ILE B 77 22.744 -1.205 42.808 1.00 8.01 C +ATOM 1658 C ILE B 77 22.865 -2.010 44.101 1.00 8.15 C +ATOM 1659 O ILE B 77 21.906 -2.637 44.558 1.00 9.44 O +ATOM 1660 CB ILE B 77 23.425 -2.003 41.673 1.00 9.58 C +ATOM 1661 CG1 ILE B 77 23.411 -1.197 40.376 1.00 7.88 C +ATOM 1662 CG2 ILE B 77 22.686 -3.322 41.452 1.00 8.83 C +ATOM 1663 CD1 ILE B 77 24.174 -1.865 39.240 1.00 12.83 C +ATOM 1664 N PHE B 78 24.063 -1.987 44.677 1.00 6.79 N +ATOM 1665 CA PHE B 78 24.345 -2.679 45.931 1.00 8.25 C +ATOM 1666 C PHE B 78 23.391 -2.195 47.028 1.00 8.65 C +ATOM 1667 O PHE B 78 22.766 -2.994 47.732 1.00 7.35 O +ATOM 1668 CB PHE B 78 25.797 -2.404 46.343 1.00 8.13 C +ATOM 1669 CG PHE B 78 26.064 -2.614 47.804 1.00 9.18 C +ATOM 1670 CD1 PHE B 78 26.089 -3.896 48.347 1.00 11.50 C +ATOM 1671 CD2 PHE B 78 26.260 -1.525 48.647 1.00 9.14 C +ATOM 1672 CE1 PHE B 78 26.302 -4.091 49.710 1.00 12.70 C +ATOM 1673 
CE2 PHE B 78 26.472 -1.709 50.011 1.00 8.09 C +ATOM 1674 CZ PHE B 78 26.492 -2.997 50.543 1.00 8.89 C +ATOM 1675 N LEU B 79 23.280 -0.879 47.174 1.00 6.99 N +ATOM 1676 CA LEU B 79 22.406 -0.309 48.195 1.00 8.19 C +ATOM 1677 C LEU B 79 20.935 -0.653 47.975 1.00 9.19 C +ATOM 1678 O LEU B 79 20.199 -0.903 48.933 1.00 10.56 O +ATOM 1679 CB LEU B 79 22.586 1.212 48.258 1.00 7.77 C +ATOM 1680 CG LEU B 79 23.921 1.718 48.820 1.00 5.84 C +ATOM 1681 CD1 LEU B 79 23.942 3.241 48.802 1.00 6.51 C +ATOM 1682 CD2 LEU B 79 24.111 1.209 50.245 1.00 8.59 C +ATOM 1683 N LYS B 80 20.506 -0.667 46.717 1.00 8.80 N +ATOM 1684 CA LYS B 80 19.117 -0.992 46.399 1.00 11.21 C +ATOM 1685 C LYS B 80 18.807 -2.433 46.794 1.00 10.33 C +ATOM 1686 O LYS B 80 17.754 -2.720 47.370 1.00 11.42 O +ATOM 1687 CB LYS B 80 18.856 -0.805 44.901 1.00 8.58 C +ATOM 1688 CG LYS B 80 17.441 -1.174 44.457 1.00 13.04 C +ATOM 1689 CD LYS B 80 16.393 -0.278 45.106 1.00 19.56 C +ATOM 1690 CE LYS B 80 14.994 -0.616 44.612 1.00 20.22 C +ATOM 1691 NZ LYS B 80 14.629 -2.024 44.919 1.00 31.99 N +ATOM 1692 N ASN B 81 19.731 -3.335 46.486 1.00 7.78 N +ATOM 1693 CA ASN B 81 19.559 -4.746 46.801 1.00 8.85 C +ATOM 1694 C ASN B 81 19.494 -5.008 48.302 1.00 11.79 C +ATOM 1695 O ASN B 81 18.920 -6.005 48.733 1.00 13.02 O +ATOM 1696 CB ASN B 81 20.695 -5.571 46.195 1.00 8.56 C +ATOM 1697 CG ASN B 81 20.609 -5.668 44.686 1.00 11.64 C +ATOM 1698 OD1 ASN B 81 19.701 -5.116 44.060 1.00 10.80 O +ATOM 1699 ND2 ASN B 81 21.561 -6.377 44.091 1.00 10.75 N +ATOM 1700 N LEU B 82 20.082 -4.120 49.098 1.00 9.55 N +ATOM 1701 CA LEU B 82 20.057 -4.301 50.545 1.00 10.33 C +ATOM 1702 C LEU B 82 18.641 -4.223 51.100 1.00 11.86 C +ATOM 1703 O LEU B 82 18.372 -4.695 52.203 1.00 13.00 O +ATOM 1704 CB LEU B 82 20.930 -3.254 51.240 1.00 9.26 C +ATOM 1705 CG LEU B 82 22.442 -3.431 51.116 1.00 9.05 C +ATOM 1706 CD1 LEU B 82 23.140 -2.329 51.907 1.00 8.92 C +ATOM 1707 CD2 LEU B 82 22.851 -4.799 51.646 1.00 10.02 C +ATOM 1708 N GLU B 83 17.731 -3.629 50.339 
1.00 14.53 N +ATOM 1709 CA GLU B 83 16.353 -3.521 50.798 1.00 19.28 C +ATOM 1710 C GLU B 83 15.740 -4.909 50.969 1.00 16.91 C +ATOM 1711 O GLU B 83 14.796 -5.087 51.740 1.00 22.85 O +ATOM 1712 CB GLU B 83 15.523 -2.705 49.806 1.00 17.82 C +ATOM 1713 CG GLU B 83 16.124 -1.348 49.480 1.00 20.84 C +ATOM 1714 CD GLU B 83 15.169 -0.450 48.720 1.00 24.65 C +ATOM 1715 OE1 GLU B 83 14.455 -0.957 47.831 1.00 24.44 O +ATOM 1716 OE2 GLU B 83 15.143 0.766 49.008 1.00 29.53 O +ATOM 1717 N LYS B 84 16.288 -5.890 50.257 1.00 18.37 N +ATOM 1718 CA LYS B 84 15.791 -7.263 50.318 1.00 19.92 C +ATOM 1719 C LYS B 84 16.692 -8.213 51.104 1.00 21.36 C +ATOM 1720 O LYS B 84 16.516 -9.429 51.038 1.00 22.42 O +ATOM 1721 CB LYS B 84 15.623 -7.824 48.905 1.00 19.42 C +ATOM 1722 CG LYS B 84 14.672 -7.046 48.012 1.00 20.23 C +ATOM 1723 CD LYS B 84 14.571 -7.707 46.647 1.00 19.41 C +ATOM 1724 CE LYS B 84 13.602 -6.969 45.739 1.00 21.89 C +ATOM 1725 NZ LYS B 84 13.462 -7.655 44.424 1.00 22.51 N +ATOM 1726 N SER B 85 17.653 -7.668 51.842 1.00 20.41 N +ATOM 1727 CA SER B 85 18.573 -8.503 52.610 1.00 23.07 C +ATOM 1728 C SER B 85 18.061 -8.818 54.011 1.00 23.57 C +ATOM 1729 O SER B 85 17.211 -8.109 54.546 1.00 24.14 O +ATOM 1730 CB SER B 85 19.933 -7.813 52.728 1.00 21.57 C +ATOM 1731 OG SER B 85 19.832 -6.638 53.513 1.00 25.20 O +ATOM 1732 N GLU B 86 18.589 -9.890 54.595 1.00 26.07 N +ATOM 1733 CA GLU B 86 18.218 -10.297 55.947 1.00 27.12 C +ATOM 1734 C GLU B 86 19.449 -10.455 56.833 1.00 25.62 C +ATOM 1735 O GLU B 86 19.457 -11.265 57.761 1.00 29.42 O +ATOM 1736 CB GLU B 86 17.442 -11.616 55.932 1.00 33.85 C +ATOM 1737 CG GLU B 86 15.974 -11.483 55.572 1.00 46.71 C +ATOM 1738 CD GLU B 86 15.149 -12.647 56.094 1.00 55.10 C +ATOM 1739 OE1 GLU B 86 15.076 -12.816 57.331 1.00 58.77 O +ATOM 1740 OE2 GLU B 86 14.577 -13.393 55.272 1.00 60.31 O +ATOM 1741 N VAL B 87 20.487 -9.678 56.549 1.00 19.87 N +ATOM 1742 CA VAL B 87 21.721 -9.739 57.325 1.00 15.79 C +ATOM 1743 C VAL B 87 21.506 -9.220 58.747 1.00 
15.58 C +ATOM 1744 O VAL B 87 20.686 -8.329 58.969 1.00 16.78 O +ATOM 1745 CB VAL B 87 22.835 -8.920 56.642 1.00 13.99 C +ATOM 1746 CG1 VAL B 87 23.189 -9.550 55.304 1.00 14.84 C +ATOM 1747 CG2 VAL B 87 22.378 -7.485 56.435 1.00 15.74 C +ATOM 1748 N ASP B 88 22.243 -9.777 59.707 1.00 12.15 N +ATOM 1749 CA ASP B 88 22.111 -9.363 61.103 1.00 12.58 C +ATOM 1750 C ASP B 88 23.424 -8.891 61.719 1.00 12.85 C +ATOM 1751 O ASP B 88 23.489 -8.607 62.913 1.00 12.10 O +ATOM 1752 CB ASP B 88 21.535 -10.510 61.946 1.00 12.92 C +ATOM 1753 CG ASP B 88 22.373 -11.778 61.865 1.00 16.42 C +ATOM 1754 OD1 ASP B 88 23.456 -11.748 61.248 1.00 13.85 O +ATOM 1755 OD2 ASP B 88 21.945 -12.809 62.426 1.00 18.60 O +ATOM 1756 N GLY B 89 24.465 -8.810 60.898 1.00 9.61 N +ATOM 1757 CA GLY B 89 25.765 -8.370 61.375 1.00 8.63 C +ATOM 1758 C GLY B 89 26.526 -7.749 60.220 1.00 8.43 C +ATOM 1759 O GLY B 89 26.303 -8.118 59.068 1.00 9.07 O +ATOM 1760 N LEU B 90 27.431 -6.824 60.526 1.00 6.06 N +ATOM 1761 CA LEU B 90 28.197 -6.125 59.495 1.00 7.92 C +ATOM 1762 C LEU B 90 29.699 -6.040 59.755 1.00 8.64 C +ATOM 1763 O LEU B 90 30.126 -5.642 60.839 1.00 8.63 O +ATOM 1764 CB LEU B 90 27.641 -4.703 59.335 1.00 6.14 C +ATOM 1765 CG LEU B 90 28.462 -3.683 58.540 1.00 7.39 C +ATOM 1766 CD1 LEU B 90 28.459 -4.048 57.061 1.00 8.17 C +ATOM 1767 CD2 LEU B 90 27.865 -2.287 58.742 1.00 7.67 C +ATOM 1768 N LEU B 91 30.488 -6.410 58.747 1.00 8.03 N +ATOM 1769 CA LEU B 91 31.948 -6.346 58.821 1.00 7.73 C +ATOM 1770 C LEU B 91 32.418 -5.421 57.696 1.00 7.16 C +ATOM 1771 O LEU B 91 31.980 -5.560 56.556 1.00 7.44 O +ATOM 1772 CB LEU B 91 32.566 -7.739 58.630 1.00 8.30 C +ATOM 1773 CG LEU B 91 32.297 -8.804 59.697 1.00 9.27 C +ATOM 1774 CD1 LEU B 91 32.918 -10.122 59.253 1.00 8.84 C +ATOM 1775 CD2 LEU B 91 32.879 -8.366 61.036 1.00 9.26 C +ATOM 1776 N VAL B 92 33.300 -4.476 58.020 1.00 5.28 N +ATOM 1777 CA VAL B 92 33.805 -3.528 57.030 1.00 7.29 C +ATOM 1778 C VAL B 92 35.331 -3.537 56.959 1.00 8.28 C +ATOM 1779 O VAL B 92 
36.003 -3.466 57.991 1.00 8.12 O +ATOM 1780 CB VAL B 92 33.333 -2.097 57.360 1.00 6.10 C +ATOM 1781 CG1 VAL B 92 33.953 -1.102 56.387 1.00 9.80 C +ATOM 1782 CG2 VAL B 92 31.810 -2.031 57.295 1.00 8.41 C +ATOM 1783 N TYR B 93 35.865 -3.613 55.739 1.00 7.56 N +ATOM 1784 CA TYR B 93 37.314 -3.644 55.512 1.00 8.52 C +ATOM 1785 C TYR B 93 37.751 -2.746 54.368 1.00 8.93 C +ATOM 1786 O TYR B 93 37.059 -2.635 53.356 1.00 8.79 O +ATOM 1787 CB TYR B 93 37.777 -5.054 55.143 1.00 6.28 C +ATOM 1788 CG TYR B 93 37.401 -6.110 56.135 1.00 7.75 C +ATOM 1789 CD1 TYR B 93 38.258 -6.448 57.178 1.00 8.54 C +ATOM 1790 CD2 TYR B 93 36.173 -6.760 56.047 1.00 6.62 C +ATOM 1791 CE1 TYR B 93 37.899 -7.410 58.112 1.00 7.93 C +ATOM 1792 CE2 TYR B 93 35.805 -7.720 56.973 1.00 7.24 C +ATOM 1793 CZ TYR B 93 36.668 -8.041 58.001 1.00 6.95 C +ATOM 1794 OH TYR B 93 36.297 -8.983 58.924 1.00 8.30 O +ATOM 1795 N CYS B 94 38.916 -2.126 54.519 1.00 10.13 N +ATOM 1796 CA CYS B 94 39.474 -1.296 53.458 1.00 6.30 C +ATOM 1797 C CYS B 94 40.934 -1.012 53.768 1.00 9.34 C +ATOM 1798 O CYS B 94 41.413 -1.327 54.855 1.00 9.31 O +ATOM 1799 CB CYS B 94 38.720 0.038 53.327 1.00 9.03 C +ATOM 1800 SG CYS B 94 39.178 1.336 54.530 1.00 9.87 S +ATOM 1801 N ASP B 95 41.650 -0.461 52.793 1.00 8.48 N +ATOM 1802 CA ASP B 95 43.035 -0.071 53.012 1.00 10.08 C +ATOM 1803 C ASP B 95 43.051 1.444 52.844 1.00 11.80 C +ATOM 1804 O ASP B 95 42.013 2.040 52.560 1.00 10.63 O +ATOM 1805 CB ASP B 95 44.006 -0.766 52.039 1.00 10.61 C +ATOM 1806 CG ASP B 95 43.580 -0.672 50.585 1.00 8.45 C +ATOM 1807 OD1 ASP B 95 42.664 0.111 50.255 1.00 12.75 O +ATOM 1808 OD2 ASP B 95 44.191 -1.392 49.765 1.00 11.59 O +ATOM 1809 N ASP B 96 44.202 2.080 53.024 1.00 12.83 N +ATOM 1810 CA ASP B 96 44.246 3.534 52.912 1.00 12.81 C +ATOM 1811 C ASP B 96 43.786 4.116 51.581 1.00 12.83 C +ATOM 1812 O ASP B 96 43.073 5.124 51.555 1.00 12.12 O +ATOM 1813 CB ASP B 96 45.646 4.051 53.243 1.00 14.63 C +ATOM 1814 CG ASP B 96 45.965 3.940 54.717 1.00 18.00 C +ATOM 
1815 OD1 ASP B 96 45.036 4.107 55.539 1.00 20.62 O +ATOM 1816 OD2 ASP B 96 47.142 3.704 55.055 1.00 20.71 O +ATOM 1817 N GLU B 97 44.181 3.493 50.480 1.00 13.53 N +ATOM 1818 CA GLU B 97 43.797 3.990 49.165 1.00 16.29 C +ATOM 1819 C GLU B 97 42.293 3.999 48.910 1.00 15.06 C +ATOM 1820 O GLU B 97 41.811 4.738 48.053 1.00 13.57 O +ATOM 1821 CB GLU B 97 44.491 3.177 48.068 1.00 19.19 C +ATOM 1822 CG GLU B 97 45.967 3.506 47.915 1.00 33.67 C +ATOM 1823 CD GLU B 97 46.597 2.839 46.710 1.00 39.74 C +ATOM 1824 OE1 GLU B 97 46.065 3.000 45.590 1.00 44.51 O +ATOM 1825 OE2 GLU B 97 47.631 2.159 46.883 1.00 45.30 O +ATOM 1826 N ASN B 98 41.549 3.195 49.661 1.00 12.17 N +ATOM 1827 CA ASN B 98 40.107 3.113 49.459 1.00 11.15 C +ATOM 1828 C ASN B 98 39.261 3.487 50.674 1.00 11.99 C +ATOM 1829 O ASN B 98 38.048 3.275 50.679 1.00 10.72 O +ATOM 1830 CB ASN B 98 39.754 1.699 48.991 1.00 11.64 C +ATOM 1831 CG ASN B 98 40.408 1.347 47.664 1.00 10.09 C +ATOM 1832 OD1 ASN B 98 39.992 1.828 46.607 1.00 13.42 O +ATOM 1833 ND2 ASN B 98 41.447 0.520 47.713 1.00 8.18 N +ATOM 1834 N LYS B 99 39.896 4.064 51.689 1.00 10.70 N +ATOM 1835 CA LYS B 99 39.202 4.453 52.914 1.00 9.28 C +ATOM 1836 C LYS B 99 38.101 5.505 52.754 1.00 10.44 C +ATOM 1837 O LYS B 99 37.004 5.350 53.298 1.00 9.09 O +ATOM 1838 CB LYS B 99 40.223 4.933 53.946 1.00 14.04 C +ATOM 1839 CG LYS B 99 39.622 5.373 55.271 1.00 15.01 C +ATOM 1840 CD LYS B 99 40.712 5.559 56.327 1.00 19.90 C +ATOM 1841 CE LYS B 99 41.761 6.564 55.878 1.00 26.11 C +ATOM 1842 NZ LYS B 99 42.944 6.571 56.785 1.00 26.96 N +ATOM 1843 N VAL B 100 38.390 6.580 52.030 1.00 9.93 N +ATOM 1844 CA VAL B 100 37.398 7.632 51.838 1.00 7.71 C +ATOM 1845 C VAL B 100 36.164 7.083 51.128 1.00 6.80 C +ATOM 1846 O VAL B 100 35.033 7.363 51.524 1.00 8.22 O +ATOM 1847 CB VAL B 100 37.983 8.805 51.019 1.00 9.27 C +ATOM 1848 CG1 VAL B 100 36.916 9.865 50.786 1.00 11.14 C +ATOM 1849 CG2 VAL B 100 39.168 9.414 51.769 1.00 15.69 C +ATOM 1850 N PHE B 101 36.398 6.280 50.097 1.00 
7.70 N +ATOM 1851 CA PHE B 101 35.322 5.675 49.312 1.00 8.14 C +ATOM 1852 C PHE B 101 34.479 4.742 50.180 1.00 8.71 C +ATOM 1853 O PHE B 101 33.250 4.853 50.224 1.00 7.41 O +ATOM 1854 CB PHE B 101 35.927 4.907 48.130 1.00 5.31 C +ATOM 1855 CG PHE B 101 34.907 4.246 47.230 1.00 7.28 C +ATOM 1856 CD1 PHE B 101 33.906 4.989 46.616 1.00 8.36 C +ATOM 1857 CD2 PHE B 101 34.976 2.880 46.976 1.00 8.23 C +ATOM 1858 CE1 PHE B 101 32.985 4.381 45.754 1.00 12.37 C +ATOM 1859 CE2 PHE B 101 34.066 2.261 46.120 1.00 9.85 C +ATOM 1860 CZ PHE B 101 33.068 3.012 45.506 1.00 11.79 C +HETATM 1861 N MSE B 102 35.133 3.819 50.878 1.00 8.34 N +HETATM 1862 CA MSE B 102 34.399 2.889 51.728 1.00 7.79 C +HETATM 1863 C MSE B 102 33.624 3.614 52.821 1.00 8.44 C +HETATM 1864 O MSE B 102 32.502 3.230 53.152 1.00 7.89 O +HETATM 1865 CB MSE B 102 35.342 1.866 52.363 1.00 7.74 C +HETATM 1866 CG MSE B 102 34.653 0.952 53.367 1.00 7.13 C +HETATM 1867 SE MSE B 102 33.179 -0.057 52.589 1.00 21.70 SE +HETATM 1868 CE MSE B 102 34.237 -1.244 51.580 1.00 3.62 C +ATOM 1869 N SER B 103 34.214 4.667 53.379 1.00 10.25 N +ATOM 1870 CA SER B 103 33.543 5.428 54.423 1.00 9.15 C +ATOM 1871 C SER B 103 32.213 5.987 53.913 1.00 9.43 C +ATOM 1872 O SER B 103 31.223 6.000 54.643 1.00 8.98 O +ATOM 1873 CB SER B 103 34.438 6.569 54.911 1.00 12.94 C +ATOM 1874 OG SER B 103 35.597 6.055 55.542 1.00 22.39 O +ATOM 1875 N LYS B 104 32.194 6.449 52.664 1.00 9.19 N +ATOM 1876 CA LYS B 104 30.970 6.986 52.078 1.00 8.57 C +ATOM 1877 C LYS B 104 29.930 5.884 51.887 1.00 8.03 C +ATOM 1878 O LYS B 104 28.731 6.110 52.064 1.00 8.00 O +ATOM 1879 CB LYS B 104 31.268 7.676 50.740 1.00 8.00 C +ATOM 1880 CG LYS B 104 31.871 9.067 50.906 1.00 9.99 C +ATOM 1881 CD LYS B 104 32.200 9.726 49.569 1.00 7.11 C +ATOM 1882 CE LYS B 104 33.320 8.997 48.840 1.00 6.13 C +ATOM 1883 NZ LYS B 104 33.713 9.685 47.577 1.00 5.13 N +ATOM 1884 N ILE B 105 30.384 4.688 51.531 1.00 6.44 N +ATOM 1885 CA ILE B 105 29.462 3.573 51.353 1.00 6.44 C +ATOM 1886 C 
ILE B 105 28.830 3.272 52.708 1.00 6.86 C +ATOM 1887 O ILE B 105 27.612 3.161 52.822 1.00 6.84 O +ATOM 1888 CB ILE B 105 30.192 2.306 50.833 1.00 7.27 C +ATOM 1889 CG1 ILE B 105 30.792 2.582 49.447 1.00 6.29 C +ATOM 1890 CG2 ILE B 105 29.221 1.133 50.772 1.00 8.00 C +ATOM 1891 CD1 ILE B 105 31.576 1.404 48.858 1.00 7.28 C +ATOM 1892 N VAL B 106 29.661 3.163 53.743 1.00 7.24 N +ATOM 1893 CA VAL B 106 29.164 2.872 55.084 1.00 9.20 C +ATOM 1894 C VAL B 106 28.161 3.927 55.551 1.00 7.38 C +ATOM 1895 O VAL B 106 27.119 3.595 56.122 1.00 8.51 O +ATOM 1896 CB VAL B 106 30.323 2.788 56.108 1.00 7.67 C +ATOM 1897 CG1 VAL B 106 29.766 2.577 57.510 1.00 11.73 C +ATOM 1898 CG2 VAL B 106 31.254 1.633 55.740 1.00 12.47 C +ATOM 1899 N ASP B 107 28.473 5.195 55.298 1.00 7.77 N +ATOM 1900 CA ASP B 107 27.600 6.297 55.700 1.00 9.68 C +ATOM 1901 C ASP B 107 26.195 6.181 55.132 1.00 9.43 C +ATOM 1902 O ASP B 107 25.243 6.702 55.715 1.00 10.12 O +ATOM 1903 CB ASP B 107 28.160 7.649 55.246 1.00 11.43 C +ATOM 1904 CG ASP B 107 29.454 8.021 55.935 1.00 14.81 C +ATOM 1905 OD1 ASP B 107 29.679 7.575 57.079 1.00 13.62 O +ATOM 1906 OD2 ASP B 107 30.238 8.783 55.327 1.00 15.91 O +ATOM 1907 N ASN B 108 26.071 5.508 53.992 1.00 6.57 N +ATOM 1908 CA ASN B 108 24.786 5.373 53.330 1.00 6.46 C +ATOM 1909 C ASN B 108 24.078 4.036 53.473 1.00 7.81 C +ATOM 1910 O ASN B 108 23.088 3.775 52.787 1.00 6.91 O +ATOM 1911 CB ASN B 108 24.944 5.724 51.854 1.00 5.98 C +ATOM 1912 CG ASN B 108 25.294 7.180 51.656 1.00 11.56 C +ATOM 1913 OD1 ASN B 108 26.455 7.536 51.435 1.00 13.06 O +ATOM 1914 ND2 ASN B 108 24.288 8.038 51.762 1.00 7.03 N +ATOM 1915 N LEU B 109 24.574 3.193 54.367 1.00 8.14 N +ATOM 1916 CA LEU B 109 23.942 1.902 54.592 1.00 9.38 C +ATOM 1917 C LEU B 109 22.630 2.122 55.329 1.00 9.65 C +ATOM 1918 O LEU B 109 22.479 3.106 56.056 1.00 11.76 O +ATOM 1919 CB LEU B 109 24.843 1.007 55.443 1.00 7.90 C +ATOM 1920 CG LEU B 109 26.133 0.519 54.786 1.00 9.37 C +ATOM 1921 CD1 LEU B 109 27.005 -0.174 55.820 
1.00 11.40 C +ATOM 1922 CD2 LEU B 109 25.797 -0.421 53.642 1.00 9.65 C +ATOM 1923 N PRO B 110 21.658 1.219 55.136 1.00 9.50 N +ATOM 1924 CA PRO B 110 20.372 1.357 55.826 1.00 11.16 C +ATOM 1925 C PRO B 110 20.658 1.400 57.325 1.00 11.60 C +ATOM 1926 O PRO B 110 21.537 0.692 57.816 1.00 11.71 O +ATOM 1927 CB PRO B 110 19.628 0.090 55.417 1.00 9.96 C +ATOM 1928 CG PRO B 110 20.142 -0.164 54.034 1.00 14.79 C +ATOM 1929 CD PRO B 110 21.629 0.087 54.192 1.00 12.17 C +ATOM 1930 N THR B 111 19.915 2.230 58.045 1.00 10.64 N +ATOM 1931 CA THR B 111 20.094 2.385 59.485 1.00 11.86 C +ATOM 1932 C THR B 111 20.244 1.077 60.269 1.00 12.74 C +ATOM 1933 O THR B 111 21.185 0.923 61.051 1.00 15.22 O +ATOM 1934 CB THR B 111 18.921 3.195 60.078 1.00 14.73 C +ATOM 1935 OG1 THR B 111 18.902 4.497 59.480 1.00 19.58 O +ATOM 1936 CG2 THR B 111 19.062 3.336 61.581 1.00 15.13 C +ATOM 1937 N ALA B 112 19.323 0.142 60.053 1.00 12.23 N +ATOM 1938 CA ALA B 112 19.331 -1.141 60.754 1.00 11.14 C +ATOM 1939 C ALA B 112 20.574 -1.979 60.483 1.00 12.34 C +ATOM 1940 O ALA B 112 21.040 -2.715 61.355 1.00 13.84 O +ATOM 1941 CB ALA B 112 18.080 -1.932 60.391 1.00 12.37 C +ATOM 1942 N ILE B 113 21.111 -1.869 59.274 1.00 9.34 N +ATOM 1943 CA ILE B 113 22.304 -2.621 58.913 1.00 9.05 C +ATOM 1944 C ILE B 113 23.555 -1.956 59.482 1.00 7.74 C +ATOM 1945 O ILE B 113 24.417 -2.625 60.057 1.00 11.19 O +ATOM 1946 CB ILE B 113 22.432 -2.740 57.378 1.00 10.56 C +ATOM 1947 CG1 ILE B 113 21.304 -3.629 56.844 1.00 10.65 C +ATOM 1948 CG2 ILE B 113 23.796 -3.307 57.002 1.00 13.59 C +ATOM 1949 CD1 ILE B 113 21.297 -3.801 55.332 1.00 15.03 C +ATOM 1950 N LYS B 114 23.645 -0.639 59.327 1.00 10.63 N +ATOM 1951 CA LYS B 114 24.794 0.124 59.815 1.00 11.86 C +ATOM 1952 C LYS B 114 24.963 -0.081 61.318 1.00 11.42 C +ATOM 1953 O LYS B 114 26.074 -0.212 61.827 1.00 11.28 O +ATOM 1954 CB LYS B 114 24.588 1.614 59.527 1.00 14.01 C +ATOM 1955 CG LYS B 114 25.802 2.487 59.815 1.00 17.21 C +ATOM 1956 CD LYS B 114 25.454 3.972 59.759 
1.00 21.95 C +ATOM 1957 CE LYS B 114 24.861 4.377 58.417 1.00 22.79 C +ATOM 1958 NZ LYS B 114 24.536 5.835 58.380 1.00 25.75 N +ATOM 1959 N ARG B 115 23.831 -0.098 62.012 1.00 11.17 N +ATOM 1960 CA ARG B 115 23.769 -0.271 63.455 1.00 12.50 C +ATOM 1961 C ARG B 115 24.406 -1.587 63.920 1.00 13.03 C +ATOM 1962 O ARG B 115 24.989 -1.654 65.005 1.00 12.52 O +ATOM 1963 CB ARG B 115 22.293 -0.205 63.865 1.00 17.05 C +ATOM 1964 CG ARG B 115 21.964 -0.530 65.299 1.00 24.23 C +ATOM 1965 CD ARG B 115 20.478 -0.855 65.396 1.00 17.93 C +ATOM 1966 NE ARG B 115 19.625 0.260 64.986 1.00 17.89 N +ATOM 1967 CZ ARG B 115 18.384 0.122 64.528 1.00 15.78 C +ATOM 1968 NH1 ARG B 115 17.848 -1.084 64.406 1.00 18.02 N +ATOM 1969 NH2 ARG B 115 17.665 1.191 64.219 1.00 16.37 N +ATOM 1970 N ASN B 116 24.299 -2.623 63.091 1.00 11.67 N +ATOM 1971 CA ASN B 116 24.841 -3.943 63.414 1.00 11.83 C +ATOM 1972 C ASN B 116 26.324 -4.121 63.093 1.00 10.94 C +ATOM 1973 O ASN B 116 26.787 -5.246 62.882 1.00 8.87 O +ATOM 1974 CB ASN B 116 24.043 -5.033 62.688 1.00 16.10 C +ATOM 1975 CG ASN B 116 22.639 -5.197 63.240 1.00 25.97 C +ATOM 1976 OD1 ASN B 116 22.443 -5.273 64.453 1.00 29.95 O +ATOM 1977 ND2 ASN B 116 21.656 -5.268 62.348 1.00 29.56 N +ATOM 1978 N LEU B 117 27.063 -3.018 63.057 1.00 11.88 N +ATOM 1979 CA LEU B 117 28.491 -3.063 62.772 1.00 12.36 C +ATOM 1980 C LEU B 117 29.195 -3.846 63.874 1.00 14.22 C +ATOM 1981 O LEU B 117 29.126 -3.479 65.049 1.00 15.46 O +ATOM 1982 CB LEU B 117 29.061 -1.640 62.696 1.00 14.10 C +ATOM 1983 CG LEU B 117 30.572 -1.486 62.471 1.00 14.06 C +ATOM 1984 CD1 LEU B 117 30.963 -2.022 61.099 1.00 11.30 C +ATOM 1985 CD2 LEU B 117 30.951 -0.014 62.586 1.00 15.66 C +ATOM 1986 N ILE B 118 29.860 -4.931 63.487 1.00 11.99 N +ATOM 1987 CA ILE B 118 30.584 -5.786 64.424 1.00 14.24 C +ATOM 1988 C ILE B 118 32.050 -5.387 64.479 1.00 14.47 C +ATOM 1989 O ILE B 118 32.655 -5.317 65.551 1.00 15.41 O +ATOM 1990 CB ILE B 118 30.537 -7.271 63.985 1.00 13.78 C +ATOM 1991 CG1 ILE B 118 
29.107 -7.801 64.047 1.00 17.05 C +ATOM 1992 CG2 ILE B 118 31.464 -8.106 64.863 1.00 21.57 C +ATOM 1993 CD1 ILE B 118 28.975 -9.211 63.506 1.00 13.64 C +ATOM 1994 N LYS B 119 32.617 -5.133 63.307 1.00 12.50 N +ATOM 1995 CA LYS B 119 34.020 -4.779 63.206 1.00 13.01 C +ATOM 1996 C LYS B 119 34.268 -3.873 62.013 1.00 13.28 C +ATOM 1997 O LYS B 119 33.690 -4.068 60.943 1.00 10.47 O +ATOM 1998 CB LYS B 119 34.845 -6.058 63.061 1.00 15.90 C +ATOM 1999 CG LYS B 119 36.341 -5.859 62.985 1.00 20.79 C +ATOM 2000 CD LYS B 119 37.027 -7.195 62.754 1.00 22.35 C +ATOM 2001 CE LYS B 119 38.522 -7.087 62.940 1.00 23.28 C +ATOM 2002 NZ LYS B 119 38.848 -6.704 64.340 1.00 21.74 N +ATOM 2003 N ASP B 120 35.123 -2.875 62.207 1.00 11.74 N +ATOM 2004 CA ASP B 120 35.468 -1.949 61.143 1.00 13.09 C +ATOM 2005 C ASP B 120 36.980 -1.826 61.068 1.00 15.64 C +ATOM 2006 O ASP B 120 37.588 -1.076 61.830 1.00 18.59 O +ATOM 2007 CB ASP B 120 34.859 -0.566 61.395 1.00 14.18 C +ATOM 2008 CG ASP B 120 35.133 0.406 60.255 1.00 21.36 C +ATOM 2009 OD1 ASP B 120 36.318 0.667 59.958 1.00 18.80 O +ATOM 2010 OD2 ASP B 120 34.162 0.905 59.651 1.00 24.72 O +ATOM 2011 N PHE B 121 37.586 -2.590 60.165 1.00 10.68 N +ATOM 2012 CA PHE B 121 39.028 -2.547 59.977 1.00 8.67 C +ATOM 2013 C PHE B 121 39.187 -1.661 58.755 1.00 9.80 C +ATOM 2014 O PHE B 121 39.505 -2.131 57.658 1.00 10.45 O +ATOM 2015 CB PHE B 121 39.567 -3.948 59.690 1.00 11.06 C +ATOM 2016 CG PHE B 121 41.064 -4.048 59.755 1.00 8.57 C +ATOM 2017 CD1 PHE B 121 41.720 -4.053 60.981 1.00 7.88 C +ATOM 2018 CD2 PHE B 121 41.819 -4.133 58.589 1.00 11.03 C +ATOM 2019 CE1 PHE B 121 43.108 -4.144 61.049 1.00 9.29 C +ATOM 2020 CE2 PHE B 121 43.210 -4.224 58.644 1.00 12.45 C +ATOM 2021 CZ PHE B 121 43.857 -4.230 59.876 1.00 11.76 C +ATOM 2022 N CYS B 122 38.954 -0.368 58.952 1.00 9.68 N +ATOM 2023 CA CYS B 122 39.012 0.558 57.842 1.00 11.76 C +ATOM 2024 C CYS B 122 39.164 2.035 58.195 1.00 12.00 C +ATOM 2025 O CYS B 122 40.214 2.631 57.964 1.00 11.46 O +ATOM 
2026 CB CYS B 122 37.749 0.362 56.999 1.00 10.50 C +ATOM 2027 SG CYS B 122 37.515 1.575 55.670 1.00 12.06 S +ATOM 2028 N ARG B 123 38.109 2.618 58.753 1.00 13.81 N +ATOM 2029 CA ARG B 123 38.098 4.038 59.077 1.00 13.54 C +ATOM 2030 C ARG B 123 39.199 4.550 60.003 1.00 15.49 C +ATOM 2031 O ARG B 123 39.665 5.677 59.835 1.00 15.89 O +ATOM 2032 CB ARG B 123 36.724 4.424 59.631 1.00 17.36 C +ATOM 2033 CG ARG B 123 35.592 4.061 58.678 1.00 27.63 C +ATOM 2034 CD ARG B 123 34.225 4.502 59.180 1.00 35.34 C +ATOM 2035 NE ARG B 123 34.042 5.948 59.100 1.00 40.69 N +ATOM 2036 CZ ARG B 123 32.965 6.534 58.583 1.00 43.50 C +ATOM 2037 NH1 ARG B 123 32.876 7.856 58.550 1.00 44.75 N +ATOM 2038 NH2 ARG B 123 31.979 5.797 58.087 1.00 40.61 N +ATOM 2039 N LYS B 124 39.622 3.735 60.964 1.00 12.28 N +ATOM 2040 CA LYS B 124 40.664 4.165 61.897 1.00 10.38 C +ATOM 2041 C LYS B 124 42.094 3.826 61.471 1.00 11.81 C +ATOM 2042 O LYS B 124 43.044 4.116 62.200 1.00 10.66 O +ATOM 2043 CB LYS B 124 40.400 3.591 63.293 1.00 12.36 C +ATOM 2044 CG LYS B 124 39.249 4.256 64.030 1.00 15.36 C +ATOM 2045 CD LYS B 124 39.132 3.712 65.448 1.00 24.54 C +ATOM 2046 CE LYS B 124 38.082 4.458 66.251 1.00 31.08 C +ATOM 2047 NZ LYS B 124 38.118 4.064 67.691 1.00 36.16 N +ATOM 2048 N LEU B 125 42.251 3.215 60.301 1.00 10.78 N +ATOM 2049 CA LEU B 125 43.581 2.873 59.807 1.00 9.35 C +ATOM 2050 C LEU B 125 44.342 4.140 59.430 1.00 13.02 C +ATOM 2051 O LEU B 125 43.768 5.079 58.880 1.00 16.48 O +ATOM 2052 CB LEU B 125 43.482 1.964 58.579 1.00 12.38 C +ATOM 2053 CG LEU B 125 43.052 0.517 58.828 1.00 9.98 C +ATOM 2054 CD1 LEU B 125 42.807 -0.166 57.488 1.00 15.34 C +ATOM 2055 CD2 LEU B 125 44.125 -0.216 59.624 1.00 10.78 C +ATOM 2056 N SER B 126 45.634 4.162 59.737 1.00 10.00 N +ATOM 2057 CA SER B 126 46.478 5.305 59.416 1.00 14.06 C +ATOM 2058 C SER B 126 47.202 5.060 58.094 1.00 16.28 C +ATOM 2059 O SER B 126 47.689 6.043 57.498 1.00 15.23 O +ATOM 2060 CB SER B 126 47.498 5.546 60.535 1.00 15.40 C +ATOM 2061 OG SER 
B 126 48.333 4.419 60.724 1.00 18.55 O +TER 2062 SER B 126 +HETATM 2063 S SO4 B 127 48.867 -16.604 52.271 1.00 25.56 S +HETATM 2064 O1 SO4 B 127 49.341 -16.224 50.947 1.00 28.92 O +HETATM 2065 O2 SO4 B 127 48.593 -18.036 52.303 1.00 28.08 O +HETATM 2066 O3 SO4 B 127 47.644 -15.882 52.588 1.00 25.89 O +HETATM 2067 O4 SO4 B 127 49.897 -16.287 53.254 1.00 29.35 O +HETATM 2068 O HOH A 127 28.458 6.021 35.670 1.00 18.16 O +HETATM 2069 O HOH A 128 15.890 4.410 18.138 1.00 20.16 O +HETATM 2070 O HOH A 129 27.529 3.101 15.774 1.00 15.18 O +HETATM 2071 O HOH A 130 32.450 11.412 36.797 1.00 19.18 O +HETATM 2072 O HOH A 131 8.531 7.793 20.199 1.00 17.77 O +HETATM 2073 O HOH A 132 14.364 5.265 15.973 1.00 14.05 O +HETATM 2074 O HOH A 133 21.168 -10.826 39.672 1.00 40.57 O +HETATM 2075 O HOH A 134 13.915 13.483 37.608 1.00 27.78 O +HETATM 2076 O HOH A 135 26.790 -7.651 16.405 1.00 24.21 O +HETATM 2077 O HOH A 136 6.144 2.232 31.847 1.00 13.55 O +HETATM 2078 O HOH A 137 23.212 5.252 37.469 1.00 16.16 O +HETATM 2079 O HOH A 138 32.876 14.054 31.321 1.00 14.52 O +HETATM 2080 O HOH A 139 18.750 15.727 36.860 1.00 13.89 O +HETATM 2081 O HOH A 140 20.509 13.974 37.699 1.00 18.12 O +HETATM 2082 O HOH A 141 12.567 16.103 40.950 1.00 47.04 O +HETATM 2083 O HOH A 142 9.021 -9.289 16.267 1.00 17.98 O +HETATM 2084 O HOH A 143 17.666 0.546 9.599 1.00 17.59 O +HETATM 2085 O HOH A 144 20.509 19.781 39.096 1.00 16.70 O +HETATM 2086 O HOH A 145 29.542 14.893 29.222 1.00 19.74 O +HETATM 2087 O HOH A 146 24.207 0.245 8.979 1.00 13.69 O +HETATM 2088 O HOH A 147 19.424 -0.651 7.809 1.00 27.68 O +HETATM 2089 O HOH A 148 9.795 2.207 30.687 1.00 15.45 O +HETATM 2090 O HOH A 149 18.090 5.197 13.129 1.00 16.39 O +HETATM 2091 O HOH A 150 10.390 8.107 40.116 1.00 22.43 O +HETATM 2092 O HOH A 151 9.065 -6.856 18.209 1.00 15.48 O +HETATM 2093 O HOH A 152 0.569 7.297 34.180 1.00 59.53 O +HETATM 2094 O HOH A 153 15.795 -12.274 29.582 1.00 22.45 O +HETATM 2095 O HOH A 154 0.364 8.593 28.085 1.00 32.19 O 
+HETATM 2096 O HOH A 155 14.526 -6.119 13.878 1.00 17.77 O +HETATM 2097 O HOH A 156 32.185 -6.589 38.208 1.00 18.38 O +HETATM 2098 O HOH A 157 7.829 8.695 37.464 1.00 26.42 O +HETATM 2099 O HOH A 158 34.392 10.333 26.308 1.00 36.60 O +HETATM 2100 O HOH A 159 23.565 -8.918 42.385 1.00 17.23 O +HETATM 2101 O HOH A 160 13.468 -2.601 41.021 1.00 31.02 O +HETATM 2102 O HOH A 161 15.974 -11.974 39.259 1.00 46.44 O +HETATM 2103 O HOH A 162 20.465 -3.746 15.557 1.00 19.08 O +HETATM 2104 O HOH A 163 10.423 -8.380 32.230 1.00 20.38 O +HETATM 2105 O HOH A 164 26.461 20.839 31.209 1.00 31.39 O +HETATM 2106 O HOH A 165 29.295 -9.281 16.094 1.00 23.95 O +HETATM 2107 O HOH A 166 26.125 5.967 38.272 1.00 11.91 O +HETATM 2108 O HOH A 167 29.108 7.237 33.084 1.00 20.47 O +HETATM 2109 O HOH A 168 34.636 11.173 33.949 1.00 30.19 O +HETATM 2110 O HOH A 169 6.420 -3.918 15.489 1.00 25.60 O +HETATM 2111 O HOH A 170 23.472 24.694 26.504 1.00 26.33 O +HETATM 2112 O HOH A 171 34.478 14.634 27.057 1.00 37.21 O +HETATM 2113 O HOH A 172 18.881 4.081 43.149 1.00 24.39 O +HETATM 2114 O HOH A 173 28.866 14.402 22.340 1.00 18.69 O +HETATM 2115 O HOH A 174 4.439 4.023 24.306 1.00 23.70 O +HETATM 2116 O HOH A 175 31.755 2.603 33.263 1.00 16.38 O +HETATM 2117 O HOH A 176 23.200 -4.358 36.680 1.00 31.65 O +HETATM 2118 O HOH A 177 21.150 21.348 36.961 1.00 26.97 O +HETATM 2119 O HOH A 178 11.925 20.572 30.823 1.00 22.88 O +HETATM 2120 O HOH A 179 16.017 -13.071 21.195 1.00 25.75 O +HETATM 2121 O HOH A 180 14.731 -0.038 41.120 1.00 26.44 O +HETATM 2122 O HOH A 181 11.300 -9.182 34.868 1.00 21.65 O +HETATM 2123 O HOH A 182 18.427 8.411 15.853 1.00 38.01 O +HETATM 2124 O HOH A 183 14.795 -4.039 42.949 1.00 21.73 O +HETATM 2125 O HOH A 184 21.308 0.911 6.430 1.00 32.59 O +HETATM 2126 O HOH A 185 4.527 0.507 15.147 1.00 25.41 O +HETATM 2127 O HOH A 186 29.661 0.827 20.631 1.00 25.66 O +HETATM 2128 O HOH A 187 21.751 11.717 19.933 1.00 32.92 O +HETATM 2129 O HOH A 188 21.736 -9.779 17.752 1.00 27.30 O 
+HETATM 2130 O HOH A 189 11.316 -12.582 27.378 1.00 29.15 O +HETATM 2131 O HOH A 190 9.669 5.484 18.610 1.00 26.69 O +HETATM 2132 O HOH A 191 19.318 3.820 18.119 1.00 38.74 O +HETATM 2133 O HOH A 192 18.070 -13.805 22.871 1.00 25.31 O +HETATM 2134 O HOH A 193 22.731 8.761 14.625 1.00 41.11 O +HETATM 2135 O HOH A 194 4.385 2.690 38.102 1.00 35.23 O +HETATM 2136 O HOH A 195 23.645 19.112 17.947 1.00 46.35 O +HETATM 2137 O HOH A 196 30.774 3.605 16.864 1.00 28.90 O +HETATM 2138 O HOH A 197 19.222 -10.903 33.649 1.00 28.13 O +HETATM 2139 O HOH A 198 17.347 9.215 41.145 1.00 22.51 O +HETATM 2140 O HOH A 199 21.370 8.664 17.154 1.00 38.09 O +HETATM 2141 O HOH A 200 18.365 -12.649 29.262 1.00 33.71 O +HETATM 2142 O HOH A 201 2.634 2.625 18.276 1.00 42.51 O +HETATM 2143 O HOH A 202 13.378 13.206 20.148 1.00 35.17 O +HETATM 2144 O HOH A 203 8.441 -6.718 31.351 1.00 38.20 O +HETATM 2145 O HOH A 204 21.225 -5.776 17.077 1.00 25.88 O +HETATM 2146 O HOH A 205 9.449 -9.504 24.578 1.00 41.50 O +HETATM 2147 O HOH A 206 33.653 8.582 34.021 1.00 37.11 O +HETATM 2148 O HOH A 207 32.071 -2.237 29.668 1.00 38.24 O +HETATM 2149 O HOH A 208 22.846 20.908 31.331 1.00 24.85 O +HETATM 2150 O HOH A 209 9.709 16.523 36.392 1.00 34.23 O +HETATM 2151 O HOH A 210 19.545 -8.901 47.793 1.00 19.93 O +HETATM 2152 O HOH A 211 3.834 1.410 34.546 1.00 29.83 O +HETATM 2153 O HOH A 212 9.928 10.065 19.432 1.00 23.37 O +HETATM 2154 O HOH A 213 17.316 2.456 17.331 1.00 23.33 O +HETATM 2155 O HOH A 214 18.932 -10.214 38.460 1.00 51.98 O +HETATM 2156 O HOH A 215 12.017 14.522 36.271 1.00 20.10 O +HETATM 2157 O HOH A 216 16.337 6.663 14.598 1.00 20.86 O +HETATM 2158 O HOH A 217 11.392 -9.367 14.836 1.00 24.37 O +HETATM 2159 O HOH A 218 24.178 -2.514 8.091 1.00 29.18 O +HETATM 2160 O HOH A 219 19.046 7.405 11.509 1.00 24.46 O +HETATM 2161 O HOH A 220 13.723 -7.995 15.643 1.00 19.27 O +HETATM 2162 O HOH A 221 16.915 -6.210 12.855 1.00 34.03 O +HETATM 2163 O HOH A 222 15.213 -12.334 36.436 1.00 43.21 O +HETATM 
2164 O HOH A 223 16.447 -11.679 34.274 1.00 31.98 O +HETATM 2165 O HOH A 224 22.688 -2.690 14.501 1.00 17.55 O +HETATM 2166 O HOH A 225 9.995 -10.442 28.308 1.00 27.23 O +HETATM 2167 O HOH A 226 31.864 6.950 32.720 1.00 21.11 O +HETATM 2168 O HOH A 227 4.229 -2.360 15.587 1.00 23.38 O +HETATM 2169 O HOH A 228 4.505 0.687 30.549 1.00 23.25 O +HETATM 2170 O HOH A 229 6.174 9.648 39.470 1.00 41.45 O +HETATM 2171 O HOH A 230 4.610 12.718 39.742 1.00 35.21 O +HETATM 2172 O HOH A 231 15.402 10.740 42.875 1.00 36.92 O +HETATM 2173 O HOH A 232 16.260 14.730 38.071 1.00 25.76 O +HETATM 2174 O HOH A 233 15.127 16.485 39.592 1.00 30.08 O +HETATM 2175 O HOH A 234 28.618 7.242 38.403 1.00 15.53 O +HETATM 2176 O HOH A 235 28.047 16.891 29.963 1.00 23.89 O +HETATM 2177 O HOH A 236 27.988 19.477 29.092 1.00 28.80 O +HETATM 2178 O HOH A 237 26.753 23.936 32.222 1.00 31.67 O +HETATM 2179 O HOH A 238 28.048 21.800 33.174 1.00 21.45 O +HETATM 2180 O HOH A 239 33.571 13.322 35.119 1.00 15.52 O +HETATM 2181 O HOH A 240 32.541 8.637 36.660 1.00 36.94 O +HETATM 2182 O HOH A 241 34.452 16.574 29.303 1.00 48.18 O +HETATM 2183 O HOH A 242 18.024 5.361 19.430 1.00 47.32 O +HETATM 2184 O HOH A 243 14.592 11.263 14.709 1.00 42.99 O +HETATM 2185 O HOH A 244 14.557 12.387 17.278 1.00 40.04 O +HETATM 2186 O HOH A 245 5.402 11.533 19.367 1.00 37.99 O +HETATM 2187 O HOH A 246 1.547 10.483 34.966 1.00 50.86 O +HETATM 2188 O HOH A 247 8.347 19.301 29.971 1.00 34.22 O +HETATM 2189 O HOH A 248 4.474 5.156 21.778 1.00 35.03 O +HETATM 2190 O HOH A 249 4.468 -6.497 29.200 1.00 55.94 O +HETATM 2191 O HOH A 250 19.651 -5.640 13.417 1.00 36.65 O +HETATM 2192 O HOH A 251 21.605 -7.187 12.499 1.00 40.70 O +HETATM 2193 O HOH A 252 23.453 -4.868 12.762 1.00 29.49 O +HETATM 2194 O HOH A 253 23.477 2.365 7.143 1.00 32.07 O +HETATM 2195 O HOH A 254 9.360 -11.007 35.370 1.00 34.26 O +HETATM 2196 O HOH A 255 13.398 -14.109 27.878 1.00 35.77 O +HETATM 2197 O HOH A 256 21.048 -13.292 21.921 1.00 33.01 O +HETATM 2198 O 
HOH A 257 33.756 -0.353 28.616 1.00 50.01 O +HETATM 2199 O HOH A 258 23.567 -10.992 38.444 1.00 31.78 O +HETATM 2200 O HOH A 259 17.284 -11.906 42.550 1.00 54.66 O +HETATM 2201 O HOH A 260 30.748 19.823 29.133 1.00 25.39 O +HETATM 2202 O HOH A 261 25.043 -8.497 14.937 1.00 41.33 O +HETATM 2203 O HOH A 262 19.440 8.016 41.932 1.00 35.29 O +HETATM 2204 O HOH A 263 29.159 5.342 11.721 1.00 43.86 O +HETATM 2205 O HOH A 264 10.175 16.614 23.367 1.00 47.96 O +HETATM 2206 O HOH A 265 8.888 19.630 27.509 1.00 45.65 O +HETATM 2207 O HOH A 266 15.171 8.791 13.449 1.00 28.77 O +HETATM 2208 O HOH A 267 17.008 9.159 11.329 1.00 40.73 O +HETATM 2209 O HOH A 268 20.272 -10.986 36.161 1.00 41.24 O +HETATM 2210 O HOH A 269 21.708 -11.083 42.464 1.00 33.85 O +HETATM 2211 O HOH A 270 34.883 19.761 29.704 1.00 42.92 O +HETATM 2212 O HOH A 271 33.870 21.990 30.733 1.00 43.19 O +HETATM 2213 O HOH A 272 10.579 -15.293 33.494 1.00 39.64 O +HETATM 2214 O HOH A 273 9.508 -13.518 35.159 1.00 38.76 O +HETATM 2215 O HOH A 274 12.930 -11.463 37.352 1.00 43.18 O +HETATM 2216 O HOH A 275 16.007 -13.401 32.247 1.00 39.13 O +HETATM 2217 O HOH A 276 9.398 -14.257 28.668 1.00 33.79 O +HETATM 2218 O HOH A 277 1.835 11.932 23.571 1.00 30.16 O +HETATM 2219 O HOH A 278 9.119 -10.573 30.966 1.00 37.25 O +HETATM 2220 O HOH A 279 7.930 -9.110 27.143 1.00 42.11 O +HETATM 2221 O HOH A 280 26.814 17.987 16.451 1.00 51.41 O +HETATM 2222 O HOH A 281 9.758 9.876 16.768 1.00 40.33 O +HETATM 2223 O HOH A 282 7.752 12.854 19.320 1.00 35.83 O +HETATM 2224 O HOH A 283 6.752 15.361 19.555 1.00 39.15 O +HETATM 2225 O HOH A 284 12.304 -3.776 44.474 1.00 42.18 O +HETATM 2226 O HOH B 128 21.749 7.845 52.818 1.00 14.25 O +HETATM 2227 O HOH B 129 35.392 -13.107 35.614 1.00 25.17 O +HETATM 2228 O HOH B 130 35.727 8.326 46.410 1.00 12.99 O +HETATM 2229 O HOH B 131 23.311 -8.504 45.288 1.00 12.61 O +HETATM 2230 O HOH B 132 29.110 -18.789 46.047 1.00 23.45 O +HETATM 2231 O HOH B 133 47.502 1.457 58.755 1.00 14.50 O +HETATM 2232 
O HOH B 134 27.493 10.307 51.644 1.00 12.27 O +HETATM 2233 O HOH B 135 24.522 10.723 51.950 1.00 10.99 O +HETATM 2234 O HOH B 136 39.003 6.377 48.604 1.00 14.10 O +HETATM 2235 O HOH B 137 27.459 -7.921 41.927 1.00 18.41 O +HETATM 2236 O HOH B 138 27.538 1.783 63.090 1.00 19.30 O +HETATM 2237 O HOH B 139 24.761 -7.627 47.446 1.00 12.74 O +HETATM 2238 O HOH B 140 39.216 4.896 46.269 1.00 19.66 O +HETATM 2239 O HOH B 141 29.205 10.791 53.689 1.00 15.87 O +HETATM 2240 O HOH B 142 19.940 2.678 45.179 1.00 13.59 O +HETATM 2241 O HOH B 143 29.698 -20.740 49.396 1.00 16.97 O +HETATM 2242 O HOH B 144 41.100 6.919 50.486 1.00 10.64 O +HETATM 2243 O HOH B 145 20.931 5.078 51.843 1.00 29.27 O +HETATM 2244 O HOH B 146 27.235 -1.233 35.422 1.00 19.56 O +HETATM 2245 O HOH B 147 46.212 1.487 50.336 1.00 23.03 O +HETATM 2246 O HOH B 148 22.198 -11.958 48.612 1.00 24.32 O +HETATM 2247 O HOH B 149 29.694 -10.191 41.013 1.00 18.37 O +HETATM 2248 O HOH B 150 33.598 -20.124 45.598 1.00 15.74 O +HETATM 2249 O HOH B 151 36.668 7.905 57.359 1.00 28.27 O +HETATM 2250 O HOH B 152 22.383 3.251 62.407 1.00 21.39 O +HETATM 2251 O HOH B 153 30.112 -16.483 44.979 1.00 20.97 O +HETATM 2252 O HOH B 154 34.356 -5.386 39.413 1.00 16.41 O +HETATM 2253 O HOH B 155 15.656 -4.125 46.248 1.00 18.93 O +HETATM 2254 O HOH B 156 16.874 -4.983 44.105 1.00 17.87 O +HETATM 2255 O HOH B 157 33.806 -0.151 43.682 1.00 16.12 O +HETATM 2256 O HOH B 158 15.326 3.157 46.586 1.00 41.28 O +HETATM 2257 O HOH B 159 32.716 9.399 55.235 1.00 21.01 O +HETATM 2258 O HOH B 160 34.643 9.442 53.256 1.00 16.07 O +HETATM 2259 O HOH B 161 29.968 -0.287 35.588 1.00 19.95 O +HETATM 2260 O HOH B 162 23.577 9.066 55.922 1.00 30.60 O +HETATM 2261 O HOH B 163 29.571 -4.628 36.083 1.00 31.15 O +HETATM 2262 O HOH B 164 28.896 -12.583 41.636 1.00 33.44 O +HETATM 2263 O HOH B 165 46.622 0.764 53.966 1.00 19.11 O +HETATM 2264 O HOH B 166 22.306 5.686 56.691 1.00 29.21 O +HETATM 2265 O HOH B 167 36.992 7.644 39.802 1.00 35.51 O +HETATM 2266 O 
HOH B 168 25.761 -3.624 36.491 1.00 38.29 O +HETATM 2267 O HOH B 169 18.592 1.549 49.716 1.00 27.31 O +HETATM 2268 O HOH B 170 42.868 7.188 46.987 1.00 35.86 O +HETATM 2269 O HOH B 171 27.292 -14.274 43.160 1.00 37.43 O +HETATM 2270 O HOH B 172 25.362 -15.750 45.104 1.00 30.52 O +HETATM 2271 O HOH B 173 43.076 9.501 56.562 1.00 43.34 O +HETATM 2272 O HOH B 174 32.204 2.950 60.662 1.00 32.82 O +HETATM 2273 O HOH B 175 33.831 1.926 37.009 1.00 29.41 O +HETATM 2274 O HOH B 176 47.425 8.687 58.230 1.00 35.83 O +HETATM 2275 O HOH B 177 43.492 7.252 53.386 1.00 29.73 O +HETATM 2276 O HOH B 178 32.958 -17.842 44.639 1.00 36.54 O +HETATM 2277 O HOH B 179 23.971 6.496 39.400 1.00 31.56 O +HETATM 2278 O HOH B 180 38.250 8.614 47.381 1.00 19.44 O +HETATM 2279 O HOH B 181 21.536 -10.246 46.303 1.00 20.07 O +HETATM 2280 O HOH B 182 30.890 -20.716 46.247 1.00 18.53 O +HETATM 2281 O HOH B 183 26.565 -19.302 45.680 1.00 34.84 O +HETATM 2282 O HOH B 184 46.915 -0.253 56.738 1.00 18.35 O +HETATM 2283 O HOH B 185 45.975 -2.818 56.285 1.00 19.90 O +HETATM 2284 O HOH B 186 25.009 3.263 63.422 1.00 16.72 O +HETATM 2285 O HOH B 187 25.048 -8.829 50.066 1.00 15.00 O +HETATM 2286 O HOH B 188 17.988 2.155 47.022 1.00 26.39 O +HETATM 2287 O HOH B 189 20.881 4.644 46.848 1.00 14.77 O +HETATM 2288 O HOH B 190 23.050 5.646 45.819 1.00 13.73 O +HETATM 2289 O HOH B 191 19.937 4.058 49.511 1.00 15.75 O +HETATM 2290 O HOH B 192 17.160 1.434 51.955 1.00 32.82 O +HETATM 2291 O HOH B 193 48.590 -1.720 48.336 1.00 27.63 O +HETATM 2292 O HOH B 194 47.845 -3.678 44.065 1.00 28.54 O +HETATM 2293 O HOH B 195 44.564 -2.291 42.836 1.00 25.85 O +HETATM 2294 O HOH B 196 40.878 -3.749 43.732 1.00 11.38 O +HETATM 2295 O HOH B 197 38.402 -3.964 46.400 1.00 14.57 O +HETATM 2296 O HOH B 198 18.847 -6.953 56.593 1.00 25.01 O +HETATM 2297 O HOH B 199 17.606 -2.703 54.515 1.00 27.07 O +HETATM 2298 O HOH B 200 35.941 -22.085 49.046 1.00 8.69 O +HETATM 2299 O HOH B 201 36.529 9.884 55.225 1.00 34.94 O +HETATM 2300 O HOH 
B 202 21.808 7.078 43.858 1.00 15.74 O +HETATM 2301 O HOH B 203 33.310 -10.259 36.870 1.00 28.74 O +HETATM 2302 O HOH B 204 24.861 -18.928 59.731 1.00 19.32 O +HETATM 2303 O HOH B 205 32.609 0.477 35.005 1.00 33.68 O +HETATM 2304 O HOH B 206 25.908 -5.820 34.938 1.00 32.89 O +HETATM 2305 O HOH B 207 25.406 -5.983 66.383 1.00 18.21 O +HETATM 2306 O HOH B 208 22.532 -12.430 65.714 1.00 21.91 O +HETATM 2307 O HOH B 209 36.385 0.150 42.555 1.00 21.29 O +HETATM 2308 O HOH B 210 40.011 -1.866 41.602 1.00 16.46 O +HETATM 2309 O HOH B 211 45.990 -10.409 46.937 1.00 15.14 O +HETATM 2310 O HOH B 212 47.299 -12.855 46.122 1.00 37.65 O +HETATM 2311 O HOH B 213 47.300 -10.021 50.070 1.00 11.57 O +HETATM 2312 O HOH B 214 49.415 -11.742 49.384 1.00 25.56 O +HETATM 2313 O HOH B 215 50.432 -8.592 48.180 1.00 34.76 O +HETATM 2314 O HOH B 216 45.950 -17.117 47.140 1.00 11.91 O +HETATM 2315 O HOH B 217 48.209 -16.855 48.400 1.00 20.67 O +HETATM 2316 O HOH B 218 49.622 -19.497 48.902 1.00 36.91 O +HETATM 2317 O HOH B 219 41.472 -19.513 42.460 1.00 22.18 O +HETATM 2318 O HOH B 220 41.314 -19.608 39.815 1.00 29.27 O +HETATM 2319 O HOH B 221 39.856 -17.844 38.429 1.00 27.86 O +HETATM 2320 O HOH B 222 42.340 -15.231 36.539 1.00 36.18 O +HETATM 2321 O HOH B 223 44.862 -13.117 36.832 1.00 31.70 O +HETATM 2322 O HOH B 224 40.127 -8.641 36.131 1.00 26.48 O +HETATM 2323 O HOH B 225 51.919 -10.132 49.666 1.00 40.48 O +HETATM 2324 O HOH B 226 45.494 -11.393 57.214 1.00 17.13 O +HETATM 2325 O HOH B 227 40.907 -17.643 56.706 1.00 12.60 O +HETATM 2326 O HOH B 228 43.628 -18.106 57.649 1.00 17.39 O +HETATM 2327 O HOH B 229 44.061 -22.668 57.601 1.00 21.34 O +HETATM 2328 O HOH B 230 40.771 -24.364 57.020 1.00 20.20 O +HETATM 2329 O HOH B 231 45.333 -24.598 53.420 1.00 28.40 O +HETATM 2330 O HOH B 232 46.233 -26.590 51.909 1.00 21.65 O +HETATM 2331 O HOH B 233 47.659 -24.550 50.161 1.00 20.60 O +HETATM 2332 O HOH B 234 49.724 -20.577 51.545 1.00 34.06 O +HETATM 2333 O HOH B 235 40.623 -10.464 63.091 
1.00 15.64 O +HETATM 2334 O HOH B 236 33.734 -11.034 64.366 1.00 21.79 O +HETATM 2335 O HOH B 237 26.492 -22.373 55.525 1.00 23.99 O +HETATM 2336 O HOH B 238 25.286 -19.773 52.394 1.00 35.79 O +HETATM 2337 O HOH B 239 25.742 -17.045 51.744 1.00 29.50 O +HETATM 2338 O HOH B 240 20.699 -11.436 52.346 1.00 31.96 O +HETATM 2339 O HOH B 241 17.839 -12.847 51.084 1.00 53.96 O +HETATM 2340 O HOH B 242 17.325 -14.512 53.143 1.00 48.01 O +HETATM 2341 O HOH B 243 35.255 -19.515 41.312 1.00 31.47 O +HETATM 2342 O HOH B 244 33.856 -21.721 40.045 1.00 48.42 O +HETATM 2343 O HOH B 245 39.732 -21.420 43.520 1.00 21.00 O +HETATM 2344 O HOH B 246 46.284 -18.521 42.584 1.00 31.72 O +HETATM 2345 O HOH B 247 48.066 -14.706 42.752 1.00 40.32 O +HETATM 2346 O HOH B 248 26.797 -21.029 50.222 1.00 33.09 O +HETATM 2347 O HOH B 249 25.231 -19.819 48.034 1.00 34.56 O +HETATM 2348 O HOH B 250 26.390 -12.057 40.877 1.00 38.86 O +HETATM 2349 O HOH B 251 18.719 -5.241 58.774 1.00 33.80 O +HETATM 2350 O HOH B 252 28.694 -8.590 68.622 1.00 44.04 O +HETATM 2351 O HOH B 253 46.080 7.186 51.268 1.00 44.97 O +HETATM 2352 O HOH B 254 28.373 6.487 59.114 1.00 33.05 O +HETATM 2353 O HOH B 255 27.450 11.405 55.672 1.00 31.86 O +HETATM 2354 O HOH B 256 16.075 -0.438 53.582 1.00 38.00 O +HETATM 2355 O HOH B 257 19.558 -3.430 63.621 1.00 18.86 O +HETATM 2356 O HOH B 258 25.992 0.299 66.754 1.00 27.95 O +HETATM 2357 O HOH B 259 28.314 0.884 65.617 1.00 32.35 O +HETATM 2358 O HOH B 260 38.082 1.529 62.092 1.00 20.93 O +HETATM 2359 O HOH B 261 43.837 6.147 64.045 1.00 24.66 O +HETATM 2360 O HOH B 262 41.759 6.912 65.427 1.00 32.58 O +HETATM 2361 O HOH B 263 36.427 5.947 43.277 1.00 54.89 O +HETATM 2362 O HOH B 264 28.281 6.171 42.732 1.00 30.95 O +HETATM 2363 O HOH B 265 28.532 3.758 42.855 1.00 31.26 O +HETATM 2364 O HOH B 266 26.279 4.488 42.304 1.00 18.18 O +HETATM 2365 O HOH B 267 38.450 -25.374 53.625 1.00 11.50 O +HETATM 2366 O HOH B 268 33.268 -11.967 34.839 1.00 44.52 O +HETATM 2367 O HOH B 269 21.291 
7.640 55.382 1.00 37.07 O +HETATM 2368 O HOH B 270 40.543 -6.191 35.086 1.00 46.78 O +HETATM 2369 O HOH B 271 36.278 8.494 43.716 1.00 39.94 O +HETATM 2370 O HOH B 272 38.077 0.885 44.425 1.00 37.70 O +HETATM 2371 O HOH B 273 36.624 2.995 44.072 1.00 44.84 O +HETATM 2372 O HOH B 274 47.680 -3.802 54.241 1.00 29.52 O +HETATM 2373 O HOH B 275 47.542 -25.183 47.426 1.00 44.28 O +HETATM 2374 O HOH B 276 47.958 -0.641 51.434 1.00 41.18 O +HETATM 2375 O HOH B 277 48.773 -1.142 45.731 1.00 47.19 O +HETATM 2376 O HOH B 278 52.432 -3.449 47.286 1.00 34.07 O +HETATM 2377 O HOH B 279 22.927 -20.727 46.764 1.00 43.74 O +HETATM 2378 O HOH B 280 19.895 -12.192 66.540 1.00 37.79 O +HETATM 2379 O HOH B 281 41.198 10.198 58.267 1.00 48.98 O +HETATM 2380 O HOH B 282 44.205 11.703 55.646 1.00 52.92 O +HETATM 2381 O HOH B 283 42.359 7.497 60.196 1.00 46.88 O +HETATM 2382 O HOH B 284 43.862 -18.935 38.363 1.00 32.12 O +HETATM 2383 O HOH B 285 44.692 -12.023 39.188 1.00 33.96 O +CONECT 769 996 +CONECT 821 830 +CONECT 830 821 831 +CONECT 831 830 832 834 +CONECT 832 831 833 838 +CONECT 833 832 +CONECT 834 831 835 +CONECT 835 834 836 +CONECT 836 835 837 +CONECT 837 836 +CONECT 838 832 +CONECT 996 769 +CONECT 1800 2027 +CONECT 1852 1861 +CONECT 1861 1852 1862 +CONECT 1862 1861 1863 1865 +CONECT 1863 1862 1864 1869 +CONECT 1864 1863 +CONECT 1865 1862 1866 +CONECT 1866 1865 1867 +CONECT 1867 1866 1868 +CONECT 1868 1867 +CONECT 1869 1863 +CONECT 2027 1800 +CONECT 2063 2064 2065 2066 2067 +CONECT 2064 2063 +CONECT 2065 2063 +CONECT 2066 2063 +CONECT 2067 2063 +MASTER 266 0 3 17 10 0 2 6 2381 2 29 20 +END diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/data/__init__.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/data/__init__.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,14 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with 
the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Data pipeline for model features.""" diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/data/feature_processing.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/data/feature_processing.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,231 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Feature processing logic for multimer data pipeline.""" + +from typing import Iterable, MutableMapping, List + +from alphafold.common import residue_constants +from alphafold.data import msa_pairing +from alphafold.data import pipeline +import numpy as np + +REQUIRED_FEATURES = frozenset({ + 'aatype', 'all_atom_mask', 'all_atom_positions', 'all_chains_entity_ids', + 'all_crops_all_chains_mask', 'all_crops_all_chains_positions', + 'all_crops_all_chains_residue_ids', 'assembly_num_chains', 'asym_id', + 'bert_mask', 'cluster_bias_mask', 'deletion_matrix', 'deletion_mean', + 'entity_id', 'entity_mask', 'mem_peak', 'msa', 'msa_mask', 'num_alignments', + 'num_templates', 'queue_size', 'residue_index', 'resolution', + 'seq_length', 'seq_mask', 'sym_id', 'template_aatype', + 'template_all_atom_mask', 'template_all_atom_positions' +}) + +MAX_TEMPLATES = 4 +MSA_CROP_SIZE = 2048 + + +def _is_homomer_or_monomer(chains: Iterable[pipeline.FeatureDict]) -> bool: + """Checks if a list of chains represents a homomer/monomer example.""" + # Note that an entity_id of 0 indicates padding. + num_unique_chains = len(np.unique(np.concatenate( + [np.unique(chain['entity_id'][chain['entity_id'] > 0]) for + chain in chains]))) + return num_unique_chains == 1 + + +def pair_and_merge( + all_chain_features: MutableMapping[str, pipeline.FeatureDict], + is_prokaryote: bool) -> pipeline.FeatureDict: + """Runs processing on features to augment, pair and merge. + + Args: + all_chain_features: A MutableMap of dictionaries of features for each chain. + is_prokaryote: Whether the target complex is from a prokaryotic or + eukaryotic organism. + + Returns: + A dictionary of features. 
+ """ + + process_unmerged_features(all_chain_features) + + np_chains_list = list(all_chain_features.values()) + + pair_msa_sequences = not _is_homomer_or_monomer(np_chains_list) + + if pair_msa_sequences: + np_chains_list = msa_pairing.create_paired_features( + chains=np_chains_list, prokaryotic=is_prokaryote) + np_chains_list = msa_pairing.deduplicate_unpaired_sequences(np_chains_list) + np_chains_list = crop_chains( + np_chains_list, + msa_crop_size=MSA_CROP_SIZE, + pair_msa_sequences=pair_msa_sequences, + max_templates=MAX_TEMPLATES) + np_example = msa_pairing.merge_chain_features( + np_chains_list=np_chains_list, pair_msa_sequences=pair_msa_sequences, + max_templates=MAX_TEMPLATES) + np_example = process_final(np_example) + return np_example + + +def crop_chains( + chains_list: List[pipeline.FeatureDict], + msa_crop_size: int, + pair_msa_sequences: bool, + max_templates: int) -> List[pipeline.FeatureDict]: + """Crops the MSAs for a set of chains. + + Args: + chains_list: A list of chains to be cropped. + msa_crop_size: The total number of sequences to crop from the MSA. + pair_msa_sequences: Whether we are operating in sequence-pairing mode. + max_templates: The maximum templates to use per chain. + + Returns: + The chains cropped. + """ + + # Apply the cropping. 
+ cropped_chains = [] + for chain in chains_list: + cropped_chain = _crop_single_chain( + chain, + msa_crop_size=msa_crop_size, + pair_msa_sequences=pair_msa_sequences, + max_templates=max_templates) + cropped_chains.append(cropped_chain) + + return cropped_chains + + +def _crop_single_chain(chain: pipeline.FeatureDict, + msa_crop_size: int, + pair_msa_sequences: bool, + max_templates: int) -> pipeline.FeatureDict: + """Crops msa sequences to `msa_crop_size`.""" + msa_size = chain['num_alignments'] + + if pair_msa_sequences: + msa_size_all_seq = chain['num_alignments_all_seq'] + msa_crop_size_all_seq = np.minimum(msa_size_all_seq, msa_crop_size // 2) + + # We reduce the number of un-paired sequences, by the number of times a + # sequence from this chain's MSA is included in the paired MSA. This keeps + # the MSA size for each chain roughly constant. + msa_all_seq = chain['msa_all_seq'][:msa_crop_size_all_seq, :] + num_non_gapped_pairs = np.sum( + np.any(msa_all_seq != msa_pairing.MSA_GAP_IDX, axis=1)) + num_non_gapped_pairs = np.minimum(num_non_gapped_pairs, + msa_crop_size_all_seq) + + # Restrict the unpaired crop size so that paired+unpaired sequences do not + # exceed msa_seqs_per_chain for each chain. 
+ max_msa_crop_size = np.maximum(msa_crop_size - num_non_gapped_pairs, 0) + msa_crop_size = np.minimum(msa_size, max_msa_crop_size) + else: + msa_crop_size = np.minimum(msa_size, msa_crop_size) + + include_templates = 'template_aatype' in chain and max_templates + if include_templates: + num_templates = chain['template_aatype'].shape[0] + templates_crop_size = np.minimum(num_templates, max_templates) + + for k in chain: + k_split = k.split('_all_seq')[0] + if k_split in msa_pairing.TEMPLATE_FEATURES: + chain[k] = chain[k][:templates_crop_size, :] + elif k_split in msa_pairing.MSA_FEATURES: + if '_all_seq' in k and pair_msa_sequences: + chain[k] = chain[k][:msa_crop_size_all_seq, :] + else: + chain[k] = chain[k][:msa_crop_size, :] + + chain['num_alignments'] = np.asarray(msa_crop_size, dtype=np.int32) + if include_templates: + chain['num_templates'] = np.asarray(templates_crop_size, dtype=np.int32) + if pair_msa_sequences: + chain['num_alignments_all_seq'] = np.asarray( + msa_crop_size_all_seq, dtype=np.int32) + return chain + + +def process_final(np_example: pipeline.FeatureDict) -> pipeline.FeatureDict: + """Final processing steps in data pipeline, after merging and pairing.""" + np_example = _correct_msa_restypes(np_example) + np_example = _make_seq_mask(np_example) + np_example = _make_msa_mask(np_example) + np_example = _filter_features(np_example) + return np_example + + +def _correct_msa_restypes(np_example): + """Correct MSA restype to have the same order as residue_constants.""" + new_order_list = residue_constants.MAP_HHBLITS_AATYPE_TO_OUR_AATYPE + np_example['msa'] = np.take(new_order_list, np_example['msa'], axis=0) + np_example['msa'] = np_example['msa'].astype(np.int32) + return np_example + + +def _make_seq_mask(np_example): + np_example['seq_mask'] = (np_example['entity_id'] > 0).astype(np.float32) + return np_example + + +def _make_msa_mask(np_example): + """Mask features are all ones, but will later be zero-padded.""" + + np_example['msa_mask'] = 
np.ones_like(np_example['msa'], dtype=np.float32) + + seq_mask = (np_example['entity_id'] > 0).astype(np.float32) + np_example['msa_mask'] *= seq_mask[None] + + return np_example + + +def _filter_features(np_example: pipeline.FeatureDict) -> pipeline.FeatureDict: + """Filters features of example to only those requested.""" + return {k: v for (k, v) in np_example.items() if k in REQUIRED_FEATURES} + + +def process_unmerged_features( + all_chain_features: MutableMapping[str, pipeline.FeatureDict]): + """Postprocessing stage for per-chain features before merging.""" + num_chains = len(all_chain_features) + for chain_features in all_chain_features.values(): + # Convert deletion matrices to float. + chain_features['deletion_matrix'] = np.asarray( + chain_features.pop('deletion_matrix_int'), dtype=np.float32) + if 'deletion_matrix_int_all_seq' in chain_features: + chain_features['deletion_matrix_all_seq'] = np.asarray( + chain_features.pop('deletion_matrix_int_all_seq'), dtype=np.float32) + + chain_features['deletion_mean'] = np.mean( + chain_features['deletion_matrix'], axis=0) + + # Add all_atom_mask and dummy all_atom_positions based on aatype. + all_atom_mask = residue_constants.STANDARD_ATOM_MASK[ + chain_features['aatype']] + chain_features['all_atom_mask'] = all_atom_mask + chain_features['all_atom_positions'] = np.zeros( + list(all_atom_mask.shape) + [3]) + + # Add assembly_num_chains. + chain_features['assembly_num_chains'] = np.asarray(num_chains) + + # Add entity_mask. 
+ for chain_features in all_chain_features.values(): + chain_features['entity_mask'] = ( + chain_features['entity_id'] != 0).astype(np.int32) diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/data/mmcif_parsing.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/data/mmcif_parsing.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,386 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Parses the mmCIF file format.""" +import collections +import dataclasses +import functools +import io +from typing import Any, Mapping, Optional, Sequence, Tuple + +from absl import logging +from Bio import PDB +from Bio.Data import SCOPData + +# Type aliases: +ChainId = str +PdbHeader = Mapping[str, Any] +PdbStructure = PDB.Structure.Structure +SeqRes = str +MmCIFDict = Mapping[str, Sequence[str]] + + +@dataclasses.dataclass(frozen=True) +class Monomer: + id: str + num: int + + +# Note - mmCIF format provides no guarantees on the type of author-assigned +# sequence numbers. They need not be integers. +@dataclasses.dataclass(frozen=True) +class AtomSite: + residue_name: str + author_chain_id: str + mmcif_chain_id: str + author_seq_num: str + mmcif_seq_num: int + insertion_code: str + hetatm_atom: str + model_num: int + + +# Used to map SEQRES index to a residue in the structure. 
+@dataclasses.dataclass(frozen=True) +class ResiduePosition: + chain_id: str + residue_number: int + insertion_code: str + + +@dataclasses.dataclass(frozen=True) +class ResidueAtPosition: + position: Optional[ResiduePosition] + name: str + is_missing: bool + hetflag: str + + +@dataclasses.dataclass(frozen=True) +class MmcifObject: + """Representation of a parsed mmCIF file. + + Contains: + file_id: A meaningful name, e.g. a pdb_id. Should be unique amongst all + files being processed. + header: Biopython header. + structure: Biopython structure. + chain_to_seqres: Dict mapping chain_id to 1 letter amino acid sequence. E.g. + {'A': 'ABCDEFG'} + seqres_to_structure: Dict; for each chain_id contains a mapping between + SEQRES index and a ResidueAtPosition. e.g. {'A': {0: ResidueAtPosition, + 1: ResidueAtPosition, + ...}} + raw_string: The raw string used to construct the MmcifObject. + """ + file_id: str + header: PdbHeader + structure: PdbStructure + chain_to_seqres: Mapping[ChainId, SeqRes] + seqres_to_structure: Mapping[ChainId, Mapping[int, ResidueAtPosition]] + raw_string: Any + + +@dataclasses.dataclass(frozen=True) +class ParsingResult: + """Returned by the parse function. + + Contains: + mmcif_object: A MmcifObject, may be None if no chain could be successfully + parsed. + errors: A dict mapping (file_id, chain_id) to any exception generated. + """ + mmcif_object: Optional[MmcifObject] + errors: Mapping[Tuple[str, str], Any] + + +class ParseError(Exception): + """An error indicating that an mmCIF file could not be parsed.""" + + +def mmcif_loop_to_list(prefix: str, + parsed_info: MmCIFDict) -> Sequence[Mapping[str, str]]: + """Extracts loop associated with a prefix from mmCIF data as a list. + + Reference for loop_ in mmCIF: + http://mmcif.wwpdb.org/docs/tutorials/mechanics/pdbx-mmcif-syntax.html + + Args: + prefix: Prefix shared by each of the data items in the loop. + e.g. 
'_entity_poly_seq.', where the data items are _entity_poly_seq.num, + _entity_poly_seq.mon_id. Should include the trailing period. + parsed_info: A dict of parsed mmCIF data, e.g. _mmcif_dict from a Biopython + parser. + + Returns: + Returns a list of dicts; each dict represents 1 entry from an mmCIF loop. + """ + cols = [] + data = [] + for key, value in parsed_info.items(): + if key.startswith(prefix): + cols.append(key) + data.append(value) + + assert all([len(xs) == len(data[0]) for xs in data]), ( + 'mmCIF error: Not all loops are the same length: %s' % cols) + + return [dict(zip(cols, xs)) for xs in zip(*data)] + + +def mmcif_loop_to_dict(prefix: str, + index: str, + parsed_info: MmCIFDict, + ) -> Mapping[str, Mapping[str, str]]: + """Extracts loop associated with a prefix from mmCIF data as a dictionary. + + Args: + prefix: Prefix shared by each of the data items in the loop. + e.g. '_entity_poly_seq.', where the data items are _entity_poly_seq.num, + _entity_poly_seq.mon_id. Should include the trailing period. + index: Which item of loop data should serve as the key. + parsed_info: A dict of parsed mmCIF data, e.g. _mmcif_dict from a Biopython + parser. + + Returns: + Returns a dict of dicts; each dict represents 1 entry from an mmCIF loop, + indexed by the index column. + """ + entries = mmcif_loop_to_list(prefix, parsed_info) + return {entry[index]: entry for entry in entries} + + +@functools.lru_cache(16, typed=False) +def parse(*, + file_id: str, + mmcif_string: str, + catch_all_errors: bool = True) -> ParsingResult: + """Entry point, parses an mmcif_string. + + Args: + file_id: A string identifier for this file. Should be unique within the + collection of files being processed. + mmcif_string: Contents of an mmCIF file. + catch_all_errors: If True, all exceptions are caught and error messages are + returned as part of the ParsingResult. If False exceptions will be allowed + to propagate. + + Returns: + A ParsingResult. 
+ """ + errors = {} + try: + parser = PDB.MMCIFParser(QUIET=True) + handle = io.StringIO(mmcif_string) + full_structure = parser.get_structure('', handle) + first_model_structure = _get_first_model(full_structure) + # Extract the _mmcif_dict from the parser, which contains useful fields not + # reflected in the Biopython structure. + parsed_info = parser._mmcif_dict # pylint:disable=protected-access + + # Ensure all values are lists, even if singletons. + for key, value in parsed_info.items(): + if not isinstance(value, list): + parsed_info[key] = [value] + + header = _get_header(parsed_info) + + # Determine the protein chains, and their start numbers according to the + # internal mmCIF numbering scheme (likely but not guaranteed to be 1). + valid_chains = _get_protein_chains(parsed_info=parsed_info) + if not valid_chains: + return ParsingResult( + None, {(file_id, ''): 'No protein chains found in this file.'}) + seq_start_num = {chain_id: min([monomer.num for monomer in seq]) + for chain_id, seq in valid_chains.items()} + + # Loop over the atoms for which we have coordinates. Populate two mappings: + # -mmcif_to_author_chain_id (maps internal mmCIF chain ids to chain ids used + # by the authors / Biopython). + # -seq_to_structure_mappings (maps idx into sequence to ResidueAtPosition). + mmcif_to_author_chain_id = {} + seq_to_structure_mappings = {} + for atom in _get_atom_site_list(parsed_info): + if atom.model_num != '1': + # We only process the first model at the moment. + continue + + mmcif_to_author_chain_id[atom.mmcif_chain_id] = atom.author_chain_id + + if atom.mmcif_chain_id in valid_chains: + hetflag = ' ' + if atom.hetatm_atom == 'HETATM': + # Water atoms are assigned a special hetflag of W in Biopython. We + # need to do the same, so that this hetflag can be used to fetch + # a residue from the Biopython structure by id. 
+ if atom.residue_name in ('HOH', 'WAT'): + hetflag = 'W' + else: + hetflag = 'H_' + atom.residue_name + insertion_code = atom.insertion_code + if not _is_set(atom.insertion_code): + insertion_code = ' ' + position = ResiduePosition(chain_id=atom.author_chain_id, + residue_number=int(atom.author_seq_num), + insertion_code=insertion_code) + seq_idx = int(atom.mmcif_seq_num) - seq_start_num[atom.mmcif_chain_id] + current = seq_to_structure_mappings.get(atom.author_chain_id, {}) + current[seq_idx] = ResidueAtPosition(position=position, + name=atom.residue_name, + is_missing=False, + hetflag=hetflag) + seq_to_structure_mappings[atom.author_chain_id] = current + + # Add missing residue information to seq_to_structure_mappings. + for chain_id, seq_info in valid_chains.items(): + author_chain = mmcif_to_author_chain_id[chain_id] + current_mapping = seq_to_structure_mappings[author_chain] + for idx, monomer in enumerate(seq_info): + if idx not in current_mapping: + current_mapping[idx] = ResidueAtPosition(position=None, + name=monomer.id, + is_missing=True, + hetflag=' ') + + author_chain_to_sequence = {} + for chain_id, seq_info in valid_chains.items(): + author_chain = mmcif_to_author_chain_id[chain_id] + seq = [] + for monomer in seq_info: + code = SCOPData.protein_letters_3to1.get(monomer.id, 'X') + seq.append(code if len(code) == 1 else 'X') + seq = ''.join(seq) + author_chain_to_sequence[author_chain] = seq + + mmcif_object = MmcifObject( + file_id=file_id, + header=header, + structure=first_model_structure, + chain_to_seqres=author_chain_to_sequence, + seqres_to_structure=seq_to_structure_mappings, + raw_string=parsed_info) + + return ParsingResult(mmcif_object=mmcif_object, errors=errors) + except Exception as e: # pylint:disable=broad-except + errors[(file_id, '')] = e + if not catch_all_errors: + raise + return ParsingResult(mmcif_object=None, errors=errors) + + +def _get_first_model(structure: PdbStructure) -> PdbStructure: + """Returns the first model in a 
Biopython structure.""" + return next(structure.get_models()) + +_MIN_LENGTH_OF_CHAIN_TO_BE_COUNTED_AS_PEPTIDE = 21 + + +def get_release_date(parsed_info: MmCIFDict) -> str: + """Returns the oldest revision date.""" + revision_dates = parsed_info['_pdbx_audit_revision_history.revision_date'] + return min(revision_dates) + + +def _get_header(parsed_info: MmCIFDict) -> PdbHeader: + """Returns a basic header containing method, release date and resolution.""" + header = {} + + experiments = mmcif_loop_to_list('_exptl.', parsed_info) + header['structure_method'] = ','.join([ + experiment['_exptl.method'].lower() for experiment in experiments]) + + # Note: The release_date here corresponds to the oldest revision. We prefer to + # use this for dataset filtering over the deposition_date. + if '_pdbx_audit_revision_history.revision_date' in parsed_info: + header['release_date'] = get_release_date(parsed_info) + else: + logging.warning('Could not determine release_date: %s', + parsed_info['_entry.id']) + + header['resolution'] = 0.00 + for res_key in ('_refine.ls_d_res_high', '_em_3d_reconstruction.resolution', + '_reflns.d_resolution_high'): + if res_key in parsed_info: + try: + raw_resolution = parsed_info[res_key][0] + header['resolution'] = float(raw_resolution) + except ValueError: + logging.debug('Invalid resolution format: %s', parsed_info[res_key]) + + return header + + +def _get_atom_site_list(parsed_info: MmCIFDict) -> Sequence[AtomSite]: + """Returns list of atom sites; contains data not present in the structure.""" + return [AtomSite(*site) for site in zip( # pylint:disable=g-complex-comprehension + parsed_info['_atom_site.label_comp_id'], + parsed_info['_atom_site.auth_asym_id'], + parsed_info['_atom_site.label_asym_id'], + parsed_info['_atom_site.auth_seq_id'], + parsed_info['_atom_site.label_seq_id'], + parsed_info['_atom_site.pdbx_PDB_ins_code'], + parsed_info['_atom_site.group_PDB'], + parsed_info['_atom_site.pdbx_PDB_model_num'], + )] + + +def 
_get_protein_chains( + *, parsed_info: Mapping[str, Any]) -> Mapping[ChainId, Sequence[Monomer]]: + """Extracts polymer information for protein chains only. + + Args: + parsed_info: _mmcif_dict produced by the Biopython parser. + + Returns: + A dict mapping mmcif chain id to a list of Monomers. + """ + # Get polymer information for each entity in the structure. + entity_poly_seqs = mmcif_loop_to_list('_entity_poly_seq.', parsed_info) + + polymers = collections.defaultdict(list) + for entity_poly_seq in entity_poly_seqs: + polymers[entity_poly_seq['_entity_poly_seq.entity_id']].append( + Monomer(id=entity_poly_seq['_entity_poly_seq.mon_id'], + num=int(entity_poly_seq['_entity_poly_seq.num']))) + + # Get chemical compositions. Will allow us to identify which of these polymers + # are proteins. + chem_comps = mmcif_loop_to_dict('_chem_comp.', '_chem_comp.id', parsed_info) + + # Get chains information for each entity. Necessary so that we can return a + # dict keyed on chain id rather than entity. + struct_asyms = mmcif_loop_to_list('_struct_asym.', parsed_info) + + entity_to_mmcif_chains = collections.defaultdict(list) + for struct_asym in struct_asyms: + chain_id = struct_asym['_struct_asym.id'] + entity_id = struct_asym['_struct_asym.entity_id'] + entity_to_mmcif_chains[entity_id].append(chain_id) + + # Identify and return the valid protein chains. + valid_chains = {} + for entity_id, seq_info in polymers.items(): + chain_ids = entity_to_mmcif_chains[entity_id] + + # Reject polymers without any peptide-like components, such as DNA/RNA. 
+ if any(['peptide' in chem_comps[monomer.id]['_chem_comp.type'] + for monomer in seq_info]): + for chain_id in chain_ids: + valid_chains[chain_id] = seq_info + return valid_chains + + +def _is_set(data: str) -> bool: + """Returns False if data is a special mmCIF character indicating 'unset'.""" + return data not in ('.', '?') diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/data/msa_identifiers.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/data/msa_identifiers.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,92 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Utilities for extracting identifiers from MSA sequence descriptions.""" + +import dataclasses +import re +from typing import Optional + + +# Sequences coming from UniProtKB database come in the +# `db|UniqueIdentifier|EntryName` format, e.g. `tr|A0A146SKV9|A0A146SKV9_FUNHE` +# or `sp|P0C2L1|A3X1_LOXLA` (for TREMBL/Swiss-Prot respectively). +_UNIPROT_PATTERN = re.compile( + r""" + ^ + # UniProtKB/TrEMBL or UniProtKB/Swiss-Prot + (?:tr|sp) + \| + # A primary accession number of the UniProtKB entry. + (?P<AccessionIdentifier>[A-Za-z0-9]{6,10}) + # Occasionally there is a _0 or _1 isoform suffix, which we ignore. + (?:_\d)? + \| + # TREMBL repeats the accession ID here. Swiss-Prot has a mnemonic + # protein ID code. + (?:[A-Za-z0-9]+) + _ + # A mnemonic species identification code. 
+ (?P<SpeciesIdentifier>([A-Za-z0-9]){1,5}) + # Small BFD uses a final value after an underscore, which we ignore. + (?:_\d+)? + $ + """, + re.VERBOSE) + + +@dataclasses.dataclass(frozen=True) +class Identifiers: + uniprot_accession_id: str = '' + species_id: str = '' + + +def _parse_sequence_identifier(msa_sequence_identifier: str) -> Identifiers: + """Gets accession id and species from an msa sequence identifier. + + The sequence identifier has the format specified by + _UNIPROT_PATTERN. + An example of a sequence identifier: `tr|A0A146SKV9|A0A146SKV9_FUNHE` + + Args: + msa_sequence_identifier: a sequence identifier. + + Returns: + An `Identifiers` instance with a uniprot_accession_id and species_id. These + can be empty in the case where no identifier was found. + """ + matches = re.search(_UNIPROT_PATTERN, msa_sequence_identifier.strip()) + if matches: + return Identifiers( + uniprot_accession_id=matches.group('AccessionIdentifier'), + species_id=matches.group('SpeciesIdentifier')) + return Identifiers() + + +def _extract_sequence_identifier(description: str) -> Optional[str]: + """Extracts sequence identifier from description. 
Returns None if no match.""" + split_description = description.split() + if split_description: + return split_description[0].partition('/')[0] + else: + return None + + +def get_identifiers(description: str) -> Identifiers: + """Computes extra MSA features from the description.""" + sequence_identifier = _extract_sequence_identifier(description) + if sequence_identifier is None: + return Identifiers() + else: + return _parse_sequence_identifier(sequence_identifier) diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/data/msa_pairing.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/data/msa_pairing.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,638 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Pairing logic for multimer data pipeline.""" + +import collections +import functools +import re +import string +from typing import Any, Dict, Iterable, List, Sequence + +from alphafold.common import residue_constants +from alphafold.data import pipeline +import numpy as np +import pandas as pd +import scipy.linalg + +ALPHA_ACCESSION_ID_MAP = {x: y for y, x in enumerate(string.ascii_uppercase)} +ALPHANUM_ACCESSION_ID_MAP = { + chr: num for num, chr in enumerate(string.ascii_uppercase + string.digits) +} # A-Z,0-9 +NUM_ACCESSION_ID_MAP = {str(x): x for x in range(10)} # 0-9 + +MSA_GAP_IDX = residue_constants.restypes_with_x_and_gap.index('-') +SEQUENCE_GAP_CUTOFF = 0.5 +SEQUENCE_SIMILARITY_CUTOFF = 0.9 + +MSA_PAD_VALUES = {'msa_all_seq': MSA_GAP_IDX, + 'msa_mask_all_seq': 1, + 'deletion_matrix_all_seq': 0, + 'deletion_matrix_int_all_seq': 0, + 'msa': MSA_GAP_IDX, + 'msa_mask': 1, + 'deletion_matrix': 0, + 'deletion_matrix_int': 0} + +MSA_FEATURES = ('msa', 'msa_mask', 'deletion_matrix', 'deletion_matrix_int') +SEQ_FEATURES = ('residue_index', 'aatype', 'all_atom_positions', + 'all_atom_mask', 'seq_mask', 'between_segment_residues', + 'has_alt_locations', 'has_hetatoms', 'asym_id', 'entity_id', + 'sym_id', 'entity_mask', 'deletion_mean', + 'prediction_atom_mask', + 'literature_positions', 'atom_indices_to_group_indices', + 'rigid_group_default_frame') +TEMPLATE_FEATURES = ('template_aatype', 'template_all_atom_positions', + 'template_all_atom_mask') +CHAIN_FEATURES = ('num_alignments', 'seq_length') + + +domain_name_pattern = re.compile( + r'''^(?P[a-z\d]{4}) + \{(?P[\d+(\+\d+)?])\} + (?P[a-zA-Z\d]+) + \{(?P\d+)\}$ + ''', re.VERBOSE) + + +def create_paired_features( + chains: Iterable[pipeline.FeatureDict], + prokaryotic: bool, + ) -> List[pipeline.FeatureDict]: + """Returns the original chains with paired NUM_SEQ features. + + Args: + chains: A list of feature dictionaries for each chain. + prokaryotic: Whether the target complex is from a prokaryotic organism. 
+ Used to determine the distance metric for pairing. + + Returns: + A list of feature dictionaries with sequence features including only + rows to be paired. + """ + chains = list(chains) + chain_keys = chains[0].keys() + + if len(chains) < 2: + return chains + else: + updated_chains = [] + paired_chains_to_paired_row_indices = pair_sequences( + chains, prokaryotic) + paired_rows = reorder_paired_rows( + paired_chains_to_paired_row_indices) + + for chain_num, chain in enumerate(chains): + new_chain = {k: v for k, v in chain.items() if '_all_seq' not in k} + for feature_name in chain_keys: + if feature_name.endswith('_all_seq'): + feats_padded = pad_features(chain[feature_name], feature_name) + new_chain[feature_name] = feats_padded[paired_rows[:, chain_num]] + new_chain['num_alignments_all_seq'] = np.asarray( + len(paired_rows[:, chain_num])) + updated_chains.append(new_chain) + return updated_chains + + +def pad_features(feature: np.ndarray, feature_name: str) -> np.ndarray: + """Add a 'padding' row at the end of the features list. + + The padding row will be selected as a 'paired' row in the case of partial + alignment - for the chain that doesn't have paired alignment. + + Args: + feature: The feature to be padded. + feature_name: The name of the feature to be padded. + + Returns: + The feature with an additional padding row. 
+ """ + assert feature.dtype != np.dtype(np.string_) + if feature_name in ('msa_all_seq', 'msa_mask_all_seq', + 'deletion_matrix_all_seq', 'deletion_matrix_int_all_seq'): + num_res = feature.shape[1] + padding = MSA_PAD_VALUES[feature_name] * np.ones([1, num_res], + feature.dtype) + elif feature_name in ('msa_uniprot_accession_identifiers_all_seq', + 'msa_species_identifiers_all_seq'): + padding = [b''] + else: + return feature + feats_padded = np.concatenate([feature, padding], axis=0) + return feats_padded + + +def _make_msa_df(chain_features: pipeline.FeatureDict) -> pd.DataFrame: + """Makes dataframe with msa features needed for msa pairing.""" + chain_msa = chain_features['msa_all_seq'] + query_seq = chain_msa[0] + per_seq_similarity = np.sum( + query_seq[None] == chain_msa, axis=-1) / float(len(query_seq)) + per_seq_gap = np.sum(chain_msa == 21, axis=-1) / float(len(query_seq)) + msa_df = pd.DataFrame({ + 'msa_species_identifiers': + chain_features['msa_species_identifiers_all_seq'], + 'msa_uniprot_accession_identifiers': + chain_features['msa_uniprot_accession_identifiers_all_seq'], + 'msa_row': + np.arange(len( + chain_features['msa_uniprot_accession_identifiers_all_seq'])), + 'msa_similarity': per_seq_similarity, + 'gap': per_seq_gap + }) + return msa_df + + +def _create_species_dict(msa_df: pd.DataFrame) -> Dict[bytes, pd.DataFrame]: + """Creates mapping from species to msa dataframe of that species.""" + species_lookup = {} + for species, species_df in msa_df.groupby('msa_species_identifiers'): + species_lookup[species] = species_df + return species_lookup + + +@functools.lru_cache(maxsize=65536) +def encode_accession(accession_id: str) -> int: + """Map accession codes to the serial order in which they were assigned.""" + alpha = ALPHA_ACCESSION_ID_MAP # A-Z + alphanum = ALPHANUM_ACCESSION_ID_MAP # A-Z,0-9 + num = NUM_ACCESSION_ID_MAP # 0-9 + + coding = 0 + + # This is based on the uniprot accession id format + # 
https://www.uniprot.org/help/accession_numbers + if accession_id[0] in {'O', 'P', 'Q'}: + bases = (alpha, num, alphanum, alphanum, alphanum, num) + elif len(accession_id) == 6: + bases = (alpha, num, alpha, alphanum, alphanum, num) + elif len(accession_id) == 10: + bases = (alpha, num, alpha, alphanum, alphanum, num, alpha, alphanum, + alphanum, num) + + product = 1 + for place, base in zip(reversed(accession_id), reversed(bases)): + coding += base[place] * product + product *= len(base) + + return coding + + +def _calc_id_diff(id_a: bytes, id_b: bytes) -> int: + return abs(encode_accession(id_a.decode()) - encode_accession(id_b.decode())) + + +def _find_all_accession_matches(accession_id_lists: List[List[bytes]], + diff_cutoff: int = 20 + ) -> List[List[Any]]: + """Finds accession id matches across the chains based on their difference.""" + all_accession_tuples = [] + current_tuple = [] + tokens_used_in_answer = set() + + def _matches_all_in_current_tuple(inp: bytes, diff_cutoff: int) -> bool: + return all((_calc_id_diff(s, inp) < diff_cutoff for s in current_tuple)) + + def _all_tokens_not_used_before() -> bool: + return all((s not in tokens_used_in_answer for s in current_tuple)) + + def dfs(level, accession_id, diff_cutoff=diff_cutoff) -> None: + if level == len(accession_id_lists) - 1: + if _all_tokens_not_used_before(): + all_accession_tuples.append(list(current_tuple)) + for s in current_tuple: + tokens_used_in_answer.add(s) + return + + if level == -1: + new_list = accession_id_lists[level+1] + else: + new_list = [(_calc_id_diff(accession_id, s), s) for + s in accession_id_lists[level+1]] + new_list = sorted(new_list) + new_list = [s for d, s in new_list] + + for s in new_list: + if (_matches_all_in_current_tuple(s, diff_cutoff) and + s not in tokens_used_in_answer): + current_tuple.append(s) + dfs(level + 1, s) + current_tuple.pop() + dfs(-1, '') + return all_accession_tuples + + +def _accession_row(msa_df: pd.DataFrame, accession_id: bytes) -> pd.Series: 
+ matched_df = msa_df[msa_df.msa_uniprot_accession_identifiers == accession_id] + return matched_df.iloc[0] + + +def _match_rows_by_genetic_distance( + this_species_msa_dfs: List[pd.DataFrame], + cutoff: int = 20) -> List[List[int]]: + """Finds MSA sequence pairings across chains within a genetic distance cutoff. + + The genetic distance between two sequences is approximated by taking the + difference in their UniProt accession ids. + + Args: + this_species_msa_dfs: a list of dataframes containing MSA features for + sequences for a specific species. If species is missing for a chain, the + dataframe is set to None. + cutoff: the genetic distance cutoff. + + Returns: + A list of lists, each containing M indices corresponding to paired MSA rows, + where M is the number of chains. + """ + num_examples = len(this_species_msa_dfs) # N + + accession_id_lists = [] # M + match_index_to_chain_index = {} + for chain_index, species_df in enumerate(this_species_msa_dfs): + if species_df is not None: + accession_id_lists.append( + list(species_df.msa_uniprot_accession_identifiers.values)) + # Keep track of which of the this_species_msa_dfs are not None. + match_index_to_chain_index[len(accession_id_lists) - 1] = chain_index + + all_accession_id_matches = _find_all_accession_matches( + accession_id_lists, cutoff) # [k, M] + + all_paired_msa_rows = [] # [k, N] + for accession_id_match in all_accession_id_matches: + paired_msa_rows = [] + for match_index, accession_id in enumerate(accession_id_match): + # Map back to chain index. 
+ chain_index = match_index_to_chain_index[match_index] + seq_series = _accession_row( + this_species_msa_dfs[chain_index], accession_id) + + if (seq_series.msa_similarity > SEQUENCE_SIMILARITY_CUTOFF or + seq_series.gap > SEQUENCE_GAP_CUTOFF): + continue + else: + paired_msa_rows.append(seq_series.msa_row) + # If a sequence is skipped based on sequence similarity to the respective + # target sequence or a gap cutoff, the lengths of accession_id_match and + # paired_msa_rows will be different. Skip this match. + if len(paired_msa_rows) == len(accession_id_match): + paired_and_non_paired_msa_rows = np.array([-1] * num_examples) + matched_chain_indices = list(match_index_to_chain_index.values()) + paired_and_non_paired_msa_rows[matched_chain_indices] = paired_msa_rows + all_paired_msa_rows.append(list(paired_and_non_paired_msa_rows)) + return all_paired_msa_rows + + +def _match_rows_by_sequence_similarity(this_species_msa_dfs: List[pd.DataFrame] + ) -> List[List[int]]: + """Finds MSA sequence pairings across chains based on sequence similarity. + + Each chain's MSA sequences are first sorted by their sequence similarity to + their respective target sequence. The sequences are then paired, starting + from the sequences most similar to their target sequence. + + Args: + this_species_msa_dfs: a list of dataframes containing MSA features for + sequences for a specific species. + + Returns: + A list of lists, each containing M indices corresponding to paired MSA rows, + where M is the number of chains.
+ """ + all_paired_msa_rows = [] + + num_seqs = [len(species_df) for species_df in this_species_msa_dfs + if species_df is not None] + take_num_seqs = np.min(num_seqs) + + sort_by_similarity = ( + lambda x: x.sort_values('msa_similarity', axis=0, ascending=False)) + + for species_df in this_species_msa_dfs: + if species_df is not None: + species_df_sorted = sort_by_similarity(species_df) + msa_rows = species_df_sorted.msa_row.iloc[:take_num_seqs].values + else: + msa_rows = [-1] * take_num_seqs # take the last 'padding' row + all_paired_msa_rows.append(msa_rows) + all_paired_msa_rows = list(np.array(all_paired_msa_rows).transpose()) + return all_paired_msa_rows + + +def pair_sequences(examples: List[pipeline.FeatureDict], + prokaryotic: bool) -> Dict[int, np.ndarray]: + """Returns indices for paired MSA sequences across chains.""" + + num_examples = len(examples) + + all_chain_species_dict = [] + common_species = set() + for chain_features in examples: + msa_df = _make_msa_df(chain_features) + species_dict = _create_species_dict(msa_df) + all_chain_species_dict.append(species_dict) + common_species.update(set(species_dict)) + + common_species = sorted(common_species) + common_species.remove(b'') # Remove target sequence species. + + all_paired_msa_rows = [np.zeros(len(examples), int)] + all_paired_msa_rows_dict = {k: [] for k in range(num_examples)} + all_paired_msa_rows_dict[num_examples] = [np.zeros(len(examples), int)] + + for species in common_species: + if not species: + continue + this_species_msa_dfs = [] + species_dfs_present = 0 + for species_dict in all_chain_species_dict: + if species in species_dict: + this_species_msa_dfs.append(species_dict[species]) + species_dfs_present += 1 + else: + this_species_msa_dfs.append(None) + + # Skip species that are present in only one chain. 
+ if species_dfs_present <= 1: + continue + + if np.any( + np.array([len(species_df) for species_df in + this_species_msa_dfs if + isinstance(species_df, pd.DataFrame)]) > 600): + continue + + # In prokaryotes (and some eukaryotes), interacting genes are often + # co-located on the chromosome into operons. Because of that we can assume + # that if two proteins' intergenic distance is less than a threshold, the + # two proteins will form an interacting pair. + # In most eukaryotes, a single protein's MSA can contain many paralogs. + # Two genes may interact even if they are not close by genomic distance. + # In case of eukaryotes, some methods pair MSA sequences using sequence + # similarity method. + # See Jinbo Xu's work: + # https://www.ncbi.nlm.nih.gov/pmc/articles/PMC6030867/#B28. + if prokaryotic: + paired_msa_rows = _match_rows_by_genetic_distance(this_species_msa_dfs) + + if not paired_msa_rows: + continue + else: + paired_msa_rows = _match_rows_by_sequence_similarity(this_species_msa_dfs) + all_paired_msa_rows.extend(paired_msa_rows) + all_paired_msa_rows_dict[species_dfs_present].extend(paired_msa_rows) + all_paired_msa_rows_dict = { + num_examples: np.array(paired_msa_rows) for + num_examples, paired_msa_rows in all_paired_msa_rows_dict.items() + } + return all_paired_msa_rows_dict + + +def reorder_paired_rows(all_paired_msa_rows_dict: Dict[int, np.ndarray] + ) -> np.ndarray: + """Creates a list of indices of paired MSA rows across chains. + + Args: + all_paired_msa_rows_dict: a mapping from the number of paired chains to the + paired indices. + + Returns: + a list of lists, each containing indices of paired MSA rows across chains. + The paired-index lists are ordered by: + 1) the number of chains in the paired alignment, i.e., all-chain pairings + will come first.
+ 2) e-values + """ + all_paired_msa_rows = [] + + for num_pairings in sorted(all_paired_msa_rows_dict, reverse=True): + paired_rows = all_paired_msa_rows_dict[num_pairings] + paired_rows_product = abs(np.array([np.prod(rows) for rows in paired_rows])) + paired_rows_sort_index = np.argsort(paired_rows_product) + all_paired_msa_rows.extend(paired_rows[paired_rows_sort_index]) + + return np.array(all_paired_msa_rows) + + +def block_diag(*arrs: np.ndarray, pad_value: float = 0.0) -> np.ndarray: + """Like scipy.linalg.block_diag but with an optional padding value.""" + ones_arrs = [np.ones_like(x) for x in arrs] + off_diag_mask = 1.0 - scipy.linalg.block_diag(*ones_arrs) + diag = scipy.linalg.block_diag(*arrs) + diag += (off_diag_mask * pad_value).astype(diag.dtype) + return diag + + +def _correct_post_merged_feats( + np_example: pipeline.FeatureDict, + np_chains_list: Sequence[pipeline.FeatureDict], + pair_msa_sequences: bool) -> pipeline.FeatureDict: + """Adds features that need to be computed/recomputed post merging.""" + + np_example['seq_length'] = np.asarray(np_example['aatype'].shape[0], + dtype=np.int32) + np_example['num_alignments'] = np.asarray(np_example['msa'].shape[0], + dtype=np.int32) + + if not pair_msa_sequences: + # Generate a bias that is 1 for the first row of every block in the + # block diagonal MSA - i.e. make sure the cluster stack always includes + # the query sequences for each chain (since the first row is the query + # sequence). + cluster_bias_masks = [] + for chain in np_chains_list: + mask = np.zeros(chain['msa'].shape[0]) + mask[0] = 1 + cluster_bias_masks.append(mask) + np_example['cluster_bias_mask'] = np.concatenate(cluster_bias_masks) + + # Initialize Bert mask with masked out off diagonals. 
+ msa_masks = [np.ones(x['msa'].shape, dtype=np.float32) + for x in np_chains_list] + + np_example['bert_mask'] = block_diag( + *msa_masks, pad_value=0) + else: + np_example['cluster_bias_mask'] = np.zeros(np_example['msa'].shape[0]) + np_example['cluster_bias_mask'][0] = 1 + + # Initialize Bert mask with masked out off diagonals. + msa_masks = [np.ones(x['msa'].shape, dtype=np.float32) for + x in np_chains_list] + msa_masks_all_seq = [np.ones(x['msa_all_seq'].shape, dtype=np.float32) for + x in np_chains_list] + + msa_mask_block_diag = block_diag( + *msa_masks, pad_value=0) + msa_mask_all_seq = np.concatenate(msa_masks_all_seq, axis=1) + np_example['bert_mask'] = np.concatenate( + [msa_mask_all_seq, msa_mask_block_diag], axis=0) + return np_example + + +def _pad_templates(chains: Sequence[pipeline.FeatureDict], + max_templates: int) -> Sequence[pipeline.FeatureDict]: + """For each chain pad the number of templates to a fixed size. + + Args: + chains: A list of protein chains. + max_templates: Each chain will be padded to have this many templates. + + Returns: + The list of chains, updated to have template features padded to + max_templates. + """ + for chain in chains: + for k, v in chain.items(): + if k in TEMPLATE_FEATURES: + padding = np.zeros_like(v.shape) + padding[0] = max_templates - v.shape[0] + padding = [(0, p) for p in padding] + chain[k] = np.pad(v, padding, mode='constant') + return chains + + +def _merge_features_from_multiple_chains( + chains: Sequence[pipeline.FeatureDict], + pair_msa_sequences: bool) -> pipeline.FeatureDict: + """Merge features from multiple chains. + + Args: + chains: A list of feature dictionaries that we want to merge. + pair_msa_sequences: Whether to concatenate MSA features along the + num_res dimension (if True), or to block diagonalize them (if False). + + Returns: + A feature dictionary for the merged example. 
+ """ + merged_example = {} + for feature_name in chains[0]: + feats = [x[feature_name] for x in chains] + feature_name_split = feature_name.split('_all_seq')[0] + if feature_name_split in MSA_FEATURES: + if pair_msa_sequences or '_all_seq' in feature_name: + merged_example[feature_name] = np.concatenate(feats, axis=1) + else: + merged_example[feature_name] = block_diag( + *feats, pad_value=MSA_PAD_VALUES[feature_name]) + elif feature_name_split in SEQ_FEATURES: + merged_example[feature_name] = np.concatenate(feats, axis=0) + elif feature_name_split in TEMPLATE_FEATURES: + merged_example[feature_name] = np.concatenate(feats, axis=1) + elif feature_name_split in CHAIN_FEATURES: + merged_example[feature_name] = np.sum(x for x in feats).astype(np.int32) + else: + merged_example[feature_name] = feats[0] + return merged_example + + +def _merge_homomers_dense_msa( + chains: Iterable[pipeline.FeatureDict]) -> Sequence[pipeline.FeatureDict]: + """Merge all identical chains, making the resulting MSA dense. + + Args: + chains: An iterable of features for each chain. + + Returns: + A list of feature dictionaries. All features with the same entity_id + will be merged - MSA features will be concatenated along the num_res + dimension - making them dense. 
+ """ + entity_chains = collections.defaultdict(list) + for chain in chains: + entity_id = chain['entity_id'][0] + entity_chains[entity_id].append(chain) + + grouped_chains = [] + for entity_id in sorted(entity_chains): + chains = entity_chains[entity_id] + grouped_chains.append(chains) + chains = [ + _merge_features_from_multiple_chains(chains, pair_msa_sequences=True) + for chains in grouped_chains] + return chains + + +def _concatenate_paired_and_unpaired_features( + example: pipeline.FeatureDict) -> pipeline.FeatureDict: + """Merges paired and block-diagonalised features.""" + features = MSA_FEATURES + for feature_name in features: + if feature_name in example: + feat = example[feature_name] + feat_all_seq = example[feature_name + '_all_seq'] + merged_feat = np.concatenate([feat_all_seq, feat], axis=0) + example[feature_name] = merged_feat + example['num_alignments'] = np.array(example['msa'].shape[0], + dtype=np.int32) + return example + + +def merge_chain_features(np_chains_list: List[pipeline.FeatureDict], + pair_msa_sequences: bool, + max_templates: int) -> pipeline.FeatureDict: + """Merges features for multiple chains to single FeatureDict. + + Args: + np_chains_list: List of FeatureDicts for each chain. + pair_msa_sequences: Whether to merge paired MSAs. + max_templates: The maximum number of templates to include. + + Returns: + Single FeatureDict for entire complex. + """ + np_chains_list = _pad_templates( + np_chains_list, max_templates=max_templates) + np_chains_list = _merge_homomers_dense_msa(np_chains_list) + # Unpaired MSA features will be always block-diagonalised; paired MSA + # features will be concatenated. 
+ np_example = _merge_features_from_multiple_chains( + np_chains_list, pair_msa_sequences=False) + if pair_msa_sequences: + np_example = _concatenate_paired_and_unpaired_features(np_example) + np_example = _correct_post_merged_feats( + np_example=np_example, + np_chains_list=np_chains_list, + pair_msa_sequences=pair_msa_sequences) + + return np_example + + +def deduplicate_unpaired_sequences( + np_chains: List[pipeline.FeatureDict]) -> List[pipeline.FeatureDict]: + """Removes unpaired sequences which duplicate a paired sequence.""" + + feature_names = np_chains[0].keys() + msa_features = MSA_FEATURES + + for chain in np_chains: + sequence_set = set(tuple(s) for s in chain['msa_all_seq']) + keep_rows = [] + # Go through unpaired MSA seqs and remove any rows that correspond to the + # sequences that are already present in the paired MSA. + for row_num, seq in enumerate(chain['msa']): + if tuple(seq) not in sequence_set: + keep_rows.append(row_num) + for feature_name in feature_names: + if feature_name in msa_features: + if keep_rows: + chain[feature_name] = chain[feature_name][keep_rows] + else: + new_shape = list(chain[feature_name].shape) + new_shape[0] = 0 + chain[feature_name] = np.zeros(new_shape, + dtype=chain[feature_name].dtype) + chain['num_alignments'] = np.array(chain['msa'].shape[0], dtype=np.int32) + return np_chains diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/data/parsers.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/data/parsers.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,607 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Functions for parsing various file formats.""" +import collections +import dataclasses +import itertools +import re +import string +from typing import Dict, Iterable, List, Optional, Sequence, Tuple, Set + +DeletionMatrix = Sequence[Sequence[int]] + + +@dataclasses.dataclass(frozen=True) +class Msa: + """Class representing a parsed MSA file.""" + sequences: Sequence[str] + deletion_matrix: DeletionMatrix + descriptions: Sequence[str] + + def __post_init__(self): + if not (len(self.sequences) == + len(self.deletion_matrix) == + len(self.descriptions)): + raise ValueError( + 'All fields for an MSA must have the same length. ' + f'Got {len(self.sequences)} sequences, ' + f'{len(self.deletion_matrix)} rows in the deletion matrix and ' + f'{len(self.descriptions)} descriptions.') + + def __len__(self): + return len(self.sequences) + + def truncate(self, max_seqs: int): + return Msa(sequences=self.sequences[:max_seqs], + deletion_matrix=self.deletion_matrix[:max_seqs], + descriptions=self.descriptions[:max_seqs]) + + +@dataclasses.dataclass(frozen=True) +class TemplateHit: + """Class representing a template hit.""" + index: int + name: str + aligned_cols: int + sum_probs: Optional[float] + query: str + hit_sequence: str + indices_query: List[int] + indices_hit: List[int] + + +def parse_fasta(fasta_string: str) -> Tuple[Sequence[str], Sequence[str]]: + """Parses FASTA string and returns list of strings with amino-acid sequences. + + Arguments: + fasta_string: The string contents of a FASTA file. + + Returns: + A tuple of two lists: + * A list of sequences. 
+ * A list of sequence descriptions taken from the comment lines. In the + same order as the sequences. + """ + sequences = [] + descriptions = [] + index = -1 + for line in fasta_string.splitlines(): + line = line.strip() + if line.startswith('>'): + index += 1 + descriptions.append(line[1:]) # Remove the '>' at the beginning. + sequences.append('') + continue + elif not line: + continue # Skip blank lines. + sequences[index] += line + + return sequences, descriptions + + +def parse_stockholm(stockholm_string: str) -> Msa: + """Parses sequences and deletion matrix from stockholm format alignment. + + Args: + stockholm_string: The string contents of a stockholm file. The first + sequence in the file should be the query sequence. + + Returns: + An Msa object containing: + * A list of sequences that have been aligned to the query. These + might contain duplicates. + * The deletion matrix for the alignment as a list of lists. The element + at `deletion_matrix[i][j]` is the number of residues deleted from + the aligned sequence i at residue position j. + * The names of the targets matched, including the jackhmmer subsequence + suffix. + """ + name_to_sequence = collections.OrderedDict() + for line in stockholm_string.splitlines(): + line = line.strip() + if not line or line.startswith(('#', '//')): + continue + name, sequence = line.split() + if name not in name_to_sequence: + name_to_sequence[name] = '' + name_to_sequence[name] += sequence + + msa = [] + deletion_matrix = [] + + query = '' + keep_columns = [] + for seq_index, sequence in enumerate(name_to_sequence.values()): + if seq_index == 0: + # Gather the columns with gaps from the query + query = sequence + keep_columns = [i for i, res in enumerate(query) if res != '-'] + + # Remove the columns with gaps in the query from all sequences. + aligned_sequence = ''.join([sequence[c] for c in keep_columns]) + + msa.append(aligned_sequence) + + # Count the number of deletions w.r.t. query.
+ deletion_vec = [] + deletion_count = 0 + for seq_res, query_res in zip(sequence, query): + if seq_res != '-' or query_res != '-': + if query_res == '-': + deletion_count += 1 + else: + deletion_vec.append(deletion_count) + deletion_count = 0 + deletion_matrix.append(deletion_vec) + + return Msa(sequences=msa, + deletion_matrix=deletion_matrix, + descriptions=list(name_to_sequence.keys())) + + +def parse_a3m(a3m_string: str) -> Msa: + """Parses sequences and deletion matrix from a3m format alignment. + + Args: + a3m_string: The string contents of an a3m file. The first sequence in the + file should be the query sequence. + + Returns: + An Msa object containing: + * A list of sequences that have been aligned to the query. These + might contain duplicates. + * The deletion matrix for the alignment as a list of lists. The element + at `deletion_matrix[i][j]` is the number of residues deleted from + the aligned sequence i at residue position j. + * A list of descriptions, one per sequence, from the a3m file. + """ + sequences, descriptions = parse_fasta(a3m_string) + deletion_matrix = [] + for msa_sequence in sequences: + deletion_vec = [] + deletion_count = 0 + for j in msa_sequence: + if j.islower(): + deletion_count += 1 + else: + deletion_vec.append(deletion_count) + deletion_count = 0 + deletion_matrix.append(deletion_vec) + + # Make the MSA matrix out of aligned (deletion-free) sequences.
+ deletion_table = str.maketrans('', '', string.ascii_lowercase) + aligned_sequences = [s.translate(deletion_table) for s in sequences] + return Msa(sequences=aligned_sequences, + deletion_matrix=deletion_matrix, + descriptions=descriptions) + + +def _convert_sto_seq_to_a3m( + query_non_gaps: Sequence[bool], sto_seq: str) -> Iterable[str]: + for is_query_res_non_gap, sequence_res in zip(query_non_gaps, sto_seq): + if is_query_res_non_gap: + yield sequence_res + elif sequence_res != '-': + yield sequence_res.lower() + + +def convert_stockholm_to_a3m(stockholm_format: str, + max_sequences: Optional[int] = None, + remove_first_row_gaps: bool = True) -> str: + """Converts MSA in Stockholm format to the A3M format.""" + descriptions = {} + sequences = {} + reached_max_sequences = False + + for line in stockholm_format.splitlines(): + reached_max_sequences = max_sequences and len(sequences) >= max_sequences + if line.strip() and not line.startswith(('#', '//')): + # Ignore blank lines, markup and end symbols - remainder are alignment + # sequence parts. + seqname, aligned_seq = line.split(maxsplit=1) + if seqname not in sequences: + if reached_max_sequences: + continue + sequences[seqname] = '' + sequences[seqname] += aligned_seq + + for line in stockholm_format.splitlines(): + if line[:4] == '#=GS': + # Description row - example format is: + # #=GS UniRef90_Q9H5Z4/4-78 DE [subseq from] cDNA: FLJ22755 ... 
+ columns = line.split(maxsplit=3) + seqname, feature = columns[1:3] + value = columns[3] if len(columns) == 4 else '' + if feature != 'DE': + continue + if reached_max_sequences and seqname not in sequences: + continue + descriptions[seqname] = value + if len(descriptions) == len(sequences): + break + + # Convert sto format to a3m line by line + a3m_sequences = {} + if remove_first_row_gaps: + # query_sequence is assumed to be the first sequence + query_sequence = next(iter(sequences.values())) + query_non_gaps = [res != '-' for res in query_sequence] + for seqname, sto_sequence in sequences.items(): + # Dots are optional in a3m format and are commonly removed. + out_sequence = sto_sequence.replace('.', '') + if remove_first_row_gaps: + out_sequence = ''.join( + _convert_sto_seq_to_a3m(query_non_gaps, out_sequence)) + a3m_sequences[seqname] = out_sequence + + fasta_chunks = (f">{k} {descriptions.get(k, '')}\n{a3m_sequences[k]}" + for k in a3m_sequences) + return '\n'.join(fasta_chunks) + '\n' # Include terminating newline. + + +def _keep_line(line: str, seqnames: Set[str]) -> bool: + """Function to decide which lines to keep.""" + if not line.strip(): + return True + if line.strip() == '//': # End tag + return True + if line.startswith('# STOCKHOLM'): # Start tag + return True + if line.startswith('#=GC RF'): # Reference Annotation Line + return True + if line[:4] == '#=GS': # Description lines - keep if sequence in list. + _, seqname, _ = line.split(maxsplit=2) + return seqname in seqnames + elif line.startswith('#'): # Other markup - filter out + return False + else: # Alignment data - keep if sequence in list. 
+ seqname = line.partition(' ')[0] + return seqname in seqnames + + +def truncate_stockholm_msa(stockholm_msa: str, max_sequences: int) -> str: + """Truncates a stockholm file to a maximum number of sequences.""" + seqnames = set() + filtered_lines = [] + for line in stockholm_msa.splitlines(): + if line.strip() and not line.startswith(('#', '//')): + # Ignore blank lines, markup and end symbols - remainder are alignment + # sequence parts. + seqname = line.partition(' ')[0] + seqnames.add(seqname) + if len(seqnames) >= max_sequences: + break + + for line in stockholm_msa.splitlines(): + if _keep_line(line, seqnames): + filtered_lines.append(line) + + return '\n'.join(filtered_lines) + '\n' + + +def remove_empty_columns_from_stockholm_msa(stockholm_msa: str) -> str: + """Removes empty columns (dashes-only) from a Stockholm MSA.""" + processed_lines = {} + unprocessed_lines = {} + for i, line in enumerate(stockholm_msa.splitlines()): + if line.startswith('#=GC RF'): + reference_annotation_i = i + reference_annotation_line = line + # Reached the end of this chunk of the alignment. Process chunk. + _, _, first_alignment = line.rpartition(' ') + mask = [] + for j in range(len(first_alignment)): + for _, unprocessed_line in unprocessed_lines.items(): + prefix, _, alignment = unprocessed_line.rpartition(' ') + if alignment[j] != '-': + mask.append(True) + break + else: # Every row contained a hyphen - empty column. + mask.append(False) + # Add reference annotation for processing with mask. + unprocessed_lines[reference_annotation_i] = reference_annotation_line + + if not any(mask): # All columns were empty. Output empty lines for chunk. 
+ for line_index in unprocessed_lines: + processed_lines[line_index] = '' + else: + for line_index, unprocessed_line in unprocessed_lines.items(): + prefix, _, alignment = unprocessed_line.rpartition(' ') + masked_alignment = ''.join(itertools.compress(alignment, mask)) + processed_lines[line_index] = f'{prefix} {masked_alignment}' + + # Clear raw_alignments. + unprocessed_lines = {} + elif line.strip() and not line.startswith(('#', '//')): + unprocessed_lines[i] = line + else: + processed_lines[i] = line + return '\n'.join((processed_lines[i] for i in range(len(processed_lines)))) + + +def deduplicate_stockholm_msa(stockholm_msa: str) -> str: + """Remove duplicate sequences (ignoring insertions wrt query).""" + sequence_dict = collections.defaultdict(str) + + # First we must extract all sequences from the MSA. + for line in stockholm_msa.splitlines(): + # Only consider the alignments - ignore reference annotation, empty lines, + # descriptions or markup. + if line.strip() and not line.startswith(('#', '//')): + line = line.strip() + seqname, alignment = line.split() + sequence_dict[seqname] += alignment + + seen_sequences = set() + seqnames = set() + # First alignment is the query. + query_align = next(iter(sequence_dict.values())) + mask = [c != '-' for c in query_align] # Mask is False for insertions. + for seqname, alignment in sequence_dict.items(): + # Apply mask to remove all insertions from the string. 
+ masked_alignment = ''.join(itertools.compress(alignment, mask)) + if masked_alignment in seen_sequences: + continue + else: + seen_sequences.add(masked_alignment) + seqnames.add(seqname) + + filtered_lines = [] + for line in stockholm_msa.splitlines(): + if _keep_line(line, seqnames): + filtered_lines.append(line) + + return '\n'.join(filtered_lines) + '\n' + + +def _get_hhr_line_regex_groups( + regex_pattern: str, line: str) -> Sequence[Optional[str]]: + match = re.match(regex_pattern, line) + if match is None: + raise RuntimeError(f'Could not parse query line {line}') + return match.groups() + + +def _update_hhr_residue_indices_list( + sequence: str, start_index: int, indices_list: List[int]): + """Computes the relative indices for each residue with respect to the original sequence.""" + counter = start_index + for symbol in sequence: + if symbol == '-': + indices_list.append(-1) + else: + indices_list.append(counter) + counter += 1 + + +def _parse_hhr_hit(detailed_lines: Sequence[str]) -> TemplateHit: + """Parses the detailed HMM HMM comparison section for a single Hit. + + This works on .hhr files generated from both HHBlits and HHSearch. + + Args: + detailed_lines: A list of lines from a single comparison section between 2 + sequences (which each have their own HMM's) + + Returns: + A dictionary with the information from that detailed comparison section + + Raises: + RuntimeError: If a certain line cannot be processed + """ + # Parse first 2 lines. + number_of_hit = int(detailed_lines[0].split()[-1]) + name_hit = detailed_lines[1][1:] + + # Parse the summary line. + pattern = ( + 'Probab=(.*)[\t ]*E-value=(.*)[\t ]*Score=(.*)[\t ]*Aligned_cols=(.*)[\t' + ' ]*Identities=(.*)%[\t ]*Similarity=(.*)[\t ]*Sum_probs=(.*)[\t ' + ']*Template_Neff=(.*)') + match = re.match(pattern, detailed_lines[2]) + if match is None: + raise RuntimeError( + 'Could not parse section: %s. Expected this: \n%s to contain summary.' 
% + (detailed_lines, detailed_lines[2])) + (_, _, _, aligned_cols, _, _, sum_probs, _) = [float(x) + for x in match.groups()] + + # The next section reads the detailed comparisons. These are in a 'human + # readable' format which has a fixed length. The strategy employed is to + # assume that each block starts with the query sequence line, and to parse + # that with a regexp in order to deduce the fixed length used for that block. + query = '' + hit_sequence = '' + indices_query = [] + indices_hit = [] + length_block = None + + for line in detailed_lines[3:]: + # Parse the query sequence line + if (line.startswith('Q ') and not line.startswith('Q ss_dssp') and + not line.startswith('Q ss_pred') and + not line.startswith('Q Consensus')): + # Thus the first 17 characters must be 'Q ', and we can parse + # everything after that. + # start sequence end total_sequence_length + patt = r'[\t ]*([0-9]*) ([A-Z-]*)[\t ]*([0-9]*) \([0-9]*\)' + groups = _get_hhr_line_regex_groups(patt, line[17:]) + + # Get the length of the parsed block using the start and finish indices, + # and ensure it is the same as the actual block length. + start = int(groups[0]) - 1 # Make index zero based. + delta_query = groups[1] + end = int(groups[2]) + num_insertions = len([x for x in delta_query if x == '-']) + length_block = end - start + num_insertions + assert length_block == len(delta_query) + + # Update the query sequence and indices list. + query += delta_query + _update_hhr_residue_indices_list(delta_query, start, indices_query) + + elif line.startswith('T '): + # Parse the hit sequence. + if (not line.startswith('T ss_dssp') and + not line.startswith('T ss_pred') and + not line.startswith('T Consensus')): + # Thus the first 17 characters must be 'T ', and we can + # parse everything after that. 
def parse_e_values_from_tblout(tblout: str) -> Dict[str, float]:
  """Parses a target-name to E-value mapping from a Jackhmmer tblout string.

  Args:
    tblout: Full text of a tblout file. Lines starting with '#' are treated as
      comments; empty and whitespace-only lines are ignored.

  Returns:
    A dict mapping each target name (field 1) to its full-sequence E-value
    (field 5). The dict always contains the entry 'query' -> 0.

  Raises:
    IndexError: If a data line has fewer than five whitespace-separated fields.
    ValueError: If the E-value field cannot be converted to a float.
  """
  e_values = {'query': 0}
  # As per http://eddylab.org/software/hmmer/Userguide.pdf fields are
  # space-delimited. Relevant fields are (1) target name and
  # (5) E-value (full sequence) (numbering from 1).
  for line in tblout.splitlines():
    # Blank lines carry no data. Indexing line[0] on an empty line, or
    # fields[4] on a whitespace-only one, would raise IndexError.
    if not line.strip() or line.startswith('#'):
      continue
    fields = line.split()
    e_values[fields[0]] = float(fields[4])
  return e_values
+ parsed_a3m = list(zip(*parse_fasta(a3m_string))) + if skip_first: + parsed_a3m = parsed_a3m[1:] + + indices_query = _get_indices(query_sequence, start=0) + + hits = [] + for i, (hit_sequence, hit_description) in enumerate(parsed_a3m, start=1): + if 'mol:protein' not in hit_description: + continue # Skip non-protein chains. + metadata = _parse_hmmsearch_description(hit_description) + # Aligned columns are only the match states. + aligned_cols = sum([r.isupper() and r != '-' for r in hit_sequence]) + indices_hit = _get_indices(hit_sequence, start=metadata.start - 1) + + hit = TemplateHit( + index=i, + name=f'{metadata.pdb_id}_{metadata.chain}', + aligned_cols=aligned_cols, + sum_probs=None, + query=query_sequence, + hit_sequence=hit_sequence.upper(), + indices_query=indices_query, + indices_hit=indices_hit, + ) + hits.append(hit) + + return hits diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/data/pipeline.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/data/pipeline.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,230 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
def make_sequence_features(
    sequence: str, description: str, num_res: int) -> FeatureDict:
  """Builds the feature dict that describes a single input sequence.

  Args:
    sequence: The amino-acid sequence string.
    description: Free-text description stored (UTF-8 encoded) as 'domain_name'.
    num_res: Number of residues; sets the length of the per-residue arrays.

  Returns:
    A dict with the keys 'aatype', 'between_segment_residues', 'domain_name',
    'residue_index', 'seq_length' and 'sequence'.
  """
  # 'aatype' comes from residue_constants.sequence_to_onehot; residues outside
  # the mapping are mapped to X (map_unknown_to_x=True).
  aatype = residue_constants.sequence_to_onehot(
      sequence=sequence,
      mapping=residue_constants.restype_order_with_x,
      map_unknown_to_x=True)
  return {
      'aatype': aatype,
      'between_segment_residues': np.zeros((num_res,), dtype=np.int32),
      'domain_name': np.array([description.encode('utf-8')],
                              dtype=np.object_),
      # 0, 1, ..., num_res - 1.
      'residue_index': np.arange(num_res, dtype=np.int32),
      # The sequence length repeated once per residue.
      'seq_length': np.full((num_res,), num_res, dtype=np.int32),
      'sequence': np.array([sequence.encode('utf-8')], dtype=np.object_),
  }
def run_msa_tool(msa_runner, input_fasta_path: str, msa_out_path: str,
                 msa_format: str, use_precomputed_msas: bool,
                 ) -> Mapping[str, Any]:
  """Runs an MSA tool, or reuses its output file when allowed and present.

  Args:
    msa_runner: Object whose `query(input_fasta_path)` returns a sequence; the
      first element is a mapping that contains the key `msa_format`.
    input_fasta_path: Path of the FASTA file passed to the runner.
    msa_out_path: File the alignment text is written to / read from.
    msa_format: Key of the alignment text in the result, e.g. 'sto' or 'a3m'.
    use_precomputed_msas: If True and `msa_out_path` exists, the file is read
      instead of running the tool.

  Returns:
    The runner's first result mapping when the tool was run, otherwise a
    one-entry dict `{msa_format: <file contents>}`.
  """
  if use_precomputed_msas and os.path.exists(msa_out_path):
    # Cached path: only the alignment text is available, nothing else the
    # runner might have returned.
    logging.warning('Reading MSA from file %s', msa_out_path)
    with open(msa_out_path, 'r') as cached_file:
      return {msa_format: cached_file.read()}

  tool_output = msa_runner.query(input_fasta_path)[0]
  with open(msa_out_path, 'w') as out_file:
    out_file.write(tool_output[msa_format])
  return tool_output
+ use_precomputed_msas: bool = False): + """Initializes the data pipeline.""" + self._use_small_bfd = use_small_bfd + self.jackhmmer_uniref90_runner = jackhmmer.Jackhmmer( + binary_path=jackhmmer_binary_path, + database_path=uniref90_database_path) + if use_small_bfd: + self.jackhmmer_small_bfd_runner = jackhmmer.Jackhmmer( + binary_path=jackhmmer_binary_path, + database_path=small_bfd_database_path) + else: + self.hhblits_bfd_uniclust_runner = hhblits.HHBlits( + binary_path=hhblits_binary_path, + databases=[bfd_database_path, uniclust30_database_path]) + self.jackhmmer_mgnify_runner = jackhmmer.Jackhmmer( + binary_path=jackhmmer_binary_path, + database_path=mgnify_database_path) + self.template_searcher = template_searcher + self.template_featurizer = template_featurizer + self.mgnify_max_hits = mgnify_max_hits + self.uniref_max_hits = uniref_max_hits + self.use_precomputed_msas = use_precomputed_msas + + def process(self, input_fasta_path: str, msa_output_dir: str) -> FeatureDict: + """Runs alignment tools on the input sequence and creates features.""" + with open(input_fasta_path) as f: + input_fasta_str = f.read() + input_seqs, input_descs = parsers.parse_fasta(input_fasta_str) + if len(input_seqs) != 1: + raise ValueError( + f'More than one input sequence found in {input_fasta_path}.') + input_sequence = input_seqs[0] + input_description = input_descs[0] + num_res = len(input_sequence) + + uniref90_out_path = os.path.join(msa_output_dir, 'uniref90_hits.sto') + jackhmmer_uniref90_result = run_msa_tool( + self.jackhmmer_uniref90_runner, input_fasta_path, uniref90_out_path, + 'sto', self.use_precomputed_msas) + mgnify_out_path = os.path.join(msa_output_dir, 'mgnify_hits.sto') + jackhmmer_mgnify_result = run_msa_tool( + self.jackhmmer_mgnify_runner, input_fasta_path, mgnify_out_path, 'sto', + self.use_precomputed_msas) + + msa_for_templates = jackhmmer_uniref90_result['sto'] + msa_for_templates = parsers.truncate_stockholm_msa( + msa_for_templates, 
max_sequences=self.uniref_max_hits) + msa_for_templates = parsers.deduplicate_stockholm_msa( + msa_for_templates) + msa_for_templates = parsers.remove_empty_columns_from_stockholm_msa( + msa_for_templates) + + if self.template_searcher.input_format == 'sto': + pdb_templates_result = self.template_searcher.query(msa_for_templates) + elif self.template_searcher.input_format == 'a3m': + uniref90_msa_as_a3m = parsers.convert_stockholm_to_a3m(msa_for_templates) + pdb_templates_result = self.template_searcher.query(uniref90_msa_as_a3m) + else: + raise ValueError('Unrecognized template input format: ' + f'{self.template_searcher.input_format}') + + pdb_hits_out_path = os.path.join( + msa_output_dir, f'pdb_hits.{self.template_searcher.output_format}') + with open(pdb_hits_out_path, 'w') as f: + f.write(pdb_templates_result) + + uniref90_msa = parsers.parse_stockholm(jackhmmer_uniref90_result['sto']) + uniref90_msa = uniref90_msa.truncate(max_seqs=self.uniref_max_hits) + mgnify_msa = parsers.parse_stockholm(jackhmmer_mgnify_result['sto']) + mgnify_msa = mgnify_msa.truncate(max_seqs=self.mgnify_max_hits) + + pdb_template_hits = self.template_searcher.get_template_hits( + output_string=pdb_templates_result, input_sequence=input_sequence) + + if self._use_small_bfd: + bfd_out_path = os.path.join(msa_output_dir, 'small_bfd_hits.sto') + jackhmmer_small_bfd_result = run_msa_tool( + self.jackhmmer_small_bfd_runner, input_fasta_path, bfd_out_path, + 'sto', self.use_precomputed_msas) + bfd_msa = parsers.parse_stockholm(jackhmmer_small_bfd_result['sto']) + else: + bfd_out_path = os.path.join(msa_output_dir, 'bfd_uniclust_hits.a3m') + hhblits_bfd_uniclust_result = run_msa_tool( + self.hhblits_bfd_uniclust_runner, input_fasta_path, bfd_out_path, + 'a3m', self.use_precomputed_msas) + bfd_msa = parsers.parse_a3m(hhblits_bfd_uniclust_result['a3m']) + + templates_result = self.template_featurizer.get_templates( + query_sequence=input_sequence, + hits=pdb_template_hits) + + 
sequence_features = make_sequence_features( + sequence=input_sequence, + description=input_description, + num_res=num_res) + + msa_features = make_msa_features((uniref90_msa, bfd_msa, mgnify_msa)) + + logging.info('Uniref90 MSA size: %d sequences.', len(uniref90_msa)) + logging.info('BFD MSA size: %d sequences.', len(bfd_msa)) + logging.info('MGnify MSA size: %d sequences.', len(mgnify_msa)) + logging.info('Final (deduplicated) MSA size: %d sequences.', + msa_features['num_alignments'][0]) + logging.info('Total number of templates (NB: this can include bad ' + 'templates and is later filtered to top 4): %d.', + templates_result.features['template_domain_names'].shape[0]) + + return {**sequence_features, **msa_features, **templates_result.features} diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/data/pipeline_multimer.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/data/pipeline_multimer.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,288 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
@dataclasses.dataclass(frozen=True)
class _FastaChain:
  """Immutable record pairing one chain's sequence with its description."""
  sequence: str
  description: str
@contextlib.contextmanager
def temp_fasta_file(fasta_str: str):
  """Context manager yielding the path of a temporary '.fasta' file.

  Args:
    fasta_str: Text written to the temporary file.

  Yields:
    The file name of the temporary file. The file is closed (and thereby
    removed, as it is a NamedTemporaryFile) when the context exits.
  """
  handle = tempfile.NamedTemporaryFile('w', suffix='.fasta')
  try:
    handle.write(fasta_str)
    # Seeking pushes the buffered text to disk so that tools opening the file
    # by name see the full content.
    handle.seek(0)
    yield handle.name
  finally:
    handle.close()
def pad_msa(np_example, min_num_seq):
  """Zero-pads the MSA features so that they have at least `min_num_seq` rows.

  Args:
    np_example: Mapping from feature name to array. 'msa' is always read; when
      padding is needed, 'deletion_matrix', 'bert_mask' and 'msa_mask' are
      padded along their first of two axes and 'cluster_bias_mask' along its
      only axis.
    min_num_seq: Minimum number of rows the padded features must have.

  Returns:
    A new dict (the input mapping is not modified). Features that are not
    padded are the same array objects as in the input.
  """
  padded = dict(np_example)
  missing_rows = min_num_seq - padded['msa'].shape[0]
  if missing_rows <= 0:
    return padded

  row_pad = (0, missing_rows)  # Append rows at the end only.
  for key in ('msa', 'deletion_matrix', 'bert_mask', 'msa_mask'):
    padded[key] = np.pad(padded[key], (row_pad, (0, 0)))
  padded['cluster_bias_mask'] = np.pad(
      padded['cluster_bias_mask'], (row_pad,))
  return padded
there are 2 or more unique + # sequences. + if not is_homomer_or_monomer: + all_seq_msa_features = self._all_seq_msa_features(chain_fasta_path, + chain_msa_output_dir) + chain_features.update(all_seq_msa_features) + return chain_features + + def _all_seq_msa_features(self, input_fasta_path, msa_output_dir): + """Get MSA features for unclustered uniprot, for pairing.""" + out_path = os.path.join(msa_output_dir, 'uniprot_hits.sto') + result = pipeline.run_msa_tool( + self._uniprot_msa_runner, input_fasta_path, out_path, 'sto', + self.use_precomputed_msas) + msa = parsers.parse_stockholm(result['sto']) + msa = msa.truncate(max_seqs=self._max_uniprot_hits) + all_seq_features = pipeline.make_msa_features([msa]) + valid_feats = msa_pairing.MSA_FEATURES + ( + 'msa_uniprot_accession_identifiers', + 'msa_species_identifiers', + ) + feats = {f'{k}_all_seq': v for k, v in all_seq_features.items() + if k in valid_feats} + return feats + + def process(self, + input_fasta_path: str, + msa_output_dir: str, + is_prokaryote: bool = False) -> pipeline.FeatureDict: + """Runs alignment tools on the input sequences and creates features.""" + with open(input_fasta_path) as f: + input_fasta_str = f.read() + input_seqs, input_descs = parsers.parse_fasta(input_fasta_str) + + chain_id_map = _make_chain_id_map(sequences=input_seqs, + descriptions=input_descs) + chain_id_map_path = os.path.join(msa_output_dir, 'chain_id_map.json') + with open(chain_id_map_path, 'w') as f: + chain_id_map_dict = {chain_id: dataclasses.asdict(fasta_chain) + for chain_id, fasta_chain in chain_id_map.items()} + json.dump(chain_id_map_dict, f, indent=4, sort_keys=True) + + all_chain_features = {} + sequence_features = {} + is_homomer_or_monomer = len(set(input_seqs)) == 1 + for chain_id, fasta_chain in chain_id_map.items(): + if fasta_chain.sequence in sequence_features: + all_chain_features[chain_id] = copy.deepcopy( + sequence_features[fasta_chain.sequence]) + continue + chain_features = 
self._process_single_chain( + chain_id=chain_id, + sequence=fasta_chain.sequence, + description=fasta_chain.description, + msa_output_dir=msa_output_dir, + is_homomer_or_monomer=is_homomer_or_monomer) + + chain_features = convert_monomer_features(chain_features, + chain_id=chain_id) + all_chain_features[chain_id] = chain_features + sequence_features[fasta_chain.sequence] = chain_features + + all_chain_features = add_assembly_features(all_chain_features) + + np_example = feature_processing.pair_and_merge( + all_chain_features=all_chain_features, + is_prokaryote=is_prokaryote, + ) + + # Pad MSA to avoid zero-sized extra_msa. + np_example = pad_msa(np_example, 512) + + return np_example diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/data/templates.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/data/templates.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,1010 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Functions for getting templates and calculating template features.""" +import abc +import dataclasses +import datetime +import functools +import glob +import os +import re +from typing import Any, Dict, Mapping, Optional, Sequence, Tuple + +from absl import logging +from alphafold.common import residue_constants +from alphafold.data import mmcif_parsing +from alphafold.data import parsers +from alphafold.data.tools import kalign +import numpy as np + +# Internal import (7716). + + +class Error(Exception): + """Base class for exceptions.""" + + +class NoChainsError(Error): + """An error indicating that template mmCIF didn't have any chains.""" + + +class SequenceNotInTemplateError(Error): + """An error indicating that template mmCIF didn't contain the sequence.""" + + +class NoAtomDataInTemplateError(Error): + """An error indicating that template mmCIF didn't contain atom positions.""" + + +class TemplateAtomMaskAllZerosError(Error): + """An error indicating that template mmCIF had all atom positions masked.""" + + +class QueryToTemplateAlignError(Error): + """An error indicating that the query can't be aligned to the template.""" + + +class CaDistanceError(Error): + """An error indicating that a CA atom distance exceeds a threshold.""" + + +class MultipleChainsError(Error): + """An error indicating that multiple chains were found for a given ID.""" + + +# Prefilter exceptions. 
# Maps each template feature name to the dtype it is stored with.
# The two string-valued features use the builtin `object`: the `np.object`
# alias for it was deprecated in NumPy 1.20 and removed in 1.24, where merely
# evaluating it raises AttributeError at import time.
TEMPLATE_FEATURES = {
    'template_aatype': np.float32,
    'template_all_atom_masks': np.float32,
    'template_all_atom_positions': np.float32,
    'template_domain_names': object,
    'template_sequence': object,
    'template_sum_probs': np.float32,
}
+ """ + if release_date_cutoff is None: + raise ValueError('The release_date_cutoff must not be None.') + if pdb_id in release_dates: + return release_dates[pdb_id] > release_date_cutoff + else: + # Since this is just a quick prefilter to reduce the number of mmCIF files + # we need to parse, we don't have to worry about returning True here. + return False + + +def _parse_obsolete(obsolete_file_path: str) -> Mapping[str, Optional[str]]: + """Parses the data file from PDB that lists which pdb_ids are obsolete.""" + with open(obsolete_file_path) as f: + result = {} + for line in f: + line = line.strip() + # Format: Date From To + # 'OBSLTE 06-NOV-19 6G9Y' - Removed, rare + # 'OBSLTE 31-JUL-94 116L 216L' - Replaced, common + # 'OBSLTE 26-SEP-06 2H33 2JM5 2OWI' - Replaced by multiple, rare + if line.startswith('OBSLTE'): + if len(line) > 30: + # Replaced by at least one structure. + from_id = line[20:24].lower() + to_id = line[29:33].lower() + result[from_id] = to_id + elif len(line) == 24: + # Removed. + from_id = line[20:24].lower() + result[from_id] = None + return result + + +def _parse_release_dates(path: str) -> Mapping[str, datetime.datetime]: + """Parses release dates file, returns a mapping from PDBs to release dates.""" + if path.endswith('txt'): + release_dates = {} + with open(path, 'r') as f: + for line in f: + pdb_id, date = line.split(':') + date = date.strip() + # Python 3.6 doesn't have datetime.date.fromisoformat() which is about + # 90x faster than strptime. However, splitting the string manually is + # about 10x faster than strptime. + release_dates[pdb_id.strip()] = datetime.datetime( + year=int(date[:4]), month=int(date[5:7]), day=int(date[8:10])) + return release_dates + else: + raise ValueError('Invalid format of the release date file %s.' 
def _assess_hhsearch_hit(
    hit: parsers.TemplateHit,
    hit_pdb_code: str,
    query_sequence: str,
    release_dates: Mapping[str, datetime.datetime],
    release_date_cutoff: datetime.datetime,
    max_subsequence_ratio: float = 0.95,
    min_align_ratio: float = 0.1) -> bool:
  """Cheap prefilter for a template hit; does not touch the mmCIF file.

  Args:
    hit: The template hit to check.
    hit_pdb_code: The 4 letter pdb code used for the release-date lookup. It
      may differ from the code in the hit itself when the original entry has
      become obsolete.
    query_sequence: Amino acid sequence of the query.
    release_dates: Mapping from pdb codes to structure release dates.
    release_date_cutoff: Latest release date accepted for this query.
    max_subsequence_ratio: A template that is an exact substring of the query
      is rejected when template length / query length exceeds this value.
    min_align_ratio: The hit is rejected unless aligned columns / query length
      is strictly greater than this value.

  Returns:
    True when every check passes; otherwise one of the errors below is raised.

  Raises:
    DateError: If the hit date was after the max allowed date.
    AlignRatioError: If the hit align ratio to the query was too small.
    DuplicateError: If the hit was an exact subsequence of the query.
    LengthError: If the hit was too short.
  """
  coverage = hit.aligned_cols / len(query_sequence)

  # Template residues with the alignment gaps removed.
  ungapped_template = hit.hit_sequence.replace('-', '')
  template_length = len(ungapped_template)
  length_ratio = float(template_length) / len(query_sequence)

  # A template that is a (near) full-length exact substring of the query is
  # treated as a duplicate; this can happen due to duplicate entries in the
  # PDB database.
  is_near_duplicate = (ungapped_template in query_sequence and
                       length_ratio > max_subsequence_ratio)

  # The checks run in a fixed order, so the first failing one decides which
  # error the caller sees.
  if _is_after_cutoff(hit_pdb_code, release_dates, release_date_cutoff):
    raise DateError(f'Date ({release_dates[hit_pdb_code]}) > max template date '
                    f'({release_date_cutoff}).')

  if coverage <= min_align_ratio:
    raise AlignRatioError('Proportion of residues aligned to query too small. '
                          f'Align ratio: {coverage}.')

  if is_near_duplicate:
    raise DuplicateError('Template is an exact subsequence of query with large '
                         f'coverage. Length ratio: {length_ratio}.')

  if template_length < 10:
    raise LengthError(f'Template too short. Length: {template_length}.')

  return True
+ pdb_id = mmcif_object.file_id + chain_sequence = mmcif_object.chain_to_seqres.get(template_chain_id) + if chain_sequence and (template_sequence in chain_sequence): + logging.info( + 'Found an exact template match %s_%s.', pdb_id, template_chain_id) + mapping_offset = chain_sequence.find(template_sequence) + return chain_sequence, template_chain_id, mapping_offset + + # Try if there is an exact match in the (sub)sequence only. + for chain_id, chain_sequence in mmcif_object.chain_to_seqres.items(): + if chain_sequence and (template_sequence in chain_sequence): + logging.info('Found a sequence-only match %s_%s.', pdb_id, chain_id) + mapping_offset = chain_sequence.find(template_sequence) + return chain_sequence, chain_id, mapping_offset + + # Return a chain sequence that fuzzy matches (X = wildcard) the template. + # Make parentheses unnamed groups (?:_) to avoid the 100 named groups limit. + regex = ['.' if aa == 'X' else '(?:%s|X)' % aa for aa in template_sequence] + regex = re.compile(''.join(regex)) + for chain_id, chain_sequence in mmcif_object.chain_to_seqres.items(): + match = re.search(regex, chain_sequence) + if match: + logging.info('Found a fuzzy sequence-only match %s_%s.', pdb_id, chain_id) + mapping_offset = match.start() + return chain_sequence, chain_id, mapping_offset + + # No hits, raise an error. + raise SequenceNotInTemplateError( + 'Could not find the template sequence in %s_%s. Template sequence: %s, ' + 'chain_to_seqres: %s' % (pdb_id, template_chain_id, template_sequence, + mmcif_object.chain_to_seqres)) + + +def _realign_pdb_template_to_query( + old_template_sequence: str, + template_chain_id: str, + mmcif_object: mmcif_parsing.MmcifObject, + old_mapping: Mapping[int, int], + kalign_binary_path: str) -> Tuple[str, Mapping[int, int]]: + """Aligns template from the mmcif_object to the query. 
+ + In case PDB70 contains a different version of the template sequence, we need + to perform a realignment to the actual sequence that is in the mmCIF file. + This method performs such realignment, but returns the new sequence and + mapping only if the sequence in the mmCIF file is 90% identical to the old + sequence. + + Note that the old_template_sequence comes from the hit, and contains only that + part of the chain that matches with the query while the new_template_sequence + is the full chain. + + Args: + old_template_sequence: The template sequence that was returned by the PDB + template search (typically done using HHSearch). + template_chain_id: The template chain id was returned by the PDB template + search (typically done using HHSearch). This is used to find the right + chain in the mmcif_object chain_to_seqres mapping. + mmcif_object: A mmcif_object which holds the actual template data. + old_mapping: A mapping from the query sequence to the template sequence. + This mapping will be used to compute the new mapping from the query + sequence to the actual mmcif_object template sequence by aligning the + old_template_sequence and the actual template sequence. + kalign_binary_path: The path to a kalign executable. + + Returns: + A tuple (new_template_sequence, new_query_to_template_mapping) where: + * new_template_sequence is the actual template sequence that was found in + the mmcif_object. + * new_query_to_template_mapping is the new mapping from the query to the + actual template found in the mmcif_object. + + Raises: + QueryToTemplateAlignError: + * If there was an error thrown by the alignment tool. + * Or if the actual template sequence differs by more than 10% from the + old_template_sequence. + """ + aligner = kalign.Kalign(binary_path=kalign_binary_path) + new_template_sequence = mmcif_object.chain_to_seqres.get( + template_chain_id, '') + + # Sometimes the template chain id is unknown. 
But if there is only a single + # sequence within the mmcif_object, it is safe to assume it is that one. + if not new_template_sequence: + if len(mmcif_object.chain_to_seqres) == 1: + logging.info('Could not find %s in %s, but there is only 1 sequence, so ' + 'using that one.', + template_chain_id, + mmcif_object.file_id) + new_template_sequence = list(mmcif_object.chain_to_seqres.values())[0] + else: + raise QueryToTemplateAlignError( + f'Could not find chain {template_chain_id} in {mmcif_object.file_id}. ' + 'If there are no mmCIF parsing errors, it is possible it was not a ' + 'protein chain.') + + try: + parsed_a3m = parsers.parse_a3m( + aligner.align([old_template_sequence, new_template_sequence])) + old_aligned_template, new_aligned_template = parsed_a3m.sequences + except Exception as e: + raise QueryToTemplateAlignError( + 'Could not align old template %s to template %s (%s_%s). Error: %s' % + (old_template_sequence, new_template_sequence, mmcif_object.file_id, + template_chain_id, str(e))) + + logging.info('Old aligned template: %s\nNew aligned template: %s', + old_aligned_template, new_aligned_template) + + old_to_new_template_mapping = {} + old_template_index = -1 + new_template_index = -1 + num_same = 0 + for old_template_aa, new_template_aa in zip( + old_aligned_template, new_aligned_template): + if old_template_aa != '-': + old_template_index += 1 + if new_template_aa != '-': + new_template_index += 1 + if old_template_aa != '-' and new_template_aa != '-': + old_to_new_template_mapping[old_template_index] = new_template_index + if old_template_aa == new_template_aa: + num_same += 1 + + # Require at least 90 % sequence identity wrt to the shorter of the sequences. + if float(num_same) / min( + len(old_template_sequence), len(new_template_sequence)) < 0.9: + raise QueryToTemplateAlignError( + 'Insufficient similarity of the sequence in the database: %s to the ' + 'actual sequence in the mmCIF file %s_%s: %s. 
We require at least ' + '90 %% similarity wrt to the shorter of the sequences. This is not a ' + 'problem unless you think this is a template that should be included.' % + (old_template_sequence, mmcif_object.file_id, template_chain_id, + new_template_sequence)) + + new_query_to_template_mapping = {} + for query_index, old_template_index in old_mapping.items(): + new_query_to_template_mapping[query_index] = ( + old_to_new_template_mapping.get(old_template_index, -1)) + + new_template_sequence = new_template_sequence.replace('-', '') + + return new_template_sequence, new_query_to_template_mapping + + +def _check_residue_distances(all_positions: np.ndarray, + all_positions_mask: np.ndarray, + max_ca_ca_distance: float): + """Checks if the distance between unmasked neighbor residues is ok.""" + ca_position = residue_constants.atom_order['CA'] + prev_is_unmasked = False + prev_calpha = None + for i, (coords, mask) in enumerate(zip(all_positions, all_positions_mask)): + this_is_unmasked = bool(mask[ca_position]) + if this_is_unmasked: + this_calpha = coords[ca_position] + if prev_is_unmasked: + distance = np.linalg.norm(this_calpha - prev_calpha) + if distance > max_ca_ca_distance: + raise CaDistanceError( + 'The distance between residues %d and %d is %f > limit %f.' 
% ( + i, i + 1, distance, max_ca_ca_distance)) + prev_calpha = this_calpha + prev_is_unmasked = this_is_unmasked + + +def _get_atom_positions( + mmcif_object: mmcif_parsing.MmcifObject, + auth_chain_id: str, + max_ca_ca_distance: float) -> Tuple[np.ndarray, np.ndarray]: + """Gets atom positions and mask from a list of Biopython Residues.""" + num_res = len(mmcif_object.chain_to_seqres[auth_chain_id]) + + relevant_chains = [c for c in mmcif_object.structure.get_chains() + if c.id == auth_chain_id] + if len(relevant_chains) != 1: + raise MultipleChainsError( + f'Expected exactly one chain in structure with id {auth_chain_id}.') + chain = relevant_chains[0] + + all_positions = np.zeros([num_res, residue_constants.atom_type_num, 3]) + all_positions_mask = np.zeros([num_res, residue_constants.atom_type_num], + dtype=np.int64) + for res_index in range(num_res): + pos = np.zeros([residue_constants.atom_type_num, 3], dtype=np.float32) + mask = np.zeros([residue_constants.atom_type_num], dtype=np.float32) + res_at_position = mmcif_object.seqres_to_structure[auth_chain_id][res_index] + if not res_at_position.is_missing: + res = chain[(res_at_position.hetflag, + res_at_position.position.residue_number, + res_at_position.position.insertion_code)] + for atom in res.get_atoms(): + atom_name = atom.get_name() + x, y, z = atom.get_coord() + if atom_name in residue_constants.atom_order.keys(): + pos[residue_constants.atom_order[atom_name]] = [x, y, z] + mask[residue_constants.atom_order[atom_name]] = 1.0 + elif atom_name.upper() == 'SE' and res.get_resname() == 'MSE': + # Put the coordinates of the selenium atom in the sulphur column. + pos[residue_constants.atom_order['SD']] = [x, y, z] + mask[residue_constants.atom_order['SD']] = 1.0 + + # Fix naming errors in arginine residues where NH2 is incorrectly + # assigned to be closer to CD than NH1. 
+ cd = residue_constants.atom_order['CD'] + nh1 = residue_constants.atom_order['NH1'] + nh2 = residue_constants.atom_order['NH2'] + if (res.get_resname() == 'ARG' and + all(mask[atom_index] for atom_index in (cd, nh1, nh2)) and + (np.linalg.norm(pos[nh1] - pos[cd]) > + np.linalg.norm(pos[nh2] - pos[cd]))): + pos[nh1], pos[nh2] = pos[nh2].copy(), pos[nh1].copy() + mask[nh1], mask[nh2] = mask[nh2].copy(), mask[nh1].copy() + + all_positions[res_index] = pos + all_positions_mask[res_index] = mask + _check_residue_distances( + all_positions, all_positions_mask, max_ca_ca_distance) + return all_positions, all_positions_mask + + +def _extract_template_features( + mmcif_object: mmcif_parsing.MmcifObject, + pdb_id: str, + mapping: Mapping[int, int], + template_sequence: str, + query_sequence: str, + template_chain_id: str, + kalign_binary_path: str) -> Tuple[Dict[str, Any], Optional[str]]: + """Parses atom positions in the target structure and aligns with the query. + + Atoms for each residue in the template structure are indexed to coincide + with their corresponding residue in the query sequence, according to the + alignment mapping provided. + + Args: + mmcif_object: mmcif_parsing.MmcifObject representing the template. + pdb_id: PDB code for the template. + mapping: Dictionary mapping indices in the query sequence to indices in + the template sequence. + template_sequence: String describing the amino acid sequence for the + template protein. + query_sequence: String describing the amino acid sequence for the query + protein. + template_chain_id: String ID describing which chain in the structure proto + should be used. + kalign_binary_path: The path to a kalign executable used for template + realignment. + + Returns: + A tuple with: + * A dictionary containing the extra features derived from the template + protein structure. + * A warning message if the hit was realigned to the actual mmCIF sequence. + Otherwise None. 
+ + Raises: + NoChainsError: If the mmcif object doesn't contain any chains. + SequenceNotInTemplateError: If the given chain id / sequence can't + be found in the mmcif object. + QueryToTemplateAlignError: If the actual template in the mmCIF file + can't be aligned to the query. + NoAtomDataInTemplateError: If the mmcif object doesn't contain + atom positions. + TemplateAtomMaskAllZerosError: If the mmcif object doesn't have any + unmasked residues. + """ + if mmcif_object is None or not mmcif_object.chain_to_seqres: + raise NoChainsError('No chains in PDB: %s_%s' % (pdb_id, template_chain_id)) + + warning = None + try: + seqres, chain_id, mapping_offset = _find_template_in_pdb( + template_chain_id=template_chain_id, + template_sequence=template_sequence, + mmcif_object=mmcif_object) + except SequenceNotInTemplateError: + # If PDB70 contains a different version of the template, we use the sequence + # from the mmcif_object. + chain_id = template_chain_id + warning = ( + f'The exact sequence {template_sequence} was not found in ' + f'{pdb_id}_{chain_id}. Realigning the template to the actual sequence.') + logging.warning(warning) + # This throws an exception if it fails to realign the hit. + seqres, mapping = _realign_pdb_template_to_query( + old_template_sequence=template_sequence, + template_chain_id=template_chain_id, + mmcif_object=mmcif_object, + old_mapping=mapping, + kalign_binary_path=kalign_binary_path) + logging.info('Sequence in %s_%s: %s successfully realigned to %s', + pdb_id, chain_id, template_sequence, seqres) + # The template sequence changed. + template_sequence = seqres + # No mapping offset, the query is aligned to the actual sequence. + mapping_offset = 0 + + try: + # Essentially set to infinity - we don't want to reject templates unless + # they're really really bad. 
+ all_atom_positions, all_atom_mask = _get_atom_positions( + mmcif_object, chain_id, max_ca_ca_distance=150.0) + except (CaDistanceError, KeyError) as ex: + raise NoAtomDataInTemplateError( + 'Could not get atom data (%s_%s): %s' % (pdb_id, chain_id, str(ex)) + ) from ex + + all_atom_positions = np.split(all_atom_positions, all_atom_positions.shape[0]) + all_atom_masks = np.split(all_atom_mask, all_atom_mask.shape[0]) + + output_templates_sequence = [] + templates_all_atom_positions = [] + templates_all_atom_masks = [] + + for _ in query_sequence: + # Residues in the query_sequence that are not in the template_sequence: + templates_all_atom_positions.append( + np.zeros((residue_constants.atom_type_num, 3))) + templates_all_atom_masks.append(np.zeros(residue_constants.atom_type_num)) + output_templates_sequence.append('-') + + for k, v in mapping.items(): + template_index = v + mapping_offset + templates_all_atom_positions[k] = all_atom_positions[template_index][0] + templates_all_atom_masks[k] = all_atom_masks[template_index][0] + output_templates_sequence[k] = template_sequence[v] + + # Alanine (AA with the lowest number of atoms) has 5 atoms (C, CA, CB, N, O). + if np.sum(templates_all_atom_masks) < 5: + raise TemplateAtomMaskAllZerosError( + 'Template all atom mask was all zeros: %s_%s. 
Residue range: %d-%d' % + (pdb_id, chain_id, min(mapping.values()) + mapping_offset, + max(mapping.values()) + mapping_offset)) + + output_templates_sequence = ''.join(output_templates_sequence) + + templates_aatype = residue_constants.sequence_to_onehot( + output_templates_sequence, residue_constants.HHBLITS_AA_TO_ID) + + return ( + { + 'template_all_atom_positions': np.array(templates_all_atom_positions), + 'template_all_atom_masks': np.array(templates_all_atom_masks), + 'template_sequence': output_templates_sequence.encode(), + 'template_aatype': np.array(templates_aatype), + 'template_domain_names': f'{pdb_id.lower()}_{chain_id}'.encode(), + }, + warning) + + +def _build_query_to_hit_index_mapping( + hit_query_sequence: str, + hit_sequence: str, + indices_hit: Sequence[int], + indices_query: Sequence[int], + original_query_sequence: str) -> Mapping[int, int]: + """Gets mapping from indices in original query sequence to indices in the hit. + + hit_query_sequence and hit_sequence are two aligned sequences containing gap + characters. hit_query_sequence contains only the part of the original query + sequence that matched the hit. When interpreting the indices from the .hhr, we + need to correct for this to recover a mapping from original query sequence to + the hit sequence. + + Args: + hit_query_sequence: The portion of the query sequence that is in the .hhr + hit + hit_sequence: The portion of the hit sequence that is in the .hhr + indices_hit: The indices for each aminoacid relative to the hit sequence + indices_query: The indices for each aminoacid relative to the original query + sequence + original_query_sequence: String describing the original query sequence. + + Returns: + Dictionary with indices in the original query sequence as keys and indices + in the hit sequence as values. 
+ """ + # If the hit is empty (no aligned residues), return empty mapping + if not hit_query_sequence: + return {} + + # Remove gaps and find the offset of hit.query relative to original query. + hhsearch_query_sequence = hit_query_sequence.replace('-', '') + hit_sequence = hit_sequence.replace('-', '') + hhsearch_query_offset = original_query_sequence.find(hhsearch_query_sequence) + + # Index of -1 used for gap characters. Subtract the min index ignoring gaps. + min_idx = min(x for x in indices_hit if x > -1) + fixed_indices_hit = [ + x - min_idx if x > -1 else -1 for x in indices_hit + ] + + min_idx = min(x for x in indices_query if x > -1) + fixed_indices_query = [x - min_idx if x > -1 else -1 for x in indices_query] + + # Zip the corrected indices, ignore case where both seqs have gap characters. + mapping = {} + for q_i, q_t in zip(fixed_indices_query, fixed_indices_hit): + if q_t != -1 and q_i != -1: + if (q_t >= len(hit_sequence) or + q_i + hhsearch_query_offset >= len(original_query_sequence)): + continue + mapping[q_i + hhsearch_query_offset] = q_t + + return mapping + + +@dataclasses.dataclass(frozen=True) +class SingleHitResult: + features: Optional[Mapping[str, Any]] + error: Optional[str] + warning: Optional[str] + + +@functools.lru_cache(16, typed=False) +def _read_file(path): + with open(path, 'r') as f: + file_data = f.read() + return file_data + + +def _process_single_hit( + query_sequence: str, + hit: parsers.TemplateHit, + mmcif_dir: str, + max_template_date: datetime.datetime, + release_dates: Mapping[str, datetime.datetime], + obsolete_pdbs: Mapping[str, Optional[str]], + kalign_binary_path: str, + strict_error_check: bool = False) -> SingleHitResult: + """Tries to extract template features from a single HHSearch hit.""" + # Fail hard if we can't get the PDB ID and chain name from the hit. + hit_pdb_code, hit_chain_id = _get_pdb_id_and_chain(hit) + + # This hit has been removed (obsoleted) from PDB, skip it. 
+ if hit_pdb_code in obsolete_pdbs and obsolete_pdbs[hit_pdb_code] is None: + return SingleHitResult( + features=None, error=None, warning=f'Hit {hit_pdb_code} is obsolete.') + + if hit_pdb_code not in release_dates: + if hit_pdb_code in obsolete_pdbs: + hit_pdb_code = obsolete_pdbs[hit_pdb_code] + + # Pass hit_pdb_code since it might have changed due to the pdb being obsolete. + try: + _assess_hhsearch_hit( + hit=hit, + hit_pdb_code=hit_pdb_code, + query_sequence=query_sequence, + release_dates=release_dates, + release_date_cutoff=max_template_date) + except PrefilterError as e: + msg = f'hit {hit_pdb_code}_{hit_chain_id} did not pass prefilter: {str(e)}' + logging.info(msg) + if strict_error_check and isinstance(e, (DateError, DuplicateError)): + # In strict mode we treat some prefilter cases as errors. + return SingleHitResult(features=None, error=msg, warning=None) + + return SingleHitResult(features=None, error=None, warning=None) + + mapping = _build_query_to_hit_index_mapping( + hit.query, hit.hit_sequence, hit.indices_hit, hit.indices_query, + query_sequence) + + # The mapping is from the query to the actual hit sequence, so we need to + # remove gaps (which regardless have a missing confidence score). + template_sequence = hit.hit_sequence.replace('-', '') + + cif_path = os.path.join(mmcif_dir, hit_pdb_code + '.cif') + logging.debug('Reading PDB entry from %s. Query: %s, template: %s', cif_path, + query_sequence, template_sequence) + # Fail if we can't find the mmCIF file. + cif_string = _read_file(cif_path) + + parsing_result = mmcif_parsing.parse( + file_id=hit_pdb_code, mmcif_string=cif_string) + + if parsing_result.mmcif_object is not None: + hit_release_date = datetime.datetime.strptime( + parsing_result.mmcif_object.header['release_date'], '%Y-%m-%d') + if hit_release_date > max_template_date: + error = ('Template %s date (%s) > max template date (%s).' 
% + (hit_pdb_code, hit_release_date, max_template_date)) + if strict_error_check: + return SingleHitResult(features=None, error=error, warning=None) + else: + logging.debug(error) + return SingleHitResult(features=None, error=None, warning=None) + + try: + features, realign_warning = _extract_template_features( + mmcif_object=parsing_result.mmcif_object, + pdb_id=hit_pdb_code, + mapping=mapping, + template_sequence=template_sequence, + query_sequence=query_sequence, + template_chain_id=hit_chain_id, + kalign_binary_path=kalign_binary_path) + if hit.sum_probs is None: + features['template_sum_probs'] = [0] + else: + features['template_sum_probs'] = [hit.sum_probs] + + # It is possible there were some errors when parsing the other chains in the + # mmCIF file, but the template features for the chain we want were still + # computed. In such case the mmCIF parsing errors are not relevant. + return SingleHitResult( + features=features, error=None, warning=realign_warning) + except (NoChainsError, NoAtomDataInTemplateError, + TemplateAtomMaskAllZerosError) as e: + # These 3 errors indicate missing mmCIF experimental data rather than a + # problem with the template search, so turn them into warnings. 
+ warning = ('%s_%s (sum_probs: %s, rank: %s): feature extracting errors: ' + '%s, mmCIF parsing errors: %s' + % (hit_pdb_code, hit_chain_id, hit.sum_probs, hit.index, + str(e), parsing_result.errors)) + if strict_error_check: + return SingleHitResult(features=None, error=warning, warning=None) + else: + return SingleHitResult(features=None, error=None, warning=warning) + except Error as e: + error = ('%s_%s (sum_probs: %.2f, rank: %d): feature extracting errors: ' + '%s, mmCIF parsing errors: %s' + % (hit_pdb_code, hit_chain_id, hit.sum_probs, hit.index, + str(e), parsing_result.errors)) + return SingleHitResult(features=None, error=error, warning=None) + + +@dataclasses.dataclass(frozen=True) +class TemplateSearchResult: + features: Mapping[str, Any] + errors: Sequence[str] + warnings: Sequence[str] + + +class TemplateHitFeaturizer(abc.ABC): + """An abstract base class for turning template hits to template features.""" + + def __init__( + self, + mmcif_dir: str, + max_template_date: str, + max_hits: int, + kalign_binary_path: str, + release_dates_path: Optional[str], + obsolete_pdbs_path: Optional[str], + strict_error_check: bool = False): + """Initializes the Template Search. + + Args: + mmcif_dir: Path to a directory with mmCIF structures. Once a template ID + is found by HHSearch, this directory is used to retrieve the template + data. + max_template_date: The maximum date permitted for template structures. No + template with date higher than this date will be returned. In ISO8601 + date format, YYYY-MM-DD. + max_hits: The maximum number of templates that will be returned. + kalign_binary_path: The path to a kalign executable used for template + realignment. + release_dates_path: An optional path to a file with a mapping from PDB IDs + to their release dates. Thanks to this we don't have to redundantly + parse mmCIF files to get that information. 
+ obsolete_pdbs_path: An optional path to a file containing a mapping from + obsolete PDB IDs to the PDB IDs of their replacements. + strict_error_check: If True, then the following will be treated as errors: + * If any template date is after the max_template_date. + * If any template has identical PDB ID to the query. + * If any template is a duplicate of the query. + * Any feature computation errors. + """ + self._mmcif_dir = mmcif_dir + if not glob.glob(os.path.join(self._mmcif_dir, '*.cif')): + logging.error('Could not find CIFs in %s', self._mmcif_dir) + raise ValueError(f'Could not find CIFs in {self._mmcif_dir}') + + try: + self._max_template_date = datetime.datetime.strptime( + max_template_date, '%Y-%m-%d') + except ValueError: + raise ValueError( + 'max_template_date must be set and have format YYYY-MM-DD.') + self._max_hits = max_hits + self._kalign_binary_path = kalign_binary_path + self._strict_error_check = strict_error_check + + if release_dates_path: + logging.info('Using precomputed release dates %s.', release_dates_path) + self._release_dates = _parse_release_dates(release_dates_path) + else: + self._release_dates = {} + + if obsolete_pdbs_path: + logging.info('Using precomputed obsolete pdbs %s.', obsolete_pdbs_path) + self._obsolete_pdbs = _parse_obsolete(obsolete_pdbs_path) + else: + self._obsolete_pdbs = {} + + @abc.abstractmethod + def get_templates( + self, + query_sequence: str, + hits: Sequence[parsers.TemplateHit]) -> TemplateSearchResult: + """Computes the templates for given query sequence.""" + + +class HhsearchHitFeaturizer(TemplateHitFeaturizer): + """A class for turning a3m hits from hhsearch to template features.""" + + def get_templates( + self, + query_sequence: str, + hits: Sequence[parsers.TemplateHit]) -> TemplateSearchResult: + """Computes the templates for given query sequence (more details above).""" + logging.info('Searching for template for: %s', query_sequence) + + template_features = {} + for template_feature_name in 
TEMPLATE_FEATURES: + template_features[template_feature_name] = [] + + num_hits = 0 + errors = [] + warnings = [] + + for hit in sorted(hits, key=lambda x: x.sum_probs, reverse=True): + # We got all the templates we wanted, stop processing hits. + if num_hits >= self._max_hits: + break + + result = _process_single_hit( + query_sequence=query_sequence, + hit=hit, + mmcif_dir=self._mmcif_dir, + max_template_date=self._max_template_date, + release_dates=self._release_dates, + obsolete_pdbs=self._obsolete_pdbs, + strict_error_check=self._strict_error_check, + kalign_binary_path=self._kalign_binary_path) + + if result.error: + errors.append(result.error) + + # There could be an error even if there are some results, e.g. thrown by + # other unparsable chains in the same mmCIF file. + if result.warning: + warnings.append(result.warning) + + if result.features is None: + logging.info('Skipped invalid hit %s, error: %s, warning: %s', + hit.name, result.error, result.warning) + else: + # Increment the hit counter, since we got features out of this hit. + num_hits += 1 + for k in template_features: + template_features[k].append(result.features[k]) + + for name in template_features: + if num_hits > 0: + template_features[name] = np.stack( + template_features[name], axis=0).astype(TEMPLATE_FEATURES[name]) + else: + # Make sure the feature has correct dtype even if empty. 
+ template_features[name] = np.array([], dtype=TEMPLATE_FEATURES[name]) + + return TemplateSearchResult( + features=template_features, errors=errors, warnings=warnings) + + +class HmmsearchHitFeaturizer(TemplateHitFeaturizer): + """A class for turning a3m hits from hmmsearch to template features.""" + + def get_templates( + self, + query_sequence: str, + hits: Sequence[parsers.TemplateHit]) -> TemplateSearchResult: + """Computes the templates for given query sequence (more details above).""" + logging.info('Searching for template for: %s', query_sequence) + + template_features = {} + for template_feature_name in TEMPLATE_FEATURES: + template_features[template_feature_name] = [] + + already_seen = set() + errors = [] + warnings = [] + + if not hits or hits[0].sum_probs is None: + sorted_hits = hits + else: + sorted_hits = sorted(hits, key=lambda x: x.sum_probs, reverse=True) + + for hit in sorted_hits: + # We got all the templates we wanted, stop processing hits. + if len(already_seen) >= self._max_hits: + break + + result = _process_single_hit( + query_sequence=query_sequence, + hit=hit, + mmcif_dir=self._mmcif_dir, + max_template_date=self._max_template_date, + release_dates=self._release_dates, + obsolete_pdbs=self._obsolete_pdbs, + strict_error_check=self._strict_error_check, + kalign_binary_path=self._kalign_binary_path) + + if result.error: + errors.append(result.error) + + # There could be an error even if there are some results, e.g. thrown by + # other unparsable chains in the same mmCIF file. + if result.warning: + warnings.append(result.warning) + + if result.features is None: + logging.debug('Skipped invalid hit %s, error: %s, warning: %s', + hit.name, result.error, result.warning) + else: + already_seen_key = result.features['template_sequence'] + if already_seen_key in already_seen: + continue + # Increment the hit counter, since we got features out of this hit. 
+ already_seen.add(already_seen_key) + for k in template_features: + template_features[k].append(result.features[k]) + + if already_seen: + for name in template_features: + template_features[name] = np.stack( + template_features[name], axis=0).astype(TEMPLATE_FEATURES[name]) + else: + num_res = len(query_sequence) + # Construct a default template with all zeros. + template_features = { + 'template_aatype': np.zeros( + (1, num_res, len(residue_constants.restypes_with_x_and_gap)), + np.float32), + 'template_all_atom_masks': np.zeros( + (1, num_res, residue_constants.atom_type_num), np.float32), + 'template_all_atom_positions': np.zeros( + (1, num_res, residue_constants.atom_type_num, 3), np.float32), + 'template_domain_names': np.array([''.encode()], dtype=np.object), + 'template_sequence': np.array([''.encode()], dtype=np.object), + 'template_sum_probs': np.array([0], dtype=np.float32) + } + return TemplateSearchResult( + features=template_features, errors=errors, warnings=warnings) diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/data/tools/__init__.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/data/tools/__init__.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,14 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Python wrappers for third party tools.""" diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/data/tools/hhblits.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/data/tools/hhblits.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,155 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Library to run HHblits from Python.""" + +import glob +import os +import subprocess +from typing import Any, List, Mapping, Optional, Sequence + +from absl import logging +from alphafold.data.tools import utils +# Internal import (7716). + + +_HHBLITS_DEFAULT_P = 20 +_HHBLITS_DEFAULT_Z = 500 + + +class HHBlits: + """Python wrapper of the HHblits binary.""" + + def __init__(self, + *, + binary_path: str, + databases: Sequence[str], + n_cpu: int = 4, + n_iter: int = 3, + e_value: float = 0.001, + maxseq: int = 1_000_000, + realign_max: int = 100_000, + maxfilt: int = 100_000, + min_prefilter_hits: int = 1000, + all_seqs: bool = False, + alt: Optional[int] = None, + p: int = _HHBLITS_DEFAULT_P, + z: int = _HHBLITS_DEFAULT_Z): + """Initializes the Python HHblits wrapper. + + Args: + binary_path: The path to the HHblits executable. + databases: A sequence of HHblits database paths. This should be the + common prefix for the database files (i.e. up to but not including + _hhm.ffindex etc.) + n_cpu: The number of CPUs to give HHblits. + n_iter: The number of HHblits iterations. 
+ e_value: The E-value, see HHblits docs for more details. + maxseq: The maximum number of rows in an input alignment. Note that this + parameter is only supported in HHBlits version 3.1 and higher. + realign_max: Max number of HMM-HMM hits to realign. HHblits default: 500. + maxfilt: Max number of hits allowed to pass the 2nd prefilter. + HHblits default: 20000. + min_prefilter_hits: Min number of hits to pass prefilter. + HHblits default: 100. + all_seqs: Return all sequences in the MSA / Do not filter the result MSA. + HHblits default: False. + alt: Show up to this many alternative alignments. + p: Minimum Prob for a hit to be included in the output hhr file. + HHblits default: 20. + z: Hard cap on number of hits reported in the hhr file. + HHblits default: 500. NB: The relevant HHblits flag is -Z not -z. + + Raises: + RuntimeError: If HHblits binary not found within the path. + """ + self.binary_path = binary_path + self.databases = databases + + for database_path in self.databases: + if not glob.glob(database_path + '_*'): + logging.error('Could not find HHBlits database %s', database_path) + raise ValueError(f'Could not find HHBlits database {database_path}') + + self.n_cpu = n_cpu + self.n_iter = n_iter + self.e_value = e_value + self.maxseq = maxseq + self.realign_max = realign_max + self.maxfilt = maxfilt + self.min_prefilter_hits = min_prefilter_hits + self.all_seqs = all_seqs + self.alt = alt + self.p = p + self.z = z + + def query(self, input_fasta_path: str) -> List[Mapping[str, Any]]: + """Queries the database using HHblits.""" + with utils.tmpdir_manager() as query_tmp_dir: + a3m_path = os.path.join(query_tmp_dir, 'output.a3m') + + db_cmd = [] + for db_path in self.databases: + db_cmd.append('-d') + db_cmd.append(db_path) + cmd = [ + self.binary_path, + '-i', input_fasta_path, + '-cpu', str(self.n_cpu), + '-oa3m', a3m_path, + '-o', '/dev/null', + '-n', str(self.n_iter), + '-e', str(self.e_value), + '-maxseq', str(self.maxseq), + '-realign_max', 
str(self.realign_max), + '-maxfilt', str(self.maxfilt), + '-min_prefilter_hits', str(self.min_prefilter_hits)] + if self.all_seqs: + cmd += ['-all'] + if self.alt: + cmd += ['-alt', str(self.alt)] + if self.p != _HHBLITS_DEFAULT_P: + cmd += ['-p', str(self.p)] + if self.z != _HHBLITS_DEFAULT_Z: + cmd += ['-Z', str(self.z)] + cmd += db_cmd + + logging.info('Launching subprocess "%s"', ' '.join(cmd)) + process = subprocess.Popen( + cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + + with utils.timing('HHblits query'): + stdout, stderr = process.communicate() + retcode = process.wait() + + if retcode: + # Logs have a 15k character limit, so log HHblits error line by line. + logging.error('HHblits failed. HHblits stderr begin:') + for error_line in stderr.decode('utf-8').splitlines(): + if error_line.strip(): + logging.error(error_line.strip()) + logging.error('HHblits stderr end') + raise RuntimeError('HHblits failed\nstdout:\n%s\n\nstderr:\n%s\n' % ( + stdout.decode('utf-8'), stderr[:500_000].decode('utf-8'))) + + with open(a3m_path) as f: + a3m = f.read() + + raw_output = dict( + a3m=a3m, + output=stdout, + stderr=stderr, + n_iter=self.n_iter, + e_value=self.e_value) + return [raw_output] diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/data/tools/hhsearch.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/data/tools/hhsearch.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,107 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""Library to run HHsearch from Python.""" + +import glob +import os +import subprocess +from typing import Sequence + +from absl import logging + +from alphafold.data import parsers +from alphafold.data.tools import utils +# Internal import (7716). + + +class HHSearch: + """Python wrapper of the HHsearch binary.""" + + def __init__(self, + *, + binary_path: str, + databases: Sequence[str], + maxseq: int = 1_000_000): + """Initializes the Python HHsearch wrapper. + + Args: + binary_path: The path to the HHsearch executable. + databases: A sequence of HHsearch database paths. This should be the + common prefix for the database files (i.e. up to but not including + _hhm.ffindex etc.) + maxseq: The maximum number of rows in an input alignment. Note that this + parameter is only supported in HHBlits version 3.1 and higher. + + Raises: + RuntimeError: If HHsearch binary not found within the path. 
+ """ + self.binary_path = binary_path + self.databases = databases + self.maxseq = maxseq + + for database_path in self.databases: + if not glob.glob(database_path + '_*'): + logging.error('Could not find HHsearch database %s', database_path) + raise ValueError(f'Could not find HHsearch database {database_path}') + + @property + def output_format(self) -> str: + return 'hhr' + + @property + def input_format(self) -> str: + return 'a3m' + + def query(self, a3m: str) -> str: + """Queries the database using HHsearch using a given a3m.""" + with utils.tmpdir_manager() as query_tmp_dir: + input_path = os.path.join(query_tmp_dir, 'query.a3m') + hhr_path = os.path.join(query_tmp_dir, 'output.hhr') + with open(input_path, 'w') as f: + f.write(a3m) + + db_cmd = [] + for db_path in self.databases: + db_cmd.append('-d') + db_cmd.append(db_path) + cmd = [self.binary_path, + '-i', input_path, + '-o', hhr_path, + '-maxseq', str(self.maxseq) + ] + db_cmd + + logging.info('Launching subprocess "%s"', ' '.join(cmd)) + process = subprocess.Popen( + cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + with utils.timing('HHsearch query'): + stdout, stderr = process.communicate() + retcode = process.wait() + + if retcode: + # Stderr is truncated to prevent proto size errors in Beam. + raise RuntimeError( + 'HHSearch failed:\nstdout:\n%s\n\nstderr:\n%s\n' % ( + stdout.decode('utf-8'), stderr[:100_000].decode('utf-8'))) + + with open(hhr_path) as f: + hhr = f.read() + return hhr + + def get_template_hits(self, + output_string: str, + input_sequence: str) -> Sequence[parsers.TemplateHit]: + """Gets parsed template hits from the raw string output by the tool.""" + del input_sequence # Used by hmmsearch but not needed for hhsearch. 
+ return parsers.parse_hhr(output_string) diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/data/tools/hmmbuild.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/data/tools/hmmbuild.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,138 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""A Python wrapper for hmmbuild - construct HMM profiles from MSA.""" + +import os +import re +import subprocess + +from absl import logging +from alphafold.data.tools import utils +# Internal import (7716). + + +class Hmmbuild(object): + """Python wrapper of the hmmbuild binary.""" + + def __init__(self, + *, + binary_path: str, + singlemx: bool = False): + """Initializes the Python hmmbuild wrapper. + + Args: + binary_path: The path to the hmmbuild executable. + singlemx: Whether to use --singlemx flag. If True, it forces HMMBuild to + just use a common substitution score matrix. + + Raises: + RuntimeError: If hmmbuild binary not found within the path. + """ + self.binary_path = binary_path + self.singlemx = singlemx + + def build_profile_from_sto(self, sto: str, model_construction='fast') -> str: + """Builds an HMM for the aligned sequences given as a Stockholm string. + + Args: + sto: A string with the aligned sequences in the Stockholm format. + model_construction: Whether to use reference annotation in the msa to + determine consensus columns ('hand') or default ('fast'). 
+ + Returns: + A string with the profile in the HMM format. + + Raises: + RuntimeError: If hmmbuild fails. + """ + return self._build_profile(sto, model_construction=model_construction) + + def build_profile_from_a3m(self, a3m: str) -> str: + """Builds an HMM for the aligned sequences given as an A3M string. + + Args: + a3m: A string with the aligned sequences in the A3M format. + + Returns: + A string with the profile in the HMM format. + + Raises: + RuntimeError: If hmmbuild fails. + """ + lines = [] + for line in a3m.splitlines(): + if not line.startswith('>'): + line = re.sub('[a-z]+', '', line) # Remove inserted residues. + lines.append(line + '\n') + msa = ''.join(lines) + return self._build_profile(msa, model_construction='fast') + + def _build_profile(self, msa: str, model_construction: str = 'fast') -> str: + """Builds an HMM for the aligned sequences given as an MSA string. + + Args: + msa: A string with the aligned sequences, in A3M or STO format. + model_construction: Whether to use reference annotation in the msa to + determine consensus columns ('hand') or default ('fast'). + + Returns: + A string with the profile in the HMM format. + + Raises: + RuntimeError: If hmmbuild fails. + ValueError: If unspecified arguments are provided. 
+ """ + if model_construction not in {'hand', 'fast'}: + raise ValueError(f'Invalid model_construction {model_construction} - only' + 'hand and fast supported.') + + with utils.tmpdir_manager() as query_tmp_dir: + input_query = os.path.join(query_tmp_dir, 'query.msa') + output_hmm_path = os.path.join(query_tmp_dir, 'output.hmm') + + with open(input_query, 'w') as f: + f.write(msa) + + cmd = [self.binary_path] + # If adding flags, we have to do so before the output and input: + + if model_construction == 'hand': + cmd.append(f'--{model_construction}') + if self.singlemx: + cmd.append('--singlemx') + cmd.extend([ + '--amino', + output_hmm_path, + input_query, + ]) + + logging.info('Launching subprocess %s', cmd) + process = subprocess.Popen(cmd, stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + + with utils.timing('hmmbuild query'): + stdout, stderr = process.communicate() + retcode = process.wait() + logging.info('hmmbuild stdout:\n%s\n\nstderr:\n%s\n', + stdout.decode('utf-8'), stderr.decode('utf-8')) + + if retcode: + raise RuntimeError('hmmbuild failed\nstdout:\n%s\n\nstderr:\n%s\n' + % (stdout.decode('utf-8'), stderr.decode('utf-8'))) + + with open(output_hmm_path, encoding='utf-8') as f: + hmm = f.read() + + return hmm diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/data/tools/hmmsearch.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/data/tools/hmmsearch.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,131 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""A Python wrapper for hmmsearch - search profile against a sequence db.""" + +import os +import subprocess +from typing import Optional, Sequence + +from absl import logging +from alphafold.data import parsers +from alphafold.data.tools import hmmbuild +from alphafold.data.tools import utils +# Internal import (7716). + + +class Hmmsearch(object): + """Python wrapper of the hmmsearch binary.""" + + def __init__(self, + *, + binary_path: str, + hmmbuild_binary_path: str, + database_path: str, + flags: Optional[Sequence[str]] = None): + """Initializes the Python hmmsearch wrapper. + + Args: + binary_path: The path to the hmmsearch executable. + hmmbuild_binary_path: The path to the hmmbuild executable. Used to build + an hmm from an input a3m. + database_path: The path to the hmmsearch database (FASTA format). + flags: List of flags to be used by hmmsearch. + + Raises: + RuntimeError: If hmmsearch binary not found within the path. + """ + self.binary_path = binary_path + self.hmmbuild_runner = hmmbuild.Hmmbuild(binary_path=hmmbuild_binary_path) + self.database_path = database_path + if flags is None: + # Default hmmsearch run settings. 
+ flags = ['--F1', '0.1', + '--F2', '0.1', + '--F3', '0.1', + '--incE', '100', + '-E', '100', + '--domE', '100', + '--incdomE', '100'] + self.flags = flags + + if not os.path.exists(self.database_path): + logging.error('Could not find hmmsearch database %s', database_path) + raise ValueError(f'Could not find hmmsearch database {database_path}') + + @property + def output_format(self) -> str: + return 'sto' + + @property + def input_format(self) -> str: + return 'sto' + + def query(self, msa_sto: str) -> str: + """Queries the database using hmmsearch using a given stockholm msa.""" + hmm = self.hmmbuild_runner.build_profile_from_sto(msa_sto, + model_construction='hand') + return self.query_with_hmm(hmm) + + def query_with_hmm(self, hmm: str) -> str: + """Queries the database using hmmsearch using a given hmm.""" + with utils.tmpdir_manager() as query_tmp_dir: + hmm_input_path = os.path.join(query_tmp_dir, 'query.hmm') + out_path = os.path.join(query_tmp_dir, 'output.sto') + with open(hmm_input_path, 'w') as f: + f.write(hmm) + + cmd = [ + self.binary_path, + '--noali', # Don't include the alignment in stdout. 
+ '--cpu', '8' + ] + # If adding flags, we have to do so before the output and input: + if self.flags: + cmd.extend(self.flags) + cmd.extend([ + '-A', out_path, + hmm_input_path, + self.database_path, + ]) + + logging.info('Launching sub-process %s', cmd) + process = subprocess.Popen( + cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + with utils.timing( + f'hmmsearch ({os.path.basename(self.database_path)}) query'): + stdout, stderr = process.communicate() + retcode = process.wait() + + if retcode: + raise RuntimeError( + 'hmmsearch failed:\nstdout:\n%s\n\nstderr:\n%s\n' % ( + stdout.decode('utf-8'), stderr.decode('utf-8'))) + + with open(out_path) as f: + out_msa = f.read() + + return out_msa + + def get_template_hits(self, + output_string: str, + input_sequence: str) -> Sequence[parsers.TemplateHit]: + """Gets parsed template hits from the raw string output by the tool.""" + a3m_string = parsers.convert_stockholm_to_a3m(output_string, + remove_first_row_gaps=False) + template_hits = parsers.parse_hmmsearch_a3m( + query_sequence=input_sequence, + a3m_string=a3m_string, + skip_first=False) + return template_hits diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/data/tools/jackhmmer.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/data/tools/jackhmmer.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,201 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Library to run Jackhmmer from Python.""" + +from concurrent import futures +import glob +import os +import subprocess +from typing import Any, Callable, Mapping, Optional, Sequence +from urllib import request + +from absl import logging + +from alphafold.data.tools import utils +# Internal import (7716). + + +class Jackhmmer: + """Python wrapper of the Jackhmmer binary.""" + + def __init__(self, + *, + binary_path: str, + database_path: str, + n_cpu: int = 8, + n_iter: int = 1, + e_value: float = 0.0001, + z_value: Optional[int] = None, + get_tblout: bool = False, + filter_f1: float = 0.0005, + filter_f2: float = 0.00005, + filter_f3: float = 0.0000005, + incdom_e: Optional[float] = None, + dom_e: Optional[float] = None, + num_streamed_chunks: Optional[int] = None, + streaming_callback: Optional[Callable[[int], None]] = None): + """Initializes the Python Jackhmmer wrapper. + + Args: + binary_path: The path to the jackhmmer executable. + database_path: The path to the jackhmmer database (FASTA format). + n_cpu: The number of CPUs to give Jackhmmer. + n_iter: The number of Jackhmmer iterations. + e_value: The E-value, see Jackhmmer docs for more details. + z_value: The Z-value, see Jackhmmer docs for more details. + get_tblout: Whether to save tblout string. + filter_f1: MSV and biased composition pre-filter, set to >1.0 to turn off. + filter_f2: Viterbi pre-filter, set to >1.0 to turn off. + filter_f3: Forward pre-filter, set to >1.0 to turn off. + incdom_e: Domain e-value criteria for inclusion of domains in MSA/next + round. + dom_e: Domain e-value criteria for inclusion in tblout. + num_streamed_chunks: Number of database chunks to stream over. + streaming_callback: Callback function run after each chunk iteration with + the iteration number as argument. 
+ """ + self.binary_path = binary_path + self.database_path = database_path + self.num_streamed_chunks = num_streamed_chunks + + if not os.path.exists(self.database_path) and num_streamed_chunks is None: + logging.error('Could not find Jackhmmer database %s', database_path) + raise ValueError(f'Could not find Jackhmmer database {database_path}') + + self.n_cpu = n_cpu + self.n_iter = n_iter + self.e_value = e_value + self.z_value = z_value + self.filter_f1 = filter_f1 + self.filter_f2 = filter_f2 + self.filter_f3 = filter_f3 + self.incdom_e = incdom_e + self.dom_e = dom_e + self.get_tblout = get_tblout + self.streaming_callback = streaming_callback + + def _query_chunk(self, input_fasta_path: str, database_path: str + ) -> Mapping[str, Any]: + """Queries the database chunk using Jackhmmer.""" + with utils.tmpdir_manager() as query_tmp_dir: + sto_path = os.path.join(query_tmp_dir, 'output.sto') + + # The F1/F2/F3 are the expected proportion to pass each of the filtering + # stages (which get progressively more expensive), reducing these + # speeds up the pipeline at the expense of sensitivity. They are + # currently set very low to make querying Mgnify run in a reasonable + # amount of time. + cmd_flags = [ + # Don't pollute stdout with Jackhmmer output. + '-o', '/dev/null', + '-A', sto_path, + '--noali', + '--F1', str(self.filter_f1), + '--F2', str(self.filter_f2), + '--F3', str(self.filter_f3), + '--incE', str(self.e_value), + # Report only sequences with E-values <= x in per-sequence output. 
+ '-E', str(self.e_value), + '--cpu', str(self.n_cpu), + '-N', str(self.n_iter) + ] + if self.get_tblout: + tblout_path = os.path.join(query_tmp_dir, 'tblout.txt') + cmd_flags.extend(['--tblout', tblout_path]) + + if self.z_value: + cmd_flags.extend(['-Z', str(self.z_value)]) + + if self.dom_e is not None: + cmd_flags.extend(['--domE', str(self.dom_e)]) + + if self.incdom_e is not None: + cmd_flags.extend(['--incdomE', str(self.incdom_e)]) + + cmd = [self.binary_path] + cmd_flags + [input_fasta_path, + database_path] + + logging.info('Launching subprocess "%s"', ' '.join(cmd)) + process = subprocess.Popen( + cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE) + with utils.timing( + f'Jackhmmer ({os.path.basename(database_path)}) query'): + _, stderr = process.communicate() + retcode = process.wait() + + if retcode: + raise RuntimeError( + 'Jackhmmer failed\nstderr:\n%s\n' % stderr.decode('utf-8')) + + # Get e-values for each target name + tbl = '' + if self.get_tblout: + with open(tblout_path) as f: + tbl = f.read() + + with open(sto_path) as f: + sto = f.read() + + raw_output = dict( + sto=sto, + tbl=tbl, + stderr=stderr, + n_iter=self.n_iter, + e_value=self.e_value) + + return raw_output + + def query(self, input_fasta_path: str) -> Sequence[Mapping[str, Any]]: + """Queries the database using Jackhmmer.""" + if self.num_streamed_chunks is None: + return [self._query_chunk(input_fasta_path, self.database_path)] + + db_basename = os.path.basename(self.database_path) + db_remote_chunk = lambda db_idx: f'{self.database_path}.{db_idx}' + db_local_chunk = lambda db_idx: f'/tmp/ramdisk/{db_basename}.{db_idx}' + + # Remove existing files to prevent OOM + for f in glob.glob(db_local_chunk('[0-9]*')): + try: + os.remove(f) + except OSError: + print(f'OSError while deleting {f}') + + # Download the (i+1)-th chunk while Jackhmmer is running on the i-th chunk + with futures.ThreadPoolExecutor(max_workers=2) as executor: + chunked_output = [] + for i in range(1, 
self.num_streamed_chunks + 1): + # Copy the chunk locally + if i == 1: + future = executor.submit( + request.urlretrieve, db_remote_chunk(i), db_local_chunk(i)) + if i < self.num_streamed_chunks: + next_future = executor.submit( + request.urlretrieve, db_remote_chunk(i+1), db_local_chunk(i+1)) + + # Run Jackhmmer with the chunk + future.result() + chunked_output.append( + self._query_chunk(input_fasta_path, db_local_chunk(i))) + + # Remove the local copy of the chunk + os.remove(db_local_chunk(i)) + # Do not set next_future for the last chunk so that this works even for + # databases with only 1 chunk. + if i < self.num_streamed_chunks: + future = next_future + if self.streaming_callback: + self.streaming_callback(i) + return chunked_output diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/data/tools/kalign.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/data/tools/kalign.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,104 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""A Python wrapper for Kalign.""" +import os +import subprocess +from typing import Sequence + +from absl import logging + +from alphafold.data.tools import utils +# Internal import (7716). 
+ + +def _to_a3m(sequences: Sequence[str]) -> str: + """Converts sequences to an a3m file.""" + names = ['sequence %d' % i for i in range(1, len(sequences) + 1)] + a3m = [] + for sequence, name in zip(sequences, names): + a3m.append(u'>' + name + u'\n') + a3m.append(sequence + u'\n') + return ''.join(a3m) + + +class Kalign: + """Python wrapper of the Kalign binary.""" + + def __init__(self, *, binary_path: str): + """Initializes the Python Kalign wrapper. + + Args: + binary_path: The path to the Kalign binary. + + Raises: + RuntimeError: If Kalign binary not found within the path. + """ + self.binary_path = binary_path + + def align(self, sequences: Sequence[str]) -> str: + """Aligns the sequences and returns the alignment in A3M string. + + Args: + sequences: A list of query sequence strings. The sequences have to be at + least 6 residues long (Kalign requires this). Note that the order in + which you give the sequences might alter the output slightly as + different alignment tree might get constructed. + + Returns: + A string with the alignment in a3m format. + + Raises: + RuntimeError: If Kalign fails. + ValueError: If any of the sequences is less than 6 residues long. + """ + logging.info('Aligning %d sequences', len(sequences)) + + for s in sequences: + if len(s) < 6: + raise ValueError('Kalign requires all sequences to be at least 6 ' + 'residues long. Got %s (%d residues).' 
% (s, len(s))) + + with utils.tmpdir_manager() as query_tmp_dir: + input_fasta_path = os.path.join(query_tmp_dir, 'input.fasta') + output_a3m_path = os.path.join(query_tmp_dir, 'output.a3m') + + with open(input_fasta_path, 'w') as f: + f.write(_to_a3m(sequences)) + + cmd = [ + self.binary_path, + '-i', input_fasta_path, + '-o', output_a3m_path, + '-format', 'fasta', + ] + + logging.info('Launching subprocess "%s"', ' '.join(cmd)) + process = subprocess.Popen(cmd, stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + + with utils.timing('Kalign query'): + stdout, stderr = process.communicate() + retcode = process.wait() + logging.info('Kalign stdout:\n%s\n\nstderr:\n%s\n', + stdout.decode('utf-8'), stderr.decode('utf-8')) + + if retcode: + raise RuntimeError('Kalign failed\nstdout:\n%s\n\nstderr:\n%s\n' + % (stdout.decode('utf-8'), stderr.decode('utf-8'))) + + with open(output_a3m_path) as f: + a3m = f.read() + + return a3m diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/data/tools/utils.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/data/tools/utils.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,40 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Common utilities for data pipeline tools.""" +import contextlib +import shutil +import tempfile +import time +from typing import Optional + +from absl import logging + + +@contextlib.contextmanager +def tmpdir_manager(base_dir: Optional[str] = None): + """Context manager that deletes a temporary directory on exit.""" + tmpdir = tempfile.mkdtemp(dir=base_dir) + try: + yield tmpdir + finally: + shutil.rmtree(tmpdir, ignore_errors=True) + + +@contextlib.contextmanager +def timing(msg: str): + logging.info('Started %s', msg) + tic = time.time() + yield + toc = time.time() + logging.info('Finished %s in %.3f seconds', msg, toc - tic) diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/model/__init__.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/model/__init__.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,14 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Alphafold model.""" diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/model/all_atom.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/model/all_atom.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,1141 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Ops for all atom representations. + +Generally we employ two different representations for all atom coordinates, +one is atom37 where each heavy atom corresponds to a given position in a 37 +dimensional array. This mapping is non amino acid specific, but each slot +corresponds to an atom of a given name, for example slot 12 always corresponds +to 'C delta 1', positions that are not present for a given amino acid are +zeroed out and denoted by a mask. +The other representation we employ is called atom14, this is a more dense way +of representing atoms with 14 slots. Here a given slot will correspond to a +different kind of atom depending on amino acid type, for example slot 5 +corresponds to 'N delta 2' for Asparagine, but to 'C delta 1' for Isoleucine. +14 is chosen because it is the maximum number of heavy atoms for any standard +amino acid. +The order of slots can be found in 'residue_constants.residue_atoms'. +Internally the model uses the atom14 representation because it is +computationally more efficient. +The internal atom14 representation is turned into the atom37 at the output of +the network to facilitate easier conversion to existing protein datastructures. +""" + +from typing import Dict, Optional +from alphafold.common import residue_constants + +from alphafold.model import r3 +from alphafold.model import utils +import jax +import jax.numpy as jnp +import numpy as np + + +def squared_difference(x, y): + return jnp.square(x - y) + + +def get_chi_atom_indices(): + """Returns atom indices needed to compute chi angles for all residue types. 
+ + Returns: + A tensor of shape [residue_types=21, chis=4, atoms=4]. The residue types are + in the order specified in residue_constants.restypes + unknown residue type + at the end. For chi angles which are not defined on the residue, the + positions indices are by default set to 0. + """ + chi_atom_indices = [] + for residue_name in residue_constants.restypes: + residue_name = residue_constants.restype_1to3[residue_name] + residue_chi_angles = residue_constants.chi_angles_atoms[residue_name] + atom_indices = [] + for chi_angle in residue_chi_angles: + atom_indices.append( + [residue_constants.atom_order[atom] for atom in chi_angle]) + for _ in range(4 - len(atom_indices)): + atom_indices.append([0, 0, 0, 0]) # For chi angles not defined on the AA. + chi_atom_indices.append(atom_indices) + + chi_atom_indices.append([[0, 0, 0, 0]] * 4) # For UNKNOWN residue. + + return jnp.asarray(chi_atom_indices) + + +def atom14_to_atom37(atom14_data: jnp.ndarray, # (N, 14, ...) + batch: Dict[str, jnp.ndarray] + ) -> jnp.ndarray: # (N, 37, ...) + """Convert atom14 to atom37 representation.""" + assert len(atom14_data.shape) in [2, 3] + assert 'residx_atom37_to_atom14' in batch + assert 'atom37_atom_exists' in batch + + atom37_data = utils.batched_gather(atom14_data, + batch['residx_atom37_to_atom14'], + batch_dims=1) + if len(atom14_data.shape) == 2: + atom37_data *= batch['atom37_atom_exists'] + elif len(atom14_data.shape) == 3: + atom37_data *= batch['atom37_atom_exists'][:, :, + None].astype(atom37_data.dtype) + return atom37_data + + +def atom37_to_atom14( + atom37_data: jnp.ndarray, # (N, 37, ...) + batch: Dict[str, jnp.ndarray]) -> jnp.ndarray: # (N, 14, ...) 
+ """Convert atom37 to atom14 representation.""" + assert len(atom37_data.shape) in [2, 3] + assert 'residx_atom14_to_atom37' in batch + assert 'atom14_atom_exists' in batch + + atom14_data = utils.batched_gather(atom37_data, + batch['residx_atom14_to_atom37'], + batch_dims=1) + if len(atom37_data.shape) == 2: + atom14_data *= batch['atom14_atom_exists'].astype(atom14_data.dtype) + elif len(atom37_data.shape) == 3: + atom14_data *= batch['atom14_atom_exists'][:, :, + None].astype(atom14_data.dtype) + return atom14_data + + +def atom37_to_frames( + aatype: jnp.ndarray, # (...) + all_atom_positions: jnp.ndarray, # (..., 37, 3) + all_atom_mask: jnp.ndarray, # (..., 37) +) -> Dict[str, jnp.ndarray]: + """Computes the frames for the up to 8 rigid groups for each residue. + + The rigid groups are defined by the possible torsions in a given amino acid. + We group the atoms according to their dependence on the torsion angles into + "rigid groups". E.g., the position of atoms in the chi2-group depend on + chi1 and chi2, but do not depend on chi3 or chi4. + Jumper et al. (2021) Suppl. Table 2 and corresponding text. + + Args: + aatype: Amino acid type, given as array with integers. + all_atom_positions: atom37 representation of all atom coordinates. + all_atom_mask: atom37 representation of mask on all atom coordinates. + Returns: + Dictionary containing: + * 'rigidgroups_gt_frames': 8 Frames corresponding to 'all_atom_positions' + represented as flat 12 dimensional array. + * 'rigidgroups_gt_exists': Mask denoting whether the atom positions for + the given frame are available in the ground truth, e.g. if they were + resolved in the experiment. + * 'rigidgroups_group_exists': Mask denoting whether given group is in + principle present for given amino acid type. + * 'rigidgroups_group_is_ambiguous': Mask denoting whether frame is + affected by naming ambiguity. 
+ * 'rigidgroups_alt_gt_frames': 8 Frames with alternative atom renaming + corresponding to 'all_atom_positions' represented as flat + 12 dimensional array. + """ + # 0: 'backbone group', + # 1: 'pre-omega-group', (empty) + # 2: 'phi-group', (currently empty, because it defines only hydrogens) + # 3: 'psi-group', + # 4,5,6,7: 'chi1,2,3,4-group' + aatype_in_shape = aatype.shape + + # If there is a batch axis, just flatten it away, and reshape everything + # back at the end of the function. + aatype = jnp.reshape(aatype, [-1]) + all_atom_positions = jnp.reshape(all_atom_positions, [-1, 37, 3]) + all_atom_mask = jnp.reshape(all_atom_mask, [-1, 37]) + + # Create an array with the atom names. + # shape (num_restypes, num_rigidgroups, 3_atoms): (21, 8, 3) + restype_rigidgroup_base_atom_names = np.full([21, 8, 3], '', dtype=object) + + # 0: backbone frame + restype_rigidgroup_base_atom_names[:, 0, :] = ['C', 'CA', 'N'] + + # 3: 'psi-group' + restype_rigidgroup_base_atom_names[:, 3, :] = ['CA', 'C', 'O'] + + # 4,5,6,7: 'chi1,2,3,4-group' + for restype, restype_letter in enumerate(residue_constants.restypes): + resname = residue_constants.restype_1to3[restype_letter] + for chi_idx in range(4): + if residue_constants.chi_angles_mask[restype][chi_idx]: + atom_names = residue_constants.chi_angles_atoms[resname][chi_idx] + restype_rigidgroup_base_atom_names[ + restype, chi_idx + 4, :] = atom_names[1:] + + # Create mask for existing rigid groups. + restype_rigidgroup_mask = np.zeros([21, 8], dtype=np.float32) + restype_rigidgroup_mask[:, 0] = 1 + restype_rigidgroup_mask[:, 3] = 1 + restype_rigidgroup_mask[:20, 4:] = residue_constants.chi_angles_mask + + # Translate atom names into atom37 indices. + lookuptable = residue_constants.atom_order.copy() + lookuptable[''] = 0 + restype_rigidgroup_base_atom37_idx = np.vectorize(lambda x: lookuptable[x])( + restype_rigidgroup_base_atom_names) + + # Compute the gather indices for all residues in the chain. 
+ # shape (N, 8, 3) + residx_rigidgroup_base_atom37_idx = utils.batched_gather( + restype_rigidgroup_base_atom37_idx, aatype) + + # Gather the base atom positions for each rigid group. + base_atom_pos = utils.batched_gather( + all_atom_positions, + residx_rigidgroup_base_atom37_idx, + batch_dims=1) + + # Compute the Rigids. + gt_frames = r3.rigids_from_3_points( + point_on_neg_x_axis=r3.vecs_from_tensor(base_atom_pos[:, :, 0, :]), + origin=r3.vecs_from_tensor(base_atom_pos[:, :, 1, :]), + point_on_xy_plane=r3.vecs_from_tensor(base_atom_pos[:, :, 2, :]) + ) + + # Compute a mask whether the group exists. + # (N, 8) + group_exists = utils.batched_gather(restype_rigidgroup_mask, aatype) + + # Compute a mask whether ground truth exists for the group + gt_atoms_exist = utils.batched_gather( # shape (N, 8, 3) + all_atom_mask.astype(jnp.float32), + residx_rigidgroup_base_atom37_idx, + batch_dims=1) + gt_exists = jnp.min(gt_atoms_exist, axis=-1) * group_exists # (N, 8) + + # Adapt backbone frame to old convention (mirror x-axis and z-axis). + rots = np.tile(np.eye(3, dtype=np.float32), [8, 1, 1]) + rots[0, 0, 0] = -1 + rots[0, 2, 2] = -1 + gt_frames = r3.rigids_mul_rots(gt_frames, r3.rots_from_tensor3x3(rots)) + + # The frames for ambiguous rigid groups are just rotated by 180 degree around + # the x-axis. The ambiguous group is always the last chi-group. + restype_rigidgroup_is_ambiguous = np.zeros([21, 8], dtype=np.float32) + restype_rigidgroup_rots = np.tile(np.eye(3, dtype=np.float32), [21, 8, 1, 1]) + + for resname, _ in residue_constants.residue_atom_renaming_swaps.items(): + restype = residue_constants.restype_order[ + residue_constants.restype_3to1[resname]] + chi_idx = int(sum(residue_constants.chi_angles_mask[restype]) - 1) + restype_rigidgroup_is_ambiguous[restype, chi_idx + 4] = 1 + restype_rigidgroup_rots[restype, chi_idx + 4, 1, 1] = -1 + restype_rigidgroup_rots[restype, chi_idx + 4, 2, 2] = -1 + + # Gather the ambiguity information for each residue. 
def atom37_to_torsion_angles(
    aatype: jnp.ndarray,  # (B, N)
    all_atom_pos: jnp.ndarray,  # (B, N, 37, 3)
    all_atom_mask: jnp.ndarray,  # (B, N, 37)
    placeholder_for_undefined: bool = False,
) -> Dict[str, jnp.ndarray]:
  """Computes the 7 torsion angles (in sin, cos encoding) for each residue.

  The 7 torsion angles are in the order
  '[pre_omega, phi, psi, chi_1, chi_2, chi_3, chi_4]',
  here pre_omega denotes the omega torsion angle between the given amino acid
  and the previous amino acid.

  The "previous residue" of the first residue in each batch row is zero
  padding (positions and mask), so its pre_omega and phi masks are 0.

  Args:
    aatype: Amino acid type, given as array with integers. Values above 20 are
      clipped to 20 ('Unknown').
    all_atom_pos: atom37 representation of all atom coordinates.
    all_atom_mask: atom37 representation of mask on all atom coordinates.
    placeholder_for_undefined: if True, every masked-out torsion (mask == 0) is
      overwritten with the fixed pair (1, 0) along the last axis, in both the
      regular and the alternative output.
  Returns:
    Dict containing:
      * 'torsion_angles_sin_cos': Array with shape (B, N, 7, 2) where the final
        2 dimensions denote sin and cos respectively
      * 'alt_torsion_angles_sin_cos': same as 'torsion_angles_sin_cos', but
        with the angle shifted by pi for all chi angles affected by the naming
        ambiguities.
      * 'torsion_angles_mask': Mask with shape (B, N, 7) that is non-zero only
        where all four atoms defining the torsion are present in
        'all_atom_mask' (and, for chi angles, the chi angle exists for the
        residue type).
  """

  # Map aatype > 20 to 'Unknown' (20).
  aatype = jnp.minimum(aatype, 20)

  # Compute the backbone angles.
  num_batch, num_res = aatype.shape

  # Shift positions by one residue along axis 1; the first residue gets zeros.
  pad = jnp.zeros([num_batch, 1, 37, 3], jnp.float32)
  prev_all_atom_pos = jnp.concatenate([pad, all_atom_pos[:, :-1, :, :]], axis=1)

  # Same shift for the mask, so the padded "previous residue" counts as absent.
  pad = jnp.zeros([num_batch, 1, 37], jnp.float32)
  prev_all_atom_mask = jnp.concatenate([pad, all_atom_mask[:, :-1, :]], axis=1)

  # For each torsion angle collect the 4 atom positions that define this angle.
  # shape (B, N, atoms=4, xyz=3)
  pre_omega_atom_pos = jnp.concatenate(
      [prev_all_atom_pos[:, :, 1:3, :],  # prev CA, C
       all_atom_pos[:, :, 0:2, :]  # this N, CA
      ], axis=-2)
  phi_atom_pos = jnp.concatenate(
      [prev_all_atom_pos[:, :, 2:3, :],  # prev C
       all_atom_pos[:, :, 0:3, :]  # this N, CA, C
      ], axis=-2)
  psi_atom_pos = jnp.concatenate(
      [all_atom_pos[:, :, 0:3, :],  # this N, CA, C
       all_atom_pos[:, :, 4:5, :]  # this O
      ], axis=-2)

  # Collect the masks from these atoms.
  # Shape [batch, num_res]
  pre_omega_mask = (
      jnp.prod(prev_all_atom_mask[:, :, 1:3], axis=-1)  # prev CA, C
      * jnp.prod(all_atom_mask[:, :, 0:2], axis=-1))  # this N, CA
  phi_mask = (
      prev_all_atom_mask[:, :, 2]  # prev C
      * jnp.prod(all_atom_mask[:, :, 0:3], axis=-1))  # this N, CA, C
  psi_mask = (
      jnp.prod(all_atom_mask[:, :, 0:3], axis=-1) *  # this N, CA, C
      all_atom_mask[:, :, 4])  # this O

  # Collect the atoms for the chi-angles.
  # Compute the table of chi angle indices. Shape: [restypes, chis=4, atoms=4].
  chi_atom_indices = get_chi_atom_indices()
  # Select atoms to compute chis. Shape: [batch, num_res, chis=4, atoms=4].
  atom_indices = utils.batched_gather(
      params=chi_atom_indices, indices=aatype, axis=0, batch_dims=0)
  # Gather atom positions. Shape: [batch, num_res, chis=4, atoms=4, xyz=3].
  chis_atom_pos = utils.batched_gather(
      params=all_atom_pos, indices=atom_indices, axis=-2,
      batch_dims=2)

  # Copy the chi angle mask, add the UNKNOWN residue. Shape: [restypes, 4].
  chi_angles_mask = list(residue_constants.chi_angles_mask)
  chi_angles_mask.append([0.0, 0.0, 0.0, 0.0])
  chi_angles_mask = jnp.asarray(chi_angles_mask)

  # Compute the chi angle mask. I.e. which chis angles exist according to the
  # aatype. Shape [batch, num_res, chis=4].
  chis_mask = utils.batched_gather(params=chi_angles_mask, indices=aatype,
                                   axis=0, batch_dims=0)

  # Constrain the chis_mask to those chis, where the ground truth coordinates of
  # all defining four atoms are available.
  # Gather the chi angle atoms mask. Shape: [batch, num_res, chis=4, atoms=4].
  chi_angle_atoms_mask = utils.batched_gather(
      params=all_atom_mask, indices=atom_indices, axis=-1,
      batch_dims=2)
  # Check if all 4 chi angle atoms were set. Shape: [batch, num_res, chis=4].
  chi_angle_atoms_mask = jnp.prod(chi_angle_atoms_mask, axis=[-1])
  chis_mask = chis_mask * (chi_angle_atoms_mask).astype(jnp.float32)

  # Stack all torsion angle atom positions.
  # Shape (B, N, torsions=7, atoms=4, xyz=3)
  torsions_atom_pos = jnp.concatenate(
      [pre_omega_atom_pos[:, :, None, :, :],
       phi_atom_pos[:, :, None, :, :],
       psi_atom_pos[:, :, None, :, :],
       chis_atom_pos
      ], axis=2)

  # Stack up masks for all torsion angles.
  # shape (B, N, torsions=7)
  torsion_angles_mask = jnp.concatenate(
      [pre_omega_mask[:, :, None],
       phi_mask[:, :, None],
       psi_mask[:, :, None],
       chis_mask
      ], axis=2)

  # Create a frame from the first three atoms:
  # First atom: point on x-y-plane
  # Second atom: point on negative x-axis
  # Third atom: origin
  # r3.Rigids (B, N, torsions=7)
  torsion_frames = r3.rigids_from_3_points(
      point_on_neg_x_axis=r3.vecs_from_tensor(torsions_atom_pos[:, :, :, 1, :]),
      origin=r3.vecs_from_tensor(torsions_atom_pos[:, :, :, 2, :]),
      point_on_xy_plane=r3.vecs_from_tensor(torsions_atom_pos[:, :, :, 0, :]))

  # Compute the position of the fourth atom in this frame (y and z coordinate
  # define the chi angle)
  # r3.Vecs (B, N, torsions=7)
  forth_atom_rel_pos = r3.rigids_mul_vecs(
      r3.invert_rigids(torsion_frames),
      r3.vecs_from_tensor(torsions_atom_pos[:, :, :, 3, :]))

  # Normalize to have the sin and cos of the torsion angle.
  # The z coordinate is stored first (sin), the y coordinate second (cos).
  # The 1e-8 keeps the division finite when both coordinates are zero.
  # jnp.ndarray (B, N, torsions=7, sincos=2)
  torsion_angles_sin_cos = jnp.stack(
      [forth_atom_rel_pos.z, forth_atom_rel_pos.y], axis=-1)
  torsion_angles_sin_cos /= jnp.sqrt(
      jnp.sum(jnp.square(torsion_angles_sin_cos), axis=-1, keepdims=True)
      + 1e-8)

  # Mirror psi, because we computed it from the Oxygen-atom.
  torsion_angles_sin_cos *= jnp.asarray(
      [1., 1., -1., 1., 1., 1., 1.])[None, None, :, None]

  # Create alternative angles for ambiguous atom names.
  # The factor is -1 where the chi angle is flagged in chi_pi_periodic and +1
  # elsewhere (always +1 for the three backbone torsions); negating both sin
  # and cos corresponds to shifting the angle by pi.
  chi_is_ambiguous = utils.batched_gather(
      jnp.asarray(residue_constants.chi_pi_periodic), aatype)
  mirror_torsion_angles = jnp.concatenate(
      [jnp.ones([num_batch, num_res, 3]),
       1.0 - 2.0 * chi_is_ambiguous], axis=-1)
  alt_torsion_angles_sin_cos = (
      torsion_angles_sin_cos * mirror_torsion_angles[:, :, :, None])

  if placeholder_for_undefined:
    # Add placeholder torsions in place of undefined torsion angles
    # (e.g. N-terminus pre-omega)
    # The placeholder is the pair (1, 0) along the last axis.
    placeholder_torsions = jnp.stack([
        jnp.ones(torsion_angles_sin_cos.shape[:-1]),
        jnp.zeros(torsion_angles_sin_cos.shape[:-1])
    ], axis=-1)
    torsion_angles_sin_cos = torsion_angles_sin_cos * torsion_angles_mask[
        ..., None] + placeholder_torsions * (1 - torsion_angles_mask[..., None])
    alt_torsion_angles_sin_cos = alt_torsion_angles_sin_cos * torsion_angles_mask[
        ..., None] + placeholder_torsions * (1 - torsion_angles_mask[..., None])

  return {
      'torsion_angles_sin_cos': torsion_angles_sin_cos,  # (B, N, 7, 2)
      'alt_torsion_angles_sin_cos': alt_torsion_angles_sin_cos,  # (B, N, 7, 2)
      'torsion_angles_mask': torsion_angles_mask  # (B, N, 7)
  }
+ sin_angles = torsion_angles_sin_cos[..., 0] + cos_angles = torsion_angles_sin_cos[..., 1] + + # insert zero rotation for backbone group. + num_residues, = aatype.shape + sin_angles = jnp.concatenate([jnp.zeros([num_residues, 1]), sin_angles], + axis=-1) + cos_angles = jnp.concatenate([jnp.ones([num_residues, 1]), cos_angles], + axis=-1) + zeros = jnp.zeros_like(sin_angles) + ones = jnp.ones_like(sin_angles) + + # all_rots are r3.Rots with shape (N, 8) + all_rots = r3.Rots(ones, zeros, zeros, + zeros, cos_angles, -sin_angles, + zeros, sin_angles, cos_angles) + + # Apply rotations to the frames. + all_frames = r3.rigids_mul_rots(default_frames, all_rots) + + # chi2, chi3, and chi4 frames do not transform to the backbone frame but to + # the previous frame. So chain them up accordingly. + chi2_frame_to_frame = jax.tree_map(lambda x: x[:, 5], all_frames) + chi3_frame_to_frame = jax.tree_map(lambda x: x[:, 6], all_frames) + chi4_frame_to_frame = jax.tree_map(lambda x: x[:, 7], all_frames) + + chi1_frame_to_backb = jax.tree_map(lambda x: x[:, 4], all_frames) + chi2_frame_to_backb = r3.rigids_mul_rigids(chi1_frame_to_backb, + chi2_frame_to_frame) + chi3_frame_to_backb = r3.rigids_mul_rigids(chi2_frame_to_backb, + chi3_frame_to_frame) + chi4_frame_to_backb = r3.rigids_mul_rigids(chi3_frame_to_backb, + chi4_frame_to_frame) + + # Recombine them to a r3.Rigids with shape (N, 8). + def _concat_frames(xall, x5, x6, x7): + return jnp.concatenate( + [xall[:, 0:5], x5[:, None], x6[:, None], x7[:, None]], axis=-1) + + all_frames_to_backb = jax.tree_map( + _concat_frames, + all_frames, + chi2_frame_to_backb, + chi3_frame_to_backb, + chi4_frame_to_backb) + + # Create the global frames. 
def frames_and_literature_positions_to_atom14_pos(
    aatype: jnp.ndarray,  # (N)
    all_frames_to_global: r3.Rigids  # (N, 8)
) -> r3.Vecs:  # (N, 14)
  """Put atom literature positions (atom14 encoding) in each rigid group.

  Jumper et al. (2021) Suppl. Alg. 24 "computeAllAtomCoordinates" line 11

  Args:
    aatype: aatype for each residue.
    all_frames_to_global: All per residue coordinate frames.
  Returns:
    Positions of all atom coordinates in global frame. Entries for atoms that
    are masked out by residue_constants.restype_atom14_mask are multiplied by
    that mask.
  """

  # Pick the appropriate transform for every atom.
  residx_to_group_idx = utils.batched_gather(
      residue_constants.restype_atom14_to_rigid_group, aatype)
  group_mask = jax.nn.one_hot(
      residx_to_group_idx, num_classes=8)  # shape (N, 14, 8)

  def _select_group_per_atom(x):
    # x has shape (N, 8); the one-hot mask keeps exactly one of the 8 group
    # entries for each of the 14 atoms, so the sum is a selection.
    return jnp.sum(x[:, None, :] * group_mask, axis=-1)

  # r3.Rigids with shape (N, 14)
  # jax.tree_util.tree_map is used instead of the top-level jax.tree_map alias,
  # which is deprecated in newer JAX releases.
  map_atoms_to_global = jax.tree_util.tree_map(
      _select_group_per_atom, all_frames_to_global)

  # Gather the literature atom positions for each residue.
  # r3.Vecs with shape (N, 14)
  lit_positions = r3.vecs_from_tensor(
      utils.batched_gather(
          residue_constants.restype_atom14_rigid_group_positions, aatype))

  # Transform each atom from its local frame to the global frame.
  # r3.Vecs with shape (N, 14)
  pred_positions = r3.rigids_mul_vecs(map_atoms_to_global, lit_positions)

  # Mask out non-existing atoms.
  mask = utils.batched_gather(residue_constants.restype_atom14_mask, aatype)
  pred_positions = jax.tree_util.tree_map(lambda x: x * mask, pred_positions)

  return pred_positions
def between_residue_bond_loss(
    pred_atom_positions: jnp.ndarray,  # (N, 37(14), 3)
    pred_atom_mask: jnp.ndarray,  # (N, 37(14))
    residue_index: jnp.ndarray,  # (N)
    aatype: jnp.ndarray,  # (N)
    tolerance_factor_soft=12.0,
    tolerance_factor_hard=12.0
) -> Dict[str, jnp.ndarray]:
  """Flat-bottom loss to penalize structural violations between residues.

  This is a loss penalizing any violation of the geometry around the peptide
  bond between consecutive amino acids. This loss corresponds to
  Jumper et al. (2021) Suppl. Sec. 1.9.11, eq 44, 45.

  Three quantities are checked for every pair of consecutive rows (i, i + 1):
  the C--N bond length, the cosine of the CA-C-N angle and the cosine of the
  C-N-CA angle. A pair only contributes if the involved atoms are present in
  'pred_atom_mask' and the residue indices differ by exactly 1.

  Args:
    pred_atom_positions: Atom positions in atom37/14 representation
    pred_atom_mask: Atom mask in atom37/14 representation
    residue_index: Residue index for given amino acid, this is assumed to be
      monotonically increasing.
    aatype: Amino acid type of given residue
    tolerance_factor_soft: soft tolerance factor measured in standard deviations
      of pdb distributions
    tolerance_factor_hard: hard tolerance factor measured in standard deviations
      of pdb distributions

  Returns:
    Dict containing:
      * 'c_n_loss_mean': Loss for peptide bond length violations
      * 'ca_c_n_loss_mean': Loss for violations of bond angle around C spanned
          by CA, C, N
      * 'c_n_ca_loss_mean': Loss for violations of bond angle around N spanned
          by C, N, CA
      * 'per_residue_loss_sum': sum of all losses for each residue
      * 'per_residue_violation_mask': mask denoting all residues with violation
          present.
  """
  assert len(pred_atom_positions.shape) == 3
  assert len(pred_atom_mask.shape) == 2
  assert len(residue_index.shape) == 1
  assert len(aatype.shape) == 1

  def _flat_bottom_terms(error, stddev, mask):
    """Returns (per-pair loss, masked mean loss, hard violation mask)."""
    loss_per_residue = jax.nn.relu(error - tolerance_factor_soft * stddev)
    mean_loss = jnp.sum(mask * loss_per_residue) / (jnp.sum(mask) + 1e-6)
    violation_mask = mask * (error > (tolerance_factor_hard * stddev))
    return loss_per_residue, mean_loss, violation_mask

  # Get the positions of the relevant backbone atoms.
  this_ca_pos = pred_atom_positions[:-1, 1, :]  # (N - 1, 3)
  this_ca_mask = pred_atom_mask[:-1, 1]         # (N - 1)
  this_c_pos = pred_atom_positions[:-1, 2, :]   # (N - 1, 3)
  this_c_mask = pred_atom_mask[:-1, 2]          # (N - 1)
  next_n_pos = pred_atom_positions[1:, 0, :]    # (N - 1, 3)
  next_n_mask = pred_atom_mask[1:, 0]           # (N - 1)
  next_ca_pos = pred_atom_positions[1:, 1, :]   # (N - 1, 3)
  next_ca_mask = pred_atom_mask[1:, 1]          # (N - 1)
  has_no_gap_mask = ((residue_index[1:] - residue_index[:-1]) == 1.0).astype(
      jnp.float32)

  # Compute loss for the C--N bond.
  c_n_bond_length = jnp.sqrt(
      1e-6 + jnp.sum(squared_difference(this_c_pos, next_n_pos), axis=-1))

  # The C-N bond to proline has slightly different length because of the ring.
  next_is_proline = (
      aatype[1:] == residue_constants.resname_to_idx['PRO']).astype(jnp.float32)
  gt_length = (
      (1. - next_is_proline) * residue_constants.between_res_bond_length_c_n[0]
      + next_is_proline * residue_constants.between_res_bond_length_c_n[1])
  gt_stddev = (
      (1. - next_is_proline) *
      residue_constants.between_res_bond_length_stddev_c_n[0] +
      next_is_proline * residue_constants.between_res_bond_length_stddev_c_n[1])
  # sqrt(eps + x^2) is a smooth stand-in for |x|.
  c_n_bond_length_error = jnp.sqrt(1e-6 +
                                   jnp.square(c_n_bond_length - gt_length))
  mask = this_c_mask * next_n_mask * has_no_gap_mask
  c_n_loss_per_residue, c_n_loss, c_n_violation_mask = _flat_bottom_terms(
      c_n_bond_length_error, gt_stddev, mask)

  # Compute loss for the angles.
  ca_c_bond_length = jnp.sqrt(1e-6 + jnp.sum(
      squared_difference(this_ca_pos, this_c_pos), axis=-1))
  n_ca_bond_length = jnp.sqrt(1e-6 + jnp.sum(
      squared_difference(next_n_pos, next_ca_pos), axis=-1))

  c_ca_unit_vec = (this_ca_pos - this_c_pos) / ca_c_bond_length[:, None]
  c_n_unit_vec = (next_n_pos - this_c_pos) / c_n_bond_length[:, None]
  n_ca_unit_vec = (next_ca_pos - next_n_pos) / n_ca_bond_length[:, None]

  ca_c_n_cos_angle = jnp.sum(c_ca_unit_vec * c_n_unit_vec, axis=-1)
  gt_angle = residue_constants.between_res_cos_angles_ca_c_n[0]
  # The tolerance has to be the spread of this cosine, not the spread of the
  # C--N bond length. Assumes entry [1] of between_res_cos_angles_ca_c_n is the
  # stddev, mirroring between_res_cos_angles_c_n_ca below -- TODO confirm
  # against residue_constants.
  gt_stddev = residue_constants.between_res_cos_angles_ca_c_n[1]
  ca_c_n_cos_angle_error = jnp.sqrt(
      1e-6 + jnp.square(ca_c_n_cos_angle - gt_angle))
  mask = this_ca_mask * this_c_mask * next_n_mask * has_no_gap_mask
  (ca_c_n_loss_per_residue, ca_c_n_loss,
   ca_c_n_violation_mask) = _flat_bottom_terms(
       ca_c_n_cos_angle_error, gt_stddev, mask)

  # The C->N vector is negated so both vectors point away from N.
  c_n_ca_cos_angle = jnp.sum((-c_n_unit_vec) * n_ca_unit_vec, axis=-1)
  gt_angle = residue_constants.between_res_cos_angles_c_n_ca[0]
  gt_stddev = residue_constants.between_res_cos_angles_c_n_ca[1]
  c_n_ca_cos_angle_error = jnp.sqrt(
      1e-6 + jnp.square(c_n_ca_cos_angle - gt_angle))
  mask = this_c_mask * next_n_mask * next_ca_mask * has_no_gap_mask
  (c_n_ca_loss_per_residue, c_n_ca_loss,
   c_n_ca_violation_mask) = _flat_bottom_terms(
       c_n_ca_cos_angle_error, gt_stddev, mask)

  # Compute a per residue loss (equally distribute the loss to both
  # neighbouring residues). Padding at the end / at the start turns the
  # (N - 1) pair values into two length-N arrays aligned with residue i and
  # residue i + 1 respectively.
  per_residue_loss_sum = (c_n_loss_per_residue +
                          ca_c_n_loss_per_residue +
                          c_n_ca_loss_per_residue)
  per_residue_loss_sum = 0.5 * (jnp.pad(per_residue_loss_sum, [[0, 1]]) +
                                jnp.pad(per_residue_loss_sum, [[1, 0]]))

  # Compute hard violations. A residue is flagged if either of its two
  # neighbouring pairs violates any of the three terms.
  violation_mask = jnp.max(
      jnp.stack([c_n_violation_mask,
                 ca_c_n_violation_mask,
                 c_n_ca_violation_mask]), axis=0)
  violation_mask = jnp.maximum(
      jnp.pad(violation_mask, [[0, 1]]),
      jnp.pad(violation_mask, [[1, 0]]))

  return {'c_n_loss_mean': c_n_loss,  # shape ()
          'ca_c_n_loss_mean': ca_c_n_loss,  # shape ()
          'c_n_ca_loss_mean': c_n_ca_loss,  # shape ()
          'per_residue_loss_sum': per_residue_loss_sum,  # shape (N)
          'per_residue_violation_mask': violation_mask  # shape (N)
         }
+ residue_index: Residue index for given amino acid. + overlap_tolerance_soft: Soft tolerance factor. + overlap_tolerance_hard: Hard tolerance factor. + + Returns: + Dict containing: + * 'mean_loss': average clash loss + * 'per_atom_loss_sum': sum of all clash losses per atom, shape (N, 14) + * 'per_atom_clash_mask': mask whether atom clashes with any other atom + shape (N, 14) + """ + assert len(atom14_pred_positions.shape) == 3 + assert len(atom14_atom_exists.shape) == 2 + assert len(atom14_atom_radius.shape) == 2 + assert len(residue_index.shape) == 1 + + # Create the distance matrix. + # (N, N, 14, 14) + dists = jnp.sqrt(1e-10 + jnp.sum( + squared_difference( + atom14_pred_positions[:, None, :, None, :], + atom14_pred_positions[None, :, None, :, :]), + axis=-1)) + + # Create the mask for valid distances. + # shape (N, N, 14, 14) + dists_mask = (atom14_atom_exists[:, None, :, None] * + atom14_atom_exists[None, :, None, :]) + + # Mask out all the duplicate entries in the lower triangular matrix. + # Also mask out the diagonal (atom-pairs from the same residue) -- these atoms + # are handled separately. + dists_mask *= ( + residue_index[:, None, None, None] < residue_index[None, :, None, None]) + + # Backbone C--N bond between subsequent residues is no clash. + c_one_hot = jax.nn.one_hot(2, num_classes=14) + n_one_hot = jax.nn.one_hot(0, num_classes=14) + neighbour_mask = ((residue_index[:, None, None, None] + + 1) == residue_index[None, :, None, None]) + c_n_bonds = neighbour_mask * c_one_hot[None, None, :, + None] * n_one_hot[None, None, None, :] + dists_mask *= (1. - c_n_bonds) + + # Disulfide bridge between two cysteines is no clash. + cys_sg_idx = residue_constants.restype_name_to_atom14_names['CYS'].index('SG') + cys_sg_one_hot = jax.nn.one_hot(cys_sg_idx, num_classes=14) + disulfide_bonds = (cys_sg_one_hot[None, None, :, None] * + cys_sg_one_hot[None, None, None, :]) + dists_mask *= (1. 
def within_residue_violations(
    atom14_pred_positions: jnp.ndarray,  # (N, 14, 3)
    atom14_atom_exists: jnp.ndarray,  # (N, 14)
    atom14_dists_lower_bound: jnp.ndarray,  # (N, 14, 14)
    atom14_dists_upper_bound: jnp.ndarray,  # (N, 14, 14)
    tighten_bounds_for_loss=0.0,
) -> Dict[str, jnp.ndarray]:
  """Loss to penalize steric clashes within residues.

  This is a loss penalizing any steric violations or clashes of non-bonded atoms
  in a given peptide. This loss corresponds to the part with
  the same residues of
  Jumper et al. (2021) Suppl. Sec. 1.9.11, eq 46.

  Args:
    atom14_pred_positions: Predicted positions of atoms in
      global prediction frame
    atom14_atom_exists: Mask denoting whether atom at positions exists for given
      amino acid type
    atom14_dists_lower_bound: Lower bound on allowed distances.
    atom14_dists_upper_bound: Upper bound on allowed distances
    tighten_bounds_for_loss: Margin added to the lower bound and subtracted
      from the upper bound when computing the loss. The violation mask uses
      the untightened bounds.

  Returns:
    Dict containing:
      * 'per_atom_loss_sum': sum of all bound-violation losses per atom,
          shape (N, 14)
      * 'per_atom_violations': mask whether the atom violates a distance bound
          to any other atom of the same residue, shape (N, 14)
  """
  assert len(atom14_pred_positions.shape) == 3
  assert len(atom14_atom_exists.shape) == 2
  assert len(atom14_dists_lower_bound.shape) == 3
  assert len(atom14_dists_upper_bound.shape) == 3

  # Compute the mask for each residue: a pair counts only if both atoms exist
  # and it is not an atom paired with itself (diagonal removed).
  # shape (N, 14, 14)
  dists_masks = (1. - jnp.eye(14, 14)[None])
  dists_masks *= (atom14_atom_exists[:, :, None] *
                  atom14_atom_exists[:, None, :])

  # Distance matrix. The 1e-10 keeps the sqrt argument strictly positive
  # (the diagonal distances are exactly zero otherwise).
  # shape (N, 14, 14)
  dists = jnp.sqrt(1e-10 + jnp.sum(
      squared_difference(
          atom14_pred_positions[:, :, None, :],
          atom14_pred_positions[:, None, :, :]),
      axis=-1))

  # Compute the loss: how far each distance lies outside the tightened
  # [lower, upper] interval.
  # shape (N, 14, 14)
  dists_to_low_error = jax.nn.relu(
      atom14_dists_lower_bound + tighten_bounds_for_loss - dists)
  dists_to_high_error = jax.nn.relu(
      dists - (atom14_dists_upper_bound - tighten_bounds_for_loss))
  loss = dists_masks * (dists_to_low_error + dists_to_high_error)

  # Compute the per atom loss sum (summed over both pair axes, so every pair
  # is attributed to both of its atoms).
  # shape (N, 14)
  per_atom_loss_sum = (jnp.sum(loss, axis=1) +
                       jnp.sum(loss, axis=2))

  # Compute the violations mask.
  # shape (N, 14, 14)
  violations = dists_masks * ((dists < atom14_dists_lower_bound) |
                              (dists > atom14_dists_upper_bound))

  # Compute the per atom violations.
  # shape (N, 14)
  per_atom_violations = jnp.maximum(
      jnp.max(violations, axis=1), jnp.max(violations, axis=2))

  return {'per_atom_loss_sum': per_atom_loss_sum,  # shape (N, 14)
          'per_atom_violations': per_atom_violations  # shape (N, 14)
         }
+ # shape (N, N, 14, 14) + pred_dists = jnp.sqrt(1e-10 + jnp.sum( + squared_difference( + atom14_pred_positions[:, None, :, None, :], + atom14_pred_positions[None, :, None, :, :]), + axis=-1)) + + # Compute distances for ground truth with original and alternative names. + # shape (N, N, 14, 14) + gt_dists = jnp.sqrt(1e-10 + jnp.sum( + squared_difference( + atom14_gt_positions[:, None, :, None, :], + atom14_gt_positions[None, :, None, :, :]), + axis=-1)) + alt_gt_dists = jnp.sqrt(1e-10 + jnp.sum( + squared_difference( + atom14_alt_gt_positions[:, None, :, None, :], + atom14_alt_gt_positions[None, :, None, :, :]), + axis=-1)) + + # Compute LDDT's. + # shape (N, N, 14, 14) + lddt = jnp.sqrt(1e-10 + squared_difference(pred_dists, gt_dists)) + alt_lddt = jnp.sqrt(1e-10 + squared_difference(pred_dists, alt_gt_dists)) + + # Create a mask for ambiguous atoms in rows vs. non-ambiguous atoms + # in cols. + # shape (N ,N, 14, 14) + mask = (atom14_gt_exists[:, None, :, None] * # rows + atom14_atom_is_ambiguous[:, None, :, None] * # rows + atom14_gt_exists[None, :, None, :] * # cols + (1. - atom14_atom_is_ambiguous[None, :, None, :])) # cols + + # Aggregate distances for each residue to the non-amibuguous atoms. + # shape (N) + per_res_lddt = jnp.sum(mask * lddt, axis=[1, 2, 3]) + alt_per_res_lddt = jnp.sum(mask * alt_lddt, axis=[1, 2, 3]) + + # Decide for each residue, whether alternative naming is better. 
+ # shape (N) + alt_naming_is_better = (alt_per_res_lddt < per_res_lddt).astype(jnp.float32) + + return alt_naming_is_better # shape (N) + + +def frame_aligned_point_error( + pred_frames: r3.Rigids, # shape (num_frames) + target_frames: r3.Rigids, # shape (num_frames) + frames_mask: jnp.ndarray, # shape (num_frames) + pred_positions: r3.Vecs, # shape (num_positions) + target_positions: r3.Vecs, # shape (num_positions) + positions_mask: jnp.ndarray, # shape (num_positions) + length_scale: float, + l1_clamp_distance: Optional[float] = None, + epsilon=1e-4) -> jnp.ndarray: # shape () + """Measure point error under different alignments. + + Jumper et al. (2021) Suppl. Alg. 28 "computeFAPE" + + Computes error between two structures with B points under A alignments derived + from the given pairs of frames. + Args: + pred_frames: num_frames reference frames for 'pred_positions'. + target_frames: num_frames reference frames for 'target_positions'. + frames_mask: Mask for frame pairs to use. + pred_positions: num_positions predicted positions of the structure. + target_positions: num_positions target positions of the structure. + positions_mask: Mask on which positions to score. + length_scale: length scale to divide loss by. + l1_clamp_distance: Distance cutoff on error beyond which gradients will + be zero. + epsilon: small value used to regularize denominator for masked average. + Returns: + Masked Frame Aligned Point Error. + """ + assert pred_frames.rot.xx.ndim == 1 + assert target_frames.rot.xx.ndim == 1 + assert frames_mask.ndim == 1, frames_mask.ndim + assert pred_positions.x.ndim == 1 + assert target_positions.x.ndim == 1 + assert positions_mask.ndim == 1 + + # Compute array of predicted positions in the predicted frames. 
+ # r3.Vecs (num_frames, num_positions) + local_pred_pos = r3.rigids_mul_vecs( + jax.tree_map(lambda r: r[:, None], r3.invert_rigids(pred_frames)), + jax.tree_map(lambda x: x[None, :], pred_positions)) + + # Compute array of target positions in the target frames. + # r3.Vecs (num_frames, num_positions) + local_target_pos = r3.rigids_mul_vecs( + jax.tree_map(lambda r: r[:, None], r3.invert_rigids(target_frames)), + jax.tree_map(lambda x: x[None, :], target_positions)) + + # Compute errors between the structures. + # jnp.ndarray (num_frames, num_positions) + error_dist = jnp.sqrt( + r3.vecs_squared_distance(local_pred_pos, local_target_pos) + + epsilon) + + if l1_clamp_distance: + error_dist = jnp.clip(error_dist, 0, l1_clamp_distance) + + normed_error = error_dist / length_scale + normed_error *= jnp.expand_dims(frames_mask, axis=-1) + normed_error *= jnp.expand_dims(positions_mask, axis=-2) + + normalization_factor = ( + jnp.sum(frames_mask, axis=-1) * + jnp.sum(positions_mask, axis=-1)) + return (jnp.sum(normed_error, axis=(-2, -1)) / + (epsilon + normalization_factor)) + + +def _make_renaming_matrices(): + """Matrices to map atoms to symmetry partners in ambiguous case.""" + # As the atom naming is ambiguous for 7 of the 20 amino acids, provide + # alternative groundtruth coordinates where the naming is swapped + restype_3 = [ + residue_constants.restype_1to3[res] for res in residue_constants.restypes + ] + restype_3 += ['UNK'] + # Matrices for renaming ambiguous atoms. 
+ all_matrices = {res: np.eye(14, dtype=np.float32) for res in restype_3} + for resname, swap in residue_constants.residue_atom_renaming_swaps.items(): + correspondences = np.arange(14) + for source_atom_swap, target_atom_swap in swap.items(): + source_index = residue_constants.restype_name_to_atom14_names[ + resname].index(source_atom_swap) + target_index = residue_constants.restype_name_to_atom14_names[ + resname].index(target_atom_swap) + correspondences[source_index] = target_index + correspondences[target_index] = source_index + renaming_matrix = np.zeros((14, 14), dtype=np.float32) + for index, correspondence in enumerate(correspondences): + renaming_matrix[index, correspondence] = 1. + all_matrices[resname] = renaming_matrix.astype(np.float32) + renaming_matrices = np.stack([all_matrices[restype] for restype in restype_3]) + return renaming_matrices + + +RENAMING_MATRICES = _make_renaming_matrices() + + +def get_alt_atom14(aatype, positions, mask): + """Get alternative atom14 positions. + + Constructs renamed atom positions for ambiguous residues. + + Jumper et al. (2021) Suppl. 
Table 3 "Ambiguous atom names due to 180 degree- + rotation-symmetry" + + Args: + aatype: Amino acid at given position + positions: Atom positions as r3.Vecs in atom14 representation, (N, 14) + mask: Atom masks in atom14 representation, (N, 14) + Returns: + renamed atom positions, renamed atom mask + """ + # pick the transformation matrices for the given residue sequence + # shape (num_res, 14, 14) + renaming_transform = utils.batched_gather( + jnp.asarray(RENAMING_MATRICES), aatype) + + positions = jax.tree_map(lambda x: x[:, :, None], positions) + alternative_positions = jax.tree_map( + lambda x: jnp.sum(x, axis=1), positions * renaming_transform) + + # Create the mask for the alternative ground truth (differs from the + # ground truth mask, if only one of the atoms in an ambiguous pair has a + # ground truth position) + alternative_mask = jnp.sum(mask[..., None] * renaming_transform, axis=1) + + return alternative_positions, alternative_mask diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/model/all_atom_multimer.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/model/all_atom_multimer.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,966 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Ops for all atom representations.""" + +from typing import Dict, Text + +from alphafold.common import residue_constants +from alphafold.model import geometry +from alphafold.model import utils +import jax +import jax.numpy as jnp +import numpy as np + + +def squared_difference(x, y): + return jnp.square(x - y) + + +def _make_chi_atom_indices(): + """Returns atom indices needed to compute chi angles for all residue types. + + Returns: + A tensor of shape [residue_types=21, chis=4, atoms=4]. The residue types are + in the order specified in residue_constants.restypes + unknown residue type + at the end. For chi angles which are not defined on the residue, the + positions indices are by default set to 0. + """ + chi_atom_indices = [] + for residue_name in residue_constants.restypes: + residue_name = residue_constants.restype_1to3[residue_name] + residue_chi_angles = residue_constants.chi_angles_atoms[residue_name] + atom_indices = [] + for chi_angle in residue_chi_angles: + atom_indices.append( + [residue_constants.atom_order[atom] for atom in chi_angle]) + for _ in range(4 - len(atom_indices)): + atom_indices.append([0, 0, 0, 0]) # For chi angles not defined on the AA. + chi_atom_indices.append(atom_indices) + + chi_atom_indices.append([[0, 0, 0, 0]] * 4) # For UNKNOWN residue. + + return np.array(chi_atom_indices) + + +def _make_renaming_matrices(): + """Matrices to map atoms to symmetry partners in ambiguous case.""" + # As the atom naming is ambiguous for 7 of the 20 amino acids, provide + # alternative groundtruth coordinates where the naming is swapped + restype_3 = [ + residue_constants.restype_1to3[res] for res in residue_constants.restypes + ] + restype_3 += ['UNK'] + # Matrices for renaming ambiguous atoms. 
+ all_matrices = {res: np.eye(14, dtype=np.float32) for res in restype_3} + for resname, swap in residue_constants.residue_atom_renaming_swaps.items(): + correspondences = np.arange(14) + for source_atom_swap, target_atom_swap in swap.items(): + source_index = residue_constants.restype_name_to_atom14_names[ + resname].index(source_atom_swap) + target_index = residue_constants.restype_name_to_atom14_names[ + resname].index(target_atom_swap) + correspondences[source_index] = target_index + correspondences[target_index] = source_index + renaming_matrix = np.zeros((14, 14), dtype=np.float32) + for index, correspondence in enumerate(correspondences): + renaming_matrix[index, correspondence] = 1. + all_matrices[resname] = renaming_matrix.astype(np.float32) + renaming_matrices = np.stack([all_matrices[restype] for restype in restype_3]) + return renaming_matrices + + +def _make_restype_atom37_mask(): + """Mask of which atoms are present for which residue type in atom37.""" + # create the corresponding mask + restype_atom37_mask = np.zeros([21, 37], dtype=np.float32) + for restype, restype_letter in enumerate(residue_constants.restypes): + restype_name = residue_constants.restype_1to3[restype_letter] + atom_names = residue_constants.residue_atoms[restype_name] + for atom_name in atom_names: + atom_type = residue_constants.atom_order[atom_name] + restype_atom37_mask[restype, atom_type] = 1 + return restype_atom37_mask + + +def _make_restype_atom14_mask(): + """Mask of which atoms are present for which residue type in atom14.""" + restype_atom14_mask = [] + + for rt in residue_constants.restypes: + atom_names = residue_constants.restype_name_to_atom14_names[ + residue_constants.restype_1to3[rt]] + restype_atom14_mask.append([(1. if name else 0.) for name in atom_names]) + + restype_atom14_mask.append([0.] 
* 14) + restype_atom14_mask = np.array(restype_atom14_mask, dtype=np.float32) + return restype_atom14_mask + + +def _make_restype_atom37_to_atom14(): + """Map from atom37 to atom14 per residue type.""" + restype_atom37_to_atom14 = [] # mapping (restype, atom37) --> atom14 + for rt in residue_constants.restypes: + atom_names = residue_constants.restype_name_to_atom14_names[ + residue_constants.restype_1to3[rt]] + atom_name_to_idx14 = {name: i for i, name in enumerate(atom_names)} + restype_atom37_to_atom14.append([ + (atom_name_to_idx14[name] if name in atom_name_to_idx14 else 0) + for name in residue_constants.atom_types + ]) + + restype_atom37_to_atom14.append([0] * 37) + restype_atom37_to_atom14 = np.array(restype_atom37_to_atom14, dtype=np.int32) + return restype_atom37_to_atom14 + + +def _make_restype_atom14_to_atom37(): + """Map from atom14 to atom37 per residue type.""" + restype_atom14_to_atom37 = [] # mapping (restype, atom14) --> atom37 + for rt in residue_constants.restypes: + atom_names = residue_constants.restype_name_to_atom14_names[ + residue_constants.restype_1to3[rt]] + restype_atom14_to_atom37.append([ + (residue_constants.atom_order[name] if name else 0) + for name in atom_names + ]) + # Add dummy mapping for restype 'UNK' + restype_atom14_to_atom37.append([0] * 14) + restype_atom14_to_atom37 = np.array(restype_atom14_to_atom37, dtype=np.int32) + return restype_atom14_to_atom37 + + +def _make_restype_atom14_is_ambiguous(): + """Mask which atoms are ambiguous in atom14.""" + # create an ambiguous atoms mask. 
shape: (21, 14) + restype_atom14_is_ambiguous = np.zeros((21, 14), dtype=np.float32) + for resname, swap in residue_constants.residue_atom_renaming_swaps.items(): + for atom_name1, atom_name2 in swap.items(): + restype = residue_constants.restype_order[ + residue_constants.restype_3to1[resname]] + atom_idx1 = residue_constants.restype_name_to_atom14_names[resname].index( + atom_name1) + atom_idx2 = residue_constants.restype_name_to_atom14_names[resname].index( + atom_name2) + restype_atom14_is_ambiguous[restype, atom_idx1] = 1 + restype_atom14_is_ambiguous[restype, atom_idx2] = 1 + + return restype_atom14_is_ambiguous + + +def _make_restype_rigidgroup_base_atom37_idx(): + """Create Map from rigidgroups to atom37 indices.""" + # Create an array with the atom names. + # shape (num_restypes, num_rigidgroups, 3_atoms): (21, 8, 3) + base_atom_names = np.full([21, 8, 3], '', dtype=object) + + # 0: backbone frame + base_atom_names[:, 0, :] = ['C', 'CA', 'N'] + + # 3: 'psi-group' + base_atom_names[:, 3, :] = ['CA', 'C', 'O'] + + # 4,5,6,7: 'chi1,2,3,4-group' + for restype, restype_letter in enumerate(residue_constants.restypes): + resname = residue_constants.restype_1to3[restype_letter] + for chi_idx in range(4): + if residue_constants.chi_angles_mask[restype][chi_idx]: + atom_names = residue_constants.chi_angles_atoms[resname][chi_idx] + base_atom_names[restype, chi_idx + 4, :] = atom_names[1:] + + # Translate atom names into atom37 indices. 
+ lookuptable = residue_constants.atom_order.copy() + lookuptable[''] = 0 + restype_rigidgroup_base_atom37_idx = np.vectorize(lambda x: lookuptable[x])( + base_atom_names) + return restype_rigidgroup_base_atom37_idx + + +CHI_ATOM_INDICES = _make_chi_atom_indices() +RENAMING_MATRICES = _make_renaming_matrices() +RESTYPE_ATOM14_TO_ATOM37 = _make_restype_atom14_to_atom37() +RESTYPE_ATOM37_TO_ATOM14 = _make_restype_atom37_to_atom14() +RESTYPE_ATOM37_MASK = _make_restype_atom37_mask() +RESTYPE_ATOM14_MASK = _make_restype_atom14_mask() +RESTYPE_ATOM14_IS_AMBIGUOUS = _make_restype_atom14_is_ambiguous() +RESTYPE_RIGIDGROUP_BASE_ATOM37_IDX = _make_restype_rigidgroup_base_atom37_idx() + +# Create mask for existing rigid groups. +RESTYPE_RIGIDGROUP_MASK = np.zeros([21, 8], dtype=np.float32) +RESTYPE_RIGIDGROUP_MASK[:, 0] = 1 +RESTYPE_RIGIDGROUP_MASK[:, 3] = 1 +RESTYPE_RIGIDGROUP_MASK[:20, 4:] = residue_constants.chi_angles_mask + + +def get_atom37_mask(aatype): + return utils.batched_gather(jnp.asarray(RESTYPE_ATOM37_MASK), aatype) + + +def get_atom14_mask(aatype): + return utils.batched_gather(jnp.asarray(RESTYPE_ATOM14_MASK), aatype) + + +def get_atom14_is_ambiguous(aatype): + return utils.batched_gather(jnp.asarray(RESTYPE_ATOM14_IS_AMBIGUOUS), aatype) + + +def get_atom14_to_atom37_map(aatype): + return utils.batched_gather(jnp.asarray(RESTYPE_ATOM14_TO_ATOM37), aatype) + + +def get_atom37_to_atom14_map(aatype): + return utils.batched_gather(jnp.asarray(RESTYPE_ATOM37_TO_ATOM14), aatype) + + +def atom14_to_atom37(atom14_data: jnp.ndarray, # (N, 14, ...) + aatype: jnp.ndarray + ) -> jnp.ndarray: # (N, 37, ...) 
+ """Convert atom14 to atom37 representation.""" + assert len(atom14_data.shape) in [2, 3] + idx_atom37_to_atom14 = get_atom37_to_atom14_map(aatype) + atom37_data = utils.batched_gather( + atom14_data, idx_atom37_to_atom14, batch_dims=1) + atom37_mask = get_atom37_mask(aatype) + if len(atom14_data.shape) == 2: + atom37_data *= atom37_mask + elif len(atom14_data.shape) == 3: + atom37_data *= atom37_mask[:, :, None].astype(atom37_data.dtype) + return atom37_data + + +def atom37_to_atom14(aatype, all_atom_pos, all_atom_mask): + """Convert Atom37 positions to Atom14 positions.""" + residx_atom14_to_atom37 = utils.batched_gather( + jnp.asarray(RESTYPE_ATOM14_TO_ATOM37), aatype) + atom14_mask = utils.batched_gather( + all_atom_mask, residx_atom14_to_atom37, batch_dims=1).astype(jnp.float32) + # create a mask for known groundtruth positions + atom14_mask *= utils.batched_gather(jnp.asarray(RESTYPE_ATOM14_MASK), aatype) + # gather the groundtruth positions + atom14_positions = jax.tree_map( + lambda x: utils.batched_gather(x, residx_atom14_to_atom37, batch_dims=1), + all_atom_pos) + atom14_positions = atom14_mask * atom14_positions + return atom14_positions, atom14_mask + + +def get_alt_atom14(aatype, positions: geometry.Vec3Array, mask): + """Get alternative atom14 positions.""" + # pick the transformation matrices for the given residue sequence + # shape (num_res, 14, 14) + renaming_transform = utils.batched_gather( + jnp.asarray(RENAMING_MATRICES), aatype) + + alternative_positions = jax.tree_map( + lambda x: jnp.sum(x, axis=1), positions[:, :, None] * renaming_transform) + + # Create the mask for the alternative ground truth (differs from the + # ground truth mask, if only one of the atoms in an ambiguous pair has a + # ground truth position) + alternative_mask = jnp.sum(mask[..., None] * renaming_transform, axis=1) + + return alternative_positions, alternative_mask + + +def atom37_to_frames( + aatype: jnp.ndarray, # (...) 
+ all_atom_positions: geometry.Vec3Array, # (..., 37) + all_atom_mask: jnp.ndarray, # (..., 37) +) -> Dict[Text, jnp.ndarray]: + """Computes the frames for the up to 8 rigid groups for each residue.""" + # 0: 'backbone group', + # 1: 'pre-omega-group', (empty) + # 2: 'phi-group', (currently empty, because it defines only hydrogens) + # 3: 'psi-group', + # 4,5,6,7: 'chi1,2,3,4-group' + aatype_in_shape = aatype.shape + + # If there is a batch axis, just flatten it away, and reshape everything + # back at the end of the function. + aatype = jnp.reshape(aatype, [-1]) + all_atom_positions = jax.tree_map(lambda x: jnp.reshape(x, [-1, 37]), + all_atom_positions) + all_atom_mask = jnp.reshape(all_atom_mask, [-1, 37]) + + # Compute the gather indices for all residues in the chain. + # shape (N, 8, 3) + residx_rigidgroup_base_atom37_idx = utils.batched_gather( + RESTYPE_RIGIDGROUP_BASE_ATOM37_IDX, aatype) + + # Gather the base atom positions for each rigid group. + base_atom_pos = jax.tree_map( + lambda x: utils.batched_gather( # pylint: disable=g-long-lambda + x, residx_rigidgroup_base_atom37_idx, batch_dims=1), + all_atom_positions) + + # Compute the Rigids. + point_on_neg_x_axis = base_atom_pos[:, :, 0] + origin = base_atom_pos[:, :, 1] + point_on_xy_plane = base_atom_pos[:, :, 2] + gt_rotation = geometry.Rot3Array.from_two_vectors( + origin - point_on_neg_x_axis, point_on_xy_plane - origin) + + gt_frames = geometry.Rigid3Array(gt_rotation, origin) + + # Compute a mask whether the group exists. + # (N, 8) + group_exists = utils.batched_gather(RESTYPE_RIGIDGROUP_MASK, aatype) + + # Compute a mask whether ground truth exists for the group + gt_atoms_exist = utils.batched_gather( # shape (N, 8, 3) + all_atom_mask.astype(jnp.float32), + residx_rigidgroup_base_atom37_idx, + batch_dims=1) + gt_exists = jnp.min(gt_atoms_exist, axis=-1) * group_exists # (N, 8) + + # Adapt backbone frame to old convention (mirror x-axis and z-axis). 
+ rots = np.tile(np.eye(3, dtype=np.float32), [8, 1, 1]) + rots[0, 0, 0] = -1 + rots[0, 2, 2] = -1 + gt_frames = gt_frames.compose_rotation( + geometry.Rot3Array.from_array(rots)) + + # The frames for ambiguous rigid groups are just rotated by 180 degree around + # the x-axis. The ambiguous group is always the last chi-group. + restype_rigidgroup_is_ambiguous = np.zeros([21, 8], dtype=np.float32) + restype_rigidgroup_rots = np.tile(np.eye(3, dtype=np.float32), [21, 8, 1, 1]) + + for resname, _ in residue_constants.residue_atom_renaming_swaps.items(): + restype = residue_constants.restype_order[ + residue_constants.restype_3to1[resname]] + chi_idx = int(sum(residue_constants.chi_angles_mask[restype]) - 1) + restype_rigidgroup_is_ambiguous[restype, chi_idx + 4] = 1 + restype_rigidgroup_rots[restype, chi_idx + 4, 1, 1] = -1 + restype_rigidgroup_rots[restype, chi_idx + 4, 2, 2] = -1 + + # Gather the ambiguity information for each residue. + residx_rigidgroup_is_ambiguous = utils.batched_gather( + restype_rigidgroup_is_ambiguous, aatype) + ambiguity_rot = utils.batched_gather(restype_rigidgroup_rots, aatype) + ambiguity_rot = geometry.Rot3Array.from_array(ambiguity_rot) + + # Create the alternative ground truth frames. 
+ alt_gt_frames = gt_frames.compose_rotation(ambiguity_rot) + + fix_shape = lambda x: jnp.reshape(x, aatype_in_shape + (8,)) + + # reshape back to original residue layout + gt_frames = jax.tree_map(fix_shape, gt_frames) + gt_exists = fix_shape(gt_exists) + group_exists = fix_shape(group_exists) + residx_rigidgroup_is_ambiguous = fix_shape(residx_rigidgroup_is_ambiguous) + alt_gt_frames = jax.tree_map(fix_shape, alt_gt_frames) + + return { + 'rigidgroups_gt_frames': gt_frames, # Rigid (..., 8) + 'rigidgroups_gt_exists': gt_exists, # (..., 8) + 'rigidgroups_group_exists': group_exists, # (..., 8) + 'rigidgroups_group_is_ambiguous': + residx_rigidgroup_is_ambiguous, # (..., 8) + 'rigidgroups_alt_gt_frames': alt_gt_frames, # Rigid (..., 8) + } + + +def torsion_angles_to_frames( + aatype: jnp.ndarray, # (N) + backb_to_global: geometry.Rigid3Array, # (N) + torsion_angles_sin_cos: jnp.ndarray # (N, 7, 2) +) -> geometry.Rigid3Array: # (N, 8) + """Compute rigid group frames from torsion angles.""" + assert len(aatype.shape) == 1, ( + f'Expected array of rank 1, got array with shape: {aatype.shape}.') + assert len(backb_to_global.rotation.shape) == 1, ( + f'Expected array of rank 1, got array with shape: ' + f'{backb_to_global.rotation.shape}') + assert len(torsion_angles_sin_cos.shape) == 3, ( + f'Expected array of rank 3, got array with shape: ' + f'{torsion_angles_sin_cos.shape}') + assert torsion_angles_sin_cos.shape[1] == 7, ( + f'wrong shape {torsion_angles_sin_cos.shape}') + assert torsion_angles_sin_cos.shape[2] == 2, ( + f'wrong shape {torsion_angles_sin_cos.shape}') + + # Gather the default frames for all rigid groups. + # geometry.Rigid3Array with shape (N, 8) + m = utils.batched_gather(residue_constants.restype_rigid_group_default_frame, + aatype) + default_frames = geometry.Rigid3Array.from_array4x4(m) + + # Create the rotation matrices according to the given angles (each frame is + # defined such that its rotation is around the x-axis). 
+ sin_angles = torsion_angles_sin_cos[..., 0] + cos_angles = torsion_angles_sin_cos[..., 1] + + # insert zero rotation for backbone group. + num_residues, = aatype.shape + sin_angles = jnp.concatenate([jnp.zeros([num_residues, 1]), sin_angles], + axis=-1) + cos_angles = jnp.concatenate([jnp.ones([num_residues, 1]), cos_angles], + axis=-1) + zeros = jnp.zeros_like(sin_angles) + ones = jnp.ones_like(sin_angles) + + # all_rots are geometry.Rot3Array with shape (N, 8) + all_rots = geometry.Rot3Array(ones, zeros, zeros, + zeros, cos_angles, -sin_angles, + zeros, sin_angles, cos_angles) + + # Apply rotations to the frames. + all_frames = default_frames.compose_rotation(all_rots) + + # chi2, chi3, and chi4 frames do not transform to the backbone frame but to + # the previous frame. So chain them up accordingly. + + chi1_frame_to_backb = all_frames[:, 4] + chi2_frame_to_backb = chi1_frame_to_backb @ all_frames[:, 5] + chi3_frame_to_backb = chi2_frame_to_backb @ all_frames[:, 6] + chi4_frame_to_backb = chi3_frame_to_backb @ all_frames[:, 7] + + all_frames_to_backb = jax.tree_multimap( + lambda *x: jnp.concatenate(x, axis=-1), all_frames[:, 0:5], + chi2_frame_to_backb[:, None], chi3_frame_to_backb[:, None], + chi4_frame_to_backb[:, None]) + + # Create the global frames. + # shape (N, 8) + all_frames_to_global = backb_to_global[:, None] @ all_frames_to_backb + + return all_frames_to_global + + +def frames_and_literature_positions_to_atom14_pos( + aatype: jnp.ndarray, # (N) + all_frames_to_global: geometry.Rigid3Array # (N, 8) +) -> geometry.Vec3Array: # (N, 14) + """Put atom literature positions (atom14 encoding) in each rigid group.""" + + # Pick the appropriate transform for every atom. 
+ residx_to_group_idx = utils.batched_gather( + residue_constants.restype_atom14_to_rigid_group, aatype) + group_mask = jax.nn.one_hot( + residx_to_group_idx, num_classes=8) # shape (N, 14, 8) + + # geometry.Rigid3Array with shape (N, 14) + map_atoms_to_global = jax.tree_map( + lambda x: jnp.sum(x[:, None, :] * group_mask, axis=-1), + all_frames_to_global) + + # Gather the literature atom positions for each residue. + # geometry.Vec3Array with shape (N, 14) + lit_positions = geometry.Vec3Array.from_array( + utils.batched_gather( + residue_constants.restype_atom14_rigid_group_positions, aatype)) + + # Transform each atom from its local frame to the global frame. + # geometry.Vec3Array with shape (N, 14) + pred_positions = map_atoms_to_global.apply_to_point(lit_positions) + + # Mask out non-existing atoms. + mask = utils.batched_gather(residue_constants.restype_atom14_mask, aatype) + pred_positions = pred_positions * mask + + return pred_positions + + +def extreme_ca_ca_distance_violations( + positions: geometry.Vec3Array, # (N, 37(14)) + mask: jnp.ndarray, # (N, 37(14)) + residue_index: jnp.ndarray, # (N) + max_angstrom_tolerance=1.5 + ) -> jnp.ndarray: + """Counts residues whose Ca is a large distance from its neighbor.""" + this_ca_pos = positions[:-1, 1] # (N - 1,) + this_ca_mask = mask[:-1, 1] # (N - 1) + next_ca_pos = positions[1:, 1] # (N - 1,) + next_ca_mask = mask[1:, 1] # (N - 1) + has_no_gap_mask = ((residue_index[1:] - residue_index[:-1]) == 1.0).astype( + jnp.float32) + ca_ca_distance = geometry.euclidean_distance(this_ca_pos, next_ca_pos, 1e-6) + violations = (ca_ca_distance - + residue_constants.ca_ca) > max_angstrom_tolerance + mask = this_ca_mask * next_ca_mask * has_no_gap_mask + return utils.mask_mean(mask=mask, value=violations) + + +def between_residue_bond_loss( + pred_atom_positions: geometry.Vec3Array, # (N, 37(14)) + pred_atom_mask: jnp.ndarray, # (N, 37(14)) + residue_index: jnp.ndarray, # (N) + aatype: jnp.ndarray, # (N) + 
tolerance_factor_soft=12.0, + tolerance_factor_hard=12.0) -> Dict[Text, jnp.ndarray]: + """Flat-bottom loss to penalize structural violations between residues.""" + assert len(pred_atom_positions.shape) == 2 + assert len(pred_atom_mask.shape) == 2 + assert len(residue_index.shape) == 1 + assert len(aatype.shape) == 1 + + # Get the positions of the relevant backbone atoms. + this_ca_pos = pred_atom_positions[:-1, 1] # (N - 1) + this_ca_mask = pred_atom_mask[:-1, 1] # (N - 1) + this_c_pos = pred_atom_positions[:-1, 2] # (N - 1) + this_c_mask = pred_atom_mask[:-1, 2] # (N - 1) + next_n_pos = pred_atom_positions[1:, 0] # (N - 1) + next_n_mask = pred_atom_mask[1:, 0] # (N - 1) + next_ca_pos = pred_atom_positions[1:, 1] # (N - 1) + next_ca_mask = pred_atom_mask[1:, 1] # (N - 1) + has_no_gap_mask = ((residue_index[1:] - residue_index[:-1]) == 1.0).astype( + jnp.float32) + + # Compute loss for the C--N bond. + c_n_bond_length = geometry.euclidean_distance(this_c_pos, next_n_pos, 1e-6) + + # The C-N bond to proline has slightly different length because of the ring. + next_is_proline = ( + aatype[1:] == residue_constants.restype_order['P']).astype(jnp.float32) + gt_length = ( + (1. - next_is_proline) * residue_constants.between_res_bond_length_c_n[0] + + next_is_proline * residue_constants.between_res_bond_length_c_n[1]) + gt_stddev = ( + (1. - next_is_proline) * + residue_constants.between_res_bond_length_stddev_c_n[0] + + next_is_proline * residue_constants.between_res_bond_length_stddev_c_n[1]) + c_n_bond_length_error = jnp.sqrt(1e-6 + + jnp.square(c_n_bond_length - gt_length)) + c_n_loss_per_residue = jax.nn.relu( + c_n_bond_length_error - tolerance_factor_soft * gt_stddev) + mask = this_c_mask * next_n_mask * has_no_gap_mask + c_n_loss = jnp.sum(mask * c_n_loss_per_residue) / (jnp.sum(mask) + 1e-6) + c_n_violation_mask = mask * ( + c_n_bond_length_error > (tolerance_factor_hard * gt_stddev)) + + # Compute loss for the angles. 
+ c_ca_unit_vec = (this_ca_pos - this_c_pos).normalized(1e-6) + c_n_unit_vec = (next_n_pos - this_c_pos) / c_n_bond_length + n_ca_unit_vec = (next_ca_pos - next_n_pos).normalized(1e-6) + + ca_c_n_cos_angle = c_ca_unit_vec.dot(c_n_unit_vec) + gt_angle = residue_constants.between_res_cos_angles_ca_c_n[0] + gt_stddev = residue_constants.between_res_bond_length_stddev_c_n[0] + ca_c_n_cos_angle_error = jnp.sqrt( + 1e-6 + jnp.square(ca_c_n_cos_angle - gt_angle)) + ca_c_n_loss_per_residue = jax.nn.relu( + ca_c_n_cos_angle_error - tolerance_factor_soft * gt_stddev) + mask = this_ca_mask * this_c_mask * next_n_mask * has_no_gap_mask + ca_c_n_loss = jnp.sum(mask * ca_c_n_loss_per_residue) / (jnp.sum(mask) + 1e-6) + ca_c_n_violation_mask = mask * (ca_c_n_cos_angle_error > + (tolerance_factor_hard * gt_stddev)) + + c_n_ca_cos_angle = (-c_n_unit_vec).dot(n_ca_unit_vec) + gt_angle = residue_constants.between_res_cos_angles_c_n_ca[0] + gt_stddev = residue_constants.between_res_cos_angles_c_n_ca[1] + c_n_ca_cos_angle_error = jnp.sqrt( + 1e-6 + jnp.square(c_n_ca_cos_angle - gt_angle)) + c_n_ca_loss_per_residue = jax.nn.relu( + c_n_ca_cos_angle_error - tolerance_factor_soft * gt_stddev) + mask = this_c_mask * next_n_mask * next_ca_mask * has_no_gap_mask + c_n_ca_loss = jnp.sum(mask * c_n_ca_loss_per_residue) / (jnp.sum(mask) + 1e-6) + c_n_ca_violation_mask = mask * ( + c_n_ca_cos_angle_error > (tolerance_factor_hard * gt_stddev)) + + # Compute a per residue loss (equally distribute the loss to both + # neighbouring residues). + per_residue_loss_sum = (c_n_loss_per_residue + + ca_c_n_loss_per_residue + + c_n_ca_loss_per_residue) + per_residue_loss_sum = 0.5 * (jnp.pad(per_residue_loss_sum, [[0, 1]]) + + jnp.pad(per_residue_loss_sum, [[1, 0]])) + + # Compute hard violations. 
+ violation_mask = jnp.max( + jnp.stack([c_n_violation_mask, + ca_c_n_violation_mask, + c_n_ca_violation_mask]), axis=0) + violation_mask = jnp.maximum( + jnp.pad(violation_mask, [[0, 1]]), + jnp.pad(violation_mask, [[1, 0]])) + + return {'c_n_loss_mean': c_n_loss, # shape () + 'ca_c_n_loss_mean': ca_c_n_loss, # shape () + 'c_n_ca_loss_mean': c_n_ca_loss, # shape () + 'per_residue_loss_sum': per_residue_loss_sum, # shape (N) + 'per_residue_violation_mask': violation_mask # shape (N) + } + + +def between_residue_clash_loss( + pred_positions: geometry.Vec3Array, # (N, 14) + atom_exists: jnp.ndarray, # (N, 14) + atom_radius: jnp.ndarray, # (N, 14) + residue_index: jnp.ndarray, # (N) + overlap_tolerance_soft=1.5, + overlap_tolerance_hard=1.5) -> Dict[Text, jnp.ndarray]: + """Loss to penalize steric clashes between residues.""" + assert len(pred_positions.shape) == 2 + assert len(atom_exists.shape) == 2 + assert len(atom_radius.shape) == 2 + assert len(residue_index.shape) == 1 + + # Create the distance matrix. + # (N, N, 14, 14) + dists = geometry.euclidean_distance(pred_positions[:, None, :, None], + pred_positions[None, :, None, :], 1e-10) + + # Create the mask for valid distances. + # shape (N, N, 14, 14) + dists_mask = (atom_exists[:, None, :, None] * atom_exists[None, :, None, :]) + + # Mask out all the duplicate entries in the lower triangular matrix. + # Also mask out the diagonal (atom-pairs from the same residue) -- these atoms + # are handled separately. + dists_mask *= ( + residue_index[:, None, None, None] < residue_index[None, :, None, None]) + + # Backbone C--N bond between subsequent residues is no clash. + c_one_hot = jax.nn.one_hot(2, num_classes=14) + n_one_hot = jax.nn.one_hot(0, num_classes=14) + neighbour_mask = ((residue_index[:, None, None, None] + + 1) == residue_index[None, :, None, None]) + c_n_bonds = neighbour_mask * c_one_hot[None, None, :, + None] * n_one_hot[None, None, None, :] + dists_mask *= (1. 
- c_n_bonds) + + # Disulfide bridge between two cysteines is no clash. NOTE(review): the mask below zeroes the CYS SG atom14 slot pair for every residue pair; residue type is not checked in this function -- confirm intended. + cys_sg_idx = residue_constants.restype_name_to_atom14_names['CYS'].index('SG') + cys_sg_one_hot = jax.nn.one_hot(cys_sg_idx, num_classes=14) + disulfide_bonds = (cys_sg_one_hot[None, None, :, None] * + cys_sg_one_hot[None, None, None, :]) + dists_mask *= (1. - disulfide_bonds) + + # Compute the lower bound for the allowed distances. + # shape (N, N, 14, 14) + dists_lower_bound = dists_mask * ( + atom_radius[:, None, :, None] + atom_radius[None, :, None, :]) + + # Compute the error. + # shape (N, N, 14, 14) + dists_to_low_error = dists_mask * jax.nn.relu( + dists_lower_bound - overlap_tolerance_soft - dists) + + # Compute the mean loss. + # shape () + mean_loss = (jnp.sum(dists_to_low_error) + / (1e-6 + jnp.sum(dists_mask))) + + # Compute the per atom loss sum. + # shape (N, 14) + per_atom_loss_sum = (jnp.sum(dists_to_low_error, axis=[0, 2]) + + jnp.sum(dists_to_low_error, axis=[1, 3])) + + # Compute the hard clash mask. + # shape (N, N, 14, 14) + clash_mask = dists_mask * ( + dists < (dists_lower_bound - overlap_tolerance_hard)) + + # Compute the per atom clash. + # shape (N, 14) + per_atom_clash_mask = jnp.maximum( + jnp.max(clash_mask, axis=[0, 2]), + jnp.max(clash_mask, axis=[1, 3])) + + return {'mean_loss': mean_loss, # shape () + 'per_atom_loss_sum': per_atom_loss_sum, # shape (N, 14) + 'per_atom_clash_mask': per_atom_clash_mask # shape (N, 14) + } + + +def within_residue_violations( + pred_positions: geometry.Vec3Array, # (N, 14) + atom_exists: jnp.ndarray, # (N, 14) + dists_lower_bound: jnp.ndarray, # (N, 14, 14) + dists_upper_bound: jnp.ndarray, # (N, 14, 14) + tighten_bounds_for_loss=0.0, +) -> Dict[Text, jnp.ndarray]: + """Find within-residue violations.""" + assert len(pred_positions.shape) == 2 + assert len(atom_exists.shape) == 2 + assert len(dists_lower_bound.shape) == 3 + assert len(dists_upper_bound.shape) == 3 + + # Compute the mask for each residue.
+ # shape (N, 14, 14) + dists_masks = (1. - jnp.eye(14, 14)[None]) + dists_masks *= (atom_exists[:, :, None] * atom_exists[:, None, :]) + + # Distance matrix + # shape (N, 14, 14) + dists = geometry.euclidean_distance(pred_positions[:, :, None], + pred_positions[:, None, :], 1e-10) + + # Compute the loss. + # shape (N, 14, 14) + dists_to_low_error = jax.nn.relu( + dists_lower_bound + tighten_bounds_for_loss - dists) + dists_to_high_error = jax.nn.relu( + dists + tighten_bounds_for_loss - dists_upper_bound) + loss = dists_masks * (dists_to_low_error + dists_to_high_error) + + # Compute the per atom loss sum. + # shape (N, 14) + per_atom_loss_sum = (jnp.sum(loss, axis=1) + + jnp.sum(loss, axis=2)) + + # Compute the violations mask. + # shape (N, 14, 14) + violations = dists_masks * ((dists < dists_lower_bound) | + (dists > dists_upper_bound)) + + # Compute the per atom violations. + # shape (N, 14) + per_atom_violations = jnp.maximum( + jnp.max(violations, axis=1), jnp.max(violations, axis=2)) + + return {'per_atom_loss_sum': per_atom_loss_sum, # shape (N, 14) + 'per_atom_violations': per_atom_violations # shape (N, 14) + } + + +def find_optimal_renaming( + gt_positions: geometry.Vec3Array, # (N, 14) + alt_gt_positions: geometry.Vec3Array, # (N, 14) + atom_is_ambiguous: jnp.ndarray, # (N, 14) + gt_exists: jnp.ndarray, # (N, 14) + pred_positions: geometry.Vec3Array, # (N, 14) +) -> jnp.ndarray: # (N) + """Find, per residue, whether the alternative ground-truth atom naming fits the prediction better; returns 1.0 where the alternative naming has the lower summed distance error (called 'lddt' in the code), else 0.0.""" + assert len(gt_positions.shape) == 2 + assert len(alt_gt_positions.shape) == 2 + assert len(atom_is_ambiguous.shape) == 2 + assert len(gt_exists.shape) == 2 + assert len(pred_positions.shape) == 2 + + # Create the pred distance matrix. + # shape (N, N, 14, 14) + pred_dists = geometry.euclidean_distance(pred_positions[:, None, :, None], + pred_positions[None, :, None, :], + 1e-10) + + # Compute distances for ground truth with original and alternative names.
+ # shape (N, N, 14, 14) + gt_dists = geometry.euclidean_distance(gt_positions[:, None, :, None], + gt_positions[None, :, None, :], 1e-10) + + alt_gt_dists = geometry.euclidean_distance(alt_gt_positions[:, None, :, None], + alt_gt_positions[None, :, None, :], + 1e-10) + + # Compute absolute distance errors (named lddt here; lower is better). + # shape (N, N, 14, 14) + lddt = jnp.sqrt(1e-10 + squared_difference(pred_dists, gt_dists)) + alt_lddt = jnp.sqrt(1e-10 + squared_difference(pred_dists, alt_gt_dists)) + + # Create a mask for ambiguous atoms in rows vs. non-ambiguous atoms + # in cols. + # shape (N, N, 14, 14) + mask = ( + gt_exists[:, None, :, None] * # rows + atom_is_ambiguous[:, None, :, None] * # rows + gt_exists[None, :, None, :] * # cols + (1. - atom_is_ambiguous[None, :, None, :])) # cols + + # Aggregate distances for each residue to the non-ambiguous atoms. + # shape (N) + per_res_lddt = jnp.sum(mask * lddt, axis=[1, 2, 3]) + alt_per_res_lddt = jnp.sum(mask * alt_lddt, axis=[1, 2, 3]) + + # Decide for each residue, whether alternative naming is better. + # shape (N) + alt_naming_is_better = (alt_per_res_lddt < per_res_lddt).astype(jnp.float32) + + return alt_naming_is_better # shape (N) + + +def frame_aligned_point_error( + pred_frames: geometry.Rigid3Array, # shape (num_frames) + target_frames: geometry.Rigid3Array, # shape (num_frames) + frames_mask: jnp.ndarray, # shape (num_frames) + pred_positions: geometry.Vec3Array, # shape (num_positions) + target_positions: geometry.Vec3Array, # shape (num_positions) + positions_mask: jnp.ndarray, # shape (num_positions) + pair_mask: jnp.ndarray, # shape (num_frames, num_positions) + l1_clamp_distance: float, + length_scale=20., + epsilon=1e-4) -> jnp.ndarray: # shape () + """Measure point error under different alignments. + + Computes error between two structures with B points + under A alignments derived from the given pairs of frames. + Args: + pred_frames: num_frames reference frames for 'pred_positions'.
+ target_frames: num_frames reference frames for 'target_positions'. + frames_mask: Mask for frame pairs to use. + pred_positions: num_positions predicted positions of the structure. + target_positions: num_positions target positions of the structure. + positions_mask: Mask on which positions to score. + pair_mask: A (num_frames, num_positions) mask to use in the loss, useful + for separating intra from inter chain losses. + l1_clamp_distance: Distance cutoff on error beyond which gradients will + be zero. + length_scale: length scale to divide loss by. + epsilon: small value used to regularize denominator for masked average. + Returns: + Masked Frame aligned point error. + """ + # For now we do not allow any batch dimensions. + assert len(pred_frames.rotation.shape) == 1 + assert len(target_frames.rotation.shape) == 1 + assert frames_mask.ndim == 1 + assert pred_positions.x.ndim == 1 + assert target_positions.x.ndim == 1 + assert positions_mask.ndim == 1 + + # Compute array of predicted positions in the predicted frames. + # geometry.Vec3Array (num_frames, num_positions) + local_pred_pos = pred_frames[:, None].inverse().apply_to_point( + pred_positions[None, :]) + + # Compute array of target positions in the target frames. + # geometry.Vec3Array (num_frames, num_positions) + local_target_pos = target_frames[:, None].inverse().apply_to_point( + target_positions[None, :]) + + # Compute errors between the structures. 
+ # jnp.ndarray (num_frames, num_positions) + error_dist = geometry.euclidean_distance(local_pred_pos, local_target_pos, + epsilon) + + clipped_error_dist = jnp.clip(error_dist, 0, l1_clamp_distance) + + normed_error = clipped_error_dist / length_scale + normed_error *= jnp.expand_dims(frames_mask, axis=-1) + normed_error *= jnp.expand_dims(positions_mask, axis=-2) + if pair_mask is not None: + normed_error *= pair_mask + + mask = (jnp.expand_dims(frames_mask, axis=-1) * + jnp.expand_dims(positions_mask, axis=-2)) + if pair_mask is not None: + mask *= pair_mask + normalization_factor = jnp.sum(mask, axis=(-1, -2)) + return (jnp.sum(normed_error, axis=(-2, -1)) / + (epsilon + normalization_factor)) + + +def get_chi_atom_indices(): + """Returns atom indices needed to compute chi angles for all residue types. + + Returns: + A tensor of shape [residue_types=21, chis=4, atoms=4]. The residue types are + in the order specified in residue_constants.restypes + unknown residue type + at the end. For chi angles which are not defined on the residue, the + positions indices are by default set to 0. + """ + chi_atom_indices = [] + for residue_name in residue_constants.restypes: + residue_name = residue_constants.restype_1to3[residue_name] + residue_chi_angles = residue_constants.chi_angles_atoms[residue_name] + atom_indices = [] + for chi_angle in residue_chi_angles: + atom_indices.append( + [residue_constants.atom_order[atom] for atom in chi_angle]) + for _ in range(4 - len(atom_indices)): + atom_indices.append([0, 0, 0, 0]) # For chi angles not defined on the AA. + chi_atom_indices.append(atom_indices) + + chi_atom_indices.append([[0, 0, 0, 0]] * 4) # For UNKNOWN residue. + + return jnp.asarray(chi_atom_indices) + + +def compute_chi_angles(positions: geometry.Vec3Array, + mask: jnp.ndarray, + aatype: jnp.ndarray): + """Computes the chi angles given all atom positions and the amino acid type.
+ + Args: + positions: A Vec3Array of shape + [num_res, residue_constants.atom_type_num], with positions of + atoms needed to calculate chi angles. Supports up to 1 batch dimension. + mask: A tensor of shape + [num_res, residue_constants.atom_type_num] that masks which atom + positions are set for each residue. The chi mask will be + set to 1 for a chi angle only if the amino acid has that chi angle and all + the chi atoms needed to calculate that chi angle are set. Passing None + is not supported by this implementation: the mask's shape is asserted + and the mask is gathered unconditionally, so it must always be + provided. + aatype: A tensor of shape [num_res] with amino acid type integer + code; used to index the 21-row chi tables (rows 0 to 20, last row = UNKNOWN). Supports up to 1 batch dimension. + + Returns: + A tuple of tensors (chi_angles, mask), where both have shape + [num_res, 4]. The mask masks out unused chi angles for amino acid + types that have less than 4 chi angles. It also masks out chi angles + whose four atoms are not all set in the input `mask`. + """ + + # Don't assert on the num_res and batch dimensions as they might be unknown. + assert positions.shape[-1] == residue_constants.atom_type_num + assert mask.shape[-1] == residue_constants.atom_type_num + + # Compute the table of chi angle indices. Shape: [restypes, chis=4, atoms=4]. + chi_atom_indices = get_chi_atom_indices() + # Select atoms to compute chis. Shape: [num_res, chis=4, atoms=4]. + atom_indices = utils.batched_gather( + params=chi_atom_indices, indices=aatype, axis=0) + # Gather atom positions. Shape: [num_res, chis=4, atoms=4, xyz=3]. + chi_angle_atoms = jax.tree_map( + lambda x: utils.batched_gather( # pylint: disable=g-long-lambda + params=x, indices=atom_indices, axis=-1, batch_dims=1), positions) + a, b, c, d = [chi_angle_atoms[..., i] for i in range(4)] + + chi_angles = geometry.dihedral_angle(a, b, c, d) + + # Copy the chi angle mask, add the UNKNOWN residue.
Shape: [restypes, 4]. + chi_angles_mask = list(residue_constants.chi_angles_mask) + chi_angles_mask.append([0.0, 0.0, 0.0, 0.0]) + chi_angles_mask = jnp.asarray(chi_angles_mask) + # Compute the chi angle mask. Shape [num_res, chis=4]. + chi_mask = utils.batched_gather(params=chi_angles_mask, indices=aatype, + axis=0) + + # The chi_mask is set to 1 only when all necessary chi angle atoms were set. + # Gather the chi angle atoms mask. Shape: [num_res, chis=4, atoms=4]. + chi_angle_atoms_mask = utils.batched_gather( + params=mask, indices=atom_indices, axis=-1, batch_dims=1) + # Check if all 4 chi angle atoms were set. Shape: [num_res, chis=4]. + chi_angle_atoms_mask = jnp.prod(chi_angle_atoms_mask, axis=[-1]) + chi_mask = chi_mask * chi_angle_atoms_mask.astype(jnp.float32) + + return chi_angles, chi_mask + + +def make_transform_from_reference( + a_xyz: geometry.Vec3Array, + b_xyz: geometry.Vec3Array, + c_xyz: geometry.Vec3Array) -> geometry.Rigid3Array: + """Returns the rigid transform (rotation and translation) to convert from reference. + + Note that this method does not take care of symmetries. If you provide the + coordinates in the non-standard way, the A atom will end up in the negative + y-axis rather than in the positive y-axis. You need to take care of such + cases in your code. + + Args: + a_xyz: A Vec3Array; (a_xyz - b_xyz) is the second vector given to Rot3Array.from_two_vectors. + b_xyz: A Vec3Array; used as the translation of the returned transform. + c_xyz: A Vec3Array; (c_xyz - b_xyz) is the first vector given to Rot3Array.from_two_vectors. + + Returns: + A Rigid3Array which, when applied to coordinates in a canonicalized + reference frame, will give coordinates approximately equal to + the original coordinates (in the global frame).
+ """ + rotation = geometry.Rot3Array.from_two_vectors(c_xyz - b_xyz, + a_xyz - b_xyz) + return geometry.Rigid3Array(rotation, b_xyz) diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/model/all_atom_test.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/model/all_atom_test.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,135 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for all_atom.""" + +from absl.testing import absltest +from absl.testing import parameterized +from alphafold.model import all_atom +from alphafold.model import r3 +import numpy as np + +L1_CLAMP_DISTANCE = 10 + + +def get_identity_rigid(shape): + """Returns identity rigid transform.""" + + ones = np.ones(shape) + zeros = np.zeros(shape) + rot = r3.Rots(ones, zeros, zeros, + zeros, ones, zeros, + zeros, zeros, ones) + trans = r3.Vecs(zeros, zeros, zeros) + return r3.Rigids(rot, trans) + + +def get_global_rigid_transform(rot_angle, translation, bcast_dims): + """Returns rigid transform that globally rotates/translates by same amount.""" + + rot_angle = np.asarray(rot_angle) + translation = np.asarray(translation) + if bcast_dims: + for _ in range(bcast_dims): + rot_angle = np.expand_dims(rot_angle, 0) + translation = np.expand_dims(translation, 0) + sin_angle = np.sin(np.deg2rad(rot_angle)) + cos_angle = np.cos(np.deg2rad(rot_angle)) + ones = np.ones_like(sin_angle) + zeros = np.zeros_like(sin_angle) 
+ rot = r3.Rots(ones, zeros, zeros, + zeros, cos_angle, -sin_angle, + zeros, sin_angle, cos_angle) + trans = r3.Vecs(translation[..., 0], translation[..., 1], translation[..., 2]) + return r3.Rigids(rot, trans) + + +class AllAtomTest(parameterized.TestCase, absltest.TestCase): + + @parameterized.named_parameters( + ('identity', 0, [0, 0, 0]), + ('rot_90', 90, [0, 0, 0]), + ('trans_10', 0, [0, 0, 10]), + ('rot_174_trans_1', 174, [1, 1, 1])) + def test_frame_aligned_point_error_perfect_on_global_transform( + self, rot_angle, translation): + """Tests global transform between target and preds gives perfect score.""" + + # pylint: disable=bad-whitespace + target_positions = np.array( + [[ 21.182, 23.095, 19.731], + [ 22.055, 20.919, 17.294], + [ 24.599, 20.005, 15.041], + [ 25.567, 18.214, 12.166], + [ 28.063, 17.082, 10.043], + [ 28.779, 15.569, 6.985], + [ 30.581, 13.815, 4.612], + [ 29.258, 12.193, 2.296]]) + # pylint: enable=bad-whitespace + global_rigid_transform = get_global_rigid_transform( + rot_angle, translation, 1) + + target_positions = r3.vecs_from_tensor(target_positions) + pred_positions = r3.rigids_mul_vecs( + global_rigid_transform, target_positions) + positions_mask = np.ones(target_positions.x.shape[0]) + + target_frames = get_identity_rigid(10) + pred_frames = r3.rigids_mul_rigids(global_rigid_transform, target_frames) + frames_mask = np.ones(10) + + fape = all_atom.frame_aligned_point_error( + pred_frames, target_frames, frames_mask, pred_positions, + target_positions, positions_mask, L1_CLAMP_DISTANCE, + L1_CLAMP_DISTANCE, epsilon=0) + self.assertAlmostEqual(fape, 0.) 
+ + @parameterized.named_parameters( + ('identity', + [[0, 0, 0], [5, 0, 0], [10, 0, 0]], + [[0, 0, 0], [5, 0, 0], [10, 0, 0]], + 0.), + ('shift_2.5', + [[0, 0, 0], [5, 0, 0], [10, 0, 0]], + [[2.5, 0, 0], [7.5, 0, 0], [7.5, 0, 0]], + 0.25), + ('shift_5', + [[0, 0, 0], [5, 0, 0], [10, 0, 0]], + [[5, 0, 0], [10, 0, 0], [15, 0, 0]], + 0.5), + ('shift_10', + [[0, 0, 0], [5, 0, 0], [10, 0, 0]], + [[10, 0, 0], [15, 0, 0], [0, 0, 0]], + 1.)) + def test_frame_aligned_point_error_matches_expected( + self, target_positions, pred_positions, expected_alddt): + """Tests score matches expected.""" + + target_frames = get_identity_rigid(2) + pred_frames = target_frames + frames_mask = np.ones(2) + + target_positions = r3.vecs_from_tensor(np.array(target_positions)) + pred_positions = r3.vecs_from_tensor(np.array(pred_positions)) + positions_mask = np.ones(target_positions.x.shape[0]) + + alddt = all_atom.frame_aligned_point_error( + pred_frames, target_frames, frames_mask, pred_positions, + target_positions, positions_mask, L1_CLAMP_DISTANCE, + L1_CLAMP_DISTANCE, epsilon=0) + self.assertAlmostEqual(alddt, expected_alddt) + + +if __name__ == '__main__': + absltest.main() diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/model/common_modules.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/model/common_modules.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,130 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""A collection of common Haiku modules for use in protein folding.""" +import numbers +from typing import Union, Sequence + +import haiku as hk +import jax.numpy as jnp +import numpy as np + + +# Constant from scipy.stats.truncnorm.std(a=-2, b=2, loc=0., scale=1.) +TRUNCATED_NORMAL_STDDEV_FACTOR = np.asarray(.87962566103423978, + dtype=np.float32) + + +def get_initializer_scale(initializer_name, input_shape): + """Get the Haiku weight initializer for the named scheme: 'zeros' gives a constant-0 initializer; any other name gives a fan-in-scaled truncated normal (variance doubled for 'relu'). Only the initializer is returned.""" + + if initializer_name == 'zeros': + w_init = hk.initializers.Constant(0.0) + else: + # fan-in scaling + scale = 1. + for channel_dim in input_shape: + scale /= channel_dim + if initializer_name == 'relu': + scale *= 2 + + noise_scale = scale + + stddev = np.sqrt(noise_scale) + # Adjust stddev for truncation. + stddev = stddev / TRUNCATED_NORMAL_STDDEV_FACTOR + w_init = hk.initializers.TruncatedNormal(mean=0.0, stddev=stddev) + + return w_init + + +class Linear(hk.Module): + """Protein folding specific Linear module. + + This differs from the standard Haiku Linear in a few ways: + * It supports inputs and outputs of arbitrary rank + * Initializers are specified by strings + """ + + def __init__(self, + num_output: Union[int, Sequence[int]], + initializer: str = 'linear', + num_input_dims: int = 1, + use_bias: bool = True, + bias_init: float = 0., + precision = None, + name: str = 'linear'): + """Constructs Linear Module. + + Args: + num_output: Number of output channels. Can be tuple when outputting + multiple dimensions. + initializer: What initializer to use, should be one of {'linear', 'relu', + 'zeros'} + num_input_dims: Number of dimensions from the end to project. + use_bias: Whether to include trainable bias + bias_init: Value used to initialize bias. + precision: What precision to use for matrix multiplication, defaults + to None.
+ name: Name of module, used for name scopes. + """ + super().__init__(name=name) + if isinstance(num_output, numbers.Integral): + self.output_shape = (num_output,) + else: + self.output_shape = tuple(num_output) + self.initializer = initializer + self.use_bias = use_bias + self.bias_init = bias_init + self.num_input_dims = num_input_dims + self.num_output_dims = len(self.output_shape) + self.precision = precision + + def __call__(self, inputs): + """Connects Module. + + Args: + inputs: Tensor with at least num_input_dims dimensions. + + Returns: + output of shape [...] + num_output. + """ + + num_input_dims = self.num_input_dims + + if self.num_input_dims > 0: + in_shape = inputs.shape[-self.num_input_dims:] + else: + in_shape = () + + weight_init = get_initializer_scale(self.initializer, in_shape) + + in_letters = 'abcde'[:self.num_input_dims] + out_letters = 'hijkl'[:self.num_output_dims] + + weight_shape = in_shape + self.output_shape + weights = hk.get_parameter('weights', weight_shape, inputs.dtype, + weight_init) + + equation = f'...{in_letters}, {in_letters}{out_letters}->...{out_letters}' + + output = jnp.einsum(equation, inputs, weights, precision=self.precision) + + if self.use_bias: + bias = hk.get_parameter('bias', self.output_shape, inputs.dtype, + hk.initializers.Constant(self.bias_init)) + output += bias + + return output + diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/model/config.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/model/config.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,657 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Model config.""" + +import copy +from alphafold.model.tf import shape_placeholders +import ml_collections + +NUM_RES = shape_placeholders.NUM_RES +NUM_MSA_SEQ = shape_placeholders.NUM_MSA_SEQ +NUM_EXTRA_SEQ = shape_placeholders.NUM_EXTRA_SEQ +NUM_TEMPLATES = shape_placeholders.NUM_TEMPLATES + + +def model_config(name: str) -> ml_collections.ConfigDict: + """Get the ConfigDict of a CASP14 model.""" + + if 'multimer' in name: + return CONFIG_MULTIMER + + if name not in CONFIG_DIFFS: + raise ValueError(f'Invalid model name {name}.') + cfg = copy.deepcopy(CONFIG) + cfg.update_from_flattened_dict(CONFIG_DIFFS[name]) + return cfg + + +MODEL_PRESETS = { + 'monomer': ( + 'model_1', + 'model_2', + 'model_3', + 'model_4', + 'model_5', + ), + 'monomer_ptm': ( + 'model_1_ptm', + 'model_2_ptm', + 'model_3_ptm', + 'model_4_ptm', + 'model_5_ptm', + ), + 'multimer': ( + 'model_1_multimer', + 'model_2_multimer', + 'model_3_multimer', + 'model_4_multimer', + 'model_5_multimer', + ), +} +MODEL_PRESETS['monomer_casp14'] = MODEL_PRESETS['monomer'] + + +CONFIG_DIFFS = { + 'model_1': { + # Jumper et al. (2021) Suppl. Table 5, Model 1.1.1 + 'data.common.max_extra_msa': 5120, + 'data.common.reduce_msa_clusters_by_max_templates': True, + 'data.common.use_templates': True, + 'model.embeddings_and_evoformer.template.embed_torsion_angles': True, + 'model.embeddings_and_evoformer.template.enabled': True + }, + 'model_2': { + # Jumper et al. (2021) Suppl. 
Table 5, Model 1.1.2 + 'data.common.reduce_msa_clusters_by_max_templates': True, + 'data.common.use_templates': True, + 'model.embeddings_and_evoformer.template.embed_torsion_angles': True, + 'model.embeddings_and_evoformer.template.enabled': True + }, + 'model_3': { + # Jumper et al. (2021) Suppl. Table 5, Model 1.2.1 + 'data.common.max_extra_msa': 5120, + }, + 'model_4': { + # Jumper et al. (2021) Suppl. Table 5, Model 1.2.2 + 'data.common.max_extra_msa': 5120, + }, + 'model_5': { + # Jumper et al. (2021) Suppl. Table 5, Model 1.2.3 + }, + + # The following models are fine-tuned from the corresponding models above + # with an additional predicted_aligned_error head that can produce + # predicted TM-score (pTM) and predicted aligned errors. + 'model_1_ptm': { + 'data.common.max_extra_msa': 5120, + 'data.common.reduce_msa_clusters_by_max_templates': True, + 'data.common.use_templates': True, + 'model.embeddings_and_evoformer.template.embed_torsion_angles': True, + 'model.embeddings_and_evoformer.template.enabled': True, + 'model.heads.predicted_aligned_error.weight': 0.1 + }, + 'model_2_ptm': { + 'data.common.reduce_msa_clusters_by_max_templates': True, + 'data.common.use_templates': True, + 'model.embeddings_and_evoformer.template.embed_torsion_angles': True, + 'model.embeddings_and_evoformer.template.enabled': True, + 'model.heads.predicted_aligned_error.weight': 0.1 + }, + 'model_3_ptm': { + 'data.common.max_extra_msa': 5120, + 'model.heads.predicted_aligned_error.weight': 0.1 + }, + 'model_4_ptm': { + 'data.common.max_extra_msa': 5120, + 'model.heads.predicted_aligned_error.weight': 0.1 + }, + 'model_5_ptm': { + 'model.heads.predicted_aligned_error.weight': 0.1 + } +} + +CONFIG = ml_collections.ConfigDict({ + 'data': { + 'common': { + 'masked_msa': { + 'profile_prob': 0.1, + 'same_prob': 0.1, + 'uniform_prob': 0.1 + }, + 'max_extra_msa': 1024, + 'msa_cluster_features': True, + 'num_recycle': 3, + 'reduce_msa_clusters_by_max_templates': False, + 
'resample_msa_in_recycling': True, + 'template_features': [ + 'template_all_atom_positions', 'template_sum_probs', + 'template_aatype', 'template_all_atom_masks', + 'template_domain_names' + ], + 'unsupervised_features': [ + 'aatype', 'residue_index', 'sequence', 'msa', 'domain_name', + 'num_alignments', 'seq_length', 'between_segment_residues', + 'deletion_matrix' + ], + 'use_templates': False, + }, + 'eval': { + 'feat': { + 'aatype': [NUM_RES], + 'all_atom_mask': [NUM_RES, None], + 'all_atom_positions': [NUM_RES, None, None], + 'alt_chi_angles': [NUM_RES, None], + 'atom14_alt_gt_exists': [NUM_RES, None], + 'atom14_alt_gt_positions': [NUM_RES, None, None], + 'atom14_atom_exists': [NUM_RES, None], + 'atom14_atom_is_ambiguous': [NUM_RES, None], + 'atom14_gt_exists': [NUM_RES, None], + 'atom14_gt_positions': [NUM_RES, None, None], + 'atom37_atom_exists': [NUM_RES, None], + 'backbone_affine_mask': [NUM_RES], + 'backbone_affine_tensor': [NUM_RES, None], + 'bert_mask': [NUM_MSA_SEQ, NUM_RES], + 'chi_angles': [NUM_RES, None], + 'chi_mask': [NUM_RES, None], + 'extra_deletion_value': [NUM_EXTRA_SEQ, NUM_RES], + 'extra_has_deletion': [NUM_EXTRA_SEQ, NUM_RES], + 'extra_msa': [NUM_EXTRA_SEQ, NUM_RES], + 'extra_msa_mask': [NUM_EXTRA_SEQ, NUM_RES], + 'extra_msa_row_mask': [NUM_EXTRA_SEQ], + 'is_distillation': [], + 'msa_feat': [NUM_MSA_SEQ, NUM_RES, None], + 'msa_mask': [NUM_MSA_SEQ, NUM_RES], + 'msa_row_mask': [NUM_MSA_SEQ], + 'pseudo_beta': [NUM_RES, None], + 'pseudo_beta_mask': [NUM_RES], + 'random_crop_to_size_seed': [None], + 'residue_index': [NUM_RES], + 'residx_atom14_to_atom37': [NUM_RES, None], + 'residx_atom37_to_atom14': [NUM_RES, None], + 'resolution': [], + 'rigidgroups_alt_gt_frames': [NUM_RES, None, None], + 'rigidgroups_group_exists': [NUM_RES, None], + 'rigidgroups_group_is_ambiguous': [NUM_RES, None], + 'rigidgroups_gt_exists': [NUM_RES, None], + 'rigidgroups_gt_frames': [NUM_RES, None, None], + 'seq_length': [], + 'seq_mask': [NUM_RES], + 'target_feat': 
[NUM_RES, None], + 'template_aatype': [NUM_TEMPLATES, NUM_RES], + 'template_all_atom_masks': [NUM_TEMPLATES, NUM_RES, None], + 'template_all_atom_positions': [ + NUM_TEMPLATES, NUM_RES, None, None], + 'template_backbone_affine_mask': [NUM_TEMPLATES, NUM_RES], + 'template_backbone_affine_tensor': [ + NUM_TEMPLATES, NUM_RES, None], + 'template_mask': [NUM_TEMPLATES], + 'template_pseudo_beta': [NUM_TEMPLATES, NUM_RES, None], + 'template_pseudo_beta_mask': [NUM_TEMPLATES, NUM_RES], + 'template_sum_probs': [NUM_TEMPLATES, None], + 'true_msa': [NUM_MSA_SEQ, NUM_RES] + }, + 'fixed_size': True, + 'subsample_templates': False, # We want top templates. + 'masked_msa_replace_fraction': 0.15, + 'max_msa_clusters': 512, + 'max_templates': 4, + 'num_ensemble': 1, + }, + }, + 'model': { + 'embeddings_and_evoformer': { + 'evoformer_num_block': 48, + 'evoformer': { + 'msa_row_attention_with_pair_bias': { + 'dropout_rate': 0.15, + 'gating': True, + 'num_head': 8, + 'orientation': 'per_row', + 'shared_dropout': True + }, + 'msa_column_attention': { + 'dropout_rate': 0.0, + 'gating': True, + 'num_head': 8, + 'orientation': 'per_column', + 'shared_dropout': True + }, + 'msa_transition': { + 'dropout_rate': 0.0, + 'num_intermediate_factor': 4, + 'orientation': 'per_row', + 'shared_dropout': True + }, + 'outer_product_mean': { + 'first': False, + 'chunk_size': 128, + 'dropout_rate': 0.0, + 'num_outer_channel': 32, + 'orientation': 'per_row', + 'shared_dropout': True + }, + 'triangle_attention_starting_node': { + 'dropout_rate': 0.25, + 'gating': True, + 'num_head': 4, + 'orientation': 'per_row', + 'shared_dropout': True + }, + 'triangle_attention_ending_node': { + 'dropout_rate': 0.25, + 'gating': True, + 'num_head': 4, + 'orientation': 'per_column', + 'shared_dropout': True + }, + 'triangle_multiplication_outgoing': { + 'dropout_rate': 0.25, + 'equation': 'ikc,jkc->ijc', + 'num_intermediate_channel': 128, + 'orientation': 'per_row', + 'shared_dropout': True + }, + 
'triangle_multiplication_incoming': { + 'dropout_rate': 0.25, + 'equation': 'kjc,kic->ijc', + 'num_intermediate_channel': 128, + 'orientation': 'per_row', + 'shared_dropout': True + }, + 'pair_transition': { + 'dropout_rate': 0.0, + 'num_intermediate_factor': 4, + 'orientation': 'per_row', + 'shared_dropout': True + } + }, + 'extra_msa_channel': 64, + 'extra_msa_stack_num_block': 4, + 'max_relative_feature': 32, + 'msa_channel': 256, + 'pair_channel': 128, + 'prev_pos': { + 'min_bin': 3.25, + 'max_bin': 20.75, + 'num_bins': 15 + }, + 'recycle_features': True, + 'recycle_pos': True, + 'seq_channel': 384, + 'template': { + 'attention': { + 'gating': False, + 'key_dim': 64, + 'num_head': 4, + 'value_dim': 64 + }, + 'dgram_features': { + 'min_bin': 3.25, + 'max_bin': 50.75, + 'num_bins': 39 + }, + 'embed_torsion_angles': False, + 'enabled': False, + 'template_pair_stack': { + 'num_block': 2, + 'triangle_attention_starting_node': { + 'dropout_rate': 0.25, + 'gating': True, + 'key_dim': 64, + 'num_head': 4, + 'orientation': 'per_row', + 'shared_dropout': True, + 'value_dim': 64 + }, + 'triangle_attention_ending_node': { + 'dropout_rate': 0.25, + 'gating': True, + 'key_dim': 64, + 'num_head': 4, + 'orientation': 'per_column', + 'shared_dropout': True, + 'value_dim': 64 + }, + 'triangle_multiplication_outgoing': { + 'dropout_rate': 0.25, + 'equation': 'ikc,jkc->ijc', + 'num_intermediate_channel': 64, + 'orientation': 'per_row', + 'shared_dropout': True + }, + 'triangle_multiplication_incoming': { + 'dropout_rate': 0.25, + 'equation': 'kjc,kic->ijc', + 'num_intermediate_channel': 64, + 'orientation': 'per_row', + 'shared_dropout': True + }, + 'pair_transition': { + 'dropout_rate': 0.0, + 'num_intermediate_factor': 2, + 'orientation': 'per_row', + 'shared_dropout': True + } + }, + 'max_templates': 4, + 'subbatch_size': 128, + 'use_template_unit_vector': False, + } + }, + 'global_config': { + 'deterministic': False, + 'multimer_mode': False, + 'subbatch_size': 4, + 
'use_remat': False, + 'zero_init': True + }, + 'heads': { + 'distogram': { + 'first_break': 2.3125, + 'last_break': 21.6875, + 'num_bins': 64, + 'weight': 0.3 + }, + 'predicted_aligned_error': { + # `num_bins - 1` bins uniformly space the + # [0, max_error_bin A] range. + # The final bin covers [max_error_bin A, +infty] + # 31A gives bins with 0.5A width. + 'max_error_bin': 31., + 'num_bins': 64, + 'num_channels': 128, + 'filter_by_resolution': True, + 'min_resolution': 0.1, + 'max_resolution': 3.0, + 'weight': 0.0, + }, + 'experimentally_resolved': { + 'filter_by_resolution': True, + 'max_resolution': 3.0, + 'min_resolution': 0.1, + 'weight': 0.01 + }, + 'structure_module': { + 'num_layer': 8, + 'fape': { + 'clamp_distance': 10.0, + 'clamp_type': 'relu', + 'loss_unit_distance': 10.0 + }, + 'angle_norm_weight': 0.01, + 'chi_weight': 0.5, + 'clash_overlap_tolerance': 1.5, + 'compute_in_graph_metrics': True, + 'dropout': 0.1, + 'num_channel': 384, + 'num_head': 12, + 'num_layer_in_transition': 3, + 'num_point_qk': 4, + 'num_point_v': 8, + 'num_scalar_qk': 16, + 'num_scalar_v': 16, + 'position_scale': 10.0, + 'sidechain': { + 'atom_clamp_distance': 10.0, + 'num_channel': 128, + 'num_residual_block': 2, + 'weight_frac': 0.5, + 'length_scale': 10., + }, + 'structural_violation_loss_weight': 1.0, + 'violation_tolerance_factor': 12.0, + 'weight': 1.0 + }, + 'predicted_lddt': { + 'filter_by_resolution': True, + 'max_resolution': 3.0, + 'min_resolution': 0.1, + 'num_bins': 50, + 'num_channels': 128, + 'weight': 0.01 + }, + 'masked_msa': { + 'num_output': 23, + 'weight': 2.0 + }, + }, + 'num_recycle': 3, + 'resample_msa_in_recycling': True + }, +}) + + +CONFIG_MULTIMER = ml_collections.ConfigDict({ + 'model': { + 'embeddings_and_evoformer': { + 'evoformer_num_block': 48, + 'evoformer': { + 'msa_column_attention': { + 'dropout_rate': 0.0, + 'gating': True, + 'num_head': 8, + 'orientation': 'per_column', + 'shared_dropout': True + }, + 'msa_row_attention_with_pair_bias': { + 
'dropout_rate': 0.15, + 'gating': True, + 'num_head': 8, + 'orientation': 'per_row', + 'shared_dropout': True + }, + 'msa_transition': { + 'dropout_rate': 0.0, + 'num_intermediate_factor': 4, + 'orientation': 'per_row', + 'shared_dropout': True + }, + 'outer_product_mean': { + 'chunk_size': 128, + 'dropout_rate': 0.0, + 'first': True, + 'num_outer_channel': 32, + 'orientation': 'per_row', + 'shared_dropout': True + }, + 'pair_transition': { + 'dropout_rate': 0.0, + 'num_intermediate_factor': 4, + 'orientation': 'per_row', + 'shared_dropout': True + }, + 'triangle_attention_ending_node': { + 'dropout_rate': 0.25, + 'gating': True, + 'num_head': 4, + 'orientation': 'per_column', + 'shared_dropout': True + }, + 'triangle_attention_starting_node': { + 'dropout_rate': 0.25, + 'gating': True, + 'num_head': 4, + 'orientation': 'per_row', + 'shared_dropout': True + }, + 'triangle_multiplication_incoming': { + 'dropout_rate': 0.25, + 'equation': 'kjc,kic->ijc', + 'num_intermediate_channel': 128, + 'orientation': 'per_row', + 'shared_dropout': True + }, + 'triangle_multiplication_outgoing': { + 'dropout_rate': 0.25, + 'equation': 'ikc,jkc->ijc', + 'num_intermediate_channel': 128, + 'orientation': 'per_row', + 'shared_dropout': True + } + }, + 'extra_msa_channel': 64, + 'extra_msa_stack_num_block': 4, + 'num_msa': 252, + 'num_extra_msa': 1152, + 'masked_msa': { + 'profile_prob': 0.1, + 'replace_fraction': 0.15, + 'same_prob': 0.1, + 'uniform_prob': 0.1 + }, + 'use_chain_relative': True, + 'max_relative_chain': 2, + 'max_relative_idx': 32, + 'seq_channel': 384, + 'msa_channel': 256, + 'pair_channel': 128, + 'prev_pos': { + 'max_bin': 20.75, + 'min_bin': 3.25, + 'num_bins': 15 + }, + 'recycle_features': True, + 'recycle_pos': True, + 'template': { + 'attention': { + 'gating': False, + 'num_head': 4 + }, + 'dgram_features': { + 'max_bin': 50.75, + 'min_bin': 3.25, + 'num_bins': 39 + }, + 'enabled': True, + 'max_templates': 4, + 'num_channels': 64, + 'subbatch_size': 128, + 
'template_pair_stack': { + 'num_block': 2, + 'pair_transition': { + 'dropout_rate': 0.0, + 'num_intermediate_factor': 2, + 'orientation': 'per_row', + 'shared_dropout': True + }, + 'triangle_attention_ending_node': { + 'dropout_rate': 0.25, + 'gating': True, + 'num_head': 4, + 'orientation': 'per_column', + 'shared_dropout': True + }, + 'triangle_attention_starting_node': { + 'dropout_rate': 0.25, + 'gating': True, + 'num_head': 4, + 'orientation': 'per_row', + 'shared_dropout': True + }, + 'triangle_multiplication_incoming': { + 'dropout_rate': 0.25, + 'equation': 'kjc,kic->ijc', + 'num_intermediate_channel': 64, + 'orientation': 'per_row', + 'shared_dropout': True + }, + 'triangle_multiplication_outgoing': { + 'dropout_rate': 0.25, + 'equation': 'ikc,jkc->ijc', + 'num_intermediate_channel': 64, + 'orientation': 'per_row', + 'shared_dropout': True + } + } + }, + }, + 'global_config': { + 'deterministic': False, + 'multimer_mode': True, + 'subbatch_size': 4, + 'use_remat': False, + 'zero_init': True + }, + 'heads': { + 'distogram': { + 'first_break': 2.3125, + 'last_break': 21.6875, + 'num_bins': 64, + 'weight': 0.3 + }, + 'experimentally_resolved': { + 'filter_by_resolution': True, + 'max_resolution': 3.0, + 'min_resolution': 0.1, + 'weight': 0.01 + }, + 'masked_msa': { + 'weight': 2.0 + }, + 'predicted_aligned_error': { + 'filter_by_resolution': True, + 'max_error_bin': 31.0, + 'max_resolution': 3.0, + 'min_resolution': 0.1, + 'num_bins': 64, + 'num_channels': 128, + 'weight': 0.1 + }, + 'predicted_lddt': { + 'filter_by_resolution': True, + 'max_resolution': 3.0, + 'min_resolution': 0.1, + 'num_bins': 50, + 'num_channels': 128, + 'weight': 0.01 + }, + 'structure_module': { + 'angle_norm_weight': 0.01, + 'chi_weight': 0.5, + 'clash_overlap_tolerance': 1.5, + 'dropout': 0.1, + 'interface_fape': { + 'atom_clamp_distance': 1000.0, + 'loss_unit_distance': 20.0 + }, + 'intra_chain_fape': { + 'atom_clamp_distance': 10.0, + 'loss_unit_distance': 10.0 + }, + 
'num_channel': 384, + 'num_head': 12, + 'num_layer': 8, + 'num_layer_in_transition': 3, + 'num_point_qk': 4, + 'num_point_v': 8, + 'num_scalar_qk': 16, + 'num_scalar_v': 16, + 'position_scale': 20.0, + 'sidechain': { + 'atom_clamp_distance': 10.0, + 'loss_unit_distance': 10.0, + 'num_channel': 128, + 'num_residual_block': 2, + 'weight_frac': 0.5 + }, + 'structural_violation_loss_weight': 1.0, + 'violation_tolerance_factor': 12.0, + 'weight': 1.0 + } + }, + 'num_ensemble_eval': 1, + 'num_recycle': 3, + 'resample_msa_in_recycling': True + } +}) diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/model/data.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/model/data.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,39 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Convenience functions for reading data.""" + +import io +import os +from typing import List +from alphafold.model import utils +import haiku as hk +import numpy as np +# Internal import (7716). 
def casp_model_names(data_dir: str) -> List[str]:
  """Lists the model names found under `<data_dir>/params`.

  Every entry of the `params` directory is reported with its final file
  extension removed (e.g. `x.npz` becomes `x`).

  Args:
    data_dir: Directory that contains the `params` sub-directory.

  Returns:
    The extension-stripped entry names in sorted order. `os.listdir` returns
    entries in arbitrary order, so sorting keeps the result deterministic
    across file systems and runs.
  """
  params = os.listdir(os.path.join(data_dir, 'params'))
  return sorted(os.path.splitext(filename)[0] for filename in params)


def get_model_haiku_params(model_name: str, data_dir: str) -> hk.Params:
  """Get the Haiku parameters from a model name.

  Args:
    model_name: Name used to build the file name `params_<model_name>.npz`.
    data_dir: Directory that contains the `params` sub-directory.

  Returns:
    The result of `utils.flat_params_to_haiku` applied to the loaded archive.
  """
  path = os.path.join(data_dir, 'params', f'params_{model_name}.npz')

  # The file is read fully into memory and parsed from that buffer, so the
  # file handle is already closed when the arrays are accessed later.
  with open(path, 'rb') as f:
    params = np.load(io.BytesIO(f.read()), allow_pickle=False)

  return utils.flat_params_to_haiku(params)
# diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/model/features.py
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/docker/alphafold/alphafold/model/features.py Tue Mar 01 02:53:05 2022 +0000
# @@ -0,0 +1,104 @@
# Copyright 2021 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Code to generate processed features."""
import copy
from typing import List, Mapping, Tuple

from alphafold.model.tf import input_pipeline
from alphafold.model.tf import proteins_dataset

import ml_collections
import numpy as np
import tensorflow.compat.v1 as tf

FeatureDict = Mapping[str, np.ndarray]


def _run_graph_and_filter(tf_graph, fetches) -> FeatureDict:
  """Finalizes `tf_graph`, evaluates `fetches` and drops object-dtype outputs."""
  tf_graph.finalize()

  with tf.Session(graph=tf_graph) as sess:
    evaluated = sess.run(fetches)

  numeric_only = {}
  for name, value in evaluated.items():
    if value.dtype != 'O':
      numeric_only[name] = value
  return numeric_only


def make_data_config(
    config: ml_collections.ConfigDict,
    num_res: int,
    ) -> Tuple[ml_collections.ConfigDict, List[str]]:
  """Builds the input-pipeline data config for a sequence of `num_res` residues.

  Args:
    config: Model config; only its `data` sub-config is read, via a deep copy.
    num_res: Value written to `eval.crop_size` of the copied data config.

  Returns:
    A tuple of the copied data config and the list of feature names to load.
  """
  data_cfg = copy.deepcopy(config.data)
  common_cfg = data_cfg.common

  names = common_cfg.unsupervised_features
  if common_cfg.use_templates:
    # `+=` (not `+`) is kept from the original: for a list-valued field this
    # extends the list held by the copied config in place.
    names += common_cfg.template_features

  with data_cfg.unlocked():
    data_cfg.eval.crop_size = num_res

  return data_cfg, names


def tf_example_to_features(tf_example: tf.train.Example,
                           config: ml_collections.ConfigDict,
                           random_seed: int = 0) -> FeatureDict:
  """Runs the TF input pipeline on a `tf.train.Example`.

  Note that `tf_example` is modified in place when it carries a
  `deletion_matrix_int` feature: the values are re-stored as floats under
  `deletion_matrix` and the integer feature is deleted.

  Args:
    tf_example: Example proto holding the raw features.
    config: Model config passed on to `make_data_config`.
    random_seed: Graph-level TensorFlow random seed.

  Returns:
    Dictionary of processed NumPy features, without object-dtype entries.
  """
  example_features = tf_example.features.feature
  num_res = int(example_features['seq_length'].int64_list.value[0])
  cfg, feature_names = make_data_config(config, num_res=num_res)

  if 'deletion_matrix_int' in set(example_features):
    int_values = example_features['deletion_matrix_int'].int64_list.value
    float_feature = tf.train.Feature(
        float_list=tf.train.FloatList(value=map(float, int_values)))
    example_features['deletion_matrix'].CopyFrom(float_feature)
    del example_features['deletion_matrix_int']

  graph = tf.Graph()
  with graph.as_default(), tf.device('/device:CPU:0'):
    tf.compat.v1.set_random_seed(random_seed)
    tensors = proteins_dataset.create_tensor_dict(
        raw_data=tf_example.SerializeToString(),
        features=feature_names)
    processed_batch = input_pipeline.process_tensors_from_config(
        tensors, cfg)

  return _run_graph_and_filter(graph, processed_batch)


def np_example_to_features(np_example: FeatureDict,
                           config: ml_collections.ConfigDict,
                           random_seed: int = 0) -> FeatureDict:
  """Runs the TF input pipeline on a NumPy feature dictionary.

  The input mapping is shallow-copied first, so the caller's dictionary is
  left untouched when `deletion_matrix_int` is converted to `deletion_matrix`.

  Args:
    np_example: Mapping from feature name to NumPy array.
    config: Model config passed on to `make_data_config`.
    random_seed: Graph-level TensorFlow random seed.

  Returns:
    Dictionary of processed NumPy features, without object-dtype entries.
  """
  example = dict(np_example)
  num_res = int(example['seq_length'][0])
  cfg, feature_names = make_data_config(config, num_res=num_res)

  if 'deletion_matrix_int' in example:
    int_matrix = example.pop('deletion_matrix_int')
    example['deletion_matrix'] = int_matrix.astype(np.float32)

  graph = tf.Graph()
  with graph.as_default(), tf.device('/device:CPU:0'):
    tf.compat.v1.set_random_seed(random_seed)
    tensors = proteins_dataset.np_to_tensor_dict(
        np_example=example, features=feature_names)

    processed_batch = input_pipeline.process_tensors_from_config(
        tensors, cfg)

  return _run_graph_and_filter(graph, processed_batch)
# diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/model/folding.py
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/docker/alphafold/alphafold/model/folding.py Tue Mar 01 02:53:05 2022 +0000
# @@ -0,0 +1,1009 @@
# Copyright 2021 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
+ +"""Modules and utilities for the structure module.""" + +import functools +from typing import Dict +from alphafold.common import residue_constants +from alphafold.model import all_atom +from alphafold.model import common_modules +from alphafold.model import prng +from alphafold.model import quat_affine +from alphafold.model import r3 +from alphafold.model import utils +import haiku as hk +import jax +import jax.numpy as jnp +import ml_collections +import numpy as np + + +def squared_difference(x, y): + return jnp.square(x - y) + + +class InvariantPointAttention(hk.Module): + """Invariant Point attention module. + + The high-level idea is that this attention module works over a set of points + and associated orientations in 3D space (e.g. protein residues). + + Each residue outputs a set of queries and keys as points in their local + reference frame. The attention is then defined as the euclidean distance + between the queries and keys in the global frame. + + Jumper et al. (2021) Suppl. Alg. 22 "InvariantPointAttention" + """ + + def __init__(self, + config, + global_config, + dist_epsilon=1e-8, + name='invariant_point_attention'): + """Initialize. + + Args: + config: Structure Module Config + global_config: Global Config of Model. + dist_epsilon: Small value to avoid NaN in distance calculation. + name: Haiku Module name. + """ + super().__init__(name=name) + + self._dist_epsilon = dist_epsilon + self._zero_initialize_last = global_config.zero_init + + self.config = config + + self.global_config = global_config + + def __call__(self, inputs_1d, inputs_2d, mask, affine): + """Compute geometry-aware attention. + + Given a set of query residues (defined by affines and associated scalar + features), this function computes geometry-aware attention between the + query residues and target residues. + + The residues produce points in their local reference frame, which + are converted into the global frame in order to compute attention via + euclidean distance. 
+ + Equivalently, the target residues produce points in their local frame to be + used as attention values, which are converted into the query residues' + local frames. + + Args: + inputs_1d: (N, C) 1D input embedding that is the basis for the + scalar queries. + inputs_2d: (N, M, C') 2D input embedding, used for biases and values. + mask: (N, 1) mask to indicate which elements of inputs_1d participate + in the attention. + affine: QuatAffine object describing the position and orientation of + every element in inputs_1d. + + Returns: + Transformation of the input embedding. + """ + num_residues, _ = inputs_1d.shape + + # Improve readability by removing a large number of 'self's. + num_head = self.config.num_head + num_scalar_qk = self.config.num_scalar_qk + num_point_qk = self.config.num_point_qk + num_scalar_v = self.config.num_scalar_v + num_point_v = self.config.num_point_v + num_output = self.config.num_channel + + assert num_scalar_qk > 0 + assert num_point_qk > 0 + assert num_point_v > 0 + + # Construct scalar queries of shape: + # [num_query_residues, num_head, num_points] + q_scalar = common_modules.Linear( + num_head * num_scalar_qk, name='q_scalar')( + inputs_1d) + q_scalar = jnp.reshape( + q_scalar, [num_residues, num_head, num_scalar_qk]) + + # Construct scalar keys/values of shape: + # [num_target_residues, num_head, num_points] + kv_scalar = common_modules.Linear( + num_head * (num_scalar_v + num_scalar_qk), name='kv_scalar')( + inputs_1d) + kv_scalar = jnp.reshape(kv_scalar, + [num_residues, num_head, + num_scalar_v + num_scalar_qk]) + k_scalar, v_scalar = jnp.split(kv_scalar, [num_scalar_qk], axis=-1) + + # Construct query points of shape: + # [num_residues, num_head, num_point_qk] + + # First construct query points in local frame. + q_point_local = common_modules.Linear( + num_head * 3 * num_point_qk, name='q_point_local')( + inputs_1d) + q_point_local = jnp.split(q_point_local, 3, axis=-1) + # Project query points into global frame. 
+ q_point_global = affine.apply_to_point(q_point_local, extra_dims=1) + # Reshape query point for later use. + q_point = [ + jnp.reshape(x, [num_residues, num_head, num_point_qk]) + for x in q_point_global] + + # Construct key and value points. + # Key points have shape [num_residues, num_head, num_point_qk] + # Value points have shape [num_residues, num_head, num_point_v] + + # Construct key and value points in local frame. + kv_point_local = common_modules.Linear( + num_head * 3 * (num_point_qk + num_point_v), name='kv_point_local')( + inputs_1d) + kv_point_local = jnp.split(kv_point_local, 3, axis=-1) + # Project key and value points into global frame. + kv_point_global = affine.apply_to_point(kv_point_local, extra_dims=1) + kv_point_global = [ + jnp.reshape(x, [num_residues, + num_head, (num_point_qk + num_point_v)]) + for x in kv_point_global] + # Split key and value points. + k_point, v_point = list( + zip(*[ + jnp.split(x, [num_point_qk,], axis=-1) + for x in kv_point_global + ])) + + # We assume that all queries and keys come iid from N(0, 1) distribution + # and compute the variances of the attention logits. + # Each scalar pair (q, k) contributes Var q*k = 1 + scalar_variance = max(num_scalar_qk, 1) * 1. + # Each point pair (q, k) contributes Var [0.5 ||q||^2 - q^T k] = 9 / 2 + point_variance = max(num_point_qk, 1) * 9. / 2 + + # Allocate equal variance to scalar, point and attention 2d parts so that + # the sum is 1. + + num_logit_terms = 3 + + scalar_weights = np.sqrt(1.0 / (num_logit_terms * scalar_variance)) + point_weights = np.sqrt(1.0 / (num_logit_terms * point_variance)) + attention_2d_weights = np.sqrt(1.0 / (num_logit_terms)) + + # Trainable per-head weights for points. + trainable_point_weights = jax.nn.softplus(hk.get_parameter( + 'trainable_point_weights', shape=[num_head], + # softplus^{-1} (1) + init=hk.initializers.Constant(np.log(np.exp(1.)
- 1.)))) + point_weights *= jnp.expand_dims(trainable_point_weights, axis=1) + + v_point = [jnp.swapaxes(x, -2, -3) for x in v_point] + + q_point = [jnp.swapaxes(x, -2, -3) for x in q_point] + k_point = [jnp.swapaxes(x, -2, -3) for x in k_point] + dist2 = [ + squared_difference(qx[:, :, None, :], kx[:, None, :, :]) + for qx, kx in zip(q_point, k_point) + ] + dist2 = sum(dist2) + attn_qk_point = -0.5 * jnp.sum( + point_weights[:, None, None, :] * dist2, axis=-1) + + v = jnp.swapaxes(v_scalar, -2, -3) + q = jnp.swapaxes(scalar_weights * q_scalar, -2, -3) + k = jnp.swapaxes(k_scalar, -2, -3) + attn_qk_scalar = jnp.matmul(q, jnp.swapaxes(k, -2, -1)) + attn_logits = attn_qk_scalar + attn_qk_point + + attention_2d = common_modules.Linear( + num_head, name='attention_2d')( + inputs_2d) + + attention_2d = jnp.transpose(attention_2d, [2, 0, 1]) + attention_2d = attention_2d_weights * attention_2d + attn_logits += attention_2d + + mask_2d = mask * jnp.swapaxes(mask, -1, -2) + attn_logits -= 1e5 * (1. - mask_2d) + + # [num_head, num_query_residues, num_target_residues] + attn = jax.nn.softmax(attn_logits) + + # [num_head, num_query_residues, num_head * num_scalar_v] + result_scalar = jnp.matmul(attn, v) + + # For point result, implement matmul manually so that it will be a float32 + # on TPU. This is equivalent to + # result_point_global = [jnp.einsum('bhqk,bhkc->bhqc', attn, vx) + # for vx in v_point] + # but on the TPU, doing the multiply and reduce_sum ensures the + # computation happens in float32 instead of bfloat16. + result_point_global = [jnp.sum( + attn[:, :, :, None] * vx[:, None, :, :], + axis=-2) for vx in v_point] + + # [num_query_residues, num_head, num_head * num_(scalar|point)_v] + result_scalar = jnp.swapaxes(result_scalar, -2, -3) + result_point_global = [ + jnp.swapaxes(x, -2, -3) + for x in result_point_global] + + # Features used in the linear output projection. Should have the size + # [num_query_residues, ?] 
+ output_features = [] + + result_scalar = jnp.reshape( + result_scalar, [num_residues, num_head * num_scalar_v]) + output_features.append(result_scalar) + + result_point_global = [ + jnp.reshape(r, [num_residues, num_head * num_point_v]) + for r in result_point_global] + result_point_local = affine.invert_point(result_point_global, extra_dims=1) + output_features.extend(result_point_local) + + output_features.append(jnp.sqrt(self._dist_epsilon + + jnp.square(result_point_local[0]) + + jnp.square(result_point_local[1]) + + jnp.square(result_point_local[2]))) + + # Dimensions: h = heads, i and j = residues, + # c = inputs_2d channels + # Contraction happens over the second residue dimension, similarly to how + # the usual attention is performed. + result_attention_over_2d = jnp.einsum('hij, ijc->ihc', attn, inputs_2d) + num_out = num_head * result_attention_over_2d.shape[-1] + output_features.append( + jnp.reshape(result_attention_over_2d, + [num_residues, num_out])) + + final_init = 'zeros' if self._zero_initialize_last else 'linear' + + final_act = jnp.concatenate(output_features, axis=-1) + + return common_modules.Linear( + num_output, + initializer=final_init, + name='output_projection')(final_act) + + +class FoldIteration(hk.Module): + """A single iteration of the main structure module loop. + + Jumper et al. (2021) Suppl. Alg. 20 "StructureModule" lines 6-21 + + First, each residue attends to all residues using InvariantPointAttention. + Then, we apply transition layers to update the hidden representations. + Finally, we use the hidden representations to produce an update to the + affine of each residue. 
+ """ + + def __init__(self, config, global_config, + name='fold_iteration'): + super().__init__(name=name) + self.config = config + self.global_config = global_config + + def __call__(self, + activations, + sequence_mask, + update_affine, + is_training, + initial_act, + safe_key=None, + static_feat_2d=None, + aatype=None): + c = self.config + + if safe_key is None: + safe_key = prng.SafeKey(hk.next_rng_key()) + + def safe_dropout_fn(tensor, safe_key): + return prng.safe_dropout( + tensor=tensor, + safe_key=safe_key, + rate=c.dropout, + is_deterministic=self.global_config.deterministic, + is_training=is_training) + + affine = quat_affine.QuatAffine.from_tensor(activations['affine']) + + act = activations['act'] + attention_module = InvariantPointAttention(self.config, self.global_config) + # Attention + attn = attention_module( + inputs_1d=act, + inputs_2d=static_feat_2d, + mask=sequence_mask, + affine=affine) + act += attn + safe_key, *sub_keys = safe_key.split(3) + sub_keys = iter(sub_keys) + act = safe_dropout_fn(act, next(sub_keys)) + act = hk.LayerNorm( + axis=[-1], + create_scale=True, + create_offset=True, + name='attention_layer_norm')( + act) + + final_init = 'zeros' if self.global_config.zero_init else 'linear' + + # Transition + input_act = act + for i in range(c.num_layer_in_transition): + init = 'relu' if i < c.num_layer_in_transition - 1 else final_init + act = common_modules.Linear( + c.num_channel, + initializer=init, + name='transition')( + act) + if i < c.num_layer_in_transition - 1: + act = jax.nn.relu(act) + act += input_act + act = safe_dropout_fn(act, next(sub_keys)) + act = hk.LayerNorm( + axis=[-1], + create_scale=True, + create_offset=True, + name='transition_layer_norm')(act) + + if update_affine: + # This block corresponds to + # Jumper et al. (2021) Alg. 
23 "Backbone update" + affine_update_size = 6 + + # Affine update + affine_update = common_modules.Linear( + affine_update_size, + initializer=final_init, + name='affine_update')( + act) + + affine = affine.pre_compose(affine_update) + + sc = MultiRigidSidechain(c.sidechain, self.global_config)( + affine.scale_translation(c.position_scale), [act, initial_act], aatype) + + outputs = {'affine': affine.to_tensor(), 'sc': sc} + + affine = affine.apply_rotation_tensor_fn(jax.lax.stop_gradient) + + new_activations = { + 'act': act, + 'affine': affine.to_tensor() + } + return new_activations, outputs + + +def generate_affines(representations, batch, config, global_config, + is_training, safe_key): + """Generate predicted affines for a single chain. + + Jumper et al. (2021) Suppl. Alg. 20 "StructureModule" + + This is the main part of the structure module - it iteratively applies + folding to produce a set of predicted residue positions. + + Args: + representations: Representations dictionary. + batch: Batch dictionary. + config: Config for the structure module. + global_config: Global config. + is_training: Whether the model is being trained. + safe_key: A prng.SafeKey object that wraps a PRNG key. + + Returns: + A dictionary containing residue affines and sidechain positions. 
+ """ + c = config + sequence_mask = batch['seq_mask'][:, None] + + act = hk.LayerNorm( + axis=[-1], + create_scale=True, + create_offset=True, + name='single_layer_norm')( + representations['single']) + + initial_act = act + act = common_modules.Linear( + c.num_channel, name='initial_projection')( + act) + + affine = generate_new_affine(sequence_mask) + + fold_iteration = FoldIteration( + c, global_config, name='fold_iteration') + + assert len(batch['seq_mask'].shape) == 1 + + activations = {'act': act, + 'affine': affine.to_tensor(), + } + + act_2d = hk.LayerNorm( + axis=[-1], + create_scale=True, + create_offset=True, + name='pair_layer_norm')( + representations['pair']) + + outputs = [] + safe_keys = safe_key.split(c.num_layer) + for sub_key in safe_keys: + activations, output = fold_iteration( + activations, + initial_act=initial_act, + static_feat_2d=act_2d, + safe_key=sub_key, + sequence_mask=sequence_mask, + update_affine=True, + is_training=is_training, + aatype=batch['aatype']) + outputs.append(output) + + output = jax.tree_map(lambda *x: jnp.stack(x), *outputs) + # Include the activations in the output dict for use by the LDDT-Head. + output['act'] = activations['act'] + + return output + + +class StructureModule(hk.Module): + """StructureModule as a network head. + + Jumper et al. (2021) Suppl. Alg. 
20 "StructureModule" + """ + + def __init__(self, config, global_config, compute_loss=True, + name='structure_module'): + super().__init__(name=name) + self.config = config + self.global_config = global_config + self.compute_loss = compute_loss + + def __call__(self, representations, batch, is_training, + safe_key=None): + c = self.config + ret = {} + + if safe_key is None: + safe_key = prng.SafeKey(hk.next_rng_key()) + + output = generate_affines( + representations=representations, + batch=batch, + config=self.config, + global_config=self.global_config, + is_training=is_training, + safe_key=safe_key) + + ret['representations'] = {'structure_module': output['act']} + + ret['traj'] = output['affine'] * jnp.array([1.] * 4 + + [c.position_scale] * 3) + + ret['sidechains'] = output['sc'] + + atom14_pred_positions = r3.vecs_to_tensor(output['sc']['atom_pos'])[-1] + ret['final_atom14_positions'] = atom14_pred_positions # (N, 14, 3) + ret['final_atom14_mask'] = batch['atom14_atom_exists'] # (N, 14) + + atom37_pred_positions = all_atom.atom14_to_atom37(atom14_pred_positions, + batch) + atom37_pred_positions *= batch['atom37_atom_exists'][:, :, None] + ret['final_atom_positions'] = atom37_pred_positions # (N, 37, 3) + + ret['final_atom_mask'] = batch['atom37_atom_exists'] # (N, 37) + ret['final_affines'] = ret['traj'][-1] + + if self.compute_loss: + return ret + else: + no_loss_features = ['final_atom_positions', 'final_atom_mask', + 'representations'] + no_loss_ret = {k: ret[k] for k in no_loss_features} + return no_loss_ret + + def loss(self, value, batch): + ret = {'loss': 0.} + + ret['metrics'] = {} + # If requested, compute in-graph metrics. + if self.config.compute_in_graph_metrics: + atom14_pred_positions = value['final_atom14_positions'] + # Compute renaming and violations. 
+ value.update(compute_renamed_ground_truth(batch, atom14_pred_positions)) + value['violations'] = find_structural_violations( + batch, atom14_pred_positions, self.config) + + # Several violation metrics: + violation_metrics = compute_violation_metrics( + batch=batch, + atom14_pred_positions=atom14_pred_positions, + violations=value['violations']) + ret['metrics'].update(violation_metrics) + + backbone_loss(ret, batch, value, self.config) + + if 'renamed_atom14_gt_positions' not in value: + value.update(compute_renamed_ground_truth( + batch, value['final_atom14_positions'])) + sc_loss = sidechain_loss(batch, value, self.config) + + ret['loss'] = ((1 - self.config.sidechain.weight_frac) * ret['loss'] + + self.config.sidechain.weight_frac * sc_loss['loss']) + ret['sidechain_fape'] = sc_loss['fape'] + + supervised_chi_loss(ret, batch, value, self.config) + + if self.config.structural_violation_loss_weight: + if 'violations' not in value: + value['violations'] = find_structural_violations( + batch, value['final_atom14_positions'], self.config) + structural_violation_loss(ret, batch, value, self.config) + + return ret + + +def compute_renamed_ground_truth( + batch: Dict[str, jnp.ndarray], + atom14_pred_positions: jnp.ndarray, + ) -> Dict[str, jnp.ndarray]: + """Find optimal renaming of ground truth based on the predicted positions. + + Jumper et al. (2021) Suppl. Alg. 26 "renameSymmetricGroundTruthAtoms" + + This renamed ground truth is then used for all losses, + such that each loss moves the atoms in the same direction. + Shape (N). + + Args: + batch: Dictionary containing: + * atom14_gt_positions: Ground truth positions. + * atom14_alt_gt_positions: Ground truth positions with renaming swaps. + * atom14_atom_is_ambiguous: 1.0 for atoms that are affected by + renaming swaps. + * atom14_gt_exists: Mask for which atoms exist in ground truth. + * atom14_alt_gt_exists: Mask for which atoms exist in ground truth + after renaming. 
+ * atom14_atom_exists: Mask for whether each atom is part of the given + amino acid type. + atom14_pred_positions: Array of atom positions in global frame with shape + (N, 14, 3). + Returns: + Dictionary containing: + alt_naming_is_better: Array with 1.0 where alternative swap is better. + renamed_atom14_gt_positions: Array of optimal ground truth positions + after renaming swaps are performed. + renamed_atom14_gt_exists: Mask after renaming swap is performed. + """ + alt_naming_is_better = all_atom.find_optimal_renaming( + atom14_gt_positions=batch['atom14_gt_positions'], + atom14_alt_gt_positions=batch['atom14_alt_gt_positions'], + atom14_atom_is_ambiguous=batch['atom14_atom_is_ambiguous'], + atom14_gt_exists=batch['atom14_gt_exists'], + atom14_pred_positions=atom14_pred_positions, + atom14_atom_exists=batch['atom14_atom_exists']) + + renamed_atom14_gt_positions = ( + (1. - alt_naming_is_better[:, None, None]) + * batch['atom14_gt_positions'] + + alt_naming_is_better[:, None, None] + * batch['atom14_alt_gt_positions']) + + renamed_atom14_gt_mask = ( + (1. - alt_naming_is_better[:, None]) * batch['atom14_gt_exists'] + + alt_naming_is_better[:, None] * batch['atom14_alt_gt_exists']) + + return { + 'alt_naming_is_better': alt_naming_is_better, # (N) + 'renamed_atom14_gt_positions': renamed_atom14_gt_positions, # (N, 14, 3) + 'renamed_atom14_gt_exists': renamed_atom14_gt_mask, # (N, 14) + } + + +def backbone_loss(ret, batch, value, config): + """Backbone FAPE Loss. + + Jumper et al. (2021) Suppl. Alg. 20 "StructureModule" line 17 + + Args: + ret: Dictionary to write outputs into, needs to contain 'loss'. + batch: Batch, needs to contain 'backbone_affine_tensor', + 'backbone_affine_mask'. + value: Dictionary containing structure module output, needs to contain + 'traj', a trajectory of rigids. + config: Configuration of loss, should contain 'fape.clamp_distance' and + 'fape.loss_unit_distance'. 
+ """ + affine_trajectory = quat_affine.QuatAffine.from_tensor(value['traj']) + rigid_trajectory = r3.rigids_from_quataffine(affine_trajectory) + + gt_affine = quat_affine.QuatAffine.from_tensor( + batch['backbone_affine_tensor']) + gt_rigid = r3.rigids_from_quataffine(gt_affine) + backbone_mask = batch['backbone_affine_mask'] + + fape_loss_fn = functools.partial( + all_atom.frame_aligned_point_error, + l1_clamp_distance=config.fape.clamp_distance, + length_scale=config.fape.loss_unit_distance) + + fape_loss_fn = jax.vmap(fape_loss_fn, (0, None, None, 0, None, None)) + fape_loss = fape_loss_fn(rigid_trajectory, gt_rigid, backbone_mask, + rigid_trajectory.trans, gt_rigid.trans, + backbone_mask) + + if 'use_clamped_fape' in batch: + # Jumper et al. (2021) Suppl. Sec. 1.11.5 "Loss clamping details" + use_clamped_fape = jnp.asarray(batch['use_clamped_fape'], jnp.float32) + unclamped_fape_loss_fn = functools.partial( + all_atom.frame_aligned_point_error, + l1_clamp_distance=None, + length_scale=config.fape.loss_unit_distance) + unclamped_fape_loss_fn = jax.vmap(unclamped_fape_loss_fn, + (0, None, None, 0, None, None)) + fape_loss_unclamped = unclamped_fape_loss_fn(rigid_trajectory, gt_rigid, + backbone_mask, + rigid_trajectory.trans, + gt_rigid.trans, + backbone_mask) + + fape_loss = (fape_loss * use_clamped_fape + + fape_loss_unclamped * (1 - use_clamped_fape)) + + ret['fape'] = fape_loss[-1] + ret['loss'] += jnp.mean(fape_loss) + + +def sidechain_loss(batch, value, config): + """All Atom FAPE Loss using renamed rigids.""" + # Rename Frames + # Jumper et al. (2021) Suppl. Alg. 26 "renameSymmetricGroundTruthAtoms" line 7 + alt_naming_is_better = value['alt_naming_is_better'] + renamed_gt_frames = ( + (1. 
def structural_violation_loss(ret, batch, value, config):
  """Adds the weighted structural violation loss to ret['loss'].

  The loss is the sum of the three between-residue backbone terms (C-N bond,
  CA-C-N angle, C-N-CA angle) plus the per-atom clash and within-residue
  losses summed over all atoms and divided by the number of existing atoms.

  Args:
    ret: Dictionary to write outputs into, needs to contain 'loss'.
    batch: Batch, needs to contain 'atom14_atom_exists'.
    value: Dictionary containing 'violations' with the 'between_residues' and
      'within_residues' entries read below.
    config: Configuration, needs a truthy 'sidechain.weight_frac' and
      'structural_violation_loss_weight'.
  """
  assert config.sidechain.weight_frac

  violations = value['violations']
  atom_count = jnp.sum(batch['atom14_atom_exists']).astype(jnp.float32)

  between = violations['between_residues']
  within = violations['within_residues']

  # Mean losses of the backbone geometry between consecutive residues.
  backbone_term = (between['bonds_c_n_loss_mean'] +
                   between['angles_ca_c_n_loss_mean'] +
                   between['angles_c_n_ca_loss_mean'])

  # Per-atom losses are averaged over existing atoms; the small constant
  # guards against division by zero when no atom exists.
  per_atom_total = jnp.sum(
      between['clashes_per_atom_loss_sum'] + within['per_atom_loss_sum'])
  per_atom_term = per_atom_total / (1e-6 + atom_count)

  ret['loss'] += config.structural_violation_loss_weight * (
      backbone_term + per_atom_term)
+ between_residue_clashes = all_atom.between_residue_clash_loss( + atom14_pred_positions=atom14_pred_positions, + atom14_atom_exists=batch['atom14_atom_exists'], + atom14_atom_radius=atom14_atom_radius, + residue_index=batch['residue_index'], + overlap_tolerance_soft=config.clash_overlap_tolerance, + overlap_tolerance_hard=config.clash_overlap_tolerance) + + # Compute all within-residue violations (clashes, + # bond length and angle violations). + restype_atom14_bounds = residue_constants.make_atom14_dists_bounds( + overlap_tolerance=config.clash_overlap_tolerance, + bond_length_tolerance_factor=config.violation_tolerance_factor) + atom14_dists_lower_bound = utils.batched_gather( + restype_atom14_bounds['lower_bound'], batch['aatype']) + atom14_dists_upper_bound = utils.batched_gather( + restype_atom14_bounds['upper_bound'], batch['aatype']) + within_residue_violations = all_atom.within_residue_violations( + atom14_pred_positions=atom14_pred_positions, + atom14_atom_exists=batch['atom14_atom_exists'], + atom14_dists_lower_bound=atom14_dists_lower_bound, + atom14_dists_upper_bound=atom14_dists_upper_bound, + tighten_bounds_for_loss=0.0) + + # Combine them to a single per-residue violation mask (used later for LDDT). 
def compute_violation_metrics(
    batch: Dict[str, jnp.ndarray],
    atom14_pred_positions: jnp.ndarray,  # (N, 14, 3)
    violations: Dict[str, jnp.ndarray],
    ) -> Dict[str, jnp.ndarray]:
  """Summarises structural violations as scalar metrics.

  Args:
    batch: Batch, needs to contain 'atom14_atom_exists', 'residue_index' and
      'seq_mask'.
    atom14_pred_positions: Predicted atom positions, (N, 14, 3).
    violations: Violation dictionary with 'between_residues',
      'within_residues' and 'total_per_residue_violations_mask' entries.

  Returns:
    Dictionary with the extreme CA-CA distance metric and four metrics that
    reduce a per-residue violation indicator with utils.mask_mean, using
    batch['seq_mask'] as the mask. Per-atom masks are first reduced to
    per-residue values with a max over the last axis.
  """
  extreme_ca_ca = all_atom.extreme_ca_ca_distance_violations(
      pred_atom_positions=atom14_pred_positions,
      pred_atom_mask=batch['atom14_atom_exists'].astype(jnp.float32),
      residue_index=batch['residue_index'].astype(jnp.float32))

  between = violations['between_residues']
  within = violations['within_residues']

  def _sequence_masked_mean(per_residue_values):
    """Reduces per-residue values with the sequence mask."""
    return utils.mask_mean(mask=batch['seq_mask'], value=per_residue_values)

  return {
      'violations_extreme_ca_ca_distance': extreme_ca_ca,
      'violations_between_residue_bond': _sequence_masked_mean(
          between['connections_per_residue_violation_mask']),
      'violations_between_residue_clash': _sequence_masked_mean(
          jnp.max(between['clashes_per_atom_clash_mask'], axis=-1)),
      'violations_within_residue': _sequence_masked_mean(
          jnp.max(within['per_atom_violations'], axis=-1)),
      'violations_per_residue': _sequence_masked_mean(
          violations['total_per_residue_violations_mask']),
  }
+ """ + eps = 1e-6 + + sequence_mask = batch['seq_mask'] + num_res = sequence_mask.shape[0] + chi_mask = batch['chi_mask'].astype(jnp.float32) + pred_angles = jnp.reshape( + value['sidechains']['angles_sin_cos'], [-1, num_res, 7, 2]) + pred_angles = pred_angles[:, :, 3:] + + residue_type_one_hot = jax.nn.one_hot( + batch['aatype'], residue_constants.restype_num + 1, + dtype=jnp.float32)[None] + chi_pi_periodic = jnp.einsum('ijk, kl->ijl', residue_type_one_hot, + jnp.asarray(residue_constants.chi_pi_periodic)) + + true_chi = batch['chi_angles'][None] + sin_true_chi = jnp.sin(true_chi) + cos_true_chi = jnp.cos(true_chi) + sin_cos_true_chi = jnp.stack([sin_true_chi, cos_true_chi], axis=-1) + + # This is -1 if chi is pi-periodic and +1 if it's 2pi-periodic + shifted_mask = (1 - 2 * chi_pi_periodic)[..., None] + sin_cos_true_chi_shifted = shifted_mask * sin_cos_true_chi + + sq_chi_error = jnp.sum( + squared_difference(sin_cos_true_chi, pred_angles), -1) + sq_chi_error_shifted = jnp.sum( + squared_difference(sin_cos_true_chi_shifted, pred_angles), -1) + sq_chi_error = jnp.minimum(sq_chi_error, sq_chi_error_shifted) + + sq_chi_loss = utils.mask_mean(mask=chi_mask[None], value=sq_chi_error) + ret['chi_loss'] = sq_chi_loss + ret['loss'] += config.chi_weight * sq_chi_loss + unnormed_angles = jnp.reshape( + value['sidechains']['unnormalized_angles_sin_cos'], [-1, num_res, 7, 2]) + angle_norm = jnp.sqrt(jnp.sum(jnp.square(unnormed_angles), axis=-1) + eps) + norm_error = jnp.abs(angle_norm - 1.) 
def l2_normalize(x, axis=-1, epsilon=1e-12):
  """Scales x to unit L2 norm along `axis`.

  The squared norm is floored at `epsilon` before the square root is taken,
  so an all-zero slice is returned as zeros rather than producing NaN.

  Args:
    x: Array to normalize.
    axis: Axis along which the norm is computed.
    epsilon: Lower bound applied to the squared norm.

  Returns:
    Array with the same shape as x.
  """
  squared_norm = jnp.sum(x**2, axis=axis, keepdims=True)
  safe_norm = jnp.sqrt(jnp.maximum(squared_norm, epsilon))
  return x / safe_norm
+ for _ in range(self.config.num_residual_block): + old_act = act + act = common_modules.Linear( + self.config.num_channel, + initializer='relu', + name='resblock1')( + jax.nn.relu(act)) + act = common_modules.Linear( + self.config.num_channel, + initializer=final_init, + name='resblock2')( + jax.nn.relu(act)) + act += old_act + + # Map activations to torsion angles. Shape: (num_res, 14). + num_res = act.shape[0] + unnormalized_angles = common_modules.Linear( + 14, name='unnormalized_angles')( + jax.nn.relu(act)) + unnormalized_angles = jnp.reshape( + unnormalized_angles, [num_res, 7, 2]) + angles = l2_normalize(unnormalized_angles, axis=-1) + + outputs = { + 'angles_sin_cos': angles, # jnp.ndarray (N, 7, 2) + 'unnormalized_angles_sin_cos': + unnormalized_angles, # jnp.ndarray (N, 7, 2) + } + + # Map torsion angles to frames. + backb_to_global = r3.rigids_from_quataffine(affine) + + # Jumper et al. (2021) Suppl. Alg. 24 "computeAllAtomCoordinates" + + # r3.Rigids with shape (N, 8). + all_frames_to_global = all_atom.torsion_angles_to_frames( + aatype, + backb_to_global, + angles) + + # Use frames and literature positions to create the final atom coordinates. + # r3.Vecs with shape (N, 14). + pred_positions = all_atom.frames_and_literature_positions_to_atom14_pos( + aatype, all_frames_to_global) + + outputs.update({ + 'atom_pos': pred_positions, # r3.Vecs (N, 14) + 'frames': all_frames_to_global, # r3.Rigids (N, 8) + }) + return outputs diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/model/folding_multimer.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/model/folding_multimer.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,1160 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
class QuatRigid(hk.Module):
  """Module for projecting Rigids via a quaternion."""

  def __init__(self,
               global_config: ml_collections.ConfigDict,
               rigid_shape: Union[int, Iterable[int]] = tuple(),
               full_quat: bool = False,
               init: str = 'zeros',
               name: str = 'quat_rigid'):
    """Module projecting a Rigid Object.

    For this Module the Rotation is parametrized as a quaternion,
    If 'full_quat' is True a 4 vector is produced for the rotation which is
    normalized and treated as a quaternion.
    When 'full_quat' is False a 3 vector is produced and the 1st component of
    the quaternion is set to 1.

    Args:
      global_config: Global Config, used to set certain properties of underlying
        Linear module, see common_modules.Linear for details.
      rigid_shape: Shape of Rigids relative to shape of activations, e.g. when
        activations have shape (n,) and this is (m,) output will be (n, m).
        A single integer (including NumPy integer scalars) is treated as a
        one-element shape.
      full_quat: Whether to parametrize rotation using full quaternion.
      init: initializer to use, see common_modules.Linear for details
      name: Name to use for module.
    """
    self.init = init
    self.global_config = global_config
    # numbers.Integral (rather than int) also accepts NumPy integer scalars,
    # which are not iterable and would otherwise fail in tuple(). This matches
    # how PointProjection normalises its num_points argument.
    if isinstance(rigid_shape, numbers.Integral):
      self.rigid_shape = (rigid_shape,)
    else:
      self.rigid_shape = tuple(rigid_shape)
    self.full_quat = full_quat
    super().__init__(name=name)

  def __call__(self, activations: jnp.ndarray) -> geometry.Rigid3Array:
    """Executes Module.

    This returns a set of rigids whose leading shape matches that of
    activations without the channel dimension; rigid_shape controls the
    trailing dimensions.
    For example when activations is shape (12, 5) and rigid_shape is (3, 2)
    then the shape of the output rigids will be (12, 3, 2).
    This also supports passing in an empty tuple for rigid shape, in that case
    the example would produce a rigid of shape (12,).

    Args:
      activations: Activations to use for projection, shape [..., num_channel]
    Returns:
      Rigid transformations with shape [...] + rigid_shape
    """
    # 4 quaternion + 3 translation components, or 3 + 3 when the first
    # quaternion component is fixed to one.
    rigid_dim = 7 if self.full_quat else 6
    linear_dims = self.rigid_shape + (rigid_dim,)
    rigid_flat = common_modules.Linear(
        linear_dims,
        initializer=self.init,
        precision=jax.lax.Precision.HIGHEST,
        name='rigid')(
            activations)
    # Split the projection into its per-component arrays.
    rigid_flat = geometry_utils.unstack(rigid_flat)
    if self.full_quat:
      qw, qx, qy, qz = rigid_flat[:4]
      translation = rigid_flat[4:]
    else:
      qx, qy, qz = rigid_flat[:3]
      qw = jnp.ones_like(qx)
      translation = rigid_flat[3:]
    rotation = geometry.Rot3Array.from_quaternion(
        qw, qx, qy, qz, normalize=True)
    translation = geometry.Vec3Array(*translation)
    return geometry.Rigid3Array(rotation, translation)
class InvariantPointAttention(hk.Module):
  """Covariant attention module.

  The high-level idea is that this attention module works over a set of points
  and associated orientations in 3D space (e.g. protein residues).

  Each residue outputs a set of queries and keys as points in their local
  reference frame.  The attention is then defined as the euclidean distance
  between the queries and keys in the global frame.
  """

  def __init__(self,
               config: ml_collections.ConfigDict,
               global_config: ml_collections.ConfigDict,
               dist_epsilon: float = 1e-8,
               name: str = 'invariant_point_attention'):
    """Initialize.

    Args:
      config: iterative Fold Head Config; this module reads num_head,
        num_point_qk, num_scalar_qk, num_scalar_v, num_point_v and
        num_channel from it.
      global_config: Global Config of Model; zero_init selects the initializer
        of the output projection.
      dist_epsilon: Small value to avoid NaN in distance calculation.
      name: Haiku module name.
    """
    super().__init__(name=name)

    self._dist_epsilon = dist_epsilon
    self._zero_initialize_last = global_config.zero_init

    self.config = config

    self.global_config = global_config

  def __call__(
      self,
      inputs_1d: jnp.ndarray,
      inputs_2d: jnp.ndarray,
      mask: jnp.ndarray,
      rigid: geometry.Rigid3Array,
      ) -> jnp.ndarray:
    """Compute geometric aware attention.

    Given a set of query residues (defined by affines and associated scalar
    features), this function computes geometric aware attention between the
    query residues and target residues.

    The residues produce points in their local reference frame, which
    are converted into the global frame to get attention via euclidean distance.

    Equivalently the target residues produce points in their local frame to be
    used as attention values, which are converted into the query residues local
    frames.

    The attention logits are the sum of three terms (point distances, scalar
    dot products and a bias projected from inputs_2d). Queries, keys and
    values are all projected from inputs_1d.

    Args:
      inputs_1d: (N, C) 1D input embedding that is the basis for the
        scalar and point queries, keys and values.
      inputs_2d: (N, M, C') 2D input embedding, used for biases values in the
        attention and attended over to produce part of the output features.
      mask: (N, 1) mask to indicate the elements of inputs_1d that participate
        in the attention.
      rigid: Rigid object describing the position and orientation of
        every element in inputs_1d.

    Returns:
      Transformation of the input embedding, with config.num_channel channels.
    """

    num_head = self.config.num_head

    attn_logits = 0.

    num_point_qk = self.config.num_point_qk
    # Each point pair (q, k) contributes Var [0.5 ||q||^2 - <q, k>] = 9 / 2
    point_variance = max(num_point_qk, 1) * 9. / 2
    point_weights = np.sqrt(1.0 / point_variance)

    # This is equivalent to jax.nn.softplus, but avoids a bug in the test...
    softplus = lambda x: jnp.logaddexp(x, jnp.zeros_like(x))
    raw_point_weights = hk.get_parameter(
        'trainable_point_weights',
        shape=[num_head],
        # softplus^{-1} (1)
        init=hk.initializers.Constant(np.log(np.exp(1.) - 1.)))

    # Trainable per-head weights for points.
    trainable_point_weights = softplus(raw_point_weights)
    point_weights *= trainable_point_weights
    # Query and key points, projected from inputs_1d and placed in the global
    # frame by PointProjection using `rigid`.
    q_point = PointProjection([num_head, num_point_qk],
                              self.global_config,
                              name='q_point_projection')(inputs_1d,
                                                         rigid)

    k_point = PointProjection([num_head, num_point_qk],
                              self.global_config,
                              name='k_point_projection')(inputs_1d,
                                                         rigid)

    # Pairwise squared distances between every query and every key point;
    # the new axes broadcast queries against keys.
    dist2 = geometry.square_euclidean_distance(
        q_point[:, None, :, :], k_point[None, :, :, :], epsilon=0.)
    # Sum over the point axis, weighted per head.
    attn_qk_point = -0.5 * jnp.sum(point_weights[:, None] * dist2, axis=-1)
    attn_logits += attn_qk_point

    num_scalar_qk = self.config.num_scalar_qk
    # We assume that all queries and keys come iid from N(0, 1) distribution
    # and compute the variances of the attention logits.
    # Each scalar pair (q, k) contributes Var q*k = 1
    scalar_variance = max(num_scalar_qk, 1) * 1.
    scalar_weights = np.sqrt(1.0 / scalar_variance)
    q_scalar = common_modules.Linear([num_head, num_scalar_qk],
                                     use_bias=False,
                                     name='q_scalar_projection')(
                                         inputs_1d)

    k_scalar = common_modules.Linear([num_head, num_scalar_qk],
                                     use_bias=False,
                                     name='k_scalar_projection')(
                                         inputs_1d)
    q_scalar *= scalar_weights
    # Logits are laid out as [query, key, head].
    attn_logits += jnp.einsum('qhc,khc->qkh', q_scalar, k_scalar)

    # Per-head bias projected from the pair representation.
    attention_2d = common_modules.Linear(
        num_head, name='attention_2d')(inputs_2d)
    attn_logits += attention_2d

    # A pair is kept only if both of its elements are unmasked; masked pairs
    # receive a large negative logit.
    mask_2d = mask * jnp.swapaxes(mask, -1, -2)
    attn_logits -= 1e5 * (1. - mask_2d[..., None])

    attn_logits *= np.sqrt(1. / 3)     # Normalize by number of logit terms (3)
    # Softmax over the key axis of the [query, key, head] logits.
    attn = jax.nn.softmax(attn_logits, axis=-2)

    num_scalar_v = self.config.num_scalar_v

    v_scalar = common_modules.Linear([num_head, num_scalar_v],
                                     use_bias=False,
                                     name='v_scalar_projection')(
                                         inputs_1d)

    # [num_query_residues, num_head, num_scalar_v]
    result_scalar = jnp.einsum('qkh, khc->qhc', attn, v_scalar)

    num_point_v = self.config.num_point_v
    v_point = PointProjection([num_head, num_point_v],
                              self.global_config,
                              name='v_point_projection')(inputs_1d,
                                                         rigid)

    # Attention-weighted sum of the value points over the key axis, applied
    # to each coordinate array of the point structure.
    result_point_global = jax.tree_map(
        lambda x: jnp.sum(attn[..., None] * x, axis=-3), v_point[None])

    # Features used in the linear output projection. Should have the size
    # [num_query_residues, ?]
    output_features = []
    num_query_residues, _ = inputs_1d.shape

    flat_shape = [num_query_residues, -1]

    result_scalar = jnp.reshape(result_scalar, flat_shape)
    output_features.append(result_scalar)

    result_point_global = jax.tree_map(lambda r: jnp.reshape(r, flat_shape),
                                       result_point_global)
    # Express the attended points in each query residue's local frame.
    result_point_local = rigid[..., None].apply_inverse_to_point(
        result_point_global)
    output_features.extend(
        [result_point_local.x, result_point_local.y, result_point_local.z])

    point_norms = result_point_local.norm(self._dist_epsilon)
    output_features.append(point_norms)

    # Dimensions: h = heads, i and j = residues,
    # c = inputs_2d channels
    # Contraction happens over the second residue dimension, similarly to how
    # the usual attention is performed.
    result_attention_over_2d = jnp.einsum('ijh, ijc->ihc', attn, inputs_2d)
    output_features.append(jnp.reshape(result_attention_over_2d, flat_shape))

    final_init = 'zeros' if self._zero_initialize_last else 'linear'

    final_act = jnp.concatenate(output_features, axis=-1)

    return common_modules.Linear(
        self.config.num_channel,
        initializer=final_init,
        name='output_projection')(final_act)
def generate_monomer_rigids(representations: Mapping[str, jnp.ndarray],
                            batch: Mapping[str, jnp.ndarray],
                            config: ml_collections.ConfigDict,
                            global_config: ml_collections.ConfigDict,
                            is_training: bool,
                            safe_key: prng.SafeKey
                            ) -> Dict[str, Any]:
  """Generate predicted Rigid's for a single chain.

  This is the main part of the iterative fold head - it iteratively applies
  folding to produce a set of predicted residue positions.

  A single FoldIteration module is created and called config.num_layer times,
  each time with its own PRNG key, starting from identity rigids.

  Args:
    representations: Embeddings dictionary, needs to contain 'single' and
      'pair'.
    batch: Batch dictionary, needs to contain 'seq_mask' (rank 1) and
      'aatype'.
    config: config for the iterative fold head.
    global_config: global config.
    is_training: is training.
    safe_key: A prng.SafeKey object that wraps a PRNG key.

  Returns:
    A dictionary containing residue Rigid's and sidechain positions, with the
    outputs of all iterations stacked along a new leading axis, plus 'act',
    the activations after the last iteration.
  """
  c = config
  # Add a trailing axis of size one to the rank-1 sequence mask.
  sequence_mask = batch['seq_mask'][:, None]
  act = hk.LayerNorm(
      axis=-1, create_scale=True, create_offset=True, name='single_layer_norm')(
          representations['single'])

  # The normalised single representation is also passed, unprojected, to
  # every fold iteration.
  initial_act = act
  act = common_modules.Linear(
      c.num_channel, name='initial_projection')(act)

  # Sequence Mask has extra 1 at the end.
  rigid = geometry.Rigid3Array.identity(sequence_mask.shape[:-1])

  fold_iteration = FoldIteration(
      c, global_config, name='fold_iteration')

  assert len(batch['seq_mask'].shape) == 1

  activations = {
      'act':
          act,
      'rigid':
          rigid
  }

  act_2d = hk.LayerNorm(
      axis=-1,
      create_scale=True,
      create_offset=True,
      name='pair_layer_norm')(
          representations['pair'])

  # One independent key per fold iteration.
  safe_keys = safe_key.split(c.num_layer)
  outputs = []
  for key in safe_keys:

    # The same module instance is reused on every pass; `activations` carries
    # the state from one iteration to the next.
    activations, output = fold_iteration(
        activations,
        initial_act=initial_act,
        static_feat_2d=act_2d,
        aatype=batch['aatype'],
        safe_key=key,
        sequence_mask=sequence_mask,
        update_rigid=True,
        is_training=is_training,
    )
    outputs.append(output)

  # Stack the per-iteration outputs leaf by leaf.
  output = jax.tree_multimap(lambda *x: jnp.stack(x), *outputs)
  # Pass along for LDDT-Head.
  output['act'] = activations['act']

  return output
20 "StructureModule" + """ + + def __init__(self, + config: ml_collections.ConfigDict, + global_config: ml_collections.ConfigDict, + name: str = 'structure_module'): + super().__init__(name=name) + self.config = config + self.global_config = global_config + + def __call__(self, + representations: Mapping[str, jnp.ndarray], + batch: Mapping[str, Any], + is_training: bool, + safe_key: Optional[prng.SafeKey] = None, + compute_loss: bool = False + ) -> Dict[str, Any]: + c = self.config + ret = {} + + if safe_key is None: + safe_key = prng.SafeKey(hk.next_rng_key()) + + output = generate_monomer_rigids( + representations=representations, + batch=batch, + config=self.config, + global_config=self.global_config, + is_training=is_training, + safe_key=safe_key) + + ret['traj'] = output['rigid'].scale_translation(c.position_scale).to_array() + ret['sidechains'] = output['sc'] + ret['sidechains']['atom_pos'] = ret['sidechains']['atom_pos'].to_array() + ret['sidechains']['frames'] = ret['sidechains']['frames'].to_array() + if 'local_atom_pos' in ret['sidechains']: + ret['sidechains']['local_atom_pos'] = ret['sidechains'][ + 'local_atom_pos'].to_array() + ret['sidechains']['local_frames'] = ret['sidechains'][ + 'local_frames'].to_array() + + aatype = batch['aatype'] + seq_mask = batch['seq_mask'] + + atom14_pred_mask = all_atom_multimer.get_atom14_mask( + aatype) * seq_mask[:, None] + atom14_pred_positions = output['sc']['atom_pos'][-1] + ret['final_atom14_positions'] = atom14_pred_positions # (N, 14, 3) + ret['final_atom14_mask'] = atom14_pred_mask # (N, 14) + + atom37_mask = all_atom_multimer.get_atom37_mask(aatype) * seq_mask[:, None] + atom37_pred_positions = all_atom_multimer.atom14_to_atom37( + atom14_pred_positions, aatype) + atom37_pred_positions *= atom37_mask[:, :, None] + ret['final_atom_positions'] = atom37_pred_positions # (N, 37, 3) + ret['final_atom_mask'] = atom37_mask # (N, 37) + ret['final_rigids'] = ret['traj'][-1] + + ret['act'] = output['act'] + + if 
compute_loss: + return ret + else: + no_loss_features = ['final_atom_positions', 'final_atom_mask', 'act'] + no_loss_ret = {k: ret[k] for k in no_loss_features} + return no_loss_ret + + def loss(self, + value: Mapping[str, Any], + batch: Mapping[str, Any] + ) -> Dict[str, Any]: + + raise NotImplementedError( + 'This function should be called on a batch with reordered chains (see ' + 'Evans et al (2021) Section 7.3. Multi-Chain Permutation Alignment.') + + ret = {'loss': 0.} + + ret['metrics'] = {} + + aatype = batch['aatype'] + all_atom_positions = batch['all_atom_positions'] + all_atom_positions = geometry.Vec3Array.from_array(all_atom_positions) + all_atom_mask = batch['all_atom_mask'] + seq_mask = batch['seq_mask'] + residue_index = batch['residue_index'] + + gt_rigid, gt_affine_mask = make_backbone_affine(all_atom_positions, + all_atom_mask, + aatype) + + chi_angles, chi_mask = all_atom_multimer.compute_chi_angles( + all_atom_positions, all_atom_mask, aatype) + + pred_mask = all_atom_multimer.get_atom14_mask(aatype) + pred_mask *= seq_mask[:, None] + pred_positions = value['final_atom14_positions'] + pred_positions = geometry.Vec3Array.from_array(pred_positions) + + gt_positions, gt_mask, alt_naming_is_better = compute_atom14_gt( + aatype, all_atom_positions, all_atom_mask, pred_positions) + + violations = find_structural_violations( + aatype=aatype, + residue_index=residue_index, + mask=pred_mask, + pred_positions=pred_positions, + config=self.config) + + sidechains = value['sidechains'] + + gt_chi_angles = get_renamed_chi_angles(aatype, chi_angles, + alt_naming_is_better) + + # Several violation metrics: + violation_metrics = compute_violation_metrics( + residue_index=residue_index, + mask=pred_mask, + seq_mask=seq_mask, + pred_positions=pred_positions, + violations=violations) + ret['metrics'].update(violation_metrics) + + target_rigid = geometry.Rigid3Array.from_array(value['traj']) + gt_frames_mask = gt_affine_mask + + # Split the loss into within-chain 
and between-chain components. + intra_chain_mask = batch['asym_id'][:, None] == batch['asym_id'][None, :] + intra_chain_bb_loss, intra_chain_fape = backbone_loss( + gt_rigid=gt_rigid, + gt_frames_mask=gt_frames_mask, + gt_positions_mask=gt_affine_mask, + target_rigid=target_rigid, + config=self.config.intra_chain_fape, + pair_mask=intra_chain_mask) + interface_bb_loss, interface_fape = backbone_loss( + gt_rigid=gt_rigid, + gt_frames_mask=gt_frames_mask, + gt_positions_mask=gt_affine_mask, + target_rigid=target_rigid, + config=self.config.interface_fape, + pair_mask=1. - intra_chain_mask) + + bb_loss = intra_chain_bb_loss + interface_bb_loss + ret['fape'] = intra_chain_fape + interface_fape + ret['bb_loss'] = bb_loss + ret['loss'] += bb_loss + + pred_frames = geometry.Rigid3Array.from_array(sidechains['frames']) + pred_positions = geometry.Vec3Array.from_array(sidechains['atom_pos']) + gt_sc_frames, gt_sc_frames_mask = compute_frames( + aatype=aatype, + all_atom_positions=all_atom_positions, + all_atom_mask=all_atom_mask, + use_alt=alt_naming_is_better) + + sc_loss = sidechain_loss( + gt_frames=gt_sc_frames, + gt_frames_mask=gt_sc_frames_mask, + gt_positions=gt_positions, + gt_mask=gt_mask, + pred_frames=pred_frames, + pred_positions=pred_positions, + config=self.config) + + ret['loss'] = ((1 - self.config.sidechain.weight_frac) * ret['loss'] + + self.config.sidechain.weight_frac * sc_loss['loss']) + ret['sidechain_fape'] = sc_loss['fape'] + + unnormed_angles = sidechains['unnormalized_angles_sin_cos'] + pred_angles = sidechains['angles_sin_cos'] + + sup_chi_loss, ret['chi_loss'], ret[ + 'angle_norm_loss'] = supervised_chi_loss( + sequence_mask=seq_mask, + target_chi_mask=chi_mask, + target_chi_angles=gt_chi_angles, + aatype=aatype, + pred_angles=pred_angles, + unnormed_angles=unnormed_angles, + config=self.config) + ret['loss'] += sup_chi_loss + + if self.config.structural_violation_loss_weight: + + ret['loss'] += structural_violation_loss( + mask=pred_mask, 
violations=violations, config=self.config) + + return ret + + +def compute_atom14_gt( + aatype: jnp.ndarray, + all_atom_positions: geometry.Vec3Array, + all_atom_mask: jnp.ndarray, + pred_pos: geometry.Vec3Array +) -> Tuple[geometry.Vec3Array, jnp.ndarray, jnp.ndarray]: + """Find atom14 positions, this includes finding the correct renaming.""" + gt_positions, gt_mask = all_atom_multimer.atom37_to_atom14( + aatype, all_atom_positions, + all_atom_mask) + alt_gt_positions, alt_gt_mask = all_atom_multimer.get_alt_atom14( + aatype, gt_positions, gt_mask) + atom_is_ambiguous = all_atom_multimer.get_atom14_is_ambiguous(aatype) + + alt_naming_is_better = all_atom_multimer.find_optimal_renaming( + gt_positions=gt_positions, + alt_gt_positions=alt_gt_positions, + atom_is_ambiguous=atom_is_ambiguous, + gt_exists=gt_mask, + pred_positions=pred_pos) + + use_alt = alt_naming_is_better[:, None] + + gt_mask = (1. - use_alt) * gt_mask + use_alt * alt_gt_mask + gt_positions = (1. - use_alt) * gt_positions + use_alt * alt_gt_positions + + return gt_positions, alt_gt_mask, alt_naming_is_better + + +def backbone_loss(gt_rigid: geometry.Rigid3Array, + gt_frames_mask: jnp.ndarray, + gt_positions_mask: jnp.ndarray, + target_rigid: geometry.Rigid3Array, + config: ml_collections.ConfigDict, + pair_mask: jnp.ndarray + ) -> Tuple[Float, jnp.ndarray]: + """Backbone FAPE Loss.""" + loss_fn = functools.partial( + all_atom_multimer.frame_aligned_point_error, + l1_clamp_distance=config.atom_clamp_distance, + loss_unit_distance=config.loss_unit_distance) + + loss_fn = jax.vmap(loss_fn, (0, None, None, 0, None, None, None)) + fape = loss_fn(target_rigid, gt_rigid, gt_frames_mask, + target_rigid.translation, gt_rigid.translation, + gt_positions_mask, pair_mask) + + return jnp.mean(fape), fape[-1] + + +def compute_frames( + aatype: jnp.ndarray, + all_atom_positions: geometry.Vec3Array, + all_atom_mask: jnp.ndarray, + use_alt: jnp.ndarray + ) -> Tuple[geometry.Rigid3Array, jnp.ndarray]: + """Compute 
Frames from all atom positions. + + Args: + aatype: array of aatypes, int of [N] + all_atom_positions: Vector of all atom positions, shape [N, 37] + all_atom_mask: mask, shape [N] + use_alt: whether to use alternative orientation for ambiguous aatypes + shape [N] + Returns: + Rigid corresponding to Frames w shape [N, 8], + mask which Rigids are present w shape [N, 8] + """ + frames_batch = all_atom_multimer.atom37_to_frames(aatype, all_atom_positions, + all_atom_mask) + gt_frames = frames_batch['rigidgroups_gt_frames'] + alt_gt_frames = frames_batch['rigidgroups_alt_gt_frames'] + use_alt = use_alt[:, None] + + renamed_gt_frames = jax.tree_multimap( + lambda x, y: (1. - use_alt) * x + use_alt * y, gt_frames, alt_gt_frames) + + return renamed_gt_frames, frames_batch['rigidgroups_gt_exists'] + + +def sidechain_loss(gt_frames: geometry.Rigid3Array, + gt_frames_mask: jnp.ndarray, + gt_positions: geometry.Vec3Array, + gt_mask: jnp.ndarray, + pred_frames: geometry.Rigid3Array, + pred_positions: geometry.Vec3Array, + config: ml_collections.ConfigDict + ) -> Dict[str, jnp.ndarray]: + """Sidechain Loss using cleaned up rigids.""" + + flat_gt_frames = jax.tree_map(jnp.ravel, gt_frames) + flat_frames_mask = jnp.ravel(gt_frames_mask) + + flat_gt_positions = jax.tree_map(jnp.ravel, gt_positions) + flat_positions_mask = jnp.ravel(gt_mask) + + # Compute frame_aligned_point_error score for the final layer. 
+ def _slice_last_layer_and_flatten(x): + return jnp.ravel(x[-1]) + + flat_pred_frames = jax.tree_map(_slice_last_layer_and_flatten, pred_frames) + flat_pred_positions = jax.tree_map(_slice_last_layer_and_flatten, + pred_positions) + fape = all_atom_multimer.frame_aligned_point_error( + pred_frames=flat_pred_frames, + target_frames=flat_gt_frames, + frames_mask=flat_frames_mask, + pred_positions=flat_pred_positions, + target_positions=flat_gt_positions, + positions_mask=flat_positions_mask, + pair_mask=None, + length_scale=config.sidechain.loss_unit_distance, + l1_clamp_distance=config.sidechain.atom_clamp_distance) + + return { + 'fape': fape, + 'loss': fape} + + +def structural_violation_loss(mask: jnp.ndarray, + violations: Mapping[str, Float], + config: ml_collections.ConfigDict + ) -> Float: + """Computes Loss for structural Violations.""" + # Put all violation losses together to one large loss. + num_atoms = jnp.sum(mask).astype(jnp.float32) + 1e-6 + between_residues = violations['between_residues'] + within_residues = violations['within_residues'] + return (config.structural_violation_loss_weight * + (between_residues['bonds_c_n_loss_mean'] + + between_residues['angles_ca_c_n_loss_mean'] + + between_residues['angles_c_n_ca_loss_mean'] + + jnp.sum(between_residues['clashes_per_atom_loss_sum'] + + within_residues['per_atom_loss_sum']) / num_atoms + )) + + +def find_structural_violations( + aatype: jnp.ndarray, + residue_index: jnp.ndarray, + mask: jnp.ndarray, + pred_positions: geometry.Vec3Array, # (N, 14) + config: ml_collections.ConfigDict + ) -> Dict[str, Any]: + """Computes several checks for structural Violations.""" + + # Compute between residue backbone violations of bonds and angles. 
+ connection_violations = all_atom_multimer.between_residue_bond_loss( + pred_atom_positions=pred_positions, + pred_atom_mask=mask.astype(jnp.float32), + residue_index=residue_index.astype(jnp.float32), + aatype=aatype, + tolerance_factor_soft=config.violation_tolerance_factor, + tolerance_factor_hard=config.violation_tolerance_factor) + + # Compute the van der Waals radius for every atom + # (the first letter of the atom name is the element type). + # shape (N, 14) + atomtype_radius = jnp.array([ + residue_constants.van_der_waals_radius[name[0]] + for name in residue_constants.atom_types + ]) + residx_atom14_to_atom37 = all_atom_multimer.get_atom14_to_atom37_map(aatype) + atom_radius = mask * utils.batched_gather(atomtype_radius, + residx_atom14_to_atom37) + + # Compute the between residue clash loss. + between_residue_clashes = all_atom_multimer.between_residue_clash_loss( + pred_positions=pred_positions, + atom_exists=mask, + atom_radius=atom_radius, + residue_index=residue_index, + overlap_tolerance_soft=config.clash_overlap_tolerance, + overlap_tolerance_hard=config.clash_overlap_tolerance) + + # Compute all within-residue violations (clashes, + # bond length and angle violations). + restype_atom14_bounds = residue_constants.make_atom14_dists_bounds( + overlap_tolerance=config.clash_overlap_tolerance, + bond_length_tolerance_factor=config.violation_tolerance_factor) + dists_lower_bound = utils.batched_gather(restype_atom14_bounds['lower_bound'], + aatype) + dists_upper_bound = utils.batched_gather(restype_atom14_bounds['upper_bound'], + aatype) + within_residue_violations = all_atom_multimer.within_residue_violations( + pred_positions=pred_positions, + atom_exists=mask, + dists_lower_bound=dists_lower_bound, + dists_upper_bound=dists_upper_bound, + tighten_bounds_for_loss=0.0) + + # Combine them to a single per-residue violation mask (used later for LDDT). 
+ per_residue_violations_mask = jnp.max(jnp.stack([ + connection_violations['per_residue_violation_mask'], + jnp.max(between_residue_clashes['per_atom_clash_mask'], axis=-1), + jnp.max(within_residue_violations['per_atom_violations'], + axis=-1)]), axis=0) + + return { + 'between_residues': { + 'bonds_c_n_loss_mean': + connection_violations['c_n_loss_mean'], # () + 'angles_ca_c_n_loss_mean': + connection_violations['ca_c_n_loss_mean'], # () + 'angles_c_n_ca_loss_mean': + connection_violations['c_n_ca_loss_mean'], # () + 'connections_per_residue_loss_sum': + connection_violations['per_residue_loss_sum'], # (N) + 'connections_per_residue_violation_mask': + connection_violations['per_residue_violation_mask'], # (N) + 'clashes_mean_loss': + between_residue_clashes['mean_loss'], # () + 'clashes_per_atom_loss_sum': + between_residue_clashes['per_atom_loss_sum'], # (N, 14) + 'clashes_per_atom_clash_mask': + between_residue_clashes['per_atom_clash_mask'], # (N, 14) + }, + 'within_residues': { + 'per_atom_loss_sum': + within_residue_violations['per_atom_loss_sum'], # (N, 14) + 'per_atom_violations': + within_residue_violations['per_atom_violations'], # (N, 14), + }, + 'total_per_residue_violations_mask': + per_residue_violations_mask, # (N) + } + + +def compute_violation_metrics( + residue_index: jnp.ndarray, + mask: jnp.ndarray, + seq_mask: jnp.ndarray, + pred_positions: geometry.Vec3Array, # (N, 14) + violations: Mapping[str, jnp.ndarray], +) -> Dict[str, jnp.ndarray]: + """Compute several metrics to assess the structural violations.""" + ret = {} + between_residues = violations['between_residues'] + within_residues = violations['within_residues'] + extreme_ca_ca_violations = all_atom_multimer.extreme_ca_ca_distance_violations( + positions=pred_positions, + mask=mask.astype(jnp.float32), + residue_index=residue_index.astype(jnp.float32)) + ret['violations_extreme_ca_ca_distance'] = extreme_ca_ca_violations + ret['violations_between_residue_bond'] = utils.mask_mean( + 
mask=seq_mask, + value=between_residues['connections_per_residue_violation_mask']) + ret['violations_between_residue_clash'] = utils.mask_mean( + mask=seq_mask, + value=jnp.max(between_residues['clashes_per_atom_clash_mask'], axis=-1)) + ret['violations_within_residue'] = utils.mask_mean( + mask=seq_mask, + value=jnp.max(within_residues['per_atom_violations'], axis=-1)) + ret['violations_per_residue'] = utils.mask_mean( + mask=seq_mask, value=violations['total_per_residue_violations_mask']) + return ret + + +def supervised_chi_loss( + sequence_mask: jnp.ndarray, + target_chi_mask: jnp.ndarray, + aatype: jnp.ndarray, + target_chi_angles: jnp.ndarray, + pred_angles: jnp.ndarray, + unnormed_angles: jnp.ndarray, + config: ml_collections.ConfigDict) -> Tuple[Float, Float, Float]: + """Computes loss for direct chi angle supervision.""" + eps = 1e-6 + chi_mask = target_chi_mask.astype(jnp.float32) + + pred_angles = pred_angles[:, :, 3:] + + residue_type_one_hot = jax.nn.one_hot( + aatype, residue_constants.restype_num + 1, dtype=jnp.float32)[None] + chi_pi_periodic = jnp.einsum('ijk, kl->ijl', residue_type_one_hot, + jnp.asarray(residue_constants.chi_pi_periodic)) + + true_chi = target_chi_angles[None] + sin_true_chi = jnp.sin(true_chi) + cos_true_chi = jnp.cos(true_chi) + sin_cos_true_chi = jnp.stack([sin_true_chi, cos_true_chi], axis=-1) + + # This is -1 if chi is pi periodic and +1 if it's 2 pi periodic + shifted_mask = (1 - 2 * chi_pi_periodic)[..., None] + sin_cos_true_chi_shifted = shifted_mask * sin_cos_true_chi + + sq_chi_error = jnp.sum( + squared_difference(sin_cos_true_chi, pred_angles), -1) + sq_chi_error_shifted = jnp.sum( + squared_difference(sin_cos_true_chi_shifted, pred_angles), -1) + sq_chi_error = jnp.minimum(sq_chi_error, sq_chi_error_shifted) + + sq_chi_loss = utils.mask_mean(mask=chi_mask[None], value=sq_chi_error) + angle_norm = jnp.sqrt(jnp.sum(jnp.square(unnormed_angles), axis=-1) + eps) + norm_error = jnp.abs(angle_norm - 1.) 
+ angle_norm_loss = utils.mask_mean(mask=sequence_mask[None, :, None], + value=norm_error) + loss = (config.chi_weight * sq_chi_loss + + config.angle_norm_weight * angle_norm_loss) + return loss, sq_chi_loss, angle_norm_loss + + +def l2_normalize(x: jnp.ndarray, + axis: int = -1, + epsilon: float = 1e-12 + ) -> jnp.ndarray: + return x / jnp.sqrt( + jnp.maximum(jnp.sum(x**2, axis=axis, keepdims=True), epsilon)) + + +def get_renamed_chi_angles(aatype: jnp.ndarray, + chi_angles: jnp.ndarray, + alt_is_better: jnp.ndarray + ) -> jnp.ndarray: + """Return renamed chi angles.""" + chi_angle_is_ambiguous = utils.batched_gather( + jnp.array(residue_constants.chi_pi_periodic, dtype=jnp.float32), aatype) + alt_chi_angles = chi_angles + np.pi * chi_angle_is_ambiguous + # Map back to [-pi, pi]. + alt_chi_angles = alt_chi_angles - 2 * np.pi * (alt_chi_angles > np.pi).astype( + jnp.float32) + alt_is_better = alt_is_better[:, None] + return (1. - alt_is_better) * chi_angles + alt_is_better * alt_chi_angles + + +class MultiRigidSidechain(hk.Module): + """Class to make side chain atoms.""" + + def __init__(self, + config: ml_collections.ConfigDict, + global_config: ml_collections.ConfigDict, + name: str = 'rigid_sidechain'): + super().__init__(name=name) + self.config = config + self.global_config = global_config + + def __call__(self, + rigid: geometry.Rigid3Array, + representations_list: Iterable[jnp.ndarray], + aatype: jnp.ndarray + ) -> Dict[str, Any]: + """Predict sidechains using multi-rigid representations. + + Args: + rigid: The Rigid's for each residue (translations in angstoms) + representations_list: A list of activations to predict sidechains from. + aatype: amino acid types. 
+ + Returns: + dict containing atom positions and frames (in angstrom) + """ + act = [ + common_modules.Linear( # pylint: disable=g-complex-comprehension + self.config.num_channel, + name='input_projection')(jax.nn.relu(x)) + for x in representations_list] + # Sum the activation list (equivalent to concat then Conv1D) + act = sum(act) + + final_init = 'zeros' if self.global_config.zero_init else 'linear' + + # Mapping with some residual blocks. + for _ in range(self.config.num_residual_block): + old_act = act + act = common_modules.Linear( + self.config.num_channel, + initializer='relu', + name='resblock1')( + jax.nn.relu(act)) + act = common_modules.Linear( + self.config.num_channel, + initializer=final_init, + name='resblock2')( + jax.nn.relu(act)) + act += old_act + + # Map activations to torsion angles. + # [batch_size, num_res, 14] + num_res = act.shape[0] + unnormalized_angles = common_modules.Linear( + 14, name='unnormalized_angles')( + jax.nn.relu(act)) + unnormalized_angles = jnp.reshape( + unnormalized_angles, [num_res, 7, 2]) + angles = l2_normalize(unnormalized_angles, axis=-1) + + outputs = { + 'angles_sin_cos': angles, # jnp.ndarray (N, 7, 2) + 'unnormalized_angles_sin_cos': + unnormalized_angles, # jnp.ndarray (N, 7, 2) + } + + # Map torsion angles to frames. + # geometry.Rigid3Array with shape (N, 8) + all_frames_to_global = all_atom_multimer.torsion_angles_to_frames( + aatype, + rigid, + angles) + + # Use frames and literature positions to create the final atom coordinates. 
+ # geometry.Vec3Array with shape (N, 14) + pred_positions = all_atom_multimer.frames_and_literature_positions_to_atom14_pos( + aatype, all_frames_to_global) + + outputs.update({ + 'atom_pos': pred_positions, # geometry.Vec3Array (N, 14) + 'frames': all_frames_to_global, # geometry.Rigid3Array (N, 8) + }) + return outputs + diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/model/geometry/__init__.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/model/geometry/__init__.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,31 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Geometry Module.""" + +from alphafold.model.geometry import rigid_matrix_vector +from alphafold.model.geometry import rotation_matrix +from alphafold.model.geometry import struct_of_array +from alphafold.model.geometry import vector + +Rot3Array = rotation_matrix.Rot3Array +Rigid3Array = rigid_matrix_vector.Rigid3Array + +StructOfArray = struct_of_array.StructOfArray + +Vec3Array = vector.Vec3Array +square_euclidean_distance = vector.square_euclidean_distance +euclidean_distance = vector.euclidean_distance +dihedral_angle = vector.dihedral_angle +dot = vector.dot +cross = vector.cross diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/model/geometry/rigid_matrix_vector.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/model/geometry/rigid_matrix_vector.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,106 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Rigid3Array Transformations represented by a Matrix and a Vector.""" + +from __future__ import annotations +from typing import Union + +from alphafold.model.geometry import rotation_matrix +from alphafold.model.geometry import struct_of_array +from alphafold.model.geometry import vector +import jax +import jax.numpy as jnp + +Float = Union[float, jnp.ndarray] + +VERSION = '0.1' + + +@struct_of_array.StructOfArray(same_dtype=True) +class Rigid3Array: + """Rigid Transformation, i.e. 
element of special euclidean group.""" + + rotation: rotation_matrix.Rot3Array + translation: vector.Vec3Array + + def __matmul__(self, other: Rigid3Array) -> Rigid3Array: + new_rotation = self.rotation @ other.rotation + new_translation = self.apply_to_point(other.translation) + return Rigid3Array(new_rotation, new_translation) + + def inverse(self) -> Rigid3Array: + """Return Rigid3Array corresponding to inverse transform.""" + inv_rotation = self.rotation.inverse() + inv_translation = inv_rotation.apply_to_point(-self.translation) + return Rigid3Array(inv_rotation, inv_translation) + + def apply_to_point(self, point: vector.Vec3Array) -> vector.Vec3Array: + """Apply Rigid3Array transform to point.""" + return self.rotation.apply_to_point(point) + self.translation + + def apply_inverse_to_point(self, point: vector.Vec3Array) -> vector.Vec3Array: + """Apply inverse Rigid3Array transform to point.""" + new_point = point - self.translation + return self.rotation.apply_inverse_to_point(new_point) + + def compose_rotation(self, other_rotation): + rot = self.rotation @ other_rotation + trans = jax.tree_map(lambda x: jnp.broadcast_to(x, rot.shape), + self.translation) + return Rigid3Array(rot, trans) + + @classmethod + def identity(cls, shape, dtype=jnp.float32) -> Rigid3Array: + """Return identity Rigid3Array of given shape.""" + return cls( + rotation_matrix.Rot3Array.identity(shape, dtype=dtype), + vector.Vec3Array.zeros(shape, dtype=dtype)) + + def scale_translation(self, factor: Float) -> Rigid3Array: + """Scale translation in Rigid3Array by 'factor'.""" + return Rigid3Array(self.rotation, self.translation * factor) + + def to_array(self): + rot_array = self.rotation.to_array() + vec_array = self.translation.to_array() + return jnp.concatenate([rot_array, vec_array[..., None]], axis=-1) + + @classmethod + def from_array(cls, array): + rot = rotation_matrix.Rot3Array.from_array(array[..., :3]) + vec = vector.Vec3Array.from_array(array[..., -1]) + return cls(rot, 
vec) + + @classmethod + def from_array4x4(cls, array: jnp.ndarray) -> Rigid3Array: + """Construct Rigid3Array from homogeneous 4x4 array.""" + assert array.shape[-1] == 4 + assert array.shape[-2] == 4 + rotation = rotation_matrix.Rot3Array( + array[..., 0, 0], array[..., 0, 1], array[..., 0, 2], + array[..., 1, 0], array[..., 1, 1], array[..., 1, 2], + array[..., 2, 0], array[..., 2, 1], array[..., 2, 2] + ) + translation = vector.Vec3Array( + array[..., 0, 3], array[..., 1, 3], array[..., 2, 3]) + return cls(rotation, translation) + + def __getstate__(self): + return (VERSION, (self.rotation, self.translation)) + + def __setstate__(self, state): + version, (rot, trans) = state + del version + object.__setattr__(self, 'rotation', rot) + object.__setattr__(self, 'translation', trans) diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/model/geometry/rotation_matrix.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/model/geometry/rotation_matrix.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,157 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Rot3Array Matrix Class.""" + +from __future__ import annotations +import dataclasses + +from alphafold.model.geometry import struct_of_array +from alphafold.model.geometry import utils +from alphafold.model.geometry import vector +import jax +import jax.numpy as jnp +import numpy as np + +COMPONENTS = ['xx', 'xy', 'xz', 'yx', 'yy', 'yz', 'zx', 'zy', 'zz'] + +VERSION = '0.1' + + +@struct_of_array.StructOfArray(same_dtype=True) +class Rot3Array: + """Rot3Array Matrix in 3 dimensional Space implemented as struct of arrays.""" + + xx: jnp.ndarray = dataclasses.field(metadata={'dtype': jnp.float32}) + xy: jnp.ndarray + xz: jnp.ndarray + yx: jnp.ndarray + yy: jnp.ndarray + yz: jnp.ndarray + zx: jnp.ndarray + zy: jnp.ndarray + zz: jnp.ndarray + + __array_ufunc__ = None + + def inverse(self) -> Rot3Array: + """Returns inverse of Rot3Array.""" + return Rot3Array(self.xx, self.yx, self.zx, + self.xy, self.yy, self.zy, + self.xz, self.yz, self.zz) + + def apply_to_point(self, point: vector.Vec3Array) -> vector.Vec3Array: + """Applies Rot3Array to point.""" + return vector.Vec3Array( + self.xx * point.x + self.xy * point.y + self.xz * point.z, + self.yx * point.x + self.yy * point.y + self.yz * point.z, + self.zx * point.x + self.zy * point.y + self.zz * point.z) + + def apply_inverse_to_point(self, point: vector.Vec3Array) -> vector.Vec3Array: + """Applies inverse Rot3Array to point.""" + return self.inverse().apply_to_point(point) + + def __matmul__(self, other: Rot3Array) -> Rot3Array: + """Composes two Rot3Arrays.""" + c0 = self.apply_to_point(vector.Vec3Array(other.xx, other.yx, other.zx)) + c1 = self.apply_to_point(vector.Vec3Array(other.xy, other.yy, other.zy)) + c2 = self.apply_to_point(vector.Vec3Array(other.xz, other.yz, other.zz)) + return Rot3Array(c0.x, c1.x, c2.x, c0.y, c1.y, c2.y, c0.z, c1.z, c2.z) + + @classmethod + def identity(cls, shape, dtype=jnp.float32) -> Rot3Array: + """Returns identity of given shape.""" + ones = jnp.ones(shape, dtype=dtype) + 
zeros = jnp.zeros(shape, dtype=dtype) + return cls(ones, zeros, zeros, zeros, ones, zeros, zeros, zeros, ones) + + @classmethod + def from_two_vectors(cls, e0: vector.Vec3Array, + e1: vector.Vec3Array) -> Rot3Array: + """Construct Rot3Array from two Vectors. + + Rot3Array is constructed such that in the corresponding frame 'e0' lies on + the positive x-Axis and 'e1' lies in the xy plane with positive sign of y. + + Args: + e0: Vector + e1: Vector + Returns: + Rot3Array + """ + # Normalize the unit vector for the x-axis, e0. + e0 = e0.normalized() + # make e1 perpendicular to e0. + c = e1.dot(e0) + e1 = (e1 - c * e0).normalized() + # Compute e2 as cross product of e0 and e1. + e2 = e0.cross(e1) + return cls(e0.x, e1.x, e2.x, e0.y, e1.y, e2.y, e0.z, e1.z, e2.z) + + @classmethod + def from_array(cls, array: jnp.ndarray) -> Rot3Array: + """Construct Rot3Array Matrix from array of shape. [..., 3, 3].""" + unstacked = utils.unstack(array, axis=-2) + unstacked = sum([utils.unstack(x, axis=-1) for x in unstacked], []) + return cls(*unstacked) + + def to_array(self) -> jnp.ndarray: + """Convert Rot3Array to array of shape [..., 3, 3].""" + return jnp.stack( + [jnp.stack([self.xx, self.xy, self.xz], axis=-1), + jnp.stack([self.yx, self.yy, self.yz], axis=-1), + jnp.stack([self.zx, self.zy, self.zz], axis=-1)], + axis=-2) + + @classmethod + def from_quaternion(cls, + w: jnp.ndarray, + x: jnp.ndarray, + y: jnp.ndarray, + z: jnp.ndarray, + normalize: bool = True, + epsilon: float = 1e-6) -> Rot3Array: + """Construct Rot3Array from components of quaternion.""" + if normalize: + inv_norm = jax.lax.rsqrt(jnp.maximum(epsilon, w**2 + x**2 + y**2 + z**2)) + w *= inv_norm + x *= inv_norm + y *= inv_norm + z *= inv_norm + xx = 1 - 2 * (jnp.square(y) + jnp.square(z)) + xy = 2 * (x * y - w * z) + xz = 2 * (x * z + w * y) + yx = 2 * (x * y + w * z) + yy = 1 - 2 * (jnp.square(x) + jnp.square(z)) + yz = 2 * (y * z - w * x) + zx = 2 * (x * z - w * y) + zy = 2 * (y * z + w * x) + zz = 1 - 2 * 
(jnp.square(x) + jnp.square(y)) + return cls(xx, xy, xz, yx, yy, yz, zx, zy, zz) + + @classmethod + def random_uniform(cls, key, shape, dtype=jnp.float32) -> Rot3Array: + """Samples uniform random Rot3Array according to Haar Measure.""" + quat_array = jax.random.normal(key, tuple(shape) + (4,), dtype=dtype) + quats = utils.unstack(quat_array) + return cls.from_quaternion(*quats) + + def __getstate__(self): + return (VERSION, + [np.asarray(getattr(self, field)) for field in COMPONENTS]) + + def __setstate__(self, state): + version, state = state + del version + for i, field in enumerate(COMPONENTS): + object.__setattr__(self, field, state[i]) diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/model/geometry/struct_of_array.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/model/geometry/struct_of_array.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,220 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
"""Class decorator to represent (nested) struct of arrays."""

import dataclasses

import jax


def get_item(instance, key):
  """Returns a copy of `instance` with every array field indexed by `key`.

  Args:
    instance: Instance of a class decorated with `StructOfArray`.
    key: Index expression. If it is a tuple that contains `Ellipsis`, one
      `slice(None)` per trailing dimension of the field (metadata entry
      'num_trailing_dims', default 0) is appended to the key for that field.

  Returns:
    New instance created with `dataclasses.replace`, so metadata fields are
    carried over unchanged.
  """
  sliced = {}
  for field in get_array_fields(instance):
    num_trailing_dims = field.metadata.get('num_trailing_dims', 0)
    this_key = key
    if isinstance(key, tuple) and Ellipsis in this_key:
      this_key += (slice(None),) * num_trailing_dims
    sliced[field.name] = getattr(instance, field.name)[this_key]
  return dataclasses.replace(instance, **sliced)


@property
def get_shape(instance):
  """Returns Shape for given instance of dataclass.

  The shape is read from the first declared dataclass field, with that
  field's 'num_trailing_dims' (if any) stripped from the end.
  """
  first_field = dataclasses.fields(instance)[0]
  num_trailing_dims = first_field.metadata.get('num_trailing_dims', None)
  value = getattr(instance, first_field.name)
  if num_trailing_dims:
    return value.shape[:-num_trailing_dims]
  else:
    return value.shape


def get_len(instance):
  """Returns length for given instance of dataclass.

  Raises:
    TypeError: If the instance shape is empty.
  """
  shape = instance.shape
  if shape:
    return shape[0]
  else:
    raise TypeError('len() of unsized object')  # Match jax.numpy behavior.


@property
def get_dtype(instance):
  """Returns Dtype for given instance of dataclass.

  The dtype comes from the single field whose metadata has 'sets_dtype', or,
  when the class was declared with `same_dtype`, from the first declared field.

  Raises:
    AttributeError: If neither rule applies, or if the selected field value
      has no `dtype` attribute (`post_init` relies on this to skip validation
      for non-array leaves).
  """
  fields = dataclasses.fields(instance)
  sets_dtype = [
      field.name for field in fields if field.metadata.get('sets_dtype', False)
  ]
  if sets_dtype:
    assert len(sets_dtype) == 1, 'at most one field can set dtype'
    field_value = getattr(instance, sets_dtype[0])
  elif instance.same_dtype:
    field_value = getattr(instance, fields[0].name)
  else:
    # Should this be Value Error?
    raise AttributeError('Trying to access Dtype on Struct of Array without '
                         'either "same_dtype" or field setting dtype')

  if hasattr(field_value, 'dtype'):
    return field_value.dtype
  else:
    # Should this be Value Error?
    raise AttributeError(f'field_value {field_value} does not have dtype')


def replace(instance, **kwargs):
  """Returns a copy of `instance` with the fields in `kwargs` replaced."""
  return dataclasses.replace(instance, **kwargs)


def post_init(instance):
  """Validate instance has same shapes & dtypes.

  Validation only runs when `instance.dtype` can be resolved; otherwise it is
  skipped entirely.

  Raises:
    AssertionError: If a field's (stripped) shape or dtype does not match.
  """
  array_fields = get_array_fields(instance)
  arrays = list(get_array_fields(instance, return_values=True).values())
  first_field = array_fields[0]
  # These slightly weird constructions about checking whether the leaves are
  # actual arrays is since e.g. vmap internally relies on being able to
  # construct pytree's with object() as leaves, this would break the checking
  # as such we are only validating the object when the entries in the dataclass
  # Are arrays or other dataclasses of arrays.
  try:
    dtype = instance.dtype
  except AttributeError:
    dtype = None
  if dtype is not None:
    first_shape = instance.shape
    for array, field in zip(arrays, array_fields):
      num_trailing_dims = field.metadata.get('num_trailing_dims', None)
      if num_trailing_dims:
        array_shape = array.shape
        field_shape = array_shape[:-num_trailing_dims]
        # Both parts must be f-strings, otherwise the placeholder is printed
        # literally instead of the expected number of trailing dims.
        msg = (f'field {field} should have number of trailing dims'
               f' {num_trailing_dims}')
        assert len(array_shape) == len(first_shape) + num_trailing_dims, msg
      else:
        field_shape = array.shape

      shape_msg = (f"Stripped Shape {field_shape} of field {field} doesn't "
                   f"match shape {first_shape} of field {first_field}")
      assert field_shape == first_shape, shape_msg

      field_dtype = array.dtype

      allowed_metadata_dtypes = field.metadata.get('allowed_dtypes', [])
      if allowed_metadata_dtypes:
        msg = f'Dtype is {field_dtype} but must be in {allowed_metadata_dtypes}'
        assert field_dtype in allowed_metadata_dtypes, msg

      # A per-field 'dtype' metadata entry overrides the instance dtype.
      if 'dtype' in field.metadata:
        target_dtype = field.metadata['dtype']
      else:
        target_dtype = dtype

      msg = f'Dtype is {field_dtype} but must be {target_dtype}'
      assert field_dtype == target_dtype, msg


def flatten(instance):
  """Flatten Struct of Array instance.

  Returns:
    Tuple `(leaves, aux)`: `leaves` is the concatenation of the flattened
    array fields, `aux` is `(inner_treedefs, metadata, num_arrays)` which
    `unflatten` uses to rebuild the instance.
  """
  array_likes = list(get_array_fields(instance, return_values=True).values())
  flat_array_likes = []
  inner_treedefs = []
  num_arrays = []
  for array_like in array_likes:
    # `jax.tree_util` is the stable home of this function; the top-level
    # `jax.tree_flatten` alias is not available in newer JAX releases.
    flat_array_like, inner_treedef = jax.tree_util.tree_flatten(array_like)
    inner_treedefs.append(inner_treedef)
    flat_array_likes += flat_array_like
    num_arrays.append(len(flat_array_like))
  metadata = get_metadata_fields(instance, return_values=True)
  metadata = type(instance).metadata_cls(**metadata)
  # Tuples instead of lists so the auxiliary data can be hashed.
  return flat_array_likes, (tuple(inner_treedefs), metadata,
                            tuple(num_arrays))


def make_metadata_class(cls):
  """Builds a frozen dataclass holding only the metadata fields of `cls`."""
  metadata_fields = get_fields(cls,
                               lambda x: x.metadata.get('is_metadata', False))
  metadata_cls = dataclasses.make_dataclass(
      cls_name='Meta' + cls.__name__,
      fields=[(field.name, field.type, field) for field in metadata_fields],
      frozen=True,
      eq=True)
  return metadata_cls


def get_fields(cls_or_instance, filterfn, return_values=False):
  """Returns the dataclass fields of `cls_or_instance` accepted by `filterfn`.

  Args:
    cls_or_instance: Dataclass or dataclass instance.
    filterfn: Predicate called with each `dataclasses.Field`.
    return_values: If True, return a dict mapping field name to the attribute
      value read from `cls_or_instance` instead of the list of fields.
  """
  fields = dataclasses.fields(cls_or_instance)
  fields = [field for field in fields if filterfn(field)]
  if return_values:
    return {
        field.name: getattr(cls_or_instance, field.name) for field in fields
    }
  else:
    return fields


def get_array_fields(cls, return_values=False):
  """Returns the fields whose metadata does not mark them 'is_metadata'."""
  return get_fields(
      cls,
      lambda x: not x.metadata.get('is_metadata', False),
      return_values=return_values)


def get_metadata_fields(cls, return_values=False):
  """Returns the fields whose metadata marks them 'is_metadata'."""
  return get_fields(
      cls,
      lambda x: x.metadata.get('is_metadata', False),
      return_values=return_values)


class StructOfArray:
  """Class Decorator for Struct Of Arrays."""

  def __init__(self, same_dtype=True):
    self.same_dtype = same_dtype

  def __call__(self, cls):
    """Turns `cls` into a frozen dataclass registered as a JAX pytree node."""
    cls.__array_ufunc__ = None
    cls.replace = replace
    cls.same_dtype = self.same_dtype
    cls.dtype = get_dtype
    cls.shape = get_shape
    cls.__len__ = get_len
    cls.__getitem__ = get_item
    # Note: this replaces any `__post_init__` defined in the class body.
    cls.__post_init__ = post_init
    new_cls = dataclasses.dataclass(cls, frozen=True, eq=False)  # pytype: disable=wrong-keyword-args
    # pytree claims to require metadata to be hashable, not sure why,
    # But making derived dataclass that can just hold metadata
    new_cls.metadata_cls = make_metadata_class(new_cls)

    def unflatten(aux, data):
      """Rebuilds an instance from the output of `flatten`."""
      inner_treedefs, metadata, num_arrays = aux
      array_fields = [field.name for field in get_array_fields(new_cls)]
      value_dict = {}
      array_start = 0
      for num_array, inner_treedef, array_field in zip(num_arrays,
                                                       inner_treedefs,
                                                       array_fields):
        value_dict[array_field] = jax.tree_util.tree_unflatten(
            inner_treedef, data[array_start:array_start + num_array])
        array_start += num_array
      metadata_fields = get_metadata_fields(new_cls)
      for field in metadata_fields:
        value_dict[field.name] = getattr(metadata, field.name)

      return new_cls(**value_dict)

    jax.tree_util.register_pytree_node(
        nodetype=new_cls, flatten_func=flatten, unflatten_func=unflatten)
    return new_cls


# -----------------------------------------------------------------------------
# Changeset file boundary (diff header kept as a comment):
# diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/model/geometry/test_utils.py
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/docker/alphafold/alphafold/model/geometry/test_utils.py Tue Mar 01 02:53:05 2022 +0000
# @@ -0,0 +1,98 @@
# -----------------------------------------------------------------------------
# Copyright 2021 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Shared utils for tests."""

import dataclasses

from alphafold.model.geometry import rigid_matrix_vector
from alphafold.model.geometry import rotation_matrix
from alphafold.model.geometry import vector
import jax.numpy as jnp
import numpy as np


def assert_rotation_matrix_equal(matrix1: rotation_matrix.Rot3Array,
                                 matrix2: rotation_matrix.Rot3Array):
  """Asserts that every Rot3Array field of both matrices is exactly equal."""
  component_names = [
      component.name
      for component in dataclasses.fields(rotation_matrix.Rot3Array)
  ]
  for name in component_names:
    np.testing.assert_array_equal(
        getattr(matrix1, name), getattr(matrix2, name))


def assert_rotation_matrix_close(mat1: rotation_matrix.Rot3Array,
                                 mat2: rotation_matrix.Rot3Array):
  """Asserts both matrices agree to 6 decimals after `to_array()`."""
  actual = mat1.to_array()
  desired = mat2.to_array()
  np.testing.assert_array_almost_equal(actual, desired, decimal=6)


def assert_array_equal_to_rotation_matrix(array: jnp.ndarray,
                                          matrix: rotation_matrix.Rot3Array):
  """Check that array and Matrix match."""
  # Component `matrix.<row><col>` is compared against array[..., i, j], in
  # row-major order (xx, xy, xz, yx, ...).
  axes = 'xyz'
  for i, row in enumerate(axes):
    for j, col in enumerate(axes):
      np.testing.assert_array_equal(
          getattr(matrix, row + col), array[..., i, j])


def assert_array_close_to_rotation_matrix(array: jnp.ndarray,
                                          matrix: rotation_matrix.Rot3Array):
  """Asserts `matrix.to_array()` agrees with `array` to 6 decimals."""
  np.testing.assert_array_almost_equal(matrix.to_array(), array, decimal=6)


def assert_vectors_equal(vec1: vector.Vec3Array, vec2: vector.Vec3Array):
  """Asserts the x, y and z components of both vectors are exactly equal."""
  for axis in 'xyz':
    np.testing.assert_array_equal(getattr(vec1, axis), getattr(vec2, axis))


def assert_vectors_close(vec1: vector.Vec3Array, vec2: vector.Vec3Array):
  """Asserts the components agree within an absolute tolerance of 1e-6."""
  for axis in 'xyz':
    np.testing.assert_allclose(
        getattr(vec1, axis), getattr(vec2, axis), atol=1e-6, rtol=0.)


def assert_array_close_to_vector(array: jnp.ndarray, vec: vector.Vec3Array):
  """Asserts `vec.to_array()` matches `array` within atol=1e-6."""
  stacked = vec.to_array()
  np.testing.assert_allclose(stacked, array, atol=1e-6, rtol=0.)


def assert_array_equal_to_vector(array: jnp.ndarray, vec: vector.Vec3Array):
  """Asserts `vec.to_array()` equals `array` exactly."""
  stacked = vec.to_array()
  np.testing.assert_array_equal(stacked, array)


def assert_rigid_equal_to_rigid(rigid1: rigid_matrix_vector.Rigid3Array,
                                rigid2: rigid_matrix_vector.Rigid3Array):
  """Asserts rotation and translation of both rigids are exactly equal."""
  rot, trans = rigid1.rotation, rigid1.translation
  assert_rot_trans_equal_to_rigid(rot, trans, rigid2)


def assert_rigid_close_to_rigid(rigid1: rigid_matrix_vector.Rigid3Array,
                                rigid2: rigid_matrix_vector.Rigid3Array):
  """Asserts rotation and translation of both rigids are close."""
  rot, trans = rigid1.rotation, rigid1.translation
  assert_rot_trans_close_to_rigid(rot, trans, rigid2)


def assert_rot_trans_equal_to_rigid(rot: rotation_matrix.Rot3Array,
                                    trans: vector.Vec3Array,
                                    rigid: rigid_matrix_vector.Rigid3Array):
  """Asserts `rot`/`trans` exactly equal the rotation/translation of `rigid`."""
  assert_rotation_matrix_equal(rot, rigid.rotation)
  assert_vectors_equal(trans, rigid.translation)


def assert_rot_trans_close_to_rigid(rot: rotation_matrix.Rot3Array,
                                    trans: vector.Vec3Array,
                                    rigid: rigid_matrix_vector.Rigid3Array):
  """Asserts `rot`/`trans` are close to the rotation/translation of `rigid`."""
  assert_rotation_matrix_close(rot, rigid.rotation)
  assert_vectors_close(trans, rigid.translation)


# -----------------------------------------------------------------------------
# Changeset file boundary (diff header kept as a comment):
# diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/model/geometry/utils.py
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/docker/alphafold/alphafold/model/geometry/utils.py Tue Mar 01 02:53:05 2022 +0000
# @@ -0,0 +1,23 @@
# -----------------------------------------------------------------------------
# Copyright 2021 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Utils for geometry library.""" + +from typing import List + +import jax.numpy as jnp + + +def unstack(value: jnp.ndarray, axis: int = -1) -> List[jnp.ndarray]: + return [jnp.squeeze(v, axis=axis) + for v in jnp.split(value, value.shape[axis], axis=axis)] diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/model/geometry/vector.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/model/geometry/vector.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,217 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Vec3Array Class.""" + +from __future__ import annotations +import dataclasses +from typing import Union + +from alphafold.model.geometry import struct_of_array +from alphafold.model.geometry import utils +import jax +import jax.numpy as jnp +import numpy as np + +Float = Union[float, jnp.ndarray] + +VERSION = '0.1' + + +@struct_of_array.StructOfArray(same_dtype=True) +class Vec3Array: + """Vec3Array in 3 dimensional Space implemented as struct of arrays. 
+ + This is done in order to improve performance and precision. + On TPU small matrix multiplications are very suboptimal and will waste large + compute ressources, furthermore any matrix multiplication on tpu happen in + mixed bfloat16/float32 precision, which is often undesirable when handling + physical coordinates. + In most cases this will also be faster on cpu's/gpu's since it allows for + easier use of vector instructions. + """ + + x: jnp.ndarray = dataclasses.field(metadata={'dtype': jnp.float32}) + y: jnp.ndarray + z: jnp.ndarray + + def __post_init__(self): + if hasattr(self.x, 'dtype'): + assert self.x.dtype == self.y.dtype + assert self.x.dtype == self.z.dtype + assert all([x == y for x, y in zip(self.x.shape, self.y.shape)]) + assert all([x == z for x, z in zip(self.x.shape, self.z.shape)]) + + def __add__(self, other: Vec3Array) -> Vec3Array: + return jax.tree_multimap(lambda x, y: x + y, self, other) + + def __sub__(self, other: Vec3Array) -> Vec3Array: + return jax.tree_multimap(lambda x, y: x - y, self, other) + + def __mul__(self, other: Float) -> Vec3Array: + return jax.tree_map(lambda x: x * other, self) + + def __rmul__(self, other: Float) -> Vec3Array: + return self * other + + def __truediv__(self, other: Float) -> Vec3Array: + return jax.tree_map(lambda x: x / other, self) + + def __neg__(self) -> Vec3Array: + return jax.tree_map(lambda x: -x, self) + + def __pos__(self) -> Vec3Array: + return jax.tree_map(lambda x: x, self) + + def cross(self, other: Vec3Array) -> Vec3Array: + """Compute cross product between 'self' and 'other'.""" + new_x = self.y * other.z - self.z * other.y + new_y = self.z * other.x - self.x * other.z + new_z = self.x * other.y - self.y * other.x + return Vec3Array(new_x, new_y, new_z) + + def dot(self, other: Vec3Array) -> Float: + """Compute dot product between 'self' and 'other'.""" + return self.x * other.x + self.y * other.y + self.z * other.z + + def norm(self, epsilon: float = 1e-6) -> Float: + """Compute Norm 
of Vec3Array, clipped to epsilon.""" + # To avoid NaN on the backward pass, we must use maximum before the sqrt + norm2 = self.dot(self) + if epsilon: + norm2 = jnp.maximum(norm2, epsilon**2) + return jnp.sqrt(norm2) + + def norm2(self): + return self.dot(self) + + def normalized(self, epsilon: float = 1e-6) -> Vec3Array: + """Return unit vector with optional clipping.""" + return self / self.norm(epsilon) + + @classmethod + def zeros(cls, shape, dtype=jnp.float32): + """Return Vec3Array corresponding to zeros of given shape.""" + return cls( + jnp.zeros(shape, dtype), jnp.zeros(shape, dtype), + jnp.zeros(shape, dtype)) + + def to_array(self) -> jnp.ndarray: + return jnp.stack([self.x, self.y, self.z], axis=-1) + + @classmethod + def from_array(cls, array): + return cls(*utils.unstack(array)) + + def __getstate__(self): + return (VERSION, + [np.asarray(self.x), + np.asarray(self.y), + np.asarray(self.z)]) + + def __setstate__(self, state): + version, state = state + del version + for i, letter in enumerate('xyz'): + object.__setattr__(self, letter, state[i]) + + +def square_euclidean_distance(vec1: Vec3Array, + vec2: Vec3Array, + epsilon: float = 1e-6) -> Float: + """Computes square of euclidean distance between 'vec1' and 'vec2'. 
+ + Args: + vec1: Vec3Array to compute distance to + vec2: Vec3Array to compute distance from, should be + broadcast compatible with 'vec1' + epsilon: distance is clipped from below to be at least epsilon + + Returns: + Array of square euclidean distances; + shape will be result of broadcasting 'vec1' and 'vec2' + """ + difference = vec1 - vec2 + distance = difference.dot(difference) + if epsilon: + distance = jnp.maximum(distance, epsilon) + return distance + + +def dot(vector1: Vec3Array, vector2: Vec3Array) -> Float: + return vector1.dot(vector2) + + +def cross(vector1: Vec3Array, vector2: Vec3Array) -> Float: + return vector1.cross(vector2) + + +def norm(vector: Vec3Array, epsilon: float = 1e-6) -> Float: + return vector.norm(epsilon) + + +def normalized(vector: Vec3Array, epsilon: float = 1e-6) -> Vec3Array: + return vector.normalized(epsilon) + + +def euclidean_distance(vec1: Vec3Array, + vec2: Vec3Array, + epsilon: float = 1e-6) -> Float: + """Computes euclidean distance between 'vec1' and 'vec2'. + + Args: + vec1: Vec3Array to compute euclidean distance to + vec2: Vec3Array to compute euclidean distance from, should be + broadcast compatible with 'vec1' + epsilon: distance is clipped from below to be at least epsilon + + Returns: + Array of euclidean distances; + shape will be result of broadcasting 'vec1' and 'vec2' + """ + distance_sq = square_euclidean_distance(vec1, vec2, epsilon**2) + distance = jnp.sqrt(distance_sq) + return distance + + +def dihedral_angle(a: Vec3Array, b: Vec3Array, c: Vec3Array, + d: Vec3Array) -> Float: + """Computes torsion angle for a quadruple of points. + + For points (a, b, c, d), this is the angle between the planes defined by + points (a, b, c) and (b, c, d). It is also known as the dihedral angle. + + Arguments: + a: A Vec3Array of coordinates. + b: A Vec3Array of coordinates. + c: A Vec3Array of coordinates. + d: A Vec3Array of coordinates. + + Returns: + A tensor of angles in radians: [-pi, pi]. 
+ """ + v1 = a - b + v2 = b - c + v3 = d - c + + c1 = v1.cross(v2) + c2 = v3.cross(v2) + c3 = c2.cross(c1) + + v2_mag = v2.norm() + return jnp.arctan2(c3.dot(v2), v2_mag * c1.dot(c2)) + + +def random_gaussian_vector(shape, key, dtype=jnp.float32): + vec_array = jax.random.normal(key, shape + (3,), dtype) + return Vec3Array.from_array(vec_array) diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/model/layer_stack.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/model/layer_stack.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,274 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Function to stack repeats of a layer function without shared parameters.""" + +import collections +import contextlib +import functools +import inspect +from typing import Any, Callable, Optional, Tuple, Union + +import haiku as hk +import jax +import jax.numpy as jnp + +LayerStackCarry = collections.namedtuple('LayerStackCarry', ['x', 'rng']) +LayerStackScanned = collections.namedtuple('LayerStackScanned', + ['i', 'args_ys']) + +# WrappedFn should take in arbitrarily nested `jnp.ndarray`, and return the +# exact same type. We cannot express this with `typing`. So we just use it +# to inform the user. In reality, the typing below will accept anything. 
NestedArray = Any
WrappedFn = Callable[..., Union[NestedArray, Tuple[NestedArray]]]


def _check_no_varargs(f):
  """Raises ValueError if the first parameter of `f` is a `*args` parameter.

  Only the first parameter of the signature is inspected. Callables without
  any parameter pass the check.
  """
  params = list(inspect.signature(f).parameters.values())
  # Guard the index: a callable without parameters must not fail with an
  # unrelated IndexError here.
  if params and params[0].kind == inspect.Parameter.VAR_POSITIONAL:
    raise ValueError(
        'The function `f` should not have any `varargs` (that is *args) '
        'argument. Instead, it should only use explicit positional '
        'arguments.')


@contextlib.contextmanager
def nullcontext():
  """Context manager that does nothing."""
  yield


def maybe_with_rng(key):
  """Returns `hk.with_rng(key)`, or a no-op context if `key` is None."""
  if key is not None:
    return hk.with_rng(key)
  else:
    return nullcontext()


def maybe_fold_in(key, data):
  """Returns `jax.random.fold_in(key, data)`, or None if `key` is None."""
  if key is not None:
    return jax.random.fold_in(key, data)
  else:
    return None


class _LayerStack(hk.Module):
  """Module to compose parameterized functions, implemented as a scan."""

  def __init__(self,
               count: int,
               unroll: int,
               name: Optional[str] = None):
    """Iterate a function `f` `count` times, with non-shared parameters."""
    super().__init__(name=name)
    self._count = count
    self._unroll = unroll

  def __call__(self, x, *args_ys):
    """Applies the wrapped function `count` times to `x`.

    Args:
      x: Value threaded through all layers.
      *args_ys: Per-layer inputs; each one is indexed along its first axis to
        obtain the input of a single layer.

    Returns:
      Tuple `(x, zs)` of the final carry and the stacked per-layer outputs
      (`None` at init time when the wrapped function returns no state).
    """
    count = self._count
    if hk.running_init():
      # At initialization time, we run just one layer but add an extra first
      # dimension to every initialized tensor, making sure to use different
      # random keys for different slices.
      def creator(next_creator, shape, dtype, init, context):
        del context

        def multi_init(shape, dtype):
          assert shape[0] == count
          key = hk.maybe_next_rng_key()

          def rng_context_init(slice_idx):
            slice_key = maybe_fold_in(key, slice_idx)
            with maybe_with_rng(slice_key):
              return init(shape[1:], dtype)

          return jax.vmap(rng_context_init)(jnp.arange(count))

        return next_creator((count,) + tuple(shape), dtype, multi_init)

      def getter(next_getter, value, context):
        trailing_dims = len(context.original_shape) + 1
        sliced_value = jax.lax.index_in_dim(
            value, index=0, axis=value.ndim - trailing_dims, keepdims=False)
        return next_getter(sliced_value)

      with hk.experimental.custom_creator(
          creator), hk.experimental.custom_getter(getter):
        if len(args_ys) == 1 and args_ys[0] is None:
          args0 = (None,)
        else:
          args0 = [
              jax.lax.dynamic_index_in_dim(ys, 0, keepdims=False)
              for ys in args_ys
          ]
        x, z = self._call_wrapped(x, *args0)
        if z is None:
          return x, z

        # Broadcast state to hold each layer state.
        def broadcast_state(layer_state):
          return jnp.broadcast_to(
              layer_state, [count,] + list(layer_state.shape))
        zs = jax.tree_util.tree_map(broadcast_state, z)
        return x, zs
    else:
      # Use scan during apply, threading through random seed so that it's
      # unique for each layer.
      def layer(carry: LayerStackCarry, scanned: LayerStackScanned):
        rng = carry.rng

        def getter(next_getter, value, context):
          # Getter slices the full param at the current loop index.
          trailing_dims = len(context.original_shape) + 1
          assert value.shape[value.ndim - trailing_dims] == count, (
              f'Attempting to use a parameter stack of size '
              f'{value.shape[value.ndim - trailing_dims]} for a LayerStack of '
              f'size {count}.')

          sliced_value = jax.lax.dynamic_index_in_dim(
              value, scanned.i, axis=value.ndim - trailing_dims, keepdims=False)
          return next_getter(sliced_value)

        with hk.experimental.custom_getter(getter):
          if rng is None:
            out_x, z = self._call_wrapped(carry.x, *scanned.args_ys)
          else:
            rng, rng_ = jax.random.split(rng)
            with hk.with_rng(rng_):
              out_x, z = self._call_wrapped(carry.x, *scanned.args_ys)
        return LayerStackCarry(x=out_x, rng=rng), z

      carry = LayerStackCarry(x=x, rng=hk.maybe_next_rng_key())
      scanned = LayerStackScanned(i=jnp.arange(count, dtype=jnp.int32),
                                  args_ys=args_ys)

      carry, zs = hk.scan(
          layer, carry, scanned, length=count, unroll=self._unroll)
      return carry.x, zs

  def _call_wrapped(self,
                    x: jnp.ndarray,
                    *args,
                    ) -> Tuple[jnp.ndarray, Optional[jnp.ndarray]]:
    """Applies one layer; implemented by the subclasses."""
    raise NotImplementedError()


class _LayerStackNoState(_LayerStack):
  """_LayerStack impl with no per-layer state provided to the function."""

  def __init__(self,
               f: WrappedFn,
               count: int,
               unroll: int,
               name: Optional[str] = None):
    super().__init__(count=count, unroll=unroll, name=name)
    _check_no_varargs(f)
    self._f = f

  @hk.transparent
  def _call_wrapped(self, args, y):
    del y
    ret = self._f(*args)
    if len(args) == 1:
      # If the function takes a single argument, the wrapped function receives
      # a tuple of length 1, and therefore it must return a tuple of length 1.
      ret = (ret,)
    return ret, None


class _LayerStackWithState(_LayerStack):
  """_LayerStack impl with per-layer state provided to the function."""

  def __init__(self,
               f: WrappedFn,
               count: int,
               unroll: int,
               name: Optional[str] = None):
    super().__init__(count=count, unroll=unroll, name=name)
    self._f = f

  @hk.transparent
  def _call_wrapped(self, x, *args):
    return self._f(x, *args)


def layer_stack(num_layers: int,
                with_state=False,
                unroll: int = 1,
                name: Optional[str] = None):
  """Utility to wrap a Haiku function and recursively apply it to an input.

  A function is valid if it uses only explicit position parameters, and
  its return type matches its input type. The position parameters can be
  arbitrarily nested structures with `jnp.ndarray` at the leaf nodes. Note
  that kwargs are not supported, neither are functions with variable number
  of parameters (specified by `*args`).

  If `with_state=False` then the new, wrapped function can be understood as
  performing the following:
  ```
  for i in range(num_layers):
    x = f(x)
  return x
  ```

  And if `with_state=True`, assuming `f` takes two arguments on top of `x`:
  ```
  for i in range(num_layers):
    x, zs[i] = f(x, ys_0[i], ys_1[i])
  return x, zs
  ```
  The code using `layer_stack` for the above function would be:
  ```
  def f(x, y_0, y_1):
    ...
    return new_x, z
  x, zs = layer_stack.layer_stack(num_layers,
                                  with_state=True)(f)(x, ys_0, ys_1)
  ```

  Crucially, any parameters created inside `f` will not be shared across
  iterations.

  Args:
    num_layers: The number of times to iterate the wrapped function.
    with_state: Whether or not to pass per-layer state to the wrapped function.
    unroll: the unroll used by `scan`.
    name: Name of the Haiku context.

  Returns:
    Callable that will produce a layer stack when called with a valid function.
  """
  def iterate(f):
    if with_state:
      @functools.wraps(f)
      def wrapped(x, *args):
        # Every per-layer input must provide one slice per layer.
        for ys in args:
          assert ys.shape[0] == num_layers
        return _LayerStackWithState(
            f, num_layers, unroll=unroll, name=name)(x, *args)
    else:
      _check_no_varargs(f)
      @functools.wraps(f)
      def wrapped(*args):
        ret = _LayerStackNoState(
            f, num_layers, unroll=unroll, name=name)(args, None)[0]
        if len(args) == 1:
          # If the function takes a single argument, we must also return a
          # single value, and not a tuple of length 1.
          ret = ret[0]
        return ret

    return wrapped
  return iterate


# -----------------------------------------------------------------------------
# Changeset file boundary (diff header kept as a comment):
# diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/model/layer_stack_test.py
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/docker/alphafold/alphafold/model/layer_stack_test.py Tue Mar 01 02:53:05 2022 +0000
# @@ -0,0 +1,335 @@
# -----------------------------------------------------------------------------
# Copyright 2021 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for layer_stack."""

import functools
from absl.testing import absltest
from absl.testing import parameterized
from alphafold.model import layer_stack
import haiku as hk
import jax
import jax.numpy as jnp
import numpy as np
import scipy


# Suffixes applied by Haiku for repeated module names.
suffixes = [''] + [f'_{i}' for i in range(1, 100)]


def _slice_layers_params(layers_params):
  """Splits stacked layer_stack params into per-layer params.

  Each slice along the first axis of a stacked variable is stored under the
  last path component of its module name plus the suffix from `suffixes` for
  that slice index, so the result can be fed to the unrolled function.
  """
  sliced_layers_params = {}
  for k, v in layers_params.items():
    for inner_k in v:
      for var_slice, suffix in zip(v[inner_k], suffixes):
        k_new = k.split('/')[-1] + suffix
        if k_new not in sliced_layers_params:
          sliced_layers_params[k_new] = {}
        sliced_layers_params[k_new][inner_k] = var_slice
  return sliced_layers_params


class LayerStackTest(parameterized.TestCase):

  @parameterized.parameters([1, 2, 4])
  def test_layer_stack(self, unroll):
    """Compare layer_stack to the equivalent unrolled stack.

    Tests that the layer_stack application of a Haiku layer function is
    equivalent to repeatedly applying the layer function in an unrolled loop.

    Args:
      unroll: Number of unrolled layers.
    """
    num_layers = 20

    def inner_fn(x):
      x += hk.Linear(100, name='linear1')(x)
      x += hk.Linear(100, name='linear2')(x)
      return x

    def outer_fn_unrolled(x):
      for _ in range(num_layers):
        x = inner_fn(x)
      return x

    def outer_fn_layer_stack(x):
      stack = layer_stack.layer_stack(num_layers, unroll=unroll)(inner_fn)
      return stack(x)

    unrolled_fn = hk.transform(outer_fn_unrolled)
    layer_stack_fn = hk.transform(outer_fn_layer_stack)

    x = jax.random.uniform(jax.random.PRNGKey(0), [10, 256, 100])

    rng_init = jax.random.PRNGKey(42)

    params = layer_stack_fn.init(rng_init, x)

    sliced_params = _slice_layers_params(params)

    unrolled_pred = unrolled_fn.apply(sliced_params, None, x)
    layer_stack_pred = layer_stack_fn.apply(params, None, x)

    np.testing.assert_allclose(unrolled_pred, layer_stack_pred)

  def test_layer_stack_multi_args(self):
    """Compare layer_stack to the equivalent unrolled stack.

    Similar to `test_layer_stack`, but use a function that takes more than one
    argument.
    """
    num_layers = 20

    def inner_fn(x, y):
      x_out = x + hk.Linear(100, name='linear1')(y)
      y_out = y + hk.Linear(100, name='linear2')(x)
      return x_out, y_out

    def outer_fn_unrolled(x, y):
      for _ in range(num_layers):
        x, y = inner_fn(x, y)
      return x, y

    def outer_fn_layer_stack(x, y):
      stack = layer_stack.layer_stack(num_layers)(inner_fn)
      return stack(x, y)

    unrolled_fn = hk.transform(outer_fn_unrolled)
    layer_stack_fn = hk.transform(outer_fn_layer_stack)

    x = jax.random.uniform(jax.random.PRNGKey(0), [10, 256, 100])
    y = jax.random.uniform(jax.random.PRNGKey(1), [10, 256, 100])

    rng_init = jax.random.PRNGKey(42)

    params = layer_stack_fn.init(rng_init, x, y)

    sliced_params = _slice_layers_params(params)

    unrolled_x, unrolled_y = unrolled_fn.apply(sliced_params, None, x, y)
    layer_stack_x, layer_stack_y = layer_stack_fn.apply(params, None, x, y)

    np.testing.assert_allclose(unrolled_x, layer_stack_x)
    np.testing.assert_allclose(unrolled_y, layer_stack_y)

  def test_layer_stack_no_varargs(self):
    """Test an error is raised when using a function with varargs."""

    class VarArgsModule(hk.Module):
      """When used, this module should cause layer_stack to raise an Error."""

      def __call__(self, *args):
        return args

    class NoVarArgsModule(hk.Module):
      """This module should be fine to use with layer_stack."""

      def __call__(self, x):
        return x

    def build_and_init_stack(module_class):
      def stack_fn(x):
        module = module_class()
        return layer_stack.layer_stack(1)(module)(x)

      stack = hk.without_apply_rng(hk.transform(stack_fn))
      stack.init(jax.random.PRNGKey(1729), jnp.ones([5]))

    build_and_init_stack(NoVarArgsModule)
    with self.assertRaisesRegex(
        ValueError, 'The function `f` should not have any `varargs`'):
      build_and_init_stack(VarArgsModule)

  @parameterized.parameters([1, 2, 4])
  def test_layer_stack_grads(self, unroll):
    """Compare layer_stack gradients to the equivalent unrolled stack.

    Tests that the layer_stack application of a Haiku layer function is
    equivalent to repeatedly applying the layer function in an unrolled loop.

    Args:
      unroll: Number of unrolled layers.
    """
    num_layers = 20

    def inner_fn(x):
      x += hk.Linear(100, name='linear1')(x)
      x += hk.Linear(100, name='linear2')(x)
      return x

    def outer_fn_unrolled(x):
      for _ in range(num_layers):
        x = inner_fn(x)
      return x

    def outer_fn_layer_stack(x):
      stack = layer_stack.layer_stack(num_layers, unroll=unroll)(inner_fn)
      return stack(x)

    unrolled_fn = hk.transform(outer_fn_unrolled)
    layer_stack_fn = hk.transform(outer_fn_layer_stack)

    x = jax.random.uniform(jax.random.PRNGKey(0), [10, 256, 100])

    rng_init = jax.random.PRNGKey(42)

    params = layer_stack_fn.init(rng_init, x)

    sliced_params = _slice_layers_params(params)

    unrolled_grad = jax.grad(
        lambda p, x: jnp.mean(unrolled_fn.apply(p, None, x)))(sliced_params, x)
    layer_stack_grad = jax.grad(
        lambda p, x: jnp.mean(layer_stack_fn.apply(p, None, x)))(params, x)

    assert_fn = functools.partial(
        np.testing.assert_allclose, atol=1e-4, rtol=1e-4)

    # `jax.tree_multimap` no longer exists in current JAX releases, so the
    # two gradient trees are flattened and compared leaf by leaf instead.
    unrolled_leaves, unrolled_treedef = jax.tree_util.tree_flatten(
        unrolled_grad)
    stack_leaves, stack_treedef = jax.tree_util.tree_flatten(
        _slice_layers_params(layer_stack_grad))
    self.assertEqual(unrolled_treedef, stack_treedef)
    for unrolled_leaf, stack_leaf in zip(unrolled_leaves, stack_leaves):
      assert_fn(unrolled_leaf, stack_leaf)

  def test_random(self):
    """Random numbers should be handled correctly."""
    # `import scipy` alone does not guarantee that the `stats` submodule is
    # loaded; import it explicitly where it is used.
    import scipy.stats  # pylint: disable=g-import-not-at-top

    n = 100

    @hk.transform
    @layer_stack.layer_stack(n)
    def add_random(x):
      x = x + jax.random.normal(hk.next_rng_key())
      return x

    # Evaluate a bunch of times
    key, *keys = jax.random.split(jax.random.PRNGKey(7), 1024 + 1)
    params = add_random.init(key, 0.)
    apply_fn = jax.jit(add_random.apply)
    values = [apply_fn(params, key, 0.) for key in keys]

    # Should be roughly N(0, sqrt(n))
    cdf = scipy.stats.norm(scale=np.sqrt(n)).cdf
    _, p = scipy.stats.kstest(values, cdf)
    self.assertLess(0.3, p)

  def test_threading(self):
    """Test @layer_stack when the function gets per-layer state."""
    n = 5

    @layer_stack.layer_stack(n, with_state=True)
    def f(x, y):
      x = x + y * jax.nn.one_hot(y, len(x)) / 10
      return x, 2 * y

    @hk.without_apply_rng
    @hk.transform
    def g(x, ys):
      x, zs = f(x, ys)
      # Check here to catch issues at init time
      self.assertEqual(zs.shape, (n,))
      return x, zs

    rng = jax.random.PRNGKey(7)
    x = np.zeros(n)
    ys = np.arange(n).astype(np.float32)
    params = g.init(rng, x, ys)
    x, zs = g.apply(params, x, ys)
    self.assertTrue(np.allclose(x, [0, .1, .2, .3, .4]))
    self.assertTrue(np.all(zs == 2 * ys))

  def test_nested_stacks(self):
    """Nested layer stacks should stack parameters along two leading axes."""
    def stack_fn(x):
      def layer_fn(x):
        return hk.Linear(100)(x)

      outer_fn = layer_stack.layer_stack(10)(layer_fn)

      layer_outer = layer_stack.layer_stack(20)(outer_fn)
      return layer_outer(x)

    hk_mod = hk.transform(stack_fn)
    apply_rng, init_rng = jax.random.split(jax.random.PRNGKey(0))

    params = hk_mod.init(init_rng, jnp.zeros([10, 100]))

    hk_mod.apply(params, apply_rng, jnp.zeros([10, 100]))

    p, = params.values()

    # Use the test-case assertions so failures report both shapes and are not
    # stripped when Python runs with -O.
    self.assertEqual(p['w'].shape, (10, 20, 100, 100))
    self.assertEqual(p['b'].shape, (10, 20, 100))

  def test_with_state_multi_args(self):
    """Test layer_stack with state with multiple arguments."""
    width = 4
    batch_size = 5
    stack_height = 3

    def f_with_multi_args(x, a, b):
      return hk.Linear(
          width, w_init=hk.initializers.Constant(
              jnp.eye(width)))(x) * a + b, None

    @hk.without_apply_rng
    @hk.transform
    def hk_fn(x):
      return layer_stack.layer_stack(
          stack_height,
          with_state=True)(f_with_multi_args)(x, jnp.full([stack_height], 2.),
                                              jnp.ones([stack_height]))

    x = jnp.zeros([batch_size, width])
    key_seq = hk.PRNGSequence(19)
    params = hk_fn.init(next(key_seq), x)
    output, z = hk_fn.apply(params, x)
    self.assertIsNone(z)
    self.assertEqual(output.shape, (batch_size, width))
    np.testing.assert_equal(output, np.full([batch_size, width], 7.))

  def test_with_container_state(self):
    """Test layer_stack when the per-layer state is a dict of arrays."""
    width = 2
    batch_size = 2
    stack_height = 3

    def f_with_container_state(x):
      hk_layer = hk.Linear(
          width, w_init=hk.initializers.Constant(jnp.eye(width)))
      layer_output = hk_layer(x)
      layer_state = {
          'raw_output': layer_output,
          'output_projection': jnp.sum(layer_output)
      }
      return layer_output + jnp.ones_like(layer_output), layer_state

    @hk.without_apply_rng
    @hk.transform
    def hk_fn(x):
      return layer_stack.layer_stack(
          stack_height,
          with_state=True)(f_with_container_state)(x)

    x = jnp.zeros([batch_size, width])
    key_seq = hk.PRNGSequence(19)
    params = hk_fn.init(next(key_seq), x)
    output, z = hk_fn.apply(params, x)
    self.assertEqual(z['raw_output'].shape, (stack_height, batch_size, width))
    self.assertEqual(output.shape, (batch_size, width))
    self.assertEqual(z['output_projection'].shape, (stack_height,))
    np.testing.assert_equal(np.sum(z['output_projection']), np.array(12.))
    np.testing.assert_equal(
        np.all(z['raw_output'] == np.array([0., 1., 2.])[..., None, None]),
        np.array(True))


if __name__ == '__main__':
  absltest.main()


# -----------------------------------------------------------------------------
# Changeset file boundary (diff header kept as a comment):
# diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/model/lddt.py
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/docker/alphafold/alphafold/model/lddt.py Tue Mar 01 02:53:05 2022 +0000
# @@ -0,0 +1,88 @@
# -----------------------------------------------------------------------------
# Copyright 2021 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""lDDT protein distance score."""
import jax.numpy as jnp


def lddt(predicted_points,
         true_points,
         true_points_mask,
         cutoff=15.,
         per_residue=False):
  """Measure (approximate) lDDT for a batch of coordinates.

  lDDT reference:
  Mariani, V., Biasini, M., Barbato, A. & Schwede, T. lDDT: A local
  superposition-free score for comparing protein structures and models using
  distance difference tests. Bioinformatics 29, 2722–2728 (2013).

  lDDT is a measure of the difference between the true distance matrix and the
  distance matrix of the predicted points.  The difference is computed only on
  points closer than cutoff *in the true structure*.

  This function does not compute the exact lDDT value that the original paper
  describes because it does not include terms for physical feasibility
  (e.g. bond length violations). Therefore this is only an approximate
  lDDT score.

  Args:
    predicted_points: (batch, length, 3) array of predicted 3D points
    true_points: (batch, length, 3) array of true 3D points
    true_points_mask: (batch, length, 1) binary-valued float array.  This mask
      should be 1 for points that exist in the true points.
    cutoff: Maximum distance for a pair of points to be included
    per_residue: If true, return score for each residue.  Note that the overall
      lDDT is not exactly the mean of the per_residue lDDT's because some
      residues have more contacts than others.

  Returns:
    An (approximate, see above) lDDT score in the range 0-1. The result has
    shape (batch, length) when `per_residue` is true and (batch,) otherwise.
    A residue (or structure) with no scorable pairs evaluates to 1 because the
    same epsilon is added to numerator and denominator.
  """

  assert len(predicted_points.shape) == 3
  assert predicted_points.shape[-1] == 3
  assert true_points_mask.shape[-1] == 1
  assert len(true_points_mask.shape) == 3

  # Compute true and predicted distance matrices. The 1e-10 inside the sqrt
  # keeps the value strictly positive on the diagonal.
  dmat_true = jnp.sqrt(1e-10 + jnp.sum(
      (true_points[:, :, None] - true_points[:, None, :])**2, axis=-1))

  dmat_predicted = jnp.sqrt(1e-10 + jnp.sum(
      (predicted_points[:, :, None] -
       predicted_points[:, None, :])**2, axis=-1))

  # 0/1 weights selecting the pairs that contribute to the score: closer than
  # `cutoff` in the true structure, both points present, and not a self pair.
  dists_to_score = (
      (dmat_true < cutoff).astype(jnp.float32) * true_points_mask *
      jnp.transpose(true_points_mask, [0, 2, 1]) *
      (1. - jnp.eye(dmat_true.shape[1]))  # Exclude self-interaction.
  )

  # Absolute error between true and predicted pairwise distances. Pairs that
  # must not be scored are not altered here; they are dropped below through
  # the `dists_to_score` weights.
  dist_l1 = jnp.abs(dmat_true - dmat_predicted)

  # True lDDT uses a number of fixed bins.
  # We ignore the physical plausibility correction to lDDT, though.
  score = 0.25 * ((dist_l1 < 0.5).astype(jnp.float32) +
                  (dist_l1 < 1.0).astype(jnp.float32) +
                  (dist_l1 < 2.0).astype(jnp.float32) +
                  (dist_l1 < 4.0).astype(jnp.float32))

  # Normalize over the appropriate axes.
  reduce_axes = (-1,) if per_residue else (-2, -1)
  norm = 1. / (1e-10 + jnp.sum(dists_to_score, axis=reduce_axes))
  score = norm * (1e-10 + jnp.sum(dists_to_score * score, axis=reduce_axes))

  return score


# Patch metadata and license header opening the next file of the changeset,
# kept verbatim as comments:
# diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/model/lddt_test.py
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/docker/alphafold/alphafold/model/lddt_test.py Tue Mar 01 02:53:05 2022 +0000
# @@ -0,0 +1,79 @@
# Copyright 2021 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Tests for lddt."""

from absl.testing import absltest
from absl.testing import parameterized
from alphafold.model import lddt
import numpy as np


class LddtTest(parameterized.TestCase, absltest.TestCase):
  """Checks per-residue lDDT on small hand-built point sets."""

  @parameterized.named_parameters(
      ('same',
       [[0, 0, 0], [5, 0, 0], [10, 0, 0]],
       [[0, 0, 0], [5, 0, 0], [10, 0, 0]],
       [1, 1, 1]),
      ('all_shifted',
       [[0, 0, 0], [5, 0, 0], [10, 0, 0]],
       [[-1, 0, 0], [4, 0, 0], [9, 0, 0]],
       [1, 1, 1]),
      ('all_rotated',
       [[0, 0, 0], [5, 0, 0], [10, 0, 0]],
       [[0, 0, 0], [0, 5, 0], [0, 10, 0]],
       [1, 1, 1]),
      ('half_a_dist',
       [[0, 0, 0], [5, 0, 0]],
       [[0, 0, 0], [5.5-1e-5, 0, 0]],
       [1, 1]),
      ('one_a_dist',
       [[0, 0, 0], [5, 0, 0]],
       [[0, 0, 0], [6-1e-5, 0, 0]],
       [0.75, 0.75]),
      ('two_a_dist',
       [[0, 0, 0], [5, 0, 0]],
       [[0, 0, 0], [7-1e-5, 0, 0]],
       [0.5, 0.5]),
      ('four_a_dist',
       [[0, 0, 0], [5, 0, 0]],
       [[0, 0, 0], [9-1e-5, 0, 0]],
       [0.25, 0.25],),
      ('five_a_dist',
       [[0, 0, 0], [16-1e-5, 0, 0]],
       [[0, 0, 0], [11, 0, 0]],
       [0, 0]),
      ('no_pairs',
       [[0, 0, 0], [20, 0, 0]],
       [[0, 0, 0], [25-1e-5, 0, 0]],
       [1, 1]),
  )
  def test_lddt(
      self, predicted_pos, true_pos, exp_lddt):
    """Compares per-residue lDDT (cutoff 15) against the expected values."""
    # Add a leading batch dimension of size 1 to every input.
    predicted_pos = np.array([predicted_pos], dtype=np.float32)
    true_points_mask = np.array([[[1]] * len(true_pos)], dtype=np.float32)
    true_pos = np.array([true_pos], dtype=np.float32)
    cutoff = 15.0
    per_residue = True

    result = lddt.lddt(
        predicted_pos, true_pos, true_points_mask, cutoff,
        per_residue)

    np.testing.assert_almost_equal(result, [exp_lddt], decimal=4)


if __name__ == '__main__':
  absltest.main()

# Patch metadata opening the next file of the changeset, kept verbatim as
# comments:
# diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/model/mapping.py
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/docker/alphafold/alphafold/model/mapping.py Tue Mar 01 02:53:05 2022 +0000
# @@ -0,0 +1,218 @@
# Copyright 2021 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Specialized mapping functions."""

import functools

from typing import Any, Callable, Optional, Sequence, Union

import haiku as hk
import jax
import jax.numpy as jnp


PYTREE = Any
PYTREE_JAX_ARRAY = Any

partial = functools.partial
# Sentinel that stands in for a `None` axis (i.e. "do not map this leaf").
# It is always compared by identity.
PROXY = object()


def _maybe_slice(array, i, slice_size, axis):
  """Slices `array` along `axis`, or returns it whole when axis is PROXY."""
  if axis is PROXY:
    return array
  else:
    return jax.lax.dynamic_slice_in_dim(
        array, i, slice_size=slice_size, axis=axis)


def _maybe_get_size(array, axis):
  """Returns the size of `array` along `axis`, or -1 when axis is PROXY."""
  # Identity test, consistent with `_maybe_slice`: PROXY is a unique sentinel
  # object, so equality semantics of whatever `axis` happens to be must not
  # be involved.
  if axis is PROXY:
    return -1
  else:
    return array.shape[axis]


def _expand_axes(axes, values, name='sharded_apply'):
  """Expands an axes spec to a pytree with the same structure as `values`."""
  values_tree_def = jax.tree_flatten(values)[1]
  flat_axes = jax.api_util.flatten_axes(name, values_tree_def, axes)
  # Replace None's with PROXY
  flat_axes = [PROXY if x is None else x for x in flat_axes]
  return jax.tree_unflatten(values_tree_def, flat_axes)


def sharded_map(
    fun: Callable[..., PYTREE_JAX_ARRAY],
    shard_size: Union[int, None] = 1,
    in_axes: Union[int, PYTREE] = 0,
    out_axes: Union[int, PYTREE] = 0) -> Callable[..., PYTREE_JAX_ARRAY]:
  """Sharded vmap.

  Maps `fun` over axes, in a way similar to vmap, but does so in shards of
  `shard_size`. This allows a smooth trade-off between memory usage
  (as in a plain map) vs higher throughput (as in a vmap).

  Args:
    fun: Function to apply smap transform to.
    shard_size: Integer denoting shard size.
    in_axes: Either integer or pytree describing which axis to map over for each
      input to `fun`, None denotes broadcasting.
    out_axes: integer or pytree denoting to what axis in the output the mapped
      over axis maps.

  Returns:
    function with smap applied.
  """
  vmapped_fun = hk.vmap(fun, in_axes, out_axes)
  return sharded_apply(vmapped_fun, shard_size, in_axes, out_axes)


def sharded_apply(
    fun: Callable[..., PYTREE_JAX_ARRAY],  # pylint: disable=g-bare-generic
    shard_size: Union[int, None] = 1,
    in_axes: Union[int, PYTREE] = 0,
    out_axes: Union[int, PYTREE] = 0,
    new_out_axes: bool = False) -> Callable[..., PYTREE_JAX_ARRAY]:
  """Sharded apply.

  Applies `fun` over shards to axes, in a way similar to vmap,
  but does so in shards of `shard_size`. Shards are stacked after.
  This allows a smooth trade-off between
  memory usage (as in a plain map) vs higher throughput (as in a vmap).

  Args:
    fun: Function to apply smap transform to.
    shard_size: Integer denoting shard size. None disables sharding, in which
      case `fun` itself is returned.
    in_axes: Either integer or pytree describing which axis to map over for each
      input to `fun`, None denotes broadcasting.
    out_axes: integer or pytree denoting to what axis in the output the mapped
      over axis maps.
    new_out_axes: whether to stack outputs on new axes. This assumes that the
      output sizes for each shard (including the possible remainder shard) are
      the same.

  Returns:
    function with smap applied.

  Raises:
    NotImplementedError: If `new_out_axes` is true.
  """
  docstr = ('Mapped version of {fun}. Takes similar arguments to {fun} '
            'but with additional array axes over which {fun} is mapped.')
  if new_out_axes:
    raise NotImplementedError('New output axes not yet implemented.')

  # shard size None denotes no sharding
  if shard_size is None:
    return fun

  @jax.util.wraps(fun, docstr=docstr)
  def mapped_fn(*args):
    # Expand in axes and Determine Loop range
    in_axes_ = _expand_axes(in_axes, args)

    # Every mapped leaf must share one size; unmapped leaves report -1.
    in_sizes = jax.tree_multimap(_maybe_get_size, args, in_axes_)
    flat_sizes = jax.tree_flatten(in_sizes)[0]
    in_size = max(flat_sizes)
    assert all(i in {in_size, -1} for i in flat_sizes)

    # Number of shards in addition to the final (possibly shorter) one.
    num_extra_shards = (in_size - 1) // shard_size

    # Fix Up if necessary
    last_shard_size = in_size % shard_size
    last_shard_size = shard_size if last_shard_size == 0 else last_shard_size

    def apply_fun_to_slice(slice_start, slice_size):
      input_slice = jax.tree_multimap(
          lambda array, axis: _maybe_slice(
              array, slice_start, slice_size, axis),
          args, in_axes_)
      return fun(*input_slice)

    # Shapes/dtypes of the output for the last shard, obtained without
    # running the computation.
    remainder_shape_dtype = hk.eval_shape(
        partial(apply_fun_to_slice, 0, last_shard_size))
    out_dtypes = jax.tree_map(lambda x: x.dtype, remainder_shape_dtype)
    out_shapes = jax.tree_map(lambda x: x.shape, remainder_shape_dtype)
    out_axes_ = _expand_axes(out_axes, remainder_shape_dtype)

    if num_extra_shards > 0:
      regular_shard_shape_dtype = hk.eval_shape(
          partial(apply_fun_to_slice, 0, shard_size))
      shard_shapes = jax.tree_map(lambda x: x.shape, regular_shard_shape_dtype)

      def make_output_shape(axis, shard_shape, remainder_shape):
        # Full output extent along `axis`: all regular shards + the remainder.
        return shard_shape[:axis] + (
            shard_shape[axis] * num_extra_shards +
            remainder_shape[axis],) + shard_shape[axis + 1:]

      out_shapes = jax.tree_multimap(make_output_shape, out_axes_, shard_shapes,
                                     out_shapes)

    # Calls dynamic Update slice with different argument order
    # This is here since tree_multimap only works with positional arguments
    def dynamic_update_slice_in_dim(full_array, update, axis, i):
      return jax.lax.dynamic_update_slice_in_dim(full_array, update, i, axis)

    def compute_shard(outputs, slice_start, slice_size):
      slice_out = apply_fun_to_slice(slice_start, slice_size)
      update_slice = partial(
          dynamic_update_slice_in_dim, i=slice_start)
      return jax.tree_multimap(update_slice, outputs, slice_out, out_axes_)

    def scan_iteration(outputs, i):
      new_outputs = compute_shard(outputs, i, shard_size)
      return new_outputs, ()

    # Start offsets of all full-size shards.
    slice_starts = jnp.arange(0, in_size - shard_size + 1, shard_size)

    def allocate_buffer(dtype, shape):
      return jnp.zeros(shape, dtype=dtype)

    outputs = jax.tree_multimap(allocate_buffer, out_dtypes, out_shapes)

    if slice_starts.shape[0] > 0:
      outputs, _ = hk.scan(scan_iteration, outputs, slice_starts)

    # A shorter trailing shard is handled outside the scan because its slice
    # size differs from `shard_size`.
    if last_shard_size != shard_size:
      remainder_start = in_size - last_shard_size
      outputs = compute_shard(outputs, remainder_start, last_shard_size)

    return outputs

  return mapped_fn


def inference_subbatch(
    module: Callable[..., PYTREE_JAX_ARRAY],
    subbatch_size: int,
    batched_args: Sequence[PYTREE_JAX_ARRAY],
    nonbatched_args: Sequence[PYTREE_JAX_ARRAY],
    low_memory: bool = True,
    input_subbatch_dim: int = 0,
    output_subbatch_dim: Optional[int] = None) -> PYTREE_JAX_ARRAY:
  """Run through subbatches (like batch apply but with split and concat).

  Args:
    module: Callable invoked as `module(*batched_args, *nonbatched_args)`.
    subbatch_size: Shard size handed to `sharded_apply`.
    batched_args: Arguments that are split along `input_subbatch_dim`. Must be
      non-empty.
    nonbatched_args: Arguments passed unchanged to every call.
    low_memory: If false, `module` is called once on the full inputs.
    input_subbatch_dim: Axis of the batched inputs to split over.
    output_subbatch_dim: Axis of the output the shards are written along;
      defaults to `input_subbatch_dim`.

  Returns:
    The output of `module` over the full inputs.
  """
  assert len(batched_args) > 0  # pylint: disable=g-explicit-length-test

  if not low_memory:
    args = list(batched_args) + list(nonbatched_args)
    return module(*args)

  if output_subbatch_dim is None:
    output_subbatch_dim = input_subbatch_dim

  def run_module(*batched_args):
    args = list(batched_args) + list(nonbatched_args)
    return module(*args)
  sharded_module = sharded_apply(run_module,
                                 shard_size=subbatch_size,
                                 in_axes=input_subbatch_dim,
                                 out_axes=output_subbatch_dim)
  return sharded_module(*batched_args)


# Patch metadata opening the next file of the changeset, kept verbatim as
# comments (the header continues on the following line of the patch):
# diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/model/model.py
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++
# Remainder of the patch header for this file, kept verbatim as comments:
# b/docker/alphafold/alphafold/model/model.py Tue Mar 01 02:53:05 2022 +0000
# @@ -0,0 +1,177 @@
# Copyright 2021 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Code for constructing the model."""
from typing import Any, Mapping, Optional, Union

from absl import logging
from alphafold.common import confidence
from alphafold.model import features
from alphafold.model import modules
from alphafold.model import modules_multimer
import haiku as hk
import jax
import ml_collections
import numpy as np
import tensorflow.compat.v1 as tf
import tree


def get_confidence_metrics(
    prediction_result: Mapping[str, Any],
    multimer_mode: bool) -> Mapping[str, Any]:
  """Post processes prediction_result to get confidence metrics.

  Args:
    prediction_result: Model output. Must contain
      `['predicted_lddt']['logits']`; `'predicted_aligned_error'` is optional.
    multimer_mode: Whether the result comes from the multimer model.

  Returns:
    A dict with 'plddt'; the predicted-aligned-error metrics and 'ptm' when
    the PAE head output is present; 'iptm' in multimer mode with PAE; and
    'ranking_confidence' (0.8 * iptm + 0.2 * ptm for multimer with PAE, mean
    pLDDT for monomer).
  """
  # NOTE(review): in multimer mode without 'predicted_aligned_error' no
  # 'ranking_confidence' entry is produced - confirm callers never hit that
  # combination.
  confidence_metrics = {}
  confidence_metrics['plddt'] = confidence.compute_plddt(
      prediction_result['predicted_lddt']['logits'])
  if 'predicted_aligned_error' in prediction_result:
    confidence_metrics.update(confidence.compute_predicted_aligned_error(
        logits=prediction_result['predicted_aligned_error']['logits'],
        breaks=prediction_result['predicted_aligned_error']['breaks']))
    confidence_metrics['ptm'] = confidence.predicted_tm_score(
        logits=prediction_result['predicted_aligned_error']['logits'],
        breaks=prediction_result['predicted_aligned_error']['breaks'],
        asym_id=None)
    if multimer_mode:
      # Compute the ipTM only for the multimer model.
      confidence_metrics['iptm'] = confidence.predicted_tm_score(
          logits=prediction_result['predicted_aligned_error']['logits'],
          breaks=prediction_result['predicted_aligned_error']['breaks'],
          asym_id=prediction_result['predicted_aligned_error']['asym_id'],
          interface=True)
      confidence_metrics['ranking_confidence'] = (
          0.8 * confidence_metrics['iptm'] + 0.2 * confidence_metrics['ptm'])

  if not multimer_mode:
    # Monomer models use mean pLDDT for model ranking.
    confidence_metrics['ranking_confidence'] = np.mean(
        confidence_metrics['plddt'])

  return confidence_metrics


class RunModel:
  """Container for JAX model."""

  def __init__(self,
               config: ml_collections.ConfigDict,
               params: Optional[Mapping[str, Mapping[str, np.ndarray]]] = None):
    """Builds the jitted init/apply functions for the configured model.

    Args:
      config: Model configuration; `config.model.global_config.multimer_mode`
        selects between the multimer and the monomer network.
      params: Optional model parameters. When omitted they are initialized
        randomly on first use (see `init_params`).
    """
    self.config = config
    self.params = params
    self.multimer_mode = config.model.global_config.multimer_mode

    if self.multimer_mode:
      def _forward_fn(batch):
        model = modules_multimer.AlphaFold(self.config.model)
        return model(
            batch,
            is_training=False)
    else:
      def _forward_fn(batch):
        model = modules.AlphaFold(self.config.model)
        return model(
            batch,
            is_training=False,
            compute_loss=False,
            ensemble_representations=True)

    # Transform once and jit both entry points of the same transformed pair.
    transformed = hk.transform(_forward_fn)
    self.apply = jax.jit(transformed.apply)
    self.init = jax.jit(transformed.init)

  def init_params(self, feat: features.FeatureDict, random_seed: int = 0):
    """Initializes the model parameters.

    If none were provided when this class was instantiated then the parameters
    are randomly initialized.

    Args:
      feat: A dictionary of NumPy feature arrays as output by
        RunModel.process_features.
      random_seed: A random seed to use to initialize the parameters if none
        were set when this class was initialized.
    """
    if not self.params:
      # Init params randomly.
      rng = jax.random.PRNGKey(random_seed)
      self.params = hk.data_structures.to_mutable_dict(
          self.init(rng, feat))
      logging.warning('Initialized parameters randomly')

  def process_features(
      self,
      raw_features: Union[tf.train.Example, features.FeatureDict],
      random_seed: int) -> features.FeatureDict:
    """Processes features to prepare for feeding them into the model.

    Args:
      raw_features: The output of the data pipeline either as a dict of NumPy
        arrays or as a tf.train.Example.
      random_seed: The random seed to use when processing the features.

    Returns:
      A dict of NumPy feature arrays suitable for feeding into the model. In
      multimer mode `raw_features` is returned unchanged.
    """

    if self.multimer_mode:
      return raw_features

    # Single-chain mode.
    if isinstance(raw_features, dict):
      return features.np_example_to_features(
          np_example=raw_features,
          config=self.config,
          random_seed=random_seed)
    else:
      return features.tf_example_to_features(
          tf_example=raw_features,
          config=self.config,
          random_seed=random_seed)

  def eval_shape(self, feat: features.FeatureDict) -> jax.ShapeDtypeStruct:
    """Returns the model's output shapes for `feat` via `jax.eval_shape`.

    Parameters are initialized first if none are set (see `init_params`).

    Args:
      feat: A dictionary of NumPy feature arrays as output by
        RunModel.process_features.

    Returns:
      The result of `jax.eval_shape` on the model's apply function.
    """
    self.init_params(feat)
    logging.info('Running eval_shape with shape(feat) = %s',
                 tree.map_structure(lambda x: x.shape, feat))
    shape = jax.eval_shape(self.apply, self.params, jax.random.PRNGKey(0), feat)
    logging.info('Output shape was %s', shape)
    return shape

  def predict(self,
              feat: features.FeatureDict,
              random_seed: int,
              ) -> Mapping[str, Any]:
    """Makes a prediction by inferencing the model on the provided features.

    Args:
      feat: A dictionary of NumPy feature arrays as output by
        RunModel.process_features.
      random_seed: The random seed to use when running the model. In the
        multimer model this controls the MSA sampling.

    Returns:
      A dictionary of model outputs, extended with the entries produced by
      `get_confidence_metrics`.
    """
    self.init_params(feat)
    logging.info('Running predict with shape(feat) = %s',
                 tree.map_structure(lambda x: x.shape, feat))
    result = self.apply(self.params, jax.random.PRNGKey(random_seed), feat)

    # This block is to ensure benchmark timings are accurate. Some blocking is
    # already happening when computing get_confidence_metrics, and this ensures
    # all outputs are blocked on.
    jax.tree_map(lambda x: x.block_until_ready(), result)
    result.update(
        get_confidence_metrics(result, multimer_mode=self.multimer_mode))
    logging.info('Output shape was %s',
                 tree.map_structure(lambda x: x.shape, result))
    return result


# Patch metadata, license header and the opening of the module docstring of
# the next file of the changeset, kept verbatim as comments (the docstring is
# closed on the following line of the patch):
# diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/model/modules.py
# --- /dev/null Thu Jan 01 00:00:00 1970 +0000
# +++ b/docker/alphafold/alphafold/model/modules.py Tue Mar 01 02:53:05 2022 +0000
# @@ -0,0 +1,2105 @@
# Copyright 2021 DeepMind Technologies Limited
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# """Modules and code used in the core part of AlphaFold.
#
# The structure generation code is in 'folding.py'.
+""" +import functools +from alphafold.common import residue_constants +from alphafold.model import all_atom +from alphafold.model import common_modules +from alphafold.model import folding +from alphafold.model import layer_stack +from alphafold.model import lddt +from alphafold.model import mapping +from alphafold.model import prng +from alphafold.model import quat_affine +from alphafold.model import utils +import haiku as hk +import jax +import jax.numpy as jnp + + +def softmax_cross_entropy(logits, labels): + """Computes softmax cross entropy given logits and one-hot class labels.""" + loss = -jnp.sum(labels * jax.nn.log_softmax(logits), axis=-1) + return jnp.asarray(loss) + + +def sigmoid_cross_entropy(logits, labels): + """Computes sigmoid cross entropy given logits and multiple class labels.""" + log_p = jax.nn.log_sigmoid(logits) + # log(1 - sigmoid(x)) = log_sigmoid(-x), the latter is more numerically stable + log_not_p = jax.nn.log_sigmoid(-logits) + loss = -labels * log_p - (1. 
- labels) * log_not_p + return jnp.asarray(loss) + + +def apply_dropout(*, tensor, safe_key, rate, is_training, broadcast_dim=None): + """Applies dropout to a tensor.""" + if is_training and rate != 0.0: + shape = list(tensor.shape) + if broadcast_dim is not None: + shape[broadcast_dim] = 1 + keep_rate = 1.0 - rate + keep = jax.random.bernoulli(safe_key.get(), keep_rate, shape=shape) + return keep * tensor / keep_rate + else: + return tensor + + +def dropout_wrapper(module, + input_act, + mask, + safe_key, + global_config, + output_act=None, + is_training=True, + **kwargs): + """Applies module + dropout + residual update.""" + if output_act is None: + output_act = input_act + + gc = global_config + residual = module(input_act, mask, is_training=is_training, **kwargs) + dropout_rate = 0.0 if gc.deterministic else module.config.dropout_rate + + if module.config.shared_dropout: + if module.config.orientation == 'per_row': + broadcast_dim = 0 + else: + broadcast_dim = 1 + else: + broadcast_dim = None + + residual = apply_dropout(tensor=residual, + safe_key=safe_key, + rate=dropout_rate, + is_training=is_training, + broadcast_dim=broadcast_dim) + + new_act = output_act + residual + + return new_act + + +def create_extra_msa_feature(batch): + """Expand extra_msa into 1hot and concat with other extra msa features. + + We do this as late as possible as the one_hot extra msa can be very large. + + Arguments: + batch: a dictionary with the following keys: + * 'extra_msa': [N_extra_seq, N_res] MSA that wasn't selected as a cluster + centre. Note, that this is not one-hot encoded. + * 'extra_has_deletion': [N_extra_seq, N_res] Whether there is a deletion to + the left of each position in the extra MSA. + * 'extra_deletion_value': [N_extra_seq, N_res] The number of deletions to + the left of each position in the extra MSA. + + Returns: + Concatenated tensor of extra MSA features. 
+ """ + # 23 = 20 amino acids + 'X' for unknown + gap + bert mask + msa_1hot = jax.nn.one_hot(batch['extra_msa'], 23) + msa_feat = [msa_1hot, + jnp.expand_dims(batch['extra_has_deletion'], axis=-1), + jnp.expand_dims(batch['extra_deletion_value'], axis=-1)] + return jnp.concatenate(msa_feat, axis=-1) + + +class AlphaFoldIteration(hk.Module): + """A single recycling iteration of AlphaFold architecture. + + Computes ensembled (averaged) representations from the provided features. + These representations are then passed to the various heads + that have been requested by the configuration file. Each head also returns a + loss which is combined as a weighted sum to produce the total loss. + + Jumper et al. (2021) Suppl. Alg. 2 "Inference" lines 3-22 + """ + + def __init__(self, config, global_config, name='alphafold_iteration'): + super().__init__(name=name) + self.config = config + self.global_config = global_config + + def __call__(self, + ensembled_batch, + non_ensembled_batch, + is_training, + compute_loss=False, + ensemble_representations=False, + return_representations=False): + + num_ensemble = jnp.asarray(ensembled_batch['seq_length'].shape[0]) + + if not ensemble_representations: + assert ensembled_batch['seq_length'].shape[0] == 1 + + def slice_batch(i): + b = {k: v[i] for k, v in ensembled_batch.items()} + b.update(non_ensembled_batch) + return b + + # Compute representations for each batch element and average. + evoformer_module = EmbeddingsAndEvoformer( + self.config.embeddings_and_evoformer, self.global_config) + batch0 = slice_batch(0) + representations = evoformer_module(batch0, is_training) + + # MSA representations are not ensembled so + # we don't pass tensor into the loop. + msa_representation = representations['msa'] + del representations['msa'] + + # Average the representations (except MSA) over the batch dimension. 
+ if ensemble_representations: + def body(x): + """Add one element to the representations ensemble.""" + i, current_representations = x + feats = slice_batch(i) + representations_update = evoformer_module( + feats, is_training) + + new_representations = {} + for k in current_representations: + new_representations[k] = ( + current_representations[k] + representations_update[k]) + return i+1, new_representations + + if hk.running_init(): + # When initializing the Haiku module, run one iteration of the + # while_loop to initialize the Haiku modules used in `body`. + _, representations = body((1, representations)) + else: + _, representations = hk.while_loop( + lambda x: x[0] < num_ensemble, + body, + (1, representations)) + + for k in representations: + if k != 'msa': + representations[k] /= num_ensemble.astype(representations[k].dtype) + + representations['msa'] = msa_representation + batch = batch0 # We are not ensembled from here on. + + heads = {} + for head_name, head_config in sorted(self.config.heads.items()): + if not head_config.weight: + continue # Do not instantiate zero-weight heads. + + head_factory = { + 'masked_msa': MaskedMsaHead, + 'distogram': DistogramHead, + 'structure_module': functools.partial( + folding.StructureModule, compute_loss=compute_loss), + 'predicted_lddt': PredictedLDDTHead, + 'predicted_aligned_error': PredictedAlignedErrorHead, + 'experimentally_resolved': ExperimentallyResolvedHead, + }[head_name] + heads[head_name] = (head_config, + head_factory(head_config, self.global_config)) + + total_loss = 0. 
+ ret = {} + ret['representations'] = representations + + def loss(module, head_config, ret, name, filter_ret=True): + if filter_ret: + value = ret[name] + else: + value = ret + loss_output = module.loss(value, batch) + ret[name].update(loss_output) + loss = head_config.weight * ret[name]['loss'] + return loss + + for name, (head_config, module) in heads.items(): + # Skip PredictedLDDTHead and PredictedAlignedErrorHead until + # StructureModule is executed. + if name in ('predicted_lddt', 'predicted_aligned_error'): + continue + else: + ret[name] = module(representations, batch, is_training) + if 'representations' in ret[name]: + # Extra representations from the head. Used by the structure module + # to provide activations for the PredictedLDDTHead. + representations.update(ret[name].pop('representations')) + if compute_loss: + total_loss += loss(module, head_config, ret, name) + + if self.config.heads.get('predicted_lddt.weight', 0.0): + # Add PredictedLDDTHead after StructureModule executes. + name = 'predicted_lddt' + # Feed all previous results to give access to structure_module result. + head_config, module = heads[name] + ret[name] = module(representations, batch, is_training) + if compute_loss: + total_loss += loss(module, head_config, ret, name, filter_ret=False) + + if ('predicted_aligned_error' in self.config.heads + and self.config.heads.get('predicted_aligned_error.weight', 0.0)): + # Add PredictedAlignedErrorHead after StructureModule executes. + name = 'predicted_aligned_error' + # Feed all previous results to give access to structure_module result. + head_config, module = heads[name] + ret[name] = module(representations, batch, is_training) + if compute_loss: + total_loss += loss(module, head_config, ret, name, filter_ret=False) + + if compute_loss: + return ret, total_loss + else: + return ret + + +class AlphaFold(hk.Module): + """AlphaFold model with recycling. + + Jumper et al. (2021) Suppl. Alg. 
2 "Inference" + """ + + def __init__(self, config, name='alphafold'): + super().__init__(name=name) + self.config = config + self.global_config = config.global_config + + def __call__( + self, + batch, + is_training, + compute_loss=False, + ensemble_representations=False, + return_representations=False): + """Run the AlphaFold model. + + Arguments: + batch: Dictionary with inputs to the AlphaFold model. + is_training: Whether the system is in training or inference mode. + compute_loss: Whether to compute losses (requires extra features + to be present in the batch and knowing the true structure). + ensemble_representations: Whether to use ensembling of representations. + return_representations: Whether to also return the intermediate + representations. + + Returns: + When compute_loss is True: + a tuple of loss and output of AlphaFoldIteration. + When compute_loss is False: + just output of AlphaFoldIteration. + + The output of AlphaFoldIteration is a nested dictionary containing + predictions from the various heads. 
+ """ + + impl = AlphaFoldIteration(self.config, self.global_config) + batch_size, num_residues = batch['aatype'].shape + + def get_prev(ret): + new_prev = { + 'prev_pos': + ret['structure_module']['final_atom_positions'], + 'prev_msa_first_row': ret['representations']['msa_first_row'], + 'prev_pair': ret['representations']['pair'], + } + return jax.tree_map(jax.lax.stop_gradient, new_prev) + + def do_call(prev, + recycle_idx, + compute_loss=compute_loss): + if self.config.resample_msa_in_recycling: + num_ensemble = batch_size // (self.config.num_recycle + 1) + def slice_recycle_idx(x): + start = recycle_idx * num_ensemble + size = num_ensemble + return jax.lax.dynamic_slice_in_dim(x, start, size, axis=0) + ensembled_batch = jax.tree_map(slice_recycle_idx, batch) + else: + num_ensemble = batch_size + ensembled_batch = batch + + non_ensembled_batch = jax.tree_map(lambda x: x, prev) + + return impl( + ensembled_batch=ensembled_batch, + non_ensembled_batch=non_ensembled_batch, + is_training=is_training, + compute_loss=compute_loss, + ensemble_representations=ensemble_representations) + + if self.config.num_recycle: + emb_config = self.config.embeddings_and_evoformer + prev = { + 'prev_pos': jnp.zeros( + [num_residues, residue_constants.atom_type_num, 3]), + 'prev_msa_first_row': jnp.zeros( + [num_residues, emb_config.msa_channel]), + 'prev_pair': jnp.zeros( + [num_residues, num_residues, emb_config.pair_channel]), + } + + if 'num_iter_recycling' in batch: + # Training time: num_iter_recycling is in batch. + # The value for each ensemble batch is the same, so arbitrarily taking + # 0-th. + num_iter = batch['num_iter_recycling'][0] + + # Add insurance that we will not run more + # recyclings than the model is configured to run. + num_iter = jnp.minimum(num_iter, self.config.num_recycle) + else: + # Eval mode or tests: use the maximum number of iterations. 
+ num_iter = self.config.num_recycle + + body = lambda x: (x[0] + 1, # pylint: disable=g-long-lambda + get_prev(do_call(x[1], recycle_idx=x[0], + compute_loss=False))) + if hk.running_init(): + # When initializing the Haiku module, run one iteration of the + # while_loop to initialize the Haiku modules used in `body`. + _, prev = body((0, prev)) + else: + _, prev = hk.while_loop( + lambda x: x[0] < num_iter, + body, + (0, prev)) + else: + prev = {} + num_iter = 0 + + ret = do_call(prev=prev, recycle_idx=num_iter) + if compute_loss: + ret = ret[0], [ret[1]] + + if not return_representations: + del (ret[0] if compute_loss else ret)['representations'] # pytype: disable=unsupported-operands + return ret + + +class TemplatePairStack(hk.Module): + """Pair stack for the templates. + + Jumper et al. (2021) Suppl. Alg. 16 "TemplatePairStack" + """ + + def __init__(self, config, global_config, name='template_pair_stack'): + super().__init__(name=name) + self.config = config + self.global_config = global_config + + def __call__(self, pair_act, pair_mask, is_training, safe_key=None): + """Builds TemplatePairStack module. + + Arguments: + pair_act: Pair activations for single template, shape [N_res, N_res, c_t]. + pair_mask: Pair mask, shape [N_res, N_res]. + is_training: Whether the module is in training mode. + safe_key: Safe key object encapsulating the random number generation key. + + Returns: + Updated pair_act, shape [N_res, N_res, c_t]. 
+ """ + + if safe_key is None: + safe_key = prng.SafeKey(hk.next_rng_key()) + + gc = self.global_config + c = self.config + + if not c.num_block: + return pair_act + + def block(x): + """One block of the template pair stack.""" + pair_act, safe_key = x + + dropout_wrapper_fn = functools.partial( + dropout_wrapper, is_training=is_training, global_config=gc) + + safe_key, *sub_keys = safe_key.split(6) + sub_keys = iter(sub_keys) + + pair_act = dropout_wrapper_fn( + TriangleAttention(c.triangle_attention_starting_node, gc, + name='triangle_attention_starting_node'), + pair_act, + pair_mask, + next(sub_keys)) + pair_act = dropout_wrapper_fn( + TriangleAttention(c.triangle_attention_ending_node, gc, + name='triangle_attention_ending_node'), + pair_act, + pair_mask, + next(sub_keys)) + pair_act = dropout_wrapper_fn( + TriangleMultiplication(c.triangle_multiplication_outgoing, gc, + name='triangle_multiplication_outgoing'), + pair_act, + pair_mask, + next(sub_keys)) + pair_act = dropout_wrapper_fn( + TriangleMultiplication(c.triangle_multiplication_incoming, gc, + name='triangle_multiplication_incoming'), + pair_act, + pair_mask, + next(sub_keys)) + pair_act = dropout_wrapper_fn( + Transition(c.pair_transition, gc, name='pair_transition'), + pair_act, + pair_mask, + next(sub_keys)) + + return pair_act, safe_key + + if gc.use_remat: + block = hk.remat(block) + + res_stack = layer_stack.layer_stack(c.num_block)(block) + pair_act, safe_key = res_stack((pair_act, safe_key)) + return pair_act + + +class Transition(hk.Module): + """Transition layer. + + Jumper et al. (2021) Suppl. Alg. 9 "MSATransition" + Jumper et al. (2021) Suppl. Alg. 15 "PairTransition" + """ + + def __init__(self, config, global_config, name='transition_block'): + super().__init__(name=name) + self.config = config + self.global_config = global_config + + def __call__(self, act, mask, is_training=True): + """Builds Transition module. 
+ + Arguments: + act: A tensor of queries of size [batch_size, N_res, N_channel]. + mask: A tensor denoting the mask of size [batch_size, N_res]. + is_training: Whether the module is in training mode. + + Returns: + A float32 tensor of size [batch_size, N_res, N_channel]. + """ + _, _, nc = act.shape + + num_intermediate = int(nc * self.config.num_intermediate_factor) + mask = jnp.expand_dims(mask, axis=-1) + + act = hk.LayerNorm( + axis=[-1], + create_scale=True, + create_offset=True, + name='input_layer_norm')( + act) + + transition_module = hk.Sequential([ + common_modules.Linear( + num_intermediate, + initializer='relu', + name='transition1'), jax.nn.relu, + common_modules.Linear( + nc, + initializer=utils.final_init(self.global_config), + name='transition2') + ]) + + act = mapping.inference_subbatch( + transition_module, + self.global_config.subbatch_size, + batched_args=[act], + nonbatched_args=[], + low_memory=not is_training) + + return act + + +def glorot_uniform(): + return hk.initializers.VarianceScaling(scale=1.0, + mode='fan_avg', + distribution='uniform') + + +class Attention(hk.Module): + """Multihead attention.""" + + def __init__(self, config, global_config, output_dim, name='attention'): + super().__init__(name=name) + + self.config = config + self.global_config = global_config + self.output_dim = output_dim + + def __call__(self, q_data, m_data, bias, nonbatched_bias=None): + """Builds Attention module. + + Arguments: + q_data: A tensor of queries, shape [batch_size, N_queries, q_channels]. + m_data: A tensor of memories from which the keys and values are + projected, shape [batch_size, N_keys, m_channels]. + bias: A bias for the attention, shape [batch_size, N_queries, N_keys]. + nonbatched_bias: Shared bias, shape [N_queries, N_keys]. + + Returns: + A float32 tensor of shape [batch_size, N_queries, output_dim]. 
+ """ + # Sensible default for when the config keys are missing + key_dim = self.config.get('key_dim', int(q_data.shape[-1])) + value_dim = self.config.get('value_dim', int(m_data.shape[-1])) + num_head = self.config.num_head + assert key_dim % num_head == 0 + assert value_dim % num_head == 0 + key_dim = key_dim // num_head + value_dim = value_dim // num_head + + q_weights = hk.get_parameter( + 'query_w', shape=(q_data.shape[-1], num_head, key_dim), + init=glorot_uniform()) + k_weights = hk.get_parameter( + 'key_w', shape=(m_data.shape[-1], num_head, key_dim), + init=glorot_uniform()) + v_weights = hk.get_parameter( + 'value_w', shape=(m_data.shape[-1], num_head, value_dim), + init=glorot_uniform()) + + q = jnp.einsum('bqa,ahc->bqhc', q_data, q_weights) * key_dim**(-0.5) + k = jnp.einsum('bka,ahc->bkhc', m_data, k_weights) + v = jnp.einsum('bka,ahc->bkhc', m_data, v_weights) + logits = jnp.einsum('bqhc,bkhc->bhqk', q, k) + bias + if nonbatched_bias is not None: + logits += jnp.expand_dims(nonbatched_bias, axis=0) + weights = jax.nn.softmax(logits) + weighted_avg = jnp.einsum('bhqk,bkhc->bqhc', weights, v) + + if self.global_config.zero_init: + init = hk.initializers.Constant(0.0) + else: + init = glorot_uniform() + + if self.config.gating: + gating_weights = hk.get_parameter( + 'gating_w', + shape=(q_data.shape[-1], num_head, value_dim), + init=hk.initializers.Constant(0.0)) + gating_bias = hk.get_parameter( + 'gating_b', + shape=(num_head, value_dim), + init=hk.initializers.Constant(1.0)) + + gate_values = jnp.einsum('bqc, chv->bqhv', q_data, + gating_weights) + gating_bias + + gate_values = jax.nn.sigmoid(gate_values) + + weighted_avg *= gate_values + + o_weights = hk.get_parameter( + 'output_w', shape=(num_head, value_dim, self.output_dim), + init=init) + o_bias = hk.get_parameter('output_b', shape=(self.output_dim,), + init=hk.initializers.Constant(0.0)) + + output = jnp.einsum('bqhc,hco->bqo', weighted_avg, o_weights) + o_bias + + return output + + +class 
GlobalAttention(hk.Module): + """Global attention. + + Jumper et al. (2021) Suppl. Alg. 19 "MSAColumnGlobalAttention" lines 2-7 + """ + + def __init__(self, config, global_config, output_dim, name='attention'): + super().__init__(name=name) + + self.config = config + self.global_config = global_config + self.output_dim = output_dim + + def __call__(self, q_data, m_data, q_mask, bias): + """Builds GlobalAttention module. + + Arguments: + q_data: A tensor of queries with size [batch_size, N_queries, + q_channels] + m_data: A tensor of memories from which the keys and values + projected. Size [batch_size, N_keys, m_channels] + q_mask: A binary mask for q_data with zeros in the padded sequence + elements and ones otherwise. Size [batch_size, N_queries, q_channels] + (or broadcastable to this shape). + bias: A bias for the attention. + + Returns: + A float32 tensor of size [batch_size, N_queries, output_dim]. + """ + # Sensible default for when the config keys are missing + key_dim = self.config.get('key_dim', int(q_data.shape[-1])) + value_dim = self.config.get('value_dim', int(m_data.shape[-1])) + num_head = self.config.num_head + assert key_dim % num_head == 0 + assert value_dim % num_head == 0 + key_dim = key_dim // num_head + value_dim = value_dim // num_head + + q_weights = hk.get_parameter( + 'query_w', shape=(q_data.shape[-1], num_head, key_dim), + init=glorot_uniform()) + k_weights = hk.get_parameter( + 'key_w', shape=(m_data.shape[-1], key_dim), + init=glorot_uniform()) + v_weights = hk.get_parameter( + 'value_w', shape=(m_data.shape[-1], value_dim), + init=glorot_uniform()) + + v = jnp.einsum('bka,ac->bkc', m_data, v_weights) + + q_avg = utils.mask_mean(q_mask, q_data, axis=1) + + q = jnp.einsum('ba,ahc->bhc', q_avg, q_weights) * key_dim**(-0.5) + k = jnp.einsum('bka,ac->bkc', m_data, k_weights) + bias = (1e9 * (q_mask[:, None, :, 0] - 1.)) + logits = jnp.einsum('bhc,bkc->bhk', q, k) + bias + weights = jax.nn.softmax(logits) + weighted_avg = 
jnp.einsum('bhk,bkc->bhc', weights, v) + + if self.global_config.zero_init: + init = hk.initializers.Constant(0.0) + else: + init = glorot_uniform() + + o_weights = hk.get_parameter( + 'output_w', shape=(num_head, value_dim, self.output_dim), + init=init) + o_bias = hk.get_parameter('output_b', shape=(self.output_dim,), + init=hk.initializers.Constant(0.0)) + + if self.config.gating: + gating_weights = hk.get_parameter( + 'gating_w', + shape=(q_data.shape[-1], num_head, value_dim), + init=hk.initializers.Constant(0.0)) + gating_bias = hk.get_parameter( + 'gating_b', + shape=(num_head, value_dim), + init=hk.initializers.Constant(1.0)) + + gate_values = jnp.einsum('bqc, chv->bqhv', q_data, gating_weights) + gate_values = jax.nn.sigmoid(gate_values + gating_bias) + weighted_avg = weighted_avg[:, None] * gate_values + output = jnp.einsum('bqhc,hco->bqo', weighted_avg, o_weights) + o_bias + else: + output = jnp.einsum('bhc,hco->bo', weighted_avg, o_weights) + o_bias + output = output[:, None] + return output + + +class MSARowAttentionWithPairBias(hk.Module): + """MSA per-row attention biased by the pair representation. + + Jumper et al. (2021) Suppl. Alg. 7 "MSARowAttentionWithPairBias" + """ + + def __init__(self, config, global_config, + name='msa_row_attention_with_pair_bias'): + super().__init__(name=name) + self.config = config + self.global_config = global_config + + def __call__(self, + msa_act, + msa_mask, + pair_act, + is_training=False): + """Builds MSARowAttentionWithPairBias module. + + Arguments: + msa_act: [N_seq, N_res, c_m] MSA representation. + msa_mask: [N_seq, N_res] mask of non-padded regions. + pair_act: [N_res, N_res, c_z] pair representation. + is_training: Whether the module is in training mode. + + Returns: + Update to msa_act, shape [N_seq, N_res, c_m]. 
+ """ + c = self.config + + assert len(msa_act.shape) == 3 + assert len(msa_mask.shape) == 2 + assert c.orientation == 'per_row' + + bias = (1e9 * (msa_mask - 1.))[:, None, None, :] + assert len(bias.shape) == 4 + + msa_act = hk.LayerNorm( + axis=[-1], create_scale=True, create_offset=True, name='query_norm')( + msa_act) + + pair_act = hk.LayerNorm( + axis=[-1], + create_scale=True, + create_offset=True, + name='feat_2d_norm')( + pair_act) + + init_factor = 1. / jnp.sqrt(int(pair_act.shape[-1])) + weights = hk.get_parameter( + 'feat_2d_weights', + shape=(pair_act.shape[-1], c.num_head), + init=hk.initializers.RandomNormal(stddev=init_factor)) + nonbatched_bias = jnp.einsum('qkc,ch->hqk', pair_act, weights) + + attn_mod = Attention( + c, self.global_config, msa_act.shape[-1]) + msa_act = mapping.inference_subbatch( + attn_mod, + self.global_config.subbatch_size, + batched_args=[msa_act, msa_act, bias], + nonbatched_args=[nonbatched_bias], + low_memory=not is_training) + + return msa_act + + +class MSAColumnAttention(hk.Module): + """MSA per-column attention. + + Jumper et al. (2021) Suppl. Alg. 8 "MSAColumnAttention" + """ + + def __init__(self, config, global_config, name='msa_column_attention'): + super().__init__(name=name) + self.config = config + self.global_config = global_config + + def __call__(self, + msa_act, + msa_mask, + is_training=False): + """Builds MSAColumnAttention module. + + Arguments: + msa_act: [N_seq, N_res, c_m] MSA representation. + msa_mask: [N_seq, N_res] mask of non-padded regions. + is_training: Whether the module is in training mode. 
+ + Returns: + Update to msa_act, shape [N_seq, N_res, c_m] + """ + c = self.config + + assert len(msa_act.shape) == 3 + assert len(msa_mask.shape) == 2 + assert c.orientation == 'per_column' + + msa_act = jnp.swapaxes(msa_act, -2, -3) + msa_mask = jnp.swapaxes(msa_mask, -1, -2) + + bias = (1e9 * (msa_mask - 1.))[:, None, None, :] + assert len(bias.shape) == 4 + + msa_act = hk.LayerNorm( + axis=[-1], create_scale=True, create_offset=True, name='query_norm')( + msa_act) + + attn_mod = Attention( + c, self.global_config, msa_act.shape[-1]) + msa_act = mapping.inference_subbatch( + attn_mod, + self.global_config.subbatch_size, + batched_args=[msa_act, msa_act, bias], + nonbatched_args=[], + low_memory=not is_training) + + msa_act = jnp.swapaxes(msa_act, -2, -3) + + return msa_act + + +class MSAColumnGlobalAttention(hk.Module): + """MSA per-column global attention. + + Jumper et al. (2021) Suppl. Alg. 19 "MSAColumnGlobalAttention" + """ + + def __init__(self, config, global_config, name='msa_column_global_attention'): + super().__init__(name=name) + self.config = config + self.global_config = global_config + + def __call__(self, + msa_act, + msa_mask, + is_training=False): + """Builds MSAColumnGlobalAttention module. + + Arguments: + msa_act: [N_seq, N_res, c_m] MSA representation. + msa_mask: [N_seq, N_res] mask of non-padded regions. + is_training: Whether the module is in training mode. + + Returns: + Update to msa_act, shape [N_seq, N_res, c_m]. 
+ """ + c = self.config + + assert len(msa_act.shape) == 3 + assert len(msa_mask.shape) == 2 + assert c.orientation == 'per_column' + + msa_act = jnp.swapaxes(msa_act, -2, -3) + msa_mask = jnp.swapaxes(msa_mask, -1, -2) + + bias = (1e9 * (msa_mask - 1.))[:, None, None, :] + assert len(bias.shape) == 4 + + msa_act = hk.LayerNorm( + axis=[-1], create_scale=True, create_offset=True, name='query_norm')( + msa_act) + + attn_mod = GlobalAttention( + c, self.global_config, msa_act.shape[-1], + name='attention') + # [N_seq, N_res, 1] + msa_mask = jnp.expand_dims(msa_mask, axis=-1) + msa_act = mapping.inference_subbatch( + attn_mod, + self.global_config.subbatch_size, + batched_args=[msa_act, msa_act, msa_mask, bias], + nonbatched_args=[], + low_memory=not is_training) + + msa_act = jnp.swapaxes(msa_act, -2, -3) + + return msa_act + + +class TriangleAttention(hk.Module): + """Triangle Attention. + + Jumper et al. (2021) Suppl. Alg. 13 "TriangleAttentionStartingNode" + Jumper et al. (2021) Suppl. Alg. 14 "TriangleAttentionEndingNode" + """ + + def __init__(self, config, global_config, name='triangle_attention'): + super().__init__(name=name) + self.config = config + self.global_config = global_config + + def __call__(self, pair_act, pair_mask, is_training=False): + """Builds TriangleAttention module. + + Arguments: + pair_act: [N_res, N_res, c_z] pair activations tensor + pair_mask: [N_res, N_res] mask of non-padded regions in the tensor. + is_training: Whether the module is in training mode. + + Returns: + Update to pair_act, shape [N_res, N_res, c_z]. 
+ """ + c = self.config + + assert len(pair_act.shape) == 3 + assert len(pair_mask.shape) == 2 + assert c.orientation in ['per_row', 'per_column'] + + if c.orientation == 'per_column': + pair_act = jnp.swapaxes(pair_act, -2, -3) + pair_mask = jnp.swapaxes(pair_mask, -1, -2) + + bias = (1e9 * (pair_mask - 1.))[:, None, None, :] + assert len(bias.shape) == 4 + + pair_act = hk.LayerNorm( + axis=[-1], create_scale=True, create_offset=True, name='query_norm')( + pair_act) + + init_factor = 1. / jnp.sqrt(int(pair_act.shape[-1])) + weights = hk.get_parameter( + 'feat_2d_weights', + shape=(pair_act.shape[-1], c.num_head), + init=hk.initializers.RandomNormal(stddev=init_factor)) + nonbatched_bias = jnp.einsum('qkc,ch->hqk', pair_act, weights) + + attn_mod = Attention( + c, self.global_config, pair_act.shape[-1]) + pair_act = mapping.inference_subbatch( + attn_mod, + self.global_config.subbatch_size, + batched_args=[pair_act, pair_act, bias], + nonbatched_args=[nonbatched_bias], + low_memory=not is_training) + + if c.orientation == 'per_column': + pair_act = jnp.swapaxes(pair_act, -2, -3) + + return pair_act + + +class MaskedMsaHead(hk.Module): + """Head to predict MSA at the masked locations. + + The MaskedMsaHead employs a BERT-style objective to reconstruct a masked + version of the full MSA, based on a linear projection of + the MSA representation. + Jumper et al. (2021) Suppl. Sec. 1.9.9 "Masked MSA prediction" + """ + + def __init__(self, config, global_config, name='masked_msa_head'): + super().__init__(name=name) + self.config = config + self.global_config = global_config + + if global_config.multimer_mode: + self.num_output = len(residue_constants.restypes_with_x_and_gap) + else: + self.num_output = config.num_output + + def __call__(self, representations, batch, is_training): + """Builds MaskedMsaHead module. + + Arguments: + representations: Dictionary of representations, must contain: + * 'msa': MSA representation, shape [N_seq, N_res, c_m]. 
+ batch: Batch, unused. + is_training: Whether the module is in training mode. + + Returns: + Dictionary containing: + * 'logits': logits of shape [N_seq, N_res, N_aatype] with + (unnormalized) log probabilies of predicted aatype at position. + """ + del batch + logits = common_modules.Linear( + self.num_output, + initializer=utils.final_init(self.global_config), + name='logits')( + representations['msa']) + return dict(logits=logits) + + def loss(self, value, batch): + errors = softmax_cross_entropy( + labels=jax.nn.one_hot(batch['true_msa'], num_classes=self.num_output), + logits=value['logits']) + loss = (jnp.sum(errors * batch['bert_mask'], axis=(-2, -1)) / + (1e-8 + jnp.sum(batch['bert_mask'], axis=(-2, -1)))) + return {'loss': loss} + + +class PredictedLDDTHead(hk.Module): + """Head to predict the per-residue LDDT to be used as a confidence measure. + + Jumper et al. (2021) Suppl. Sec. 1.9.6 "Model confidence prediction (pLDDT)" + Jumper et al. (2021) Suppl. Alg. 29 "predictPerResidueLDDT_Ca" + """ + + def __init__(self, config, global_config, name='predicted_lddt_head'): + super().__init__(name=name) + self.config = config + self.global_config = global_config + + def __call__(self, representations, batch, is_training): + """Builds PredictedLDDTHead module. + + Arguments: + representations: Dictionary of representations, must contain: + * 'structure_module': Single representation from the structure module, + shape [N_res, c_s]. + batch: Batch, unused. + is_training: Whether the module is in training mode. + + Returns: + Dictionary containing : + * 'logits': logits of shape [N_res, N_bins] with + (unnormalized) log probabilies of binned predicted lDDT. 
+ """ + act = representations['structure_module'] + + act = hk.LayerNorm( + axis=[-1], + create_scale=True, + create_offset=True, + name='input_layer_norm')( + act) + + act = common_modules.Linear( + self.config.num_channels, + initializer='relu', + name='act_0')( + act) + act = jax.nn.relu(act) + + act = common_modules.Linear( + self.config.num_channels, + initializer='relu', + name='act_1')( + act) + act = jax.nn.relu(act) + + logits = common_modules.Linear( + self.config.num_bins, + initializer=utils.final_init(self.global_config), + name='logits')( + act) + # Shape (batch_size, num_res, num_bins) + return dict(logits=logits) + + def loss(self, value, batch): + # Shape (num_res, 37, 3) + pred_all_atom_pos = value['structure_module']['final_atom_positions'] + # Shape (num_res, 37, 3) + true_all_atom_pos = batch['all_atom_positions'] + # Shape (num_res, 37) + all_atom_mask = batch['all_atom_mask'] + + # Shape (num_res,) + lddt_ca = lddt.lddt( + # Shape (batch_size, num_res, 3) + predicted_points=pred_all_atom_pos[None, :, 1, :], + # Shape (batch_size, num_res, 3) + true_points=true_all_atom_pos[None, :, 1, :], + # Shape (batch_size, num_res, 1) + true_points_mask=all_atom_mask[None, :, 1:2].astype(jnp.float32), + cutoff=15., + per_residue=True) + lddt_ca = jax.lax.stop_gradient(lddt_ca) + + num_bins = self.config.num_bins + bin_index = jnp.floor(lddt_ca * num_bins).astype(jnp.int32) + + # protect against out of range for lddt_ca == 1 + bin_index = jnp.minimum(bin_index, num_bins - 1) + lddt_ca_one_hot = jax.nn.one_hot(bin_index, num_classes=num_bins) + + # Shape (num_res, num_channel) + logits = value['predicted_lddt']['logits'] + errors = softmax_cross_entropy(labels=lddt_ca_one_hot, logits=logits) + + # Shape (num_res,) + mask_ca = all_atom_mask[:, residue_constants.atom_order['CA']] + mask_ca = mask_ca.astype(jnp.float32) + loss = jnp.sum(errors * mask_ca) / (jnp.sum(mask_ca) + 1e-8) + + if self.config.filter_by_resolution: + # NMR & distillation have 
resolution = 0 + loss *= ((batch['resolution'] >= self.config.min_resolution) + & (batch['resolution'] <= self.config.max_resolution)).astype( + jnp.float32) + + output = {'loss': loss} + return output + + +class PredictedAlignedErrorHead(hk.Module): + """Head to predict the distance errors in the backbone alignment frames. + + Can be used to compute predicted TM-Score. + Jumper et al. (2021) Suppl. Sec. 1.9.7 "TM-score prediction" + """ + + def __init__(self, config, global_config, + name='predicted_aligned_error_head'): + super().__init__(name=name) + self.config = config + self.global_config = global_config + + def __call__(self, representations, batch, is_training): + """Builds PredictedAlignedErrorHead module. + + Arguments: + representations: Dictionary of representations, must contain: + * 'pair': pair representation, shape [N_res, N_res, c_z]. + batch: Batch, unused. + is_training: Whether the module is in training mode. + + Returns: + Dictionary containing: + * logits: logits for aligned error, shape [N_res, N_res, N_bins]. + * bin_breaks: array containing bin breaks, shape [N_bins - 1]. 
+ """ + + act = representations['pair'] + + # Shape (num_res, num_res, num_bins) + logits = common_modules.Linear( + self.config.num_bins, + initializer=utils.final_init(self.global_config), + name='logits')(act) + # Shape (num_bins,) + breaks = jnp.linspace( + 0., self.config.max_error_bin, self.config.num_bins - 1) + return dict(logits=logits, breaks=breaks) + + def loss(self, value, batch): + # Shape (num_res, 7) + predicted_affine = quat_affine.QuatAffine.from_tensor( + value['structure_module']['final_affines']) + # Shape (num_res, 7) + true_affine = quat_affine.QuatAffine.from_tensor( + batch['backbone_affine_tensor']) + # Shape (num_res) + mask = batch['backbone_affine_mask'] + # Shape (num_res, num_res) + square_mask = mask[:, None] * mask[None, :] + num_bins = self.config.num_bins + # (1, num_bins - 1) + breaks = value['predicted_aligned_error']['breaks'] + # (1, num_bins) + logits = value['predicted_aligned_error']['logits'] + + # Compute the squared error for each alignment. + def _local_frame_points(affine): + points = [jnp.expand_dims(x, axis=-2) for x in affine.translation] + return affine.invert_point(points, extra_dims=1) + error_dist2_xyz = [ + jnp.square(a - b) + for a, b in zip(_local_frame_points(predicted_affine), + _local_frame_points(true_affine))] + error_dist2 = sum(error_dist2_xyz) + # Shape (num_res, num_res) + # First num_res are alignment frames, second num_res are the residues. 
+ error_dist2 = jax.lax.stop_gradient(error_dist2) + + sq_breaks = jnp.square(breaks) + true_bins = jnp.sum(( + error_dist2[..., None] > sq_breaks).astype(jnp.int32), axis=-1) + + errors = softmax_cross_entropy( + labels=jax.nn.one_hot(true_bins, num_bins, axis=-1), logits=logits) + + loss = (jnp.sum(errors * square_mask, axis=(-2, -1)) / + (1e-8 + jnp.sum(square_mask, axis=(-2, -1)))) + + if self.config.filter_by_resolution: + # NMR & distillation have resolution = 0 + loss *= ((batch['resolution'] >= self.config.min_resolution) + & (batch['resolution'] <= self.config.max_resolution)).astype( + jnp.float32) + + output = {'loss': loss} + return output + + +class ExperimentallyResolvedHead(hk.Module): + """Predicts if an atom is experimentally resolved in a high-res structure. + + Only trained on high-resolution X-ray crystals & cryo-EM. + Jumper et al. (2021) Suppl. Sec. 1.9.10 '"Experimentally resolved" prediction' + """ + + def __init__(self, config, global_config, + name='experimentally_resolved_head'): + super().__init__(name=name) + self.config = config + self.global_config = global_config + + def __call__(self, representations, batch, is_training): + """Builds ExperimentallyResolvedHead module. + + Arguments: + representations: Dictionary of representations, must contain: + * 'single': Single representation, shape [N_res, c_s]. + batch: Batch, unused. + is_training: Whether the module is in training mode. + + Returns: + Dictionary containing: + * 'logits': logits of shape [N_res, 37], + log probability that an atom is resolved in atom37 representation, + can be converted to probability by applying sigmoid. + """ + logits = common_modules.Linear( + 37, # atom_exists.shape[-1] + initializer=utils.final_init(self.global_config), + name='logits')(representations['single']) + return dict(logits=logits) + + def loss(self, value, batch): + logits = value['logits'] + assert len(logits.shape) == 2 + + # Does the atom appear in the amino acid? 
+ atom_exists = batch['atom37_atom_exists'] + # Is the atom resolved in the experiment? Subset of atom_exists, + # *except for OXT* + all_atom_mask = batch['all_atom_mask'].astype(jnp.float32) + + xent = sigmoid_cross_entropy(labels=all_atom_mask, logits=logits) + loss = jnp.sum(xent * atom_exists) / (1e-8 + jnp.sum(atom_exists)) + + if self.config.filter_by_resolution: + # NMR & distillation examples have resolution = 0. + loss *= ((batch['resolution'] >= self.config.min_resolution) + & (batch['resolution'] <= self.config.max_resolution)).astype( + jnp.float32) + + output = {'loss': loss} + return output + + +class TriangleMultiplication(hk.Module): + """Triangle multiplication layer ("outgoing" or "incoming"). + + Jumper et al. (2021) Suppl. Alg. 11 "TriangleMultiplicationOutgoing" + Jumper et al. (2021) Suppl. Alg. 12 "TriangleMultiplicationIncoming" + """ + + def __init__(self, config, global_config, name='triangle_multiplication'): + super().__init__(name=name) + self.config = config + self.global_config = global_config + + def __call__(self, act, mask, is_training=True): + """Builds TriangleMultiplication module. + + Arguments: + act: Pair activations, shape [N_res, N_res, c_z] + mask: Pair mask, shape [N_res, N_res]. + is_training: Whether the module is in training mode. + + Returns: + Outputs, same shape/type as act. 
+ """ + del is_training + c = self.config + gc = self.global_config + + mask = mask[..., None] + + act = hk.LayerNorm(axis=[-1], create_scale=True, create_offset=True, + name='layer_norm_input')(act) + input_act = act + + left_projection = common_modules.Linear( + c.num_intermediate_channel, + name='left_projection') + left_proj_act = mask * left_projection(act) + + right_projection = common_modules.Linear( + c.num_intermediate_channel, + name='right_projection') + right_proj_act = mask * right_projection(act) + + left_gate_values = jax.nn.sigmoid(common_modules.Linear( + c.num_intermediate_channel, + bias_init=1., + initializer=utils.final_init(gc), + name='left_gate')(act)) + + right_gate_values = jax.nn.sigmoid(common_modules.Linear( + c.num_intermediate_channel, + bias_init=1., + initializer=utils.final_init(gc), + name='right_gate')(act)) + + left_proj_act *= left_gate_values + right_proj_act *= right_gate_values + + # "Outgoing" edges equation: 'ikc,jkc->ijc' + # "Incoming" edges equation: 'kjc,kic->ijc' + # Note on the Suppl. Alg. 11 & 12 notation: + # For the "outgoing" edges, a = left_proj_act and b = right_proj_act + # For the "incoming" edges, it's swapped: + # b = left_proj_act and a = right_proj_act + act = jnp.einsum(c.equation, left_proj_act, right_proj_act) + + act = hk.LayerNorm( + axis=[-1], + create_scale=True, + create_offset=True, + name='center_layer_norm')( + act) + + output_channel = int(input_act.shape[-1]) + + act = common_modules.Linear( + output_channel, + initializer=utils.final_init(gc), + name='output_projection')(act) + + gate_values = jax.nn.sigmoid(common_modules.Linear( + output_channel, + bias_init=1., + initializer=utils.final_init(gc), + name='gating_linear')(input_act)) + act *= gate_values + + return act + + +class DistogramHead(hk.Module): + """Head to predict a distogram. + + Jumper et al. (2021) Suppl. Sec. 
1.9.8 "Distogram prediction" + """ + + def __init__(self, config, global_config, name='distogram_head'): + super().__init__(name=name) + self.config = config + self.global_config = global_config + + def __call__(self, representations, batch, is_training): + """Builds DistogramHead module. + + Arguments: + representations: Dictionary of representations, must contain: + * 'pair': pair representation, shape [N_res, N_res, c_z]. + batch: Batch, unused. + is_training: Whether the module is in training mode. + + Returns: + Dictionary containing: + * logits: logits for distogram, shape [N_res, N_res, N_bins]. + * bin_breaks: array containing bin breaks, shape [N_bins - 1,]. + """ + half_logits = common_modules.Linear( + self.config.num_bins, + initializer=utils.final_init(self.global_config), + name='half_logits')( + representations['pair']) + + logits = half_logits + jnp.swapaxes(half_logits, -2, -3) + breaks = jnp.linspace(self.config.first_break, self.config.last_break, + self.config.num_bins - 1) + + return dict(logits=logits, bin_edges=breaks) + + def loss(self, value, batch): + return _distogram_log_loss(value['logits'], value['bin_edges'], + batch, self.config.num_bins) + + +def _distogram_log_loss(logits, bin_edges, batch, num_bins): + """Log loss of a distogram.""" + + assert len(logits.shape) == 3 + positions = batch['pseudo_beta'] + mask = batch['pseudo_beta_mask'] + + assert positions.shape[-1] == 3 + + sq_breaks = jnp.square(bin_edges) + + dist2 = jnp.sum( + jnp.square( + jnp.expand_dims(positions, axis=-2) - + jnp.expand_dims(positions, axis=-3)), + axis=-1, + keepdims=True) + + true_bins = jnp.sum(dist2 > sq_breaks, axis=-1) + + errors = softmax_cross_entropy( + labels=jax.nn.one_hot(true_bins, num_bins), logits=logits) + + square_mask = jnp.expand_dims(mask, axis=-2) * jnp.expand_dims(mask, axis=-1) + + avg_error = ( + jnp.sum(errors * square_mask, axis=(-2, -1)) / + (1e-6 + jnp.sum(square_mask, axis=(-2, -1)))) + dist2 = dist2[..., 0] + return 
dict(loss=avg_error, true_dist=jnp.sqrt(1e-6 + dist2)) + + +class OuterProductMean(hk.Module): + """Computes mean outer product. + + Jumper et al. (2021) Suppl. Alg. 10 "OuterProductMean" + """ + + def __init__(self, + config, + global_config, + num_output_channel, + name='outer_product_mean'): + super().__init__(name=name) + self.global_config = global_config + self.config = config + self.num_output_channel = num_output_channel + + def __call__(self, act, mask, is_training=True): + """Builds OuterProductMean module. + + Arguments: + act: MSA representation, shape [N_seq, N_res, c_m]. + mask: MSA mask, shape [N_seq, N_res]. + is_training: Whether the module is in training mode. + + Returns: + Update to pair representation, shape [N_res, N_res, c_z]. + """ + gc = self.global_config + c = self.config + + mask = mask[..., None] + act = hk.LayerNorm([-1], True, True, name='layer_norm_input')(act) + + left_act = mask * common_modules.Linear( + c.num_outer_channel, + initializer='linear', + name='left_projection')( + act) + + right_act = mask * common_modules.Linear( + c.num_outer_channel, + initializer='linear', + name='right_projection')( + act) + + if gc.zero_init: + init_w = hk.initializers.Constant(0.0) + else: + init_w = hk.initializers.VarianceScaling(scale=2., mode='fan_in') + + output_w = hk.get_parameter( + 'output_w', + shape=(c.num_outer_channel, c.num_outer_channel, + self.num_output_channel), + init=init_w) + output_b = hk.get_parameter( + 'output_b', shape=(self.num_output_channel,), + init=hk.initializers.Constant(0.0)) + + def compute_chunk(left_act): + # This is equivalent to + # + # act = jnp.einsum('abc,ade->dceb', left_act, right_act) + # act = jnp.einsum('dceb,cef->bdf', act, output_w) + output_b + # + # but faster. 
+ left_act = jnp.transpose(left_act, [0, 2, 1]) + act = jnp.einsum('acb,ade->dceb', left_act, right_act) + act = jnp.einsum('dceb,cef->dbf', act, output_w) + output_b + return jnp.transpose(act, [1, 0, 2]) + + act = mapping.inference_subbatch( + compute_chunk, + c.chunk_size, + batched_args=[left_act], + nonbatched_args=[], + low_memory=True, + input_subbatch_dim=1, + output_subbatch_dim=0) + + epsilon = 1e-3 + norm = jnp.einsum('abc,adc->bdc', mask, mask) + act /= epsilon + norm + + return act + + +def dgram_from_positions(positions, num_bins, min_bin, max_bin): + """Compute distogram from amino acid positions. + + Arguments: + positions: [N_res, 3] Position coordinates. + num_bins: The number of bins in the distogram. + min_bin: The left edge of the first bin. + max_bin: The left edge of the final bin. The final bin catches + everything larger than `max_bin`. + + Returns: + Distogram with the specified number of bins. + """ + + def squared_difference(x, y): + return jnp.square(x - y) + + lower_breaks = jnp.linspace(min_bin, max_bin, num_bins) + lower_breaks = jnp.square(lower_breaks) + upper_breaks = jnp.concatenate([lower_breaks[1:], + jnp.array([1e8], dtype=jnp.float32)], axis=-1) + dist2 = jnp.sum( + squared_difference( + jnp.expand_dims(positions, axis=-2), + jnp.expand_dims(positions, axis=-3)), + axis=-1, keepdims=True) + + dgram = ((dist2 > lower_breaks).astype(jnp.float32) * + (dist2 < upper_breaks).astype(jnp.float32)) + return dgram + + +def pseudo_beta_fn(aatype, all_atom_positions, all_atom_masks): + """Create pseudo beta features.""" + + is_gly = jnp.equal(aatype, residue_constants.restype_order['G']) + ca_idx = residue_constants.atom_order['CA'] + cb_idx = residue_constants.atom_order['CB'] + pseudo_beta = jnp.where( + jnp.tile(is_gly[..., None], [1] * len(is_gly.shape) + [3]), + all_atom_positions[..., ca_idx, :], + all_atom_positions[..., cb_idx, :]) + + if all_atom_masks is not None: + pseudo_beta_mask = jnp.where( + is_gly, all_atom_masks[..., 
ca_idx], all_atom_masks[..., cb_idx]) + pseudo_beta_mask = pseudo_beta_mask.astype(jnp.float32) + return pseudo_beta, pseudo_beta_mask + else: + return pseudo_beta + + +class EvoformerIteration(hk.Module): + """Single iteration (block) of Evoformer stack. + + Jumper et al. (2021) Suppl. Alg. 6 "EvoformerStack" lines 2-10 + """ + + def __init__(self, config, global_config, is_extra_msa, + name='evoformer_iteration'): + super().__init__(name=name) + self.config = config + self.global_config = global_config + self.is_extra_msa = is_extra_msa + + def __call__(self, activations, masks, is_training=True, safe_key=None): + """Builds EvoformerIteration module. + + Arguments: + activations: Dictionary containing activations: + * 'msa': MSA activations, shape [N_seq, N_res, c_m]. + * 'pair': pair activations, shape [N_res, N_res, c_z]. + masks: Dictionary of masks: + * 'msa': MSA mask, shape [N_seq, N_res]. + * 'pair': pair mask, shape [N_res, N_res]. + is_training: Whether the module is in training mode. + safe_key: prng.SafeKey encapsulating rng key. + + Returns: + Outputs, same shape/type as act. 
+ """ + c = self.config + gc = self.global_config + + msa_act, pair_act = activations['msa'], activations['pair'] + + if safe_key is None: + safe_key = prng.SafeKey(hk.next_rng_key()) + + msa_mask, pair_mask = masks['msa'], masks['pair'] + + dropout_wrapper_fn = functools.partial( + dropout_wrapper, + is_training=is_training, + global_config=gc) + + safe_key, *sub_keys = safe_key.split(10) + sub_keys = iter(sub_keys) + + outer_module = OuterProductMean( + config=c.outer_product_mean, + global_config=self.global_config, + num_output_channel=int(pair_act.shape[-1]), + name='outer_product_mean') + if c.outer_product_mean.first: + pair_act = dropout_wrapper_fn( + outer_module, + msa_act, + msa_mask, + safe_key=next(sub_keys), + output_act=pair_act) + + msa_act = dropout_wrapper_fn( + MSARowAttentionWithPairBias( + c.msa_row_attention_with_pair_bias, gc, + name='msa_row_attention_with_pair_bias'), + msa_act, + msa_mask, + safe_key=next(sub_keys), + pair_act=pair_act) + + if not self.is_extra_msa: + attn_mod = MSAColumnAttention( + c.msa_column_attention, gc, name='msa_column_attention') + else: + attn_mod = MSAColumnGlobalAttention( + c.msa_column_attention, gc, name='msa_column_global_attention') + msa_act = dropout_wrapper_fn( + attn_mod, + msa_act, + msa_mask, + safe_key=next(sub_keys)) + + msa_act = dropout_wrapper_fn( + Transition(c.msa_transition, gc, name='msa_transition'), + msa_act, + msa_mask, + safe_key=next(sub_keys)) + + if not c.outer_product_mean.first: + pair_act = dropout_wrapper_fn( + outer_module, + msa_act, + msa_mask, + safe_key=next(sub_keys), + output_act=pair_act) + + pair_act = dropout_wrapper_fn( + TriangleMultiplication(c.triangle_multiplication_outgoing, gc, + name='triangle_multiplication_outgoing'), + pair_act, + pair_mask, + safe_key=next(sub_keys)) + pair_act = dropout_wrapper_fn( + TriangleMultiplication(c.triangle_multiplication_incoming, gc, + name='triangle_multiplication_incoming'), + pair_act, + pair_mask, + safe_key=next(sub_keys)) 
+ + pair_act = dropout_wrapper_fn( + TriangleAttention(c.triangle_attention_starting_node, gc, + name='triangle_attention_starting_node'), + pair_act, + pair_mask, + safe_key=next(sub_keys)) + pair_act = dropout_wrapper_fn( + TriangleAttention(c.triangle_attention_ending_node, gc, + name='triangle_attention_ending_node'), + pair_act, + pair_mask, + safe_key=next(sub_keys)) + + pair_act = dropout_wrapper_fn( + Transition(c.pair_transition, gc, name='pair_transition'), + pair_act, + pair_mask, + safe_key=next(sub_keys)) + + return {'msa': msa_act, 'pair': pair_act} + + +class EmbeddingsAndEvoformer(hk.Module): + """Embeds the input data and runs Evoformer. + + Produces the MSA, single and pair representations. + Jumper et al. (2021) Suppl. Alg. 2 "Inference" line 5-18 + """ + + def __init__(self, config, global_config, name='evoformer'): + super().__init__(name=name) + self.config = config + self.global_config = global_config + + def __call__(self, batch, is_training, safe_key=None): + + c = self.config + gc = self.global_config + + if safe_key is None: + safe_key = prng.SafeKey(hk.next_rng_key()) + + # Embed clustered MSA. + # Jumper et al. (2021) Suppl. Alg. 2 "Inference" line 5 + # Jumper et al. (2021) Suppl. Alg. 3 "InputEmbedder" + preprocess_1d = common_modules.Linear( + c.msa_channel, name='preprocess_1d')( + batch['target_feat']) + + preprocess_msa = common_modules.Linear( + c.msa_channel, name='preprocess_msa')( + batch['msa_feat']) + + msa_activations = jnp.expand_dims(preprocess_1d, axis=0) + preprocess_msa + + left_single = common_modules.Linear( + c.pair_channel, name='left_single')( + batch['target_feat']) + right_single = common_modules.Linear( + c.pair_channel, name='right_single')( + batch['target_feat']) + pair_activations = left_single[:, None] + right_single[None] + mask_2d = batch['seq_mask'][:, None] * batch['seq_mask'][None, :] + + # Inject previous outputs for recycling. + # Jumper et al. (2021) Suppl. Alg. 
2 "Inference" line 6 + # Jumper et al. (2021) Suppl. Alg. 32 "RecyclingEmbedder" + if c.recycle_pos and 'prev_pos' in batch: + prev_pseudo_beta = pseudo_beta_fn( + batch['aatype'], batch['prev_pos'], None) + dgram = dgram_from_positions(prev_pseudo_beta, **self.config.prev_pos) + pair_activations += common_modules.Linear( + c.pair_channel, name='prev_pos_linear')( + dgram) + + if c.recycle_features: + if 'prev_msa_first_row' in batch: + prev_msa_first_row = hk.LayerNorm([-1], + True, + True, + name='prev_msa_first_row_norm')( + batch['prev_msa_first_row']) + msa_activations = msa_activations.at[0].add(prev_msa_first_row) + + if 'prev_pair' in batch: + pair_activations += hk.LayerNorm([-1], + True, + True, + name='prev_pair_norm')( + batch['prev_pair']) + + # Relative position encoding. + # Jumper et al. (2021) Suppl. Alg. 4 "relpos" + # Jumper et al. (2021) Suppl. Alg. 5 "one_hot" + if c.max_relative_feature: + # Add one-hot-encoded clipped residue distances to the pair activations. + pos = batch['residue_index'] + offset = pos[:, None] - pos[None, :] + rel_pos = jax.nn.one_hot( + jnp.clip( + offset + c.max_relative_feature, + a_min=0, + a_max=2 * c.max_relative_feature), + 2 * c.max_relative_feature + 1) + pair_activations += common_modules.Linear( + c.pair_channel, name='pair_activiations')( + rel_pos) + + # Embed templates into the pair activations. + # Jumper et al. (2021) Suppl. Alg. 2 "Inference" lines 9-13 + if c.template.enabled: + template_batch = {k: batch[k] for k in batch if k.startswith('template_')} + template_pair_representation = TemplateEmbedding(c.template, gc)( + pair_activations, + template_batch, + mask_2d, + is_training=is_training) + + pair_activations += template_pair_representation + + # Embed extra MSA features. + # Jumper et al. (2021) Suppl. Alg. 
2 "Inference" lines 14-16 + extra_msa_feat = create_extra_msa_feature(batch) + extra_msa_activations = common_modules.Linear( + c.extra_msa_channel, + name='extra_msa_activations')( + extra_msa_feat) + + # Extra MSA Stack. + # Jumper et al. (2021) Suppl. Alg. 18 "ExtraMsaStack" + extra_msa_stack_input = { + 'msa': extra_msa_activations, + 'pair': pair_activations, + } + + extra_msa_stack_iteration = EvoformerIteration( + c.evoformer, gc, is_extra_msa=True, name='extra_msa_stack') + + def extra_msa_stack_fn(x): + act, safe_key = x + safe_key, safe_subkey = safe_key.split() + extra_evoformer_output = extra_msa_stack_iteration( + activations=act, + masks={ + 'msa': batch['extra_msa_mask'], + 'pair': mask_2d + }, + is_training=is_training, + safe_key=safe_subkey) + return (extra_evoformer_output, safe_key) + + if gc.use_remat: + extra_msa_stack_fn = hk.remat(extra_msa_stack_fn) + + extra_msa_stack = layer_stack.layer_stack( + c.extra_msa_stack_num_block)( + extra_msa_stack_fn) + extra_msa_output, safe_key = extra_msa_stack( + (extra_msa_stack_input, safe_key)) + + pair_activations = extra_msa_output['pair'] + + evoformer_input = { + 'msa': msa_activations, + 'pair': pair_activations, + } + + evoformer_masks = {'msa': batch['msa_mask'], 'pair': mask_2d} + + # Append num_templ rows to msa_activations with template embeddings. + # Jumper et al. (2021) Suppl. Alg. 2 "Inference" lines 7-8 + if c.template.enabled and c.template.embed_torsion_angles: + num_templ, num_res = batch['template_aatype'].shape + + # Embed the templates aatypes. + aatype_one_hot = jax.nn.one_hot(batch['template_aatype'], 22, axis=-1) + + # Embed the templates aatype, torsion angles and masks. 
+ # Shape (templates, residues, msa_channels) + ret = all_atom.atom37_to_torsion_angles( + aatype=batch['template_aatype'], + all_atom_pos=batch['template_all_atom_positions'], + all_atom_mask=batch['template_all_atom_masks'], + # Ensure consistent behaviour during testing: + placeholder_for_undefined=not gc.zero_init) + + template_features = jnp.concatenate([ + aatype_one_hot, + jnp.reshape( + ret['torsion_angles_sin_cos'], [num_templ, num_res, 14]), + jnp.reshape( + ret['alt_torsion_angles_sin_cos'], [num_templ, num_res, 14]), + ret['torsion_angles_mask']], axis=-1) + + template_activations = common_modules.Linear( + c.msa_channel, + initializer='relu', + name='template_single_embedding')( + template_features) + template_activations = jax.nn.relu(template_activations) + template_activations = common_modules.Linear( + c.msa_channel, + initializer='relu', + name='template_projection')( + template_activations) + + # Concatenate the templates to the msa. + evoformer_input['msa'] = jnp.concatenate( + [evoformer_input['msa'], template_activations], axis=0) + # Concatenate templates masks to the msa masks. + # Use mask from the psi angle, as it only depends on the backbone atoms + # from a single residue. + torsion_angle_mask = ret['torsion_angles_mask'][:, :, 2] + torsion_angle_mask = torsion_angle_mask.astype( + evoformer_masks['msa'].dtype) + evoformer_masks['msa'] = jnp.concatenate( + [evoformer_masks['msa'], torsion_angle_mask], axis=0) + + # Main trunk of the network + # Jumper et al. (2021) Suppl. Alg. 
2 "Inference" lines 17-18 + evoformer_iteration = EvoformerIteration( + c.evoformer, gc, is_extra_msa=False, name='evoformer_iteration') + + def evoformer_fn(x): + act, safe_key = x + safe_key, safe_subkey = safe_key.split() + evoformer_output = evoformer_iteration( + activations=act, + masks=evoformer_masks, + is_training=is_training, + safe_key=safe_subkey) + return (evoformer_output, safe_key) + + if gc.use_remat: + evoformer_fn = hk.remat(evoformer_fn) + + evoformer_stack = layer_stack.layer_stack(c.evoformer_num_block)( + evoformer_fn) + evoformer_output, safe_key = evoformer_stack( + (evoformer_input, safe_key)) + + msa_activations = evoformer_output['msa'] + pair_activations = evoformer_output['pair'] + + single_activations = common_modules.Linear( + c.seq_channel, name='single_activations')( + msa_activations[0]) + + num_sequences = batch['msa_feat'].shape[0] + output = { + 'single': single_activations, + 'pair': pair_activations, + # Crop away template rows such that they are not used in MaskedMsaHead. + 'msa': msa_activations[:num_sequences, :, :], + 'msa_first_row': msa_activations[0], + } + + return output + + +class SingleTemplateEmbedding(hk.Module): + """Embeds a single template. + + Jumper et al. (2021) Suppl. Alg. 2 "Inference" lines 9+11 + """ + + def __init__(self, config, global_config, name='single_template_embedding'): + super().__init__(name=name) + self.config = config + self.global_config = global_config + + def __call__(self, query_embedding, batch, mask_2d, is_training): + """Build the single template embedding. + + Arguments: + query_embedding: Query pair representation, shape [N_res, N_res, c_z]. + batch: A batch of template features (note the template dimension has been + stripped out as this module only runs over a single template). + mask_2d: Padding mask (Note: this doesn't care if a template exists, + unlike the template_pseudo_beta_mask). + is_training: Whether the module is in training mode. 
+ + Returns: + A template embedding [N_res, N_res, c_z]. + """ + assert mask_2d.dtype == query_embedding.dtype + dtype = query_embedding.dtype + num_res = batch['template_aatype'].shape[0] + num_channels = (self.config.template_pair_stack + .triangle_attention_ending_node.value_dim) + template_mask = batch['template_pseudo_beta_mask'] + template_mask_2d = template_mask[:, None] * template_mask[None, :] + template_mask_2d = template_mask_2d.astype(dtype) + + template_dgram = dgram_from_positions(batch['template_pseudo_beta'], + **self.config.dgram_features) + template_dgram = template_dgram.astype(dtype) + + to_concat = [template_dgram, template_mask_2d[:, :, None]] + + aatype = jax.nn.one_hot(batch['template_aatype'], 22, axis=-1, dtype=dtype) + + to_concat.append(jnp.tile(aatype[None, :, :], [num_res, 1, 1])) + to_concat.append(jnp.tile(aatype[:, None, :], [1, num_res, 1])) + + n, ca, c = [residue_constants.atom_order[a] for a in ('N', 'CA', 'C')] + rot, trans = quat_affine.make_transform_from_reference( + n_xyz=batch['template_all_atom_positions'][:, n], + ca_xyz=batch['template_all_atom_positions'][:, ca], + c_xyz=batch['template_all_atom_positions'][:, c]) + affines = quat_affine.QuatAffine( + quaternion=quat_affine.rot_to_quat(rot, unstack_inputs=True), + translation=trans, + rotation=rot, + unstack_inputs=True) + points = [jnp.expand_dims(x, axis=-2) for x in affines.translation] + affine_vec = affines.invert_point(points, extra_dims=1) + inv_distance_scalar = jax.lax.rsqrt( + 1e-6 + sum([jnp.square(x) for x in affine_vec])) + + # Backbone affine mask: whether the residue has C, CA, N + # (the template mask defined above only considers pseudo CB). 
+ template_mask = ( + batch['template_all_atom_masks'][..., n] * + batch['template_all_atom_masks'][..., ca] * + batch['template_all_atom_masks'][..., c]) + template_mask_2d = template_mask[:, None] * template_mask[None, :] + + inv_distance_scalar *= template_mask_2d.astype(inv_distance_scalar.dtype) + + unit_vector = [(x * inv_distance_scalar)[..., None] for x in affine_vec] + + unit_vector = [x.astype(dtype) for x in unit_vector] + template_mask_2d = template_mask_2d.astype(dtype) + if not self.config.use_template_unit_vector: + unit_vector = [jnp.zeros_like(x) for x in unit_vector] + to_concat.extend(unit_vector) + + to_concat.append(template_mask_2d[..., None]) + + act = jnp.concatenate(to_concat, axis=-1) + + # Mask out non-template regions so we don't get arbitrary values in the + # distogram for these regions. + act *= template_mask_2d[..., None] + + # Jumper et al. (2021) Suppl. Alg. 2 "Inference" line 9 + act = common_modules.Linear( + num_channels, + initializer='relu', + name='embedding2d')( + act) + + # Jumper et al. (2021) Suppl. Alg. 2 "Inference" line 11 + act = TemplatePairStack( + self.config.template_pair_stack, self.global_config)( + act, mask_2d, is_training) + + act = hk.LayerNorm([-1], True, True, name='output_layer_norm')(act) + return act + + +class TemplateEmbedding(hk.Module): + """Embeds a set of templates. + + Jumper et al. (2021) Suppl. Alg. 2 "Inference" lines 9-12 + Jumper et al. (2021) Suppl. Alg. 17 "TemplatePointwiseAttention" + """ + + def __init__(self, config, global_config, name='template_embedding'): + super().__init__(name=name) + self.config = config + self.global_config = global_config + + def __call__(self, query_embedding, template_batch, mask_2d, is_training): + """Build TemplateEmbedding module. + + Arguments: + query_embedding: Query pair representation, shape [N_res, N_res, c_z]. + template_batch: A batch of template features. 
+ mask_2d: Padding mask (Note: this doesn't care if a template exists, + unlike the template_pseudo_beta_mask). + is_training: Whether the module is in training mode. + + Returns: + A template embedding [N_res, N_res, c_z]. + """ + + num_templates = template_batch['template_mask'].shape[0] + num_channels = (self.config.template_pair_stack + .triangle_attention_ending_node.value_dim) + num_res = query_embedding.shape[0] + + dtype = query_embedding.dtype + template_mask = template_batch['template_mask'] + template_mask = template_mask.astype(dtype) + + query_num_channels = query_embedding.shape[-1] + + # Make sure the weights are shared across templates by constructing the + # embedder here. + # Jumper et al. (2021) Suppl. Alg. 2 "Inference" lines 9-12 + template_embedder = SingleTemplateEmbedding(self.config, self.global_config) + + def map_fn(batch): + return template_embedder(query_embedding, batch, mask_2d, is_training) + + template_pair_representation = mapping.sharded_map(map_fn, in_axes=0)( + template_batch) + + # Cross attend from the query to the templates along the residue + # dimension by flattening everything else into the batch dimension. + # Jumper et al. (2021) Suppl. Alg. 
17 "TemplatePointwiseAttention" + flat_query = jnp.reshape(query_embedding, + [num_res * num_res, 1, query_num_channels]) + + flat_templates = jnp.reshape( + jnp.transpose(template_pair_representation, [1, 2, 0, 3]), + [num_res * num_res, num_templates, num_channels]) + + bias = (1e9 * (template_mask[None, None, None, :] - 1.)) + + template_pointwise_attention_module = Attention( + self.config.attention, self.global_config, query_num_channels) + nonbatched_args = [bias] + batched_args = [flat_query, flat_templates] + + embedding = mapping.inference_subbatch( + template_pointwise_attention_module, + self.config.subbatch_size, + batched_args=batched_args, + nonbatched_args=nonbatched_args, + low_memory=not is_training) + embedding = jnp.reshape(embedding, + [num_res, num_res, query_num_channels]) + + # No gradients if no templates. + embedding *= (jnp.sum(template_mask) > 0.).astype(embedding.dtype) + + return embedding diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/model/modules_multimer.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/model/modules_multimer.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,1129 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Core modules, which have been refactored in AlphaFold-Multimer. + +The main difference is that MSA sampling pipeline is moved inside the JAX model +for easier implementation of recycling and ensembling. 
+ +Lower-level modules up to EvoformerIteration are reused from modules.py. +""" + +import functools +from typing import Sequence + +from alphafold.common import residue_constants +from alphafold.model import all_atom_multimer +from alphafold.model import common_modules +from alphafold.model import folding_multimer +from alphafold.model import geometry +from alphafold.model import layer_stack +from alphafold.model import modules +from alphafold.model import prng +from alphafold.model import utils + +import haiku as hk +import jax +import jax.numpy as jnp +import numpy as np + + +def reduce_fn(x, mode): + if mode == 'none' or mode is None: + return jnp.asarray(x) + elif mode == 'sum': + return jnp.asarray(x).sum() + elif mode == 'mean': + return jnp.mean(jnp.asarray(x)) + else: + raise ValueError('Unsupported reduction option.') + + +def gumbel_noise(key: jnp.ndarray, shape: Sequence[int]) -> jnp.ndarray: + """Generate Gumbel Noise of given Shape. + + This generates samples from Gumbel(0, 1). + + Args: + key: Jax random number key. + shape: Shape of noise to return. + + Returns: + Gumbel noise of given shape. + """ + epsilon = 1e-6 + uniform = utils.padding_consistent_rng(jax.random.uniform) + uniform_noise = uniform( + key, shape=shape, dtype=jnp.float32, minval=0., maxval=1.) + gumbel = -jnp.log(-jnp.log(uniform_noise + epsilon) + epsilon) + return gumbel + + +def gumbel_max_sample(key: jnp.ndarray, logits: jnp.ndarray) -> jnp.ndarray: + """Samples from a probability distribution given by 'logits'. + + This uses Gumbel-max trick to implement the sampling in an efficient manner. + + Args: + key: prng key. + logits: Logarithm of probabilities to sample from, probabilities can be + unnormalized. + + Returns: + Sample from logprobs in one-hot form. 
+ """ + z = gumbel_noise(key, logits.shape) + return jax.nn.one_hot( + jnp.argmax(logits + z, axis=-1), + logits.shape[-1], + dtype=logits.dtype) + + +def gumbel_argsort_sample_idx(key: jnp.ndarray, + logits: jnp.ndarray) -> jnp.ndarray: + """Samples with replacement from a distribution given by 'logits'. + + This uses Gumbel trick to implement the sampling an efficient manner. For a + distribution over k items this samples k times without replacement, so this + is effectively sampling a random permutation with probabilities over the + permutations derived from the logprobs. + + Args: + key: prng key. + logits: Logarithm of probabilities to sample from, probabilities can be + unnormalized. + + Returns: + Sample from logprobs in one-hot form. + """ + z = gumbel_noise(key, logits.shape) + # This construction is equivalent to jnp.argsort, but using a non stable sort, + # since stable sort's aren't supported by jax2tf. + axis = len(logits.shape) - 1 + iota = jax.lax.broadcasted_iota(jnp.int64, logits.shape, axis) + _, perm = jax.lax.sort_key_val( + logits + z, iota, dimension=-1, is_stable=False) + return perm[::-1] + + +def make_masked_msa(batch, key, config, epsilon=1e-6): + """Create data for BERT on raw MSA.""" + # Add a random amino acid uniformly. + random_aa = jnp.array([0.05] * 20 + [0., 0.], dtype=jnp.float32) + + categorical_probs = ( + config.uniform_prob * random_aa + + config.profile_prob * batch['msa_profile'] + + config.same_prob * jax.nn.one_hot(batch['msa'], 22)) + + # Put all remaining probability on [MASK] which is a new column. + pad_shapes = [[0, 0] for _ in range(len(categorical_probs.shape))] + pad_shapes[-1][1] = 1 + mask_prob = 1. - config.profile_prob - config.same_prob - config.uniform_prob + assert mask_prob >= 0. 
+ categorical_probs = jnp.pad( + categorical_probs, pad_shapes, constant_values=mask_prob) + sh = batch['msa'].shape + key, mask_subkey, gumbel_subkey = key.split(3) + uniform = utils.padding_consistent_rng(jax.random.uniform) + mask_position = uniform(mask_subkey.get(), sh) < config.replace_fraction + mask_position *= batch['msa_mask'] + + logits = jnp.log(categorical_probs + epsilon) + bert_msa = gumbel_max_sample(gumbel_subkey.get(), logits) + bert_msa = jnp.where(mask_position, + jnp.argmax(bert_msa, axis=-1), batch['msa']) + bert_msa *= batch['msa_mask'] + + # Mix real and masked MSA. + if 'bert_mask' in batch: + batch['bert_mask'] *= mask_position.astype(jnp.float32) + else: + batch['bert_mask'] = mask_position.astype(jnp.float32) + batch['true_msa'] = batch['msa'] + batch['msa'] = bert_msa + + return batch + + +def nearest_neighbor_clusters(batch, gap_agreement_weight=0.): + """Assign each extra MSA sequence to its nearest neighbor in sampled MSA.""" + + # Determine how much weight we assign to each agreement. In theory, we could + # use a full blosum matrix here, but right now let's just down-weight gap + # agreement because it could be spurious. + # Never put weight on agreeing on BERT mask. + + weights = jnp.array( + [1.] * 21 + [gap_agreement_weight] + [0.], dtype=jnp.float32) + + msa_mask = batch['msa_mask'] + msa_one_hot = jax.nn.one_hot(batch['msa'], 23) + + extra_mask = batch['extra_msa_mask'] + extra_one_hot = jax.nn.one_hot(batch['extra_msa'], 23) + + msa_one_hot_masked = msa_mask[:, :, None] * msa_one_hot + extra_one_hot_masked = extra_mask[:, :, None] * extra_one_hot + + agreement = jnp.einsum('mrc, nrc->nm', extra_one_hot_masked, + weights * msa_one_hot_masked) + + cluster_assignment = jax.nn.softmax(1e3 * agreement, axis=0) + cluster_assignment *= jnp.einsum('mr, nr->mn', msa_mask, extra_mask) + + cluster_count = jnp.sum(cluster_assignment, axis=-1) + cluster_count += 1. # We always include the sequence itself. 
+ + msa_sum = jnp.einsum('nm, mrc->nrc', cluster_assignment, extra_one_hot_masked) + msa_sum += msa_one_hot_masked + + cluster_profile = msa_sum / cluster_count[:, None, None] + + extra_deletion_matrix = batch['extra_deletion_matrix'] + deletion_matrix = batch['deletion_matrix'] + + del_sum = jnp.einsum('nm, mc->nc', cluster_assignment, + extra_mask * extra_deletion_matrix) + del_sum += deletion_matrix # Original sequence. + cluster_deletion_mean = del_sum / cluster_count[:, None] + + return cluster_profile, cluster_deletion_mean + + +def create_msa_feat(batch): + """Create and concatenate MSA features.""" + msa_1hot = jax.nn.one_hot(batch['msa'], 23) + deletion_matrix = batch['deletion_matrix'] + has_deletion = jnp.clip(deletion_matrix, 0., 1.)[..., None] + deletion_value = (jnp.arctan(deletion_matrix / 3.) * (2. / jnp.pi))[..., None] + + deletion_mean_value = (jnp.arctan(batch['cluster_deletion_mean'] / 3.) * + (2. / jnp.pi))[..., None] + + msa_feat = [ + msa_1hot, + has_deletion, + deletion_value, + batch['cluster_profile'], + deletion_mean_value + ] + + return jnp.concatenate(msa_feat, axis=-1) + + +def create_extra_msa_feature(batch, num_extra_msa): + """Expand extra_msa into 1hot and concat with other extra msa features. + + We do this as late as possible as the one_hot extra msa can be very large. + + Args: + batch: a dictionary with the following keys: + * 'extra_msa': [num_seq, num_res] MSA that wasn't selected as a cluster + centre. Note - This isn't one-hotted. + * 'extra_deletion_matrix': [num_seq, num_res] Number of deletions at given + position. + num_extra_msa: Number of extra msa to use. + + Returns: + Concatenated tensor of extra MSA features. 
+ """ + # 23 = 20 amino acids + 'X' for unknown + gap + bert mask + extra_msa = batch['extra_msa'][:num_extra_msa] + deletion_matrix = batch['extra_deletion_matrix'][:num_extra_msa] + msa_1hot = jax.nn.one_hot(extra_msa, 23) + has_deletion = jnp.clip(deletion_matrix, 0., 1.)[..., None] + deletion_value = (jnp.arctan(deletion_matrix / 3.) * (2. / jnp.pi))[..., None] + extra_msa_mask = batch['extra_msa_mask'][:num_extra_msa] + return jnp.concatenate([msa_1hot, has_deletion, deletion_value], + axis=-1), extra_msa_mask + + +def sample_msa(key, batch, max_seq): + """Sample MSA randomly, remaining sequences are stored as `extra_*`. + + Args: + key: safe key for random number generation. + batch: batch to sample msa from. + max_seq: number of sequences to sample. + Returns: + Protein with sampled msa. + """ + # Sample uniformly among sequences with at least one non-masked position. + logits = (jnp.clip(jnp.sum(batch['msa_mask'], axis=-1), 0., 1.) - 1.) * 1e6 + # The cluster_bias_mask can be used to preserve the first row (target + # sequence) for each chain, for example. + if 'cluster_bias_mask' not in batch: + cluster_bias_mask = jnp.pad( + jnp.zeros(batch['msa'].shape[0] - 1), (1, 0), constant_values=1.) + else: + cluster_bias_mask = batch['cluster_bias_mask'] + + logits += cluster_bias_mask * 1e6 + index_order = gumbel_argsort_sample_idx(key.get(), logits) + sel_idx = index_order[:max_seq] + extra_idx = index_order[max_seq:] + + for k in ['msa', 'deletion_matrix', 'msa_mask', 'bert_mask']: + if k in batch: + batch['extra_' + k] = batch[k][extra_idx] + batch[k] = batch[k][sel_idx] + + return batch + + +def make_msa_profile(batch): + """Compute the MSA profile.""" + + # Compute the profile for every residue (over all MSA sequences). + return utils.mask_mean( + batch['msa_mask'][:, :, None], jax.nn.one_hot(batch['msa'], 22), axis=0) + + +class AlphaFoldIteration(hk.Module): + """A single recycling iteration of AlphaFold architecture. 
+ + Computes ensembled (averaged) representations from the provided features. + These representations are then passed to the various heads + that have been requested by the configuration file. + """ + + def __init__(self, config, global_config, name='alphafold_iteration'): + super().__init__(name=name) + self.config = config + self.global_config = global_config + + def __call__(self, + batch, + is_training, + return_representations=False, + safe_key=None): + + if is_training: + num_ensemble = np.asarray(self.config.num_ensemble_train) + else: + num_ensemble = np.asarray(self.config.num_ensemble_eval) + + # Compute representations for each MSA sample and average. + embedding_module = EmbeddingsAndEvoformer( + self.config.embeddings_and_evoformer, self.global_config) + repr_shape = hk.eval_shape( + lambda: embedding_module(batch, is_training)) + representations = { + k: jnp.zeros(v.shape, v.dtype) for (k, v) in repr_shape.items() + } + + def ensemble_body(x, unused_y): + """Add into representations ensemble.""" + del unused_y + representations, safe_key = x + safe_key, safe_subkey = safe_key.split() + representations_update = embedding_module( + batch, is_training, safe_key=safe_subkey) + + for k in representations: + if k not in {'msa', 'true_msa', 'bert_mask'}: + representations[k] += representations_update[k] * ( + 1. / num_ensemble).astype(representations[k].dtype) + else: + representations[k] = representations_update[k] + + return (representations, safe_key), None + + (representations, _), _ = hk.scan( + ensemble_body, (representations, safe_key), None, length=num_ensemble) + + self.representations = representations + self.batch = batch + self.heads = {} + for head_name, head_config in sorted(self.config.heads.items()): + if not head_config.weight: + continue # Do not instantiate zero-weight heads. 
+ + head_factory = { + 'masked_msa': + modules.MaskedMsaHead, + 'distogram': + modules.DistogramHead, + 'structure_module': + folding_multimer.StructureModule, + 'predicted_aligned_error': + modules.PredictedAlignedErrorHead, + 'predicted_lddt': + modules.PredictedLDDTHead, + 'experimentally_resolved': + modules.ExperimentallyResolvedHead, + }[head_name] + self.heads[head_name] = (head_config, + head_factory(head_config, self.global_config)) + + structure_module_output = None + if 'entity_id' in batch and 'all_atom_positions' in batch: + _, fold_module = self.heads['structure_module'] + structure_module_output = fold_module(representations, batch, is_training) + + ret = {} + ret['representations'] = representations + + for name, (head_config, module) in self.heads.items(): + if name == 'structure_module' and structure_module_output is not None: + ret[name] = structure_module_output + representations['structure_module'] = structure_module_output.pop('act') + # Skip confidence heads until StructureModule is executed. + elif name in {'predicted_lddt', 'predicted_aligned_error', + 'experimentally_resolved'}: + continue + else: + ret[name] = module(representations, batch, is_training) + + # Add confidence heads after StructureModule is executed. + if self.config.heads.get('predicted_lddt.weight', 0.0): + name = 'predicted_lddt' + head_config, module = self.heads[name] + ret[name] = module(representations, batch, is_training) + + if self.config.heads.experimentally_resolved.weight: + name = 'experimentally_resolved' + head_config, module = self.heads[name] + ret[name] = module(representations, batch, is_training) + + if self.config.heads.get('predicted_aligned_error.weight', 0.0): + name = 'predicted_aligned_error' + head_config, module = self.heads[name] + ret[name] = module(representations, batch, is_training) + # Will be used for ipTM computation. 
+ ret[name]['asym_id'] = batch['asym_id'] + + return ret + + +class AlphaFold(hk.Module): + """AlphaFold-Multimer model with recycling. + """ + + def __init__(self, config, name='alphafold'): + super().__init__(name=name) + self.config = config + self.global_config = config.global_config + + def __call__( + self, + batch, + is_training, + return_representations=False, + safe_key=None): + + c = self.config + impl = AlphaFoldIteration(c, self.global_config) + + if safe_key is None: + safe_key = prng.SafeKey(hk.next_rng_key()) + elif isinstance(safe_key, jnp.ndarray): + safe_key = prng.SafeKey(safe_key) + + assert isinstance(batch, dict) + num_res = batch['aatype'].shape[0] + + def get_prev(ret): + new_prev = { + 'prev_pos': + ret['structure_module']['final_atom_positions'], + 'prev_msa_first_row': ret['representations']['msa_first_row'], + 'prev_pair': ret['representations']['pair'], + } + return jax.tree_map(jax.lax.stop_gradient, new_prev) + + def apply_network(prev, safe_key): + recycled_batch = {**batch, **prev} + return impl( + batch=recycled_batch, + is_training=is_training, + safe_key=safe_key) + + if self.config.num_recycle: + emb_config = self.config.embeddings_and_evoformer + prev = { + 'prev_pos': + jnp.zeros([num_res, residue_constants.atom_type_num, 3]), + 'prev_msa_first_row': + jnp.zeros([num_res, emb_config.msa_channel]), + 'prev_pair': + jnp.zeros([num_res, num_res, emb_config.pair_channel]), + } + + if 'num_iter_recycling' in batch: + # Training time: num_iter_recycling is in batch. + # Value for each ensemble batch is the same, so arbitrarily taking 0-th. + num_iter = batch['num_iter_recycling'][0] + + # Add insurance that even when ensembling, we will not run more + # recyclings than the model is configured to run. + num_iter = jnp.minimum(num_iter, c.num_recycle) + else: + # Eval mode or tests: use the maximum number of iterations. 
+ num_iter = c.num_recycle + + def recycle_body(i, x): + del i + prev, safe_key = x + safe_key1, safe_key2 = safe_key.split() if c.resample_msa_in_recycling else safe_key.duplicate() # pylint: disable=line-too-long + ret = apply_network(prev=prev, safe_key=safe_key2) + return get_prev(ret), safe_key1 + + prev, safe_key = hk.fori_loop(0, num_iter, recycle_body, (prev, safe_key)) + else: + prev = {} + + # Run extra iteration. + ret = apply_network(prev=prev, safe_key=safe_key) + + if not return_representations: + del ret['representations'] + return ret + + +class EmbeddingsAndEvoformer(hk.Module): + """Embeds the input data and runs Evoformer. + + Produces the MSA, single and pair representations. + """ + + def __init__(self, config, global_config, name='evoformer'): + super().__init__(name=name) + self.config = config + self.global_config = global_config + + def _relative_encoding(self, batch): + """Add relative position encodings. + + For position (i, j), the value is (i-j) clipped to [-k, k] and one-hotted. + + When not using 'use_chain_relative' the residue indices are used as is, e.g. + for heteromers relative positions will be computed using the positions in + the corresponding chains. + + When using 'use_chain_relative' we add an extra bin that denotes + 'different chain'. Furthermore we also provide the relative chain index + (i.e. sym_id) clipped and one-hotted to the network. And an extra feature + which denotes whether they belong to the same chain type, i.e. it's 0 if + they are in different heteromer chains and 1 otherwise. + + Args: + batch: batch. + Returns: + Feature embedding using the features as described before. 
+ """ + c = self.config + rel_feats = [] + pos = batch['residue_index'] + asym_id = batch['asym_id'] + asym_id_same = jnp.equal(asym_id[:, None], asym_id[None, :]) + offset = pos[:, None] - pos[None, :] + + clipped_offset = jnp.clip( + offset + c.max_relative_idx, a_min=0, a_max=2 * c.max_relative_idx) + + if c.use_chain_relative: + + final_offset = jnp.where(asym_id_same, clipped_offset, + (2 * c.max_relative_idx + 1) * + jnp.ones_like(clipped_offset)) + + rel_pos = jax.nn.one_hot(final_offset, 2 * c.max_relative_idx + 2) + + rel_feats.append(rel_pos) + + entity_id = batch['entity_id'] + entity_id_same = jnp.equal(entity_id[:, None], entity_id[None, :]) + rel_feats.append(entity_id_same.astype(rel_pos.dtype)[..., None]) + + sym_id = batch['sym_id'] + rel_sym_id = sym_id[:, None] - sym_id[None, :] + + max_rel_chain = c.max_relative_chain + + clipped_rel_chain = jnp.clip( + rel_sym_id + max_rel_chain, a_min=0, a_max=2 * max_rel_chain) + + final_rel_chain = jnp.where(entity_id_same, clipped_rel_chain, + (2 * max_rel_chain + 1) * + jnp.ones_like(clipped_rel_chain)) + rel_chain = jax.nn.one_hot(final_rel_chain, 2 * c.max_relative_chain + 2) + + rel_feats.append(rel_chain) + + else: + rel_pos = jax.nn.one_hot(clipped_offset, 2 * c.max_relative_idx + 1) + rel_feats.append(rel_pos) + + rel_feat = jnp.concatenate(rel_feats, axis=-1) + + return common_modules.Linear( + c.pair_channel, + name='position_activations')( + rel_feat) + + def __call__(self, batch, is_training, safe_key=None): + + c = self.config + gc = self.global_config + + batch = dict(batch) + + if safe_key is None: + safe_key = prng.SafeKey(hk.next_rng_key()) + + output = {} + + batch['msa_profile'] = make_msa_profile(batch) + + target_feat = jax.nn.one_hot(batch['aatype'], 21) + + preprocess_1d = common_modules.Linear( + c.msa_channel, name='preprocess_1d')( + target_feat) + + safe_key, sample_key, mask_key = safe_key.split(3) + batch = sample_msa(sample_key, batch, c.num_msa) + batch = make_masked_msa(batch, 
mask_key, c.masked_msa) + + (batch['cluster_profile'], + batch['cluster_deletion_mean']) = nearest_neighbor_clusters(batch) + + msa_feat = create_msa_feat(batch) + + preprocess_msa = common_modules.Linear( + c.msa_channel, name='preprocess_msa')( + msa_feat) + + msa_activations = jnp.expand_dims(preprocess_1d, axis=0) + preprocess_msa + + left_single = common_modules.Linear( + c.pair_channel, name='left_single')( + target_feat) + right_single = common_modules.Linear( + c.pair_channel, name='right_single')( + target_feat) + pair_activations = left_single[:, None] + right_single[None] + mask_2d = batch['seq_mask'][:, None] * batch['seq_mask'][None, :] + mask_2d = mask_2d.astype(jnp.float32) + + if c.recycle_pos and 'prev_pos' in batch: + prev_pseudo_beta = modules.pseudo_beta_fn( + batch['aatype'], batch['prev_pos'], None) + + dgram = modules.dgram_from_positions( + prev_pseudo_beta, **self.config.prev_pos) + pair_activations += common_modules.Linear( + c.pair_channel, name='prev_pos_linear')( + dgram) + + if c.recycle_features: + if 'prev_msa_first_row' in batch: + prev_msa_first_row = hk.LayerNorm( + axis=[-1], + create_scale=True, + create_offset=True, + name='prev_msa_first_row_norm')( + batch['prev_msa_first_row']) + msa_activations = msa_activations.at[0].add(prev_msa_first_row) + + if 'prev_pair' in batch: + pair_activations += hk.LayerNorm( + axis=[-1], + create_scale=True, + create_offset=True, + name='prev_pair_norm')( + batch['prev_pair']) + + if c.max_relative_idx: + pair_activations += self._relative_encoding(batch) + + if c.template.enabled: + template_module = TemplateEmbedding(c.template, gc) + template_batch = { + 'template_aatype': batch['template_aatype'], + 'template_all_atom_positions': batch['template_all_atom_positions'], + 'template_all_atom_mask': batch['template_all_atom_mask'] + } + # Construct a mask such that only intra-chain template features are + # computed, since all templates are for each chain individually. 
+ multichain_mask = batch['asym_id'][:, None] == batch['asym_id'][None, :] + safe_key, safe_subkey = safe_key.split() + template_act = template_module( + query_embedding=pair_activations, + template_batch=template_batch, + padding_mask_2d=mask_2d, + multichain_mask_2d=multichain_mask, + is_training=is_training, + safe_key=safe_subkey) + pair_activations += template_act + + # Extra MSA stack. + (extra_msa_feat, + extra_msa_mask) = create_extra_msa_feature(batch, c.num_extra_msa) + extra_msa_activations = common_modules.Linear( + c.extra_msa_channel, + name='extra_msa_activations')( + extra_msa_feat) + extra_msa_mask = extra_msa_mask.astype(jnp.float32) + + extra_evoformer_input = { + 'msa': extra_msa_activations, + 'pair': pair_activations, + } + extra_masks = {'msa': extra_msa_mask, 'pair': mask_2d} + + extra_evoformer_iteration = modules.EvoformerIteration( + c.evoformer, gc, is_extra_msa=True, name='extra_msa_stack') + + def extra_evoformer_fn(x): + act, safe_key = x + safe_key, safe_subkey = safe_key.split() + extra_evoformer_output = extra_evoformer_iteration( + activations=act, + masks=extra_masks, + is_training=is_training, + safe_key=safe_subkey) + return (extra_evoformer_output, safe_key) + + if gc.use_remat: + extra_evoformer_fn = hk.remat(extra_evoformer_fn) + + safe_key, safe_subkey = safe_key.split() + extra_evoformer_stack = layer_stack.layer_stack( + c.extra_msa_stack_num_block)( + extra_evoformer_fn) + extra_evoformer_output, safe_key = extra_evoformer_stack( + (extra_evoformer_input, safe_subkey)) + + pair_activations = extra_evoformer_output['pair'] + + # Get the size of the MSA before potentially adding templates, so we + # can crop out the templates later. 
+ num_msa_sequences = msa_activations.shape[0] + evoformer_input = { + 'msa': msa_activations, + 'pair': pair_activations, + } + evoformer_masks = {'msa': batch['msa_mask'].astype(jnp.float32), + 'pair': mask_2d} + + if c.template.enabled: + template_features, template_masks = ( + template_embedding_1d(batch=batch, num_channel=c.msa_channel)) + + evoformer_input['msa'] = jnp.concatenate( + [evoformer_input['msa'], template_features], axis=0) + evoformer_masks['msa'] = jnp.concatenate( + [evoformer_masks['msa'], template_masks], axis=0) + + evoformer_iteration = modules.EvoformerIteration( + c.evoformer, gc, is_extra_msa=False, name='evoformer_iteration') + + def evoformer_fn(x): + act, safe_key = x + safe_key, safe_subkey = safe_key.split() + evoformer_output = evoformer_iteration( + activations=act, + masks=evoformer_masks, + is_training=is_training, + safe_key=safe_subkey) + return (evoformer_output, safe_key) + + if gc.use_remat: + evoformer_fn = hk.remat(evoformer_fn) + + safe_key, safe_subkey = safe_key.split() + evoformer_stack = layer_stack.layer_stack(c.evoformer_num_block)( + evoformer_fn) + + def run_evoformer(evoformer_input): + evoformer_output, _ = evoformer_stack((evoformer_input, safe_subkey)) + return evoformer_output + + evoformer_output = run_evoformer(evoformer_input) + + msa_activations = evoformer_output['msa'] + pair_activations = evoformer_output['pair'] + + single_activations = common_modules.Linear( + c.seq_channel, name='single_activations')( + msa_activations[0]) + + output.update({ + 'single': + single_activations, + 'pair': + pair_activations, + # Crop away template rows such that they are not used in MaskedMsaHead. 
+ 'msa': + msa_activations[:num_msa_sequences, :, :], + 'msa_first_row': + msa_activations[0], + }) + + return output + + +class TemplateEmbedding(hk.Module): + """Embed a set of templates.""" + + def __init__(self, config, global_config, name='template_embedding'): + super().__init__(name=name) + self.config = config + self.global_config = global_config + + def __call__(self, query_embedding, template_batch, padding_mask_2d, + multichain_mask_2d, is_training, + safe_key=None): + """Generate an embedding for a set of templates. + + Args: + query_embedding: [num_res, num_res, num_channel] a query tensor that will + be used to attend over the templates to remove the num_templates + dimension. + template_batch: A dictionary containing: + `template_aatype`: [num_templates, num_res] aatype for each template. + `template_all_atom_positions`: [num_templates, num_res, 37, 3] atom + positions for all templates. + `template_all_atom_mask`: [num_templates, num_res, 37] mask for each + template. + padding_mask_2d: [num_res, num_res] Pair mask for attention operations. + multichain_mask_2d: [num_res, num_res] Mask indicating which residue pairs + are intra-chain, used to mask out residue distance based features + between chains. + is_training: bool indicating whether we are running in training mode. + safe_key: random key generator. + + Returns: + An embedding of size [num_res, num_res, num_channels] + """ + c = self.config + if safe_key is None: + safe_key = prng.SafeKey(hk.next_rng_key()) + + num_templates = template_batch['template_aatype'].shape[0] + num_res, _, query_num_channels = query_embedding.shape + + # Embed each template separately. 
+ template_embedder = SingleTemplateEmbedding(self.config, self.global_config) + def partial_template_embedder(template_aatype, + template_all_atom_positions, + template_all_atom_mask, + unsafe_key): + safe_key = prng.SafeKey(unsafe_key) + return template_embedder(query_embedding, + template_aatype, + template_all_atom_positions, + template_all_atom_mask, + padding_mask_2d, + multichain_mask_2d, + is_training, + safe_key) + + safe_key, unsafe_key = safe_key.split() + unsafe_keys = jax.random.split(unsafe_key._key, num_templates) + + def scan_fn(carry, x): + return carry + partial_template_embedder(*x), None + + scan_init = jnp.zeros((num_res, num_res, c.num_channels), + dtype=query_embedding.dtype) + summed_template_embeddings, _ = hk.scan( + scan_fn, scan_init, + (template_batch['template_aatype'], + template_batch['template_all_atom_positions'], + template_batch['template_all_atom_mask'], unsafe_keys)) + + embedding = summed_template_embeddings / num_templates + embedding = jax.nn.relu(embedding) + embedding = common_modules.Linear( + query_num_channels, + initializer='relu', + name='output_linear')(embedding) + + return embedding + + +class SingleTemplateEmbedding(hk.Module): + """Embed a single template.""" + + def __init__(self, config, global_config, name='single_template_embedding'): + super().__init__(name=name) + self.config = config + self.global_config = global_config + + def __call__(self, query_embedding, template_aatype, + template_all_atom_positions, template_all_atom_mask, + padding_mask_2d, multichain_mask_2d, is_training, + safe_key): + """Build the single template embedding graph. + + Args: + query_embedding: (num_res, num_res, num_channels) - embedding of the + query sequence/msa. + template_aatype: [num_res] aatype for each template. + template_all_atom_positions: [num_res, 37, 3] atom positions for all + templates. + template_all_atom_mask: [num_res, 37] mask for each template. 
+ padding_mask_2d: Padding mask (Note: this doesn't care if a template + exists, unlike the template_pseudo_beta_mask). + multichain_mask_2d: A mask indicating intra-chain residue pairs, used + to mask out between chain distances/features when templates are for + single chains. + is_training: Are we in training mode. + safe_key: Random key generator. + + Returns: + A template embedding (num_res, num_res, num_channels). + """ + gc = self.global_config + c = self.config + assert padding_mask_2d.dtype == query_embedding.dtype + dtype = query_embedding.dtype + num_channels = self.config.num_channels + + def construct_input(query_embedding, template_aatype, + template_all_atom_positions, template_all_atom_mask, + multichain_mask_2d): + + # Compute distogram feature for the template. + template_positions, pseudo_beta_mask = modules.pseudo_beta_fn( + template_aatype, template_all_atom_positions, template_all_atom_mask) + pseudo_beta_mask_2d = (pseudo_beta_mask[:, None] * + pseudo_beta_mask[None, :]) + pseudo_beta_mask_2d *= multichain_mask_2d + template_dgram = modules.dgram_from_positions( + template_positions, **self.config.dgram_features) + template_dgram *= pseudo_beta_mask_2d[..., None] + template_dgram = template_dgram.astype(dtype) + pseudo_beta_mask_2d = pseudo_beta_mask_2d.astype(dtype) + to_concat = [(template_dgram, 1), (pseudo_beta_mask_2d, 0)] + + aatype = jax.nn.one_hot(template_aatype, 22, axis=-1, dtype=dtype) + to_concat.append((aatype[None, :, :], 1)) + to_concat.append((aatype[:, None, :], 1)) + + # Compute a feature representing the normalized vector between each + # backbone affine - i.e. in each residues local frame, what direction are + # each of the other residues. 
+ raw_atom_pos = template_all_atom_positions + + atom_pos = geometry.Vec3Array.from_array(raw_atom_pos) + rigid, backbone_mask = folding_multimer.make_backbone_affine( + atom_pos, + template_all_atom_mask, + template_aatype) + points = rigid.translation + rigid_vec = rigid[:, None].inverse().apply_to_point(points) + unit_vector = rigid_vec.normalized() + unit_vector = [unit_vector.x, unit_vector.y, unit_vector.z] + + backbone_mask_2d = backbone_mask[:, None] * backbone_mask[None, :] + backbone_mask_2d *= multichain_mask_2d + unit_vector = [x*backbone_mask_2d for x in unit_vector] + + # Note that the backbone_mask takes into account C, CA and N (unlike + # pseudo beta mask which just needs CB) so we add both masks as features. + to_concat.extend([(x, 0) for x in unit_vector]) + to_concat.append((backbone_mask_2d, 0)) + + query_embedding = hk.LayerNorm( + axis=[-1], + create_scale=True, + create_offset=True, + name='query_embedding_norm')( + query_embedding) + # Allow the template embedder to see the query embedding. Note this + # contains the position relative feature, so this is how the network knows + # which residues are next to each other. 
+ to_concat.append((query_embedding, 1)) + + act = 0 + + for i, (x, n_input_dims) in enumerate(to_concat): + + act += common_modules.Linear( + num_channels, + num_input_dims=n_input_dims, + initializer='relu', + name=f'template_pair_embedding_{i}')(x) + return act + + act = construct_input(query_embedding, template_aatype, + template_all_atom_positions, template_all_atom_mask, + multichain_mask_2d) + + template_iteration = TemplateEmbeddingIteration( + c.template_pair_stack, gc, name='template_embedding_iteration') + + def template_iteration_fn(x): + act, safe_key = x + + safe_key, safe_subkey = safe_key.split() + act = template_iteration( + act=act, + pair_mask=padding_mask_2d, + is_training=is_training, + safe_key=safe_subkey) + return (act, safe_key) + + if gc.use_remat: + template_iteration_fn = hk.remat(template_iteration_fn) + + safe_key, safe_subkey = safe_key.split() + template_stack = layer_stack.layer_stack( + c.template_pair_stack.num_block)( + template_iteration_fn) + act, safe_key = template_stack((act, safe_subkey)) + + act = hk.LayerNorm( + axis=[-1], + create_scale=True, + create_offset=True, + name='output_layer_norm')( + act) + return act + + +class TemplateEmbeddingIteration(hk.Module): + """Single Iteration of Template Embedding.""" + + def __init__(self, config, global_config, + name='template_embedding_iteration'): + super().__init__(name=name) + self.config = config + self.global_config = global_config + + def __call__(self, act, pair_mask, is_training=True, + safe_key=None): + """Build a single iteration of the template embedder. + + Args: + act: [num_res, num_res, num_channel] Input pairwise activations. + pair_mask: [num_res, num_res] padding mask. + is_training: Whether to run in training mode. + safe_key: Safe pseudo-random generator key. + + Returns: + [num_res, num_res, num_channel] tensor of activations. 
+ """ + c = self.config + gc = self.global_config + + if safe_key is None: + safe_key = prng.SafeKey(hk.next_rng_key()) + + dropout_wrapper_fn = functools.partial( + modules.dropout_wrapper, + is_training=is_training, + global_config=gc) + + safe_key, *sub_keys = safe_key.split(20) + sub_keys = iter(sub_keys) + + act = dropout_wrapper_fn( + modules.TriangleMultiplication(c.triangle_multiplication_outgoing, gc, + name='triangle_multiplication_outgoing'), + act, + pair_mask, + safe_key=next(sub_keys)) + + act = dropout_wrapper_fn( + modules.TriangleMultiplication(c.triangle_multiplication_incoming, gc, + name='triangle_multiplication_incoming'), + act, + pair_mask, + safe_key=next(sub_keys)) + + act = dropout_wrapper_fn( + modules.TriangleAttention(c.triangle_attention_starting_node, gc, + name='triangle_attention_starting_node'), + act, + pair_mask, + safe_key=next(sub_keys)) + + act = dropout_wrapper_fn( + modules.TriangleAttention(c.triangle_attention_ending_node, gc, + name='triangle_attention_ending_node'), + act, + pair_mask, + safe_key=next(sub_keys)) + + act = dropout_wrapper_fn( + modules.Transition(c.pair_transition, gc, + name='pair_transition'), + act, + pair_mask, + safe_key=next(sub_keys)) + + return act + + +def template_embedding_1d(batch, num_channel): + """Embed templates into a (num_templates, num_res, num_channels) embedding. + + Args: + batch: A batch containing: + template_aatype, (num_templates, num_res) aatype for the templates. + template_all_atom_positions, (num_templates, num_residues, 37, 3) atom + positions for the templates. + template_all_atom_mask, (num_templates, num_residues, 37) atom mask for + each template. + num_channel: The number of channels in the output. + + Returns: + An embedding of shape (num_templates, num_res, num_channels) and a mask of + shape (num_templates, num_res). + """ + + # Embed the templates aatypes. 
+ aatype_one_hot = jax.nn.one_hot(batch['template_aatype'], 22, axis=-1) + + num_templates = batch['template_aatype'].shape[0] + all_chi_angles = [] + all_chi_masks = [] + for i in range(num_templates): + atom_pos = geometry.Vec3Array.from_array( + batch['template_all_atom_positions'][i, :, :, :]) + template_chi_angles, template_chi_mask = all_atom_multimer.compute_chi_angles( + atom_pos, + batch['template_all_atom_mask'][i, :, :], + batch['template_aatype'][i, :]) + all_chi_angles.append(template_chi_angles) + all_chi_masks.append(template_chi_mask) + chi_angles = jnp.stack(all_chi_angles, axis=0) + chi_mask = jnp.stack(all_chi_masks, axis=0) + + template_features = jnp.concatenate([ + aatype_one_hot, + jnp.sin(chi_angles) * chi_mask, + jnp.cos(chi_angles) * chi_mask, + chi_mask], axis=-1) + + template_mask = chi_mask[:, :, 0] + + template_activations = common_modules.Linear( + num_channel, + initializer='relu', + name='template_single_embedding')( + template_features) + template_activations = jax.nn.relu(template_activations) + template_activations = common_modules.Linear( + num_channel, + initializer='relu', + name='template_projection')( + template_activations) + return template_activations, template_mask diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/model/prng.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/model/prng.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,69 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +"""A collection of utilities surrounding PRNG usage in protein folding.""" + +import haiku as hk +import jax + + +def safe_dropout(*, tensor, safe_key, rate, is_deterministic, is_training): + if is_training and rate != 0.0 and not is_deterministic: + return hk.dropout(safe_key.get(), rate, tensor) + else: + return tensor + + +class SafeKey: + """Safety wrapper for PRNG keys.""" + + def __init__(self, key): + self._key = key + self._used = False + + def _assert_not_used(self): + if self._used: + raise RuntimeError('Random key has been used previously.') + + def get(self): + self._assert_not_used() + self._used = True + return self._key + + def split(self, num_keys=2): + self._assert_not_used() + self._used = True + new_keys = jax.random.split(self._key, num_keys) + return jax.tree_map(SafeKey, tuple(new_keys)) + + def duplicate(self, num_keys=2): + self._assert_not_used() + self._used = True + return tuple(SafeKey(self._key) for _ in range(num_keys)) + + +def _safe_key_flatten(safe_key): + # Flatten transfers "ownership" to the tree + return (safe_key._key,), safe_key._used # pylint: disable=protected-access + + +def _safe_key_unflatten(aux_data, children): + ret = SafeKey(children[0]) + ret._used = aux_data # pylint: disable=protected-access + return ret + + +jax.tree_util.register_pytree_node( + SafeKey, _safe_key_flatten, _safe_key_unflatten) + diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/model/prng_test.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/model/prng_test.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,46 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for prng.""" + +from absl.testing import absltest +from alphafold.model import prng +import jax + + +class PrngTest(absltest.TestCase): + + def test_key_reuse(self): + + init_key = jax.random.PRNGKey(42) + safe_key = prng.SafeKey(init_key) + _, safe_key = safe_key.split() + + raw_key = safe_key.get() + + self.assertNotEqual(raw_key[0], init_key[0]) + self.assertNotEqual(raw_key[1], init_key[1]) + + with self.assertRaises(RuntimeError): + safe_key.get() + + with self.assertRaises(RuntimeError): + safe_key.split() + + with self.assertRaises(RuntimeError): + safe_key.duplicate() + + +if __name__ == '__main__': + absltest.main() diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/model/quat_affine.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/model/quat_affine.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,459 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Quaternion geometry modules. 
+ +This introduces a representation of coordinate frames that is based around a +‘QuatAffine’ object. This object describes an array of coordinate frames. +It consists of vectors corresponding to the +origin of the frames as well as orientations which are stored in two +ways, as unit quaternions as well as rotation matrices. +The rotation matrices are derived from the unit quaternions and the two are kept +in sync. +For an explanation of the relation between unit quaternions and rotations see +https://en.wikipedia.org/wiki/Quaternions_and_spatial_rotation + +This representation is used in the model for the backbone frames. + +One important thing to note here, is that while we update both representations +the jit compiler is going to ensure that only the parts that are +actually used are executed. +""" + + +import functools +from typing import Tuple + +import jax +import jax.numpy as jnp +import numpy as np + +# pylint: disable=bad-whitespace +QUAT_TO_ROT = np.zeros((4, 4, 3, 3), dtype=np.float32) + +QUAT_TO_ROT[0, 0] = [[ 1, 0, 0], [ 0, 1, 0], [ 0, 0, 1]] # rr +QUAT_TO_ROT[1, 1] = [[ 1, 0, 0], [ 0,-1, 0], [ 0, 0,-1]] # ii +QUAT_TO_ROT[2, 2] = [[-1, 0, 0], [ 0, 1, 0], [ 0, 0,-1]] # jj +QUAT_TO_ROT[3, 3] = [[-1, 0, 0], [ 0,-1, 0], [ 0, 0, 1]] # kk + +QUAT_TO_ROT[1, 2] = [[ 0, 2, 0], [ 2, 0, 0], [ 0, 0, 0]] # ij +QUAT_TO_ROT[1, 3] = [[ 0, 0, 2], [ 0, 0, 0], [ 2, 0, 0]] # ik +QUAT_TO_ROT[2, 3] = [[ 0, 0, 0], [ 0, 0, 2], [ 0, 2, 0]] # jk + +QUAT_TO_ROT[0, 1] = [[ 0, 0, 0], [ 0, 0,-2], [ 0, 2, 0]] # ir +QUAT_TO_ROT[0, 2] = [[ 0, 0, 2], [ 0, 0, 0], [-2, 0, 0]] # jr +QUAT_TO_ROT[0, 3] = [[ 0,-2, 0], [ 2, 0, 0], [ 0, 0, 0]] # kr + +QUAT_MULTIPLY = np.zeros((4, 4, 4), dtype=np.float32) +QUAT_MULTIPLY[:, :, 0] = [[ 1, 0, 0, 0], + [ 0,-1, 0, 0], + [ 0, 0,-1, 0], + [ 0, 0, 0,-1]] + +QUAT_MULTIPLY[:, :, 1] = [[ 0, 1, 0, 0], + [ 1, 0, 0, 0], + [ 0, 0, 0, 1], + [ 0, 0,-1, 0]] + +QUAT_MULTIPLY[:, :, 2] = [[ 0, 0, 1, 0], + [ 0, 0, 0,-1], + [ 1, 0, 0, 0], + [ 0, 1, 0, 0]] + 
+QUAT_MULTIPLY[:, :, 3] = [[ 0, 0, 0, 1], + [ 0, 0, 1, 0], + [ 0,-1, 0, 0], + [ 1, 0, 0, 0]] + +QUAT_MULTIPLY_BY_VEC = QUAT_MULTIPLY[:, 1:, :] +# pylint: enable=bad-whitespace + + +def rot_to_quat(rot, unstack_inputs=False): + """Convert rotation matrix to quaternion. + + Note that this function calls jnp.linalg.eigh (a self-adjoint + eigendecomposition), which is extremely expensive on the GPU. If at all possible, this function should run on the CPU. + + Args: + rot: rotation matrix (see below for format). + unstack_inputs: If true, rotation matrix should be shape (..., 3, 3) + otherwise the rotation matrix should be a list of lists of tensors. + + Returns: + Quaternion as (..., 4) tensor. + """ + if unstack_inputs: + rot = [jnp.moveaxis(x, -1, 0) for x in jnp.moveaxis(rot, -2, 0)] + + [[xx, xy, xz], [yx, yy, yz], [zx, zy, zz]] = rot + + # pylint: disable=bad-whitespace + k = [[ xx + yy + zz, zy - yz, xz - zx, yx - xy,], + [ zy - yz, xx - yy - zz, xy + yx, xz + zx,], + [ xz - zx, xy + yx, yy - xx - zz, yz + zy,], + [ yx - xy, xz + zx, yz + zy, zz - xx - yy,]] + # pylint: enable=bad-whitespace + + k = (1./3.) * jnp.stack([jnp.stack(x, axis=-1) for x in k], + axis=-2) + + # Get eigenvalues in non-decreasing order and the associated eigenvectors. + _, qs = jnp.linalg.eigh(k) + return qs[..., -1] + + +def rot_list_to_tensor(rot_list): + """Convert list of lists to rotation tensor.""" + return jnp.stack( + [jnp.stack(rot_list[0], axis=-1), + jnp.stack(rot_list[1], axis=-1), + jnp.stack(rot_list[2], axis=-1)], + axis=-2) + + +def vec_list_to_tensor(vec_list): + """Convert list to vector tensor.""" + return jnp.stack(vec_list, axis=-1) + + +def quat_to_rot(normalized_quat): + """Convert a normalized quaternion to a rotation matrix.""" + rot_tensor = jnp.sum( + np.reshape(QUAT_TO_ROT, (4, 4, 9)) * + normalized_quat[..., :, None, None] * + normalized_quat[..., None, :, None], + axis=(-3, -2)) + rot = jnp.moveaxis(rot_tensor, -1, 0) # Unstack. 
+ return [[rot[0], rot[1], rot[2]], + [rot[3], rot[4], rot[5]], + [rot[6], rot[7], rot[8]]] + + +def quat_multiply_by_vec(quat, vec): + """Multiply a quaternion by a pure-vector quaternion.""" + return jnp.sum( + QUAT_MULTIPLY_BY_VEC * + quat[..., :, None, None] * + vec[..., None, :, None], + axis=(-3, -2)) + + +def quat_multiply(quat1, quat2): + """Multiply a quaternion by another quaternion.""" + return jnp.sum( + QUAT_MULTIPLY * + quat1[..., :, None, None] * + quat2[..., None, :, None], + axis=(-3, -2)) + + +def apply_rot_to_vec(rot, vec, unstack=False): + """Multiply rotation matrix by a vector.""" + if unstack: + x, y, z = [vec[:, i] for i in range(3)] + else: + x, y, z = vec + return [rot[0][0] * x + rot[0][1] * y + rot[0][2] * z, + rot[1][0] * x + rot[1][1] * y + rot[1][2] * z, + rot[2][0] * x + rot[2][1] * y + rot[2][2] * z] + + +def apply_inverse_rot_to_vec(rot, vec): + """Multiply the inverse of a rotation matrix by a vector.""" + # Inverse rotation is just transpose + return [rot[0][0] * vec[0] + rot[1][0] * vec[1] + rot[2][0] * vec[2], + rot[0][1] * vec[0] + rot[1][1] * vec[1] + rot[2][1] * vec[2], + rot[0][2] * vec[0] + rot[1][2] * vec[1] + rot[2][2] * vec[2]] + + +class QuatAffine(object): + """Affine transformation represented by quaternion and vector.""" + + def __init__(self, quaternion, translation, rotation=None, normalize=True, + unstack_inputs=False): + """Initialize from quaternion and translation. + + Args: + quaternion: Rotation represented by a quaternion, to be applied + before translation. Must be a unit quaternion unless normalize==True. + translation: Translation represented as a vector. + rotation: Same rotation as the quaternion, represented as a (..., 3, 3) + tensor. If None, rotation will be calculated from the quaternion. + normalize: If True, l2 normalize the quaternion on input. 
+ unstack_inputs: If True, translation is a vector with last component 3 + """ + + if quaternion is not None: + assert quaternion.shape[-1] == 4 + + if unstack_inputs: + if rotation is not None: + rotation = [jnp.moveaxis(x, -1, 0) # Unstack. + for x in jnp.moveaxis(rotation, -2, 0)] # Unstack. + translation = jnp.moveaxis(translation, -1, 0) # Unstack. + + if normalize and quaternion is not None: + quaternion = quaternion / jnp.linalg.norm(quaternion, axis=-1, + keepdims=True) + + if rotation is None: + rotation = quat_to_rot(quaternion) + + self.quaternion = quaternion + self.rotation = [list(row) for row in rotation] + self.translation = list(translation) + + assert all(len(row) == 3 for row in self.rotation) + assert len(self.translation) == 3 + + def to_tensor(self): + return jnp.concatenate( + [self.quaternion] + + [jnp.expand_dims(x, axis=-1) for x in self.translation], + axis=-1) + + def apply_tensor_fn(self, tensor_fn): + """Return a new QuatAffine with tensor_fn applied (e.g. stop_gradient).""" + return QuatAffine( + tensor_fn(self.quaternion), + [tensor_fn(x) for x in self.translation], + rotation=[[tensor_fn(x) for x in row] for row in self.rotation], + normalize=False) + + def apply_rotation_tensor_fn(self, tensor_fn): + """Return a new QuatAffine with tensor_fn applied to the rotation part.""" + return QuatAffine( + tensor_fn(self.quaternion), + [x for x in self.translation], + rotation=[[tensor_fn(x) for x in row] for row in self.rotation], + normalize=False) + + def scale_translation(self, position_scale): + """Return a new quat affine with a different scale for translation.""" + + return QuatAffine( + self.quaternion, + [x * position_scale for x in self.translation], + rotation=[[x for x in row] for row in self.rotation], + normalize=False) + + @classmethod + def from_tensor(cls, tensor, normalize=False): + quaternion, tx, ty, tz = jnp.split(tensor, [4, 5, 6], axis=-1) + return cls(quaternion, + [tx[..., 0], ty[..., 0], tz[..., 0]], + 
normalize=normalize) + + def pre_compose(self, update): + """Return a new QuatAffine which applies the transformation update first. + + Args: + update: Length-6 vector. 3-vector of x, y, and z such that the quaternion + update is (1, x, y, z) and zero for the 3-vector is the identity + quaternion. 3-vector for translation concatenated. + + Returns: + New QuatAffine object. + """ + vector_quaternion_update, x, y, z = jnp.split(update, [3, 4, 5], axis=-1) + trans_update = [jnp.squeeze(x, axis=-1), + jnp.squeeze(y, axis=-1), + jnp.squeeze(z, axis=-1)] + + new_quaternion = (self.quaternion + + quat_multiply_by_vec(self.quaternion, + vector_quaternion_update)) + + trans_update = apply_rot_to_vec(self.rotation, trans_update) + new_translation = [ + self.translation[0] + trans_update[0], + self.translation[1] + trans_update[1], + self.translation[2] + trans_update[2]] + + return QuatAffine(new_quaternion, new_translation) + + def apply_to_point(self, point, extra_dims=0): + """Apply affine to a point. + + Args: + point: List of 3 tensors to apply affine. + extra_dims: Number of dimensions at the end of the transformed_point + shape that are not present in the rotation and translation. The most + common use is rotation N points at once with extra_dims=1 for use in a + network. + + Returns: + Transformed point after applying affine. + """ + rotation = self.rotation + translation = self.translation + for _ in range(extra_dims): + expand_fn = functools.partial(jnp.expand_dims, axis=-1) + rotation = jax.tree_map(expand_fn, rotation) + translation = jax.tree_map(expand_fn, translation) + + rot_point = apply_rot_to_vec(rotation, point) + return [ + rot_point[0] + translation[0], + rot_point[1] + translation[1], + rot_point[2] + translation[2]] + + def invert_point(self, transformed_point, extra_dims=0): + """Apply inverse of transformation to a point. 
+ + Args: + transformed_point: List of 3 tensors to apply affine + extra_dims: Number of dimensions at the end of the transformed_point + shape that are not present in the rotation and translation. The most + common use is rotation N points at once with extra_dims=1 for use in a + network. + + Returns: + Transformed point after applying affine. + """ + rotation = self.rotation + translation = self.translation + for _ in range(extra_dims): + expand_fn = functools.partial(jnp.expand_dims, axis=-1) + rotation = jax.tree_map(expand_fn, rotation) + translation = jax.tree_map(expand_fn, translation) + + rot_point = [ + transformed_point[0] - translation[0], + transformed_point[1] - translation[1], + transformed_point[2] - translation[2]] + + return apply_inverse_rot_to_vec(rotation, rot_point) + + def __repr__(self): + return 'QuatAffine(%r, %r)' % (self.quaternion, self.translation) + + +def _multiply(a, b): + return jnp.stack([ + jnp.array([a[0][0]*b[0][0] + a[0][1]*b[1][0] + a[0][2]*b[2][0], + a[0][0]*b[0][1] + a[0][1]*b[1][1] + a[0][2]*b[2][1], + a[0][0]*b[0][2] + a[0][1]*b[1][2] + a[0][2]*b[2][2]]), + + jnp.array([a[1][0]*b[0][0] + a[1][1]*b[1][0] + a[1][2]*b[2][0], + a[1][0]*b[0][1] + a[1][1]*b[1][1] + a[1][2]*b[2][1], + a[1][0]*b[0][2] + a[1][1]*b[1][2] + a[1][2]*b[2][2]]), + + jnp.array([a[2][0]*b[0][0] + a[2][1]*b[1][0] + a[2][2]*b[2][0], + a[2][0]*b[0][1] + a[2][1]*b[1][1] + a[2][2]*b[2][1], + a[2][0]*b[0][2] + a[2][1]*b[1][2] + a[2][2]*b[2][2]])]) + + +def make_canonical_transform( + n_xyz: jnp.ndarray, + ca_xyz: jnp.ndarray, + c_xyz: jnp.ndarray) -> Tuple[jnp.ndarray, jnp.ndarray]: + """Returns translation and rotation matrices to canonicalize residue atoms. + + Note that this method does not take care of symmetries. If you provide the + atom positions in the non-standard way, the N atom will end up not at + [-0.527250, 1.359329, 0.0] but instead at [-0.527250, -1.359329, 0.0]. You + need to take care of such cases in your code. 
+ + Args: + n_xyz: An array of shape [batch, 3] of nitrogen xyz coordinates. + ca_xyz: An array of shape [batch, 3] of carbon alpha xyz coordinates. + c_xyz: An array of shape [batch, 3] of carbon xyz coordinates. + + Returns: + A tuple (translation, rotation) where: + translation is an array of shape [batch, 3] defining the translation. + rotation is an array of shape [batch, 3, 3] defining the rotation. + After applying the translation and rotation to all atoms in a residue: + * All atoms will be shifted so that CA is at the origin, + * All atoms will be rotated so that C is at the x-axis, + * All atoms will be shifted so that N is in the xy plane. + """ + assert len(n_xyz.shape) == 2, n_xyz.shape + assert n_xyz.shape[-1] == 3, n_xyz.shape + assert n_xyz.shape == ca_xyz.shape == c_xyz.shape, ( + n_xyz.shape, ca_xyz.shape, c_xyz.shape) + + # Place CA at the origin. + translation = -ca_xyz + n_xyz = n_xyz + translation + c_xyz = c_xyz + translation + + # Place C on the x-axis. + c_x, c_y, c_z = [c_xyz[:, i] for i in range(3)] + # Rotate by angle c1 in the x-y plane (around the z-axis). + sin_c1 = -c_y / jnp.sqrt(1e-20 + c_x**2 + c_y**2) + cos_c1 = c_x / jnp.sqrt(1e-20 + c_x**2 + c_y**2) + zeros = jnp.zeros_like(sin_c1) + ones = jnp.ones_like(sin_c1) + # pylint: disable=bad-whitespace + c1_rot_matrix = jnp.stack([jnp.array([cos_c1, -sin_c1, zeros]), + jnp.array([sin_c1, cos_c1, zeros]), + jnp.array([zeros, zeros, ones])]) + + # Rotate by angle c2 in the x-z plane (around the y-axis). + sin_c2 = c_z / jnp.sqrt(1e-20 + c_x**2 + c_y**2 + c_z**2) + cos_c2 = jnp.sqrt(c_x**2 + c_y**2) / jnp.sqrt( + 1e-20 + c_x**2 + c_y**2 + c_z**2) + c2_rot_matrix = jnp.stack([jnp.array([cos_c2, zeros, sin_c2]), + jnp.array([zeros, ones, zeros]), + jnp.array([-sin_c2, zeros, cos_c2])]) + + c_rot_matrix = _multiply(c2_rot_matrix, c1_rot_matrix) + n_xyz = jnp.stack(apply_rot_to_vec(c_rot_matrix, n_xyz, unstack=True)).T + + # Place N in the x-y plane. 
+ _, n_y, n_z = [n_xyz[:, i] for i in range(3)] + # Rotate by angle alpha in the y-z plane (around the x-axis). + sin_n = -n_z / jnp.sqrt(1e-20 + n_y**2 + n_z**2) + cos_n = n_y / jnp.sqrt(1e-20 + n_y**2 + n_z**2) + n_rot_matrix = jnp.stack([jnp.array([ones, zeros, zeros]), + jnp.array([zeros, cos_n, -sin_n]), + jnp.array([zeros, sin_n, cos_n])]) + # pylint: enable=bad-whitespace + + return (translation, + jnp.transpose(_multiply(n_rot_matrix, c_rot_matrix), [2, 0, 1])) + + +def make_transform_from_reference( + n_xyz: jnp.ndarray, + ca_xyz: jnp.ndarray, + c_xyz: jnp.ndarray) -> Tuple[jnp.ndarray, jnp.ndarray]: + """Returns rotation and translation matrices to convert from reference. + + Note that this method does not take care of symmetries. If you provide the + atom positions in the non-standard way, the N atom will end up not at + [-0.527250, 1.359329, 0.0] but instead at [-0.527250, -1.359329, 0.0]. You + need to take care of such cases in your code. + + Args: + n_xyz: An array of shape [batch, 3] of nitrogen xyz coordinates. + ca_xyz: An array of shape [batch, 3] of carbon alpha xyz coordinates. + c_xyz: An array of shape [batch, 3] of carbon xyz coordinates. + + Returns: + A tuple (rotation, translation) where: + rotation is an array of shape [batch, 3, 3] defining the rotation. + translation is an array of shape [batch, 3] defining the translation. + After applying the translation and rotation to the reference backbone, + the coordinates will approximately equal to the input coordinates. + + The order of translation and rotation differs from make_canonical_transform + because the rotation from this function should be applied before the + translation, unlike make_canonical_transform. 
+ """ + translation, rotation = make_canonical_transform(n_xyz, ca_xyz, c_xyz) + return np.transpose(rotation, (0, 2, 1)), -translation diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/model/quat_affine_test.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/model/quat_affine_test.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,150 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for quat_affine.""" + +from absl import logging +from absl.testing import absltest +from alphafold.model import quat_affine +import jax +import jax.numpy as jnp +import numpy as np + +VERBOSE = False +np.set_printoptions(precision=3, suppress=True) + +r2t = quat_affine.rot_list_to_tensor +v2t = quat_affine.vec_list_to_tensor + +q2r = lambda q: r2t(quat_affine.quat_to_rot(q)) + + +class QuatAffineTest(absltest.TestCase): + + def _assert_check(self, to_check, tol=1e-5): + for k, (correct, generated) in to_check.items(): + if VERBOSE: + logging.info(k) + logging.info('Correct %s', correct) + logging.info('Predicted %s', generated) + self.assertLess(np.max(np.abs(correct - generated)), tol) + + def test_conversion(self): + quat = jnp.array([-2., 5., -1., 4.]) + + rotation = jnp.array([ + [0.26087, 0.130435, 0.956522], + [-0.565217, -0.782609, 0.26087], + [0.782609, -0.608696, -0.130435]]) + + translation = jnp.array([1., -3., 4.]) + point = jnp.array([0.7, 3.2, -2.9]) + + a = 
quat_affine.QuatAffine(quat, translation, unstack_inputs=True) + true_new_point = jnp.matmul(rotation, point[:, None])[:, 0] + translation + + self._assert_check({ + 'rot': (rotation, r2t(a.rotation)), + 'trans': (translation, v2t(a.translation)), + 'point': (true_new_point, + v2t(a.apply_to_point(jnp.moveaxis(point, -1, 0)))), + # Because of the double cover, we must be careful and compare rotations + 'quat': (q2r(a.quaternion), + q2r(quat_affine.rot_to_quat(a.rotation))), + + }) + + def test_double_cover(self): + """Test that -q is the same rotation as q.""" + rng = jax.random.PRNGKey(42) + keys = jax.random.split(rng) + q = jax.random.normal(keys[0], (2, 4)) + trans = jax.random.normal(keys[1], (2, 3)) + a1 = quat_affine.QuatAffine(q, trans, unstack_inputs=True) + a2 = quat_affine.QuatAffine(-q, trans, unstack_inputs=True) + + self._assert_check({ + 'rot': (r2t(a1.rotation), + r2t(a2.rotation)), + 'trans': (v2t(a1.translation), + v2t(a2.translation)), + }) + + def test_homomorphism(self): + rng = jax.random.PRNGKey(42) + keys = jax.random.split(rng, 4) + vec_q1 = jax.random.normal(keys[0], (2, 3)) + + q1 = jnp.concatenate([ + jnp.ones_like(vec_q1)[:, :1], + vec_q1], axis=-1) + + q2 = jax.random.normal(keys[1], (2, 4)) + t1 = jax.random.normal(keys[2], (2, 3)) + t2 = jax.random.normal(keys[3], (2, 3)) + + a1 = quat_affine.QuatAffine(q1, t1, unstack_inputs=True) + a2 = quat_affine.QuatAffine(q2, t2, unstack_inputs=True) + a21 = a2.pre_compose(jnp.concatenate([vec_q1, t1], axis=-1)) + + rng, key = jax.random.split(rng) + x = jax.random.normal(key, (2, 3)) + new_x = a21.apply_to_point(jnp.moveaxis(x, -1, 0)) + new_x_apply2 = a2.apply_to_point(a1.apply_to_point(jnp.moveaxis(x, -1, 0))) + + self._assert_check({ + 'quat': (q2r(quat_affine.quat_multiply(a2.quaternion, a1.quaternion)), + q2r(a21.quaternion)), + 'rot': (jnp.matmul(r2t(a2.rotation), r2t(a1.rotation)), + r2t(a21.rotation)), + 'point': (v2t(new_x_apply2), + v2t(new_x)), + 'inverse': (x, 
v2t(a21.invert_point(new_x))), + }) + + def test_batching(self): + """Test that affine applies batchwise.""" + rng = jax.random.PRNGKey(42) + keys = jax.random.split(rng, 3) + q = jax.random.uniform(keys[0], (5, 2, 4)) + t = jax.random.uniform(keys[1], (2, 3)) + x = jax.random.uniform(keys[2], (5, 1, 3)) + + a = quat_affine.QuatAffine(q, t, unstack_inputs=True) + y = v2t(a.apply_to_point(jnp.moveaxis(x, -1, 0))) + + y_list = [] + for i in range(5): + for j in range(2): + a_local = quat_affine.QuatAffine(q[i, j], t[j], + unstack_inputs=True) + y_local = v2t(a_local.apply_to_point(jnp.moveaxis(x[i, 0], -1, 0))) + y_list.append(y_local) + y_combine = jnp.reshape(jnp.stack(y_list, axis=0), (5, 2, 3)) + + self._assert_check({ + 'batch': (y_combine, y), + 'quat': (q2r(a.quaternion), + q2r(quat_affine.rot_to_quat(a.rotation))), + }) + + def assertAllClose(self, a, b, rtol=1e-06, atol=1e-06): + self.assertTrue(np.allclose(a, b, rtol=rtol, atol=atol)) + + def assertAllEqual(self, a, b): + self.assertTrue(np.all(np.array(a) == np.array(b))) + + +if __name__ == '__main__': + absltest.main() diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/model/r3.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/model/r3.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,320 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Transformations for 3D coordinates. 
+ +This Module contains objects for representing Vectors (Vecs), Rotation Matrices +(Rots) and proper Rigid transformation (Rigids). These are represented as +named tuples with arrays for each entry, for example a set of +[N, M] points would be represented as a Vecs object with arrays of shape [N, M] +for x, y and z. + +This is being done to improve readability by making it very clear what objects +are geometric objects rather than relying on comments and array shapes. +Another reason for this is to avoid using matrix +multiplication primitives like matmul or einsum, on modern accelerator hardware +these can end up on specialized cores such as tensor cores on GPU or the MXU on +cloud TPUs, this often involves lower computational precision which can be +problematic for coordinate geometry. Also these cores are typically optimized +for larger matrices than 3 dimensional, this code is written to avoid any +unintended use of these cores on both GPUs and TPUs. +""" + +import collections +from typing import List +from alphafold.model import quat_affine +import jax.numpy as jnp +import tree + +# Array of 3-component vectors, stored as individual array for +# each component. +Vecs = collections.namedtuple('Vecs', ['x', 'y', 'z']) + +# Array of 3x3 rotation matrices, stored as individual array for +# each component. +Rots = collections.namedtuple('Rots', ['xx', 'xy', 'xz', + 'yx', 'yy', 'yz', + 'zx', 'zy', 'zz']) +# Array of rigid 3D transformations, stored as array of rotations and +# array of translations. 
+Rigids = collections.namedtuple('Rigids', ['rot', 'trans']) + + +def squared_difference(x, y): + return jnp.square(x - y) + + +def invert_rigids(r: Rigids) -> Rigids: + """Computes group inverse of rigid transformations 'r'.""" + inv_rots = invert_rots(r.rot) + t = rots_mul_vecs(inv_rots, r.trans) + inv_trans = Vecs(-t.x, -t.y, -t.z) + return Rigids(inv_rots, inv_trans) + + +def invert_rots(m: Rots) -> Rots: + """Computes inverse of rotations 'm'.""" + return Rots(m.xx, m.yx, m.zx, + m.xy, m.yy, m.zy, + m.xz, m.yz, m.zz) + + +def rigids_from_3_points( + point_on_neg_x_axis: Vecs, # shape (...) + origin: Vecs, # shape (...) + point_on_xy_plane: Vecs, # shape (...) +) -> Rigids: # shape (...) + """Create Rigids from 3 points. + + Jumper et al. (2021) Suppl. Alg. 21 "rigidFrom3Points" + This creates a set of rigid transformations from 3 points by Gram Schmidt + orthogonalization. + + Args: + point_on_neg_x_axis: Vecs corresponding to points on the negative x axis + origin: Origin of resulting rigid transformations + point_on_xy_plane: Vecs corresponding to points in the xy plane + Returns: + Rigid transformations from global frame to local frames derived from + the input points. + """ + m = rots_from_two_vecs( + e0_unnormalized=vecs_sub(origin, point_on_neg_x_axis), + e1_unnormalized=vecs_sub(point_on_xy_plane, origin)) + + return Rigids(rot=m, trans=origin) + + +def rigids_from_list(l: List[jnp.ndarray]) -> Rigids: + """Converts flat list of arrays to rigid transformations.""" + assert len(l) == 12 + return Rigids(Rots(*(l[:9])), Vecs(*(l[9:]))) + + +def rigids_from_quataffine(a: quat_affine.QuatAffine) -> Rigids: + """Converts QuatAffine object to the corresponding Rigids object.""" + return Rigids(Rots(*tree.flatten(a.rotation)), + Vecs(*a.translation)) + + +def rigids_from_tensor4x4( + m: jnp.ndarray # shape (..., 4, 4) +) -> Rigids: # shape (...) + """Construct Rigids object from an 4x4 array. 
+ + Here the 4x4 is representing the transformation in homogeneous coordinates. + + Args: + m: Array representing transformations in homogeneous coordinates. + Returns: + Rigids object corresponding to transformations m + """ + assert m.shape[-1] == 4 + assert m.shape[-2] == 4 + return Rigids( + Rots(m[..., 0, 0], m[..., 0, 1], m[..., 0, 2], + m[..., 1, 0], m[..., 1, 1], m[..., 1, 2], + m[..., 2, 0], m[..., 2, 1], m[..., 2, 2]), + Vecs(m[..., 0, 3], m[..., 1, 3], m[..., 2, 3])) + + +def rigids_from_tensor_flat9( + m: jnp.ndarray # shape (..., 9) +) -> Rigids: # shape (...) + """Flat9 encoding: first two columns of rotation matrix + translation.""" + assert m.shape[-1] == 9 + e0 = Vecs(m[..., 0], m[..., 1], m[..., 2]) + e1 = Vecs(m[..., 3], m[..., 4], m[..., 5]) + trans = Vecs(m[..., 6], m[..., 7], m[..., 8]) + return Rigids(rot=rots_from_two_vecs(e0, e1), + trans=trans) + + +def rigids_from_tensor_flat12( + m: jnp.ndarray # shape (..., 12) +) -> Rigids: # shape (...) + """Flat12 encoding: rotation matrix (9 floats) + translation (3 floats).""" + assert m.shape[-1] == 12 + x = jnp.moveaxis(m, -1, 0) # Unstack + return Rigids(Rots(*x[:9]), Vecs(*x[9:])) + + +def rigids_mul_rigids(a: Rigids, b: Rigids) -> Rigids: + """Group composition of Rigids 'a' and 'b'.""" + return Rigids( + rots_mul_rots(a.rot, b.rot), + vecs_add(a.trans, rots_mul_vecs(a.rot, b.trans))) + + +def rigids_mul_rots(r: Rigids, m: Rots) -> Rigids: + """Compose rigid transformations 'r' with rotations 'm'.""" + return Rigids(rots_mul_rots(r.rot, m), r.trans) + + +def rigids_mul_vecs(r: Rigids, v: Vecs) -> Vecs: + """Apply rigid transforms 'r' to points 'v'.""" + return vecs_add(rots_mul_vecs(r.rot, v), r.trans) + + +def rigids_to_list(r: Rigids) -> List[jnp.ndarray]: + """Turn Rigids into flat list, inverse of 'rigids_from_list'.""" + return list(r.rot) + list(r.trans) + + +def rigids_to_quataffine(r: Rigids) -> quat_affine.QuatAffine: + """Convert Rigids r into QuatAffine, inverse of 
'rigids_from_quataffine'.""" + return quat_affine.QuatAffine( + quaternion=None, + rotation=[[r.rot.xx, r.rot.xy, r.rot.xz], + [r.rot.yx, r.rot.yy, r.rot.yz], + [r.rot.zx, r.rot.zy, r.rot.zz]], + translation=[r.trans.x, r.trans.y, r.trans.z]) + + +def rigids_to_tensor_flat9( + r: Rigids # shape (...) +) -> jnp.ndarray: # shape (..., 9) + """Flat9 encoding: first two columns of rotation matrix + translation.""" + return jnp.stack( + [r.rot.xx, r.rot.yx, r.rot.zx, r.rot.xy, r.rot.yy, r.rot.zy] + + list(r.trans), axis=-1) + + +def rigids_to_tensor_flat12( + r: Rigids # shape (...) +) -> jnp.ndarray: # shape (..., 12) + """Flat12 encoding: rotation matrix (9 floats) + translation (3 floats).""" + return jnp.stack(list(r.rot) + list(r.trans), axis=-1) + + +def rots_from_tensor3x3( + m: jnp.ndarray, # shape (..., 3, 3) +) -> Rots: # shape (...) + """Convert rotations represented as (3, 3) array to Rots.""" + assert m.shape[-1] == 3 + assert m.shape[-2] == 3 + return Rots(m[..., 0, 0], m[..., 0, 1], m[..., 0, 2], + m[..., 1, 0], m[..., 1, 1], m[..., 1, 2], + m[..., 2, 0], m[..., 2, 1], m[..., 2, 2]) + + +def rots_from_two_vecs(e0_unnormalized: Vecs, e1_unnormalized: Vecs) -> Rots: + """Create rotation matrices from unnormalized vectors for the x and y-axes. + + This creates a rotation matrix from two vectors using Gram-Schmidt + orthogonalization. + + Args: + e0_unnormalized: vectors lying along x-axis of resulting rotation + e1_unnormalized: vectors lying in xy-plane of resulting rotation + Returns: + Rotations resulting from Gram-Schmidt procedure. + """ + # Normalize the unit vector for the x-axis, e0. + e0 = vecs_robust_normalize(e0_unnormalized) + + # make e1 perpendicular to e0. + c = vecs_dot_vecs(e1_unnormalized, e0) + e1 = Vecs(e1_unnormalized.x - c * e0.x, + e1_unnormalized.y - c * e0.y, + e1_unnormalized.z - c * e0.z) + e1 = vecs_robust_normalize(e1) + + # Compute e2 as cross product of e0 and e1. 
+ e2 = vecs_cross_vecs(e0, e1) + + return Rots(e0.x, e1.x, e2.x, e0.y, e1.y, e2.y, e0.z, e1.z, e2.z) + + +def rots_mul_rots(a: Rots, b: Rots) -> Rots: + """Composition of rotations 'a' and 'b'.""" + c0 = rots_mul_vecs(a, Vecs(b.xx, b.yx, b.zx)) + c1 = rots_mul_vecs(a, Vecs(b.xy, b.yy, b.zy)) + c2 = rots_mul_vecs(a, Vecs(b.xz, b.yz, b.zz)) + return Rots(c0.x, c1.x, c2.x, c0.y, c1.y, c2.y, c0.z, c1.z, c2.z) + + +def rots_mul_vecs(m: Rots, v: Vecs) -> Vecs: + """Apply rotations 'm' to vectors 'v'.""" + return Vecs(m.xx * v.x + m.xy * v.y + m.xz * v.z, + m.yx * v.x + m.yy * v.y + m.yz * v.z, + m.zx * v.x + m.zy * v.y + m.zz * v.z) + + +def vecs_add(v1: Vecs, v2: Vecs) -> Vecs: + """Add two vectors 'v1' and 'v2'.""" + return Vecs(v1.x + v2.x, v1.y + v2.y, v1.z + v2.z) + + +def vecs_dot_vecs(v1: Vecs, v2: Vecs) -> jnp.ndarray: + """Dot product of vectors 'v1' and 'v2'.""" + return v1.x * v2.x + v1.y * v2.y + v1.z * v2.z + + +def vecs_cross_vecs(v1: Vecs, v2: Vecs) -> Vecs: + """Cross product of vectors 'v1' and 'v2'.""" + return Vecs(v1.y * v2.z - v1.z * v2.y, + v1.z * v2.x - v1.x * v2.z, + v1.x * v2.y - v1.y * v2.x) + + +def vecs_from_tensor(x: jnp.ndarray # shape (..., 3) + ) -> Vecs: # shape (...) + """Converts from tensor of shape (3,) to Vecs.""" + num_components = x.shape[-1] + assert num_components == 3 + return Vecs(x[..., 0], x[..., 1], x[..., 2]) + + +def vecs_robust_normalize(v: Vecs, epsilon: float = 1e-8) -> Vecs: + """Normalizes vectors 'v'. + + Args: + v: vectors to be normalized. + epsilon: small regularizer added to squared norm before taking square root. + Returns: + normalized vectors + """ + norms = vecs_robust_norm(v, epsilon) + return Vecs(v.x / norms, v.y / norms, v.z / norms) + + +def vecs_robust_norm(v: Vecs, epsilon: float = 1e-8) -> jnp.ndarray: + """Computes norm of vectors 'v'. + + Args: + v: vectors to be normalized. + epsilon: small regularizer added to squared norm before taking square root. 
+ Returns: + norm of 'v' + """ + return jnp.sqrt(jnp.square(v.x) + jnp.square(v.y) + jnp.square(v.z) + epsilon) + + +def vecs_sub(v1: Vecs, v2: Vecs) -> Vecs: + """Computes v1 - v2.""" + return Vecs(v1.x - v2.x, v1.y - v2.y, v1.z - v2.z) + + +def vecs_squared_distance(v1: Vecs, v2: Vecs) -> jnp.ndarray: + """Computes squared euclidean difference between 'v1' and 'v2'.""" + return (squared_difference(v1.x, v2.x) + + squared_difference(v1.y, v2.y) + + squared_difference(v1.z, v2.z)) + + +def vecs_to_tensor(v: Vecs # shape (...) + ) -> jnp.ndarray: # shape(..., 3) + """Converts 'v' to tensor with shape 3, inverse of 'vecs_from_tensor'.""" + return jnp.stack([v.x, v.y, v.z], axis=-1) diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/model/tf/__init__.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/model/tf/__init__.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,14 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Alphafold model TensorFlow code.""" diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/model/tf/data_transforms.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/model/tf/data_transforms.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,625 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Data for AlphaFold.""" + +from alphafold.common import residue_constants +from alphafold.model.tf import shape_helpers +from alphafold.model.tf import shape_placeholders +from alphafold.model.tf import utils +import numpy as np +import tensorflow.compat.v1 as tf + +# Pylint gets confused by the curry1 decorator because it changes the number +# of arguments to the function. +# pylint:disable=no-value-for-parameter + + +NUM_RES = shape_placeholders.NUM_RES +NUM_MSA_SEQ = shape_placeholders.NUM_MSA_SEQ +NUM_EXTRA_SEQ = shape_placeholders.NUM_EXTRA_SEQ +NUM_TEMPLATES = shape_placeholders.NUM_TEMPLATES + + +def cast_64bit_ints(protein): + + for k, v in protein.items(): + if v.dtype == tf.int64: + protein[k] = tf.cast(v, tf.int32) + return protein + + +_MSA_FEATURE_NAMES = [ + 'msa', 'deletion_matrix', 'msa_mask', 'msa_row_mask', 'bert_mask', + 'true_msa' +] + + +def make_seq_mask(protein): + protein['seq_mask'] = tf.ones( + shape_helpers.shape_list(protein['aatype']), dtype=tf.float32) + return protein + + +def make_template_mask(protein): + protein['template_mask'] = tf.ones( + shape_helpers.shape_list(protein['template_domain_names']), + dtype=tf.float32) + return protein + + +def curry1(f): + """Supply all arguments but the first.""" + + def fc(*args, **kwargs): + return lambda x: f(x, *args, **kwargs) + + return fc + + +@curry1 +def add_distillation_flag(protein, distillation): + protein['is_distillation'] = tf.constant(float(distillation), + shape=[], + dtype=tf.float32) + return protein + + +def make_all_atom_aatype(protein): + 
protein['all_atom_aatype'] = protein['aatype'] + return protein + + +def fix_templates_aatype(protein): + """Fixes aatype encoding of templates.""" + # Map one-hot to indices. + protein['template_aatype'] = tf.argmax( + protein['template_aatype'], output_type=tf.int32, axis=-1) + # Map hhsearch-aatype to our aatype. + new_order_list = residue_constants.MAP_HHBLITS_AATYPE_TO_OUR_AATYPE + new_order = tf.constant(new_order_list, dtype=tf.int32) + protein['template_aatype'] = tf.gather(params=new_order, + indices=protein['template_aatype']) + return protein + + +def correct_msa_restypes(protein): + """Correct MSA restype to have the same order as residue_constants.""" + new_order_list = residue_constants.MAP_HHBLITS_AATYPE_TO_OUR_AATYPE + new_order = tf.constant(new_order_list, dtype=protein['msa'].dtype) + protein['msa'] = tf.gather(new_order, protein['msa'], axis=0) + + perm_matrix = np.zeros((22, 22), dtype=np.float32) + perm_matrix[range(len(new_order_list)), new_order_list] = 1. + + for k in protein: + if 'profile' in k: # Include both hhblits and psiblast profiles + num_dim = protein[k].shape.as_list()[-1] + assert num_dim in [20, 21, 22], ( + 'num_dim for %s out of expected range: %s' % (k, num_dim)) + protein[k] = tf.tensordot(protein[k], perm_matrix[:num_dim, :num_dim], 1) + return protein + + +def squeeze_features(protein): + """Remove singleton and repeated dimensions in protein features.""" + protein['aatype'] = tf.argmax( + protein['aatype'], axis=-1, output_type=tf.int32) + for k in [ + 'domain_name', 'msa', 'num_alignments', 'seq_length', 'sequence', + 'superfamily', 'deletion_matrix', 'resolution', + 'between_segment_residues', 'residue_index', 'template_all_atom_masks']: + if k in protein: + final_dim = shape_helpers.shape_list(protein[k])[-1] + if isinstance(final_dim, int) and final_dim == 1: + protein[k] = tf.squeeze(protein[k], axis=-1) + + for k in ['seq_length', 'num_alignments']: + if k in protein: + protein[k] = protein[k][0] # Remove fake 
sequence dimension + return protein + + +def make_random_crop_to_size_seed(protein): + """Random seed for cropping residues and templates.""" + protein['random_crop_to_size_seed'] = utils.make_random_seed() + return protein + + +@curry1 +def randomly_replace_msa_with_unknown(protein, replace_proportion): + """Replace a proportion of the MSA with 'X'.""" + msa_mask = (tf.random.uniform(shape_helpers.shape_list(protein['msa'])) < + replace_proportion) + x_idx = 20 + gap_idx = 21 + msa_mask = tf.logical_and(msa_mask, protein['msa'] != gap_idx) + protein['msa'] = tf.where(msa_mask, + tf.ones_like(protein['msa']) * x_idx, + protein['msa']) + aatype_mask = ( + tf.random.uniform(shape_helpers.shape_list(protein['aatype'])) < + replace_proportion) + + protein['aatype'] = tf.where(aatype_mask, + tf.ones_like(protein['aatype']) * x_idx, + protein['aatype']) + return protein + + +@curry1 +def sample_msa(protein, max_seq, keep_extra): + """Sample MSA randomly, remaining sequences are stored as `extra_*`. + + Args: + protein: batch to sample msa from. + max_seq: number of sequences to sample. + keep_extra: When True sequences not sampled are put into fields starting + with 'extra_*'. + + Returns: + Protein with sampled msa. 
+ """ + num_seq = tf.shape(protein['msa'])[0] + shuffled = tf.random_shuffle(tf.range(1, num_seq)) + index_order = tf.concat([[0], shuffled], axis=0) + num_sel = tf.minimum(max_seq, num_seq) + + sel_seq, not_sel_seq = tf.split(index_order, [num_sel, num_seq - num_sel]) + + for k in _MSA_FEATURE_NAMES: + if k in protein: + if keep_extra: + protein['extra_' + k] = tf.gather(protein[k], not_sel_seq) + protein[k] = tf.gather(protein[k], sel_seq) + + return protein + + +@curry1 +def crop_extra_msa(protein, max_extra_msa): + """MSA features are cropped so only `max_extra_msa` sequences are kept.""" + num_seq = tf.shape(protein['extra_msa'])[0] + num_sel = tf.minimum(max_extra_msa, num_seq) + select_indices = tf.random_shuffle(tf.range(0, num_seq))[:num_sel] + for k in _MSA_FEATURE_NAMES: + if 'extra_' + k in protein: + protein['extra_' + k] = tf.gather(protein['extra_' + k], select_indices) + + return protein + + +def delete_extra_msa(protein): + for k in _MSA_FEATURE_NAMES: + if 'extra_' + k in protein: + del protein['extra_' + k] + return protein + + +@curry1 +def block_delete_msa(protein, config): + """Sample MSA by deleting contiguous blocks. + + Jumper et al. (2021) Suppl. Alg. 
1 "MSABlockDeletion" + + Arguments: + protein: batch dict containing the msa + config: ConfigDict with parameters + + Returns: + updated protein + """ + num_seq = shape_helpers.shape_list(protein['msa'])[0] + block_num_seq = tf.cast( + tf.floor(tf.cast(num_seq, tf.float32) * config.msa_fraction_per_block), + tf.int32) + + if config.randomize_num_blocks: + nb = tf.random.uniform([], 0, config.num_blocks + 1, dtype=tf.int32) + else: + nb = config.num_blocks + + del_block_starts = tf.random.uniform([nb], 0, num_seq, dtype=tf.int32) + del_blocks = del_block_starts[:, None] + tf.range(block_num_seq) + del_blocks = tf.clip_by_value(del_blocks, 0, num_seq - 1) + del_indices = tf.unique(tf.sort(tf.reshape(del_blocks, [-1])))[0] + + # Make sure we keep the original sequence + sparse_diff = tf.sets.difference(tf.range(1, num_seq)[None], + del_indices[None]) + keep_indices = tf.squeeze(tf.sparse.to_dense(sparse_diff), 0) + keep_indices = tf.concat([[0], keep_indices], axis=0) + + for k in _MSA_FEATURE_NAMES: + if k in protein: + protein[k] = tf.gather(protein[k], keep_indices) + + return protein + + +@curry1 +def nearest_neighbor_clusters(protein, gap_agreement_weight=0.): + """Assign each extra MSA sequence to its nearest neighbor in sampled MSA.""" + + # Determine how much weight we assign to each agreement. In theory, we could + # use a full blosum matrix here, but right now let's just down-weight gap + # agreement because it could be spurious. 
+ # Never put weight on agreeing on BERT mask + weights = tf.concat([ + tf.ones(21), + gap_agreement_weight * tf.ones(1), + np.zeros(1)], 0) + + # Make agreement score as weighted Hamming distance + sample_one_hot = (protein['msa_mask'][:, :, None] * + tf.one_hot(protein['msa'], 23)) + extra_one_hot = (protein['extra_msa_mask'][:, :, None] * + tf.one_hot(protein['extra_msa'], 23)) + + num_seq, num_res, _ = shape_helpers.shape_list(sample_one_hot) + extra_num_seq, _, _ = shape_helpers.shape_list(extra_one_hot) + + # Compute tf.einsum('mrc,nrc,c->mn', sample_one_hot, extra_one_hot, weights) + # in an optimized fashion to avoid possible memory or computation blowup. + agreement = tf.matmul( + tf.reshape(extra_one_hot, [extra_num_seq, num_res * 23]), + tf.reshape(sample_one_hot * weights, [num_seq, num_res * 23]), + transpose_b=True) + + # Assign each sequence in the extra sequences to the closest MSA sample + protein['extra_cluster_assignment'] = tf.argmax( + agreement, axis=1, output_type=tf.int32) + + return protein + + +@curry1 +def summarize_clusters(protein): + """Produce profile and deletion_matrix_mean within each cluster.""" + num_seq = shape_helpers.shape_list(protein['msa'])[0] + def csum(x): + return tf.math.unsorted_segment_sum( + x, protein['extra_cluster_assignment'], num_seq) + + mask = protein['extra_msa_mask'] + mask_counts = 1e-6 + protein['msa_mask'] + csum(mask) # Include center + + msa_sum = csum(mask[:, :, None] * tf.one_hot(protein['extra_msa'], 23)) + msa_sum += tf.one_hot(protein['msa'], 23) # Original sequence + protein['cluster_profile'] = msa_sum / mask_counts[:, :, None] + + del msa_sum + + del_sum = csum(mask * protein['extra_deletion_matrix']) + del_sum += protein['deletion_matrix'] # Original sequence + protein['cluster_deletion_mean'] = del_sum / mask_counts + del del_sum + + return protein + + +def make_msa_mask(protein): + """Mask features are all ones, but will later be zero-padded.""" + protein['msa_mask'] = tf.ones( + 
shape_helpers.shape_list(protein['msa']), dtype=tf.float32) + protein['msa_row_mask'] = tf.ones( + shape_helpers.shape_list(protein['msa'])[0], dtype=tf.float32) + return protein + + +def pseudo_beta_fn(aatype, all_atom_positions, all_atom_masks): + """Create pseudo beta features.""" + is_gly = tf.equal(aatype, residue_constants.restype_order['G']) + ca_idx = residue_constants.atom_order['CA'] + cb_idx = residue_constants.atom_order['CB'] + pseudo_beta = tf.where( + tf.tile(is_gly[..., None], [1] * len(is_gly.shape) + [3]), + all_atom_positions[..., ca_idx, :], + all_atom_positions[..., cb_idx, :]) + + if all_atom_masks is not None: + pseudo_beta_mask = tf.where( + is_gly, all_atom_masks[..., ca_idx], all_atom_masks[..., cb_idx]) + pseudo_beta_mask = tf.cast(pseudo_beta_mask, tf.float32) + return pseudo_beta, pseudo_beta_mask + else: + return pseudo_beta + + +@curry1 +def make_pseudo_beta(protein, prefix=''): + """Create pseudo-beta (alpha for glycine) position and mask.""" + assert prefix in ['', 'template_'] + protein[prefix + 'pseudo_beta'], protein[prefix + 'pseudo_beta_mask'] = ( + pseudo_beta_fn( + protein['template_aatype' if prefix else 'all_atom_aatype'], + protein[prefix + 'all_atom_positions'], + protein['template_all_atom_masks' if prefix else 'all_atom_mask'])) + return protein + + +@curry1 +def add_constant_field(protein, key, value): + protein[key] = tf.convert_to_tensor(value) + return protein + + +def shaped_categorical(probs, epsilon=1e-10): + ds = shape_helpers.shape_list(probs) + num_classes = ds[-1] + counts = tf.random.categorical( + tf.reshape(tf.log(probs + epsilon), [-1, num_classes]), + 1, + dtype=tf.int32) + return tf.reshape(counts, ds[:-1]) + + +def make_hhblits_profile(protein): + """Compute the HHblits MSA profile if not already present.""" + if 'hhblits_profile' in protein: + return protein + + # Compute the profile for every residue (over all MSA sequences). 
+ protein['hhblits_profile'] = tf.reduce_mean( + tf.one_hot(protein['msa'], 22), axis=0) + return protein + + +@curry1 +def make_masked_msa(protein, config, replace_fraction): + """Create data for BERT on raw MSA.""" + # Add a random amino acid uniformly + random_aa = tf.constant([0.05] * 20 + [0., 0.], dtype=tf.float32) + + categorical_probs = ( + config.uniform_prob * random_aa + + config.profile_prob * protein['hhblits_profile'] + + config.same_prob * tf.one_hot(protein['msa'], 22)) + + # Put all remaining probability on [MASK] which is a new column + pad_shapes = [[0, 0] for _ in range(len(categorical_probs.shape))] + pad_shapes[-1][1] = 1 + mask_prob = 1. - config.profile_prob - config.same_prob - config.uniform_prob + assert mask_prob >= 0. + categorical_probs = tf.pad( + categorical_probs, pad_shapes, constant_values=mask_prob) + + sh = shape_helpers.shape_list(protein['msa']) + mask_position = tf.random.uniform(sh) < replace_fraction + + bert_msa = shaped_categorical(categorical_probs) + bert_msa = tf.where(mask_position, bert_msa, protein['msa']) + + # Mix real and masked MSA + protein['bert_mask'] = tf.cast(mask_position, tf.float32) + protein['true_msa'] = protein['msa'] + protein['msa'] = bert_msa + + return protein + + +@curry1 +def make_fixed_size(protein, shape_schema, msa_cluster_size, extra_msa_size, + num_res, num_templates=0): + """Guess at the MSA and sequence dimensions to make fixed size.""" + + pad_size_map = { + NUM_RES: num_res, + NUM_MSA_SEQ: msa_cluster_size, + NUM_EXTRA_SEQ: extra_msa_size, + NUM_TEMPLATES: num_templates, + } + + for k, v in protein.items(): + # Don't transfer this to the accelerator. 
+ if k == 'extra_cluster_assignment': + continue + shape = v.shape.as_list() + schema = shape_schema[k] + assert len(shape) == len(schema), ( + f'Rank mismatch between shape and shape schema for {k}: ' + f'{shape} vs {schema}') + pad_size = [ + pad_size_map.get(s2, None) or s1 for (s1, s2) in zip(shape, schema) + ] + padding = [(0, p - tf.shape(v)[i]) for i, p in enumerate(pad_size)] + if padding: + protein[k] = tf.pad( + v, padding, name=f'pad_to_fixed_{k}') + protein[k].set_shape(pad_size) + + return protein + + +@curry1 +def make_msa_feat(protein): + """Create and concatenate MSA features.""" + # Whether there is a domain break. Always zero for chains, but keeping + # for compatibility with domain datasets. + has_break = tf.clip_by_value( + tf.cast(protein['between_segment_residues'], tf.float32), + 0, 1) + aatype_1hot = tf.one_hot(protein['aatype'], 21, axis=-1) + + target_feat = [ + tf.expand_dims(has_break, axis=-1), + aatype_1hot, # Everyone gets the original sequence. + ] + + msa_1hot = tf.one_hot(protein['msa'], 23, axis=-1) + has_deletion = tf.clip_by_value(protein['deletion_matrix'], 0., 1.) + deletion_value = tf.atan(protein['deletion_matrix'] / 3.) * (2. / np.pi) + + msa_feat = [ + msa_1hot, + tf.expand_dims(has_deletion, axis=-1), + tf.expand_dims(deletion_value, axis=-1), + ] + + if 'cluster_profile' in protein: + deletion_mean_value = ( + tf.atan(protein['cluster_deletion_mean'] / 3.) * (2. / np.pi)) + msa_feat.extend([ + protein['cluster_profile'], + tf.expand_dims(deletion_mean_value, axis=-1), + ]) + + if 'extra_deletion_matrix' in protein: + protein['extra_has_deletion'] = tf.clip_by_value( + protein['extra_deletion_matrix'], 0., 1.) + protein['extra_deletion_value'] = tf.atan( + protein['extra_deletion_matrix'] / 3.) * (2. 
/ np.pi) + + protein['msa_feat'] = tf.concat(msa_feat, axis=-1) + protein['target_feat'] = tf.concat(target_feat, axis=-1) + return protein + + +@curry1 +def select_feat(protein, feature_list): + return {k: v for k, v in protein.items() if k in feature_list} + + +@curry1 +def crop_templates(protein, max_templates): + for k, v in protein.items(): + if k.startswith('template_'): + protein[k] = v[:max_templates] + return protein + + +@curry1 +def random_crop_to_size(protein, crop_size, max_templates, shape_schema, + subsample_templates=False): + """Crop randomly to `crop_size`, or keep as is if shorter than that.""" + seq_length = protein['seq_length'] + if 'template_mask' in protein: + num_templates = tf.cast( + shape_helpers.shape_list(protein['template_mask'])[0], tf.int32) + else: + num_templates = tf.constant(0, dtype=tf.int32) + num_res_crop_size = tf.math.minimum(seq_length, crop_size) + + # Ensures that the cropping of residues and templates happens in the same way + # across ensembling iterations. + # Do not use for randomness that should vary in ensembling. + seed_maker = utils.SeedMaker(initial_seed=protein['random_crop_to_size_seed']) + + if subsample_templates: + templates_crop_start = tf.random.stateless_uniform( + shape=(), minval=0, maxval=num_templates + 1, dtype=tf.int32, + seed=seed_maker()) + else: + templates_crop_start = 0 + + num_templates_crop_size = tf.math.minimum( + num_templates - templates_crop_start, max_templates) + + num_res_crop_start = tf.random.stateless_uniform( + shape=(), minval=0, maxval=seq_length - num_res_crop_size + 1, + dtype=tf.int32, seed=seed_maker()) + + templates_select_indices = tf.argsort(tf.random.stateless_uniform( + [num_templates], seed=seed_maker())) + + for k, v in protein.items(): + if k not in shape_schema or ( + 'template' not in k and NUM_RES not in shape_schema[k]): + continue + + # randomly permute the templates before cropping them. 
+ if k.startswith('template') and subsample_templates: + v = tf.gather(v, templates_select_indices) + + crop_sizes = [] + crop_starts = [] + for i, (dim_size, dim) in enumerate(zip(shape_schema[k], + shape_helpers.shape_list(v))): + is_num_res = (dim_size == NUM_RES) + if i == 0 and k.startswith('template'): + crop_size = num_templates_crop_size + crop_start = templates_crop_start + else: + crop_start = num_res_crop_start if is_num_res else 0 + crop_size = (num_res_crop_size if is_num_res else + (-1 if dim is None else dim)) + crop_sizes.append(crop_size) + crop_starts.append(crop_start) + protein[k] = tf.slice(v, crop_starts, crop_sizes) + + protein['seq_length'] = num_res_crop_size + return protein + + +def make_atom14_masks(protein): + """Construct denser atom positions (14 dimensions instead of 37).""" + restype_atom14_to_atom37 = [] # mapping (restype, atom14) --> atom37 + restype_atom37_to_atom14 = [] # mapping (restype, atom37) --> atom14 + restype_atom14_mask = [] + + for rt in residue_constants.restypes: + atom_names = residue_constants.restype_name_to_atom14_names[ + residue_constants.restype_1to3[rt]] + + restype_atom14_to_atom37.append([ + (residue_constants.atom_order[name] if name else 0) + for name in atom_names + ]) + + atom_name_to_idx14 = {name: i for i, name in enumerate(atom_names)} + restype_atom37_to_atom14.append([ + (atom_name_to_idx14[name] if name in atom_name_to_idx14 else 0) + for name in residue_constants.atom_types + ]) + + restype_atom14_mask.append([(1. if name else 0.) for name in atom_names]) + + # Add dummy mapping for restype 'UNK' + restype_atom14_to_atom37.append([0] * 14) + restype_atom37_to_atom14.append([0] * 37) + restype_atom14_mask.append([0.] 
* 14) + + restype_atom14_to_atom37 = np.array(restype_atom14_to_atom37, dtype=np.int32) + restype_atom37_to_atom14 = np.array(restype_atom37_to_atom14, dtype=np.int32) + restype_atom14_mask = np.array(restype_atom14_mask, dtype=np.float32) + + # create the mapping for (residx, atom14) --> atom37, i.e. an array + # with shape (num_res, 14) containing the atom37 indices for this protein + residx_atom14_to_atom37 = tf.gather(restype_atom14_to_atom37, + protein['aatype']) + residx_atom14_mask = tf.gather(restype_atom14_mask, + protein['aatype']) + + protein['atom14_atom_exists'] = residx_atom14_mask + protein['residx_atom14_to_atom37'] = residx_atom14_to_atom37 + + # create the gather indices for mapping back + residx_atom37_to_atom14 = tf.gather(restype_atom37_to_atom14, + protein['aatype']) + protein['residx_atom37_to_atom14'] = residx_atom37_to_atom14 + + # create the corresponding mask + restype_atom37_mask = np.zeros([21, 37], dtype=np.float32) + for restype, restype_letter in enumerate(residue_constants.restypes): + restype_name = residue_constants.restype_1to3[restype_letter] + atom_names = residue_constants.residue_atoms[restype_name] + for atom_name in atom_names: + atom_type = residue_constants.atom_order[atom_name] + restype_atom37_mask[restype, atom_type] = 1 + + residx_atom37_mask = tf.gather(restype_atom37_mask, + protein['aatype']) + protein['atom37_atom_exists'] = residx_atom37_mask + + return protein diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/model/tf/input_pipeline.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/model/tf/input_pipeline.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,166 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Feature pre-processing input pipeline for AlphaFold.""" + +from alphafold.model.tf import data_transforms +from alphafold.model.tf import shape_placeholders +import tensorflow.compat.v1 as tf +import tree + +# Pylint gets confused by the curry1 decorator because it changes the number +# of arguments to the function. +# pylint:disable=no-value-for-parameter + + +NUM_RES = shape_placeholders.NUM_RES +NUM_MSA_SEQ = shape_placeholders.NUM_MSA_SEQ +NUM_EXTRA_SEQ = shape_placeholders.NUM_EXTRA_SEQ +NUM_TEMPLATES = shape_placeholders.NUM_TEMPLATES + + +def nonensembled_map_fns(data_config): + """Input pipeline functions which are not ensembled.""" + common_cfg = data_config.common + + map_fns = [ + data_transforms.correct_msa_restypes, + data_transforms.add_distillation_flag(False), + data_transforms.cast_64bit_ints, + data_transforms.squeeze_features, + # Keep to not disrupt RNG. + data_transforms.randomly_replace_msa_with_unknown(0.0), + data_transforms.make_seq_mask, + data_transforms.make_msa_mask, + # Compute the HHblits profile if it's not set. This has to be run before + # sampling the MSA. 
+ data_transforms.make_hhblits_profile, + data_transforms.make_random_crop_to_size_seed, + ] + if common_cfg.use_templates: + map_fns.extend([ + data_transforms.fix_templates_aatype, + data_transforms.make_template_mask, + data_transforms.make_pseudo_beta('template_') + ]) + map_fns.extend([ + data_transforms.make_atom14_masks, + ]) + + return map_fns + + +def ensembled_map_fns(data_config): + """Input pipeline functions that can be ensembled and averaged.""" + common_cfg = data_config.common + eval_cfg = data_config.eval + + map_fns = [] + + if common_cfg.reduce_msa_clusters_by_max_templates: + pad_msa_clusters = eval_cfg.max_msa_clusters - eval_cfg.max_templates + else: + pad_msa_clusters = eval_cfg.max_msa_clusters + + max_msa_clusters = pad_msa_clusters + max_extra_msa = common_cfg.max_extra_msa + + map_fns.append( + data_transforms.sample_msa( + max_msa_clusters, + keep_extra=True)) + + if 'masked_msa' in common_cfg: + # Masked MSA should come *before* MSA clustering so that + # the clustering and full MSA profile do not leak information about + # the masked locations and secret corrupted locations. + map_fns.append( + data_transforms.make_masked_msa(common_cfg.masked_msa, + eval_cfg.masked_msa_replace_fraction)) + + if common_cfg.msa_cluster_features: + map_fns.append(data_transforms.nearest_neighbor_clusters()) + map_fns.append(data_transforms.summarize_clusters()) + + # Crop after creating the cluster profiles. 
+ if max_extra_msa: + map_fns.append(data_transforms.crop_extra_msa(max_extra_msa)) + else: + map_fns.append(data_transforms.delete_extra_msa) + + map_fns.append(data_transforms.make_msa_feat()) + + crop_feats = dict(eval_cfg.feat) + + if eval_cfg.fixed_size: + map_fns.append(data_transforms.select_feat(list(crop_feats))) + map_fns.append(data_transforms.random_crop_to_size( + eval_cfg.crop_size, + eval_cfg.max_templates, + crop_feats, + eval_cfg.subsample_templates)) + map_fns.append(data_transforms.make_fixed_size( + crop_feats, + pad_msa_clusters, + common_cfg.max_extra_msa, + eval_cfg.crop_size, + eval_cfg.max_templates)) + else: + map_fns.append(data_transforms.crop_templates(eval_cfg.max_templates)) + + return map_fns + + +def process_tensors_from_config(tensors, data_config): + """Apply filters and maps to an existing dataset, based on the config.""" + + def wrap_ensemble_fn(data, i): + """Function to be mapped over the ensemble dimension.""" + d = data.copy() + fns = ensembled_map_fns(data_config) + fn = compose(fns) + d['ensemble_index'] = i + return fn(d) + + eval_cfg = data_config.eval + tensors = compose( + nonensembled_map_fns( + data_config))( + tensors) + + tensors_0 = wrap_ensemble_fn(tensors, tf.constant(0)) + num_ensemble = eval_cfg.num_ensemble + if data_config.common.resample_msa_in_recycling: + # Separate batch per ensembling & recycling step. 
+ num_ensemble *= data_config.common.num_recycle + 1 + + if isinstance(num_ensemble, tf.Tensor) or num_ensemble > 1: + fn_output_signature = tree.map_structure( + tf.TensorSpec.from_tensor, tensors_0) + tensors = tf.map_fn( + lambda x: wrap_ensemble_fn(tensors, x), + tf.range(num_ensemble), + parallel_iterations=1, + fn_output_signature=fn_output_signature) + else: + tensors = tree.map_structure(lambda x: x[None], + tensors_0) + return tensors + + +@data_transforms.curry1 +def compose(x, fs): + for f in fs: + x = f(x) + return x diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/model/tf/protein_features.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/model/tf/protein_features.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,129 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Contains descriptions of various protein features.""" +import enum +from typing import Dict, Optional, Sequence, Tuple, Union +from alphafold.common import residue_constants +import tensorflow.compat.v1 as tf + +# Type aliases. +FeaturesMetadata = Dict[str, Tuple[tf.dtypes.DType, Sequence[Union[str, int]]]] + + +class FeatureType(enum.Enum): + ZERO_DIM = 0 # Shape [x] + ONE_DIM = 1 # Shape [num_res, x] + TWO_DIM = 2 # Shape [num_res, num_res, x] + MSA = 3 # Shape [msa_length, num_res, x] + + +# Placeholder values that will be replaced with their true value at runtime. 
+NUM_RES = "num residues placeholder" +NUM_SEQ = "length msa placeholder" +NUM_TEMPLATES = "num templates placeholder" +# Sizes of the protein features, NUM_RES and NUM_SEQ are allowed as placeholders +# to be replaced with the number of residues and the number of sequences in the +# multiple sequence alignment, respectively. + + +FEATURES = { + #### Static features of a protein sequence #### + "aatype": (tf.float32, [NUM_RES, 21]), + "between_segment_residues": (tf.int64, [NUM_RES, 1]), + "deletion_matrix": (tf.float32, [NUM_SEQ, NUM_RES, 1]), + "domain_name": (tf.string, [1]), + "msa": (tf.int64, [NUM_SEQ, NUM_RES, 1]), + "num_alignments": (tf.int64, [NUM_RES, 1]), + "residue_index": (tf.int64, [NUM_RES, 1]), + "seq_length": (tf.int64, [NUM_RES, 1]), + "sequence": (tf.string, [1]), + "all_atom_positions": (tf.float32, + [NUM_RES, residue_constants.atom_type_num, 3]), + "all_atom_mask": (tf.int64, [NUM_RES, residue_constants.atom_type_num]), + "resolution": (tf.float32, [1]), + "template_domain_names": (tf.string, [NUM_TEMPLATES]), + "template_sum_probs": (tf.float32, [NUM_TEMPLATES, 1]), + "template_aatype": (tf.float32, [NUM_TEMPLATES, NUM_RES, 22]), + "template_all_atom_positions": (tf.float32, [ + NUM_TEMPLATES, NUM_RES, residue_constants.atom_type_num, 3 + ]), + "template_all_atom_masks": (tf.float32, [ + NUM_TEMPLATES, NUM_RES, residue_constants.atom_type_num, 1 + ]), +} + +FEATURE_TYPES = {k: v[0] for k, v in FEATURES.items()} +FEATURE_SIZES = {k: v[1] for k, v in FEATURES.items()} + + +def register_feature(name: str, + type_: tf.dtypes.DType, + shape_: Tuple[Union[str, int]]): + """Register extra features used in custom datasets.""" + FEATURES[name] = (type_, shape_) + FEATURE_TYPES[name] = type_ + FEATURE_SIZES[name] = shape_ + + +def shape(feature_name: str, + num_residues: int, + msa_length: int, + num_templates: Optional[int] = None, + features: Optional[FeaturesMetadata] = None): + """Get the shape for the given feature name. 
+ + This is near identical to _get_tf_shape_no_placeholders() but with 2 + differences: + * This method does not calculate a single placeholder from the total number of + elements (eg given [NUM_RES, 3] and size := 12, this won't deduce NUM_RES + must be 4) + * This method will work with tensors + + Args: + feature_name: String identifier for the feature. If the feature name ends + with "_unnormalized", this suffix is stripped off. + num_residues: The number of residues in the current domain - some elements + of the shape can be dynamic and will be replaced by this value. + msa_length: The number of sequences in the multiple sequence alignment, some + elements of the shape can be dynamic and will be replaced by this value. + If the number of alignments is unknown / not read, please pass None for + msa_length. + num_templates (optional): The number of templates in this tfexample. + features: A feature_name to (tf_dtype, shape) lookup; defaults to FEATURES. + + Returns: + List of ints representing the tensor size. + + Raises: + ValueError: If a feature is requested but no concrete placeholder value is + given. 
+ """ + features = features or FEATURES + if feature_name.endswith("_unnormalized"): + feature_name = feature_name[:-13] + + unused_dtype, raw_sizes = features[feature_name] + replacements = {NUM_RES: num_residues, + NUM_SEQ: msa_length} + + if num_templates is not None: + replacements[NUM_TEMPLATES] = num_templates + + sizes = [replacements.get(dimension, dimension) for dimension in raw_sizes] + for dimension in sizes: + if isinstance(dimension, str): + raise ValueError("Could not parse %s (shape: %s) with values: %s" % ( + feature_name, raw_sizes, replacements)) + return sizes diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/model/tf/protein_features_test.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/model/tf/protein_features_test.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,51 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for protein_features.""" +import uuid + +from absl.testing import absltest +from absl.testing import parameterized +from alphafold.model.tf import protein_features +import tensorflow.compat.v1 as tf + + +def _random_bytes(): + return str(uuid.uuid4()).encode('utf-8') + + +class FeaturesTest(parameterized.TestCase, tf.test.TestCase): + + def testFeatureNames(self): + self.assertEqual(len(protein_features.FEATURE_SIZES), + len(protein_features.FEATURE_TYPES)) + sorted_size_names = sorted(protein_features.FEATURE_SIZES.keys()) + sorted_type_names = sorted(protein_features.FEATURE_TYPES.keys()) + for i, size_name in enumerate(sorted_size_names): + self.assertEqual(size_name, sorted_type_names[i]) + + def testReplacement(self): + for name in protein_features.FEATURE_SIZES.keys(): + sizes = protein_features.shape(name, + num_residues=12, + msa_length=24, + num_templates=3) + for x in sizes: + self.assertEqual(type(x), int) + self.assertGreater(x, 0) + + +if __name__ == '__main__': + tf.disable_v2_behavior() + absltest.main() diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/model/tf/proteins_dataset.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/model/tf/proteins_dataset.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,166 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Datasets consisting of proteins.""" +from typing import Dict, Mapping, Optional, Sequence +from alphafold.model.tf import protein_features +import numpy as np +import tensorflow.compat.v1 as tf + +TensorDict = Dict[str, tf.Tensor] + + +def parse_tfexample( + raw_data: bytes, + features: protein_features.FeaturesMetadata, + key: Optional[str] = None) -> Dict[str, tf.train.Feature]: + """Read a single TF Example proto and return a subset of its features. + + Args: + raw_data: A serialized tf.Example proto. + features: A dictionary of features, mapping string feature names to a tuple + (dtype, shape). This dictionary should be a subset of + protein_features.FEATURES (or the dictionary itself for all features). + key: Optional string with the SSTable key of that tf.Example. This will be + added into features as a 'key' but only if requested in features. + + Returns: + A dictionary of features mapping feature names to features. Only the given + features are returned, all other ones are filtered out. + """ + feature_map = { + k: tf.io.FixedLenSequenceFeature(shape=(), dtype=v[0], allow_missing=True) + for k, v in features.items() + } + parsed_features = tf.io.parse_single_example(raw_data, feature_map) + reshaped_features = parse_reshape_logic(parsed_features, features, key=key) + + return reshaped_features + + +def _first(tensor: tf.Tensor) -> tf.Tensor: + """Returns the 1st element - the input can be a tensor or a scalar.""" + return tf.reshape(tensor, shape=(-1,))[0] + + +def parse_reshape_logic( + parsed_features: TensorDict, + features: protein_features.FeaturesMetadata, + key: Optional[str] = None) -> TensorDict: + """Transforms parsed serial features to the correct shape.""" + # Find out what is the number of sequences and the number of alignments. 
+ num_residues = tf.cast(_first(parsed_features["seq_length"]), dtype=tf.int32) + + if "num_alignments" in parsed_features: + num_msa = tf.cast(_first(parsed_features["num_alignments"]), dtype=tf.int32) + else: + num_msa = 0 + + if "template_domain_names" in parsed_features: + num_templates = tf.cast( + tf.shape(parsed_features["template_domain_names"])[0], dtype=tf.int32) + else: + num_templates = 0 + + if key is not None and "key" in features: + parsed_features["key"] = [key] # Expand dims from () to (1,). + + # Reshape the tensors according to the sequence length and num alignments. + for k, v in parsed_features.items(): + new_shape = protein_features.shape( + feature_name=k, + num_residues=num_residues, + msa_length=num_msa, + num_templates=num_templates, + features=features) + new_shape_size = tf.constant(1, dtype=tf.int32) + for dim in new_shape: + new_shape_size *= tf.cast(dim, tf.int32) + + assert_equal = tf.assert_equal( + tf.size(v), new_shape_size, + name="assert_%s_shape_correct" % k, + message="The size of feature %s (%s) could not be reshaped " + "into %s" % (k, tf.size(v), new_shape)) + if "template" not in k: + # Make sure the feature we are reshaping is not empty. + assert_non_empty = tf.assert_greater( + tf.size(v), 0, name="assert_%s_non_empty" % k, + message="The feature %s is not set in the tf.Example. Either do not " + "request the feature or use a tf.Example that has the " + "feature set." % k) + with tf.control_dependencies([assert_non_empty, assert_equal]): + parsed_features[k] = tf.reshape(v, new_shape, name="reshape_%s" % k) + else: + with tf.control_dependencies([assert_equal]): + parsed_features[k] = tf.reshape(v, new_shape, name="reshape_%s" % k) + + return parsed_features + + +def _make_features_metadata( + feature_names: Sequence[str]) -> protein_features.FeaturesMetadata: + """Makes a feature name to type and shape mapping from a list of names.""" + # Make sure these features are always read. 
+ required_features = ["aatype", "sequence", "seq_length"] + feature_names = list(set(feature_names) | set(required_features)) + + features_metadata = {name: protein_features.FEATURES[name] + for name in feature_names} + return features_metadata + + +def create_tensor_dict( + raw_data: bytes, + features: Sequence[str], + key: Optional[str] = None, + ) -> TensorDict: + """Creates a dictionary of tensor features. + + Args: + raw_data: A serialized tf.Example proto. + features: A list of strings of feature names to be returned in the dataset. + key: Optional string with the SSTable key of that tf.Example. This will be + added into features as a 'key' but only if requested in features. + + Returns: + A dictionary of features mapping feature names to features. Only the given + features are returned, all other ones are filtered out. + """ + features_metadata = _make_features_metadata(features) + return parse_tfexample(raw_data, features_metadata, key) + + +def np_to_tensor_dict( + np_example: Mapping[str, np.ndarray], + features: Sequence[str], + ) -> TensorDict: + """Creates dict of tensors from a dict of NumPy arrays. + + Args: + np_example: A dict of NumPy feature arrays. + features: A list of strings of feature names to be returned in the dataset. + + Returns: + A dictionary of features mapping feature names to features. Only the given + features are returned, all other ones are filtered out. + """ + features_metadata = _make_features_metadata(features) + tensor_dict = {k: tf.constant(v) for k, v in np_example.items() + if k in features_metadata} + + # Ensures shapes are as expected. Needed for setting size of empty features + # e.g. when no template hits were found. 
+ tensor_dict = parse_reshape_logic(tensor_dict, features_metadata) + return tensor_dict diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/model/tf/shape_helpers.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/model/tf/shape_helpers.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,47 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Utilities for dealing with shapes of TensorFlow tensors.""" +import tensorflow.compat.v1 as tf + + +def shape_list(x): + """Return list of dimensions of a tensor, statically where possible. + + Like `x.shape.as_list()` but with tensors instead of `None`s. + + Args: + x: A tensor. + Returns: + A list with length equal to the rank of the tensor. The n-th element of the + list is an integer when that dimension is statically known otherwise it is + the n-th element of `tf.shape(x)`. 
+ """ + x = tf.convert_to_tensor(x) + + # If unknown rank, return dynamic shape + if x.get_shape().dims is None: + return tf.shape(x) + + static = x.get_shape().as_list() + shape = tf.shape(x) + + ret = [] + for i in range(len(static)): + dim = static[i] + if dim is None: + dim = shape[i] + ret.append(dim) + return ret + diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/model/tf/shape_helpers_test.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/model/tf/shape_helpers_test.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,39 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for shape_helpers.""" + +from alphafold.model.tf import shape_helpers +import numpy as np +import tensorflow.compat.v1 as tf + + +class ShapeTest(tf.test.TestCase): + + def test_shape_list(self): + """Test that shape_list can allow for reshaping to dynamic shapes.""" + a = tf.zeros([10, 4, 4, 2]) + p = tf.placeholder(tf.float32, shape=[None, None, 1, 4, 4]) + shape_dyn = shape_helpers.shape_list(p)[:2] + [4, 4] + + b = tf.reshape(a, shape_dyn) + with self.session() as sess: + out = sess.run(b, feed_dict={p: np.ones((20, 1, 1, 4, 4))}) + + self.assertAllEqual(out.shape, (20, 1, 4, 4)) + + +if __name__ == '__main__': + tf.disable_v2_behavior() + tf.test.main() diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/model/tf/shape_placeholders.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/model/tf/shape_placeholders.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,20 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Placeholder values for run-time varying dimension sizes.""" + +NUM_RES = 'num residues placeholder' +NUM_MSA_SEQ = 'msa placeholder' +NUM_EXTRA_SEQ = 'extra msa placeholder' +NUM_TEMPLATES = 'num templates placeholder' diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/model/tf/utils.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/model/tf/utils.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,47 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Shared utilities for various components.""" +import tensorflow.compat.v1 as tf + + +def tf_combine_mask(*masks): + """Take the intersection of float-valued masks.""" + ret = 1 + for m in masks: + ret *= m + return ret + + +class SeedMaker(object): + """Return unique seeds.""" + + def __init__(self, initial_seed=0): + self.next_seed = initial_seed + + def __call__(self): + i = self.next_seed + self.next_seed += 1 + return i + +seed_maker = SeedMaker() + + +def make_random_seed(): + return tf.random.uniform([2], + tf.int32.min, + tf.int32.max, + tf.int32, + seed=seed_maker()) + diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/model/utils.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/model/utils.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,131 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""A collection of JAX utility functions for use in protein folding.""" + +import collections +import functools +import numbers +from typing import Mapping + +import haiku as hk +import jax +import jax.numpy as jnp +import numpy as np + + +def final_init(config): + if config.zero_init: + return 'zeros' + else: + return 'linear' + + +def batched_gather(params, indices, axis=0, batch_dims=0): + """Implements a JAX equivalent of `tf.gather` with `axis` and `batch_dims`.""" + take_fn = lambda p, i: jnp.take(p, i, axis=axis) + for _ in range(batch_dims): + take_fn = jax.vmap(take_fn) + return take_fn(params, indices) + + +def mask_mean(mask, value, axis=None, drop_mask_channel=False, eps=1e-10): + """Masked mean.""" + if drop_mask_channel: + mask = mask[..., 0] + + mask_shape = mask.shape + value_shape = value.shape + + assert len(mask_shape) == len(value_shape) + + if isinstance(axis, numbers.Integral): + axis = [axis] + elif axis is None: + axis = list(range(len(mask_shape))) + assert isinstance(axis, collections.Iterable), ( + 'axis needs to be either an iterable, integer or "None"') + + broadcast_factor = 1. + for axis_ in axis: + value_size = value_shape[axis_] + mask_size = mask_shape[axis_] + if mask_size == 1: + broadcast_factor *= value_size + else: + assert mask_size == value_size + + return (jnp.sum(mask * value, axis=axis) / + (jnp.sum(mask, axis=axis) * broadcast_factor + eps)) + + +def flat_params_to_haiku(params: Mapping[str, np.ndarray]) -> hk.Params: + """Convert a dictionary of NumPy arrays to Haiku parameters.""" + hk_params = {} + for path, array in params.items(): + scope, name = path.split('//') + if scope not in hk_params: + hk_params[scope] = {} + hk_params[scope][name] = jnp.array(array) + + return hk_params + + +def padding_consistent_rng(f): + """Modify any element-wise random function to be consistent with padding. 
+ + Normally if you take a function like jax.random.normal and generate an array, + say of size (10,10), you will get a different set of random numbers to if you + add padding and take the first (10,10) sub-array. + + This function makes a random function that is consistent regardless of the + amount of padding added. + + Note: The padding-consistent function is likely to be slower to compile and + run than the function it is wrapping, but these slowdowns are likely to be + negligible in a large network. + + Args: + f: Any element-wise function that takes (PRNG key, shape) as the first 2 + arguments. + + Returns: + An equivalent function to f, that is now consistent for different amounts of + padding. + """ + def grid_keys(key, shape): + """Generate a grid of rng keys that is consistent with different padding. + + Generate random keys such that the keys will be identical, regardless of + how much padding is added to any dimension. + + Args: + key: A PRNG key. + shape: The shape of the output array of keys that will be generated. + + Returns: + An array of shape `shape` consisting of random keys. + """ + if not shape: + return key + new_keys = jax.vmap(functools.partial(jax.random.fold_in, key))( + jnp.arange(shape[0])) + return jax.vmap(functools.partial(grid_keys, shape=shape[1:]))(new_keys) + + def inner(key, shape, **kwargs): + return jnp.vectorize( + lambda key: f(key, shape=(), **kwargs), + signature='(2)->()')( + grid_keys(key, shape)) + return inner diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/notebooks/__init__.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/notebooks/__init__.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,14 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""AlphaFold Colab notebook.""" diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/notebooks/notebook_utils.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/notebooks/notebook_utils.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,182 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Helper methods for the AlphaFold Colab notebook.""" +import enum +import json +from typing import Any, Mapping, Optional, Sequence, Tuple + +from alphafold.common import residue_constants +from alphafold.data import parsers +from matplotlib import pyplot as plt +import numpy as np + + +@enum.unique +class ModelType(enum.Enum): + MONOMER = 0 + MULTIMER = 1 + + +def clean_and_validate_sequence( + input_sequence: str, min_length: int, max_length: int) -> str: + """Checks that the input sequence is ok and returns a clean version of it.""" + # Remove all whitespaces, tabs and end lines; upper-case. 
+ clean_sequence = input_sequence.translate( + str.maketrans('', '', ' \n\t')).upper() + aatypes = set(residue_constants.restypes) # 20 standard aatypes. + if not set(clean_sequence).issubset(aatypes): + raise ValueError( + f'Input sequence contains non-amino acid letters: ' + f'{set(clean_sequence) - aatypes}. AlphaFold only supports 20 standard ' + 'amino acids as inputs.') + if len(clean_sequence) < min_length: + raise ValueError( + f'Input sequence is too short: {len(clean_sequence)} amino acids, ' + f'while the minimum is {min_length}') + if len(clean_sequence) > max_length: + raise ValueError( + f'Input sequence is too long: {len(clean_sequence)} amino acids, while ' + f'the maximum is {max_length}. You may be able to run it with the full ' + f'AlphaFold system depending on your resources (system memory, ' + f'GPU memory).') + return clean_sequence + + +def validate_input( + input_sequences: Sequence[str], + min_length: int, + max_length: int, + max_multimer_length: int) -> Tuple[Sequence[str], ModelType]: + """Validates and cleans input sequences and determines which model to use.""" + sequences = [] + + for input_sequence in input_sequences: + if input_sequence.strip(): + input_sequence = clean_and_validate_sequence( + input_sequence=input_sequence, + min_length=min_length, + max_length=max_length) + sequences.append(input_sequence) + + if len(sequences) == 1: + print('Using the single-chain model.') + return sequences, ModelType.MONOMER + + elif len(sequences) > 1: + total_multimer_length = sum([len(seq) for seq in sequences]) + if total_multimer_length > max_multimer_length: + raise ValueError(f'The total length of multimer sequences is too long: ' + f'{total_multimer_length}, while the maximum is ' + f'{max_multimer_length}. 
Please use the full AlphaFold ' + f'system for long multimers.') + elif total_multimer_length > 1536: + print('WARNING: The accuracy of the system has not been fully validated ' + 'above 1536 residues, and you may experience long running times or ' + f'run out of memory for your complex with {total_multimer_length} ' + 'residues.') + print(f'Using the multimer model with {len(sequences)} sequences.') + return sequences, ModelType.MULTIMER + + else: + raise ValueError('No input amino acid sequence provided, please provide at ' + 'least one sequence.') + + +def merge_chunked_msa( + results: Sequence[Mapping[str, Any]], + max_hits: Optional[int] = None + ) -> parsers.Msa: + """Merges chunked database hits together into hits for the full database.""" + unsorted_results = [] + for chunk_index, chunk in enumerate(results): + msa = parsers.parse_stockholm(chunk['sto']) + e_values_dict = parsers.parse_e_values_from_tblout(chunk['tbl']) + # Jackhmmer lists sequences as /-. + e_values = [e_values_dict[t.partition('/')[0]] for t in msa.descriptions] + chunk_results = zip( + msa.sequences, msa.deletion_matrix, msa.descriptions, e_values) + if chunk_index != 0: + next(chunk_results) # Only take query (first hit) from the first chunk. 
+ unsorted_results.extend(chunk_results) + + sorted_by_evalue = sorted(unsorted_results, key=lambda x: x[-1]) + merged_sequences, merged_deletion_matrix, merged_descriptions, _ = zip( + *sorted_by_evalue) + merged_msa = parsers.Msa(sequences=merged_sequences, + deletion_matrix=merged_deletion_matrix, + descriptions=merged_descriptions) + if max_hits is not None: + merged_msa = merged_msa.truncate(max_seqs=max_hits) + + return merged_msa + + +def show_msa_info( + single_chain_msas: Sequence[parsers.Msa], + sequence_index: int): + """Prints info and shows a plot of the deduplicated single chain MSA.""" + full_single_chain_msa = [] + for single_chain_msa in single_chain_msas: + full_single_chain_msa.extend(single_chain_msa.sequences) + + # Deduplicate but preserve order (hence can't use set). + deduped_full_single_chain_msa = list(dict.fromkeys(full_single_chain_msa)) + total_msa_size = len(deduped_full_single_chain_msa) + print(f'\n{total_msa_size} unique sequences found in total for sequence ' + f'{sequence_index}\n') + + aa_map = {res: i for i, res in enumerate('ABCDEFGHIJKLMNOPQRSTUVWXYZ-')} + msa_arr = np.array( + [[aa_map[aa] for aa in seq] for seq in deduped_full_single_chain_msa]) + + plt.figure(figsize=(12, 3)) + plt.title(f'Per-Residue Count of Non-Gap Amino Acids in the MSA for Sequence ' + f'{sequence_index}') + plt.plot(np.sum(msa_arr != aa_map['-'], axis=0), color='black') + plt.ylabel('Non-Gap Count') + plt.yticks(range(0, total_msa_size + 1, max(1, int(total_msa_size / 3)))) + plt.show() + + +def empty_placeholder_template_features( + num_templates: int, num_res: int) -> Mapping[str, np.ndarray]: + return { + 'template_aatype': np.zeros( + (num_templates, num_res, + len(residue_constants.restypes_with_x_and_gap)), dtype=np.float32), + 'template_all_atom_masks': np.zeros( + (num_templates, num_res, residue_constants.atom_type_num), + dtype=np.float32), + 'template_all_atom_positions': np.zeros( + (num_templates, num_res, residue_constants.atom_type_num, 
3), + dtype=np.float32), + 'template_domain_names': np.zeros([num_templates], dtype=np.object), + 'template_sequence': np.zeros([num_templates], dtype=np.object), + 'template_sum_probs': np.zeros([num_templates], dtype=np.float32), + } + + +def get_pae_json(pae: np.ndarray, max_pae: float) -> str: + """Returns the PAE in the same format as is used in the AFDB.""" + rounded_errors = np.round(pae.astype(np.float64), decimals=1) + indices = np.indices((len(rounded_errors), len(rounded_errors))) + 1 + indices_1 = indices[0].flatten().tolist() + indices_2 = indices[1].flatten().tolist() + return json.dumps( + [{'residue1': indices_1, + 'residue2': indices_2, + 'distance': rounded_errors.flatten().tolist(), + 'max_predicted_aligned_error': max_pae}], + indent=None, separators=(',', ':')) diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/notebooks/notebook_utils_test.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/notebooks/notebook_utils_test.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,203 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for notebook_utils.""" +import io + +from absl.testing import absltest +from absl.testing import parameterized +from alphafold.data import parsers +from alphafold.data import templates +from alphafold.notebooks import notebook_utils + +import mock +import numpy as np + + +ONLY_QUERY_HIT = { + 'sto': ( + '# STOCKHOLM 1.0\n' + '#=GF ID query-i1\n' + 'query MAAHKGAEHHHKAAEHHEQAAKHHHAAAEHHEKGEHEQAAHHADTAYAHHKHAEEH\n' + '//\n'), + 'tbl': '', + 'stderr': b'', + 'n_iter': 1, + 'e_value': 0.0001} + +# pylint: disable=line-too-long +MULTI_SEQUENCE_HIT_1 = { + 'sto': ( + '# STOCKHOLM 1.0\n' + '#=GF ID query-i1\n' + '#=GS ERR1700680_4602609/41-109 DE [subseq from] ERR1700680_4602609\n' + '#=GS ERR1019366_5760491/40-105 DE [subseq from] ERR1019366_5760491\n' + '#=GS SRR5580704_12853319/61-125 DE [subseq from] SRR5580704_12853319\n' + 'query MAAHKGAEHHHKAAEHHEQAAKHHHAAAEHHEKGEHEQAAHHADTAYAHHKHAEEHAAQAAKHDAEHHAPKPH\n' + 'ERR1700680_4602609/41-109 --INKGAEYHKKAAEHHELAAKHHREAAKHHEAGSHEKAAHHSEIAAGHGLTAVHHTEEATK-HHPEEHTEK--\n' + 'ERR1019366_5760491/40-105 ---RSGAQHHDAAAQHYEEAARHHRMAAKQYQASHHEKAAHYAQLAYAHHMYAEQHAAEAAK-AHAKNHG----\n' + 'SRR5580704_12853319/61-125 ----PAADHHMKAAEHHEEAAKHHRAAAEHHTAGDHQKAGHHAHVANGHHVNAVHHAEEASK-HHATDHS----\n' + '//\n'), + 'tbl': ( + 'ERR1700680_4602609 - query - 7.7e-09 47.7 33.8 1.1e-08 47.2 33.8 1.2 1 0 0 1 1 1 1 -\n' + 'ERR1019366_5760491 - query - 1.7e-08 46.6 33.1 2.5e-08 46.1 33.1 1.3 1 0 0 1 1 1 1 -\n' + 'SRR5580704_12853319 - query - 1.1e-07 44.0 41.6 2e-07 43.1 41.6 1.4 1 0 0 1 1 1 1 -\n'), + 'stderr': b'', + 'n_iter': 1, + 'e_value': 0.0001} + +MULTI_SEQUENCE_HIT_2 = { + 'sto': ( + '# STOCKHOLM 1.0\n' + '#=GF ID query-i1\n' + '#=GS ERR1700719_3476944/70-137 DE [subseq from] ERR1700719_3476944\n' + '#=GS ERR1700761_4254522/72-138 DE [subseq from] ERR1700761_4254522\n' + '#=GS SRR5438477_9761204/64-132 DE [subseq from] SRR5438477_9761204\n' + 'query MAAHKGAEHHHKAAEHHEQAAKHHHAAAEHHEKGEHEQAAHHADTAYAHHKHAEEHAAQAAKHDAEHHAPKPH\n' + 
'ERR1700719_3476944/70-137 ---KQAAEHHHQAAEHHEHAARHHREAAKHHEAGDHESAAHHAHTAQGHLHQATHHASEAAKLHVEHHGQK--\n' + 'ERR1700761_4254522/72-138 ----QASEHHNLAAEHHEHAARHHRDAAKHHKAGDHEKAAHHAHVAHGHHLHATHHATEAAKHHVEAHGEK--\n' + 'SRR5438477_9761204/64-132 MPKHEGAEHHKKAAEHNEHAARHHKEAARHHEEGSHEKVGHHAHIAHGHHLHATHHAEEAAKTHSNQHE----\n' + '//\n'), + 'tbl': ( + 'ERR1700719_3476944 - query - 2e-07 43.2 47.5 3.5e-07 42.4 47.5 1.4 1 0 0 1 1 1 1 -\n' + 'ERR1700761_4254522 - query - 6.1e-07 41.6 48.1 8.1e-07 41.3 48.1 1.2 1 0 0 1 1 1 1 -\n' + 'SRR5438477_9761204 - query - 1.8e-06 40.2 46.9 2.3e-06 39.8 46.9 1.2 1 0 0 1 1 1 1 -\n'), + 'stderr': b'', + 'n_iter': 1, + 'e_value': 0.0001} +# pylint: enable=line-too-long + + +class NotebookUtilsTest(parameterized.TestCase): + + @parameterized.parameters( + ('DeepMind', 'DEEPMIND'), ('A ', 'A'), ('\tA', 'A'), (' A\t\n', 'A'), + ('ACDEFGHIKLMNPQRSTVWY', 'ACDEFGHIKLMNPQRSTVWY')) + def test_clean_and_validate_sequence_ok(self, sequence, exp_clean): + clean = notebook_utils.clean_and_validate_sequence( + sequence, min_length=1, max_length=100) + self.assertEqual(clean, exp_clean) + + @parameterized.named_parameters( + ('too_short', 'AA', 'too short'), + ('too_long', 'AAAAAAAAAA', 'too long'), + ('bad_amino_acids_B', 'BBBB', 'non-amino acid'), + ('bad_amino_acids_J', 'JJJJ', 'non-amino acid'), + ('bad_amino_acids_O', 'OOOO', 'non-amino acid'), + ('bad_amino_acids_U', 'UUUU', 'non-amino acid'), + ('bad_amino_acids_X', 'XXXX', 'non-amino acid'), + ('bad_amino_acids_Z', 'ZZZZ', 'non-amino acid')) + def test_clean_and_validate_sequence_bad(self, sequence, exp_error): + with self.assertRaisesRegex(ValueError, f'.*{exp_error}.*'): + notebook_utils.clean_and_validate_sequence( + sequence, min_length=4, max_length=8) + + @parameterized.parameters( + (['A', '', '', ' ', '\t', ' \t\n', '', ''], ['A'], + notebook_utils.ModelType.MONOMER), + (['', 'A'], ['A'], + notebook_utils.ModelType.MONOMER), + (['A', 'C ', ''], ['A', 'C'], + notebook_utils.ModelType.MULTIMER), + 
(['', 'A', '', 'C '], ['A', 'C'], + notebook_utils.ModelType.MULTIMER)) + def test_validate_input_ok( + self, input_sequences, exp_sequences, exp_model_type): + sequences, model_type = notebook_utils.validate_input( + input_sequences=input_sequences, + min_length=1, max_length=100, max_multimer_length=100) + self.assertSequenceEqual(sequences, exp_sequences) + self.assertEqual(model_type, exp_model_type) + + @parameterized.named_parameters( + ('no_input_sequence', ['', '\t', '\n'], 'No input amino acid sequence'), + ('too_long_single', ['AAAAAAAAA', 'AAAA'], 'Input sequence is too long'), + ('too_long_multimer', ['AAAA', 'AAAAA'], 'The total length of multimer')) + def test_validate_input_bad(self, input_sequences, exp_error): + with self.assertRaisesRegex(ValueError, f'.*{exp_error}.*'): + notebook_utils.validate_input( + input_sequences=input_sequences, + min_length=4, max_length=8, max_multimer_length=6) + + def test_merge_chunked_msa_no_hits(self): + results = [ONLY_QUERY_HIT, ONLY_QUERY_HIT] + merged_msa = notebook_utils.merge_chunked_msa( + results=results) + self.assertSequenceEqual( + merged_msa.sequences, + ('MAAHKGAEHHHKAAEHHEQAAKHHHAAAEHHEKGEHEQAAHHADTAYAHHKHAEEH',)) + self.assertSequenceEqual(merged_msa.deletion_matrix, ([0] * 56,)) + + def test_merge_chunked_msa(self): + results = [MULTI_SEQUENCE_HIT_1, MULTI_SEQUENCE_HIT_2] + merged_msa = notebook_utils.merge_chunked_msa( + results=results) + self.assertLen(merged_msa.sequences, 7) + # The 1st one is the query. + self.assertEqual( + merged_msa.sequences[0], + 'MAAHKGAEHHHKAAEHHEQAAKHHHAAAEHHEKGEHEQAAHHADTAYAHHKHAEEHAAQAAKHDAEHHAP' + 'KPH') + # The 2nd one is the one with the lowest e-value: ERR1700680_4602609. + self.assertEqual( + merged_msa.sequences[1], + '--INKGAEYHKKAAEHHELAAKHHREAAKHHEAGSHEKAAHHSEIAAGHGLTAVHHTEEATK-HHPEEHT' + 'EK-') + # The last one is the one with the largest e-value: SRR5438477_9761204. 
+ self.assertEqual( + merged_msa.sequences[-1], + 'MPKHEGAEHHKKAAEHNEHAARHHKEAARHHEEGSHEKVGHHAHIAHGHHLHATHHAEEAAKTHSNQHE-' + '---') + self.assertLen(merged_msa.deletion_matrix, 7) + + @mock.patch('sys.stdout', new_callable=io.StringIO) + def test_show_msa_info(self, mocked_stdout): + single_chain_msas = [ + parsers.Msa(sequences=['A', 'B', 'C', 'C'], + deletion_matrix=[None] * 4, + descriptions=[''] * 4), + parsers.Msa(sequences=['A', 'A', 'A', 'D'], + deletion_matrix=[None] * 4, + descriptions=[''] * 4) + ] + notebook_utils.show_msa_info( + single_chain_msas=single_chain_msas, sequence_index=1) + self.assertEqual(mocked_stdout.getvalue(), + '\n4 unique sequences found in total for sequence 1\n\n') + + @parameterized.named_parameters( + ('some_templates', 4), ('no_templates', 0)) + def test_empty_placeholder_template_features(self, num_templates): + template_features = notebook_utils.empty_placeholder_template_features( + num_templates=num_templates, num_res=16) + self.assertCountEqual(template_features.keys(), + templates.TEMPLATE_FEATURES.keys()) + self.assertSameElements( + [v.shape[0] for v in template_features.values()], [num_templates]) + self.assertSequenceEqual( + [t.dtype for t in template_features.values()], + [np.array([], dtype=templates.TEMPLATE_FEATURES[feat_name]).dtype + for feat_name in template_features]) + + def test_get_pae_json(self): + pae = np.array([[0.01, 13.12345], [20.0987, 0.0]]) + pae_json = notebook_utils.get_pae_json(pae=pae, max_pae=31.75) + self.assertEqual( + pae_json, + '[{"residue1":[1,1,2,2],"residue2":[1,2,1,2],"distance":' + '[0.0,13.1,20.1,0.0],"max_predicted_aligned_error":31.75}]') + + +if __name__ == '__main__': + absltest.main() diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/relax/__init__.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/relax/__init__.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,14 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the 
Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Amber relaxation.""" diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/relax/amber_minimize.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/relax/amber_minimize.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,543 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Restrained Amber Minimization of a structure.""" + +import io +import time +from typing import Collection, Optional, Sequence + +from absl import logging +from alphafold.common import protein +from alphafold.common import residue_constants +from alphafold.model import folding +from alphafold.relax import cleanup +from alphafold.relax import utils +import ml_collections +import numpy as np +from simtk import openmm +from simtk import unit +from simtk.openmm import app as openmm_app +from simtk.openmm.app.internal.pdbstructure import PdbStructure + + +ENERGY = unit.kilocalories_per_mole +LENGTH = unit.angstroms + + +def will_restrain(atom: openmm_app.Atom, rset: str) -> bool: + """Returns True if the atom will be restrained by the given restraint set.""" + + if rset == "non_hydrogen": + return atom.element.name != "hydrogen" + elif rset == "c_alpha": + return atom.name == "CA" + + +def _add_restraints( + system: openmm.System, + reference_pdb: openmm_app.PDBFile, + stiffness: unit.Unit, + rset: str, + exclude_residues: Sequence[int]): + """Adds a harmonic potential that restrains the system to a structure.""" + assert rset in ["non_hydrogen", "c_alpha"] + + force = openmm.CustomExternalForce( + "0.5 * k * ((x-x0)^2 + (y-y0)^2 + (z-z0)^2)") + force.addGlobalParameter("k", stiffness) + for p in ["x0", "y0", "z0"]: + force.addPerParticleParameter(p) + + for i, atom in enumerate(reference_pdb.topology.atoms()): + if atom.residue.index in exclude_residues: + continue + if will_restrain(atom, rset): + force.addParticle(i, reference_pdb.positions[i]) + logging.info("Restraining %d / %d particles.", + force.getNumParticles(), system.getNumParticles()) + system.addForce(force) + + +def _openmm_minimize( + pdb_str: str, + max_iterations: int, + tolerance: unit.Unit, + stiffness: unit.Unit, + restraint_set: str, + exclude_residues: Sequence[int]): + """Minimize energy via openmm.""" + + pdb_file = io.StringIO(pdb_str) + pdb = openmm_app.PDBFile(pdb_file) + + force_field = 
openmm_app.ForceField("amber99sb.xml") + constraints = openmm_app.HBonds + system = force_field.createSystem( + pdb.topology, constraints=constraints) + if stiffness > 0 * ENERGY / (LENGTH**2): + _add_restraints(system, pdb, stiffness, restraint_set, exclude_residues) + + integrator = openmm.LangevinIntegrator(0, 0.01, 0.0) + platform = openmm.Platform.getPlatformByName("CPU") + simulation = openmm_app.Simulation( + pdb.topology, system, integrator, platform) + simulation.context.setPositions(pdb.positions) + + ret = {} + state = simulation.context.getState(getEnergy=True, getPositions=True) + ret["einit"] = state.getPotentialEnergy().value_in_unit(ENERGY) + ret["posinit"] = state.getPositions(asNumpy=True).value_in_unit(LENGTH) + simulation.minimizeEnergy(maxIterations=max_iterations, + tolerance=tolerance) + state = simulation.context.getState(getEnergy=True, getPositions=True) + ret["efinal"] = state.getPotentialEnergy().value_in_unit(ENERGY) + ret["pos"] = state.getPositions(asNumpy=True).value_in_unit(LENGTH) + ret["min_pdb"] = _get_pdb_string(simulation.topology, state.getPositions()) + return ret + + +def _get_pdb_string(topology: openmm_app.Topology, positions: unit.Quantity): + """Returns a pdb string provided OpenMM topology and positions.""" + with io.StringIO() as f: + openmm_app.PDBFile.writeFile(topology, positions, f) + return f.getvalue() + + +def _check_cleaned_atoms(pdb_cleaned_string: str, pdb_ref_string: str): + """Checks that no atom positions have been altered by cleaning.""" + cleaned = openmm_app.PDBFile(io.StringIO(pdb_cleaned_string)) + reference = openmm_app.PDBFile(io.StringIO(pdb_ref_string)) + + cl_xyz = np.array(cleaned.getPositions().value_in_unit(LENGTH)) + ref_xyz = np.array(reference.getPositions().value_in_unit(LENGTH)) + + for ref_res, cl_res in zip(reference.topology.residues(), + cleaned.topology.residues()): + assert ref_res.name == cl_res.name + for rat in ref_res.atoms(): + for cat in cl_res.atoms(): + if cat.name == 
rat.name: + if not np.array_equal(cl_xyz[cat.index], ref_xyz[rat.index]): + raise ValueError(f"Coordinates of cleaned atom {cat} do not match " + f"coordinates of reference atom {rat}.") + + +def _check_residues_are_well_defined(prot: protein.Protein): + """Checks that all residues contain non-empty atom sets.""" + if (prot.atom_mask.sum(axis=-1) == 0).any(): + raise ValueError("Amber minimization can only be performed on proteins with" + " well-defined residues. This protein contains at least" + " one residue with no atoms.") + + +def _check_atom_mask_is_ideal(prot): + """Sanity-check the atom mask is ideal, up to a possible OXT.""" + atom_mask = prot.atom_mask + ideal_atom_mask = protein.ideal_atom_mask(prot) + utils.assert_equal_nonterminal_atom_types(atom_mask, ideal_atom_mask) + + +def clean_protein( + prot: protein.Protein, + checks: bool = True): + """Adds missing atoms to Protein instance. + + Args: + prot: A `protein.Protein` instance. + checks: A `bool` specifying whether to add additional checks to the cleaning + process. + + Returns: + pdb_string: A string of the cleaned protein. + """ + _check_atom_mask_is_ideal(prot) + + # Clean pdb. + prot_pdb_string = protein.to_pdb(prot) + pdb_file = io.StringIO(prot_pdb_string) + alterations_info = {} + fixed_pdb = cleanup.fix_pdb(pdb_file, alterations_info) + fixed_pdb_file = io.StringIO(fixed_pdb) + pdb_structure = PdbStructure(fixed_pdb_file) + cleanup.clean_structure(pdb_structure, alterations_info) + + logging.info("alterations info: %s", alterations_info) + + # Write pdb file of cleaned structure. 
+ as_file = openmm_app.PDBFile(pdb_structure) + pdb_string = _get_pdb_string(as_file.getTopology(), as_file.getPositions()) + if checks: + _check_cleaned_atoms(pdb_string, prot_pdb_string) + return pdb_string + + +def make_atom14_positions(prot): + """Constructs denser atom positions (14 dimensions instead of 37).""" + restype_atom14_to_atom37 = [] # mapping (restype, atom14) --> atom37 + restype_atom37_to_atom14 = [] # mapping (restype, atom37) --> atom14 + restype_atom14_mask = [] + + for rt in residue_constants.restypes: + atom_names = residue_constants.restype_name_to_atom14_names[ + residue_constants.restype_1to3[rt]] + + restype_atom14_to_atom37.append([ + (residue_constants.atom_order[name] if name else 0) + for name in atom_names + ]) + + atom_name_to_idx14 = {name: i for i, name in enumerate(atom_names)} + restype_atom37_to_atom14.append([ + (atom_name_to_idx14[name] if name in atom_name_to_idx14 else 0) + for name in residue_constants.atom_types + ]) + + restype_atom14_mask.append([(1. if name else 0.) for name in atom_names]) + + # Add dummy mapping for restype 'UNK'. + restype_atom14_to_atom37.append([0] * 14) + restype_atom37_to_atom14.append([0] * 37) + restype_atom14_mask.append([0.] * 14) + + restype_atom14_to_atom37 = np.array(restype_atom14_to_atom37, dtype=np.int32) + restype_atom37_to_atom14 = np.array(restype_atom37_to_atom14, dtype=np.int32) + restype_atom14_mask = np.array(restype_atom14_mask, dtype=np.float32) + + # Create the mapping for (residx, atom14) --> atom37, i.e. an array + # with shape (num_res, 14) containing the atom37 indices for this protein. + residx_atom14_to_atom37 = restype_atom14_to_atom37[prot["aatype"]] + residx_atom14_mask = restype_atom14_mask[prot["aatype"]] + + # Create a mask for known ground truth positions. + residx_atom14_gt_mask = residx_atom14_mask * np.take_along_axis( + prot["all_atom_mask"], residx_atom14_to_atom37, axis=1).astype(np.float32) + + # Gather the ground truth positions. 
+ residx_atom14_gt_positions = residx_atom14_gt_mask[:, :, None] * ( + np.take_along_axis(prot["all_atom_positions"], + residx_atom14_to_atom37[..., None], + axis=1)) + + prot["atom14_atom_exists"] = residx_atom14_mask + prot["atom14_gt_exists"] = residx_atom14_gt_mask + prot["atom14_gt_positions"] = residx_atom14_gt_positions + + prot["residx_atom14_to_atom37"] = residx_atom14_to_atom37 + + # Create the gather indices for mapping back. + residx_atom37_to_atom14 = restype_atom37_to_atom14[prot["aatype"]] + prot["residx_atom37_to_atom14"] = residx_atom37_to_atom14 + + # Create the corresponding mask. + restype_atom37_mask = np.zeros([21, 37], dtype=np.float32) + for restype, restype_letter in enumerate(residue_constants.restypes): + restype_name = residue_constants.restype_1to3[restype_letter] + atom_names = residue_constants.residue_atoms[restype_name] + for atom_name in atom_names: + atom_type = residue_constants.atom_order[atom_name] + restype_atom37_mask[restype, atom_type] = 1 + + residx_atom37_mask = restype_atom37_mask[prot["aatype"]] + prot["atom37_atom_exists"] = residx_atom37_mask + + # As the atom naming is ambiguous for 7 of the 20 amino acids, provide + # alternative ground truth coordinates where the naming is swapped + restype_3 = [ + residue_constants.restype_1to3[res] for res in residue_constants.restypes + ] + restype_3 += ["UNK"] + + # Matrices for renaming ambiguous atoms. 
+ all_matrices = {res: np.eye(14, dtype=np.float32) for res in restype_3} + for resname, swap in residue_constants.residue_atom_renaming_swaps.items(): + correspondences = np.arange(14) + for source_atom_swap, target_atom_swap in swap.items(): + source_index = residue_constants.restype_name_to_atom14_names[ + resname].index(source_atom_swap) + target_index = residue_constants.restype_name_to_atom14_names[ + resname].index(target_atom_swap) + correspondences[source_index] = target_index + correspondences[target_index] = source_index + renaming_matrix = np.zeros((14, 14), dtype=np.float32) + for index, correspondence in enumerate(correspondences): + renaming_matrix[index, correspondence] = 1. + all_matrices[resname] = renaming_matrix.astype(np.float32) + renaming_matrices = np.stack([all_matrices[restype] for restype in restype_3]) + + # Pick the transformation matrices for the given residue sequence + # shape (num_res, 14, 14). + renaming_transform = renaming_matrices[prot["aatype"]] + + # Apply it to the ground truth positions. shape (num_res, 14, 3). + alternative_gt_positions = np.einsum("rac,rab->rbc", + residx_atom14_gt_positions, + renaming_transform) + prot["atom14_alt_gt_positions"] = alternative_gt_positions + + # Create the mask for the alternative ground truth (differs from the + # ground truth mask, if only one of the atoms in an ambiguous pair has a + # ground truth position). + alternative_gt_mask = np.einsum("ra,rab->rb", + residx_atom14_gt_mask, + renaming_transform) + + prot["atom14_alt_gt_exists"] = alternative_gt_mask + + # Create an ambiguous atoms mask. shape: (21, 14). 
+ restype_atom14_is_ambiguous = np.zeros((21, 14), dtype=np.float32) + for resname, swap in residue_constants.residue_atom_renaming_swaps.items(): + for atom_name1, atom_name2 in swap.items(): + restype = residue_constants.restype_order[ + residue_constants.restype_3to1[resname]] + atom_idx1 = residue_constants.restype_name_to_atom14_names[resname].index( + atom_name1) + atom_idx2 = residue_constants.restype_name_to_atom14_names[resname].index( + atom_name2) + restype_atom14_is_ambiguous[restype, atom_idx1] = 1 + restype_atom14_is_ambiguous[restype, atom_idx2] = 1 + + # From this create an ambiguous_mask for the given sequence. + prot["atom14_atom_is_ambiguous"] = ( + restype_atom14_is_ambiguous[prot["aatype"]]) + + return prot + + +def find_violations(prot_np: protein.Protein): + """Analyzes a protein and returns structural violation information. + + Args: + prot_np: A protein. + + Returns: + violations: A `dict` of structure components with structural violations. + violation_metrics: A `dict` of violation metrics. + """ + batch = { + "aatype": prot_np.aatype, + "all_atom_positions": prot_np.atom_positions.astype(np.float32), + "all_atom_mask": prot_np.atom_mask.astype(np.float32), + "residue_index": prot_np.residue_index, + } + + batch["seq_mask"] = np.ones_like(batch["aatype"], np.float32) + batch = make_atom14_positions(batch) + + violations = folding.find_structural_violations( + batch=batch, + atom14_pred_positions=batch["atom14_gt_positions"], + config=ml_collections.ConfigDict( + {"violation_tolerance_factor": 12, # Taken from model config. + "clash_overlap_tolerance": 1.5, # Taken from model config. 
+ })) + violation_metrics = folding.compute_violation_metrics( + batch=batch, + atom14_pred_positions=batch["atom14_gt_positions"], + violations=violations, + ) + + return violations, violation_metrics + + +def get_violation_metrics(prot: protein.Protein): + """Computes violation and alignment metrics.""" + structural_violations, struct_metrics = find_violations(prot) + violation_idx = np.flatnonzero( + structural_violations["total_per_residue_violations_mask"]) + + struct_metrics["residue_violations"] = violation_idx + struct_metrics["num_residue_violations"] = len(violation_idx) + struct_metrics["structural_violations"] = structural_violations + return struct_metrics + + +def _run_one_iteration( + *, + pdb_string: str, + max_iterations: int, + tolerance: float, + stiffness: float, + restraint_set: str, + max_attempts: int, + exclude_residues: Optional[Collection[int]] = None): + """Runs the minimization pipeline. + + Args: + pdb_string: A pdb string. + max_iterations: An `int` specifying the maximum number of L-BFGS iterations. + A value of 0 specifies no limit. + tolerance: kcal/mol, the energy tolerance of L-BFGS. + stiffness: kcal/mol A**2, spring constant of heavy atom restraining + potential. + restraint_set: The set of atoms to restrain. + max_attempts: The maximum number of minimization attempts. + exclude_residues: An optional list of zero-indexed residues to exclude from + restraints. + + Returns: + A `dict` of minimization info. + """ + exclude_residues = exclude_residues or [] + + # Assign physical dimensions. 
+ tolerance = tolerance * ENERGY + stiffness = stiffness * ENERGY / (LENGTH**2) + + start = time.time() + minimized = False + attempts = 0 + while not minimized and attempts < max_attempts: + attempts += 1 + try: + logging.info("Minimizing protein, attempt %d of %d.", + attempts, max_attempts) + ret = _openmm_minimize( + pdb_string, max_iterations=max_iterations, + tolerance=tolerance, stiffness=stiffness, + restraint_set=restraint_set, + exclude_residues=exclude_residues) + minimized = True + except Exception as e: # pylint: disable=broad-except + logging.info(e) + if not minimized: + raise ValueError(f"Minimization failed after {max_attempts} attempts.") + ret["opt_time"] = time.time() - start + ret["min_attempts"] = attempts + return ret + + +def run_pipeline( + prot: protein.Protein, + stiffness: float, + max_outer_iterations: int = 1, + place_hydrogens_every_iteration: bool = True, + max_iterations: int = 0, + tolerance: float = 2.39, + restraint_set: str = "non_hydrogen", + max_attempts: int = 100, + checks: bool = True, + exclude_residues: Optional[Sequence[int]] = None): + """Run iterative amber relax. + + Successive relax iterations are performed until all violations have been + resolved. Each iteration involves a restrained Amber minimization, with + restraint exclusions determined by violation-participating residues. + + Args: + prot: A protein to be relaxed. + stiffness: kcal/mol A**2, the restraint stiffness. + max_outer_iterations: The maximum number of iterative minimization. + place_hydrogens_every_iteration: Whether hydrogens are re-initialized + prior to every minimization. + max_iterations: An `int` specifying the maximum number of L-BFGS steps + per relax iteration. A value of 0 specifies no limit. + tolerance: kcal/mol, the energy tolerance of L-BFGS. + The default value is the OpenMM default. + restraint_set: The set of atoms to restrain. + max_attempts: The maximum number of minimization attempts per iteration. 
+ checks: Whether to perform cleaning checks. + exclude_residues: An optional list of zero-indexed residues to exclude from + restraints. + + Returns: + out: A dictionary of output values. + """ + + # `protein.to_pdb` will strip any poorly-defined residues so we need to + # perform this check before `clean_protein`. + _check_residues_are_well_defined(prot) + pdb_string = clean_protein(prot, checks=checks) + + exclude_residues = exclude_residues or [] + exclude_residues = set(exclude_residues) + violations = np.inf + iteration = 0 + + while violations > 0 and iteration < max_outer_iterations: + ret = _run_one_iteration( + pdb_string=pdb_string, + exclude_residues=exclude_residues, + max_iterations=max_iterations, + tolerance=tolerance, + stiffness=stiffness, + restraint_set=restraint_set, + max_attempts=max_attempts) + prot = protein.from_pdb_string(ret["min_pdb"]) + if place_hydrogens_every_iteration: + pdb_string = clean_protein(prot, checks=True) + else: + pdb_string = ret["min_pdb"] + ret.update(get_violation_metrics(prot)) + ret.update({ + "num_exclusions": len(exclude_residues), + "iteration": iteration, + }) + violations = ret["violations_per_residue"] + exclude_residues = exclude_residues.union(ret["residue_violations"]) + + logging.info("Iteration completed: Einit %.2f Efinal %.2f Time %.2f s " + "num residue violations %d num residue exclusions %d ", + ret["einit"], ret["efinal"], ret["opt_time"], + ret["num_residue_violations"], ret["num_exclusions"]) + iteration += 1 + return ret + + +def get_initial_energies(pdb_strs: Sequence[str], + stiffness: float = 0.0, + restraint_set: str = "non_hydrogen", + exclude_residues: Optional[Sequence[int]] = None): + """Returns initial potential energies for a sequence of PDBs. + + Assumes the input PDBs are ready for minimization, and all have the same + topology. + Allows time to be saved by not pdbfixing / rebuilding the system. + + Args: + pdb_strs: List of PDB strings. 
+ stiffness: kcal/mol A**2, spring constant of heavy atom restraining + potential. + restraint_set: Which atom types to restrain. + exclude_residues: An optional list of zero-indexed residues to exclude from + restraints. + + Returns: + A list of initial energies in the same order as pdb_strs. + """ + exclude_residues = exclude_residues or [] + + openmm_pdbs = [openmm_app.PDBFile(PdbStructure(io.StringIO(p))) + for p in pdb_strs] + force_field = openmm_app.ForceField("amber99sb.xml") + system = force_field.createSystem(openmm_pdbs[0].topology, + constraints=openmm_app.HBonds) + stiffness = stiffness * ENERGY / (LENGTH**2) + if stiffness > 0 * ENERGY / (LENGTH**2): + _add_restraints(system, openmm_pdbs[0], stiffness, restraint_set, + exclude_residues) + simulation = openmm_app.Simulation(openmm_pdbs[0].topology, + system, + openmm.LangevinIntegrator(0, 0.01, 0.0), + openmm.Platform.getPlatformByName("CPU")) + energies = [] + for pdb in openmm_pdbs: + try: + simulation.context.setPositions(pdb.positions) + state = simulation.context.getState(getEnergy=True) + energies.append(state.getPotentialEnergy().value_in_unit(ENERGY)) + except Exception as e: # pylint: disable=broad-except + logging.error("Error getting initial energy, returning large value %s", e) + energies.append(unit.Quantity(1e20, ENERGY)) + return energies diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/relax/amber_minimize_test.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/relax/amber_minimize_test.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,130 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for amber_minimize.""" +import os + +from absl.testing import absltest +from alphafold.common import protein +from alphafold.relax import amber_minimize +import numpy as np +# Internal import (7716). + + +def _load_test_protein(data_path): + pdb_path = os.path.join(absltest.get_default_test_srcdir(), data_path) + with open(pdb_path, 'r') as f: + return protein.from_pdb_string(f.read()) + + +class AmberMinimizeTest(absltest.TestCase): + + def test_multiple_disulfides_target(self): + prot = _load_test_protein( + 'alphafold/relax/testdata/multiple_disulfides_target.pdb' + ) + ret = amber_minimize.run_pipeline(prot, max_iterations=10, max_attempts=1, + stiffness=10.) + self.assertIn('opt_time', ret) + self.assertIn('min_attempts', ret) + + def test_raises_invalid_protein_assertion(self): + prot = _load_test_protein( + 'alphafold/relax/testdata/multiple_disulfides_target.pdb' + ) + prot.atom_mask[4, :] = 0 + with self.assertRaisesRegex( + ValueError, + 'Amber minimization can only be performed on proteins with well-defined' + ' residues. This protein contains at least one residue with no atoms.'): + amber_minimize.run_pipeline(prot, max_iterations=10, + stiffness=1., + max_attempts=1) + + def test_iterative_relax(self): + prot = _load_test_protein( + 'alphafold/relax/testdata/with_violations.pdb' + ) + violations = amber_minimize.get_violation_metrics(prot) + self.assertGreater(violations['num_residue_violations'], 0) + out = amber_minimize.run_pipeline( + prot=prot, max_outer_iterations=10, stiffness=10.) 
+ self.assertLess(out['efinal'], out['einit']) + self.assertEqual(0, out['num_residue_violations']) + + def test_find_violations(self): + prot = _load_test_protein( + 'alphafold/relax/testdata/multiple_disulfides_target.pdb' + ) + viols, _ = amber_minimize.find_violations(prot) + + expected_between_residues_connection_mask = np.zeros((191,), np.float32) + for residue in (42, 43, 59, 60, 135, 136): + expected_between_residues_connection_mask[residue] = 1.0 + + expected_clash_indices = np.array([ + [8, 4], + [8, 5], + [13, 3], + [14, 1], + [14, 4], + [26, 4], + [26, 5], + [31, 8], + [31, 10], + [39, 0], + [39, 1], + [39, 2], + [39, 3], + [39, 4], + [42, 5], + [42, 6], + [42, 7], + [42, 8], + [47, 7], + [47, 8], + [47, 9], + [47, 10], + [64, 4], + [85, 5], + [102, 4], + [102, 5], + [109, 13], + [111, 5], + [118, 6], + [118, 7], + [118, 8], + [124, 4], + [124, 5], + [131, 5], + [139, 7], + [147, 4], + [152, 7]], dtype=np.int32) + expected_between_residues_clash_mask = np.zeros([191, 14]) + expected_between_residues_clash_mask[expected_clash_indices[:, 0], + expected_clash_indices[:, 1]] += 1 + expected_per_atom_violations = np.zeros([191, 14]) + np.testing.assert_array_equal( + viols['between_residues']['connections_per_residue_violation_mask'], + expected_between_residues_connection_mask) + np.testing.assert_array_equal( + viols['between_residues']['clashes_per_atom_clash_mask'], + expected_between_residues_clash_mask) + np.testing.assert_array_equal( + viols['within_residues']['per_atom_violations'], + expected_per_atom_violations) + + +if __name__ == '__main__': + absltest.main() diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/relax/cleanup.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/relax/cleanup.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,127 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance 
with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Cleans up a PDB file using pdbfixer in preparation for OpenMM simulations. + +fix_pdb uses a third-party tool. We also support fixing some additional edge +cases like removing chains of length one (see clean_structure). +""" +import io + +import pdbfixer +from simtk.openmm import app +from simtk.openmm.app import element + + +def fix_pdb(pdbfile, alterations_info): + """Apply pdbfixer to the contents of a PDB file; return a PDB string result. + + 1) Replaces nonstandard residues. + 2) Removes heterogens (non protein residues) including water. + 3) Adds missing residues and missing atoms within existing residues. + 4) Adds hydrogens assuming pH=7.0. + 5) KeepIds is currently true, so the fixer must keep the existing chain and + residue identifiers. This will fail for some files in wider PDB that have + invalid IDs. + + Args: + pdbfile: Input PDB file handle. + alterations_info: A dict that will store details of changes made. + + Returns: + A PDB string representing the fixed structure. 
+ """ + fixer = pdbfixer.PDBFixer(pdbfile=pdbfile) + fixer.findNonstandardResidues() + alterations_info['nonstandard_residues'] = fixer.nonstandardResidues + fixer.replaceNonstandardResidues() + _remove_heterogens(fixer, alterations_info, keep_water=False) + fixer.findMissingResidues() + alterations_info['missing_residues'] = fixer.missingResidues + fixer.findMissingAtoms() + alterations_info['missing_heavy_atoms'] = fixer.missingAtoms + alterations_info['missing_terminals'] = fixer.missingTerminals + fixer.addMissingAtoms(seed=0) + fixer.addMissingHydrogens() + out_handle = io.StringIO() + app.PDBFile.writeFile(fixer.topology, fixer.positions, out_handle, + keepIds=True) + return out_handle.getvalue() + + +def clean_structure(pdb_structure, alterations_info): + """Applies additional fixes to an OpenMM structure, to handle edge cases. + + Args: + pdb_structure: An OpenMM structure to modify and fix. + alterations_info: A dict that will store details of changes made. + """ + _replace_met_se(pdb_structure, alterations_info) + _remove_chains_of_length_one(pdb_structure, alterations_info) + + +def _remove_heterogens(fixer, alterations_info, keep_water): + """Removes the residues that Pdbfixer considers to be heterogens. + + Args: + fixer: A Pdbfixer instance. + alterations_info: A dict that will store details of changes made. + keep_water: If True, water (HOH) is not considered to be a heterogen. 
+ """ + initial_resnames = set() + for chain in fixer.topology.chains(): + for residue in chain.residues(): + initial_resnames.add(residue.name) + fixer.removeHeterogens(keepWater=keep_water) + final_resnames = set() + for chain in fixer.topology.chains(): + for residue in chain.residues(): + final_resnames.add(residue.name) + alterations_info['removed_heterogens'] = ( + initial_resnames.difference(final_resnames)) + + +def _replace_met_se(pdb_structure, alterations_info): + """Replace the Se in any MET residues that were not marked as modified.""" + modified_met_residues = [] + for res in pdb_structure.iter_residues(): + name = res.get_name_with_spaces().strip() + if name == 'MET': + s_atom = res.get_atom('SD') + if s_atom.element_symbol == 'Se': + s_atom.element_symbol = 'S' + s_atom.element = element.get_by_symbol('S') + modified_met_residues.append(s_atom.residue_number) + alterations_info['Se_in_MET'] = modified_met_residues + + +def _remove_chains_of_length_one(pdb_structure, alterations_info): + """Removes chains that correspond to a single amino acid. + + A single amino acid in a chain is both N and C terminus. There is no force + template for this case. + + Args: + pdb_structure: An OpenMM pdb_structure to modify and fix. + alterations_info: A dict that will store details of changes made. 
+ """ + removed_chains = {} + for model in pdb_structure.iter_models(): + valid_chains = [c for c in model.iter_chains() if len(c) > 1] + invalid_chain_ids = [c.chain_id for c in model.iter_chains() if len(c) <= 1] + model.chains = valid_chains + for chain_id in invalid_chain_ids: + model.chains_by_id.pop(chain_id) + removed_chains[model.number] = invalid_chain_ids + alterations_info['removed_chains'] = removed_chains diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/relax/cleanup_test.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/relax/cleanup_test.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,137 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Tests for relax.cleanup.""" +import io + +from absl.testing import absltest +from alphafold.relax import cleanup +from simtk.openmm.app.internal import pdbstructure + + +def _pdb_to_structure(pdb_str): + handle = io.StringIO(pdb_str) + return pdbstructure.PdbStructure(handle) + + +def _lines_to_structure(pdb_lines): + return _pdb_to_structure('\n'.join(pdb_lines)) + + +class CleanupTest(absltest.TestCase): + + def test_missing_residues(self): + pdb_lines = ['SEQRES 1 C 3 CYS GLY LEU', + 'ATOM 1 N CYS C 1 -12.262 20.115 60.959 1.00 ' + '19.08 N', + 'ATOM 2 CA CYS C 1 -11.065 20.934 60.773 1.00 ' + '17.23 C', + 'ATOM 3 C CYS C 1 -10.002 20.742 61.844 1.00 ' + '15.38 C', + 'ATOM 4 O CYS C 1 -10.284 20.225 62.929 1.00 ' + '16.04 O', + 'ATOM 5 N LEU C 3 -7.688 18.700 62.045 1.00 ' + '14.75 N', + 'ATOM 6 CA LEU C 3 -7.256 17.320 62.234 1.00 ' + '16.81 C', + 'ATOM 7 C LEU C 3 -6.380 16.864 61.070 1.00 ' + '16.95 C', + 'ATOM 8 O LEU C 3 -6.551 17.332 59.947 1.00 ' + '16.97 O'] + input_handle = io.StringIO('\n'.join(pdb_lines)) + alterations = {} + result = cleanup.fix_pdb(input_handle, alterations) + structure = _pdb_to_structure(result) + residue_names = [r.get_name() for r in structure.iter_residues()] + self.assertCountEqual(residue_names, ['CYS', 'GLY', 'LEU']) + self.assertCountEqual(alterations['missing_residues'].values(), [['GLY']]) + + def test_missing_atoms(self): + pdb_lines = ['SEQRES 1 A 1 PRO', + 'ATOM 1 CA PRO A 1 1.000 1.000 1.000 1.00 ' + ' 0.00 C'] + input_handle = io.StringIO('\n'.join(pdb_lines)) + alterations = {} + result = cleanup.fix_pdb(input_handle, alterations) + structure = _pdb_to_structure(result) + atom_names = [a.get_name() for a in structure.iter_atoms()] + self.assertCountEqual(atom_names, ['N', 'CD', 'HD2', 'HD3', 'CG', 'HG2', + 'HG3', 'CB', 'HB2', 'HB3', 'CA', 'HA', + 'C', 'O', 'H2', 'H3', 'OXT']) + missing_atoms_by_residue = list(alterations['missing_heavy_atoms'].values()) + self.assertLen(missing_atoms_by_residue, 1) + 
atoms_added = [a.name for a in missing_atoms_by_residue[0]] + self.assertCountEqual(atoms_added, ['N', 'CD', 'CG', 'CB', 'C', 'O']) + missing_terminals_by_residue = alterations['missing_terminals'] + self.assertLen(missing_terminals_by_residue, 1) + has_missing_terminal = [r.name for r in missing_terminals_by_residue.keys()] + self.assertCountEqual(has_missing_terminal, ['PRO']) + self.assertCountEqual([t for t in missing_terminals_by_residue.values()], + [['OXT']]) + + def test_remove_heterogens(self): + pdb_lines = ['SEQRES 1 A 1 GLY', + 'ATOM 1 CA GLY A 1 0.000 0.000 0.000 1.00 ' + ' 0.00 C', + 'ATOM 2 O HOH A 2 0.000 0.000 0.000 1.00 ' + ' 0.00 O'] + input_handle = io.StringIO('\n'.join(pdb_lines)) + alterations = {} + result = cleanup.fix_pdb(input_handle, alterations) + structure = _pdb_to_structure(result) + self.assertCountEqual([res.get_name() for res in structure.iter_residues()], + ['GLY']) + self.assertEqual(alterations['removed_heterogens'], set(['HOH'])) + + def test_fix_nonstandard_residues(self): + pdb_lines = ['SEQRES 1 A 1 DAL', + 'ATOM 1 CA DAL A 1 0.000 0.000 0.000 1.00 ' + ' 0.00 C'] + input_handle = io.StringIO('\n'.join(pdb_lines)) + alterations = {} + result = cleanup.fix_pdb(input_handle, alterations) + structure = _pdb_to_structure(result) + residue_names = [res.get_name() for res in structure.iter_residues()] + self.assertCountEqual(residue_names, ['ALA']) + self.assertLen(alterations['nonstandard_residues'], 1) + original_res, new_name = alterations['nonstandard_residues'][0] + self.assertEqual(original_res.id, '1') + self.assertEqual(new_name, 'ALA') + + def test_replace_met_se(self): + pdb_lines = ['SEQRES 1 A 1 MET', + 'ATOM 1 SD MET A 1 0.000 0.000 0.000 1.00 ' + ' 0.00 Se'] + structure = _lines_to_structure(pdb_lines) + alterations = {} + cleanup._replace_met_se(structure, alterations) + sd = [a for a in structure.iter_atoms() if a.get_name() == 'SD'] + self.assertLen(sd, 1) + self.assertEqual(sd[0].element_symbol, 'S') + 
self.assertCountEqual(alterations['Se_in_MET'], [sd[0].residue_number]) + + def test_remove_chains_of_length_one(self): + pdb_lines = ['SEQRES 1 A 1 GLY', + 'ATOM 1 CA GLY A 1 0.000 0.000 0.000 1.00 ' + ' 0.00 C'] + structure = _lines_to_structure(pdb_lines) + alterations = {} + cleanup._remove_chains_of_length_one(structure, alterations) + chains = list(structure.iter_chains()) + self.assertEmpty(chains) + self.assertCountEqual(alterations['removed_chains'].values(), [['A']]) + + +if __name__ == '__main__': + absltest.main() diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/relax/relax.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/relax/relax.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,80 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Amber relaxation.""" +from typing import Any, Dict, Sequence, Tuple +from alphafold.common import protein +from alphafold.relax import amber_minimize +from alphafold.relax import utils +import numpy as np + + +class AmberRelaxation(object): + """Amber relaxation.""" + + def __init__(self, + *, + max_iterations: int, + tolerance: float, + stiffness: float, + exclude_residues: Sequence[int], + max_outer_iterations: int): + """Initialize Amber Relaxer. + + Args: + max_iterations: Maximum number of L-BFGS iterations. 0 means no max. + tolerance: kcal/mol, the energy tolerance of L-BFGS. 
+ stiffness: kcal/mol A**2, spring constant of heavy atom restraining + potential. + exclude_residues: Residues to exclude from per-atom restraining. + Zero-indexed. + max_outer_iterations: Maximum number of violation-informed relax + iterations. A value of 1 will run the non-iterative procedure used in + CASP14. Use 20 so that >95% of the bad cases are relaxed. Relax finishes + as soon as there are no violations, hence in most cases this causes no + slowdown. In the worst case we do 20 outer iterations. + """ + + self._max_iterations = max_iterations + self._tolerance = tolerance + self._stiffness = stiffness + self._exclude_residues = exclude_residues + self._max_outer_iterations = max_outer_iterations + + def process(self, *, + prot: protein.Protein) -> Tuple[str, Dict[str, Any], np.ndarray]: + """Runs Amber relax on a prediction, adds hydrogens, returns PDB string.""" + out = amber_minimize.run_pipeline( + prot=prot, max_iterations=self._max_iterations, + tolerance=self._tolerance, stiffness=self._stiffness, + exclude_residues=self._exclude_residues, + max_outer_iterations=self._max_outer_iterations) + min_pos = out['pos'] + start_pos = out['posinit'] + rmsd = np.sqrt(np.sum((start_pos - min_pos)**2) / start_pos.shape[0]) + debug_data = { + 'initial_energy': out['einit'], + 'final_energy': out['efinal'], + 'attempts': out['min_attempts'], + 'rmsd': rmsd + } + pdb_str = amber_minimize.clean_protein(prot) + min_pdb = utils.overwrite_pdb_coordinates(pdb_str, min_pos) + min_pdb = utils.overwrite_b_factors(min_pdb, prot.b_factors) + utils.assert_equal_nonterminal_atom_types( + protein.from_pdb_string(min_pdb).atom_mask, + prot.atom_mask) + violations = out['structural_violations'][ + 'total_per_residue_violations_mask'] + return min_pdb, debug_data, violations diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/relax/relax_test.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/relax/relax_test.py Tue Mar 01 02:53:05 2022 
+0000 @@ -0,0 +1,88 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for relax.""" +import os + +from absl.testing import absltest +from alphafold.common import protein +from alphafold.relax import relax +import numpy as np +# Internal import (7716). + + +class RunAmberRelaxTest(absltest.TestCase): + + def setUp(self): + super().setUp() + self.test_dir = os.path.join( + absltest.get_default_test_srcdir(), + 'alphafold/relax/testdata/') + self.test_config = { + 'max_iterations': 1, + 'tolerance': 2.39, + 'stiffness': 10.0, + 'exclude_residues': [], + 'max_outer_iterations': 1} + + def test_process(self): + amber_relax = relax.AmberRelaxation(**self.test_config) + + with open(os.path.join(self.test_dir, 'model_output.pdb')) as f: + test_prot = protein.from_pdb_string(f.read()) + pdb_min, debug_info, num_violations = amber_relax.process(prot=test_prot) + + self.assertCountEqual(debug_info.keys(), + set({'initial_energy', 'final_energy', + 'attempts', 'rmsd'})) + self.assertLess(debug_info['final_energy'], debug_info['initial_energy']) + self.assertGreater(debug_info['rmsd'], 0) + + prot_min = protein.from_pdb_string(pdb_min) + # Most protein properties should be unchanged. + np.testing.assert_almost_equal(test_prot.aatype, prot_min.aatype) + np.testing.assert_almost_equal(test_prot.residue_index, + prot_min.residue_index) + # Atom mask and bfactors identical except for terminal OXT of last residue. 
+ np.testing.assert_almost_equal(test_prot.atom_mask[:-1, :], + prot_min.atom_mask[:-1, :]) + np.testing.assert_almost_equal(test_prot.b_factors[:-1, :], + prot_min.b_factors[:-1, :]) + np.testing.assert_almost_equal(test_prot.atom_mask[:, :-1], + prot_min.atom_mask[:, :-1]) + np.testing.assert_almost_equal(test_prot.b_factors[:, :-1], + prot_min.b_factors[:, :-1]) + # There are no residues with violations. + np.testing.assert_equal(num_violations, np.zeros_like(num_violations)) + + def test_unresolved_violations(self): + amber_relax = relax.AmberRelaxation(**self.test_config) + with open(os.path.join(self.test_dir, + 'with_violations_casp14.pdb')) as f: + test_prot = protein.from_pdb_string(f.read()) + _, _, num_violations = amber_relax.process(prot=test_prot) + exp_num_violations = np.array( + [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, + 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, + 0, 0, 0, 0]) + # Check no violations were added. Can't check exactly due to stochasticity. 
+ self.assertTrue(np.all(num_violations <= exp_num_violations)) + + +if __name__ == '__main__': + absltest.main() diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/relax/testdata/model_output.pdb --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/relax/testdata/model_output.pdb Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,98 @@ +ATOM 1 C MET A 1 1.921 -46.152 7.786 1.00 4.39 C +ATOM 2 CA MET A 1 1.631 -46.829 9.131 1.00 4.39 C +ATOM 3 CB MET A 1 2.759 -47.768 9.578 1.00 4.39 C +ATOM 4 CE MET A 1 3.466 -49.770 13.198 1.00 4.39 C +ATOM 5 CG MET A 1 2.581 -48.221 11.034 1.00 4.39 C +ATOM 6 H MET A 1 0.234 -48.249 8.549 1.00 4.39 H +ATOM 7 H2 MET A 1 -0.424 -46.789 8.952 1.00 4.39 H +ATOM 8 H3 MET A 1 0.111 -47.796 10.118 1.00 4.39 H +ATOM 9 HA MET A 1 1.628 -46.009 9.849 1.00 4.39 H +ATOM 10 HB2 MET A 1 3.701 -47.225 9.500 1.00 4.39 H +ATOM 11 HB3 MET A 1 2.807 -48.640 8.926 1.00 4.39 H +ATOM 12 HE1 MET A 1 2.747 -50.537 12.910 1.00 4.39 H +ATOM 13 HE2 MET A 1 4.296 -50.241 13.725 1.00 4.39 H +ATOM 14 HE3 MET A 1 2.988 -49.052 13.864 1.00 4.39 H +ATOM 15 HG2 MET A 1 1.791 -48.971 11.083 1.00 4.39 H +ATOM 16 HG3 MET A 1 2.295 -47.368 11.650 1.00 4.39 H +ATOM 17 N MET A 1 0.291 -47.464 9.182 1.00 4.39 N +ATOM 18 O MET A 1 2.091 -44.945 7.799 1.00 4.39 O +ATOM 19 SD MET A 1 4.096 -48.921 11.725 1.00 4.39 S +ATOM 20 C LYS A 2 1.366 -45.033 4.898 1.00 2.92 C +ATOM 21 CA LYS A 2 2.235 -46.242 5.308 1.00 2.92 C +ATOM 22 CB LYS A 2 2.206 -47.314 4.196 1.00 2.92 C +ATOM 23 CD LYS A 2 3.331 -49.342 3.134 1.00 2.92 C +ATOM 24 CE LYS A 2 4.434 -50.403 3.293 1.00 2.92 C +ATOM 25 CG LYS A 2 3.294 -48.395 4.349 1.00 2.92 C +ATOM 26 H LYS A 2 1.832 -47.853 6.656 1.00 2.92 H +ATOM 27 HA LYS A 2 3.248 -45.841 5.355 1.00 2.92 H +ATOM 28 HB2 LYS A 2 1.223 -47.785 4.167 1.00 2.92 H +ATOM 29 HB3 LYS A 2 2.363 -46.812 3.241 1.00 2.92 H +ATOM 30 HD2 LYS A 2 3.524 -48.754 2.237 1.00 2.92 H +ATOM 31 HD3 LYS A 2 2.364 -49.833 3.031 1.00 2.92 H +ATOM 32 HE2 
LYS A 2 5.383 -49.891 3.455 1.00 2.92 H +ATOM 33 HE3 LYS A 2 4.225 -51.000 4.180 1.00 2.92 H +ATOM 34 HG2 LYS A 2 3.102 -48.977 5.250 1.00 2.92 H +ATOM 35 HG3 LYS A 2 4.264 -47.909 4.446 1.00 2.92 H +ATOM 36 HZ1 LYS A 2 4.763 -50.747 1.274 1.00 2.92 H +ATOM 37 HZ2 LYS A 2 3.681 -51.785 1.931 1.00 2.92 H +ATOM 38 HZ3 LYS A 2 5.280 -51.965 2.224 1.00 2.92 H +ATOM 39 N LYS A 2 1.907 -46.846 6.629 1.00 2.92 N +ATOM 40 NZ LYS A 2 4.542 -51.286 2.100 1.00 2.92 N +ATOM 41 O LYS A 2 1.882 -44.093 4.312 1.00 2.92 O +ATOM 42 C PHE A 3 -0.511 -42.597 5.624 1.00 4.39 C +ATOM 43 CA PHE A 3 -0.853 -43.933 4.929 1.00 4.39 C +ATOM 44 CB PHE A 3 -2.271 -44.408 5.285 1.00 4.39 C +ATOM 45 CD1 PHE A 3 -3.760 -43.542 3.432 1.00 4.39 C +ATOM 46 CD2 PHE A 3 -4.050 -42.638 5.675 1.00 4.39 C +ATOM 47 CE1 PHE A 3 -4.797 -42.715 2.965 1.00 4.39 C +ATOM 48 CE2 PHE A 3 -5.091 -41.818 5.207 1.00 4.39 C +ATOM 49 CG PHE A 3 -3.382 -43.505 4.788 1.00 4.39 C +ATOM 50 CZ PHE A 3 -5.463 -41.853 3.853 1.00 4.39 C +ATOM 51 H PHE A 3 -0.311 -45.868 5.655 1.00 4.39 H +ATOM 52 HA PHE A 3 -0.817 -43.746 3.856 1.00 4.39 H +ATOM 53 HB2 PHE A 3 -2.353 -44.512 6.367 1.00 4.39 H +ATOM 54 HB3 PHE A 3 -2.432 -45.393 4.848 1.00 4.39 H +ATOM 55 HD1 PHE A 3 -3.255 -44.198 2.739 1.00 4.39 H +ATOM 56 HD2 PHE A 3 -3.768 -42.590 6.716 1.00 4.39 H +ATOM 57 HE1 PHE A 3 -5.083 -42.735 1.923 1.00 4.39 H +ATOM 58 HE2 PHE A 3 -5.604 -41.151 5.885 1.00 4.39 H +ATOM 59 HZ PHE A 3 -6.257 -41.215 3.493 1.00 4.39 H +ATOM 60 N PHE A 3 0.079 -45.027 5.253 1.00 4.39 N +ATOM 61 O PHE A 3 -0.633 -41.541 5.014 1.00 4.39 O +ATOM 62 C LEU A 4 1.598 -40.732 7.042 1.00 4.39 C +ATOM 63 CA LEU A 4 0.367 -41.437 7.633 1.00 4.39 C +ATOM 64 CB LEU A 4 0.628 -41.823 9.104 1.00 4.39 C +ATOM 65 CD1 LEU A 4 -0.319 -42.778 11.228 1.00 4.39 C +ATOM 66 CD2 LEU A 4 -1.300 -40.694 10.309 1.00 4.39 C +ATOM 67 CG LEU A 4 -0.650 -42.027 9.937 1.00 4.39 C +ATOM 68 H LEU A 4 0.163 -43.538 7.292 1.00 4.39 H +ATOM 69 HA LEU A 4 -0.445 -40.712 7.588 1.00 4.39 H 
+ATOM 70 HB2 LEU A 4 1.213 -41.034 9.576 1.00 4.39 H +ATOM 71 HB3 LEU A 4 1.235 -42.728 9.127 1.00 4.39 H +ATOM 72 HD11 LEU A 4 0.380 -42.191 11.824 1.00 4.39 H +ATOM 73 HD12 LEU A 4 0.127 -43.747 11.002 1.00 4.39 H +ATOM 74 HD13 LEU A 4 -1.230 -42.927 11.808 1.00 4.39 H +ATOM 75 HD21 LEU A 4 -0.606 -40.080 10.883 1.00 4.39 H +ATOM 76 HD22 LEU A 4 -2.193 -40.869 10.909 1.00 4.39 H +ATOM 77 HD23 LEU A 4 -1.593 -40.147 9.413 1.00 4.39 H +ATOM 78 HG LEU A 4 -1.359 -42.630 9.370 1.00 4.39 H +ATOM 79 N LEU A 4 -0.012 -42.638 6.869 1.00 4.39 N +ATOM 80 O LEU A 4 1.655 -39.508 7.028 1.00 4.39 O +ATOM 81 C VAL A 5 3.372 -40.190 4.573 1.00 4.39 C +ATOM 82 CA VAL A 5 3.752 -40.956 5.845 1.00 4.39 C +ATOM 83 CB VAL A 5 4.757 -42.083 5.528 1.00 4.39 C +ATOM 84 CG1 VAL A 5 6.019 -41.568 4.827 1.00 4.39 C +ATOM 85 CG2 VAL A 5 5.199 -42.807 6.810 1.00 4.39 C +ATOM 86 H VAL A 5 2.440 -42.503 6.548 1.00 4.39 H +ATOM 87 HA VAL A 5 4.234 -40.242 6.512 1.00 4.39 H +ATOM 88 HB VAL A 5 4.279 -42.813 4.875 1.00 4.39 H +ATOM 89 HG11 VAL A 5 6.494 -40.795 5.431 1.00 4.39 H +ATOM 90 HG12 VAL A 5 5.770 -41.145 3.853 1.00 4.39 H +ATOM 91 HG13 VAL A 5 6.725 -42.383 4.670 1.00 4.39 H +ATOM 92 HG21 VAL A 5 4.347 -43.283 7.297 1.00 4.39 H +ATOM 93 HG22 VAL A 5 5.933 -43.575 6.568 1.00 4.39 H +ATOM 94 HG23 VAL A 5 5.651 -42.093 7.498 1.00 4.39 H +ATOM 95 N VAL A 5 2.554 -41.501 6.509 1.00 4.39 N +ATOM 96 O VAL A 5 3.937 -39.138 4.297 1.00 4.39 O +TER 96 VAL A 5 +END diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/relax/testdata/multiple_disulfides_target.pdb --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/relax/testdata/multiple_disulfides_target.pdb Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,1478 @@ +MODEL 0 +ATOM 1 N MET A 1 19.164 -28.457 26.130 1.00 0.00 N +ATOM 2 CA MET A 1 19.746 -27.299 25.456 1.00 0.00 C +ATOM 3 C MET A 1 19.080 -26.008 25.921 1.00 0.00 C +ATOM 4 CB MET A 1 19.615 -27.438 23.938 1.00 0.00 C +ATOM 5 O MET A 1 17.853 -25.899 
25.913 1.00 0.00 O +ATOM 6 CG MET A 1 19.873 -28.846 23.427 1.00 0.00 C +ATOM 7 SD MET A 1 21.636 -29.126 23.002 1.00 0.00 S +ATOM 8 CE MET A 1 22.302 -27.462 23.284 1.00 0.00 C +ATOM 9 N ALA A 2 19.679 -25.354 27.019 1.00 0.00 N +ATOM 10 CA ALA A 2 19.241 -24.061 27.539 1.00 0.00 C +ATOM 11 C ALA A 2 18.629 -23.204 26.434 1.00 0.00 C +ATOM 12 CB ALA A 2 20.410 -23.326 28.192 1.00 0.00 C +ATOM 13 O ALA A 2 19.158 -23.145 25.322 1.00 0.00 O +ATOM 14 N HIS A 3 17.369 -23.382 26.161 1.00 0.00 N +ATOM 15 CA HIS A 3 16.748 -22.427 25.250 1.00 0.00 C +ATOM 16 C HIS A 3 17.419 -21.061 25.342 1.00 0.00 C +ATOM 17 CB HIS A 3 15.252 -22.299 25.547 1.00 0.00 C +ATOM 18 O HIS A 3 17.896 -20.669 26.409 1.00 0.00 O +ATOM 19 CG HIS A 3 14.464 -23.520 25.196 1.00 0.00 C +ATOM 20 CD2 HIS A 3 13.848 -24.436 25.979 1.00 0.00 C +ATOM 21 ND1 HIS A 3 14.242 -23.914 23.894 1.00 0.00 N +ATOM 22 CE1 HIS A 3 13.520 -25.022 23.892 1.00 0.00 C +ATOM 23 NE2 HIS A 3 13.268 -25.360 25.145 1.00 0.00 N +ATOM 24 N GLU A 4 18.306 -20.798 24.429 1.00 0.00 N +ATOM 25 CA GLU A 4 18.907 -19.505 24.115 1.00 0.00 C +ATOM 26 C GLU A 4 18.392 -18.415 25.050 1.00 0.00 C +ATOM 27 CB GLU A 4 18.631 -19.123 22.659 1.00 0.00 C +ATOM 28 O GLU A 4 17.240 -18.458 25.486 1.00 0.00 O +ATOM 29 CG GLU A 4 19.253 -20.072 21.645 1.00 0.00 C +ATOM 30 CD GLU A 4 20.767 -19.956 21.564 1.00 0.00 C +ATOM 31 OE1 GLU A 4 21.330 -18.981 22.111 1.00 0.00 O +ATOM 32 OE2 GLU A 4 21.394 -20.846 20.948 1.00 0.00 O +ATOM 33 N GLU A 5 19.093 -18.090 26.026 1.00 0.00 N +ATOM 34 CA GLU A 5 19.080 -16.885 26.849 1.00 0.00 C +ATOM 35 C GLU A 5 17.938 -15.956 26.449 1.00 0.00 C +ATOM 36 CB GLU A 5 20.418 -16.148 26.746 1.00 0.00 C +ATOM 37 O GLU A 5 17.774 -15.636 25.269 1.00 0.00 O +ATOM 38 CG GLU A 5 21.604 -16.952 27.257 1.00 0.00 C +ATOM 39 CD GLU A 5 21.641 -17.070 28.772 1.00 0.00 C +ATOM 40 OE1 GLU A 5 20.899 -16.330 29.457 1.00 0.00 O +ATOM 41 OE2 GLU A 5 22.419 -17.909 29.279 1.00 0.00 O +ATOM 42 N ASP A 6 16.721 -16.161 26.857 
1.00 0.00 N +ATOM 43 CA ASP A 6 15.629 -15.196 26.948 1.00 0.00 C +ATOM 44 C ASP A 6 16.107 -13.791 26.591 1.00 0.00 C +ATOM 45 CB ASP A 6 15.022 -15.204 28.353 1.00 0.00 C +ATOM 46 O ASP A 6 17.144 -13.339 27.079 1.00 0.00 O +ATOM 47 CG ASP A 6 14.317 -16.507 28.687 1.00 0.00 C +ATOM 48 OD1 ASP A 6 14.123 -16.805 29.885 1.00 0.00 O +ATOM 49 OD2 ASP A 6 13.956 -17.243 27.744 1.00 0.00 O +ATOM 50 N GLY A 7 16.251 -13.433 25.339 1.00 0.00 N +ATOM 51 CA GLY A 7 16.311 -11.996 25.123 1.00 0.00 C +ATOM 52 C GLY A 7 15.833 -11.192 26.317 1.00 0.00 C +ATOM 53 O GLY A 7 15.023 -11.674 27.112 1.00 0.00 O +ATOM 54 N VAL A 8 16.762 -10.664 27.143 1.00 0.00 N +ATOM 55 CA VAL A 8 16.521 -9.756 28.260 1.00 0.00 C +ATOM 56 C VAL A 8 15.307 -8.880 27.960 1.00 0.00 C +ATOM 57 CB VAL A 8 17.755 -8.875 28.552 1.00 0.00 C +ATOM 58 O VAL A 8 15.173 -8.351 26.854 1.00 0.00 O +ATOM 59 CG1 VAL A 8 17.461 -7.891 29.683 1.00 0.00 C +ATOM 60 CG2 VAL A 8 18.962 -9.745 28.898 1.00 0.00 C +ATOM 61 N CYS A 9 14.157 -9.255 28.398 1.00 0.00 N +ATOM 62 CA CYS A 9 13.010 -8.353 28.403 1.00 0.00 C +ATOM 63 C CYS A 9 13.310 -7.095 29.209 1.00 0.00 C +ATOM 64 CB CYS A 9 11.779 -9.055 28.975 1.00 0.00 C +ATOM 65 O CYS A 9 14.058 -7.142 30.187 1.00 0.00 O +ATOM 66 SG CYS A 9 11.197 -10.439 27.970 1.00 0.00 S +ATOM 67 N ASN A 10 13.304 -5.993 28.589 1.00 0.00 N +ATOM 68 CA ASN A 10 13.371 -4.703 29.267 1.00 0.00 C +ATOM 69 C ASN A 10 12.018 -3.997 29.264 1.00 0.00 C +ATOM 70 CB ASN A 10 14.436 -3.812 28.624 1.00 0.00 C +ATOM 71 O ASN A 10 11.047 -4.510 28.706 1.00 0.00 O +ATOM 72 CG ASN A 10 14.149 -3.521 27.164 1.00 0.00 C +ATOM 73 ND2 ASN A 10 15.189 -3.547 26.338 1.00 0.00 N +ATOM 74 OD1 ASN A 10 13.003 -3.275 26.781 1.00 0.00 O +ATOM 75 N SER A 11 11.830 -2.986 30.142 1.00 0.00 N +ATOM 76 CA SER A 11 10.597 -2.233 30.343 1.00 0.00 C +ATOM 77 C SER A 11 10.027 -1.741 29.017 1.00 0.00 C +ATOM 78 CB SER A 11 10.840 -1.044 31.274 1.00 0.00 C +ATOM 79 O SER A 11 8.847 -1.392 28.933 1.00 0.00 O +ATOM 80 OG 
SER A 11 11.841 -0.190 30.748 1.00 0.00 O +ATOM 81 N ASN A 12 10.789 -1.874 27.933 1.00 0.00 N +ATOM 82 CA ASN A 12 10.334 -1.422 26.622 1.00 0.00 C +ATOM 83 C ASN A 12 9.775 -2.576 25.795 1.00 0.00 C +ATOM 84 CB ASN A 12 11.472 -0.730 25.868 1.00 0.00 C +ATOM 85 O ASN A 12 9.262 -2.365 24.694 1.00 0.00 O +ATOM 86 CG ASN A 12 11.875 0.587 26.501 1.00 0.00 C +ATOM 87 ND2 ASN A 12 13.170 0.882 26.482 1.00 0.00 N +ATOM 88 OD1 ASN A 12 11.031 1.333 27.004 1.00 0.00 O +ATOM 89 N ALA A 13 10.045 -3.775 26.303 1.00 0.00 N +ATOM 90 CA ALA A 13 9.523 -4.930 25.578 1.00 0.00 C +ATOM 91 C ALA A 13 8.001 -4.994 25.671 1.00 0.00 C +ATOM 92 CB ALA A 13 10.140 -6.219 26.115 1.00 0.00 C +ATOM 93 O ALA A 13 7.423 -4.684 26.715 1.00 0.00 O +ATOM 94 N PRO A 14 7.310 -5.193 24.591 1.00 0.00 N +ATOM 95 CA PRO A 14 5.846 -5.238 24.577 1.00 0.00 C +ATOM 96 C PRO A 14 5.280 -6.316 25.499 1.00 0.00 C +ATOM 97 CB PRO A 14 5.517 -5.544 23.113 1.00 0.00 C +ATOM 98 O PRO A 14 4.127 -6.222 25.929 1.00 0.00 O +ATOM 99 CG PRO A 14 6.757 -6.177 22.568 1.00 0.00 C +ATOM 100 CD PRO A 14 7.941 -5.617 23.303 1.00 0.00 C +ATOM 101 N CYS A 15 6.076 -7.229 25.793 1.00 0.00 N +ATOM 102 CA CYS A 15 5.597 -8.339 26.610 1.00 0.00 C +ATOM 103 C CYS A 15 6.142 -8.245 28.030 1.00 0.00 C +ATOM 104 CB CYS A 15 5.999 -9.676 25.987 1.00 0.00 C +ATOM 105 O CYS A 15 6.205 -9.249 28.743 1.00 0.00 O +ATOM 106 SG CYS A 15 7.766 -9.813 25.636 1.00 0.00 S +ATOM 107 N TYR A 16 6.510 -6.994 28.366 1.00 0.00 N +ATOM 108 CA TYR A 16 7.076 -6.735 29.685 1.00 0.00 C +ATOM 109 C TYR A 16 5.978 -6.589 30.731 1.00 0.00 C +ATOM 110 CB TYR A 16 7.944 -5.473 29.659 1.00 0.00 C +ATOM 111 O TYR A 16 5.053 -5.791 30.563 1.00 0.00 O +ATOM 112 CG TYR A 16 8.545 -5.122 30.998 1.00 0.00 C +ATOM 113 CD1 TYR A 16 8.126 -3.993 31.698 1.00 0.00 C +ATOM 114 CD2 TYR A 16 9.534 -5.918 31.566 1.00 0.00 C +ATOM 115 CE1 TYR A 16 8.678 -3.665 32.932 1.00 0.00 C +ATOM 116 CE2 TYR A 16 10.093 -5.600 32.799 1.00 0.00 C +ATOM 117 OH TYR A 16 10.210 -4.153 
34.695 1.00 0.00 O +ATOM 118 CZ TYR A 16 9.660 -4.473 33.474 1.00 0.00 C +ATOM 119 N HIS A 17 5.834 -7.548 31.703 1.00 0.00 N +ATOM 120 CA HIS A 17 4.846 -7.504 32.775 1.00 0.00 C +ATOM 121 C HIS A 17 5.518 -7.493 34.143 1.00 0.00 C +ATOM 122 CB HIS A 17 3.888 -8.692 32.669 1.00 0.00 C +ATOM 123 O HIS A 17 6.482 -8.228 34.372 1.00 0.00 O +ATOM 124 CG HIS A 17 2.782 -8.661 33.675 1.00 0.00 C +ATOM 125 CD2 HIS A 17 2.548 -9.432 34.764 1.00 0.00 C +ATOM 126 ND1 HIS A 17 1.748 -7.752 33.617 1.00 0.00 N +ATOM 127 CE1 HIS A 17 0.924 -7.965 34.630 1.00 0.00 C +ATOM 128 NE2 HIS A 17 1.387 -8.979 35.341 1.00 0.00 N +ATOM 129 N CYS A 18 5.067 -6.579 34.995 1.00 0.00 N +ATOM 130 CA CYS A 18 5.599 -6.487 36.350 1.00 0.00 C +ATOM 131 C CYS A 18 4.501 -6.711 37.383 1.00 0.00 C +ATOM 132 CB CYS A 18 6.255 -5.126 36.577 1.00 0.00 C +ATOM 133 O CYS A 18 3.325 -6.465 37.109 1.00 0.00 O +ATOM 134 SG CYS A 18 7.757 -4.870 35.607 1.00 0.00 S +ATOM 135 N ASP A 19 4.791 -7.344 38.476 1.00 0.00 N +ATOM 136 CA ASP A 19 3.829 -7.460 39.568 1.00 0.00 C +ATOM 137 C ASP A 19 3.430 -6.084 40.096 1.00 0.00 C +ATOM 138 CB ASP A 19 4.403 -8.313 40.701 1.00 0.00 C +ATOM 139 O ASP A 19 3.960 -5.064 39.651 1.00 0.00 O +ATOM 140 CG ASP A 19 5.572 -7.650 41.408 1.00 0.00 C +ATOM 141 OD1 ASP A 19 6.325 -8.345 42.124 1.00 0.00 O +ATOM 142 OD2 ASP A 19 5.741 -6.422 41.250 1.00 0.00 O +ATOM 143 N ALA A 20 2.383 -5.908 40.933 1.00 0.00 N +ATOM 144 CA ALA A 20 1.776 -4.676 41.429 1.00 0.00 C +ATOM 145 C ALA A 20 2.806 -3.807 42.144 1.00 0.00 C +ATOM 146 CB ALA A 20 0.612 -4.995 42.363 1.00 0.00 C +ATOM 147 O ALA A 20 2.714 -2.577 42.119 1.00 0.00 O +ATOM 148 N ASN A 21 3.841 -4.457 42.638 1.00 0.00 N +ATOM 149 CA ASN A 21 4.863 -3.719 43.373 1.00 0.00 C +ATOM 150 C ASN A 21 6.048 -3.362 42.480 1.00 0.00 C +ATOM 151 CB ASN A 21 5.335 -4.521 44.588 1.00 0.00 C +ATOM 152 O ASN A 21 6.980 -2.686 42.919 1.00 0.00 O +ATOM 153 CG ASN A 21 4.246 -4.704 45.626 1.00 0.00 C +ATOM 154 ND2 ASN A 21 4.183 -5.892 46.216 
1.00 0.00 N +ATOM 155 OD1 ASN A 21 3.467 -3.786 45.895 1.00 0.00 O +ATOM 156 N GLY A 22 5.989 -3.893 41.230 1.00 0.00 N +ATOM 157 CA GLY A 22 7.093 -3.614 40.325 1.00 0.00 C +ATOM 158 C GLY A 22 8.376 -4.324 40.712 1.00 0.00 C +ATOM 159 O GLY A 22 9.456 -3.972 40.234 1.00 0.00 O +ATOM 160 N GLU A 23 8.305 -5.221 41.664 1.00 0.00 N +ATOM 161 CA GLU A 23 9.501 -5.877 42.183 1.00 0.00 C +ATOM 162 C GLU A 23 9.861 -7.105 41.352 1.00 0.00 C +ATOM 163 CB GLU A 23 9.306 -6.272 43.649 1.00 0.00 C +ATOM 164 O GLU A 23 11.038 -7.361 41.089 1.00 0.00 O +ATOM 165 CG GLU A 23 9.200 -5.085 44.596 1.00 0.00 C +ATOM 166 CD GLU A 23 9.074 -5.493 46.055 1.00 0.00 C +ATOM 167 OE1 GLU A 23 9.075 -6.710 46.348 1.00 0.00 O +ATOM 168 OE2 GLU A 23 8.972 -4.587 46.913 1.00 0.00 O +ATOM 169 N ASN A 24 8.853 -7.853 40.844 1.00 0.00 N +ATOM 170 CA ASN A 24 9.050 -9.077 40.075 1.00 0.00 C +ATOM 171 C ASN A 24 8.522 -8.936 38.651 1.00 0.00 C +ATOM 172 CB ASN A 24 8.381 -10.263 40.774 1.00 0.00 C +ATOM 173 O ASN A 24 7.317 -9.045 38.417 1.00 0.00 O +ATOM 174 CG ASN A 24 8.973 -10.546 42.140 1.00 0.00 C +ATOM 175 ND2 ASN A 24 8.125 -10.569 43.162 1.00 0.00 N +ATOM 176 OD1 ASN A 24 10.184 -10.741 42.277 1.00 0.00 O +ATOM 177 N CYS A 25 9.371 -8.554 37.764 1.00 0.00 N +ATOM 178 CA CYS A 25 8.978 -8.390 36.369 1.00 0.00 C +ATOM 179 C CYS A 25 9.448 -9.572 35.529 1.00 0.00 C +ATOM 180 CB CYS A 25 9.548 -7.091 35.800 1.00 0.00 C +ATOM 181 O CYS A 25 10.486 -10.171 35.816 1.00 0.00 O +ATOM 182 SG CYS A 25 8.933 -5.605 36.623 1.00 0.00 S +ATOM 183 N SER A 26 8.561 -10.102 34.701 1.00 0.00 N +ATOM 184 CA SER A 26 8.921 -11.145 33.746 1.00 0.00 C +ATOM 185 C SER A 26 8.422 -10.808 32.345 1.00 0.00 C +ATOM 186 CB SER A 26 8.353 -12.495 34.187 1.00 0.00 C +ATOM 187 O SER A 26 7.563 -9.940 32.178 1.00 0.00 O +ATOM 188 OG SER A 26 6.936 -12.470 34.186 1.00 0.00 O +ATOM 189 N CYS A 27 9.208 -11.265 31.277 1.00 0.00 N +ATOM 190 CA CYS A 27 8.779 -11.211 29.884 1.00 0.00 C +ATOM 191 C CYS A 27 7.948 -12.436 
29.521 1.00 0.00 C +ATOM 192 CB CYS A 27 9.988 -11.110 28.954 1.00 0.00 C +ATOM 193 O CYS A 27 8.470 -13.551 29.464 1.00 0.00 O +ATOM 194 SG CYS A 27 10.970 -9.614 29.197 1.00 0.00 S +ATOM 195 N ASN A 28 6.768 -12.360 29.728 1.00 0.00 N +ATOM 196 CA ASN A 28 5.905 -13.476 29.355 1.00 0.00 C +ATOM 197 C ASN A 28 5.123 -13.178 28.079 1.00 0.00 C +ATOM 198 CB ASN A 28 4.946 -13.817 30.498 1.00 0.00 C +ATOM 199 O ASN A 28 4.074 -12.532 28.125 1.00 0.00 O +ATOM 200 CG ASN A 28 4.239 -15.142 30.291 1.00 0.00 C +ATOM 201 ND2 ASN A 28 3.472 -15.568 31.288 1.00 0.00 N +ATOM 202 OD1 ASN A 28 4.381 -15.779 29.243 1.00 0.00 O +ATOM 203 N CYS A 29 5.741 -13.488 26.787 1.00 0.00 N +ATOM 204 CA CYS A 29 5.086 -13.282 25.500 1.00 0.00 C +ATOM 205 C CYS A 29 3.880 -14.201 25.348 1.00 0.00 C +ATOM 206 CB CYS A 29 6.070 -13.522 24.355 1.00 0.00 C +ATOM 207 O CYS A 29 3.011 -13.958 24.510 1.00 0.00 O +ATOM 208 SG CYS A 29 7.446 -12.353 24.319 1.00 0.00 S +ATOM 209 N GLU A 30 3.849 -15.077 26.140 1.00 0.00 N +ATOM 210 CA GLU A 30 2.725 -16.005 26.063 1.00 0.00 C +ATOM 211 C GLU A 30 1.452 -15.379 26.627 1.00 0.00 C +ATOM 212 CB GLU A 30 3.047 -17.303 26.808 1.00 0.00 C +ATOM 213 O GLU A 30 0.344 -15.770 26.255 1.00 0.00 O +ATOM 214 CG GLU A 30 4.161 -18.119 26.169 1.00 0.00 C +ATOM 215 CD GLU A 30 4.469 -19.405 26.919 1.00 0.00 C +ATOM 216 OE1 GLU A 30 3.822 -19.672 27.957 1.00 0.00 O +ATOM 217 OE2 GLU A 30 5.366 -20.150 26.466 1.00 0.00 O +ATOM 218 N LEU A 31 1.743 -14.305 27.387 1.00 0.00 N +ATOM 219 CA LEU A 31 0.622 -13.672 28.074 1.00 0.00 C +ATOM 220 C LEU A 31 0.044 -12.536 27.237 1.00 0.00 C +ATOM 221 CB LEU A 31 1.061 -13.142 29.441 1.00 0.00 C +ATOM 222 O LEU A 31 -0.996 -11.971 27.584 1.00 0.00 O +ATOM 223 CG LEU A 31 1.437 -14.194 30.487 1.00 0.00 C +ATOM 224 CD1 LEU A 31 1.920 -13.520 31.766 1.00 0.00 C +ATOM 225 CD2 LEU A 31 0.252 -15.110 30.774 1.00 0.00 C +ATOM 226 N PHE A 32 0.769 -12.166 26.008 1.00 0.00 N +ATOM 227 CA PHE A 32 0.157 -11.063 25.278 1.00 0.00 C +ATOM 
228 C PHE A 32 -1.020 -11.553 24.443 1.00 0.00 C +ATOM 229 CB PHE A 32 1.188 -10.374 24.378 1.00 0.00 C +ATOM 230 O PHE A 32 -0.898 -12.531 23.702 1.00 0.00 O +ATOM 231 CG PHE A 32 2.115 -9.447 25.117 1.00 0.00 C +ATOM 232 CD1 PHE A 32 3.383 -9.869 25.499 1.00 0.00 C +ATOM 233 CD2 PHE A 32 1.719 -8.153 25.429 1.00 0.00 C +ATOM 234 CE1 PHE A 32 4.243 -9.014 26.183 1.00 0.00 C +ATOM 235 CE2 PHE A 32 2.574 -7.293 26.112 1.00 0.00 C +ATOM 236 CZ PHE A 32 3.835 -7.725 26.489 1.00 0.00 C +ATOM 237 N ASP A 33 -2.070 -11.361 25.097 1.00 0.00 N +ATOM 238 CA ASP A 33 -3.364 -11.602 24.466 1.00 0.00 C +ATOM 239 C ASP A 33 -3.423 -10.975 23.075 1.00 0.00 C +ATOM 240 CB ASP A 33 -4.496 -11.055 25.339 1.00 0.00 C +ATOM 241 O ASP A 33 -3.225 -9.767 22.924 1.00 0.00 O +ATOM 242 CG ASP A 33 -5.861 -11.590 24.943 1.00 0.00 C +ATOM 243 OD1 ASP A 33 -6.769 -11.638 25.800 1.00 0.00 O +ATOM 244 OD2 ASP A 33 -6.029 -11.969 23.764 1.00 0.00 O +ATOM 245 N CYS A 34 -3.021 -11.756 22.063 1.00 0.00 N +ATOM 246 CA CYS A 34 -3.371 -11.337 20.711 1.00 0.00 C +ATOM 247 C CYS A 34 -4.691 -10.576 20.700 1.00 0.00 C +ATOM 248 CB CYS A 34 -3.461 -12.547 19.781 1.00 0.00 C +ATOM 249 O CYS A 34 -4.986 -9.851 19.748 1.00 0.00 O +ATOM 250 SG CYS A 34 -1.909 -13.455 19.615 1.00 0.00 S +ATOM 251 N GLU A 35 -5.284 -10.672 21.987 1.00 0.00 N +ATOM 252 CA GLU A 35 -6.617 -10.086 22.085 1.00 0.00 C +ATOM 253 C GLU A 35 -6.556 -8.667 22.644 1.00 0.00 C +ATOM 254 CB GLU A 35 -7.525 -10.956 22.958 1.00 0.00 C +ATOM 255 O GLU A 35 -7.592 -8.032 22.853 1.00 0.00 O +ATOM 256 CG GLU A 35 -7.790 -12.339 22.381 1.00 0.00 C +ATOM 257 CD GLU A 35 -8.779 -13.152 23.202 1.00 0.00 C +ATOM 258 OE1 GLU A 35 -9.290 -12.636 24.222 1.00 0.00 O +ATOM 259 OE2 GLU A 35 -9.046 -14.314 22.821 1.00 0.00 O +ATOM 260 N ALA A 36 -5.315 -8.202 22.860 1.00 0.00 N +ATOM 261 CA ALA A 36 -5.282 -6.884 23.489 1.00 0.00 C +ATOM 262 C ALA A 36 -5.714 -5.797 22.509 1.00 0.00 C +ATOM 263 CB ALA A 36 -3.884 -6.586 24.027 1.00 0.00 C +ATOM 264 O ALA 
A 36 -5.299 -5.799 21.348 1.00 0.00 O +ATOM 265 N LYS A 37 -6.942 -5.271 22.819 1.00 0.00 N +ATOM 266 CA LYS A 37 -7.519 -4.143 22.094 1.00 0.00 C +ATOM 267 C LYS A 37 -6.974 -2.817 22.617 1.00 0.00 C +ATOM 268 CB LYS A 37 -9.045 -4.160 22.200 1.00 0.00 C +ATOM 269 O LYS A 37 -6.897 -2.606 23.829 1.00 0.00 O +ATOM 270 CG LYS A 37 -9.701 -5.341 21.499 1.00 0.00 C +ATOM 271 CD LYS A 37 -11.220 -5.239 21.536 1.00 0.00 C +ATOM 272 CE LYS A 37 -11.877 -6.385 20.778 1.00 0.00 C +ATOM 273 NZ LYS A 37 -13.353 -6.194 20.656 1.00 0.00 N +ATOM 274 N LYS A 38 -6.207 -2.111 21.604 1.00 0.00 N +ATOM 275 CA LYS A 38 -5.876 -0.745 21.998 1.00 0.00 C +ATOM 276 C LYS A 38 -7.131 0.118 22.100 1.00 0.00 C +ATOM 277 CB LYS A 38 -4.892 -0.123 21.007 1.00 0.00 C +ATOM 278 O LYS A 38 -8.188 -0.253 21.585 1.00 0.00 O +ATOM 279 CG LYS A 38 -3.518 -0.776 21.006 1.00 0.00 C +ATOM 280 CD LYS A 38 -2.564 -0.072 20.049 1.00 0.00 C +ATOM 281 CE LYS A 38 -1.196 -0.740 20.029 1.00 0.00 C +ATOM 282 NZ LYS A 38 -0.234 -0.007 19.153 1.00 0.00 N +ATOM 283 N PRO A 39 -7.175 1.097 22.994 1.00 0.00 N +ATOM 284 CA PRO A 39 -8.319 2.000 23.133 1.00 0.00 C +ATOM 285 C PRO A 39 -8.863 2.474 21.787 1.00 0.00 C +ATOM 286 CB PRO A 39 -7.748 3.173 23.934 1.00 0.00 C +ATOM 287 O PRO A 39 -10.057 2.762 21.666 1.00 0.00 O +ATOM 288 CG PRO A 39 -6.594 2.594 24.688 1.00 0.00 C +ATOM 289 CD PRO A 39 -6.004 1.478 23.874 1.00 0.00 C +ATOM 290 N ASP A 40 -8.068 2.468 20.731 1.00 0.00 N +ATOM 291 CA ASP A 40 -8.452 2.969 19.415 1.00 0.00 C +ATOM 292 C ASP A 40 -9.034 1.852 18.551 1.00 0.00 C +ATOM 293 CB ASP A 40 -7.252 3.607 18.712 1.00 0.00 C +ATOM 294 O ASP A 40 -9.337 2.064 17.375 1.00 0.00 O +ATOM 295 CG ASP A 40 -6.113 2.630 18.480 1.00 0.00 C +ATOM 296 OD1 ASP A 40 -5.040 3.049 17.994 1.00 0.00 O +ATOM 297 OD2 ASP A 40 -6.290 1.431 18.784 1.00 0.00 O +ATOM 298 N GLY A 41 -9.199 0.638 19.013 1.00 0.00 N +ATOM 299 CA GLY A 41 -9.761 -0.484 18.279 1.00 0.00 C +ATOM 300 C GLY A 41 -8.723 -1.268 17.500 1.00 0.00 C +ATOM 
301 O GLY A 41 -9.019 -2.338 16.963 1.00 0.00 O +ATOM 302 N SER A 42 -7.510 -0.651 17.403 1.00 0.00 N +ATOM 303 CA SER A 42 -6.433 -1.355 16.714 1.00 0.00 C +ATOM 304 C SER A 42 -5.824 -2.436 17.601 1.00 0.00 C +ATOM 305 CB SER A 42 -5.346 -0.373 16.274 1.00 0.00 C +ATOM 306 O SER A 42 -6.031 -2.440 18.816 1.00 0.00 O +ATOM 307 OG SER A 42 -4.758 0.262 17.396 1.00 0.00 O +ATOM 308 N TYR A 43 -5.412 -3.575 16.938 1.00 0.00 N +ATOM 309 CA TYR A 43 -4.705 -4.628 17.660 1.00 0.00 C +ATOM 310 C TYR A 43 -3.244 -4.254 17.877 1.00 0.00 C +ATOM 311 CB TYR A 43 -4.798 -5.955 16.901 1.00 0.00 C +ATOM 312 O TYR A 43 -2.651 -3.542 17.063 1.00 0.00 O +ATOM 313 CG TYR A 43 -6.184 -6.551 16.887 1.00 0.00 C +ATOM 314 CD1 TYR A 43 -7.022 -6.388 15.786 1.00 0.00 C +ATOM 315 CD2 TYR A 43 -6.659 -7.277 17.974 1.00 0.00 C +ATOM 316 CE1 TYR A 43 -8.302 -6.934 15.769 1.00 0.00 C +ATOM 317 CE2 TYR A 43 -7.937 -7.827 17.968 1.00 0.00 C +ATOM 318 OH TYR A 43 -10.015 -8.194 16.852 1.00 0.00 O +ATOM 319 CZ TYR A 43 -8.749 -7.651 16.863 1.00 0.00 C +ATOM 320 N ALA A 44 -3.024 -4.415 19.155 1.00 0.00 N +ATOM 321 CA ALA A 44 -1.659 -4.137 19.595 1.00 0.00 C +ATOM 322 C ALA A 44 -0.642 -4.865 18.721 1.00 0.00 C +ATOM 323 CB ALA A 44 -1.480 -4.536 21.058 1.00 0.00 C +ATOM 324 O ALA A 44 0.477 -4.383 18.527 1.00 0.00 O +ATOM 325 N HIS A 45 -1.081 -5.938 17.989 1.00 0.00 N +ATOM 326 CA HIS A 45 -0.065 -6.665 17.236 1.00 0.00 C +ATOM 327 C HIS A 45 -0.455 -6.795 15.768 1.00 0.00 C +ATOM 328 CB HIS A 45 0.163 -8.051 17.843 1.00 0.00 C +ATOM 329 O HIS A 45 -1.558 -7.246 15.452 1.00 0.00 O +ATOM 330 CG HIS A 45 1.501 -8.634 17.520 1.00 0.00 C +ATOM 331 CD2 HIS A 45 2.577 -8.880 18.304 1.00 0.00 C +ATOM 332 ND1 HIS A 45 1.850 -9.037 16.249 1.00 0.00 N +ATOM 333 CE1 HIS A 45 3.086 -9.508 16.266 1.00 0.00 C +ATOM 334 NE2 HIS A 45 3.550 -9.423 17.502 1.00 0.00 N +ATOM 335 N PRO A 46 0.267 -6.191 14.776 1.00 0.00 N +ATOM 336 CA PRO A 46 0.007 -6.193 13.334 1.00 0.00 C +ATOM 337 C PRO A 46 -0.224 -7.596 
12.778 1.00 0.00 C +ATOM 338 CB PRO A 46 1.278 -5.574 12.747 1.00 0.00 C +ATOM 339 O PRO A 46 -0.835 -7.750 11.717 1.00 0.00 O +ATOM 340 CG PRO A 46 2.320 -5.770 13.800 1.00 0.00 C +ATOM 341 CD PRO A 46 1.648 -5.768 15.143 1.00 0.00 C +ATOM 342 N CYS A 47 0.238 -8.592 13.354 1.00 0.00 N +ATOM 343 CA CYS A 47 0.115 -9.950 12.835 1.00 0.00 C +ATOM 344 C CYS A 47 -1.253 -10.537 13.163 1.00 0.00 C +ATOM 345 CB CYS A 47 1.214 -10.844 13.408 1.00 0.00 C +ATOM 346 O CYS A 47 -1.515 -11.707 12.880 1.00 0.00 O +ATOM 347 SG CYS A 47 1.222 -10.923 15.212 1.00 0.00 S +ATOM 348 N ARG A 48 -2.155 -9.724 13.452 1.00 0.00 N +ATOM 349 CA ARG A 48 -3.422 -10.192 14.005 1.00 0.00 C +ATOM 350 C ARG A 48 -4.542 -10.086 12.974 1.00 0.00 C +ATOM 351 CB ARG A 48 -3.788 -9.396 15.260 1.00 0.00 C +ATOM 352 O ARG A 48 -4.649 -9.082 12.268 1.00 0.00 O +ATOM 353 CG ARG A 48 -5.032 -9.905 15.970 1.00 0.00 C +ATOM 354 CD ARG A 48 -5.909 -8.761 16.460 1.00 0.00 C +ATOM 355 NE ARG A 48 -6.344 -7.906 15.359 1.00 0.00 N +ATOM 356 NH1 ARG A 48 -6.885 -6.103 16.700 1.00 0.00 N +ATOM 357 NH2 ARG A 48 -7.167 -5.974 14.429 1.00 0.00 N +ATOM 358 CZ ARG A 48 -6.798 -6.663 15.499 1.00 0.00 C +ATOM 359 N ARG A 49 -5.146 -11.174 12.527 1.00 0.00 N +ATOM 360 CA ARG A 49 -6.391 -11.158 11.765 1.00 0.00 C +ATOM 361 C ARG A 49 -7.546 -11.713 12.593 1.00 0.00 C +ATOM 362 CB ARG A 49 -6.241 -11.962 10.472 1.00 0.00 C +ATOM 363 O ARG A 49 -7.402 -12.739 13.261 1.00 0.00 O +ATOM 364 CG ARG A 49 -5.303 -11.327 9.458 1.00 0.00 C +ATOM 365 CD ARG A 49 -5.887 -10.048 8.873 1.00 0.00 C +ATOM 366 NE ARG A 49 -5.349 -9.769 7.544 1.00 0.00 N +ATOM 367 NH1 ARG A 49 -6.902 -8.137 7.033 1.00 0.00 N +ATOM 368 NH2 ARG A 49 -5.276 -8.694 5.516 1.00 0.00 N +ATOM 369 CZ ARG A 49 -5.843 -8.867 6.701 1.00 0.00 C +ATOM 370 N CYS A 50 -8.427 -10.800 12.832 1.00 0.00 N +ATOM 371 CA CYS A 50 -9.591 -11.226 13.601 1.00 0.00 C +ATOM 372 C CYS A 50 -10.795 -11.442 12.692 1.00 0.00 C +ATOM 373 CB CYS A 50 -9.931 -10.193 14.675 1.00 0.00 C 
+ATOM 374 O CYS A 50 -11.034 -10.657 11.773 1.00 0.00 O +ATOM 375 SG CYS A 50 -8.684 -10.057 15.975 1.00 0.00 S +ATOM 376 N ASP A 51 -11.409 -12.618 12.691 1.00 0.00 N +ATOM 377 CA ASP A 51 -12.641 -12.851 11.943 1.00 0.00 C +ATOM 378 C ASP A 51 -13.818 -12.115 12.580 1.00 0.00 C +ATOM 379 CB ASP A 51 -12.942 -14.349 11.858 1.00 0.00 C +ATOM 380 O ASP A 51 -13.640 -11.361 13.539 1.00 0.00 O +ATOM 381 CG ASP A 51 -13.232 -14.974 13.211 1.00 0.00 C +ATOM 382 OD1 ASP A 51 -13.125 -16.212 13.347 1.00 0.00 O +ATOM 383 OD2 ASP A 51 -13.573 -14.221 14.150 1.00 0.00 O +ATOM 384 N ALA A 52 -15.076 -12.051 11.991 1.00 0.00 N +ATOM 385 CA ALA A 52 -16.275 -11.314 12.381 1.00 0.00 C +ATOM 386 C ALA A 52 -16.730 -11.710 13.783 1.00 0.00 C +ATOM 387 CB ALA A 52 -17.397 -11.550 11.374 1.00 0.00 C +ATOM 388 O ALA A 52 -17.439 -10.952 14.450 1.00 0.00 O +ATOM 389 N ASN A 53 -16.263 -12.790 14.290 1.00 0.00 N +ATOM 390 CA ASN A 53 -16.638 -13.257 15.620 1.00 0.00 C +ATOM 391 C ASN A 53 -15.557 -12.943 16.650 1.00 0.00 C +ATOM 392 CB ASN A 53 -16.930 -14.759 15.599 1.00 0.00 C +ATOM 393 O ASN A 53 -15.602 -13.442 17.776 1.00 0.00 O +ATOM 394 CG ASN A 53 -18.169 -15.101 14.795 1.00 0.00 C +ATOM 395 ND2 ASN A 53 -18.098 -16.183 14.029 1.00 0.00 N +ATOM 396 OD1 ASN A 53 -19.181 -14.399 14.862 1.00 0.00 O +ATOM 397 N ASN A 54 -14.551 -12.153 16.191 1.00 0.00 N +ATOM 398 CA ASN A 54 -13.470 -11.666 17.041 1.00 0.00 C +ATOM 399 C ASN A 54 -12.543 -12.798 17.474 1.00 0.00 C +ATOM 400 CB ASN A 54 -14.034 -10.943 18.266 1.00 0.00 C +ATOM 401 O ASN A 54 -11.968 -12.753 18.563 1.00 0.00 O +ATOM 402 CG ASN A 54 -14.752 -9.657 17.906 1.00 0.00 C +ATOM 403 ND2 ASN A 54 -15.894 -9.418 18.540 1.00 0.00 N +ATOM 404 OD1 ASN A 54 -14.285 -8.885 17.065 1.00 0.00 O +ATOM 405 N ILE A 55 -12.428 -13.839 16.751 1.00 0.00 N +ATOM 406 CA ILE A 55 -11.423 -14.875 16.963 1.00 0.00 C +ATOM 407 C ILE A 55 -10.158 -14.536 16.179 1.00 0.00 C +ATOM 408 CB ILE A 55 -11.952 -16.268 16.552 1.00 0.00 C +ATOM 409 O ILE A 
55 -10.162 -14.552 14.946 1.00 0.00 O +ATOM 410 CG1 ILE A 55 -13.233 -16.602 17.325 1.00 0.00 C +ATOM 411 CG2 ILE A 55 -10.881 -17.340 16.777 1.00 0.00 C +ATOM 412 CD1 ILE A 55 -13.942 -17.857 16.834 1.00 0.00 C +ATOM 413 N CYS A 56 -9.197 -14.050 16.887 1.00 0.00 N +ATOM 414 CA CYS A 56 -7.960 -13.587 16.266 1.00 0.00 C +ATOM 415 C CYS A 56 -6.914 -14.694 16.244 1.00 0.00 C +ATOM 416 CB CYS A 56 -7.411 -12.369 17.009 1.00 0.00 C +ATOM 417 O CYS A 56 -6.807 -15.473 17.192 1.00 0.00 O +ATOM 418 SG CYS A 56 -8.495 -10.925 16.941 1.00 0.00 S +ATOM 419 N LYS A 57 -6.417 -14.976 15.112 1.00 0.00 N +ATOM 420 CA LYS A 57 -5.263 -15.861 14.977 1.00 0.00 C +ATOM 421 C LYS A 57 -4.007 -15.075 14.612 1.00 0.00 C +ATOM 422 CB LYS A 57 -5.532 -16.937 13.924 1.00 0.00 C +ATOM 423 O LYS A 57 -4.077 -14.092 13.873 1.00 0.00 O +ATOM 424 CG LYS A 57 -6.672 -17.880 14.280 1.00 0.00 C +ATOM 425 CD LYS A 57 -6.811 -19.000 13.257 1.00 0.00 C +ATOM 426 CE LYS A 57 -7.903 -19.986 13.651 1.00 0.00 C +ATOM 427 NZ LYS A 57 -7.947 -21.161 12.730 1.00 0.00 N +ATOM 428 N CYS A 58 -2.976 -15.105 15.608 1.00 0.00 N +ATOM 429 CA CYS A 58 -1.644 -14.583 15.328 1.00 0.00 C +ATOM 430 C CYS A 58 -0.938 -15.425 14.272 1.00 0.00 C +ATOM 431 CB CYS A 58 -0.804 -14.543 16.605 1.00 0.00 C +ATOM 432 O CYS A 58 -0.662 -16.605 14.495 1.00 0.00 O +ATOM 433 SG CYS A 58 -1.471 -13.452 17.881 1.00 0.00 S +ATOM 434 N SER A 59 -1.100 -15.266 13.105 1.00 0.00 N +ATOM 435 CA SER A 59 -0.317 -16.194 12.296 1.00 0.00 C +ATOM 436 C SER A 59 0.363 -15.475 11.135 1.00 0.00 C +ATOM 437 CB SER A 59 -1.204 -17.319 11.760 1.00 0.00 C +ATOM 438 O SER A 59 -0.281 -15.151 10.135 1.00 0.00 O +ATOM 439 OG SER A 59 -0.425 -18.302 11.100 1.00 0.00 O +ATOM 440 N CYS A 60 1.699 -14.861 11.329 1.00 0.00 N +ATOM 441 CA CYS A 60 2.405 -14.577 10.084 1.00 0.00 C +ATOM 442 C CYS A 60 2.585 -15.845 9.258 1.00 0.00 C +ATOM 443 CB CYS A 60 3.768 -13.947 10.372 1.00 0.00 C +ATOM 444 O CYS A 60 2.717 -15.781 8.035 1.00 0.00 O +ATOM 445 SG CYS A 
60 4.863 -14.997 11.352 1.00 0.00 S +ATOM 446 N THR A 61 2.370 -16.747 10.083 1.00 0.00 N +ATOM 447 CA THR A 61 2.549 -18.025 9.403 1.00 0.00 C +ATOM 448 C THR A 61 1.237 -18.493 8.779 1.00 0.00 C +ATOM 449 CB THR A 61 3.075 -19.103 10.369 1.00 0.00 C +ATOM 450 O THR A 61 1.241 -19.301 7.848 1.00 0.00 O +ATOM 451 CG2 THR A 61 4.474 -18.758 10.869 1.00 0.00 C +ATOM 452 OG1 THR A 61 2.190 -19.204 11.491 1.00 0.00 O +ATOM 453 N ALA A 62 0.127 -17.759 9.279 1.00 0.00 N +ATOM 454 CA ALA A 62 -1.157 -18.259 8.792 1.00 0.00 C +ATOM 455 C ALA A 62 -1.490 -17.673 7.423 1.00 0.00 C +ATOM 456 CB ALA A 62 -2.266 -17.936 9.790 1.00 0.00 C +ATOM 457 O ALA A 62 -2.183 -18.305 6.623 1.00 0.00 O +ATOM 458 N ILE A 63 -0.953 -16.508 7.207 1.00 0.00 N +ATOM 459 CA ILE A 63 -1.310 -15.966 5.900 1.00 0.00 C +ATOM 460 C ILE A 63 -0.123 -16.091 4.948 1.00 0.00 C +ATOM 461 CB ILE A 63 -1.761 -14.492 6.004 1.00 0.00 C +ATOM 462 O ILE A 63 0.973 -15.611 5.246 1.00 0.00 O +ATOM 463 CG1 ILE A 63 -2.978 -14.369 6.929 1.00 0.00 C +ATOM 464 CG2 ILE A 63 -2.068 -13.922 4.616 1.00 0.00 C +ATOM 465 CD1 ILE A 63 -3.430 -12.936 7.171 1.00 0.00 C +ATOM 466 N PRO A 64 -0.261 -17.160 4.081 1.00 0.00 N +ATOM 467 CA PRO A 64 0.812 -17.254 3.088 1.00 0.00 C +ATOM 468 C PRO A 64 1.182 -15.899 2.489 1.00 0.00 C +ATOM 469 CB PRO A 64 0.224 -18.181 2.021 1.00 0.00 C +ATOM 470 O PRO A 64 0.304 -15.069 2.242 1.00 0.00 O +ATOM 471 CG PRO A 64 -1.257 -18.050 2.175 1.00 0.00 C +ATOM 472 CD PRO A 64 -1.560 -17.707 3.605 1.00 0.00 C +ATOM 473 N CYS A 65 2.470 -15.492 2.721 1.00 0.00 N +ATOM 474 CA CYS A 65 2.970 -14.283 2.076 1.00 0.00 C +ATOM 475 C CYS A 65 2.884 -14.399 0.559 1.00 0.00 C +ATOM 476 CB CYS A 65 4.415 -14.011 2.494 1.00 0.00 C +ATOM 477 O CYS A 65 3.627 -15.170 -0.052 1.00 0.00 O +ATOM 478 SG CYS A 65 4.983 -12.341 2.107 1.00 0.00 S +ATOM 479 N ASN A 66 1.810 -14.008 0.034 1.00 0.00 N +ATOM 480 CA ASN A 66 1.657 -13.952 -1.416 1.00 0.00 C +ATOM 481 C ASN A 66 1.636 -12.512 -1.922 1.00 0.00 C +ATOM 482 
CB ASN A 66 0.388 -14.686 -1.851 1.00 0.00 C +ATOM 483 O ASN A 66 1.843 -11.575 -1.149 1.00 0.00 O +ATOM 484 CG ASN A 66 -0.864 -14.121 -1.209 1.00 0.00 C +ATOM 485 ND2 ASN A 66 -1.787 -14.999 -0.835 1.00 0.00 N +ATOM 486 OD1 ASN A 66 -0.999 -12.905 -1.050 1.00 0.00 O +ATOM 487 N GLU A 67 1.630 -12.346 -3.214 1.00 0.00 N +ATOM 488 CA GLU A 67 1.700 -11.038 -3.857 1.00 0.00 C +ATOM 489 C GLU A 67 0.657 -10.083 -3.284 1.00 0.00 C +ATOM 490 CB GLU A 67 1.515 -11.172 -5.371 1.00 0.00 C +ATOM 491 O GLU A 67 0.820 -8.863 -3.353 1.00 0.00 O +ATOM 492 CG GLU A 67 0.220 -11.863 -5.773 1.00 0.00 C +ATOM 493 CD GLU A 67 0.104 -12.095 -7.271 1.00 0.00 C +ATOM 494 OE1 GLU A 67 -1.026 -12.040 -7.808 1.00 0.00 O +ATOM 495 OE2 GLU A 67 1.151 -12.333 -7.914 1.00 0.00 O +ATOM 496 N ASP A 68 -0.317 -10.492 -2.553 1.00 0.00 N +ATOM 497 CA ASP A 68 -1.388 -9.666 -2.004 1.00 0.00 C +ATOM 498 C ASP A 68 -1.103 -9.292 -0.551 1.00 0.00 C +ATOM 499 CB ASP A 68 -2.731 -10.391 -2.108 1.00 0.00 C +ATOM 500 O ASP A 68 -1.847 -8.517 0.053 1.00 0.00 O +ATOM 501 CG ASP A 68 -3.195 -10.578 -3.541 1.00 0.00 C +ATOM 502 OD1 ASP A 68 -3.828 -11.611 -3.848 1.00 0.00 O +ATOM 503 OD2 ASP A 68 -2.923 -9.685 -4.373 1.00 0.00 O +ATOM 504 N HIS A 69 -0.167 -9.956 0.090 1.00 0.00 N +ATOM 505 CA HIS A 69 0.211 -9.683 1.472 1.00 0.00 C +ATOM 506 C HIS A 69 1.044 -8.410 1.574 1.00 0.00 C +ATOM 507 CB HIS A 69 0.985 -10.865 2.059 1.00 0.00 C +ATOM 508 O HIS A 69 2.031 -8.249 0.852 1.00 0.00 O +ATOM 509 CG HIS A 69 1.021 -10.875 3.555 1.00 0.00 C +ATOM 510 CD2 HIS A 69 0.448 -11.711 4.452 1.00 0.00 C +ATOM 511 ND1 HIS A 69 1.712 -9.936 4.288 1.00 0.00 N +ATOM 512 CE1 HIS A 69 1.562 -10.195 5.577 1.00 0.00 C +ATOM 513 NE2 HIS A 69 0.800 -11.267 5.703 1.00 0.00 N +ATOM 514 N PRO A 70 0.701 -7.489 2.427 1.00 0.00 N +ATOM 515 CA PRO A 70 1.384 -6.197 2.516 1.00 0.00 C +ATOM 516 C PRO A 70 2.882 -6.337 2.776 1.00 0.00 C +ATOM 517 CB PRO A 70 0.687 -5.510 3.693 1.00 0.00 C +ATOM 518 O PRO A 70 3.663 -5.461 2.397 1.00 
0.00 O +ATOM 519 CG PRO A 70 -0.046 -6.606 4.396 1.00 0.00 C +ATOM 520 CD PRO A 70 -0.242 -7.742 3.433 1.00 0.00 C +ATOM 521 N CYS A 71 3.295 -7.400 3.302 1.00 0.00 N +ATOM 522 CA CYS A 71 4.698 -7.619 3.633 1.00 0.00 C +ATOM 523 C CYS A 71 5.402 -8.407 2.534 1.00 0.00 C +ATOM 524 CB CYS A 71 4.825 -8.359 4.965 1.00 0.00 C +ATOM 525 O CYS A 71 6.576 -8.754 2.669 1.00 0.00 O +ATOM 526 SG CYS A 71 4.149 -7.448 6.371 1.00 0.00 S +ATOM 527 N HIS A 72 4.559 -8.757 1.576 1.00 0.00 N +ATOM 528 CA HIS A 72 5.119 -9.541 0.482 1.00 0.00 C +ATOM 529 C HIS A 72 6.010 -8.684 -0.411 1.00 0.00 C +ATOM 530 CB HIS A 72 4.003 -10.179 -0.347 1.00 0.00 C +ATOM 531 O HIS A 72 5.596 -7.614 -0.864 1.00 0.00 O +ATOM 532 CG HIS A 72 4.498 -10.942 -1.534 1.00 0.00 C +ATOM 533 CD2 HIS A 72 4.508 -10.627 -2.851 1.00 0.00 C +ATOM 534 ND1 HIS A 72 5.064 -12.194 -1.431 1.00 0.00 N +ATOM 535 CE1 HIS A 72 5.402 -12.618 -2.638 1.00 0.00 C +ATOM 536 NE2 HIS A 72 5.076 -11.686 -3.517 1.00 0.00 N +ATOM 537 N HIS A 73 7.276 -9.193 -0.608 1.00 0.00 N +ATOM 538 CA HIS A 73 8.219 -8.528 -1.500 1.00 0.00 C +ATOM 539 C HIS A 73 8.888 -9.527 -2.439 1.00 0.00 C +ATOM 540 CB HIS A 73 9.278 -7.774 -0.695 1.00 0.00 C +ATOM 541 O HIS A 73 9.246 -10.632 -2.024 1.00 0.00 O +ATOM 542 CG HIS A 73 8.715 -6.693 0.172 1.00 0.00 C +ATOM 543 CD2 HIS A 73 8.550 -6.625 1.514 1.00 0.00 C +ATOM 544 ND1 HIS A 73 8.240 -5.504 -0.336 1.00 0.00 N +ATOM 545 CE1 HIS A 73 7.806 -4.749 0.660 1.00 0.00 C +ATOM 546 NE2 HIS A 73 7.983 -5.406 1.793 1.00 0.00 N +ATOM 547 N CYS A 74 8.907 -9.137 -3.729 1.00 0.00 N +ATOM 548 CA CYS A 74 9.582 -9.976 -4.713 1.00 0.00 C +ATOM 549 C CYS A 74 10.677 -9.199 -5.433 1.00 0.00 C +ATOM 550 CB CYS A 74 8.580 -10.523 -5.729 1.00 0.00 C +ATOM 551 O CYS A 74 10.529 -8.003 -5.689 1.00 0.00 O +ATOM 552 SG CYS A 74 7.391 -11.686 -5.025 1.00 0.00 S +ATOM 553 N HIS A 75 11.715 -9.738 -5.575 1.00 0.00 N +ATOM 554 CA HIS A 75 12.772 -9.115 -6.364 1.00 0.00 C +ATOM 555 C HIS A 75 13.308 -10.074 -7.422 1.00 0.00 
C +ATOM 556 CB HIS A 75 13.910 -8.643 -5.458 1.00 0.00 C +ATOM 557 O HIS A 75 13.275 -11.292 -7.235 1.00 0.00 O +ATOM 558 CG HIS A 75 14.605 -9.755 -4.740 1.00 0.00 C +ATOM 559 CD2 HIS A 75 15.789 -10.366 -4.981 1.00 0.00 C +ATOM 560 ND1 HIS A 75 14.075 -10.368 -3.625 1.00 0.00 N +ATOM 561 CE1 HIS A 75 14.905 -11.310 -3.211 1.00 0.00 C +ATOM 562 NE2 HIS A 75 15.954 -11.330 -4.016 1.00 0.00 N +ATOM 563 N GLU A 76 13.647 -9.546 -8.538 1.00 0.00 N +ATOM 564 CA GLU A 76 14.228 -10.322 -9.630 1.00 0.00 C +ATOM 565 C GLU A 76 15.753 -10.283 -9.585 1.00 0.00 C +ATOM 566 CB GLU A 76 13.727 -9.806 -10.981 1.00 0.00 C +ATOM 567 O GLU A 76 16.350 -9.205 -9.546 1.00 0.00 O +ATOM 568 CG GLU A 76 14.060 -10.722 -12.150 1.00 0.00 C +ATOM 569 CD GLU A 76 13.399 -10.297 -13.452 1.00 0.00 C +ATOM 570 OE1 GLU A 76 12.554 -9.373 -13.430 1.00 0.00 O +ATOM 571 OE2 GLU A 76 13.727 -10.894 -14.501 1.00 0.00 O +ATOM 572 N GLU A 77 16.313 -11.414 -9.551 1.00 0.00 N +ATOM 573 CA GLU A 77 17.769 -11.510 -9.570 1.00 0.00 C +ATOM 574 C GLU A 77 18.310 -11.423 -10.994 1.00 0.00 C +ATOM 575 CB GLU A 77 18.231 -12.812 -8.911 1.00 0.00 C +ATOM 576 O GLU A 77 17.548 -11.510 -11.959 1.00 0.00 O +ATOM 577 CG GLU A 77 17.844 -12.929 -7.444 1.00 0.00 C +ATOM 578 CD GLU A 77 18.639 -12.003 -6.537 1.00 0.00 C +ATOM 579 OE1 GLU A 77 19.776 -11.626 -6.901 1.00 0.00 O +ATOM 580 OE2 GLU A 77 18.120 -11.651 -5.454 1.00 0.00 O +ATOM 581 N ASP A 78 19.573 -11.066 -11.091 1.00 0.00 N +ATOM 582 CA ASP A 78 20.266 -10.915 -12.367 1.00 0.00 C +ATOM 583 C ASP A 78 20.084 -12.153 -13.241 1.00 0.00 C +ATOM 584 CB ASP A 78 21.755 -10.645 -12.140 1.00 0.00 C +ATOM 585 O ASP A 78 20.095 -12.059 -14.470 1.00 0.00 O +ATOM 586 CG ASP A 78 22.029 -9.254 -11.597 1.00 0.00 C +ATOM 587 OD1 ASP A 78 23.114 -9.028 -11.018 1.00 0.00 O +ATOM 588 OD2 ASP A 78 21.152 -8.376 -11.747 1.00 0.00 O +ATOM 589 N ASP A 79 19.875 -13.279 -12.572 1.00 0.00 N +ATOM 590 CA ASP A 79 19.730 -14.515 -13.335 1.00 0.00 C +ATOM 591 C ASP A 79 18.288 
-14.708 -13.799 1.00 0.00 C +ATOM 592 CB ASP A 79 20.180 -15.716 -12.501 1.00 0.00 C +ATOM 593 O ASP A 79 17.946 -15.750 -14.363 1.00 0.00 O +ATOM 594 CG ASP A 79 19.357 -15.903 -11.238 1.00 0.00 C +ATOM 595 OD1 ASP A 79 19.656 -16.822 -10.446 1.00 0.00 O +ATOM 596 OD2 ASP A 79 18.399 -15.127 -11.036 1.00 0.00 O +ATOM 597 N GLY A 80 17.414 -13.715 -13.581 1.00 0.00 N +ATOM 598 CA GLY A 80 16.044 -13.776 -14.063 1.00 0.00 C +ATOM 599 C GLY A 80 15.095 -14.439 -13.081 1.00 0.00 C +ATOM 600 O GLY A 80 13.881 -14.444 -13.291 1.00 0.00 O +ATOM 601 N ASP A 81 15.662 -15.063 -12.040 1.00 0.00 N +ATOM 602 CA ASP A 81 14.823 -15.721 -11.043 1.00 0.00 C +ATOM 603 C ASP A 81 14.161 -14.699 -10.121 1.00 0.00 C +ATOM 604 CB ASP A 81 15.646 -16.715 -10.221 1.00 0.00 C +ATOM 605 O ASP A 81 14.715 -13.625 -9.878 1.00 0.00 O +ATOM 606 CG ASP A 81 16.113 -17.911 -11.031 1.00 0.00 C +ATOM 607 OD1 ASP A 81 17.118 -18.550 -10.652 1.00 0.00 O +ATOM 608 OD2 ASP A 81 15.472 -18.216 -12.060 1.00 0.00 O +ATOM 609 N THR A 82 12.968 -14.929 -9.764 1.00 0.00 N +ATOM 610 CA THR A 82 12.231 -14.082 -8.832 1.00 0.00 C +ATOM 611 C THR A 82 12.233 -14.689 -7.432 1.00 0.00 C +ATOM 612 CB THR A 82 10.780 -13.868 -9.299 1.00 0.00 C +ATOM 613 O THR A 82 11.931 -15.872 -7.262 1.00 0.00 O +ATOM 614 CG2 THR A 82 10.028 -12.938 -8.352 1.00 0.00 C +ATOM 615 OG1 THR A 82 10.786 -13.291 -10.611 1.00 0.00 O +ATOM 616 N HIS A 83 12.679 -13.936 -6.510 1.00 0.00 N +ATOM 617 CA HIS A 83 12.653 -14.347 -5.110 1.00 0.00 C +ATOM 618 C HIS A 83 11.661 -13.511 -4.309 1.00 0.00 C +ATOM 619 CB HIS A 83 14.049 -14.239 -4.494 1.00 0.00 C +ATOM 620 O HIS A 83 11.675 -12.280 -4.387 1.00 0.00 O +ATOM 621 CG HIS A 83 15.047 -15.170 -5.104 1.00 0.00 C +ATOM 622 CD2 HIS A 83 15.925 -14.991 -6.119 1.00 0.00 C +ATOM 623 ND1 HIS A 83 15.219 -16.466 -4.669 1.00 0.00 N +ATOM 624 CE1 HIS A 83 16.163 -17.046 -5.391 1.00 0.00 C +ATOM 625 NE2 HIS A 83 16.608 -16.172 -6.279 1.00 0.00 N +ATOM 626 N CYS A 84 10.791 -14.185 -3.722 1.00 0.00 
N +ATOM 627 CA CYS A 84 9.793 -13.514 -2.896 1.00 0.00 C +ATOM 628 C CYS A 84 10.017 -13.813 -1.419 1.00 0.00 C +ATOM 629 CB CYS A 84 8.384 -13.942 -3.305 1.00 0.00 C +ATOM 630 O CYS A 84 10.408 -14.924 -1.059 1.00 0.00 O +ATOM 631 SG CYS A 84 7.968 -13.539 -5.016 1.00 0.00 S +ATOM 632 N HIS A 85 9.909 -12.859 -0.708 1.00 0.00 N +ATOM 633 CA HIS A 85 9.975 -13.070 0.733 1.00 0.00 C +ATOM 634 C HIS A 85 8.927 -12.236 1.461 1.00 0.00 C +ATOM 635 CB HIS A 85 11.372 -12.734 1.260 1.00 0.00 C +ATOM 636 O HIS A 85 8.377 -11.289 0.894 1.00 0.00 O +ATOM 637 CG HIS A 85 11.748 -11.296 1.089 1.00 0.00 C +ATOM 638 CD2 HIS A 85 11.777 -10.273 1.975 1.00 0.00 C +ATOM 639 ND1 HIS A 85 12.152 -10.772 -0.120 1.00 0.00 N +ATOM 640 CE1 HIS A 85 12.415 -9.485 0.031 1.00 0.00 C +ATOM 641 NE2 HIS A 85 12.196 -9.157 1.293 1.00 0.00 N +ATOM 642 N CYS A 86 8.500 -12.728 2.583 1.00 0.00 N +ATOM 643 CA CYS A 86 7.596 -12.021 3.484 1.00 0.00 C +ATOM 644 C CYS A 86 8.373 -11.290 4.572 1.00 0.00 C +ATOM 645 CB CYS A 86 6.603 -12.993 4.119 1.00 0.00 C +ATOM 646 O CYS A 86 8.939 -11.920 5.467 1.00 0.00 O +ATOM 647 SG CYS A 86 5.473 -13.757 2.934 1.00 0.00 S +ATOM 648 N SER A 87 8.541 -10.143 4.355 1.00 0.00 N +ATOM 649 CA SER A 87 9.296 -9.406 5.363 1.00 0.00 C +ATOM 650 C SER A 87 8.570 -8.129 5.774 1.00 0.00 C +ATOM 651 CB SER A 87 10.692 -9.064 4.843 1.00 0.00 C +ATOM 652 O SER A 87 8.018 -7.424 4.927 1.00 0.00 O +ATOM 653 OG SER A 87 11.410 -8.297 5.794 1.00 0.00 O +ATOM 654 N CYS A 88 8.324 -8.020 6.956 1.00 0.00 N +ATOM 655 CA CYS A 88 7.764 -6.786 7.497 1.00 0.00 C +ATOM 656 C CYS A 88 8.869 -5.829 7.928 1.00 0.00 C +ATOM 657 CB CYS A 88 6.847 -7.088 8.681 1.00 0.00 C +ATOM 658 O CYS A 88 8.600 -4.816 8.576 1.00 0.00 O +ATOM 659 SG CYS A 88 5.414 -8.102 8.253 1.00 0.00 S +ATOM 660 N GLU A 89 9.941 -6.277 7.427 1.00 0.00 N +ATOM 661 CA GLU A 89 11.079 -5.417 7.736 1.00 0.00 C +ATOM 662 C GLU A 89 11.164 -4.244 6.763 1.00 0.00 C +ATOM 663 CB GLU A 89 12.382 -6.219 7.709 1.00 0.00 C +ATOM 
664 O GLU A 89 10.884 -4.397 5.572 1.00 0.00 O +ATOM 665 CG GLU A 89 12.497 -7.244 8.828 1.00 0.00 C +ATOM 666 CD GLU A 89 12.821 -6.626 10.178 1.00 0.00 C +ATOM 667 OE1 GLU A 89 13.318 -5.477 10.216 1.00 0.00 O +ATOM 668 OE2 GLU A 89 12.577 -7.295 11.207 1.00 0.00 O +ATOM 669 N HIS A 90 10.877 -2.882 7.151 1.00 0.00 N +ATOM 670 CA HIS A 90 11.037 -1.634 6.414 1.00 0.00 C +ATOM 671 C HIS A 90 12.477 -1.452 5.945 1.00 0.00 C +ATOM 672 CB HIS A 90 10.611 -0.444 7.276 1.00 0.00 C +ATOM 673 O HIS A 90 13.291 -0.847 6.647 1.00 0.00 O +ATOM 674 CG HIS A 90 9.172 -0.482 7.682 1.00 0.00 C +ATOM 675 CD2 HIS A 90 8.592 -0.729 8.880 1.00 0.00 C +ATOM 676 ND1 HIS A 90 8.143 -0.249 6.795 1.00 0.00 N +ATOM 677 CE1 HIS A 90 6.989 -0.350 7.434 1.00 0.00 C +ATOM 678 NE2 HIS A 90 7.234 -0.641 8.700 1.00 0.00 N +ATOM 679 N SER A 91 13.113 -2.415 5.257 1.00 0.00 N +ATOM 680 CA SER A 91 14.468 -1.999 4.909 1.00 0.00 C +ATOM 681 C SER A 91 14.482 -1.180 3.623 1.00 0.00 C +ATOM 682 CB SER A 91 15.380 -3.217 4.758 1.00 0.00 C +ATOM 683 O SER A 91 13.868 -1.568 2.627 1.00 0.00 O +ATOM 684 OG SER A 91 15.023 -3.974 3.614 1.00 0.00 O +ATOM 685 N HIS A 92 14.296 0.148 3.642 1.00 0.00 N +ATOM 686 CA HIS A 92 14.603 1.170 2.647 1.00 0.00 C +ATOM 687 C HIS A 92 15.930 0.882 1.953 1.00 0.00 C +ATOM 688 CB HIS A 92 14.638 2.555 3.295 1.00 0.00 C +ATOM 689 O HIS A 92 16.284 1.551 0.979 1.00 0.00 O +ATOM 690 CG HIS A 92 13.324 2.984 3.866 1.00 0.00 C +ATOM 691 CD2 HIS A 92 12.943 3.213 5.145 1.00 0.00 C +ATOM 692 ND1 HIS A 92 12.216 3.224 3.084 1.00 0.00 N +ATOM 693 CE1 HIS A 92 11.207 3.584 3.859 1.00 0.00 C +ATOM 694 NE2 HIS A 92 11.621 3.585 5.114 1.00 0.00 N +ATOM 695 N ASP A 93 16.676 -0.193 2.168 1.00 0.00 N +ATOM 696 CA ASP A 93 18.015 -0.132 1.589 1.00 0.00 C +ATOM 697 C ASP A 93 18.097 -0.954 0.305 1.00 0.00 C +ATOM 698 CB ASP A 93 19.058 -0.622 2.596 1.00 0.00 C +ATOM 699 O ASP A 93 19.179 -1.132 -0.256 1.00 0.00 O +ATOM 700 CG ASP A 93 19.487 0.454 3.577 1.00 0.00 C +ATOM 701 OD1 ASP A 93 
20.370 0.192 4.423 1.00 0.00 O +ATOM 702 OD2 ASP A 93 18.935 1.573 3.507 1.00 0.00 O +ATOM 703 N HIS A 94 17.467 -0.634 -0.758 1.00 0.00 N +ATOM 704 CA HIS A 94 18.140 -1.057 -1.982 1.00 0.00 C +ATOM 705 C HIS A 94 17.627 -0.280 -3.189 1.00 0.00 C +ATOM 706 CB HIS A 94 17.951 -2.559 -2.205 1.00 0.00 C +ATOM 707 O HIS A 94 16.419 -0.075 -3.334 1.00 0.00 O +ATOM 708 CG HIS A 94 18.800 -3.409 -1.315 1.00 0.00 C +ATOM 709 CD2 HIS A 94 18.467 -4.255 -0.312 1.00 0.00 C +ATOM 710 ND1 HIS A 94 20.175 -3.442 -1.407 1.00 0.00 N +ATOM 711 CE1 HIS A 94 20.651 -4.276 -0.497 1.00 0.00 C +ATOM 712 NE2 HIS A 94 19.635 -4.782 0.181 1.00 0.00 N +ATOM 713 N HIS A 95 18.249 0.957 -3.481 1.00 0.00 N +ATOM 714 CA HIS A 95 18.496 1.578 -4.778 1.00 0.00 C +ATOM 715 C HIS A 95 18.041 0.672 -5.918 1.00 0.00 C +ATOM 716 CB HIS A 95 19.979 1.916 -4.936 1.00 0.00 C +ATOM 717 O HIS A 95 18.521 0.799 -7.046 1.00 0.00 O +ATOM 718 CG HIS A 95 20.458 2.973 -3.992 1.00 0.00 C +ATOM 719 CD2 HIS A 95 21.388 2.932 -3.010 1.00 0.00 C +ATOM 720 ND1 HIS A 95 19.959 4.258 -3.999 1.00 0.00 N +ATOM 721 CE1 HIS A 95 20.565 4.963 -3.059 1.00 0.00 C +ATOM 722 NE2 HIS A 95 21.437 4.182 -2.444 1.00 0.00 N +ATOM 723 N ASP A 96 16.923 0.136 -5.980 1.00 0.00 N +ATOM 724 CA ASP A 96 16.554 -0.218 -7.347 1.00 0.00 C +ATOM 725 C ASP A 96 15.459 0.705 -7.877 1.00 0.00 C +ATOM 726 CB ASP A 96 16.093 -1.676 -7.418 1.00 0.00 C +ATOM 727 O ASP A 96 14.655 1.230 -7.104 1.00 0.00 O +ATOM 728 CG ASP A 96 17.246 -2.663 -7.414 1.00 0.00 C +ATOM 729 OD1 ASP A 96 17.024 -3.859 -7.125 1.00 0.00 O +ATOM 730 OD2 ASP A 96 18.388 -2.242 -7.699 1.00 0.00 O +ATOM 731 N ASP A 97 15.890 1.837 -8.509 1.00 0.00 N +ATOM 732 CA ASP A 97 15.081 2.551 -9.491 1.00 0.00 C +ATOM 733 C ASP A 97 13.874 1.718 -9.918 1.00 0.00 C +ATOM 734 CB ASP A 97 15.923 2.923 -10.713 1.00 0.00 C +ATOM 735 O ASP A 97 13.369 1.874 -11.032 1.00 0.00 O +ATOM 736 CG ASP A 97 16.981 3.969 -10.409 1.00 0.00 C +ATOM 737 OD1 ASP A 97 18.049 3.963 -11.058 1.00 0.00 O +ATOM 
738 OD2 ASP A 97 16.746 4.804 -9.509 1.00 0.00 O +ATOM 739 N ASP A 98 13.468 0.802 -9.181 1.00 0.00 N +ATOM 740 CA ASP A 98 12.224 0.241 -9.699 1.00 0.00 C +ATOM 741 C ASP A 98 11.093 1.266 -9.639 1.00 0.00 C +ATOM 742 CB ASP A 98 11.837 -1.017 -8.920 1.00 0.00 C +ATOM 743 O ASP A 98 10.974 2.010 -8.663 1.00 0.00 O +ATOM 744 CG ASP A 98 12.688 -2.222 -9.278 1.00 0.00 C +ATOM 745 OD1 ASP A 98 12.775 -3.170 -8.468 1.00 0.00 O +ATOM 746 OD2 ASP A 98 13.280 -2.223 -10.379 1.00 0.00 O +ATOM 747 N THR A 99 11.132 2.146 -10.695 1.00 0.00 N +ATOM 748 CA THR A 99 9.998 2.959 -11.120 1.00 0.00 C +ATOM 749 C THR A 99 8.685 2.355 -10.628 1.00 0.00 C +ATOM 750 CB THR A 99 9.957 3.102 -12.653 1.00 0.00 C +ATOM 751 O THR A 99 7.730 2.223 -11.396 1.00 0.00 O +ATOM 752 CG2 THR A 99 11.214 3.791 -13.174 1.00 0.00 C +ATOM 753 OG1 THR A 99 9.857 1.802 -13.246 1.00 0.00 O +ATOM 754 N HIS A 100 8.653 1.675 -9.540 1.00 0.00 N +ATOM 755 CA HIS A 100 7.234 1.548 -9.228 1.00 0.00 C +ATOM 756 C HIS A 100 6.625 2.899 -8.870 1.00 0.00 C +ATOM 757 CB HIS A 100 7.024 0.557 -8.082 1.00 0.00 C +ATOM 758 O HIS A 100 7.154 3.617 -8.019 1.00 0.00 O +ATOM 759 CG HIS A 100 7.437 -0.842 -8.414 1.00 0.00 C +ATOM 760 CD2 HIS A 100 8.446 -1.607 -7.937 1.00 0.00 C +ATOM 761 ND1 HIS A 100 6.777 -1.610 -9.349 1.00 0.00 N +ATOM 762 CE1 HIS A 100 7.365 -2.793 -9.431 1.00 0.00 C +ATOM 763 NE2 HIS A 100 8.380 -2.816 -8.585 1.00 0.00 N +ATOM 764 N GLY A 101 6.381 3.654 -9.900 1.00 0.00 N +ATOM 765 CA GLY A 101 5.487 4.753 -9.574 1.00 0.00 C +ATOM 766 C GLY A 101 4.744 4.550 -8.267 1.00 0.00 C +ATOM 767 O GLY A 101 3.522 4.386 -8.261 1.00 0.00 O +ATOM 768 N GLU A 102 5.430 3.707 -7.432 1.00 0.00 N +ATOM 769 CA GLU A 102 4.594 3.693 -6.235 1.00 0.00 C +ATOM 770 C GLU A 102 4.478 5.087 -5.627 1.00 0.00 C +ATOM 771 CB GLU A 102 5.153 2.712 -5.201 1.00 0.00 C +ATOM 772 O GLU A 102 5.392 5.905 -5.757 1.00 0.00 O +ATOM 773 CG GLU A 102 5.195 1.269 -5.681 1.00 0.00 C +ATOM 774 CD GLU A 102 3.815 0.668 -5.897 1.00 0.00 C 
+ATOM 775 OE1 GLU A 102 2.810 1.292 -5.486 1.00 0.00 O +ATOM 776 OE2 GLU A 102 3.738 -0.436 -6.481 1.00 0.00 O +ATOM 777 N CYS A 103 3.304 5.585 -5.881 1.00 0.00 N +ATOM 778 CA CYS A 103 2.937 6.733 -5.061 1.00 0.00 C +ATOM 779 C CYS A 103 3.767 6.778 -3.783 1.00 0.00 C +ATOM 780 CB CYS A 103 1.449 6.690 -4.712 1.00 0.00 C +ATOM 781 O CYS A 103 3.803 5.806 -3.027 1.00 0.00 O +ATOM 782 SG CYS A 103 0.362 6.999 -6.121 1.00 0.00 S +ATOM 783 N THR A 104 4.856 7.423 -3.910 1.00 0.00 N +ATOM 784 CA THR A 104 5.617 7.650 -2.686 1.00 0.00 C +ATOM 785 C THR A 104 5.025 8.808 -1.888 1.00 0.00 C +ATOM 786 CB THR A 104 7.098 7.939 -2.995 1.00 0.00 C +ATOM 787 O THR A 104 4.142 9.517 -2.375 1.00 0.00 O +ATOM 788 CG2 THR A 104 7.686 6.875 -3.916 1.00 0.00 C +ATOM 789 OG1 THR A 104 7.206 9.219 -3.631 1.00 0.00 O +ATOM 790 N LYS A 105 5.301 8.726 -0.683 1.00 0.00 N +ATOM 791 CA LYS A 105 4.870 9.830 0.169 1.00 0.00 C +ATOM 792 C LYS A 105 5.196 11.178 -0.469 1.00 0.00 C +ATOM 793 CB LYS A 105 5.523 9.731 1.548 1.00 0.00 C +ATOM 794 O LYS A 105 4.534 12.179 -0.190 1.00 0.00 O +ATOM 795 CG LYS A 105 4.955 8.625 2.425 1.00 0.00 C +ATOM 796 CD LYS A 105 5.514 8.690 3.840 1.00 0.00 C +ATOM 797 CE LYS A 105 4.894 7.627 4.736 1.00 0.00 C +ATOM 798 NZ LYS A 105 5.487 7.643 6.107 1.00 0.00 N +ATOM 799 N LYS A 106 6.132 11.170 -1.417 1.00 0.00 N +ATOM 800 CA LYS A 106 6.565 12.408 -2.059 1.00 0.00 C +ATOM 801 C LYS A 106 5.727 12.707 -3.298 1.00 0.00 C +ATOM 802 CB LYS A 106 8.046 12.329 -2.434 1.00 0.00 C +ATOM 803 O LYS A 106 5.794 13.808 -3.849 1.00 0.00 O +ATOM 804 CG LYS A 106 8.986 12.274 -1.239 1.00 0.00 C +ATOM 805 CD LYS A 106 10.445 12.262 -1.676 1.00 0.00 C +ATOM 806 CE LYS A 106 11.387 12.222 -0.480 1.00 0.00 C +ATOM 807 NZ LYS A 106 12.817 12.151 -0.903 1.00 0.00 N +ATOM 808 N ALA A 107 5.009 11.655 -3.672 1.00 0.00 N +ATOM 809 CA ALA A 107 4.208 11.854 -4.877 1.00 0.00 C +ATOM 810 C ALA A 107 3.004 12.748 -4.594 1.00 0.00 C +ATOM 811 CB ALA A 107 3.749 10.511 -5.439 1.00 0.00 
C +ATOM 812 O ALA A 107 2.358 12.618 -3.552 1.00 0.00 O +ATOM 813 N PRO A 108 2.750 13.735 -5.350 1.00 0.00 N +ATOM 814 CA PRO A 108 1.593 14.609 -5.140 1.00 0.00 C +ATOM 815 C PRO A 108 0.271 13.845 -5.124 1.00 0.00 C +ATOM 816 CB PRO A 108 1.656 15.568 -6.331 1.00 0.00 C +ATOM 817 O PRO A 108 -0.730 14.348 -4.607 1.00 0.00 O +ATOM 818 CG PRO A 108 2.473 14.851 -7.356 1.00 0.00 C +ATOM 819 CD PRO A 108 3.398 13.901 -6.650 1.00 0.00 C +ATOM 820 N CYS A 109 0.245 12.652 -5.681 1.00 0.00 N +ATOM 821 CA CYS A 109 -1.009 11.911 -5.770 1.00 0.00 C +ATOM 822 C CYS A 109 -1.162 10.954 -4.594 1.00 0.00 C +ATOM 823 CB CYS A 109 -1.079 11.134 -7.084 1.00 0.00 C +ATOM 824 O CYS A 109 -2.174 10.261 -4.480 1.00 0.00 O +ATOM 825 SG CYS A 109 0.290 9.980 -7.322 1.00 0.00 S +ATOM 826 N TRP A 110 -0.068 10.925 -3.822 1.00 0.00 N +ATOM 827 CA TRP A 110 -0.125 10.100 -2.621 1.00 0.00 C +ATOM 828 C TRP A 110 -1.071 10.704 -1.589 1.00 0.00 C +ATOM 829 CB TRP A 110 1.272 9.934 -2.016 1.00 0.00 C +ATOM 830 O TRP A 110 -0.962 11.887 -1.255 1.00 0.00 O +ATOM 831 CG TRP A 110 1.322 8.997 -0.846 1.00 0.00 C +ATOM 832 CD1 TRP A 110 1.696 7.682 -0.859 1.00 0.00 C +ATOM 833 CD2 TRP A 110 0.980 9.304 0.509 1.00 0.00 C +ATOM 834 CE2 TRP A 110 1.172 8.127 1.266 1.00 0.00 C +ATOM 835 CE3 TRP A 110 0.531 10.462 1.158 1.00 0.00 C +ATOM 836 NE1 TRP A 110 1.608 7.153 0.408 1.00 0.00 N +ATOM 837 CH2 TRP A 110 0.491 9.222 3.250 1.00 0.00 C +ATOM 838 CZ2 TRP A 110 0.929 8.075 2.640 1.00 0.00 C +ATOM 839 CZ3 TRP A 110 0.290 10.409 2.526 1.00 0.00 C +ATOM 840 N ARG A 111 -2.102 9.880 -1.162 1.00 0.00 N +ATOM 841 CA ARG A 111 -3.086 10.357 -0.197 1.00 0.00 C +ATOM 842 C ARG A 111 -3.345 9.313 0.884 1.00 0.00 C +ATOM 843 CB ARG A 111 -4.397 10.720 -0.899 1.00 0.00 C +ATOM 844 O ARG A 111 -3.329 8.111 0.610 1.00 0.00 O +ATOM 845 CG ARG A 111 -5.466 11.263 0.034 1.00 0.00 C +ATOM 846 CD ARG A 111 -5.180 12.701 0.443 1.00 0.00 C +ATOM 847 NE ARG A 111 -6.289 13.277 1.200 1.00 0.00 N +ATOM 848 NH1 ARG A 111 
-5.313 15.357 1.444 1.00 0.00 N +ATOM 849 NH2 ARG A 111 -7.384 14.946 2.335 1.00 0.00 N +ATOM 850 CZ ARG A 111 -6.326 14.525 1.658 1.00 0.00 C +ATOM 851 N CYS A 112 -3.365 9.888 2.033 1.00 0.00 N +ATOM 852 CA CYS A 112 -3.705 9.030 3.162 1.00 0.00 C +ATOM 853 C CYS A 112 -5.019 9.463 3.801 1.00 0.00 C +ATOM 854 CB CYS A 112 -2.589 9.051 4.206 1.00 0.00 C +ATOM 855 O CYS A 112 -5.318 10.657 3.866 1.00 0.00 O +ATOM 856 SG CYS A 112 -1.031 8.349 3.621 1.00 0.00 S +ATOM 857 N GLU A 113 -5.830 8.580 3.951 1.00 0.00 N +ATOM 858 CA GLU A 113 -7.053 8.822 4.710 1.00 0.00 C +ATOM 859 C GLU A 113 -7.031 8.080 6.044 1.00 0.00 C +ATOM 860 CB GLU A 113 -8.282 8.407 3.897 1.00 0.00 C +ATOM 861 O GLU A 113 -6.553 6.947 6.122 1.00 0.00 O +ATOM 862 CG GLU A 113 -8.500 9.241 2.643 1.00 0.00 C +ATOM 863 CD GLU A 113 -9.731 8.826 1.853 1.00 0.00 C +ATOM 864 OE1 GLU A 113 -10.458 7.909 2.299 1.00 0.00 O +ATOM 865 OE2 GLU A 113 -9.971 9.422 0.780 1.00 0.00 O +ATOM 866 N TYR A 114 -7.305 8.869 7.059 1.00 0.00 N +ATOM 867 CA TYR A 114 -7.411 8.262 8.381 1.00 0.00 C +ATOM 868 C TYR A 114 -8.671 7.412 8.491 1.00 0.00 C +ATOM 869 CB TYR A 114 -7.412 9.340 9.469 1.00 0.00 C +ATOM 870 O TYR A 114 -9.775 7.889 8.219 1.00 0.00 O +ATOM 871 CG TYR A 114 -7.392 8.785 10.872 1.00 0.00 C +ATOM 872 CD1 TYR A 114 -8.527 8.834 11.678 1.00 0.00 C +ATOM 873 CD2 TYR A 114 -6.238 8.213 11.396 1.00 0.00 C +ATOM 874 CE1 TYR A 114 -8.513 8.326 12.973 1.00 0.00 C +ATOM 875 CE2 TYR A 114 -6.212 7.702 12.690 1.00 0.00 C +ATOM 876 OH TYR A 114 -7.333 7.258 14.750 1.00 0.00 O +ATOM 877 CZ TYR A 114 -7.353 7.762 13.469 1.00 0.00 C +ATOM 878 N ASN A 115 -8.439 6.137 8.704 1.00 0.00 N +ATOM 879 CA ASN A 115 -9.527 5.202 8.972 1.00 0.00 C +ATOM 880 C ASN A 115 -9.770 5.039 10.469 1.00 0.00 C +ATOM 881 CB ASN A 115 -9.238 3.844 8.330 1.00 0.00 C +ATOM 882 O ASN A 115 -8.953 4.443 11.174 1.00 0.00 O +ATOM 883 CG ASN A 115 -10.442 2.922 8.345 1.00 0.00 C +ATOM 884 ND2 ASN A 115 -10.511 2.021 7.373 1.00 0.00 N +ATOM 885 
OD1 ASN A 115 -11.302 3.020 9.224 1.00 0.00 O +ATOM 886 N ALA A 116 -10.796 5.699 10.920 1.00 0.00 N +ATOM 887 CA ALA A 116 -11.108 5.723 12.347 1.00 0.00 C +ATOM 888 C ALA A 116 -11.283 4.310 12.894 1.00 0.00 C +ATOM 889 CB ALA A 116 -12.366 6.549 12.603 1.00 0.00 C +ATOM 890 O ALA A 116 -10.898 4.025 14.031 1.00 0.00 O +ATOM 891 N ASP A 117 -11.725 3.414 12.007 1.00 0.00 N +ATOM 892 CA ASP A 117 -11.929 2.033 12.433 1.00 0.00 C +ATOM 893 C ASP A 117 -10.595 1.313 12.619 1.00 0.00 C +ATOM 894 CB ASP A 117 -12.796 1.281 11.421 1.00 0.00 C +ATOM 895 O ASP A 117 -10.432 0.529 13.557 1.00 0.00 O +ATOM 896 CG ASP A 117 -14.225 1.793 11.369 1.00 0.00 C +ATOM 897 OD1 ASP A 117 -14.898 1.618 10.331 1.00 0.00 O +ATOM 898 OD2 ASP A 117 -14.681 2.378 12.375 1.00 0.00 O +ATOM 899 N LEU A 118 -9.664 1.666 11.738 1.00 0.00 N +ATOM 900 CA LEU A 118 -8.349 1.035 11.785 1.00 0.00 C +ATOM 901 C LEU A 118 -7.364 1.891 12.575 1.00 0.00 C +ATOM 902 CB LEU A 118 -7.816 0.800 10.369 1.00 0.00 C +ATOM 903 O LEU A 118 -6.231 1.472 12.822 1.00 0.00 O +ATOM 904 CG LEU A 118 -8.639 -0.135 9.481 1.00 0.00 C +ATOM 905 CD1 LEU A 118 -8.022 -0.225 8.090 1.00 0.00 C +ATOM 906 CD2 LEU A 118 -8.746 -1.518 10.115 1.00 0.00 C +ATOM 907 N LYS A 119 -7.828 3.091 12.895 1.00 0.00 N +ATOM 908 CA LYS A 119 -6.988 4.069 13.580 1.00 0.00 C +ATOM 909 C LYS A 119 -5.640 4.222 12.882 1.00 0.00 C +ATOM 910 CB LYS A 119 -6.780 3.668 15.041 1.00 0.00 C +ATOM 911 O LYS A 119 -4.596 4.257 13.537 1.00 0.00 O +ATOM 912 CG LYS A 119 -8.064 3.604 15.855 1.00 0.00 C +ATOM 913 CD LYS A 119 -7.786 3.255 17.311 1.00 0.00 C +ATOM 914 CE LYS A 119 -9.062 3.263 18.143 1.00 0.00 C +ATOM 915 NZ LYS A 119 -8.786 2.979 19.583 1.00 0.00 N +ATOM 916 N HIS A 120 -5.544 4.099 11.731 1.00 0.00 N +ATOM 917 CA HIS A 120 -4.341 4.353 10.946 1.00 0.00 C +ATOM 918 C HIS A 120 -4.690 4.898 9.565 1.00 0.00 C +ATOM 919 CB HIS A 120 -3.510 3.075 10.811 1.00 0.00 C +ATOM 920 O HIS A 120 -5.853 4.869 9.156 1.00 0.00 O +ATOM 921 CG HIS A 120 
-4.217 1.973 10.089 1.00 0.00 C +ATOM 922 CD2 HIS A 120 -4.107 1.532 8.813 1.00 0.00 C +ATOM 923 ND1 HIS A 120 -5.174 1.183 10.689 1.00 0.00 N +ATOM 924 CE1 HIS A 120 -5.622 0.301 9.811 1.00 0.00 C +ATOM 925 NE2 HIS A 120 -4.990 0.491 8.666 1.00 0.00 N +ATOM 926 N ASP A 121 -3.704 5.475 9.017 1.00 0.00 N +ATOM 927 CA ASP A 121 -3.873 6.009 7.669 1.00 0.00 C +ATOM 928 C ASP A 121 -3.838 4.892 6.628 1.00 0.00 C +ATOM 929 CB ASP A 121 -2.792 7.047 7.363 1.00 0.00 C +ATOM 930 O ASP A 121 -3.053 3.949 6.747 1.00 0.00 O +ATOM 931 CG ASP A 121 -2.917 8.301 8.210 1.00 0.00 C +ATOM 932 OD1 ASP A 121 -1.936 9.069 8.312 1.00 0.00 O +ATOM 933 OD2 ASP A 121 -4.006 8.521 8.784 1.00 0.00 O +ATOM 934 N VAL A 122 -4.829 4.935 5.875 1.00 0.00 N +ATOM 935 CA VAL A 122 -4.775 4.123 4.663 1.00 0.00 C +ATOM 936 C VAL A 122 -4.276 4.971 3.495 1.00 0.00 C +ATOM 937 CB VAL A 122 -6.154 3.511 4.328 1.00 0.00 C +ATOM 938 O VAL A 122 -4.857 6.013 3.184 1.00 0.00 O +ATOM 939 CG1 VAL A 122 -6.063 2.625 3.087 1.00 0.00 C +ATOM 940 CG2 VAL A 122 -6.688 2.717 5.518 1.00 0.00 C +ATOM 941 N CYS A 123 -3.142 4.531 3.102 1.00 0.00 N +ATOM 942 CA CYS A 123 -2.472 5.318 2.073 1.00 0.00 C +ATOM 943 C CYS A 123 -2.628 4.669 0.702 1.00 0.00 C +ATOM 944 CB CYS A 123 -0.988 5.481 2.403 1.00 0.00 C +ATOM 945 O CYS A 123 -2.671 3.443 0.593 1.00 0.00 O +ATOM 946 SG CYS A 123 -0.681 6.285 3.991 1.00 0.00 S +ATOM 947 N GLY A 124 -2.924 5.462 -0.260 1.00 0.00 N +ATOM 948 CA GLY A 124 -2.940 5.028 -1.648 1.00 0.00 C +ATOM 949 C GLY A 124 -2.699 6.159 -2.630 1.00 0.00 C +ATOM 950 O GLY A 124 -2.463 7.299 -2.225 1.00 0.00 O +ATOM 951 N CYS A 125 -2.378 5.820 -3.813 1.00 0.00 N +ATOM 952 CA CYS A 125 -2.283 6.770 -4.916 1.00 0.00 C +ATOM 953 C CYS A 125 -3.667 7.160 -5.421 1.00 0.00 C +ATOM 954 CB CYS A 125 -1.458 6.183 -6.061 1.00 0.00 C +ATOM 955 O CYS A 125 -4.425 6.307 -5.886 1.00 0.00 O +ATOM 956 SG CYS A 125 0.275 5.900 -5.639 1.00 0.00 S +ATOM 957 N GLU A 126 -4.004 8.323 -5.203 1.00 0.00 N +ATOM 958 CA GLU A 
126 -5.332 8.779 -5.604 1.00 0.00 C +ATOM 959 C GLU A 126 -5.243 9.988 -6.530 1.00 0.00 C +ATOM 960 CB GLU A 126 -6.177 9.119 -4.374 1.00 0.00 C +ATOM 961 O GLU A 126 -5.752 11.064 -6.206 1.00 0.00 O +ATOM 962 CG GLU A 126 -6.467 7.924 -3.478 1.00 0.00 C +ATOM 963 CD GLU A 126 -7.488 6.964 -4.068 1.00 0.00 C +ATOM 964 OE1 GLU A 126 -8.228 7.361 -4.997 1.00 0.00 O +ATOM 965 OE2 GLU A 126 -7.550 5.807 -3.597 1.00 0.00 O +ATOM 966 N CYS A 127 -4.822 9.841 -7.699 1.00 0.00 N +ATOM 967 CA CYS A 127 -4.615 10.948 -8.626 1.00 0.00 C +ATOM 968 C CYS A 127 -5.940 11.598 -9.005 1.00 0.00 C +ATOM 969 CB CYS A 127 -3.895 10.466 -9.885 1.00 0.00 C +ATOM 970 O CYS A 127 -5.987 12.790 -9.312 1.00 0.00 O +ATOM 971 SG CYS A 127 -2.217 9.868 -9.581 1.00 0.00 S +ATOM 972 N SER A 128 -6.846 10.833 -8.955 1.00 0.00 N +ATOM 973 CA SER A 128 -8.150 11.305 -9.408 1.00 0.00 C +ATOM 974 C SER A 128 -8.755 12.293 -8.417 1.00 0.00 C +ATOM 975 CB SER A 128 -9.104 10.128 -9.616 1.00 0.00 C +ATOM 976 O SER A 128 -9.642 13.073 -8.772 1.00 0.00 O +ATOM 977 OG SER A 128 -9.249 9.381 -8.420 1.00 0.00 O +ATOM 978 N LYS A 129 -8.286 12.254 -7.213 1.00 0.00 N +ATOM 979 CA LYS A 129 -8.887 13.076 -6.166 1.00 0.00 C +ATOM 980 C LYS A 129 -8.020 14.294 -5.859 1.00 0.00 C +ATOM 981 CB LYS A 129 -9.104 12.253 -4.896 1.00 0.00 C +ATOM 982 O LYS A 129 -8.420 15.169 -5.089 1.00 0.00 O +ATOM 983 CG LYS A 129 -10.151 11.158 -5.039 1.00 0.00 C +ATOM 984 CD LYS A 129 -10.395 10.441 -3.717 1.00 0.00 C +ATOM 985 CE LYS A 129 -11.490 9.392 -3.843 1.00 0.00 C +ATOM 986 NZ LYS A 129 -11.820 8.774 -2.524 1.00 0.00 N +ATOM 987 N LEU A 130 -6.820 14.391 -6.394 1.00 0.00 N +ATOM 988 CA LEU A 130 -5.922 15.506 -6.110 1.00 0.00 C +ATOM 989 C LEU A 130 -6.255 16.708 -6.987 1.00 0.00 C +ATOM 990 CB LEU A 130 -4.465 15.089 -6.326 1.00 0.00 C +ATOM 991 O LEU A 130 -6.713 16.548 -8.120 1.00 0.00 O +ATOM 992 CG LEU A 130 -3.879 14.117 -5.300 1.00 0.00 C +ATOM 993 CD1 LEU A 130 -2.476 13.689 -5.718 1.00 0.00 C +ATOM 994 CD2 LEU A 
130 -3.859 14.751 -3.913 1.00 0.00 C +ATOM 995 N PRO A 131 -6.218 17.878 -6.321 1.00 0.00 N +ATOM 996 CA PRO A 131 -6.403 19.088 -7.125 1.00 0.00 C +ATOM 997 C PRO A 131 -5.423 19.176 -8.293 1.00 0.00 C +ATOM 998 CB PRO A 131 -6.159 20.219 -6.123 1.00 0.00 C +ATOM 999 O PRO A 131 -4.287 18.706 -8.188 1.00 0.00 O +ATOM 1000 CG PRO A 131 -5.279 19.617 -5.075 1.00 0.00 C +ATOM 1001 CD PRO A 131 -5.586 18.150 -4.979 1.00 0.00 C +ATOM 1002 N CYS A 132 -5.915 19.530 -9.527 1.00 0.00 N +ATOM 1003 CA CYS A 132 -5.087 19.711 -10.714 1.00 0.00 C +ATOM 1004 C CYS A 132 -4.132 20.886 -10.540 1.00 0.00 C +ATOM 1005 CB CYS A 132 -5.960 19.930 -11.949 1.00 0.00 C +ATOM 1006 O CYS A 132 -4.531 22.042 -10.690 1.00 0.00 O +ATOM 1007 SG CYS A 132 -6.896 18.468 -12.448 1.00 0.00 S +ATOM 1008 N ASN A 133 -3.080 20.705 -9.927 1.00 0.00 N +ATOM 1009 CA ASN A 133 -2.027 21.708 -9.810 1.00 0.00 C +ATOM 1010 C ASN A 133 -0.764 21.283 -10.553 1.00 0.00 C +ATOM 1011 CB ASN A 133 -1.710 21.986 -8.339 1.00 0.00 C +ATOM 1012 O ASN A 133 -0.740 20.230 -11.194 1.00 0.00 O +ATOM 1013 CG ASN A 133 -1.260 20.744 -7.595 1.00 0.00 C +ATOM 1014 ND2 ASN A 133 -1.758 20.570 -6.376 1.00 0.00 N +ATOM 1015 OD1 ASN A 133 -0.470 19.949 -8.110 1.00 0.00 O +ATOM 1016 N ASP A 134 0.201 22.148 -10.651 1.00 0.00 N +ATOM 1017 CA ASP A 134 1.408 21.962 -11.451 1.00 0.00 C +ATOM 1018 C ASP A 134 2.162 20.705 -11.023 1.00 0.00 C +ATOM 1019 CB ASP A 134 2.320 23.185 -11.338 1.00 0.00 C +ATOM 1020 O ASP A 134 2.999 20.192 -11.768 1.00 0.00 O +ATOM 1021 CG ASP A 134 1.789 24.392 -12.091 1.00 0.00 C +ATOM 1022 OD1 ASP A 134 2.250 25.525 -11.832 1.00 0.00 O +ATOM 1023 OD2 ASP A 134 0.898 24.210 -12.950 1.00 0.00 O +ATOM 1024 N GLU A 135 1.732 20.132 -9.899 1.00 0.00 N +ATOM 1025 CA GLU A 135 2.429 18.946 -9.410 1.00 0.00 C +ATOM 1026 C GLU A 135 1.714 17.669 -9.840 1.00 0.00 C +ATOM 1027 CB GLU A 135 2.559 18.988 -7.885 1.00 0.00 C +ATOM 1028 O GLU A 135 2.282 16.577 -9.766 1.00 0.00 O +ATOM 1029 CG GLU A 135 3.439 20.120 
-7.374 1.00 0.00 C +ATOM 1030 CD GLU A 135 3.561 20.146 -5.859 1.00 0.00 C +ATOM 1031 OE1 GLU A 135 2.885 19.340 -5.181 1.00 0.00 O +ATOM 1032 OE2 GLU A 135 4.339 20.981 -5.346 1.00 0.00 O +ATOM 1033 N HIS A 136 0.480 17.885 -10.293 1.00 0.00 N +ATOM 1034 CA HIS A 136 -0.314 16.733 -10.704 1.00 0.00 C +ATOM 1035 C HIS A 136 0.053 16.287 -12.116 1.00 0.00 C +ATOM 1036 CB HIS A 136 -1.807 17.057 -10.628 1.00 0.00 C +ATOM 1037 O HIS A 136 0.114 17.107 -13.035 1.00 0.00 O +ATOM 1038 CG HIS A 136 -2.685 15.846 -10.645 1.00 0.00 C +ATOM 1039 CD2 HIS A 136 -3.469 15.303 -9.684 1.00 0.00 C +ATOM 1040 ND1 HIS A 136 -2.822 15.042 -11.756 1.00 0.00 N +ATOM 1041 CE1 HIS A 136 -3.655 14.054 -11.476 1.00 0.00 C +ATOM 1042 NE2 HIS A 136 -4.062 14.189 -10.225 1.00 0.00 N +ATOM 1043 N PRO A 137 0.226 15.184 -12.322 1.00 0.00 N +ATOM 1044 CA PRO A 137 0.690 14.688 -13.619 1.00 0.00 C +ATOM 1045 C PRO A 137 -0.331 14.908 -14.734 1.00 0.00 C +ATOM 1046 CB PRO A 137 0.910 13.194 -13.368 1.00 0.00 C +ATOM 1047 O PRO A 137 0.032 14.929 -15.913 1.00 0.00 O +ATOM 1048 CG PRO A 137 0.170 12.907 -12.101 1.00 0.00 C +ATOM 1049 CD PRO A 137 0.003 14.193 -11.344 1.00 0.00 C +ATOM 1050 N CYS A 138 -1.650 15.096 -14.399 1.00 0.00 N +ATOM 1051 CA CYS A 138 -2.702 15.293 -15.390 1.00 0.00 C +ATOM 1052 C CYS A 138 -2.947 16.776 -15.639 1.00 0.00 C +ATOM 1053 CB CYS A 138 -3.998 14.621 -14.937 1.00 0.00 C +ATOM 1054 O CYS A 138 -3.870 17.144 -16.368 1.00 0.00 O +ATOM 1055 SG CYS A 138 -3.887 12.822 -14.830 1.00 0.00 S +ATOM 1056 N TYR A 139 -2.050 17.530 -14.923 1.00 0.00 N +ATOM 1057 CA TYR A 139 -2.147 18.977 -15.082 1.00 0.00 C +ATOM 1058 C TYR A 139 -1.580 19.417 -16.427 1.00 0.00 C +ATOM 1059 CB TYR A 139 -1.411 19.693 -13.946 1.00 0.00 C +ATOM 1060 O TYR A 139 -0.470 19.029 -16.796 1.00 0.00 O +ATOM 1061 CG TYR A 139 -1.400 21.197 -14.083 1.00 0.00 C +ATOM 1062 CD1 TYR A 139 -0.320 21.854 -14.668 1.00 0.00 C +ATOM 1063 CD2 TYR A 139 -2.468 21.962 -13.627 1.00 0.00 C +ATOM 1064 CE1 TYR A 139 
-0.305 23.239 -14.795 1.00 0.00 C +ATOM 1065 CE2 TYR A 139 -2.463 23.348 -13.748 1.00 0.00 C +ATOM 1066 OH TYR A 139 -1.369 25.348 -14.456 1.00 0.00 O +ATOM 1067 CZ TYR A 139 -1.379 23.976 -14.333 1.00 0.00 C +ATOM 1068 N ARG A 140 -2.489 20.100 -17.284 1.00 0.00 N +ATOM 1069 CA ARG A 140 -2.036 20.668 -18.550 1.00 0.00 C +ATOM 1070 C ARG A 140 -2.355 22.157 -18.626 1.00 0.00 C +ATOM 1071 CB ARG A 140 -2.678 19.934 -19.729 1.00 0.00 C +ATOM 1072 O ARG A 140 -3.435 22.589 -18.217 1.00 0.00 O +ATOM 1073 CG ARG A 140 -2.260 18.477 -19.849 1.00 0.00 C +ATOM 1074 CD ARG A 140 -2.856 17.819 -21.086 1.00 0.00 C +ATOM 1075 NE ARG A 140 -2.303 16.486 -21.307 1.00 0.00 N +ATOM 1076 NH1 ARG A 140 -1.510 16.942 -23.429 1.00 0.00 N +ATOM 1077 NH2 ARG A 140 -1.212 14.863 -22.510 1.00 0.00 N +ATOM 1078 CZ ARG A 140 -1.676 16.100 -22.415 1.00 0.00 C +ATOM 1079 N LYS A 141 -1.270 22.911 -18.875 1.00 0.00 N +ATOM 1080 CA LYS A 141 -1.432 24.343 -19.105 1.00 0.00 C +ATOM 1081 C LYS A 141 -1.210 24.693 -20.574 1.00 0.00 C +ATOM 1082 CB LYS A 141 -0.468 25.141 -18.226 1.00 0.00 C +ATOM 1083 O LYS A 141 -0.111 24.513 -21.101 1.00 0.00 O +ATOM 1084 CG LYS A 141 -0.749 26.636 -18.196 1.00 0.00 C +ATOM 1085 CD LYS A 141 0.167 27.359 -17.217 1.00 0.00 C +ATOM 1086 CE LYS A 141 -0.124 28.853 -17.175 1.00 0.00 C +ATOM 1087 NZ LYS A 141 0.783 29.568 -16.230 1.00 0.00 N +ATOM 1088 N GLU A 142 -2.285 24.849 -21.311 1.00 0.00 N +ATOM 1089 CA GLU A 142 -2.217 25.247 -22.713 1.00 0.00 C +ATOM 1090 C GLU A 142 -2.920 26.583 -22.942 1.00 0.00 C +ATOM 1091 CB GLU A 142 -2.832 24.169 -23.609 1.00 0.00 C +ATOM 1092 O GLU A 142 -4.113 26.718 -22.665 1.00 0.00 O +ATOM 1093 CG GLU A 142 -2.588 24.391 -25.095 1.00 0.00 C +ATOM 1094 CD GLU A 142 -3.160 23.287 -25.969 1.00 0.00 C +ATOM 1095 OE1 GLU A 142 -3.890 22.415 -25.445 1.00 0.00 O +ATOM 1096 OE2 GLU A 142 -2.875 23.292 -27.187 1.00 0.00 O +ATOM 1097 N GLY A 143 -2.182 27.579 -23.469 1.00 0.00 N +ATOM 1098 CA GLY A 143 -2.758 28.882 -23.764 1.00 0.00 C 
+ATOM 1099 C GLY A 143 -3.342 29.566 -22.543 1.00 0.00 C +ATOM 1100 O GLY A 143 -4.385 30.217 -22.629 1.00 0.00 O +ATOM 1101 N GLY A 144 -2.777 29.290 -21.354 1.00 0.00 N +ATOM 1102 CA GLY A 144 -3.245 29.947 -20.144 1.00 0.00 C +ATOM 1103 C GLY A 144 -4.395 29.217 -19.476 1.00 0.00 C +ATOM 1104 O GLY A 144 -4.892 29.651 -18.435 1.00 0.00 O +ATOM 1105 N VAL A 145 -4.936 28.140 -20.164 1.00 0.00 N +ATOM 1106 CA VAL A 145 -6.039 27.365 -19.605 1.00 0.00 C +ATOM 1107 C VAL A 145 -5.504 26.079 -18.980 1.00 0.00 C +ATOM 1108 CB VAL A 145 -7.101 27.035 -20.678 1.00 0.00 C +ATOM 1109 O VAL A 145 -4.666 25.395 -19.573 1.00 0.00 O +ATOM 1110 CG1 VAL A 145 -8.253 26.236 -20.071 1.00 0.00 C +ATOM 1111 CG2 VAL A 145 -7.619 28.316 -21.328 1.00 0.00 C +ATOM 1112 N VAL A 146 -5.959 25.837 -17.763 1.00 0.00 N +ATOM 1113 CA VAL A 146 -5.574 24.626 -17.045 1.00 0.00 C +ATOM 1114 C VAL A 146 -6.593 23.521 -17.311 1.00 0.00 C +ATOM 1115 CB VAL A 146 -5.450 24.882 -15.526 1.00 0.00 C +ATOM 1116 O VAL A 146 -7.803 23.758 -17.264 1.00 0.00 O +ATOM 1117 CG1 VAL A 146 -5.098 23.592 -14.788 1.00 0.00 C +ATOM 1118 CG2 VAL A 146 -4.405 25.962 -15.251 1.00 0.00 C +ATOM 1119 N SER A 147 -6.150 22.494 -17.818 1.00 0.00 N +ATOM 1120 CA SER A 147 -7.007 21.324 -17.979 1.00 0.00 C +ATOM 1121 C SER A 147 -6.522 20.161 -17.121 1.00 0.00 C +ATOM 1122 CB SER A 147 -7.062 20.898 -19.447 1.00 0.00 C +ATOM 1123 O SER A 147 -5.316 19.946 -16.982 1.00 0.00 O +ATOM 1124 OG SER A 147 -7.912 19.776 -19.614 1.00 0.00 O +ATOM 1125 N CYS A 148 -7.409 19.637 -16.271 1.00 0.00 N +ATOM 1126 CA CYS A 148 -7.135 18.448 -15.471 1.00 0.00 C +ATOM 1127 C CYS A 148 -7.876 17.237 -16.024 1.00 0.00 C +ATOM 1128 CB CYS A 148 -7.534 18.680 -14.014 1.00 0.00 C +ATOM 1129 O CYS A 148 -8.941 16.873 -15.523 1.00 0.00 O +ATOM 1130 SG CYS A 148 -6.884 17.438 -12.875 1.00 0.00 S +ATOM 1131 N ASP A 149 -7.699 16.869 -17.159 1.00 0.00 N +ATOM 1132 CA ASP A 149 -8.393 15.760 -17.806 1.00 0.00 C +ATOM 1133 C ASP A 149 -7.445 14.590 
-18.056 1.00 0.00 C +ATOM 1134 CB ASP A 149 -9.023 16.217 -19.124 1.00 0.00 C +ATOM 1135 O ASP A 149 -6.548 14.679 -18.896 1.00 0.00 O +ATOM 1136 CG ASP A 149 -9.976 15.192 -19.713 1.00 0.00 C +ATOM 1137 OD1 ASP A 149 -10.762 15.540 -20.620 1.00 0.00 O +ATOM 1138 OD2 ASP A 149 -9.941 14.025 -19.265 1.00 0.00 O +ATOM 1139 N CYS A 150 -7.567 13.519 -17.222 1.00 0.00 N +ATOM 1140 CA CYS A 150 -6.727 12.334 -17.354 1.00 0.00 C +ATOM 1141 C CYS A 150 -7.054 11.573 -18.634 1.00 0.00 C +ATOM 1142 CB CYS A 150 -6.900 11.416 -16.145 1.00 0.00 C +ATOM 1143 O CYS A 150 -6.244 10.777 -19.112 1.00 0.00 O +ATOM 1144 SG CYS A 150 -6.406 12.171 -14.581 1.00 0.00 S +ATOM 1145 N LYS A 151 -8.188 11.835 -19.161 1.00 0.00 N +ATOM 1146 CA LYS A 151 -8.679 11.063 -20.298 1.00 0.00 C +ATOM 1147 C LYS A 151 -8.050 11.543 -21.603 1.00 0.00 C +ATOM 1148 CB LYS A 151 -10.204 11.151 -20.388 1.00 0.00 C +ATOM 1149 O LYS A 151 -7.990 10.796 -22.581 1.00 0.00 O +ATOM 1150 CG LYS A 151 -10.931 10.505 -19.219 1.00 0.00 C +ATOM 1151 CD LYS A 151 -12.443 10.620 -19.368 1.00 0.00 C +ATOM 1152 CE LYS A 151 -13.172 9.972 -18.199 1.00 0.00 C +ATOM 1153 NZ LYS A 151 -14.653 10.132 -18.313 1.00 0.00 N +ATOM 1154 N THR A 152 -7.674 12.782 -21.597 1.00 0.00 N +ATOM 1155 CA THR A 152 -7.207 13.367 -22.850 1.00 0.00 C +ATOM 1156 C THR A 152 -5.688 13.279 -22.954 1.00 0.00 C +ATOM 1157 CB THR A 152 -7.649 14.836 -22.979 1.00 0.00 C +ATOM 1158 O THR A 152 -5.115 13.562 -24.008 1.00 0.00 O +ATOM 1159 CG2 THR A 152 -9.168 14.950 -23.052 1.00 0.00 C +ATOM 1160 OG1 THR A 152 -7.180 15.572 -21.843 1.00 0.00 O +ATOM 1161 N ILE A 153 -5.064 12.924 -21.810 1.00 0.00 N +ATOM 1162 CA ILE A 153 -3.608 12.834 -21.820 1.00 0.00 C +ATOM 1163 C ILE A 153 -3.181 11.435 -22.259 1.00 0.00 C +ATOM 1164 CB ILE A 153 -3.011 13.167 -20.435 1.00 0.00 C +ATOM 1165 O ILE A 153 -3.826 10.444 -21.909 1.00 0.00 O +ATOM 1166 CG1 ILE A 153 -3.390 14.593 -20.018 1.00 0.00 C +ATOM 1167 CG2 ILE A 153 -1.490 12.986 -20.444 1.00 0.00 C +ATOM 
1168 CD1 ILE A 153 -2.744 15.679 -20.868 1.00 0.00 C +ATOM 1169 N THR A 154 -2.270 11.363 -23.110 1.00 0.00 N +ATOM 1170 CA THR A 154 -1.713 10.090 -23.553 1.00 0.00 C +ATOM 1171 C THR A 154 -1.160 9.301 -22.370 1.00 0.00 C +ATOM 1172 CB THR A 154 -0.603 10.299 -24.599 1.00 0.00 C +ATOM 1173 O THR A 154 -0.336 9.812 -21.607 1.00 0.00 O +ATOM 1174 CG2 THR A 154 -0.102 8.965 -25.144 1.00 0.00 C +ATOM 1175 OG1 THR A 154 -1.117 11.082 -25.683 1.00 0.00 O +ATOM 1176 N CYS A 155 -1.859 8.148 -22.092 1.00 0.00 N +ATOM 1177 CA CYS A 155 -1.409 7.282 -21.008 1.00 0.00 C +ATOM 1178 C CYS A 155 -0.060 6.653 -21.338 1.00 0.00 C +ATOM 1179 CB CYS A 155 -2.439 6.187 -20.734 1.00 0.00 C +ATOM 1180 O CYS A 155 0.086 5.991 -22.367 1.00 0.00 O +ATOM 1181 SG CYS A 155 -4.010 6.809 -20.095 1.00 0.00 S +ATOM 1182 N ASN A 156 0.893 7.176 -20.743 1.00 0.00 N +ATOM 1183 CA ASN A 156 2.201 6.536 -20.836 1.00 0.00 C +ATOM 1184 C ASN A 156 2.611 5.904 -19.509 1.00 0.00 C +ATOM 1185 CB ASN A 156 3.259 7.541 -21.294 1.00 0.00 C +ATOM 1186 O ASN A 156 1.841 5.916 -18.546 1.00 0.00 O +ATOM 1187 CG ASN A 156 3.393 8.721 -20.351 1.00 0.00 C +ATOM 1188 ND2 ASN A 156 3.554 9.913 -20.912 1.00 0.00 N +ATOM 1189 OD1 ASN A 156 3.352 8.561 -19.128 1.00 0.00 O +ATOM 1190 N GLU A 157 3.624 5.165 -19.538 1.00 0.00 N +ATOM 1191 CA GLU A 157 4.078 4.402 -18.379 1.00 0.00 C +ATOM 1192 C GLU A 157 4.196 5.291 -17.145 1.00 0.00 C +ATOM 1193 CB GLU A 157 5.421 3.727 -18.672 1.00 0.00 C +ATOM 1194 O GLU A 157 4.169 4.799 -16.014 1.00 0.00 O +ATOM 1195 CG GLU A 157 5.336 2.611 -19.703 1.00 0.00 C +ATOM 1196 CD GLU A 157 6.670 1.925 -19.955 1.00 0.00 C +ATOM 1197 OE1 GLU A 157 7.660 2.250 -19.261 1.00 0.00 O +ATOM 1198 OE2 GLU A 157 6.725 1.058 -20.855 1.00 0.00 O +ATOM 1199 N ASP A 158 4.203 6.614 -17.357 1.00 0.00 N +ATOM 1200 CA ASP A 158 4.344 7.540 -16.237 1.00 0.00 C +ATOM 1201 C ASP A 158 2.979 8.001 -15.731 1.00 0.00 C +ATOM 1202 CB ASP A 158 5.190 8.748 -16.643 1.00 0.00 C +ATOM 1203 O ASP A 158 2.882 8.629 
-14.675 1.00 0.00 O +ATOM 1204 CG ASP A 158 6.638 8.390 -16.927 1.00 0.00 C +ATOM 1205 OD1 ASP A 158 7.251 8.999 -17.830 1.00 0.00 O +ATOM 1206 OD2 ASP A 158 7.169 7.487 -16.244 1.00 0.00 O +ATOM 1207 N HIS A 159 2.048 7.688 -16.589 1.00 0.00 N +ATOM 1208 CA HIS A 159 0.697 8.104 -16.230 1.00 0.00 C +ATOM 1209 C HIS A 159 0.098 7.180 -15.176 1.00 0.00 C +ATOM 1210 CB HIS A 159 -0.199 8.139 -17.469 1.00 0.00 C +ATOM 1211 O HIS A 159 0.148 5.956 -15.317 1.00 0.00 O +ATOM 1212 CG HIS A 159 -1.463 8.914 -17.273 1.00 0.00 C +ATOM 1213 CD2 HIS A 159 -1.859 10.110 -17.769 1.00 0.00 C +ATOM 1214 ND1 HIS A 159 -2.495 8.466 -16.477 1.00 0.00 N +ATOM 1215 CE1 HIS A 159 -3.474 9.356 -16.493 1.00 0.00 C +ATOM 1216 NE2 HIS A 159 -3.113 10.363 -17.270 1.00 0.00 N +ATOM 1217 N PRO A 160 -0.393 7.607 -14.171 1.00 0.00 N +ATOM 1218 CA PRO A 160 -0.882 6.789 -13.059 1.00 0.00 C +ATOM 1219 C PRO A 160 -2.041 5.880 -13.461 1.00 0.00 C +ATOM 1220 CB PRO A 160 -1.334 7.828 -12.030 1.00 0.00 C +ATOM 1221 O PRO A 160 -2.326 4.896 -12.773 1.00 0.00 O +ATOM 1222 CG PRO A 160 -1.619 9.058 -12.828 1.00 0.00 C +ATOM 1223 CD PRO A 160 -0.678 9.096 -13.998 1.00 0.00 C +ATOM 1224 N CYS A 161 -2.737 6.193 -14.475 1.00 0.00 N +ATOM 1225 CA CYS A 161 -3.885 5.408 -14.913 1.00 0.00 C +ATOM 1226 C CYS A 161 -3.470 4.357 -15.936 1.00 0.00 C +ATOM 1227 CB CYS A 161 -4.960 6.317 -15.509 1.00 0.00 C +ATOM 1228 O CYS A 161 -4.312 3.621 -16.453 1.00 0.00 O +ATOM 1229 SG CYS A 161 -5.656 7.493 -14.328 1.00 0.00 S +ATOM 1230 N TYR A 162 -2.132 4.465 -16.183 1.00 0.00 N +ATOM 1231 CA TYR A 162 -1.573 3.516 -17.140 1.00 0.00 C +ATOM 1232 C TYR A 162 -1.452 2.127 -16.525 1.00 0.00 C +ATOM 1233 CB TYR A 162 -0.201 3.992 -17.629 1.00 0.00 C +ATOM 1234 O TYR A 162 -0.908 1.972 -15.429 1.00 0.00 O +ATOM 1235 CG TYR A 162 0.469 3.033 -18.583 1.00 0.00 C +ATOM 1236 CD1 TYR A 162 1.424 2.124 -18.131 1.00 0.00 C +ATOM 1237 CD2 TYR A 162 0.151 3.035 -19.936 1.00 0.00 C +ATOM 1238 CE1 TYR A 162 2.046 1.240 -19.007 1.00 
0.00 C +ATOM 1239 CE2 TYR A 162 0.766 2.155 -20.821 1.00 0.00 C +ATOM 1240 OH TYR A 162 2.323 0.390 -21.219 1.00 0.00 O +ATOM 1241 CZ TYR A 162 1.710 1.263 -20.348 1.00 0.00 C +ATOM 1242 N HIS A 163 -2.235 1.147 -17.118 1.00 0.00 N +ATOM 1243 CA HIS A 163 -2.129 -0.243 -16.689 1.00 0.00 C +ATOM 1244 C HIS A 163 -1.553 -1.119 -17.797 1.00 0.00 C +ATOM 1245 CB HIS A 163 -3.496 -0.776 -16.256 1.00 0.00 C +ATOM 1246 O HIS A 163 -1.953 -1.002 -18.957 1.00 0.00 O +ATOM 1247 CG HIS A 163 -4.058 -0.081 -15.057 1.00 0.00 C +ATOM 1248 CD2 HIS A 163 -5.018 0.867 -14.948 1.00 0.00 C +ATOM 1249 ND1 HIS A 163 -3.625 -0.342 -13.775 1.00 0.00 N +ATOM 1250 CE1 HIS A 163 -4.297 0.419 -12.926 1.00 0.00 C +ATOM 1251 NE2 HIS A 163 -5.148 1.162 -13.613 1.00 0.00 N +ATOM 1252 N SER A 164 -0.416 -1.652 -17.433 1.00 0.00 N +ATOM 1253 CA SER A 164 0.122 -2.646 -18.356 1.00 0.00 C +ATOM 1254 C SER A 164 -0.175 -4.063 -17.877 1.00 0.00 C +ATOM 1255 CB SER A 164 1.630 -2.462 -18.523 1.00 0.00 C +ATOM 1256 O SER A 164 -0.055 -4.360 -16.687 1.00 0.00 O +ATOM 1257 OG SER A 164 2.304 -2.686 -17.297 1.00 0.00 O +ATOM 1258 N TYR A 165 -0.841 -4.799 -18.656 1.00 0.00 N +ATOM 1259 CA TYR A 165 -1.123 -6.182 -18.287 1.00 0.00 C +ATOM 1260 C TYR A 165 -0.783 -7.132 -19.429 1.00 0.00 C +ATOM 1261 CB TYR A 165 -2.594 -6.345 -17.894 1.00 0.00 C +ATOM 1262 O TYR A 165 -0.643 -6.705 -20.578 1.00 0.00 O +ATOM 1263 CG TYR A 165 -3.559 -6.039 -19.013 1.00 0.00 C +ATOM 1264 CD1 TYR A 165 -3.953 -4.730 -19.282 1.00 0.00 C +ATOM 1265 CD2 TYR A 165 -4.080 -7.058 -19.804 1.00 0.00 C +ATOM 1266 CE1 TYR A 165 -4.843 -4.444 -20.312 1.00 0.00 C +ATOM 1267 CE2 TYR A 165 -4.971 -6.783 -20.836 1.00 0.00 C +ATOM 1268 OH TYR A 165 -6.228 -5.197 -22.103 1.00 0.00 O +ATOM 1269 CZ TYR A 165 -5.346 -5.475 -21.082 1.00 0.00 C +ATOM 1270 N GLU A 166 -0.437 -8.271 -19.075 1.00 0.00 N +ATOM 1271 CA GLU A 166 -0.092 -9.311 -20.039 1.00 0.00 C +ATOM 1272 C GLU A 166 -1.292 -10.203 -20.342 1.00 0.00 C +ATOM 1273 CB GLU A 166 1.075 
-10.158 -19.523 1.00 0.00 C +ATOM 1274 O GLU A 166 -1.951 -10.699 -19.426 1.00 0.00 O +ATOM 1275 CG GLU A 166 1.667 -11.091 -20.569 1.00 0.00 C +ATOM 1276 CD GLU A 166 2.944 -11.776 -20.108 1.00 0.00 C +ATOM 1277 OE1 GLU A 166 3.581 -11.289 -19.146 1.00 0.00 O +ATOM 1278 OE2 GLU A 166 3.310 -12.808 -20.713 1.00 0.00 O +ATOM 1279 N GLU A 167 -1.701 -10.268 -21.599 1.00 0.00 N +ATOM 1280 CA GLU A 167 -2.770 -11.135 -22.087 1.00 0.00 C +ATOM 1281 C GLU A 167 -2.310 -11.956 -23.288 1.00 0.00 C +ATOM 1282 CB GLU A 167 -4.006 -10.310 -22.455 1.00 0.00 C +ATOM 1283 O GLU A 167 -1.837 -11.400 -24.282 1.00 0.00 O +ATOM 1284 CG GLU A 167 -5.232 -11.150 -22.780 1.00 0.00 C +ATOM 1285 CD GLU A 167 -6.476 -10.318 -23.048 1.00 0.00 C +ATOM 1286 OE1 GLU A 167 -6.367 -9.075 -23.139 1.00 0.00 O +ATOM 1287 OE2 GLU A 167 -7.570 -10.915 -23.165 1.00 0.00 O +ATOM 1288 N ASP A 168 -2.308 -13.235 -23.226 1.00 0.00 N +ATOM 1289 CA ASP A 168 -1.919 -14.168 -24.278 1.00 0.00 C +ATOM 1290 C ASP A 168 -0.472 -13.938 -24.708 1.00 0.00 C +ATOM 1291 CB ASP A 168 -2.852 -14.039 -25.483 1.00 0.00 C +ATOM 1292 O ASP A 168 -0.162 -13.963 -25.901 1.00 0.00 O +ATOM 1293 CG ASP A 168 -4.284 -14.437 -25.170 1.00 0.00 C +ATOM 1294 OD1 ASP A 168 -5.222 -13.831 -25.730 1.00 0.00 O +ATOM 1295 OD2 ASP A 168 -4.475 -15.363 -24.352 1.00 0.00 O +ATOM 1296 N GLY A 169 0.378 -13.615 -23.661 1.00 0.00 N +ATOM 1297 CA GLY A 169 1.802 -13.483 -23.928 1.00 0.00 C +ATOM 1298 C GLY A 169 2.181 -12.123 -24.482 1.00 0.00 C +ATOM 1299 O GLY A 169 3.340 -11.889 -24.830 1.00 0.00 O +ATOM 1300 N VAL A 170 1.173 -11.220 -24.645 1.00 0.00 N +ATOM 1301 CA VAL A 170 1.451 -9.891 -25.180 1.00 0.00 C +ATOM 1302 C VAL A 170 1.131 -8.832 -24.128 1.00 0.00 C +ATOM 1303 CB VAL A 170 0.648 -9.620 -26.472 1.00 0.00 C +ATOM 1304 O VAL A 170 0.124 -8.932 -23.423 1.00 0.00 O +ATOM 1305 CG1 VAL A 170 0.954 -8.226 -27.016 1.00 0.00 C +ATOM 1306 CG2 VAL A 170 0.952 -10.686 -27.523 1.00 0.00 C +ATOM 1307 N THR A 171 2.053 -7.976 -23.977 1.00 
0.00 N +ATOM 1308 CA THR A 171 1.846 -6.867 -23.053 1.00 0.00 C +ATOM 1309 C THR A 171 0.857 -5.859 -23.631 1.00 0.00 C +ATOM 1310 CB THR A 171 3.173 -6.158 -22.725 1.00 0.00 C +ATOM 1311 O THR A 171 1.042 -5.370 -24.747 1.00 0.00 O +ATOM 1312 CG2 THR A 171 2.958 -5.020 -21.732 1.00 0.00 C +ATOM 1313 OG1 THR A 171 4.087 -7.105 -22.158 1.00 0.00 O +ATOM 1314 N LYS A 172 -0.220 -5.669 -22.883 1.00 0.00 N +ATOM 1315 CA LYS A 172 -1.215 -4.669 -23.260 1.00 0.00 C +ATOM 1316 C LYS A 172 -1.232 -3.510 -22.267 1.00 0.00 C +ATOM 1317 CB LYS A 172 -2.604 -5.301 -23.351 1.00 0.00 C +ATOM 1318 O LYS A 172 -0.846 -3.673 -21.107 1.00 0.00 O +ATOM 1319 CG LYS A 172 -2.720 -6.393 -24.405 1.00 0.00 C +ATOM 1320 CD LYS A 172 -4.154 -6.886 -24.543 1.00 0.00 C +ATOM 1321 CE LYS A 172 -4.280 -7.941 -25.634 1.00 0.00 C +ATOM 1322 NZ LYS A 172 -5.700 -8.358 -25.841 1.00 0.00 N +ATOM 1323 N SER A 173 -1.325 -2.368 -22.806 1.00 0.00 N +ATOM 1324 CA SER A 173 -1.442 -1.178 -21.969 1.00 0.00 C +ATOM 1325 C SER A 173 -2.815 -0.530 -22.119 1.00 0.00 C +ATOM 1326 CB SER A 173 -0.350 -0.167 -22.320 1.00 0.00 C +ATOM 1327 O SER A 173 -3.418 -0.584 -23.193 1.00 0.00 O +ATOM 1328 OG SER A 173 -0.430 0.207 -23.684 1.00 0.00 O +ATOM 1329 N ASP A 174 -3.338 -0.380 -21.075 1.00 0.00 N +ATOM 1330 CA ASP A 174 -4.630 0.300 -21.062 1.00 0.00 C +ATOM 1331 C ASP A 174 -4.627 1.467 -20.077 1.00 0.00 C +ATOM 1332 CB ASP A 174 -5.749 -0.682 -20.712 1.00 0.00 C +ATOM 1333 O ASP A 174 -3.834 1.488 -19.134 1.00 0.00 O +ATOM 1334 CG ASP A 174 -7.118 -0.209 -21.168 1.00 0.00 C +ATOM 1335 OD1 ASP A 174 -8.137 -0.811 -20.767 1.00 0.00 O +ATOM 1336 OD2 ASP A 174 -7.178 0.774 -21.938 1.00 0.00 O +ATOM 1337 N CYS A 175 -5.254 2.474 -20.569 1.00 0.00 N +ATOM 1338 CA CYS A 175 -5.511 3.614 -19.696 1.00 0.00 C +ATOM 1339 C CYS A 175 -6.886 3.506 -19.049 1.00 0.00 C +ATOM 1340 CB CYS A 175 -5.407 4.922 -20.479 1.00 0.00 C +ATOM 1341 O CYS A 175 -7.906 3.530 -19.740 1.00 0.00 O +ATOM 1342 SG CYS A 175 -5.469 6.399 -19.441 
1.00 0.00 S +ATOM 1343 N ASP A 176 -6.926 3.117 -17.897 1.00 0.00 N +ATOM 1344 CA ASP A 176 -8.216 2.947 -17.236 1.00 0.00 C +ATOM 1345 C ASP A 176 -8.579 4.181 -16.413 1.00 0.00 C +ATOM 1346 CB ASP A 176 -8.202 1.705 -16.343 1.00 0.00 C +ATOM 1347 O ASP A 176 -8.390 4.199 -15.195 1.00 0.00 O +ATOM 1348 CG ASP A 176 -9.574 1.352 -15.795 1.00 0.00 C +ATOM 1349 OD1 ASP A 176 -9.681 0.413 -14.978 1.00 0.00 O +ATOM 1350 OD2 ASP A 176 -10.557 2.018 -16.187 1.00 0.00 O +ATOM 1351 N CYS A 177 -8.999 5.207 -17.129 1.00 0.00 N +ATOM 1352 CA CYS A 177 -9.425 6.426 -16.450 1.00 0.00 C +ATOM 1353 C CYS A 177 -10.930 6.422 -16.214 1.00 0.00 C +ATOM 1354 CB CYS A 177 -9.030 7.658 -17.264 1.00 0.00 C +ATOM 1355 O CYS A 177 -11.489 7.406 -15.726 1.00 0.00 O +ATOM 1356 SG CYS A 177 -7.251 7.814 -17.535 1.00 0.00 S +ATOM 1357 N GLU A 178 -11.529 5.450 -16.815 1.00 0.00 N +ATOM 1358 CA GLU A 178 -12.986 5.385 -16.754 1.00 0.00 C +ATOM 1359 C GLU A 178 -13.473 5.263 -15.313 1.00 0.00 C +ATOM 1360 CB GLU A 178 -13.508 4.211 -17.586 1.00 0.00 C +ATOM 1361 O GLU A 178 -14.584 5.686 -14.990 1.00 0.00 O +ATOM 1362 CG GLU A 178 -13.379 4.417 -19.089 1.00 0.00 C +ATOM 1363 CD GLU A 178 -13.985 3.285 -19.903 1.00 0.00 C +ATOM 1364 OE1 GLU A 178 -14.377 2.254 -19.312 1.00 0.00 O +ATOM 1365 OE2 GLU A 178 -14.069 3.431 -21.143 1.00 0.00 O +ATOM 1366 N HIS A 179 -12.602 4.539 -14.596 1.00 0.00 N +ATOM 1367 CA HIS A 179 -13.083 4.370 -13.230 1.00 0.00 C +ATOM 1368 C HIS A 179 -12.913 5.654 -12.423 1.00 0.00 C +ATOM 1369 CB HIS A 179 -12.350 3.217 -12.542 1.00 0.00 C +ATOM 1370 O HIS A 179 -13.259 5.699 -11.241 1.00 0.00 O +ATOM 1371 CG HIS A 179 -12.672 1.874 -13.117 1.00 0.00 C +ATOM 1372 CD2 HIS A 179 -11.913 1.007 -13.828 1.00 0.00 C +ATOM 1373 ND1 HIS A 179 -13.911 1.286 -12.986 1.00 0.00 N +ATOM 1374 CE1 HIS A 179 -13.900 0.111 -13.593 1.00 0.00 C +ATOM 1375 NE2 HIS A 179 -12.700 -0.082 -14.113 1.00 0.00 N +ATOM 1376 N SER A 180 -12.761 6.762 -13.153 1.00 0.00 N +ATOM 1377 CA SER A 180 
-12.625 8.074 -12.529 1.00 0.00 C +ATOM 1378 C SER A 180 -13.967 8.797 -12.465 1.00 0.00 C +ATOM 1379 CB SER A 180 -11.612 8.928 -13.291 1.00 0.00 C +ATOM 1380 O SER A 180 -14.740 8.767 -13.424 1.00 0.00 O +ATOM 1381 OG SER A 180 -12.114 9.284 -14.568 1.00 0.00 O +ATOM 1382 N PRO A 181 -14.719 8.802 -11.393 1.00 0.00 N +ATOM 1383 CA PRO A 181 -15.790 9.799 -11.316 1.00 0.00 C +ATOM 1384 C PRO A 181 -15.334 11.190 -11.748 1.00 0.00 C +ATOM 1385 CB PRO A 181 -16.178 9.786 -9.836 1.00 0.00 C +ATOM 1386 O PRO A 181 -14.337 11.704 -11.233 1.00 0.00 O +ATOM 1387 CG PRO A 181 -14.965 9.275 -9.127 1.00 0.00 C +ATOM 1388 CD PRO A 181 -14.184 8.416 -10.079 1.00 0.00 C +ATOM 1389 N GLY A 182 -15.240 11.453 -13.029 1.00 0.00 N +ATOM 1390 CA GLY A 182 -15.037 12.822 -13.476 1.00 0.00 C +ATOM 1391 C GLY A 182 -15.937 13.820 -12.770 1.00 0.00 C +ATOM 1392 O GLY A 182 -16.871 13.431 -12.066 1.00 0.00 O +ATOM 1393 N PRO A 183 -15.396 14.822 -12.165 1.00 0.00 N +ATOM 1394 CA PRO A 183 -16.219 15.888 -11.589 1.00 0.00 C +ATOM 1395 C PRO A 183 -17.636 15.913 -12.158 1.00 0.00 C +ATOM 1396 CB PRO A 183 -15.458 17.162 -11.964 1.00 0.00 C +ATOM 1397 O PRO A 183 -17.838 15.604 -13.335 1.00 0.00 O +ATOM 1398 CG PRO A 183 -14.695 16.803 -13.198 1.00 0.00 C +ATOM 1399 CD PRO A 183 -14.401 15.330 -13.164 1.00 0.00 C +ATOM 1400 N SER A 184 -18.521 15.085 -11.531 1.00 0.00 N +ATOM 1401 CA SER A 184 -19.918 15.360 -11.852 1.00 0.00 C +ATOM 1402 C SER A 184 -20.093 16.779 -12.385 1.00 0.00 C +ATOM 1403 CB SER A 184 -20.801 15.157 -10.620 1.00 0.00 C +ATOM 1404 O SER A 184 -19.543 17.730 -11.826 1.00 0.00 O +ATOM 1405 OG SER A 184 -20.375 15.985 -9.553 1.00 0.00 O +ATOM 1406 N GLU A 185 -19.658 16.958 -13.662 1.00 0.00 N +ATOM 1407 CA GLU A 185 -20.167 18.200 -14.237 1.00 0.00 C +ATOM 1408 C GLU A 185 -21.492 18.603 -13.597 1.00 0.00 C +ATOM 1409 CB GLU A 185 -20.336 18.061 -15.752 1.00 0.00 C +ATOM 1410 O GLU A 185 -22.450 17.827 -13.598 1.00 0.00 O +ATOM 1411 CG GLU A 185 -19.029 17.836 -16.499 
1.00 0.00 C +ATOM 1412 CD GLU A 185 -18.110 19.046 -16.478 1.00 0.00 C +ATOM 1413 OE1 GLU A 185 -18.546 20.132 -16.032 1.00 0.00 O +ATOM 1414 OE2 GLU A 185 -16.944 18.908 -16.911 1.00 0.00 O +ATOM 1415 N HIS A 186 -21.419 18.961 -12.324 1.00 0.00 N +ATOM 1416 CA HIS A 186 -22.672 19.612 -11.958 1.00 0.00 C +ATOM 1417 C HIS A 186 -23.102 20.617 -13.022 1.00 0.00 C +ATOM 1418 CB HIS A 186 -22.539 20.307 -10.602 1.00 0.00 C +ATOM 1419 O HIS A 186 -22.311 21.471 -13.430 1.00 0.00 O +ATOM 1420 CG HIS A 186 -22.402 19.359 -9.453 1.00 0.00 C +ATOM 1421 CD2 HIS A 186 -21.354 19.097 -8.637 1.00 0.00 C +ATOM 1422 ND1 HIS A 186 -23.431 18.543 -9.034 1.00 0.00 N +ATOM 1423 CE1 HIS A 186 -23.020 17.819 -8.007 1.00 0.00 C +ATOM 1424 NE2 HIS A 186 -21.763 18.136 -7.746 1.00 0.00 N +ATOM 1425 N HIS A 187 -23.592 20.072 -14.139 1.00 0.00 N +ATOM 1426 CA HIS A 187 -24.308 20.983 -15.025 1.00 0.00 C +ATOM 1427 C HIS A 187 -24.996 22.092 -14.235 1.00 0.00 C +ATOM 1428 CB HIS A 187 -25.335 20.219 -15.862 1.00 0.00 C +ATOM 1429 O HIS A 187 -25.860 21.819 -13.399 1.00 0.00 O +ATOM 1430 CG HIS A 187 -24.723 19.267 -16.840 1.00 0.00 C +ATOM 1431 CD2 HIS A 187 -24.703 17.913 -16.868 1.00 0.00 C +ATOM 1432 ND1 HIS A 187 -24.026 19.687 -17.952 1.00 0.00 N +ATOM 1433 CE1 HIS A 187 -23.603 18.630 -18.624 1.00 0.00 C +ATOM 1434 NE2 HIS A 187 -24.000 17.541 -17.988 1.00 0.00 N +ATOM 1435 N HIS A 188 -24.187 22.915 -13.610 1.00 0.00 N +ATOM 1436 CA HIS A 188 -24.905 24.094 -13.140 1.00 0.00 C +ATOM 1437 C HIS A 188 -25.814 24.656 -14.228 1.00 0.00 C +ATOM 1438 CB HIS A 188 -23.923 25.168 -12.669 1.00 0.00 C +ATOM 1439 O HIS A 188 -25.380 24.849 -15.366 1.00 0.00 O +ATOM 1440 CG HIS A 188 -23.207 24.814 -11.404 1.00 0.00 C +ATOM 1441 CD2 HIS A 188 -21.913 24.489 -11.176 1.00 0.00 C +ATOM 1442 ND1 HIS A 188 -23.839 24.765 -10.181 1.00 0.00 N +ATOM 1443 CE1 HIS A 188 -22.961 24.425 -9.252 1.00 0.00 C +ATOM 1444 NE2 HIS A 188 -21.785 24.251 -9.830 1.00 0.00 N +ATOM 1445 N HIS A 189 -26.993 24.045 
-14.412 1.00 0.00 N +ATOM 1446 CA HIS A 189 -28.031 24.708 -15.193 1.00 0.00 C +ATOM 1447 C HIS A 189 -28.031 26.213 -14.944 1.00 0.00 C +ATOM 1448 CB HIS A 189 -29.406 24.122 -14.865 1.00 0.00 C +ATOM 1449 O HIS A 189 -27.980 26.656 -13.795 1.00 0.00 O +ATOM 1450 CG HIS A 189 -29.586 22.714 -15.335 1.00 0.00 C +ATOM 1451 CD2 HIS A 189 -29.619 21.546 -14.652 1.00 0.00 C +ATOM 1452 ND1 HIS A 189 -29.755 22.390 -16.664 1.00 0.00 N +ATOM 1453 CE1 HIS A 189 -29.887 21.079 -16.778 1.00 0.00 C +ATOM 1454 NE2 HIS A 189 -29.808 20.543 -15.572 1.00 0.00 N +ATOM 1455 N HIS A 190 -27.150 26.911 -15.639 1.00 0.00 N +ATOM 1456 CA HIS A 190 -27.271 28.363 -15.688 1.00 0.00 C +ATOM 1457 C HIS A 190 -28.733 28.796 -15.667 1.00 0.00 C +ATOM 1458 CB HIS A 190 -26.577 28.917 -16.934 1.00 0.00 C +ATOM 1459 O HIS A 190 -29.546 28.288 -16.443 1.00 0.00 O +ATOM 1460 CG HIS A 190 -25.087 28.794 -16.896 1.00 0.00 C +ATOM 1461 CD2 HIS A 190 -24.243 27.997 -17.592 1.00 0.00 C +ATOM 1462 ND1 HIS A 190 -24.299 29.553 -16.058 1.00 0.00 N +ATOM 1463 CE1 HIS A 190 -23.030 29.227 -16.242 1.00 0.00 C +ATOM 1464 NE2 HIS A 190 -22.969 28.285 -17.168 1.00 0.00 N +ATOM 1465 N HIS A 191 -29.301 28.820 -14.496 1.00 0.00 N +ATOM 1466 CA HIS A 191 -30.497 29.651 -14.423 1.00 0.00 C +ATOM 1467 C HIS A 191 -30.240 31.039 -15.001 1.00 0.00 C +ATOM 1468 CB HIS A 191 -30.981 29.766 -12.976 1.00 0.00 C +ATOM 1469 O HIS A 191 -29.144 31.584 -14.855 1.00 0.00 O +ATOM 1470 CG HIS A 191 -31.644 28.528 -12.465 1.00 0.00 C +ATOM 1471 CD2 HIS A 191 -31.230 27.603 -11.566 1.00 0.00 C +ATOM 1472 ND1 HIS A 191 -32.892 28.123 -12.888 1.00 0.00 N +ATOM 1473 CE1 HIS A 191 -33.217 27.000 -12.270 1.00 0.00 C +ATOM 1474 NE2 HIS A 191 -32.226 26.664 -11.462 1.00 0.00 N +TER 1475 HIS A 191 +ENDMDL +END diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/relax/testdata/with_violations.pdb --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/relax/testdata/with_violations.pdb Tue Mar 01 02:53:05 
2022 +0000 @@ -0,0 +1,1193 @@ +MODEL 0 +ATOM 1 N SER A 1 23.291 1.505 0.613 1.00 6.08 N +ATOM 2 CA SER A 1 22.518 0.883 -0.457 1.00 6.08 C +ATOM 3 C SER A 1 21.020 1.015 -0.206 1.00 6.08 C +ATOM 4 CB SER A 1 22.891 -0.593 -0.601 1.00 6.08 C +ATOM 5 O SER A 1 20.593 1.246 0.928 1.00 6.08 O +ATOM 6 OG SER A 1 22.364 -1.352 0.474 1.00 6.08 O +ATOM 7 N PHE A 2 20.180 1.317 -1.280 1.00 6.08 N +ATOM 8 CA PHE A 2 18.725 1.321 -1.187 1.00 6.08 C +ATOM 9 C PHE A 2 18.244 0.288 -0.175 1.00 6.08 C +ATOM 10 CB PHE A 2 18.097 1.046 -2.557 1.00 6.08 C +ATOM 11 O PHE A 2 17.437 0.600 0.703 1.00 6.08 O +ATOM 12 CG PHE A 2 16.601 0.880 -2.517 1.00 6.08 C +ATOM 13 CD1 PHE A 2 15.765 1.989 -2.519 1.00 6.08 C +ATOM 14 CD2 PHE A 2 16.033 -0.386 -2.478 1.00 6.08 C +ATOM 15 CE1 PHE A 2 14.380 1.838 -2.482 1.00 6.08 C +ATOM 16 CE2 PHE A 2 14.650 -0.545 -2.441 1.00 6.08 C +ATOM 17 CZ PHE A 2 13.826 0.569 -2.442 1.00 6.08 C +ATOM 18 N GLU A 3 18.695 -0.904 -0.178 1.00 6.08 N +ATOM 19 CA GLU A 3 18.305 -2.028 0.668 1.00 6.08 C +ATOM 20 C GLU A 3 18.535 -1.714 2.144 1.00 6.08 C +ATOM 21 CB GLU A 3 19.073 -3.291 0.273 1.00 6.08 C +ATOM 22 O GLU A 3 17.664 -1.961 2.980 1.00 6.08 O +ATOM 23 CG GLU A 3 18.413 -4.088 -0.843 1.00 6.08 C +ATOM 24 CD GLU A 3 19.408 -4.840 -1.713 1.00 6.08 C +ATOM 25 OE1 GLU A 3 18.977 -5.585 -2.622 1.00 6.08 O +ATOM 26 OE2 GLU A 3 20.628 -4.683 -1.482 1.00 6.08 O +ATOM 27 N GLU A 4 19.823 -1.305 2.459 1.00 6.08 N +ATOM 28 CA GLU A 4 20.190 -1.047 3.848 1.00 6.08 C +ATOM 29 C GLU A 4 19.315 0.044 4.456 1.00 6.08 C +ATOM 30 CB GLU A 4 21.666 -0.656 3.950 1.00 6.08 C +ATOM 31 O GLU A 4 18.868 -0.076 5.599 1.00 6.08 O +ATOM 32 CG GLU A 4 22.621 -1.841 3.913 1.00 6.08 C +ATOM 33 CD GLU A 4 24.085 -1.434 3.973 1.00 6.08 C +ATOM 34 OE1 GLU A 4 24.957 -2.324 4.094 1.00 6.08 O +ATOM 35 OE2 GLU A 4 24.361 -0.216 3.899 1.00 6.08 O +ATOM 36 N GLN A 5 19.061 1.102 3.590 1.00 6.08 N +ATOM 37 CA GLN A 5 18.207 2.189 4.056 1.00 6.08 C +ATOM 38 C GLN A 5 16.771 1.714 4.255 1.00 6.08 
C +ATOM 39 CB GLN A 5 18.241 3.359 3.071 1.00 6.08 C +ATOM 40 O GLN A 5 16.113 2.097 5.225 1.00 6.08 O +ATOM 41 CG GLN A 5 19.395 4.326 3.304 1.00 6.08 C +ATOM 42 CD GLN A 5 19.384 5.496 2.338 1.00 6.08 C +ATOM 43 NE2 GLN A 5 20.565 6.022 2.031 1.00 6.08 N +ATOM 44 OE1 GLN A 5 18.323 5.922 1.871 1.00 6.08 O +ATOM 45 N PHE A 6 16.354 0.831 3.208 1.00 5.36 N +ATOM 46 CA PHE A 6 15.014 0.260 3.283 1.00 5.36 C +ATOM 47 C PHE A 6 14.844 -0.555 4.559 1.00 5.36 C +ATOM 48 CB PHE A 6 14.732 -0.616 2.059 1.00 5.36 C +ATOM 49 O PHE A 6 13.859 -0.388 5.282 1.00 5.36 O +ATOM 50 CG PHE A 6 13.331 -1.164 2.014 1.00 5.36 C +ATOM 51 CD1 PHE A 6 12.278 -0.379 1.561 1.00 5.36 C +ATOM 52 CD2 PHE A 6 13.068 -2.464 2.424 1.00 5.36 C +ATOM 53 CE1 PHE A 6 10.980 -0.884 1.518 1.00 5.36 C +ATOM 54 CE2 PHE A 6 11.774 -2.975 2.384 1.00 5.36 C +ATOM 55 CZ PHE A 6 10.731 -2.183 1.932 1.00 5.36 C +ATOM 56 N ILE A 7 15.772 -1.382 4.937 1.00 6.08 N +ATOM 57 CA ILE A 7 15.726 -2.220 6.131 1.00 6.08 C +ATOM 58 C ILE A 7 15.811 -1.345 7.379 1.00 6.08 C +ATOM 59 CB ILE A 7 16.864 -3.266 6.130 1.00 6.08 C +ATOM 60 O ILE A 7 15.052 -1.538 8.332 1.00 6.08 O +ATOM 61 CG1 ILE A 7 16.652 -4.286 5.006 1.00 6.08 C +ATOM 62 CG2 ILE A 7 16.957 -3.962 7.491 1.00 6.08 C +ATOM 63 CD1 ILE A 7 17.837 -5.214 4.781 1.00 6.08 C +ATOM 64 N LYS A 8 16.750 -0.406 7.403 1.00 6.08 N +ATOM 65 CA LYS A 8 16.953 0.493 8.535 1.00 6.08 C +ATOM 66 C LYS A 8 15.689 1.294 8.836 1.00 6.08 C +ATOM 67 CB LYS A 8 18.122 1.442 8.265 1.00 6.08 C +ATOM 68 O LYS A 8 15.304 1.443 9.997 1.00 6.08 O +ATOM 69 CG LYS A 8 18.564 2.242 9.481 1.00 6.08 C +ATOM 70 CD LYS A 8 19.735 3.159 9.151 1.00 6.08 C +ATOM 71 CE LYS A 8 20.102 4.046 10.333 1.00 6.08 C +ATOM 72 NZ LYS A 8 21.192 5.007 9.987 1.00 6.08 N +ATOM 73 N ASN A 9 14.988 1.804 7.750 1.00 6.08 N +ATOM 74 CA ASN A 9 13.799 2.629 7.937 1.00 6.08 C +ATOM 75 C ASN A 9 12.593 1.788 8.349 1.00 6.08 C +ATOM 76 CB ASN A 9 13.486 3.416 6.663 1.00 6.08 C +ATOM 77 O ASN A 9 11.581 2.327 8.801 1.00 
6.08 O +ATOM 78 CG ASN A 9 14.404 4.608 6.473 1.00 6.08 C +ATOM 79 ND2 ASN A 9 14.484 5.105 5.244 1.00 6.08 N +ATOM 80 OD1 ASN A 9 15.036 5.078 7.423 1.00 6.08 O +ATOM 81 N ASN A 10 12.800 0.438 8.337 1.00 6.08 N +ATOM 82 CA ASN A 10 11.572 -0.311 8.581 1.00 6.08 C +ATOM 83 C ASN A 10 11.753 -1.335 9.699 1.00 6.08 C +ATOM 84 CB ASN A 10 11.100 -1.002 7.300 1.00 6.08 C +ATOM 85 O ASN A 10 10.808 -2.039 10.060 1.00 6.08 O +ATOM 86 CG ASN A 10 10.549 -0.025 6.280 1.00 6.08 C +ATOM 87 ND2 ASN A 10 11.285 0.176 5.193 1.00 6.08 N +ATOM 88 OD1 ASN A 10 9.471 0.545 6.467 1.00 6.08 O +ATOM 89 N SER A 11 12.959 -1.512 10.211 1.00 6.08 N +ATOM 90 CA SER A 11 13.197 -2.465 11.291 1.00 6.08 C +ATOM 91 C SER A 11 12.666 -1.938 12.620 1.00 6.08 C +ATOM 92 CB SER A 11 14.690 -2.772 11.415 1.00 6.08 C +ATOM 93 O SER A 11 12.451 -2.709 13.557 1.00 6.08 O +ATOM 94 OG SER A 11 15.435 -1.581 11.601 1.00 6.08 O +ATOM 95 N ASP A 12 12.220 -0.675 12.710 1.00 6.08 N +ATOM 96 CA ASP A 12 11.747 -0.267 14.029 1.00 6.08 C +ATOM 97 C ASP A 12 10.304 -0.711 14.256 1.00 6.08 C +ATOM 98 CB ASP A 12 11.864 1.249 14.196 1.00 6.08 C +ATOM 99 O ASP A 12 9.847 -0.792 15.398 1.00 6.08 O +ATOM 100 CG ASP A 12 13.206 1.682 14.760 1.00 6.08 C +ATOM 101 OD1 ASP A 12 13.586 2.861 14.592 1.00 6.08 O +ATOM 102 OD2 ASP A 12 13.890 0.837 15.376 1.00 6.08 O +ATOM 103 N SER A 13 9.678 -1.277 13.274 1.00 6.08 N +ATOM 104 CA SER A 13 8.274 -1.520 13.587 1.00 6.08 C +ATOM 105 C SER A 13 8.041 -2.973 13.988 1.00 6.08 C +ATOM 106 CB SER A 13 7.389 -1.164 12.393 1.00 6.08 C +ATOM 107 O SER A 13 8.569 -3.889 13.355 1.00 6.08 O +ATOM 108 OG SER A 13 7.871 -1.776 11.209 1.00 6.08 O +ATOM 109 N ASN A 14 8.368 -3.385 15.178 1.00 6.08 N +ATOM 110 CA ASN A 14 7.591 -4.466 15.775 1.00 6.08 C +ATOM 111 C ASN A 14 6.843 -5.271 14.716 1.00 6.08 C +ATOM 112 CB ASN A 14 6.610 -3.912 16.812 1.00 6.08 C +ATOM 113 O ASN A 14 6.016 -6.122 15.047 1.00 6.08 O +ATOM 114 CG ASN A 14 7.250 -3.709 18.171 1.00 6.08 C +ATOM 115 ND2 ASN A 14 
6.608 -2.910 19.015 1.00 6.08 N +ATOM 116 OD1 ASN A 14 8.313 -4.265 18.460 1.00 6.08 O +ATOM 117 N ILE A 15 7.204 -5.229 13.474 1.00 6.08 N +ATOM 118 CA ILE A 15 6.430 -5.995 12.502 1.00 6.08 C +ATOM 119 C ILE A 15 7.095 -7.349 12.265 1.00 6.08 C +ATOM 120 CB ILE A 15 6.282 -5.229 11.168 1.00 6.08 C +ATOM 121 O ILE A 15 8.306 -7.422 12.045 1.00 6.08 O +ATOM 122 CG1 ILE A 15 5.583 -3.885 11.398 1.00 6.08 C +ATOM 123 CG2 ILE A 15 5.520 -6.074 10.143 1.00 6.08 C +ATOM 124 CD1 ILE A 15 5.473 -3.022 10.149 1.00 6.08 C +ATOM 125 N LEU A 16 6.669 -8.397 13.068 1.00 6.08 N +ATOM 126 CA LEU A 16 6.808 -9.846 12.972 1.00 6.08 C +ATOM 127 C LEU A 16 6.967 -10.281 11.519 1.00 6.08 C +ATOM 128 CB LEU A 16 5.597 -10.544 13.596 1.00 6.08 C +ATOM 129 O LEU A 16 6.238 -9.812 10.643 1.00 6.08 O +ATOM 130 CG LEU A 16 5.559 -10.598 15.125 1.00 6.08 C +ATOM 131 CD1 LEU A 16 4.134 -10.839 15.611 1.00 6.08 C +ATOM 132 CD2 LEU A 16 6.498 -11.682 15.644 1.00 6.08 C +ATOM 133 N ALA A 17 8.248 -10.386 11.036 1.00 6.08 N +ATOM 134 CA ALA A 17 8.700 -10.996 9.788 1.00 6.08 C +ATOM 135 C ALA A 17 7.863 -12.224 9.444 1.00 6.08 C +ATOM 136 CB ALA A 17 10.177 -11.372 9.884 1.00 6.08 C +ATOM 137 O ALA A 17 7.473 -12.986 10.332 1.00 6.08 O +ATOM 138 N PRO A 18 7.023 -12.218 8.206 1.00 6.08 N +ATOM 139 CA PRO A 18 6.298 -13.437 7.841 1.00 6.08 C +ATOM 140 C PRO A 18 7.204 -14.499 7.222 1.00 6.08 C +ATOM 141 CB PRO A 18 5.264 -12.942 6.826 1.00 6.08 C +ATOM 142 O PRO A 18 8.307 -14.186 6.767 1.00 6.08 O +ATOM 143 CG PRO A 18 5.663 -11.532 6.531 1.00 6.08 C +ATOM 144 CD PRO A 18 6.762 -11.140 7.476 1.00 6.08 C +ATOM 145 N LYS A 19 6.910 -15.813 7.261 1.00 6.08 N +ATOM 146 CA LYS A 19 7.401 -17.032 6.627 1.00 6.08 C +ATOM 147 C LYS A 19 6.700 -17.279 5.294 1.00 6.08 C +ATOM 148 CB LYS A 19 7.206 -18.235 7.552 1.00 6.08 C +ATOM 149 O LYS A 19 5.494 -17.054 5.170 1.00 6.08 O +ATOM 150 CG LYS A 19 8.289 -18.383 8.610 1.00 6.08 C +ATOM 151 CD LYS A 19 8.149 -19.695 9.372 1.00 6.08 C +ATOM 152 CE LYS A 19 
9.213 -19.830 10.454 1.00 6.08 C +ATOM 153 NZ LYS A 19 9.104 -21.132 11.178 1.00 6.08 N +ATOM 154 N VAL A 20 7.272 -17.218 4.055 1.00 6.08 N +ATOM 155 CA VAL A 20 6.741 -17.404 2.708 1.00 6.08 C +ATOM 156 C VAL A 20 7.061 -18.814 2.217 1.00 6.08 C +ATOM 157 CB VAL A 20 7.307 -16.355 1.725 1.00 6.08 C +ATOM 158 O VAL A 20 8.148 -19.336 2.476 1.00 6.08 O +ATOM 159 CG1 VAL A 20 6.686 -16.524 0.339 1.00 6.08 C +ATOM 160 CG2 VAL A 20 7.064 -14.943 2.254 1.00 6.08 C +ATOM 161 N SER A 21 6.082 -19.480 1.504 1.00 6.08 N +ATOM 162 CA SER A 21 6.281 -20.787 0.888 1.00 6.08 C +ATOM 163 C SER A 21 7.315 -20.720 -0.230 1.00 6.08 C +ATOM 164 CB SER A 21 4.960 -21.329 0.340 1.00 6.08 C +ATOM 165 O SER A 21 7.458 -19.688 -0.889 1.00 6.08 O +ATOM 166 OG SER A 21 4.811 -20.999 -1.030 1.00 6.08 O +ATOM 167 N GLN A 22 8.094 -21.778 -0.457 1.00 6.08 N +ATOM 168 CA GLN A 22 9.146 -22.023 -1.437 1.00 6.08 C +ATOM 169 C GLN A 22 8.608 -21.912 -2.861 1.00 6.08 C +ATOM 170 CB GLN A 22 9.774 -23.400 -1.218 1.00 6.08 C +ATOM 171 O GLN A 22 9.307 -21.436 -3.758 1.00 6.08 O +ATOM 172 CG GLN A 22 11.028 -23.375 -0.356 1.00 6.08 C +ATOM 173 CD GLN A 22 11.900 -24.601 -0.550 1.00 6.08 C +ATOM 174 NE2 GLN A 22 13.017 -24.654 0.167 1.00 6.08 N +ATOM 175 OE1 GLN A 22 11.570 -25.495 -1.337 1.00 6.08 O +ATOM 176 N SER A 23 7.326 -22.350 -3.087 1.00 6.08 N +ATOM 177 CA SER A 23 6.818 -22.344 -4.455 1.00 6.08 C +ATOM 178 C SER A 23 6.627 -20.921 -4.968 1.00 6.08 C +ATOM 179 CB SER A 23 5.494 -23.106 -4.539 1.00 6.08 C +ATOM 180 O SER A 23 6.916 -20.631 -6.131 1.00 6.08 O +ATOM 181 OG SER A 23 4.496 -22.467 -3.762 1.00 6.08 O +ATOM 182 N VAL A 24 6.156 -19.987 -4.125 1.00 6.08 N +ATOM 183 CA VAL A 24 5.987 -18.582 -4.483 1.00 6.08 C +ATOM 184 C VAL A 24 7.353 -17.938 -4.708 1.00 6.08 C +ATOM 185 CB VAL A 24 5.206 -17.809 -3.397 1.00 6.08 C +ATOM 186 O VAL A 24 7.534 -17.165 -5.652 1.00 6.08 O +ATOM 187 CG1 VAL A 24 5.211 -16.310 -3.691 1.00 6.08 C +ATOM 188 CG2 VAL A 24 3.775 -18.332 -3.296 1.00 6.08 C 
+ATOM 189 N ILE A 25 8.365 -18.356 -3.827 1.00 6.08 N +ATOM 190 CA ILE A 25 9.724 -17.836 -3.937 1.00 6.08 C +ATOM 191 C ILE A 25 10.325 -18.244 -5.280 1.00 6.08 C +ATOM 192 CB ILE A 25 10.616 -18.332 -2.777 1.00 6.08 C +ATOM 193 O ILE A 25 11.011 -17.450 -5.928 1.00 6.08 O +ATOM 194 CG1 ILE A 25 10.127 -17.755 -1.444 1.00 6.08 C +ATOM 195 CG2 ILE A 25 12.081 -17.966 -3.028 1.00 6.08 C +ATOM 196 CD1 ILE A 25 10.848 -18.316 -0.226 1.00 6.08 C +ATOM 197 N LYS A 26 9.942 -19.394 -5.728 1.00 6.08 N +ATOM 198 CA LYS A 26 10.533 -19.885 -6.969 1.00 6.08 C +ATOM 199 C LYS A 26 9.961 -19.150 -8.178 1.00 6.08 C +ATOM 200 CB LYS A 26 10.303 -21.391 -7.115 1.00 6.08 C +ATOM 201 O LYS A 26 10.615 -19.055 -9.219 1.00 6.08 O +ATOM 202 CG LYS A 26 11.247 -22.244 -6.281 1.00 6.08 C +ATOM 203 CD LYS A 26 11.022 -23.730 -6.527 1.00 6.08 C +ATOM 204 CE LYS A 26 11.909 -24.587 -5.634 1.00 6.08 C +ATOM 205 NZ LYS A 26 11.672 -26.045 -5.852 1.00 6.08 N +ATOM 206 N SER A 27 8.716 -18.585 -8.016 1.00 6.08 N +ATOM 207 CA SER A 27 8.115 -17.884 -9.146 1.00 6.08 C +ATOM 208 C SER A 27 8.597 -16.439 -9.220 1.00 6.08 C +ATOM 209 CB SER A 27 6.589 -17.917 -9.047 1.00 6.08 C +ATOM 210 O SER A 27 8.389 -15.761 -10.229 1.00 6.08 O +ATOM 211 OG SER A 27 6.145 -17.239 -7.885 1.00 6.08 O +ATOM 212 N ILE A 28 9.326 -16.021 -8.127 1.00 6.08 N +ATOM 213 CA ILE A 28 9.655 -14.600 -8.095 1.00 6.08 C +ATOM 214 C ILE A 28 11.013 -14.370 -8.753 1.00 6.08 C +ATOM 215 CB ILE A 28 9.660 -14.055 -6.649 1.00 6.08 C +ATOM 216 O ILE A 28 12.005 -15.000 -8.381 1.00 6.08 O +ATOM 217 CG1 ILE A 28 8.282 -14.238 -6.004 1.00 6.08 C +ATOM 218 CG2 ILE A 28 10.082 -12.583 -6.629 1.00 6.08 C +ATOM 219 CD1 ILE A 28 8.241 -13.892 -4.522 1.00 6.08 C +ATOM 220 N LYS A 29 11.102 -13.748 -9.982 1.00 6.08 N +ATOM 221 CA LYS A 29 12.253 -13.354 -10.790 1.00 6.08 C +ATOM 222 C LYS A 29 12.954 -12.137 -10.192 1.00 6.08 C +ATOM 223 CB LYS A 29 11.825 -13.058 -12.228 1.00 6.08 C +ATOM 224 O LYS A 29 12.302 -11.156 -9.829 1.00 6.08 O 
+ATOM 225 CG LYS A 29 11.657 -14.299 -13.092 1.00 6.08 C +ATOM 226 CD LYS A 29 11.456 -13.937 -14.557 1.00 6.08 C +ATOM 227 CE LYS A 29 11.272 -15.178 -15.421 1.00 6.08 C +ATOM 228 NZ LYS A 29 11.145 -14.832 -16.868 1.00 6.08 N +ATOM 229 N GLY A 30 13.888 -12.322 -9.217 1.00 6.08 N +ATOM 230 CA GLY A 30 14.719 -11.185 -8.854 1.00 6.08 C +ATOM 231 C GLY A 30 14.960 -11.074 -7.361 1.00 6.08 C +ATOM 232 O GLY A 30 14.940 -9.975 -6.804 1.00 6.08 O +ATOM 233 N ILE A 31 15.279 -12.138 -6.638 1.00 6.08 N +ATOM 234 CA ILE A 31 15.591 -12.164 -5.214 1.00 6.08 C +ATOM 235 C ILE A 31 16.885 -11.396 -4.954 1.00 6.08 C +ATOM 236 CB ILE A 31 15.713 -13.612 -4.689 1.00 6.08 C +ATOM 237 O ILE A 31 17.945 -11.761 -5.468 1.00 6.08 O +ATOM 238 CG1 ILE A 31 14.396 -14.369 -4.897 1.00 6.08 C +ATOM 239 CG2 ILE A 31 16.121 -13.619 -3.212 1.00 6.08 C +ATOM 240 CD1 ILE A 31 14.475 -15.853 -4.565 1.00 6.08 C +ATOM 241 N LYS A 32 16.933 -10.089 -4.574 1.00 6.08 N +ATOM 242 CA LYS A 32 18.183 -9.378 -4.323 1.00 6.08 C +ATOM 243 C LYS A 32 18.755 -9.738 -2.954 1.00 6.08 C +ATOM 244 CB LYS A 32 17.970 -7.867 -4.420 1.00 6.08 C +ATOM 245 O LYS A 32 19.969 -9.892 -2.804 1.00 6.08 O +ATOM 246 CG LYS A 32 18.023 -7.324 -5.841 1.00 6.08 C +ATOM 247 CD LYS A 32 18.626 -5.926 -5.883 1.00 6.08 C +ATOM 248 CE LYS A 32 18.645 -5.367 -7.300 1.00 6.08 C +ATOM 249 NZ LYS A 32 19.320 -4.036 -7.361 1.00 6.08 N +ATOM 250 N SER A 33 17.909 -10.205 -1.893 1.00 6.08 N +ATOM 251 CA SER A 33 18.320 -10.572 -0.542 1.00 6.08 C +ATOM 252 C SER A 33 17.215 -11.331 0.185 1.00 6.08 C +ATOM 253 CB SER A 33 18.707 -9.328 0.258 1.00 6.08 C +ATOM 254 O SER A 33 16.099 -11.450 -0.324 1.00 6.08 O +ATOM 255 OG SER A 33 17.560 -8.560 0.581 1.00 6.08 O +ATOM 256 N LYS A 34 17.506 -12.068 1.188 1.00 6.08 N +ATOM 257 CA LYS A 34 16.610 -12.918 1.967 1.00 6.08 C +ATOM 258 C LYS A 34 15.403 -12.131 2.468 1.00 6.08 C +ATOM 259 CB LYS A 34 17.356 -13.542 3.148 1.00 6.08 C +ATOM 260 O LYS A 34 14.341 -12.706 2.718 1.00 6.08 O +ATOM 261 
CG LYS A 34 18.074 -14.840 2.810 1.00 6.08 C +ATOM 262 CD LYS A 34 18.733 -15.451 4.040 1.00 6.08 C +ATOM 263 CE LYS A 34 19.519 -16.707 3.689 1.00 6.08 C +ATOM 264 NZ LYS A 34 20.155 -17.318 4.894 1.00 6.08 N +ATOM 265 N HIS A 35 15.371 -10.863 2.266 1.00 5.36 N +ATOM 266 CA HIS A 35 14.261 -10.259 2.994 1.00 5.36 C +ATOM 267 C HIS A 35 13.500 -9.270 2.117 1.00 5.36 C +ATOM 268 CB HIS A 35 14.765 -9.560 4.258 1.00 5.36 C +ATOM 269 O HIS A 35 12.451 -8.760 2.515 1.00 5.36 O +ATOM 270 CG HIS A 35 15.436 -10.482 5.225 1.00 5.36 C +ATOM 271 CD2 HIS A 35 16.730 -10.574 5.614 1.00 5.36 C +ATOM 272 ND1 HIS A 35 14.755 -11.461 5.916 1.00 5.36 N +ATOM 273 CE1 HIS A 35 15.604 -12.116 6.691 1.00 5.36 C +ATOM 274 NE2 HIS A 35 16.808 -11.597 6.526 1.00 5.36 N +ATOM 275 N VAL A 36 14.059 -8.954 0.978 1.00 5.36 N +ATOM 276 CA VAL A 36 13.407 -7.973 0.118 1.00 5.36 C +ATOM 277 C VAL A 36 13.086 -8.603 -1.235 1.00 5.36 C +ATOM 278 CB VAL A 36 14.284 -6.715 -0.074 1.00 5.36 C +ATOM 279 O VAL A 36 13.962 -9.187 -1.878 1.00 5.36 O +ATOM 280 CG1 VAL A 36 13.591 -5.709 -0.992 1.00 5.36 C +ATOM 281 CG2 VAL A 36 14.605 -6.078 1.277 1.00 5.36 C +ATOM 282 N PHE A 37 11.770 -8.603 -1.484 1.00 5.36 N +ATOM 283 CA PHE A 37 11.361 -9.204 -2.749 1.00 5.36 C +ATOM 284 C PHE A 37 10.980 -8.129 -3.760 1.00 5.36 C +ATOM 285 CB PHE A 37 10.186 -10.163 -2.535 1.00 5.36 C +ATOM 286 O PHE A 37 10.245 -7.195 -3.434 1.00 5.36 O +ATOM 287 CG PHE A 37 10.500 -11.311 -1.614 1.00 5.36 C +ATOM 288 CD1 PHE A 37 10.346 -11.180 -0.239 1.00 5.36 C +ATOM 289 CD2 PHE A 37 10.949 -12.522 -2.124 1.00 5.36 C +ATOM 290 CE1 PHE A 37 10.636 -12.242 0.616 1.00 5.36 C +ATOM 291 CE2 PHE A 37 11.241 -13.587 -1.276 1.00 5.36 C +ATOM 292 CZ PHE A 37 11.084 -13.445 0.093 1.00 5.36 C +ATOM 293 N GLU A 38 11.560 -8.160 -4.884 1.00 5.36 N +ATOM 294 CA GLU A 38 11.250 -7.279 -6.006 1.00 5.36 C +ATOM 295 C GLU A 38 10.193 -7.897 -6.917 1.00 5.36 C +ATOM 296 CB GLU A 38 12.516 -6.965 -6.808 1.00 5.36 C +ATOM 297 O GLU A 38 10.363 
-9.016 -7.405 1.00 5.36 O +ATOM 298 CG GLU A 38 12.298 -5.965 -7.934 1.00 5.36 C +ATOM 299 CD GLU A 38 13.552 -5.698 -8.751 1.00 5.36 C +ATOM 300 OE1 GLU A 38 13.571 -6.025 -9.960 1.00 5.36 O +ATOM 301 OE2 GLU A 38 14.525 -5.158 -8.178 1.00 5.36 O +ATOM 302 N LEU A 39 9.040 -7.269 -6.940 1.00 5.36 N +ATOM 303 CA LEU A 39 7.988 -7.741 -7.834 1.00 5.36 C +ATOM 304 C LEU A 39 7.842 -6.816 -9.038 1.00 5.36 C +ATOM 305 CB LEU A 39 6.655 -7.839 -7.087 1.00 5.36 C +ATOM 306 O LEU A 39 7.250 -5.740 -8.931 1.00 5.36 O +ATOM 307 CG LEU A 39 6.571 -8.896 -5.984 1.00 5.36 C +ATOM 308 CD1 LEU A 39 5.425 -8.577 -5.030 1.00 5.36 C +ATOM 309 CD2 LEU A 39 6.401 -10.286 -6.587 1.00 5.36 C +ATOM 310 N PRO A 40 8.487 -7.251 -10.143 1.00 6.08 N +ATOM 311 CA PRO A 40 8.346 -6.359 -11.296 1.00 6.08 C +ATOM 312 C PRO A 40 6.896 -6.208 -11.751 1.00 6.08 C +ATOM 313 CB PRO A 40 9.189 -7.043 -12.376 1.00 6.08 C +ATOM 314 O PRO A 40 6.198 -7.207 -11.942 1.00 6.08 O +ATOM 315 CG PRO A 40 9.550 -8.371 -11.794 1.00 6.08 C +ATOM 316 CD PRO A 40 9.068 -8.416 -10.373 1.00 6.08 C +ATOM 317 N ILE A 41 6.243 -4.982 -11.734 1.00 6.08 N +ATOM 318 CA ILE A 41 4.884 -4.747 -12.210 1.00 6.08 C +ATOM 319 C ILE A 41 4.900 -4.508 -13.718 1.00 6.08 C +ATOM 320 CB ILE A 41 4.229 -3.551 -11.484 1.00 6.08 C +ATOM 321 O ILE A 41 4.158 -5.153 -14.463 1.00 6.08 O +ATOM 322 CG1 ILE A 41 4.176 -3.807 -9.974 1.00 6.08 C +ATOM 323 CG2 ILE A 41 2.829 -3.280 -12.044 1.00 6.08 C +ATOM 324 CD1 ILE A 41 3.647 -2.630 -9.165 1.00 6.08 C +ATOM 325 N ASN A 42 5.810 -3.703 -14.250 1.00 6.08 N +ATOM 326 CA ASN A 42 6.141 -3.456 -15.649 1.00 6.08 C +ATOM 327 C ASN A 42 7.598 -3.034 -15.814 1.00 6.08 C +ATOM 328 CB ASN A 42 5.210 -2.396 -16.242 1.00 6.08 C +ATOM 329 O ASN A 42 8.396 -3.167 -14.885 1.00 6.08 O +ATOM 330 CG ASN A 42 5.303 -1.067 -15.518 1.00 6.08 C +ATOM 331 ND2 ASN A 42 4.156 -0.448 -15.267 1.00 6.08 N +ATOM 332 OD1 ASN A 42 6.397 -0.600 -15.189 1.00 6.08 O +ATOM 333 N ASP A 43 7.989 -2.622 -17.089 1.00 6.08 N +ATOM 
334 CA ASP A 43 9.387 -2.328 -17.387 1.00 6.08 C +ATOM 335 C ASP A 43 9.898 -1.169 -16.534 1.00 6.08 C +ATOM 336 CB ASP A 43 9.563 -2.005 -18.873 1.00 6.08 C +ATOM 337 O ASP A 43 11.101 -1.051 -16.294 1.00 6.08 O +ATOM 338 CG ASP A 43 9.350 -3.211 -19.771 1.00 6.08 C +ATOM 339 OD1 ASP A 43 9.124 -3.033 -20.987 1.00 6.08 O +ATOM 340 OD2 ASP A 43 9.406 -4.349 -19.257 1.00 6.08 O +ATOM 341 N LYS A 44 8.964 -0.340 -16.036 1.00 6.08 N +ATOM 342 CA LYS A 44 9.421 0.879 -15.374 1.00 6.08 C +ATOM 343 C LYS A 44 9.078 0.858 -13.887 1.00 6.08 C +ATOM 344 CB LYS A 44 8.806 2.113 -16.036 1.00 6.08 C +ATOM 345 O LYS A 44 9.523 1.723 -13.130 1.00 6.08 O +ATOM 346 CG LYS A 44 9.329 2.388 -17.438 1.00 6.08 C +ATOM 347 CD LYS A 44 8.792 3.704 -17.986 1.00 6.08 C +ATOM 348 CE LYS A 44 9.281 3.960 -19.405 1.00 6.08 C +ATOM 349 NZ LYS A 44 8.775 5.260 -19.939 1.00 6.08 N +ATOM 350 N THR A 45 8.261 -0.019 -13.522 1.00 6.08 N +ATOM 351 CA THR A 45 7.740 0.030 -12.161 1.00 6.08 C +ATOM 352 C THR A 45 8.037 -1.272 -11.421 1.00 6.08 C +ATOM 353 CB THR A 45 6.223 0.293 -12.152 1.00 6.08 C +ATOM 354 O THR A 45 7.755 -2.359 -11.929 1.00 6.08 O +ATOM 355 CG2 THR A 45 5.716 0.544 -10.736 1.00 6.08 C +ATOM 356 OG1 THR A 45 5.938 1.442 -12.960 1.00 6.08 O +ATOM 357 N LYS A 46 8.731 -1.136 -10.322 1.00 5.36 N +ATOM 358 CA LYS A 46 9.040 -2.324 -9.532 1.00 5.36 C +ATOM 359 C LYS A 46 8.519 -2.186 -8.104 1.00 5.36 C +ATOM 360 CB LYS A 46 10.548 -2.581 -9.517 1.00 5.36 C +ATOM 361 O LYS A 46 8.374 -1.073 -7.595 1.00 5.36 O +ATOM 362 CG LYS A 46 11.163 -2.738 -10.899 1.00 5.36 C +ATOM 363 CD LYS A 46 12.677 -2.883 -10.824 1.00 5.36 C +ATOM 364 CE LYS A 46 13.302 -2.960 -12.211 1.00 5.36 C +ATOM 365 NZ LYS A 46 14.790 -2.843 -12.156 1.00 5.36 N +ATOM 366 N ARG A 47 8.105 -3.267 -7.558 1.00 5.36 N +ATOM 367 CA ARG A 47 7.620 -3.302 -6.182 1.00 5.36 C +ATOM 368 C ARG A 47 8.642 -3.956 -5.258 1.00 5.36 C +ATOM 369 CB ARG A 47 6.286 -4.047 -6.100 1.00 5.36 C +ATOM 370 O ARG A 47 9.291 -4.934 -5.635 1.00 
5.36 O +ATOM 371 CG ARG A 47 5.588 -3.915 -4.756 1.00 5.36 C +ATOM 372 CD ARG A 47 4.227 -4.596 -4.758 1.00 5.36 C +ATOM 373 NE ARG A 47 3.162 -3.674 -4.374 1.00 5.36 N +ATOM 374 NH1 ARG A 47 1.449 -5.188 -4.705 1.00 5.36 N +ATOM 375 NH2 ARG A 47 0.983 -3.060 -3.991 1.00 5.36 N +ATOM 376 CZ ARG A 47 1.867 -3.976 -4.358 1.00 5.36 C +ATOM 377 N TYR A 48 8.748 -3.477 -3.978 1.00 5.36 N +ATOM 378 CA TYR A 48 9.593 -4.175 -3.016 1.00 5.36 C +ATOM 379 C TYR A 48 8.779 -4.649 -1.818 1.00 5.36 C +ATOM 380 CB TYR A 48 10.734 -3.268 -2.546 1.00 5.36 C +ATOM 381 O TYR A 48 7.943 -3.908 -1.295 1.00 5.36 O +ATOM 382 CG TYR A 48 11.694 -2.881 -3.645 1.00 5.36 C +ATOM 383 CD1 TYR A 48 11.441 -1.781 -4.462 1.00 5.36 C +ATOM 384 CD2 TYR A 48 12.854 -3.613 -3.869 1.00 5.36 C +ATOM 385 CE1 TYR A 48 12.321 -1.421 -5.477 1.00 5.36 C +ATOM 386 CE2 TYR A 48 13.742 -3.263 -4.881 1.00 5.36 C +ATOM 387 OH TYR A 48 14.342 -1.815 -6.682 1.00 5.36 O +ATOM 388 CZ TYR A 48 13.467 -2.167 -5.678 1.00 5.36 C +ATOM 389 N ILE A 49 8.717 -5.888 -1.613 1.00 5.36 N +ATOM 390 CA ILE A 49 7.989 -6.430 -0.471 1.00 5.36 C +ATOM 391 C ILE A 49 8.975 -6.856 0.614 1.00 5.36 C +ATOM 392 CB ILE A 49 7.097 -7.622 -0.882 1.00 5.36 C +ATOM 393 O ILE A 49 10.017 -7.445 0.318 1.00 5.36 O +ATOM 394 CG1 ILE A 49 6.166 -7.220 -2.032 1.00 5.36 C +ATOM 395 CG2 ILE A 49 6.296 -8.136 0.317 1.00 5.36 C +ATOM 396 CD1 ILE A 49 5.367 -8.378 -2.615 1.00 5.36 C +ATOM 397 N LEU A 50 8.862 -6.262 1.857 1.00 5.36 N +ATOM 398 CA LEU A 50 9.614 -6.778 2.996 1.00 5.36 C +ATOM 399 C LEU A 50 8.946 -8.023 3.570 1.00 5.36 C +ATOM 400 CB LEU A 50 9.742 -5.707 4.083 1.00 5.36 C +ATOM 401 O LEU A 50 7.743 -8.018 3.842 1.00 5.36 O +ATOM 402 CG LEU A 50 11.100 -5.606 4.779 1.00 5.36 C +ATOM 403 CD1 LEU A 50 11.885 -4.416 4.238 1.00 5.36 C +ATOM 404 CD2 LEU A 50 10.920 -5.493 6.289 1.00 5.36 C +ATOM 405 N GLY A 51 9.463 -9.224 3.543 1.00 5.36 N +ATOM 406 CA GLY A 51 9.015 -10.310 4.400 1.00 5.36 C +ATOM 407 C GLY A 51 7.865 -11.102 3.807 1.00 
5.36 C +ATOM 408 O GLY A 51 6.957 -10.529 3.200 1.00 5.36 O +ATOM 409 N ALA A 52 8.049 -12.344 3.364 1.00 5.36 N +ATOM 410 CA ALA A 52 6.949 -13.257 3.063 1.00 5.36 C +ATOM 411 C ALA A 52 6.954 -14.454 4.009 1.00 5.36 C +ATOM 412 CB ALA A 52 7.031 -13.728 1.613 1.00 5.36 C +ATOM 413 O ALA A 52 8.016 -14.901 4.449 1.00 5.36 O +ATOM 414 N THR A 53 5.741 -14.626 4.821 1.00 6.08 N +ATOM 415 CA THR A 53 5.638 -15.818 5.655 1.00 6.08 C +ATOM 416 C THR A 53 4.816 -16.898 4.956 1.00 6.08 C +ATOM 417 CB THR A 53 5.005 -15.492 7.020 1.00 6.08 C +ATOM 418 O THR A 53 4.163 -16.631 3.944 1.00 6.08 O +ATOM 419 CG2 THR A 53 5.758 -14.364 7.719 1.00 6.08 C +ATOM 420 OG1 THR A 53 3.643 -15.093 6.826 1.00 6.08 O +ATOM 421 N GLU A 54 4.971 -18.204 5.377 1.00 6.08 N +ATOM 422 CA GLU A 54 4.419 -19.510 5.031 1.00 6.08 C +ATOM 423 C GLU A 54 2.894 -19.498 5.093 1.00 6.08 C +ATOM 424 CB GLU A 54 4.974 -20.593 5.960 1.00 6.08 C +ATOM 425 O GLU A 54 2.229 -20.148 4.283 1.00 6.08 O +ATOM 426 CG GLU A 54 6.145 -21.367 5.371 1.00 6.08 C +ATOM 427 CD GLU A 54 6.620 -22.505 6.261 1.00 6.08 C +ATOM 428 OE1 GLU A 54 7.586 -23.207 5.885 1.00 6.08 O +ATOM 429 OE2 GLU A 54 6.021 -22.696 7.343 1.00 6.08 O +ATOM 430 N THR A 55 2.284 -18.768 6.100 1.00 6.08 N +ATOM 431 CA THR A 55 0.931 -19.163 6.472 1.00 6.08 C +ATOM 432 C THR A 55 -0.065 -18.051 6.153 1.00 6.08 C +ATOM 433 CB THR A 55 0.844 -19.519 7.968 1.00 6.08 C +ATOM 434 O THR A 55 -1.275 -18.282 6.132 1.00 6.08 O +ATOM 435 CG2 THR A 55 1.686 -20.749 8.290 1.00 6.08 C +ATOM 436 OG1 THR A 55 1.318 -18.412 8.745 1.00 6.08 O +ATOM 437 N LYS A 56 0.188 -16.955 5.564 1.00 6.08 N +ATOM 438 CA LYS A 56 -0.870 -15.980 5.311 1.00 6.08 C +ATOM 439 C LYS A 56 -0.294 -14.671 4.777 1.00 6.08 C +ATOM 440 CB LYS A 56 -1.676 -15.719 6.584 1.00 6.08 C +ATOM 441 O LYS A 56 0.578 -14.069 5.406 1.00 6.08 O +ATOM 442 CG LYS A 56 -2.640 -16.838 6.947 1.00 6.08 C +ATOM 443 CD LYS A 56 -3.584 -16.421 8.067 1.00 6.08 C +ATOM 444 CE LYS A 56 -4.524 -17.554 8.458 1.00 
6.08 C +ATOM 445 NZ LYS A 56 -5.505 -17.125 9.499 1.00 6.08 N +ATOM 446 N GLU A 57 -0.274 -14.530 3.508 1.00 6.08 N +ATOM 447 CA GLU A 57 -0.600 -13.449 2.582 1.00 6.08 C +ATOM 448 C GLU A 57 -0.543 -12.092 3.276 1.00 6.08 C +ATOM 449 CB GLU A 57 -1.984 -13.668 1.966 1.00 6.08 C +ATOM 450 O GLU A 57 -0.932 -11.075 2.696 1.00 6.08 O +ATOM 451 CG GLU A 57 -1.976 -14.563 0.735 1.00 6.08 C +ATOM 452 CD GLU A 57 -3.338 -14.683 0.071 1.00 6.08 C +ATOM 453 OE1 GLU A 57 -3.423 -15.253 -1.040 1.00 6.08 O +ATOM 454 OE2 GLU A 57 -4.328 -14.203 0.667 1.00 6.08 O +ATOM 455 N GLU A 58 0.126 -11.928 4.480 1.00 6.08 N +ATOM 456 CA GLU A 58 -0.009 -10.492 4.706 1.00 6.08 C +ATOM 457 C GLU A 58 1.168 -9.726 4.108 1.00 6.08 C +ATOM 458 CB GLU A 58 -0.125 -10.192 6.203 1.00 6.08 C +ATOM 459 O GLU A 58 2.323 -10.126 4.271 1.00 6.08 O +ATOM 460 CG GLU A 58 -1.553 -9.953 6.673 1.00 6.08 C +ATOM 461 CD GLU A 58 -1.648 -9.595 8.147 1.00 6.08 C +ATOM 462 OE1 GLU A 58 -2.761 -9.279 8.625 1.00 6.08 O +ATOM 463 OE2 GLU A 58 -0.600 -9.629 8.830 1.00 6.08 O +ATOM 464 N VAL A 59 0.923 -8.970 3.115 1.00 5.36 N +ATOM 465 CA VAL A 59 1.676 -8.056 2.262 1.00 5.36 C +ATOM 466 C VAL A 59 1.616 -6.644 2.838 1.00 5.36 C +ATOM 467 CB VAL A 59 1.141 -8.064 0.812 1.00 5.36 C +ATOM 468 O VAL A 59 0.550 -6.026 2.875 1.00 5.36 O +ATOM 469 CG1 VAL A 59 2.113 -7.351 -0.126 1.00 5.36 C +ATOM 470 CG2 VAL A 59 0.894 -9.496 0.342 1.00 5.36 C +ATOM 471 N LEU A 60 2.134 -6.382 4.062 1.00 5.36 N +ATOM 472 CA LEU A 60 2.206 -4.926 4.093 1.00 5.36 C +ATOM 473 C LEU A 60 3.467 -4.459 4.813 1.00 5.36 C +ATOM 474 CB LEU A 60 0.968 -4.342 4.778 1.00 5.36 C +ATOM 475 O LEU A 60 3.645 -4.732 6.002 1.00 5.36 O +ATOM 476 CG LEU A 60 -0.362 -4.523 4.044 1.00 5.36 C +ATOM 477 CD1 LEU A 60 -1.528 -4.217 4.977 1.00 5.36 C +ATOM 478 CD2 LEU A 60 -0.414 -3.635 2.805 1.00 5.36 C +ATOM 479 N PRO A 61 4.690 -3.899 4.294 1.00 5.36 N +ATOM 480 CA PRO A 61 4.413 -2.559 3.771 1.00 5.36 C +ATOM 481 C PRO A 61 4.305 -2.531 2.248 1.00 5.36 C 
+ATOM 482 CB PRO A 61 5.614 -1.739 4.248 1.00 5.36 C +ATOM 483 O PRO A 61 4.930 -3.348 1.566 1.00 5.36 O +ATOM 484 CG PRO A 61 6.588 -2.750 4.760 1.00 5.36 C +ATOM 485 CD PRO A 61 5.908 -4.089 4.803 1.00 5.36 C +ATOM 486 N ASN A 62 3.352 -1.777 1.774 1.00 4.42 N +ATOM 487 CA ASN A 62 2.773 -1.209 0.561 1.00 4.42 C +ATOM 488 C ASN A 62 3.673 -0.134 -0.041 1.00 4.42 C +ATOM 489 CB ASN A 62 1.382 -0.637 0.847 1.00 4.42 C +ATOM 490 O ASN A 62 3.461 1.058 0.186 1.00 4.42 O +ATOM 491 CG ASN A 62 0.321 -1.714 0.963 1.00 4.42 C +ATOM 492 ND2 ASN A 62 -0.756 -1.410 1.679 1.00 4.42 N +ATOM 493 OD1 ASN A 62 0.468 -2.809 0.415 1.00 4.42 O +ATOM 494 N TYR A 63 4.982 -0.411 -0.311 1.00 4.42 N +ATOM 495 CA TYR A 63 5.766 0.563 -1.062 1.00 4.42 C +ATOM 496 C TYR A 63 5.569 0.381 -2.563 1.00 4.42 C +ATOM 497 CB TYR A 63 7.252 0.442 -0.712 1.00 4.42 C +ATOM 498 O TYR A 63 5.291 -0.726 -3.029 1.00 4.42 O +ATOM 499 CG TYR A 63 7.556 0.705 0.743 1.00 4.42 C +ATOM 500 CD1 TYR A 63 7.564 -0.333 1.672 1.00 4.42 C +ATOM 501 CD2 TYR A 63 7.835 1.992 1.191 1.00 4.42 C +ATOM 502 CE1 TYR A 63 7.842 -0.094 3.014 1.00 4.42 C +ATOM 503 CE2 TYR A 63 8.114 2.242 2.531 1.00 4.42 C +ATOM 504 OH TYR A 63 8.392 1.435 4.760 1.00 4.42 O +ATOM 505 CZ TYR A 63 8.116 1.194 3.433 1.00 4.42 C +ATOM 506 N VAL A 64 5.361 1.479 -3.257 1.00 4.42 N +ATOM 507 CA VAL A 64 5.323 1.484 -4.716 1.00 4.42 C +ATOM 508 C VAL A 64 6.456 2.350 -5.260 1.00 4.42 C +ATOM 509 CB VAL A 64 3.963 1.991 -5.246 1.00 4.42 C +ATOM 510 O VAL A 64 6.668 3.472 -4.794 1.00 4.42 O +ATOM 511 CG1 VAL A 64 4.005 2.161 -6.764 1.00 4.42 C +ATOM 512 CG2 VAL A 64 2.843 1.034 -4.844 1.00 4.42 C +ATOM 513 N LYS A 65 7.501 1.678 -5.944 1.00 5.36 N +ATOM 514 CA LYS A 65 8.535 2.445 -6.634 1.00 5.36 C +ATOM 515 C LYS A 65 8.078 2.847 -8.033 1.00 5.36 C +ATOM 516 CB LYS A 65 9.834 1.643 -6.717 1.00 5.36 C +ATOM 517 O LYS A 65 7.631 2.004 -8.812 1.00 5.36 O +ATOM 518 CG LYS A 65 11.017 2.434 -7.256 1.00 5.36 C +ATOM 519 CD LYS A 65 12.295 1.605 -7.245 
1.00 5.36 C +ATOM 520 CE LYS A 65 13.479 2.395 -7.786 1.00 5.36 C +ATOM 521 NZ LYS A 65 14.742 1.599 -7.745 1.00 5.36 N +ATOM 522 N VAL A 66 8.093 4.091 -8.333 1.00 5.36 N +ATOM 523 CA VAL A 66 7.778 4.641 -9.647 1.00 5.36 C +ATOM 524 C VAL A 66 9.008 5.340 -10.223 1.00 5.36 C +ATOM 525 CB VAL A 66 6.589 5.625 -9.580 1.00 5.36 C +ATOM 526 O VAL A 66 9.375 6.430 -9.779 1.00 5.36 O +ATOM 527 CG1 VAL A 66 6.222 6.125 -10.977 1.00 5.36 C +ATOM 528 CG2 VAL A 66 5.385 4.962 -8.914 1.00 5.36 C +ATOM 529 N GLY A 67 9.659 4.635 -11.193 1.00 5.36 N +ATOM 530 CA GLY A 67 10.950 5.142 -11.630 1.00 5.36 C +ATOM 531 C GLY A 67 12.012 5.081 -10.548 1.00 5.36 C +ATOM 532 O GLY A 67 12.305 4.007 -10.019 1.00 5.36 O +ATOM 533 N SER A 68 12.514 6.343 -10.271 1.00 5.36 N +ATOM 534 CA SER A 68 13.526 6.411 -9.222 1.00 5.36 C +ATOM 535 C SER A 68 12.902 6.759 -7.874 1.00 5.36 C +ATOM 536 CB SER A 68 14.598 7.442 -9.577 1.00 5.36 C +ATOM 537 O SER A 68 13.608 6.894 -6.873 1.00 5.36 O +ATOM 538 OG SER A 68 14.006 8.654 -10.011 1.00 5.36 O +ATOM 539 N ASP A 69 11.527 6.878 -7.937 1.00 5.36 N +ATOM 540 CA ASP A 69 10.872 7.348 -6.720 1.00 5.36 C +ATOM 541 C ASP A 69 10.208 6.193 -5.973 1.00 5.36 C +ATOM 542 CB ASP A 69 9.837 8.425 -7.049 1.00 5.36 C +ATOM 543 O ASP A 69 9.669 5.274 -6.594 1.00 5.36 O +ATOM 544 CG ASP A 69 10.453 9.672 -7.660 1.00 5.36 C +ATOM 545 OD1 ASP A 69 9.841 10.271 -8.571 1.00 5.36 O +ATOM 546 OD2 ASP A 69 11.561 10.057 -7.229 1.00 5.36 O +ATOM 547 N LEU A 70 10.310 6.167 -4.690 1.00 4.42 N +ATOM 548 CA LEU A 70 9.712 5.170 -3.810 1.00 4.42 C +ATOM 549 C LEU A 70 8.614 5.789 -2.952 1.00 4.42 C +ATOM 550 CB LEU A 70 10.780 4.537 -2.914 1.00 4.42 C +ATOM 551 O LEU A 70 8.810 6.853 -2.359 1.00 4.42 O +ATOM 552 CG LEU A 70 10.378 3.255 -2.182 1.00 4.42 C +ATOM 553 CD1 LEU A 70 10.240 2.102 -3.170 1.00 4.42 C +ATOM 554 CD2 LEU A 70 11.395 2.918 -1.097 1.00 4.42 C +ATOM 555 N TYR A 71 7.500 5.181 -2.953 1.00 4.42 N +ATOM 556 CA TYR A 71 6.370 5.703 -2.192 1.00 4.42 C +ATOM 
557 C TYR A 71 5.940 4.719 -1.110 1.00 4.42 C +ATOM 558 CB TYR A 71 5.190 6.006 -3.121 1.00 4.42 C +ATOM 559 O TYR A 71 5.994 3.504 -1.311 1.00 4.42 O +ATOM 560 CG TYR A 71 5.502 7.031 -4.184 1.00 4.42 C +ATOM 561 CD1 TYR A 71 6.112 6.661 -5.380 1.00 4.42 C +ATOM 562 CD2 TYR A 71 5.190 8.373 -3.993 1.00 4.42 C +ATOM 563 CE1 TYR A 71 6.403 7.603 -6.361 1.00 4.42 C +ATOM 564 CE2 TYR A 71 5.476 9.324 -4.967 1.00 4.42 C +ATOM 565 OH TYR A 71 6.367 9.867 -7.114 1.00 4.42 O +ATOM 566 CZ TYR A 71 6.081 8.930 -6.146 1.00 4.42 C +ATOM 567 N ARG A 72 5.750 5.231 0.077 1.00 4.42 N +ATOM 568 CA ARG A 72 5.102 4.470 1.140 1.00 4.42 C +ATOM 569 C ARG A 72 3.588 4.648 1.097 1.00 4.42 C +ATOM 570 CB ARG A 72 5.641 4.892 2.509 1.00 4.42 C +ATOM 571 O ARG A 72 3.093 5.772 0.987 1.00 4.42 O +ATOM 572 CG ARG A 72 5.185 4.000 3.652 1.00 4.42 C +ATOM 573 CD ARG A 72 5.797 4.427 4.979 1.00 4.42 C +ATOM 574 NE ARG A 72 5.284 3.630 6.090 1.00 4.42 N +ATOM 575 NH1 ARG A 72 6.305 4.890 7.736 1.00 4.42 N +ATOM 576 NH2 ARG A 72 5.019 3.079 8.304 1.00 4.42 N +ATOM 577 CZ ARG A 72 5.538 3.868 7.374 1.00 4.42 C +ATOM 578 N LEU A 73 2.852 3.545 1.039 1.00 3.21 N +ATOM 579 CA LEU A 73 1.395 3.610 1.029 1.00 3.21 C +ATOM 580 C LEU A 73 0.833 3.435 2.436 1.00 3.21 C +ATOM 581 CB LEU A 73 0.816 2.541 0.099 1.00 3.21 C +ATOM 582 O LEU A 73 1.261 2.544 3.173 1.00 3.21 O +ATOM 583 CG LEU A 73 1.212 2.639 -1.375 1.00 3.21 C +ATOM 584 CD1 LEU A 73 1.030 1.291 -2.064 1.00 3.21 C +ATOM 585 CD2 LEU A 73 0.396 3.719 -2.078 1.00 3.21 C +ATOM 586 N LYS A 74 0.064 4.404 2.921 1.00 4.42 N +ATOM 587 CA LYS A 74 -0.627 4.299 4.203 1.00 4.42 C +ATOM 588 C LYS A 74 -2.140 4.374 4.020 1.00 4.42 C +ATOM 589 CB LYS A 74 -0.159 5.400 5.156 1.00 4.42 C +ATOM 590 O LYS A 74 -2.643 5.256 3.320 1.00 4.42 O +ATOM 591 CG LYS A 74 -0.498 5.139 6.617 1.00 4.42 C +ATOM 592 CD LYS A 74 0.083 6.214 7.526 1.00 4.42 C +ATOM 593 CE LYS A 74 -0.315 5.993 8.979 1.00 4.42 C +ATOM 594 NZ LYS A 74 0.270 7.033 9.878 1.00 4.42 N +ATOM 595 N 
ALA A 75 -2.781 3.329 4.523 1.00 4.42 N +ATOM 596 CA ALA A 75 -4.241 3.343 4.479 1.00 4.42 C +ATOM 597 C ALA A 75 -4.826 3.741 5.831 1.00 4.42 C +ATOM 598 CB ALA A 75 -4.774 1.977 4.053 1.00 4.42 C +ATOM 599 O ALA A 75 -4.275 3.396 6.879 1.00 4.42 O +ATOM 600 N TYR A 76 -5.770 4.597 5.772 1.00 5.36 N +ATOM 601 CA TYR A 76 -6.435 4.990 7.009 1.00 5.36 C +ATOM 602 C TYR A 76 -7.924 5.215 6.781 1.00 5.36 C +ATOM 603 CB TYR A 76 -5.797 6.259 7.583 1.00 5.36 C +ATOM 604 O TYR A 76 -8.375 5.314 5.637 1.00 5.36 O +ATOM 605 CG TYR A 76 -5.887 7.453 6.665 1.00 5.36 C +ATOM 606 CD1 TYR A 76 -6.916 8.382 6.798 1.00 5.36 C +ATOM 607 CD2 TYR A 76 -4.946 7.653 5.661 1.00 5.36 C +ATOM 608 CE1 TYR A 76 -7.005 9.483 5.952 1.00 5.36 C +ATOM 609 CE2 TYR A 76 -5.024 8.750 4.809 1.00 5.36 C +ATOM 610 OH TYR A 76 -6.138 10.746 4.123 1.00 5.36 O +ATOM 611 CZ TYR A 76 -6.055 9.659 4.963 1.00 5.36 C +ATOM 612 N ARG A 77 -8.726 5.218 7.799 1.00 5.36 N +ATOM 613 CA ARG A 77 -10.169 5.419 7.723 1.00 5.36 C +ATOM 614 C ARG A 77 -10.570 6.753 8.344 1.00 5.36 C +ATOM 615 CB ARG A 77 -10.909 4.274 8.418 1.00 5.36 C +ATOM 616 O ARG A 77 -10.028 7.151 9.377 1.00 5.36 O +ATOM 617 CG ARG A 77 -12.424 4.372 8.322 1.00 5.36 C +ATOM 618 CD ARG A 77 -13.109 3.207 9.024 1.00 5.36 C +ATOM 619 NE ARG A 77 -13.016 1.977 8.244 1.00 5.36 N +ATOM 620 NH1 ARG A 77 -14.417 0.756 9.616 1.00 5.36 N +ATOM 621 NH2 ARG A 77 -13.484 -0.219 7.763 1.00 5.36 N +ATOM 622 CZ ARG A 77 -13.639 0.841 8.542 1.00 5.36 C +ATOM 623 N GLU A 78 -11.384 7.464 7.705 1.00 5.36 N +ATOM 624 CA GLU A 78 -12.045 8.659 8.221 1.00 5.36 C +ATOM 625 C GLU A 78 -13.563 8.519 8.164 1.00 5.36 C +ATOM 626 CB GLU A 78 -11.601 9.899 7.439 1.00 5.36 C +ATOM 627 O GLU A 78 -14.081 7.514 7.673 1.00 5.36 O +ATOM 628 CG GLU A 78 -10.154 10.298 7.690 1.00 5.36 C +ATOM 629 CD GLU A 78 -9.792 11.648 7.092 1.00 5.36 C +ATOM 630 OE1 GLU A 78 -8.647 12.113 7.291 1.00 5.36 O +ATOM 631 OE2 GLU A 78 -10.661 12.247 6.420 1.00 5.36 O +ATOM 632 N LYS A 79 -14.255 
9.510 8.676 1.00 5.36 N +ATOM 633 CA LYS A 79 -15.715 9.478 8.716 1.00 5.36 C +ATOM 634 C LYS A 79 -16.297 9.210 7.332 1.00 5.36 C +ATOM 635 CB LYS A 79 -16.265 10.793 9.271 1.00 5.36 C +ATOM 636 O LYS A 79 -17.266 8.460 7.195 1.00 5.36 O +ATOM 637 CG LYS A 79 -16.312 10.853 10.790 1.00 5.36 C +ATOM 638 CD LYS A 79 -16.955 12.144 11.279 1.00 5.36 C +ATOM 639 CE LYS A 79 -16.920 12.247 12.798 1.00 5.36 C +ATOM 640 NZ LYS A 79 -17.524 13.525 13.281 1.00 5.36 N +ATOM 641 N SER A 80 -15.615 9.685 6.280 1.00 5.36 N +ATOM 642 CA SER A 80 -16.194 9.676 4.940 1.00 5.36 C +ATOM 643 C SER A 80 -15.768 8.435 4.163 1.00 5.36 C +ATOM 644 CB SER A 80 -15.787 10.934 4.171 1.00 5.36 C +ATOM 645 O SER A 80 -16.312 8.148 3.094 1.00 5.36 O +ATOM 646 OG SER A 80 -14.376 11.064 4.131 1.00 5.36 O +ATOM 647 N GLY A 81 -14.790 7.696 4.700 1.00 5.36 N +ATOM 648 CA GLY A 81 -14.368 6.508 3.975 1.00 5.36 C +ATOM 649 C GLY A 81 -12.932 6.112 4.264 1.00 5.36 C +ATOM 650 O GLY A 81 -12.367 6.508 5.285 1.00 5.36 O +ATOM 651 N VAL A 82 -12.402 5.137 3.681 1.00 5.36 N +ATOM 652 CA VAL A 82 -11.030 4.653 3.791 1.00 5.36 C +ATOM 653 C VAL A 82 -10.148 5.364 2.767 1.00 5.36 C +ATOM 654 CB VAL A 82 -10.951 3.123 3.591 1.00 5.36 C +ATOM 655 O VAL A 82 -10.540 5.531 1.610 1.00 5.36 O +ATOM 656 CG1 VAL A 82 -9.503 2.642 3.674 1.00 5.36 C +ATOM 657 CG2 VAL A 82 -11.816 2.405 4.626 1.00 5.36 C +ATOM 658 N TYR A 83 -9.046 5.835 3.278 1.00 5.36 N +ATOM 659 CA TYR A 83 -8.137 6.587 2.421 1.00 5.36 C +ATOM 660 C TYR A 83 -6.784 5.894 2.319 1.00 5.36 C +ATOM 661 CB TYR A 83 -7.955 8.013 2.949 1.00 5.36 C +ATOM 662 O TYR A 83 -6.401 5.132 3.211 1.00 5.36 O +ATOM 663 CG TYR A 83 -9.240 8.802 3.021 1.00 5.36 C +ATOM 664 CD1 TYR A 83 -10.101 8.671 4.108 1.00 5.36 C +ATOM 665 CD2 TYR A 83 -9.595 9.680 2.003 1.00 5.36 C +ATOM 666 CE1 TYR A 83 -11.287 9.395 4.177 1.00 5.36 C +ATOM 667 CE2 TYR A 83 -10.778 10.409 2.062 1.00 5.36 C +ATOM 668 OH TYR A 83 -12.789 10.979 3.215 1.00 5.36 O +ATOM 669 CZ TYR A 83 -11.616 
10.260 3.151 1.00 5.36 C +ATOM 670 N VAL A 84 -6.217 5.977 1.159 1.00 4.42 N +ATOM 671 CA VAL A 84 -4.820 5.583 1.013 1.00 4.42 C +ATOM 672 C VAL A 84 -3.961 6.815 0.737 1.00 4.42 C +ATOM 673 CB VAL A 84 -4.638 4.544 -0.116 1.00 4.42 C +ATOM 674 O VAL A 84 -4.300 7.638 -0.117 1.00 4.42 O +ATOM 675 CG1 VAL A 84 -3.161 4.207 -0.308 1.00 4.42 C +ATOM 676 CG2 VAL A 84 -5.442 3.281 0.188 1.00 4.42 C +ATOM 677 N ARG A 85 -3.015 6.909 1.573 1.00 4.42 N +ATOM 678 CA ARG A 85 -2.052 8.001 1.472 1.00 4.42 C +ATOM 679 C ARG A 85 -0.722 7.508 0.912 1.00 4.42 C +ATOM 680 CB ARG A 85 -1.835 8.655 2.838 1.00 4.42 C +ATOM 681 O ARG A 85 -0.256 6.423 1.268 1.00 4.42 O +ATOM 682 CG ARG A 85 -1.113 9.992 2.773 1.00 4.42 C +ATOM 683 CD ARG A 85 -0.941 10.607 4.154 1.00 4.42 C +ATOM 684 NE ARG A 85 -2.049 11.496 4.491 1.00 4.42 N +ATOM 685 NH1 ARG A 85 -1.423 11.780 6.696 1.00 4.42 N +ATOM 686 NH2 ARG A 85 -3.293 12.829 5.887 1.00 4.42 N +ATOM 687 CZ ARG A 85 -2.252 12.033 5.691 1.00 4.42 C +ATOM 688 N THR A 86 -0.152 8.200 -0.153 1.00 4.42 N +ATOM 689 CA THR A 86 1.170 7.866 -0.672 1.00 4.42 C +ATOM 690 C THR A 86 2.192 8.924 -0.269 1.00 4.42 C +ATOM 691 CB THR A 86 1.148 7.726 -2.205 1.00 4.42 C +ATOM 692 O THR A 86 1.918 10.123 -0.351 1.00 4.42 O +ATOM 693 CG2 THR A 86 0.290 6.542 -2.638 1.00 4.42 C +ATOM 694 OG1 THR A 86 0.614 8.925 -2.781 1.00 4.42 O +ATOM 695 N ASN A 87 3.286 8.434 0.354 1.00 5.36 N +ATOM 696 CA ASN A 87 4.370 9.361 0.664 1.00 5.36 C +ATOM 697 C ASN A 87 5.610 9.075 -0.179 1.00 5.36 C +ATOM 698 CB ASN A 87 4.716 9.304 2.153 1.00 5.36 C +ATOM 699 O ASN A 87 6.062 7.932 -0.258 1.00 5.36 O +ATOM 700 CG ASN A 87 3.662 9.959 3.024 1.00 5.36 C +ATOM 701 ND2 ASN A 87 3.845 9.882 4.336 1.00 5.36 N +ATOM 702 OD1 ASN A 87 2.690 10.529 2.520 1.00 5.36 O +ATOM 703 N LYS A 88 6.095 10.057 -0.960 1.00 5.36 N +ATOM 704 CA LYS A 88 7.392 9.866 -1.603 1.00 5.36 C +ATOM 705 C LYS A 88 8.509 9.764 -0.569 1.00 5.36 C +ATOM 706 CB LYS A 88 7.682 11.009 -2.577 1.00 5.36 C +ATOM 707 O 
LYS A 88 8.639 10.630 0.299 1.00 5.36 O +ATOM 708 CG LYS A 88 8.904 10.780 -3.454 1.00 5.36 C +ATOM 709 CD LYS A 88 9.096 11.914 -4.452 1.00 5.36 C +ATOM 710 CE LYS A 88 10.325 11.692 -5.323 1.00 5.36 C +ATOM 711 NZ LYS A 88 10.494 12.781 -6.331 1.00 5.36 N +ATOM 712 N LEU A 89 9.164 8.645 -0.576 1.00 5.36 N +ATOM 713 CA LEU A 89 10.225 8.484 0.412 1.00 5.36 C +ATOM 714 C LEU A 89 11.425 9.361 0.069 1.00 5.36 C +ATOM 715 CB LEU A 89 10.658 7.018 0.498 1.00 5.36 C +ATOM 716 O LEU A 89 11.670 9.653 -1.104 1.00 5.36 O +ATOM 717 CG LEU A 89 9.718 6.077 1.253 1.00 5.36 C +ATOM 718 CD1 LEU A 89 10.100 4.624 0.990 1.00 5.36 C +ATOM 719 CD2 LEU A 89 9.744 6.380 2.747 1.00 5.36 C +ATOM 720 N GLY A 90 12.223 9.812 1.163 1.00 5.36 N +ATOM 721 CA GLY A 90 13.408 10.650 1.084 1.00 5.36 C +ATOM 722 C GLY A 90 13.092 12.133 1.100 1.00 5.36 C +ATOM 723 O GLY A 90 13.993 12.967 0.984 1.00 5.36 O +ATOM 724 N PHE A 91 11.852 12.465 1.066 1.00 6.08 N +ATOM 725 CA PHE A 91 11.399 13.841 1.233 1.00 6.08 C +ATOM 726 C PHE A 91 10.518 13.974 2.470 1.00 6.08 C +ATOM 727 CB PHE A 91 10.634 14.312 -0.008 1.00 6.08 C +ATOM 728 O PHE A 91 9.318 13.699 2.417 1.00 6.08 O +ATOM 729 CG PHE A 91 11.519 14.611 -1.188 1.00 6.08 C +ATOM 730 CD1 PHE A 91 11.797 13.630 -2.132 1.00 6.08 C +ATOM 731 CD2 PHE A 91 12.073 15.874 -1.353 1.00 6.08 C +ATOM 732 CE1 PHE A 91 12.615 13.904 -3.226 1.00 6.08 C +ATOM 733 CE2 PHE A 91 12.891 16.155 -2.443 1.00 6.08 C +ATOM 734 CZ PHE A 91 13.162 15.168 -3.378 1.00 6.08 C +ATOM 735 N GLU A 92 11.137 13.459 3.617 1.00 6.08 N +ATOM 736 CA GLU A 92 10.535 13.509 4.945 1.00 6.08 C +ATOM 737 C GLU A 92 10.313 14.950 5.397 1.00 6.08 C +ATOM 738 CB GLU A 92 11.409 12.768 5.960 1.00 6.08 C +ATOM 739 O GLU A 92 11.270 15.664 5.704 1.00 6.08 O +ATOM 740 CG GLU A 92 10.998 11.320 6.186 1.00 6.08 C +ATOM 741 CD GLU A 92 11.819 10.623 7.259 1.00 6.08 C +ATOM 742 OE1 GLU A 92 11.506 9.461 7.604 1.00 6.08 O +ATOM 743 OE2 GLU A 92 12.783 11.245 7.759 1.00 6.08 O +ATOM 744 N ASP A 93 
9.620 15.891 4.677 1.00 6.08 N +ATOM 745 CA ASP A 93 9.157 16.934 5.587 1.00 6.08 C +ATOM 746 C ASP A 93 7.745 16.640 6.087 1.00 6.08 C +ATOM 747 CB ASP A 93 9.199 18.302 4.902 1.00 6.08 C +ATOM 748 O ASP A 93 6.797 16.602 5.299 1.00 6.08 O +ATOM 749 CG ASP A 93 8.970 19.454 5.864 1.00 6.08 C +ATOM 750 OD1 ASP A 93 9.176 20.624 5.475 1.00 6.08 O +ATOM 751 OD2 ASP A 93 8.583 19.190 7.023 1.00 6.08 O +ATOM 752 N PRO A 94 7.665 15.732 7.134 1.00 6.08 N +ATOM 753 CA PRO A 94 6.298 15.546 7.627 1.00 6.08 C +ATOM 754 C PRO A 94 5.438 16.798 7.468 1.00 6.08 C +ATOM 755 CB PRO A 94 6.500 15.207 9.106 1.00 6.08 C +ATOM 756 O PRO A 94 4.211 16.701 7.386 1.00 6.08 O +ATOM 757 CG PRO A 94 7.963 15.397 9.346 1.00 6.08 C +ATOM 758 CD PRO A 94 8.633 15.652 8.026 1.00 6.08 C +ATOM 759 N LYS A 95 6.057 18.015 7.264 1.00 6.08 N +ATOM 760 CA LYS A 95 5.244 19.214 7.077 1.00 6.08 C +ATOM 761 C LYS A 95 5.046 19.517 5.595 1.00 6.08 C +ATOM 762 CB LYS A 95 5.887 20.413 7.776 1.00 6.08 C +ATOM 763 O LYS A 95 4.278 20.413 5.236 1.00 6.08 O +ATOM 764 CG LYS A 95 5.879 20.321 9.295 1.00 6.08 C +ATOM 765 CD LYS A 95 6.403 21.600 9.935 1.00 6.08 C +ATOM 766 CE LYS A 95 6.462 21.483 11.452 1.00 6.08 C +ATOM 767 NZ LYS A 95 6.972 22.736 12.085 1.00 6.08 N +ATOM 768 N SER A 96 5.718 18.813 4.741 1.00 6.08 N +ATOM 769 CA SER A 96 5.637 19.195 3.335 1.00 6.08 C +ATOM 770 C SER A 96 4.505 18.459 2.626 1.00 6.08 C +ATOM 771 CB SER A 96 6.963 18.912 2.626 1.00 6.08 C +ATOM 772 O SER A 96 4.417 17.231 2.696 1.00 6.08 O +ATOM 773 OG SER A 96 6.791 17.942 1.607 1.00 6.08 O +ATOM 774 N PHE A 97 3.312 18.985 2.701 1.00 6.08 N +ATOM 775 CA PHE A 97 2.069 18.645 2.019 1.00 6.08 C +ATOM 776 C PHE A 97 2.310 18.435 0.529 1.00 6.08 C +ATOM 777 CB PHE A 97 1.020 19.741 2.232 1.00 6.08 C +ATOM 778 O PHE A 97 1.445 17.913 -0.178 1.00 6.08 O +ATOM 779 CG PHE A 97 0.311 19.653 3.556 1.00 6.08 C +ATOM 780 CD1 PHE A 97 0.741 20.404 4.643 1.00 6.08 C +ATOM 781 CD2 PHE A 97 -0.787 18.817 3.714 1.00 6.08 C +ATOM 782 CE1 
PHE A 97 0.086 20.325 5.869 1.00 6.08 C +ATOM 783 CE2 PHE A 97 -1.447 18.732 4.937 1.00 6.08 C +ATOM 784 CZ PHE A 97 -1.008 19.486 6.014 1.00 6.08 C +ATOM 785 N LEU A 98 3.513 18.640 0.039 1.00 6.08 N +ATOM 786 CA LEU A 98 3.587 18.842 -1.404 1.00 6.08 C +ATOM 787 C LEU A 98 3.826 17.520 -2.126 1.00 6.08 C +ATOM 788 CB LEU A 98 4.699 19.835 -1.751 1.00 6.08 C +ATOM 789 O LEU A 98 3.478 17.377 -3.300 1.00 6.08 O +ATOM 790 CG LEU A 98 4.372 21.315 -1.552 1.00 6.08 C +ATOM 791 CD1 LEU A 98 5.651 22.145 -1.553 1.00 6.08 C +ATOM 792 CD2 LEU A 98 3.413 21.801 -2.634 1.00 6.08 C +ATOM 793 N SER A 99 3.933 16.395 -1.418 1.00 6.08 N +ATOM 794 CA SER A 99 3.847 15.299 -2.377 1.00 6.08 C +ATOM 795 C SER A 99 2.856 14.236 -1.915 1.00 6.08 C +ATOM 796 CB SER A 99 5.222 14.666 -2.593 1.00 6.08 C +ATOM 797 O SER A 99 3.126 13.039 -2.024 1.00 6.08 O +ATOM 798 OG SER A 99 6.045 14.848 -1.453 1.00 6.08 O +ATOM 799 N ILE A 100 1.863 14.672 -1.135 1.00 6.08 N +ATOM 800 CA ILE A 100 0.897 13.774 -0.510 1.00 6.08 C +ATOM 801 C ILE A 100 -0.327 13.626 -1.411 1.00 6.08 C +ATOM 802 CB ILE A 100 0.475 14.281 0.887 1.00 6.08 C +ATOM 803 O ILE A 100 -0.866 14.619 -1.905 1.00 6.08 O +ATOM 804 CG1 ILE A 100 1.693 14.364 1.815 1.00 6.08 C +ATOM 805 CG2 ILE A 100 -0.608 13.379 1.485 1.00 6.08 C +ATOM 806 CD1 ILE A 100 1.410 15.041 3.149 1.00 6.08 C +ATOM 807 N LYS A 101 -0.627 12.452 -2.003 1.00 6.08 N +ATOM 808 CA LYS A 101 -1.914 12.267 -2.668 1.00 6.08 C +ATOM 809 C LYS A 101 -2.834 11.369 -1.846 1.00 6.08 C +ATOM 810 CB LYS A 101 -1.717 11.677 -4.065 1.00 6.08 C +ATOM 811 O LYS A 101 -2.372 10.433 -1.190 1.00 6.08 O +ATOM 812 CG LYS A 101 -1.039 12.621 -5.046 1.00 6.08 C +ATOM 813 CD LYS A 101 -1.055 12.062 -6.463 1.00 6.08 C +ATOM 814 CE LYS A 101 -0.350 12.992 -7.441 1.00 6.08 C +ATOM 815 NZ LYS A 101 -0.430 12.486 -8.844 1.00 6.08 N +ATOM 816 N GLU A 102 -3.972 11.868 -1.441 1.00 6.08 N +ATOM 817 CA GLU A 102 -5.024 11.157 -0.721 1.00 6.08 C +ATOM 818 C GLU A 102 -6.074 10.605 -1.681 1.00 
6.08 C +ATOM 819 CB GLU A 102 -5.686 12.076 0.310 1.00 6.08 C +ATOM 820 O GLU A 102 -6.525 11.308 -2.587 1.00 6.08 O +ATOM 821 CG GLU A 102 -6.179 11.350 1.553 1.00 6.08 C +ATOM 822 CD GLU A 102 -6.742 12.286 2.611 1.00 6.08 C +ATOM 823 OE1 GLU A 102 -7.929 12.141 2.980 1.00 6.08 O +ATOM 824 OE2 GLU A 102 -5.989 13.172 3.073 1.00 6.08 O +ATOM 825 N TYR A 103 -6.415 9.341 -1.509 1.00 5.36 N +ATOM 826 CA TYR A 103 -7.466 8.743 -2.325 1.00 5.36 C +ATOM 827 C TYR A 103 -8.614 8.244 -1.456 1.00 5.36 C +ATOM 828 CB TYR A 103 -6.906 7.589 -3.163 1.00 5.36 C +ATOM 829 O TYR A 103 -8.406 7.443 -0.541 1.00 5.36 O +ATOM 830 CG TYR A 103 -5.758 7.990 -4.057 1.00 5.36 C +ATOM 831 CD1 TYR A 103 -4.440 7.899 -3.614 1.00 5.36 C +ATOM 832 CD2 TYR A 103 -5.988 8.459 -5.345 1.00 5.36 C +ATOM 833 CE1 TYR A 103 -3.379 8.269 -4.434 1.00 5.36 C +ATOM 834 CE2 TYR A 103 -4.935 8.832 -6.174 1.00 5.36 C +ATOM 835 OH TYR A 103 -2.589 9.101 -6.526 1.00 5.36 O +ATOM 836 CZ TYR A 103 -3.636 8.733 -5.710 1.00 5.36 C +ATOM 837 N LYS A 104 -9.836 8.844 -1.571 1.00 6.08 N +ATOM 838 CA LYS A 104 -11.038 8.471 -0.832 1.00 6.08 C +ATOM 839 C LYS A 104 -11.649 7.188 -1.387 1.00 6.08 C +ATOM 840 CB LYS A 104 -12.066 9.602 -0.872 1.00 6.08 C +ATOM 841 O LYS A 104 -11.782 7.033 -2.603 1.00 6.08 O +ATOM 842 CG LYS A 104 -13.244 9.402 0.070 1.00 6.08 C +ATOM 843 CD LYS A 104 -14.209 10.579 0.017 1.00 6.08 C +ATOM 844 CE LYS A 104 -15.411 10.360 0.925 1.00 6.08 C +ATOM 845 NZ LYS A 104 -16.346 11.524 0.898 1.00 6.08 N +ATOM 846 N PHE A 105 -11.952 6.243 -0.555 1.00 6.08 N +ATOM 847 CA PHE A 105 -12.601 4.981 -0.891 1.00 6.08 C +ATOM 848 C PHE A 105 -14.029 4.948 -0.359 1.00 6.08 C +ATOM 849 CB PHE A 105 -11.804 3.799 -0.330 1.00 6.08 C +ATOM 850 O PHE A 105 -14.252 5.098 0.844 1.00 6.08 O +ATOM 851 CG PHE A 105 -11.481 2.744 -1.354 1.00 6.08 C +ATOM 852 CD1 PHE A 105 -10.418 2.913 -2.234 1.00 6.08 C +ATOM 853 CD2 PHE A 105 -12.239 1.584 -1.436 1.00 6.08 C +ATOM 854 CE1 PHE A 105 -10.116 1.938 -3.182 1.00 6.08 
C +ATOM 855 CE2 PHE A 105 -11.944 0.606 -2.382 1.00 6.08 C +ATOM 856 CZ PHE A 105 -10.883 0.785 -3.254 1.00 6.08 C +ATOM 857 N GLY A 106 -15.105 4.749 -1.141 1.00 6.08 N +ATOM 858 CA GLY A 106 -16.469 4.462 -0.727 1.00 6.08 C +ATOM 859 C GLY A 106 -17.097 5.584 0.078 1.00 6.08 C +ATOM 860 O GLY A 106 -16.419 6.547 0.445 1.00 6.08 O +ATOM 861 N THR A 107 -18.460 5.770 -0.034 1.00 6.08 N +ATOM 862 CA THR A 107 -19.388 6.728 0.555 1.00 6.08 C +ATOM 863 C THR A 107 -20.150 6.098 1.717 1.00 6.08 C +ATOM 864 CB THR A 107 -20.386 7.256 -0.492 1.00 6.08 C +ATOM 865 O THR A 107 -21.036 5.267 1.507 1.00 6.08 O +ATOM 866 CG2 THR A 107 -19.699 8.182 -1.490 1.00 6.08 C +ATOM 867 OG1 THR A 107 -20.957 6.148 -1.200 1.00 6.08 O +ATOM 868 N ARG A 108 -19.518 5.476 2.709 1.00 6.08 N +ATOM 869 CA ARG A 108 -20.503 5.366 3.780 1.00 6.08 C +ATOM 870 C ARG A 108 -20.263 6.422 4.855 1.00 6.08 C +ATOM 871 CB ARG A 108 -20.469 3.968 4.402 1.00 6.08 C +ATOM 872 O ARG A 108 -19.123 6.646 5.268 1.00 6.08 O +ATOM 873 CG ARG A 108 -21.303 2.941 3.653 1.00 6.08 C +ATOM 874 CD ARG A 108 -21.351 1.609 4.389 1.00 6.08 C +ATOM 875 NE ARG A 108 -22.338 0.706 3.804 1.00 6.08 N +ATOM 876 NH1 ARG A 108 -22.022 -0.977 5.356 1.00 6.08 N +ATOM 877 NH2 ARG A 108 -23.549 -1.241 3.667 1.00 6.08 N +ATOM 878 CZ ARG A 108 -22.634 -0.502 4.277 1.00 6.08 C +ATOM 879 N THR A 109 -21.000 7.550 4.650 1.00 6.08 N +ATOM 880 CA THR A 109 -21.263 8.726 5.472 1.00 6.08 C +ATOM 881 C THR A 109 -21.852 8.323 6.821 1.00 6.08 C +ATOM 882 CB THR A 109 -22.219 9.702 4.762 1.00 6.08 C +ATOM 883 O THR A 109 -22.761 7.492 6.883 1.00 6.08 O +ATOM 884 CG2 THR A 109 -21.477 10.545 3.730 1.00 6.08 C +ATOM 885 OG1 THR A 109 -23.249 8.956 4.102 1.00 6.08 O +ATOM 886 N GLY A 110 -21.151 8.030 7.805 1.00 6.08 N +ATOM 887 CA GLY A 110 -21.715 8.302 9.117 1.00 6.08 C +ATOM 888 C GLY A 110 -20.888 7.731 10.253 1.00 6.08 C +ATOM 889 O GLY A 110 -20.720 6.514 10.354 1.00 6.08 O +ATOM 890 N GLY A 111 -20.142 8.505 10.862 1.00 6.08 N +ATOM 891 
CA GLY A 111 -19.936 8.533 12.301 1.00 6.08 C +ATOM 892 C GLY A 111 -18.518 8.904 12.693 1.00 6.08 C +ATOM 893 O GLY A 111 -17.638 9.007 11.836 1.00 6.08 O +ATOM 894 N ASN A 112 -18.313 9.718 13.747 1.00 6.08 N +ATOM 895 CA ASN A 112 -17.174 10.179 14.534 1.00 6.08 C +ATOM 896 C ASN A 112 -16.127 9.082 14.702 1.00 6.08 C +ATOM 897 CB ASN A 112 -17.637 10.685 15.902 1.00 6.08 C +ATOM 898 O ASN A 112 -16.444 7.978 15.149 1.00 6.08 O +ATOM 899 CG ASN A 112 -18.364 12.013 15.817 1.00 6.08 C +ATOM 900 ND2 ASN A 112 -19.255 12.262 16.770 1.00 6.08 N +ATOM 901 OD1 ASN A 112 -18.128 12.807 14.904 1.00 6.08 O +ATOM 902 N PHE A 113 -14.987 8.972 14.050 1.00 6.08 N +ATOM 903 CA PHE A 113 -14.107 7.891 14.478 1.00 6.08 C +ATOM 904 C PHE A 113 -12.665 8.373 14.576 1.00 6.08 C +ATOM 905 CB PHE A 113 -14.201 6.706 13.512 1.00 6.08 C +ATOM 906 O PHE A 113 -12.245 9.251 13.819 1.00 6.08 O +ATOM 907 CG PHE A 113 -15.131 5.617 13.975 1.00 6.08 C +ATOM 908 CD1 PHE A 113 -16.472 5.629 13.613 1.00 6.08 C +ATOM 909 CD2 PHE A 113 -14.664 4.581 14.774 1.00 6.08 C +ATOM 910 CE1 PHE A 113 -17.336 4.623 14.040 1.00 6.08 C +ATOM 911 CE2 PHE A 113 -15.520 3.572 15.204 1.00 6.08 C +ATOM 912 CZ PHE A 113 -16.856 3.595 14.837 1.00 6.08 C +ATOM 913 N THR A 114 -11.825 7.651 15.237 1.00 6.08 N +ATOM 914 CA THR A 114 -10.413 7.467 15.554 1.00 6.08 C +ATOM 915 C THR A 114 -10.065 5.983 15.631 1.00 6.08 C +ATOM 916 CB THR A 114 -10.044 8.154 16.882 1.00 6.08 C +ATOM 917 O THR A 114 -10.307 5.336 16.652 1.00 6.08 O +ATOM 918 CG2 THR A 114 -10.124 9.672 16.757 1.00 6.08 C +ATOM 919 OG1 THR A 114 -10.951 7.722 17.904 1.00 6.08 O +ATOM 920 N GLY A 115 -10.385 5.096 14.476 1.00 6.08 N +ATOM 921 CA GLY A 115 -9.894 3.756 14.757 1.00 6.08 C +ATOM 922 C GLY A 115 -9.108 3.156 13.607 1.00 6.08 C +ATOM 923 O GLY A 115 -8.864 3.823 12.600 1.00 6.08 O +ATOM 924 N GLU A 116 -8.292 2.078 13.803 1.00 6.08 N +ATOM 925 CA GLU A 116 -7.455 1.166 13.029 1.00 6.08 C +ATOM 926 C GLU A 116 -8.271 0.435 11.966 1.00 6.08 C 
+ATOM 927 CB GLU A 116 -6.766 0.156 13.950 1.00 6.08 C +ATOM 928 O GLU A 116 -9.471 0.214 12.141 1.00 6.08 O +ATOM 929 CG GLU A 116 -5.620 0.745 14.760 1.00 6.08 C +ATOM 930 CD GLU A 116 -4.833 -0.301 15.533 1.00 6.08 C +ATOM 931 OE1 GLU A 116 -3.793 0.046 16.138 1.00 6.08 O +ATOM 932 OE2 GLU A 116 -5.258 -1.478 15.533 1.00 6.08 O +ATOM 933 N LEU A 117 -7.867 0.276 10.616 1.00 5.36 N +ATOM 934 CA LEU A 117 -8.515 -0.475 9.546 1.00 5.36 C +ATOM 935 C LEU A 117 -8.722 -1.930 9.952 1.00 5.36 C +ATOM 936 CB LEU A 117 -7.684 -0.404 8.263 1.00 5.36 C +ATOM 937 O LEU A 117 -7.933 -2.483 10.723 1.00 5.36 O +ATOM 938 CG LEU A 117 -7.563 0.973 7.608 1.00 5.36 C +ATOM 939 CD1 LEU A 117 -6.504 0.947 6.511 1.00 5.36 C +ATOM 940 CD2 LEU A 117 -8.910 1.418 7.048 1.00 5.36 C +ATOM 941 N THR A 118 -9.851 -2.455 9.647 1.00 5.36 N +ATOM 942 CA THR A 118 -10.035 -3.895 9.790 1.00 5.36 C +ATOM 943 C THR A 118 -9.157 -4.654 8.798 1.00 5.36 C +ATOM 944 CB THR A 118 -11.508 -4.294 9.584 1.00 5.36 C +ATOM 945 O THR A 118 -8.605 -4.060 7.869 1.00 5.36 O +ATOM 946 CG2 THR A 118 -12.428 -3.487 10.493 1.00 5.36 C +ATOM 947 OG1 THR A 118 -11.875 -4.057 8.219 1.00 5.36 O +ATOM 948 N LYS A 119 -8.889 -5.923 9.112 1.00 6.08 N +ATOM 949 CA LYS A 119 -8.123 -6.769 8.201 1.00 6.08 C +ATOM 950 C LYS A 119 -8.726 -6.753 6.799 1.00 6.08 C +ATOM 951 CB LYS A 119 -8.057 -8.204 8.727 1.00 6.08 C +ATOM 952 O LYS A 119 -8.000 -6.675 5.806 1.00 6.08 O +ATOM 953 CG LYS A 119 -7.022 -9.073 8.027 1.00 6.08 C +ATOM 954 CD LYS A 119 -6.951 -10.463 8.644 1.00 6.08 C +ATOM 955 CE LYS A 119 -6.004 -11.371 7.872 1.00 6.08 C +ATOM 956 NZ LYS A 119 -5.944 -12.741 8.463 1.00 6.08 N +ATOM 957 N GLN A 120 -10.033 -6.874 6.687 1.00 6.08 N +ATOM 958 CA GLN A 120 -10.739 -6.867 5.410 1.00 6.08 C +ATOM 959 C GLN A 120 -10.510 -5.558 4.661 1.00 6.08 C +ATOM 960 CB GLN A 120 -12.236 -7.096 5.623 1.00 6.08 C +ATOM 961 O GLN A 120 -10.287 -5.560 3.449 1.00 6.08 O +ATOM 962 CG GLN A 120 -12.965 -7.594 4.383 1.00 6.08 C +ATOM 963 CD 
GLN A 120 -14.269 -8.298 4.711 1.00 6.08 C +ATOM 964 NE2 GLN A 120 -14.806 -9.032 3.742 1.00 6.08 N +ATOM 965 OE1 GLN A 120 -14.788 -8.184 5.826 1.00 6.08 O +ATOM 966 N GLU A 121 -10.563 -4.457 5.339 1.00 5.36 N +ATOM 967 CA GLU A 121 -10.315 -3.150 4.738 1.00 5.36 C +ATOM 968 C GLU A 121 -8.891 -3.052 4.198 1.00 5.36 C +ATOM 969 CB GLU A 121 -10.571 -2.034 5.753 1.00 5.36 C +ATOM 970 O GLU A 121 -8.667 -2.500 3.119 1.00 5.36 O +ATOM 971 CG GLU A 121 -12.041 -1.836 6.092 1.00 5.36 C +ATOM 972 CD GLU A 121 -12.266 -0.891 7.262 1.00 5.36 C +ATOM 973 OE1 GLU A 121 -13.256 -0.126 7.243 1.00 5.36 O +ATOM 974 OE2 GLU A 121 -11.443 -0.916 8.205 1.00 5.36 O +ATOM 975 N LEU A 122 -7.942 -3.571 4.960 1.00 5.36 N +ATOM 976 CA LEU A 122 -6.554 -3.544 4.512 1.00 5.36 C +ATOM 977 C LEU A 122 -6.385 -4.331 3.217 1.00 5.36 C +ATOM 978 CB LEU A 122 -5.632 -4.115 5.593 1.00 5.36 C +ATOM 979 O LEU A 122 -5.705 -3.876 2.294 1.00 5.36 O +ATOM 980 CG LEU A 122 -5.295 -3.183 6.758 1.00 5.36 C +ATOM 981 CD1 LEU A 122 -4.667 -3.971 7.903 1.00 5.36 C +ATOM 982 CD2 LEU A 122 -4.365 -2.066 6.298 1.00 5.36 C +ATOM 983 N VAL A 123 -6.991 -5.567 3.232 1.00 5.36 N +ATOM 984 CA VAL A 123 -6.926 -6.409 2.042 1.00 5.36 C +ATOM 985 C VAL A 123 -7.518 -5.664 0.848 1.00 5.36 C +ATOM 986 CB VAL A 123 -7.667 -7.749 2.255 1.00 5.36 C +ATOM 987 O VAL A 123 -6.921 -5.633 -0.230 1.00 5.36 O +ATOM 988 CG1 VAL A 123 -7.796 -8.509 0.936 1.00 5.36 C +ATOM 989 CG2 VAL A 123 -6.942 -8.599 3.296 1.00 5.36 C +ATOM 990 N TYR A 124 -8.671 -5.070 1.043 1.00 5.36 N +ATOM 991 CA TYR A 124 -9.359 -4.338 -0.015 1.00 5.36 C +ATOM 992 C TYR A 124 -8.521 -3.160 -0.496 1.00 5.36 C +ATOM 993 CB TYR A 124 -10.724 -3.844 0.473 1.00 5.36 C +ATOM 994 O TYR A 124 -8.387 -2.937 -1.702 1.00 5.36 O +ATOM 995 CG TYR A 124 -11.884 -4.667 -0.033 1.00 5.36 C +ATOM 996 CD1 TYR A 124 -12.554 -5.552 0.808 1.00 5.36 C +ATOM 997 CD2 TYR A 124 -12.312 -4.561 -1.352 1.00 5.36 C +ATOM 998 CE1 TYR A 124 -13.622 -6.314 0.346 1.00 5.36 C +ATOM 999 
CE2 TYR A 124 -13.379 -5.318 -1.825 1.00 5.36 C +ATOM 1000 OH TYR A 124 -15.084 -6.942 -1.432 1.00 5.36 O +ATOM 1001 CZ TYR A 124 -14.027 -6.190 -0.969 1.00 5.36 C +ATOM 1002 N THR A 125 -8.064 -2.317 0.476 1.00 5.36 N +ATOM 1003 CA THR A 125 -7.230 -1.175 0.117 1.00 5.36 C +ATOM 1004 C THR A 125 -6.008 -1.626 -0.679 1.00 5.36 C +ATOM 1005 CB THR A 125 -6.774 -0.402 1.368 1.00 5.36 C +ATOM 1006 O THR A 125 -5.643 -0.999 -1.675 1.00 5.36 O +ATOM 1007 CG2 THR A 125 -7.392 0.992 1.408 1.00 5.36 C +ATOM 1008 OG1 THR A 125 -7.173 -1.123 2.540 1.00 5.36 O +ATOM 1009 N ASN A 126 -5.437 -2.705 -0.271 1.00 5.36 N +ATOM 1010 CA ASN A 126 -4.265 -3.214 -0.977 1.00 5.36 C +ATOM 1011 C ASN A 126 -4.614 -3.656 -2.395 1.00 5.36 C +ATOM 1012 CB ASN A 126 -3.631 -4.370 -0.201 1.00 5.36 C +ATOM 1013 O ASN A 126 -3.866 -3.384 -3.336 1.00 5.36 O +ATOM 1014 CG ASN A 126 -2.693 -3.896 0.891 1.00 5.36 C +ATOM 1015 ND2 ASN A 126 -2.429 -4.760 1.864 1.00 5.36 N +ATOM 1016 OD1 ASN A 126 -2.209 -2.761 0.861 1.00 5.36 O +ATOM 1017 N GLN A 127 -5.691 -4.483 -2.497 1.00 5.36 N +ATOM 1018 CA GLN A 127 -6.138 -4.897 -3.823 1.00 5.36 C +ATOM 1019 C GLN A 127 -6.385 -3.689 -4.722 1.00 5.36 C +ATOM 1020 CB GLN A 127 -7.407 -5.744 -3.721 1.00 5.36 C +ATOM 1021 O GLN A 127 -5.973 -3.679 -5.884 1.00 5.36 O +ATOM 1022 CG GLN A 127 -7.729 -6.525 -4.989 1.00 5.36 C +ATOM 1023 CD GLN A 127 -8.927 -7.440 -4.826 1.00 5.36 C +ATOM 1024 NE2 GLN A 127 -9.350 -8.064 -5.921 1.00 5.36 N +ATOM 1025 OE1 GLN A 127 -9.468 -7.587 -3.726 1.00 5.36 O +ATOM 1026 N TRP A 128 -7.149 -2.688 -4.151 1.00 5.36 N +ATOM 1027 CA TRP A 128 -7.418 -1.471 -4.911 1.00 5.36 C +ATOM 1028 C TRP A 128 -6.119 -0.818 -5.371 1.00 5.36 C +ATOM 1029 CB TRP A 128 -8.232 -0.482 -4.072 1.00 5.36 C +ATOM 1030 O TRP A 128 -5.992 -0.427 -6.534 1.00 5.36 O +ATOM 1031 CG TRP A 128 -8.637 0.757 -4.813 1.00 5.36 C +ATOM 1032 CD1 TRP A 128 -9.759 0.931 -5.576 1.00 5.36 C +ATOM 1033 CD2 TRP A 128 -7.919 1.993 -4.866 1.00 5.36 C +ATOM 1034 CE2 TRP A 128 
-8.665 2.875 -5.680 1.00 5.36 C +ATOM 1035 CE3 TRP A 128 -6.717 2.442 -4.304 1.00 5.36 C +ATOM 1036 NE1 TRP A 128 -9.781 2.203 -6.099 1.00 5.36 N +ATOM 1037 CH2 TRP A 128 -7.067 4.595 -5.381 1.00 5.36 C +ATOM 1038 CZ2 TRP A 128 -8.247 4.181 -5.944 1.00 5.36 C +ATOM 1039 CZ3 TRP A 128 -6.302 3.742 -4.568 1.00 5.36 C +ATOM 1040 N VAL A 129 -5.185 -0.613 -4.368 1.00 4.42 N +ATOM 1041 CA VAL A 129 -3.899 -0.000 -4.687 1.00 4.42 C +ATOM 1042 C VAL A 129 -3.213 -0.789 -5.800 1.00 4.42 C +ATOM 1043 CB VAL A 129 -2.983 0.077 -3.446 1.00 4.42 C +ATOM 1044 O VAL A 129 -2.691 -0.205 -6.753 1.00 4.42 O +ATOM 1045 CG1 VAL A 129 -1.562 0.471 -3.846 1.00 4.42 C +ATOM 1046 CG2 VAL A 129 -3.548 1.065 -2.427 1.00 4.42 C +ATOM 1047 N ASN A 130 -3.323 -2.084 -5.765 1.00 5.36 N +ATOM 1048 CA ASN A 130 -2.699 -2.948 -6.762 1.00 5.36 C +ATOM 1049 C ASN A 130 -3.320 -2.751 -8.142 1.00 5.36 C +ATOM 1050 CB ASN A 130 -2.799 -4.415 -6.339 1.00 5.36 C +ATOM 1051 O ASN A 130 -2.613 -2.751 -9.151 1.00 5.36 O +ATOM 1052 CG ASN A 130 -1.767 -4.793 -5.294 1.00 5.36 C +ATOM 1053 ND2 ASN A 130 -2.027 -5.874 -4.568 1.00 5.36 N +ATOM 1054 OD1 ASN A 130 -0.746 -4.118 -5.140 1.00 5.36 O +ATOM 1055 N GLU A 131 -4.600 -2.603 -8.153 1.00 5.36 N +ATOM 1056 CA GLU A 131 -5.337 -2.535 -9.412 1.00 5.36 C +ATOM 1057 C GLU A 131 -5.256 -1.140 -10.024 1.00 5.36 C +ATOM 1058 CB GLU A 131 -6.800 -2.932 -9.200 1.00 5.36 C +ATOM 1059 O GLU A 131 -5.324 -0.988 -11.246 1.00 5.36 O +ATOM 1060 CG GLU A 131 -7.000 -4.413 -8.911 1.00 5.36 C +ATOM 1061 CD GLU A 131 -8.445 -4.776 -8.611 1.00 5.36 C +ATOM 1062 OE1 GLU A 131 -8.733 -5.970 -8.365 1.00 5.36 O +ATOM 1063 OE2 GLU A 131 -9.297 -3.860 -8.623 1.00 5.36 O +ATOM 1064 N ASN A 132 -5.147 -0.162 -9.186 1.00 5.36 N +ATOM 1065 CA ASN A 132 -5.357 1.190 -9.693 1.00 5.36 C +ATOM 1066 C ASN A 132 -4.037 1.936 -9.863 1.00 5.36 C +ATOM 1067 CB ASN A 132 -6.292 1.971 -8.767 1.00 5.36 C +ATOM 1068 O ASN A 132 -3.931 2.838 -10.696 1.00 5.36 O +ATOM 1069 CG ASN A 132 -7.729 1.494 
-8.851 1.00 5.36 C +ATOM 1070 ND2 ASN A 132 -8.165 0.745 -7.845 1.00 5.36 N +ATOM 1071 OD1 ASN A 132 -8.441 1.795 -9.813 1.00 5.36 O +ATOM 1072 N ILE A 133 -3.015 1.608 -8.992 1.00 5.36 N +ATOM 1073 CA ILE A 133 -1.781 2.384 -9.062 1.00 5.36 C +ATOM 1074 C ILE A 133 -0.907 1.861 -10.200 1.00 5.36 C +ATOM 1075 CB ILE A 133 -1.007 2.336 -7.726 1.00 5.36 C +ATOM 1076 O ILE A 133 -0.112 2.609 -10.773 1.00 5.36 O +ATOM 1077 CG1 ILE A 133 -1.777 3.089 -6.635 1.00 5.36 C +ATOM 1078 CG2 ILE A 133 0.402 2.911 -7.897 1.00 5.36 C +ATOM 1079 CD1 ILE A 133 -1.113 3.045 -5.266 1.00 5.36 C +ATOM 1080 N THR A 134 -1.214 0.937 -10.975 1.00 6.08 N +ATOM 1081 CA THR A 134 -0.412 0.479 -12.104 1.00 6.08 C +ATOM 1082 C THR A 134 -0.466 1.485 -13.250 1.00 6.08 C +ATOM 1083 CB THR A 134 -0.887 -0.898 -12.605 1.00 6.08 C +ATOM 1084 O THR A 134 0.513 1.656 -13.980 1.00 6.08 O +ATOM 1085 CG2 THR A 134 0.024 -2.012 -12.097 1.00 6.08 C +ATOM 1086 OG1 THR A 134 -2.220 -1.137 -12.137 1.00 6.08 O +ATOM 1087 N LEU A 135 -1.656 2.194 -13.502 1.00 6.08 N +ATOM 1088 CA LEU A 135 -2.187 2.386 -14.847 1.00 6.08 C +ATOM 1089 C LEU A 135 -2.051 3.841 -15.283 1.00 6.08 C +ATOM 1090 CB LEU A 135 -3.656 1.958 -14.908 1.00 6.08 C +ATOM 1091 O LEU A 135 -2.144 4.147 -16.474 1.00 6.08 O +ATOM 1092 CG LEU A 135 -3.923 0.452 -14.925 1.00 6.08 C +ATOM 1093 CD1 LEU A 135 -5.388 0.171 -14.609 1.00 6.08 C +ATOM 1094 CD2 LEU A 135 -3.538 -0.144 -16.274 1.00 6.08 C +ATOM 1095 N ALA A 136 -1.314 4.761 -14.697 1.00 6.08 N +ATOM 1096 CA ALA A 136 -1.322 5.912 -15.597 1.00 6.08 C +ATOM 1097 C ALA A 136 0.036 6.607 -15.610 1.00 6.08 C +ATOM 1098 CB ALA A 136 -2.417 6.896 -15.193 1.00 6.08 C +ATOM 1099 O ALA A 136 0.626 6.851 -14.555 1.00 6.08 O +ATOM 1100 N ASN A 137 1.001 6.147 -16.376 1.00 6.08 N +ATOM 1101 CA ASN A 137 1.962 7.100 -16.923 1.00 6.08 C +ATOM 1102 C ASN A 137 2.561 7.982 -15.832 1.00 6.08 C +ATOM 1103 CB ASN A 137 1.305 7.963 -18.003 1.00 6.08 C +ATOM 1104 O ASN A 137 2.685 9.196 -16.007 1.00 6.08 O +ATOM 
1105 CG ASN A 137 1.244 7.267 -19.349 1.00 6.08 C +ATOM 1106 ND2 ASN A 137 0.354 7.734 -20.218 1.00 6.08 N +ATOM 1107 OD1 ASN A 137 1.989 6.318 -19.606 1.00 6.08 O +ATOM 1108 N GLY A 138 2.939 7.445 -14.762 1.00 6.08 N +ATOM 1109 CA GLY A 138 3.642 8.286 -13.808 1.00 6.08 C +ATOM 1110 C GLY A 138 2.721 9.212 -13.036 1.00 6.08 C +ATOM 1111 O GLY A 138 3.179 10.003 -12.209 1.00 6.08 O +ATOM 1112 N TYR A 139 1.463 9.234 -13.302 1.00 6.08 N +ATOM 1113 CA TYR A 139 0.469 10.113 -12.696 1.00 6.08 C +ATOM 1114 C TYR A 139 -0.736 9.318 -12.207 1.00 6.08 C +ATOM 1115 CB TYR A 139 0.019 11.184 -13.694 1.00 6.08 C +ATOM 1116 O TYR A 139 -1.121 8.321 -12.822 1.00 6.08 O +ATOM 1117 CG TYR A 139 0.919 12.395 -13.732 1.00 6.08 C +ATOM 1118 CD1 TYR A 139 1.986 12.467 -14.625 1.00 6.08 C +ATOM 1119 CD2 TYR A 139 0.705 13.469 -12.874 1.00 6.08 C +ATOM 1120 CE1 TYR A 139 2.819 13.580 -14.662 1.00 6.08 C +ATOM 1121 CE2 TYR A 139 1.532 14.587 -12.903 1.00 6.08 C +ATOM 1122 OH TYR A 139 3.406 15.738 -13.831 1.00 6.08 O +ATOM 1123 CZ TYR A 139 2.584 14.634 -13.799 1.00 6.08 C +ATOM 1124 N ILE A 140 -0.939 9.058 -10.837 1.00 6.08 N +ATOM 1125 CA ILE A 140 -2.155 8.654 -10.138 1.00 6.08 C +ATOM 1126 C ILE A 140 -3.276 9.645 -10.440 1.00 6.08 C +ATOM 1127 CB ILE A 140 -1.924 8.553 -8.614 1.00 6.08 C +ATOM 1128 O ILE A 140 -3.182 10.824 -10.091 1.00 6.08 O +ATOM 1129 CG1 ILE A 140 -0.789 7.569 -8.310 1.00 6.08 C +ATOM 1130 CG2 ILE A 140 -3.213 8.140 -7.899 1.00 6.08 C +ATOM 1131 CD1 ILE A 140 -0.415 7.492 -6.836 1.00 6.08 C +ATOM 1132 N SER A 141 -3.684 9.839 -11.715 1.00 6.08 N +ATOM 1133 CA SER A 141 -4.883 10.669 -11.786 1.00 6.08 C +ATOM 1134 C SER A 141 -6.105 9.922 -11.262 1.00 6.08 C +ATOM 1135 CB SER A 141 -5.133 11.127 -13.224 1.00 6.08 C +ATOM 1136 O SER A 141 -6.378 8.796 -11.682 1.00 6.08 O +ATOM 1137 OG SER A 141 -6.496 10.963 -13.574 1.00 6.08 O +ATOM 1138 N ALA A 142 -6.198 9.687 -9.878 1.00 6.08 N +ATOM 1139 CA ALA A 142 -7.356 9.155 -9.164 1.00 6.08 C +ATOM 1140 C ALA A 142 
-8.656 9.539 -9.865 1.00 6.08 C +ATOM 1141 CB ALA A 142 -7.366 9.651 -7.721 1.00 6.08 C +ATOM 1142 O ALA A 142 -8.874 10.710 -10.184 1.00 6.08 O +ATOM 1143 N ASP A 143 -9.016 8.898 -10.877 1.00 6.08 N +ATOM 1144 CA ASP A 143 -10.425 8.964 -11.253 1.00 6.08 C +ATOM 1145 C ASP A 143 -11.295 9.345 -10.057 1.00 6.08 C +ATOM 1146 CB ASP A 143 -10.889 7.628 -11.836 1.00 6.08 C +ATOM 1147 O ASP A 143 -11.158 8.769 -8.975 1.00 6.08 O +ATOM 1148 CG ASP A 143 -11.385 7.746 -13.267 1.00 6.08 C +ATOM 1149 OD1 ASP A 143 -11.573 6.706 -13.934 1.00 6.08 O +ATOM 1150 OD2 ASP A 143 -11.586 8.889 -13.731 1.00 6.08 O +ATOM 1151 N SER A 144 -11.432 10.610 -9.724 1.00 6.08 N +ATOM 1152 CA SER A 144 -12.633 11.251 -9.197 1.00 6.08 C +ATOM 1153 C SER A 144 -13.803 10.274 -9.147 1.00 6.08 C +ATOM 1154 CB SER A 144 -13.009 12.466 -10.046 1.00 6.08 C +ATOM 1155 O SER A 144 -14.946 10.678 -8.919 1.00 6.08 O +ATOM 1156 OG SER A 144 -12.987 12.143 -11.426 1.00 6.08 O +ATOM 1157 N ARG A 145 -13.625 8.971 -9.055 1.00 6.08 N +ATOM 1158 CA ARG A 145 -14.877 8.231 -8.942 1.00 6.08 C +ATOM 1159 C ARG A 145 -15.517 8.442 -7.574 1.00 6.08 C +ATOM 1160 CB ARG A 145 -14.644 6.738 -9.187 1.00 6.08 C +ATOM 1161 O ARG A 145 -14.826 8.444 -6.554 1.00 6.08 O +ATOM 1162 CG ARG A 145 -14.402 6.383 -10.645 1.00 6.08 C +ATOM 1163 CD ARG A 145 -14.336 4.877 -10.856 1.00 6.08 C +ATOM 1164 NE ARG A 145 -13.186 4.497 -11.671 1.00 6.08 N +ATOM 1165 NH1 ARG A 145 -13.735 2.255 -11.769 1.00 6.08 N +ATOM 1166 NH2 ARG A 145 -11.852 3.025 -12.824 1.00 6.08 N +ATOM 1167 CZ ARG A 145 -12.927 3.260 -12.086 1.00 6.08 C +ATOM 1168 N THR A 146 -16.379 9.419 -7.415 1.00 6.08 N +ATOM 1169 CA THR A 146 -17.507 9.485 -6.494 1.00 6.08 C +ATOM 1170 C THR A 146 -18.280 8.169 -6.491 1.00 6.08 C +ATOM 1171 CB THR A 146 -18.458 10.641 -6.856 1.00 6.08 C +ATOM 1172 O THR A 146 -18.534 7.590 -7.549 1.00 6.08 O +ATOM 1173 CG2 THR A 146 -18.028 11.939 -6.180 1.00 6.08 C +ATOM 1174 OG1 THR A 146 -18.451 10.830 -8.276 1.00 6.08 O +ATOM 1175 N 
VAL A 147 -17.785 7.142 -5.708 1.00 6.08 N +ATOM 1176 CA VAL A 147 -18.621 5.978 -5.435 1.00 6.08 C +ATOM 1177 C VAL A 147 -20.048 6.427 -5.126 1.00 6.08 C +ATOM 1178 CB VAL A 147 -18.061 5.139 -4.264 1.00 6.08 C +ATOM 1179 O VAL A 147 -20.261 7.305 -4.287 1.00 6.08 O +ATOM 1180 CG1 VAL A 147 -18.638 3.725 -4.289 1.00 6.08 C +ATOM 1181 CG2 VAL A 147 -16.535 5.098 -4.321 1.00 6.08 C +ATOM 1182 N ASP A 148 -20.960 6.728 -6.190 1.00 6.08 N +ATOM 1183 CA ASP A 148 -22.394 6.829 -5.938 1.00 6.08 C +ATOM 1184 C ASP A 148 -22.901 5.619 -5.157 1.00 6.08 C +ATOM 1185 CB ASP A 148 -23.162 6.965 -7.254 1.00 6.08 C +ATOM 1186 O ASP A 148 -22.505 4.485 -5.432 1.00 6.08 O +ATOM 1187 CG ASP A 148 -22.902 8.285 -7.959 1.00 6.08 C +ATOM 1188 OD1 ASP A 148 -23.140 8.380 -9.182 1.00 6.08 O +ATOM 1189 OD2 ASP A 148 -22.451 9.237 -7.286 1.00 6.08 O +TER 1190 ASP A 148 +ENDMDL +END diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/relax/testdata/with_violations_casp14.pdb --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/relax/testdata/with_violations_casp14.pdb Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,1193 @@ +MODEL 0 +ATOM 1 N SER A 1 27.311 -3.395 37.375 1.00 8.64 N +ATOM 2 CA SER A 1 26.072 -4.109 37.084 1.00 8.64 C +ATOM 3 C SER A 1 26.047 -4.608 35.643 1.00 8.64 C +ATOM 4 CB SER A 1 24.862 -3.211 37.342 1.00 8.64 C +ATOM 5 O SER A 1 26.782 -4.101 34.792 1.00 8.64 O +ATOM 6 OG SER A 1 24.740 -2.228 36.329 1.00 8.64 O +ATOM 7 N PHE A 2 25.619 -5.987 35.357 1.00 8.64 N +ATOM 8 CA PHE A 2 25.448 -6.479 33.995 1.00 8.64 C +ATOM 9 C PHE A 2 25.049 -5.347 33.056 1.00 8.64 C +ATOM 10 CB PHE A 2 24.395 -7.591 33.953 1.00 8.64 C +ATOM 11 O PHE A 2 25.590 -5.226 31.955 1.00 8.64 O +ATOM 12 CG PHE A 2 24.140 -8.134 32.573 1.00 8.64 C +ATOM 13 CD1 PHE A 2 25.003 -9.063 32.006 1.00 8.64 C +ATOM 14 CD2 PHE A 2 23.036 -7.714 31.842 1.00 8.64 C +ATOM 15 CE1 PHE A 2 24.770 -9.567 30.728 1.00 8.64 C +ATOM 16 CE2 PHE A 2 22.796 -8.214 30.565 1.00 8.64 C 
+ATOM 17 CZ PHE A 2 23.665 -9.139 30.010 1.00 8.64 C +ATOM 18 N GLU A 3 24.279 -4.453 33.583 1.00 8.64 N +ATOM 19 CA GLU A 3 23.756 -3.316 32.831 1.00 8.64 C +ATOM 20 C GLU A 3 24.858 -2.308 32.517 1.00 8.64 C +ATOM 21 CB GLU A 3 22.624 -2.635 33.604 1.00 8.64 C +ATOM 22 O GLU A 3 24.963 -1.828 31.387 1.00 8.64 O +ATOM 23 CG GLU A 3 21.251 -3.239 33.345 1.00 8.64 C +ATOM 24 CD GLU A 3 20.291 -3.067 34.511 1.00 8.64 C +ATOM 25 OE1 GLU A 3 19.129 -3.525 34.413 1.00 8.64 O +ATOM 26 OE2 GLU A 3 20.702 -2.469 35.530 1.00 8.64 O +ATOM 27 N GLU A 4 25.795 -2.118 33.499 1.00 8.64 N +ATOM 28 CA GLU A 4 26.873 -1.150 33.321 1.00 8.64 C +ATOM 29 C GLU A 4 27.923 -1.667 32.341 1.00 8.64 C +ATOM 30 CB GLU A 4 27.526 -0.820 34.666 1.00 8.64 C +ATOM 31 O GLU A 4 28.401 -0.920 31.485 1.00 8.64 O +ATOM 32 CG GLU A 4 26.709 0.130 35.529 1.00 8.64 C +ATOM 33 CD GLU A 4 27.351 0.416 36.878 1.00 8.64 C +ATOM 34 OE1 GLU A 4 26.801 1.234 37.650 1.00 8.64 O +ATOM 35 OE2 GLU A 4 28.412 -0.182 37.164 1.00 8.64 O +ATOM 36 N GLN A 5 28.078 -2.983 32.335 1.00 8.64 N +ATOM 37 CA GLN A 5 29.050 -3.614 31.449 1.00 8.64 C +ATOM 38 C GLN A 5 28.520 -3.696 30.020 1.00 8.64 C +ATOM 39 CB GLN A 5 29.410 -5.012 31.956 1.00 8.64 C +ATOM 40 O GLN A 5 29.268 -3.487 29.063 1.00 8.64 O +ATOM 41 CG GLN A 5 30.587 -5.031 32.922 1.00 8.64 C +ATOM 42 CD GLN A 5 30.906 -6.425 33.430 1.00 8.64 C +ATOM 43 NE2 GLN A 5 31.803 -6.509 34.407 1.00 8.64 N +ATOM 44 OE1 GLN A 5 30.350 -7.418 32.950 1.00 8.64 O +ATOM 45 N PHE A 6 27.127 -3.824 29.849 1.00 8.64 N +ATOM 46 CA PHE A 6 26.442 -3.868 28.562 1.00 8.64 C +ATOM 47 C PHE A 6 26.501 -2.512 27.870 1.00 8.64 C +ATOM 48 CB PHE A 6 24.983 -4.302 28.744 1.00 8.64 C +ATOM 49 O PHE A 6 26.819 -2.429 26.682 1.00 8.64 O +ATOM 50 CG PHE A 6 24.232 -4.461 27.450 1.00 8.64 C +ATOM 51 CD1 PHE A 6 24.326 -5.636 26.715 1.00 8.64 C +ATOM 52 CD2 PHE A 6 23.431 -3.434 26.968 1.00 8.64 C +ATOM 53 CE1 PHE A 6 23.631 -5.786 25.517 1.00 8.64 C +ATOM 54 CE2 PHE A 6 22.734 -3.576 25.771 
1.00 8.64 C +ATOM 55 CZ PHE A 6 22.836 -4.753 25.047 1.00 8.64 C +ATOM 56 N ILE A 7 26.367 -1.421 28.620 1.00 8.64 N +ATOM 57 CA ILE A 7 26.395 -0.058 28.103 1.00 8.64 C +ATOM 58 C ILE A 7 27.816 0.304 27.677 1.00 8.64 C +ATOM 59 CB ILE A 7 25.874 0.954 29.148 1.00 8.64 C +ATOM 60 O ILE A 7 28.020 0.896 26.614 1.00 8.64 O +ATOM 61 CG1 ILE A 7 24.383 0.725 29.417 1.00 8.64 C +ATOM 62 CG2 ILE A 7 26.133 2.391 28.685 1.00 8.64 C +ATOM 63 CD1 ILE A 7 23.831 1.540 30.579 1.00 8.64 C +ATOM 64 N LYS A 8 28.765 -0.137 28.420 1.00 8.64 N +ATOM 65 CA LYS A 8 30.171 0.158 28.160 1.00 8.64 C +ATOM 66 C LYS A 8 30.668 -0.584 26.922 1.00 8.64 C +ATOM 67 CB LYS A 8 31.030 -0.210 29.371 1.00 8.64 C +ATOM 68 O LYS A 8 31.368 -0.008 26.086 1.00 8.64 O +ATOM 69 CG LYS A 8 32.396 0.462 29.387 1.00 8.64 C +ATOM 70 CD LYS A 8 33.170 0.123 30.655 1.00 8.64 C +ATOM 71 CE LYS A 8 34.584 0.686 30.615 1.00 8.64 C +ATOM 72 NZ LYS A 8 35.350 0.348 31.852 1.00 8.64 N +ATOM 73 N ASN A 9 30.138 -1.772 26.738 1.00 8.64 N +ATOM 74 CA ASN A 9 30.622 -2.602 25.640 1.00 8.64 C +ATOM 75 C ASN A 9 29.908 -2.274 24.332 1.00 8.64 C +ATOM 76 CB ASN A 9 30.461 -4.086 25.976 1.00 8.64 C +ATOM 77 O ASN A 9 30.396 -2.610 23.252 1.00 8.64 O +ATOM 78 CG ASN A 9 31.429 -4.551 27.046 1.00 8.64 C +ATOM 79 ND2 ASN A 9 31.123 -5.682 27.670 1.00 8.64 N +ATOM 80 OD1 ASN A 9 32.442 -3.898 27.310 1.00 8.64 O +ATOM 81 N ASN A 10 28.860 -1.389 24.447 1.00 8.64 N +ATOM 82 CA ASN A 10 28.104 -1.128 23.227 1.00 8.64 C +ATOM 83 C ASN A 10 28.029 0.366 22.923 1.00 8.64 C +ATOM 84 CB ASN A 10 26.697 -1.720 23.330 1.00 8.64 C +ATOM 85 O ASN A 10 27.356 0.778 21.977 1.00 8.64 O +ATOM 86 CG ASN A 10 26.693 -3.234 23.250 1.00 8.64 C +ATOM 87 ND2 ASN A 10 26.544 -3.888 24.396 1.00 8.64 N +ATOM 88 OD1 ASN A 10 26.823 -3.811 22.167 1.00 8.64 O +ATOM 89 N SER A 11 28.833 1.149 23.636 1.00 8.64 N +ATOM 90 CA SER A 11 28.835 2.602 23.500 1.00 8.64 C +ATOM 91 C SER A 11 29.617 3.040 22.266 1.00 8.64 C +ATOM 92 CB SER A 11 29.427 3.257 
24.748 1.00 8.64 C +ATOM 93 O SER A 11 29.395 4.132 21.740 1.00 8.64 O +ATOM 94 OG SER A 11 30.701 2.712 25.046 1.00 8.64 O +ATOM 95 N ASP A 12 30.212 2.067 21.630 1.00 8.64 N +ATOM 96 CA ASP A 12 30.855 2.541 20.408 1.00 8.64 C +ATOM 97 C ASP A 12 29.975 2.284 19.188 1.00 8.64 C +ATOM 98 CB ASP A 12 32.218 1.871 20.224 1.00 8.64 C +ATOM 99 O ASP A 12 30.218 2.836 18.113 1.00 8.64 O +ATOM 100 CG ASP A 12 33.270 2.391 21.189 1.00 8.64 C +ATOM 101 OD1 ASP A 12 34.250 1.668 21.471 1.00 8.64 O +ATOM 102 OD2 ASP A 12 33.116 3.533 21.673 1.00 8.64 O +ATOM 103 N SER A 13 28.711 1.753 19.485 1.00 8.64 N +ATOM 104 CA SER A 13 27.850 1.458 18.344 1.00 8.64 C +ATOM 105 C SER A 13 26.738 2.492 18.209 1.00 8.64 C +ATOM 106 CB SER A 13 27.245 0.060 18.477 1.00 8.64 C +ATOM 107 O SER A 13 26.268 3.040 19.208 1.00 8.64 O +ATOM 108 OG SER A 13 26.522 -0.062 19.690 1.00 8.64 O +ATOM 109 N ASN A 14 26.876 3.556 17.578 1.00 8.64 N +ATOM 110 CA ASN A 14 25.876 4.507 17.103 1.00 8.64 C +ATOM 111 C ASN A 14 24.502 3.857 16.973 1.00 8.64 C +ATOM 112 CB ASN A 14 26.306 5.114 15.766 1.00 8.64 C +ATOM 113 O ASN A 14 23.638 4.361 16.252 1.00 8.64 O +ATOM 114 CG ASN A 14 27.367 6.185 15.925 1.00 8.64 C +ATOM 115 ND2 ASN A 14 28.105 6.452 14.854 1.00 8.64 N +ATOM 116 OD1 ASN A 14 27.523 6.767 17.002 1.00 8.64 O +ATOM 117 N ILE A 15 24.147 2.876 17.739 1.00 8.64 N +ATOM 118 CA ILE A 15 22.782 2.392 17.562 1.00 8.64 C +ATOM 119 C ILE A 15 21.867 3.040 18.600 1.00 8.64 C +ATOM 120 CB ILE A 15 22.709 0.853 17.670 1.00 8.64 C +ATOM 121 O ILE A 15 22.173 3.037 19.794 1.00 8.64 O +ATOM 122 CG1 ILE A 15 23.583 0.200 16.594 1.00 8.64 C +ATOM 123 CG2 ILE A 15 21.259 0.372 17.564 1.00 8.64 C +ATOM 124 CD1 ILE A 15 23.694 -1.313 16.720 1.00 8.64 C +ATOM 125 N LEU A 16 21.054 3.988 18.304 1.00 8.64 N +ATOM 126 CA LEU A 16 19.978 4.667 19.017 1.00 8.64 C +ATOM 127 C LEU A 16 18.932 3.668 19.501 1.00 8.64 C +ATOM 128 CB LEU A 16 19.320 5.718 18.120 1.00 8.64 C +ATOM 129 O LEU A 16 18.483 2.813 18.734 1.00 
8.64 O +ATOM 130 CG LEU A 16 20.096 7.021 17.921 1.00 8.64 C +ATOM 131 CD1 LEU A 16 19.696 7.681 16.605 1.00 8.64 C +ATOM 132 CD2 LEU A 16 19.860 7.968 19.093 1.00 8.64 C +ATOM 133 N ALA A 17 18.869 3.238 20.703 1.00 8.64 N +ATOM 134 CA ALA A 17 17.774 2.555 21.387 1.00 8.64 C +ATOM 135 C ALA A 17 16.496 3.388 21.343 1.00 8.64 C +ATOM 136 CB ALA A 17 18.158 2.251 22.833 1.00 8.64 C +ATOM 137 O ALA A 17 16.550 4.620 21.358 1.00 8.64 O +ATOM 138 N PRO A 18 15.357 2.800 20.785 1.00 8.64 N +ATOM 139 CA PRO A 18 14.061 3.472 20.894 1.00 8.64 C +ATOM 140 C PRO A 18 13.755 3.936 22.317 1.00 8.64 C +ATOM 141 CB PRO A 18 13.067 2.397 20.446 1.00 8.64 C +ATOM 142 O PRO A 18 14.149 3.277 23.283 1.00 8.64 O +ATOM 143 CG PRO A 18 13.852 1.125 20.452 1.00 8.64 C +ATOM 144 CD PRO A 18 15.296 1.456 20.699 1.00 8.64 C +ATOM 145 N LYS A 19 13.655 5.179 22.533 1.00 8.64 N +ATOM 146 CA LYS A 19 13.091 5.770 23.743 1.00 8.64 C +ATOM 147 C LYS A 19 11.590 5.512 23.832 1.00 8.64 C +ATOM 148 CB LYS A 19 13.369 7.273 23.786 1.00 8.64 C +ATOM 149 O LYS A 19 10.851 5.783 22.884 1.00 8.64 O +ATOM 150 CG LYS A 19 14.728 7.635 24.367 1.00 8.64 C +ATOM 151 CD LYS A 19 14.887 9.143 24.519 1.00 8.64 C +ATOM 152 CE LYS A 19 16.267 9.509 25.048 1.00 8.64 C +ATOM 153 NZ LYS A 19 16.425 10.986 25.206 1.00 8.64 N +ATOM 154 N VAL A 20 11.044 4.381 24.383 1.00 8.64 N +ATOM 155 CA VAL A 20 9.629 4.231 24.706 1.00 8.64 C +ATOM 156 C VAL A 20 9.274 5.116 25.898 1.00 8.64 C +ATOM 157 CB VAL A 20 9.268 2.759 25.009 1.00 8.64 C +ATOM 158 O VAL A 20 9.977 5.115 26.911 1.00 8.64 O +ATOM 159 CG1 VAL A 20 7.753 2.575 25.067 1.00 8.64 C +ATOM 160 CG2 VAL A 20 9.882 1.833 23.960 1.00 8.64 C +ATOM 161 N SER A 21 8.650 6.317 25.693 1.00 8.64 N +ATOM 162 CA SER A 21 8.096 7.229 26.687 1.00 8.64 C +ATOM 163 C SER A 21 7.175 6.497 27.657 1.00 8.64 C +ATOM 164 CB SER A 21 7.332 8.365 26.005 1.00 8.64 C +ATOM 165 O SER A 21 6.372 5.657 27.245 1.00 8.64 O +ATOM 166 OG SER A 21 5.963 8.343 26.373 1.00 8.64 O +ATOM 167 N GLN 
A 22 7.597 6.311 28.856 1.00 8.64 N +ATOM 168 CA GLN A 22 6.871 5.820 30.022 1.00 8.64 C +ATOM 169 C GLN A 22 5.495 6.472 30.128 1.00 8.64 C +ATOM 170 CB GLN A 22 7.672 6.073 31.300 1.00 8.64 C +ATOM 171 O GLN A 22 4.544 5.852 30.609 1.00 8.64 O +ATOM 172 CG GLN A 22 8.650 4.958 31.644 1.00 8.64 C +ATOM 173 CD GLN A 22 9.489 5.270 32.869 1.00 8.64 C +ATOM 174 NE2 GLN A 22 10.341 4.329 33.260 1.00 8.64 N +ATOM 175 OE1 GLN A 22 9.372 6.349 33.460 1.00 8.64 O +ATOM 176 N SER A 23 5.264 7.531 29.349 1.00 8.64 N +ATOM 177 CA SER A 23 4.000 8.243 29.506 1.00 8.64 C +ATOM 178 C SER A 23 2.897 7.603 28.670 1.00 8.64 C +ATOM 179 CB SER A 23 4.160 9.713 29.115 1.00 8.64 C +ATOM 180 O SER A 23 1.719 7.671 29.029 1.00 8.64 O +ATOM 181 OG SER A 23 4.522 9.834 27.751 1.00 8.64 O +ATOM 182 N VAL A 24 3.264 6.700 27.791 1.00 8.64 N +ATOM 183 CA VAL A 24 2.305 6.038 26.913 1.00 8.64 C +ATOM 184 C VAL A 24 1.784 4.767 27.581 1.00 8.64 C +ATOM 185 CB VAL A 24 2.931 5.700 25.541 1.00 8.64 C +ATOM 186 O VAL A 24 0.599 4.443 27.474 1.00 8.64 O +ATOM 187 CG1 VAL A 24 1.954 4.895 24.686 1.00 8.64 C +ATOM 188 CG2 VAL A 24 3.350 6.979 24.818 1.00 8.64 C +ATOM 189 N ILE A 25 2.491 4.247 28.597 1.00 8.64 N +ATOM 190 CA ILE A 25 2.093 2.989 29.220 1.00 8.64 C +ATOM 191 C ILE A 25 1.071 3.261 30.322 1.00 8.64 C +ATOM 192 CB ILE A 25 3.311 2.231 29.793 1.00 8.64 C +ATOM 193 O ILE A 25 0.106 2.509 30.482 1.00 8.64 O +ATOM 194 CG1 ILE A 25 4.253 1.802 28.662 1.00 8.64 C +ATOM 195 CG2 ILE A 25 2.856 1.021 30.615 1.00 8.64 C +ATOM 196 CD1 ILE A 25 5.548 1.162 29.144 1.00 8.64 C +ATOM 197 N LYS A 26 0.966 4.583 30.669 1.00 8.64 N +ATOM 198 CA LYS A 26 0.057 4.828 31.785 1.00 8.64 C +ATOM 199 C LYS A 26 -1.334 5.210 31.288 1.00 8.64 C +ATOM 200 CB LYS A 26 0.608 5.928 32.694 1.00 8.64 C +ATOM 201 O LYS A 26 -2.335 4.927 31.951 1.00 8.64 O +ATOM 202 CG LYS A 26 1.656 5.444 33.686 1.00 8.64 C +ATOM 203 CD LYS A 26 2.100 6.562 34.621 1.00 8.64 C +ATOM 204 CE LYS A 26 3.222 6.107 35.544 1.00 8.64 C +ATOM 
205 NZ LYS A 26 3.671 7.205 36.452 1.00 8.64 N +ATOM 206 N SER A 27 -1.519 5.405 29.984 1.00 8.64 N +ATOM 207 CA SER A 27 -2.818 5.830 29.472 1.00 8.64 C +ATOM 208 C SER A 27 -3.593 4.654 28.886 1.00 8.64 C +ATOM 209 CB SER A 27 -2.647 6.918 28.412 1.00 8.64 C +ATOM 210 O SER A 27 -4.818 4.714 28.757 1.00 8.64 O +ATOM 211 OG SER A 27 -1.783 7.941 28.876 1.00 8.64 O +ATOM 212 N ILE A 28 -3.005 3.525 29.012 1.00 8.64 N +ATOM 213 CA ILE A 28 -3.699 2.421 28.358 1.00 8.64 C +ATOM 214 C ILE A 28 -4.341 1.519 29.410 1.00 8.64 C +ATOM 215 CB ILE A 28 -2.741 1.603 27.463 1.00 8.64 C +ATOM 216 O ILE A 28 -5.121 0.624 29.076 1.00 8.64 O +ATOM 217 CG1 ILE A 28 -2.140 2.495 26.370 1.00 8.64 C +ATOM 218 CG2 ILE A 28 -3.468 0.402 26.851 1.00 8.64 C +ATOM 219 CD1 ILE A 28 -1.059 1.814 25.541 1.00 8.64 C +ATOM 220 N LYS A 29 -4.577 2.107 30.648 1.00 8.64 N +ATOM 221 CA LYS A 29 -5.233 1.333 31.698 1.00 8.64 C +ATOM 222 C LYS A 29 -6.750 1.358 31.533 1.00 8.64 C +ATOM 223 CB LYS A 29 -4.847 1.867 33.078 1.00 8.64 C +ATOM 224 O LYS A 29 -7.358 2.430 31.493 1.00 8.64 O +ATOM 225 CG LYS A 29 -3.666 1.147 33.712 1.00 8.64 C +ATOM 226 CD LYS A 29 -3.402 1.645 35.127 1.00 8.64 C +ATOM 227 CE LYS A 29 -2.190 0.960 35.745 1.00 8.64 C +ATOM 228 NZ LYS A 29 -1.924 1.448 37.131 1.00 8.64 N +ATOM 229 N GLY A 30 -7.229 1.083 30.294 1.00 8.64 N +ATOM 230 CA GLY A 30 -8.601 0.618 30.176 1.00 8.64 C +ATOM 231 C GLY A 30 -8.899 -0.029 28.837 1.00 8.64 C +ATOM 232 O GLY A 30 -9.937 -0.674 28.670 1.00 8.64 O +ATOM 233 N ILE A 31 -7.830 -0.266 28.050 1.00 8.64 N +ATOM 234 CA ILE A 31 -8.106 -0.892 26.762 1.00 8.64 C +ATOM 235 C ILE A 31 -7.458 -2.274 26.711 1.00 8.64 C +ATOM 236 CB ILE A 31 -7.601 -0.021 25.590 1.00 8.64 C +ATOM 237 O ILE A 31 -6.281 -2.428 27.045 1.00 8.64 O +ATOM 238 CG1 ILE A 31 -8.334 1.326 25.572 1.00 8.64 C +ATOM 239 CG2 ILE A 31 -7.773 -0.756 24.258 1.00 8.64 C +ATOM 240 CD1 ILE A 31 -7.778 2.319 24.562 1.00 8.64 C +ATOM 241 N LYS A 32 -8.177 -3.230 27.032 1.00 8.64 N +ATOM 242 
CA LYS A 32 -7.883 -4.648 26.851 1.00 8.64 C +ATOM 243 C LYS A 32 -7.261 -4.910 25.482 1.00 8.64 C +ATOM 244 CB LYS A 32 -9.152 -5.485 27.018 1.00 8.64 C +ATOM 245 O LYS A 32 -7.777 -4.449 24.461 1.00 8.64 O +ATOM 246 CG LYS A 32 -9.602 -5.645 28.463 1.00 8.64 C +ATOM 247 CD LYS A 32 -10.735 -6.656 28.587 1.00 8.64 C +ATOM 248 CE LYS A 32 -11.232 -6.767 30.022 1.00 8.64 C +ATOM 249 NZ LYS A 32 -12.323 -7.777 30.154 1.00 8.64 N +ATOM 250 N SER A 33 -5.945 -4.505 25.154 1.00 8.64 N +ATOM 251 CA SER A 33 -4.816 -4.797 24.277 1.00 8.64 C +ATOM 252 C SER A 33 -5.289 -5.219 22.890 1.00 8.64 C +ATOM 253 CB SER A 33 -3.937 -5.894 24.880 1.00 8.64 C +ATOM 254 O SER A 33 -6.134 -6.107 22.759 1.00 8.64 O +ATOM 255 OG SER A 33 -4.706 -7.046 25.181 1.00 8.64 O +ATOM 256 N LYS A 34 -5.506 -4.290 21.907 1.00 8.64 N +ATOM 257 CA LYS A 34 -5.176 -4.717 20.550 1.00 8.64 C +ATOM 258 C LYS A 34 -3.963 -3.963 20.015 1.00 8.64 C +ATOM 259 CB LYS A 34 -6.372 -4.514 19.618 1.00 8.64 C +ATOM 260 O LYS A 34 -3.833 -2.755 20.227 1.00 8.64 O +ATOM 261 CG LYS A 34 -7.490 -5.528 19.815 1.00 8.64 C +ATOM 262 CD LYS A 34 -8.616 -5.321 18.811 1.00 8.64 C +ATOM 263 CE LYS A 34 -9.773 -6.278 19.062 1.00 8.64 C +ATOM 264 NZ LYS A 34 -10.875 -6.087 18.072 1.00 8.64 N +ATOM 265 N HIS A 35 -2.824 -4.493 19.949 1.00 8.64 N +ATOM 266 CA HIS A 35 -1.491 -4.114 19.495 1.00 8.64 C +ATOM 267 C HIS A 35 -1.527 -3.581 18.066 1.00 8.64 C +ATOM 268 CB HIS A 35 -0.535 -5.304 19.588 1.00 8.64 C +ATOM 269 O HIS A 35 -2.122 -4.202 17.182 1.00 8.64 O +ATOM 270 CG HIS A 35 -0.328 -5.799 20.983 1.00 8.64 C +ATOM 271 CD2 HIS A 35 -0.455 -7.038 21.515 1.00 8.64 C +ATOM 272 ND1 HIS A 35 0.057 -4.973 22.017 1.00 8.64 N +ATOM 273 CE1 HIS A 35 0.158 -5.684 23.127 1.00 8.64 C +ATOM 274 NE2 HIS A 35 -0.147 -6.940 22.850 1.00 8.64 N +ATOM 275 N VAL A 36 -1.503 -2.286 17.829 1.00 8.64 N +ATOM 276 CA VAL A 36 -1.192 -1.701 16.529 1.00 8.64 C +ATOM 277 C VAL A 36 0.304 -1.410 16.437 1.00 8.64 C +ATOM 278 CB VAL A 36 -2.002 -0.410 
16.279 1.00 8.64 C +ATOM 279 O VAL A 36 0.894 -0.861 17.371 1.00 8.64 O +ATOM 280 CG1 VAL A 36 -1.660 0.188 14.915 1.00 8.64 C +ATOM 281 CG2 VAL A 36 -3.500 -0.692 16.381 1.00 8.64 C +ATOM 282 N PHE A 37 1.020 -2.124 15.486 1.00 8.64 N +ATOM 283 CA PHE A 37 2.422 -1.855 15.187 1.00 8.64 C +ATOM 284 C PHE A 37 2.551 -0.928 13.985 1.00 8.64 C +ATOM 285 CB PHE A 37 3.177 -3.162 14.926 1.00 8.64 C +ATOM 286 O PHE A 37 1.807 -1.060 13.010 1.00 8.64 O +ATOM 287 CG PHE A 37 3.251 -4.071 16.124 1.00 8.64 C +ATOM 288 CD1 PHE A 37 2.301 -5.066 16.318 1.00 8.64 C +ATOM 289 CD2 PHE A 37 4.270 -3.928 17.057 1.00 8.64 C +ATOM 290 CE1 PHE A 37 2.367 -5.908 17.426 1.00 8.64 C +ATOM 291 CE2 PHE A 37 4.342 -4.766 18.166 1.00 8.64 C +ATOM 292 CZ PHE A 37 3.389 -5.755 18.349 1.00 8.64 C +ATOM 293 N GLU A 38 3.248 0.202 14.094 1.00 8.64 N +ATOM 294 CA GLU A 38 3.645 1.092 13.007 1.00 8.64 C +ATOM 295 C GLU A 38 5.086 0.830 12.578 1.00 8.64 C +ATOM 296 CB GLU A 38 3.477 2.556 13.421 1.00 8.64 C +ATOM 297 O GLU A 38 5.981 0.731 13.419 1.00 8.64 O +ATOM 298 CG GLU A 38 2.545 3.348 12.515 1.00 8.64 C +ATOM 299 CD GLU A 38 2.381 4.798 12.943 1.00 8.64 C +ATOM 300 OE1 GLU A 38 1.724 5.575 12.214 1.00 8.64 O +ATOM 301 OE2 GLU A 38 2.914 5.159 14.016 1.00 8.64 O +ATOM 302 N LEU A 39 5.280 0.263 11.288 1.00 8.64 N +ATOM 303 CA LEU A 39 6.615 0.024 10.751 1.00 8.64 C +ATOM 304 C LEU A 39 7.133 1.253 10.013 1.00 8.64 C +ATOM 305 CB LEU A 39 6.607 -1.185 9.811 1.00 8.64 C +ATOM 306 O LEU A 39 6.371 1.936 9.324 1.00 8.64 O +ATOM 307 CG LEU A 39 6.486 -2.558 10.474 1.00 8.64 C +ATOM 308 CD1 LEU A 39 5.969 -3.586 9.473 1.00 8.64 C +ATOM 309 CD2 LEU A 39 7.829 -2.994 11.050 1.00 8.64 C +ATOM 310 N PRO A 40 8.338 1.782 10.282 1.00 8.64 N +ATOM 311 CA PRO A 40 8.943 2.928 9.599 1.00 8.64 C +ATOM 312 C PRO A 40 9.200 2.662 8.117 1.00 8.64 C +ATOM 313 CB PRO A 40 10.259 3.133 10.353 1.00 8.64 C +ATOM 314 O PRO A 40 9.522 1.534 7.735 1.00 8.64 O +ATOM 315 CG PRO A 40 10.482 1.852 11.090 1.00 8.64 C +ATOM 
316 CD PRO A 40 9.238 1.017 10.981 1.00 8.64 C +ATOM 317 N ILE A 41 8.660 3.312 7.012 1.00 8.64 N +ATOM 318 CA ILE A 41 8.626 3.183 5.560 1.00 8.64 C +ATOM 319 C ILE A 41 9.887 3.798 4.958 1.00 8.64 C +ATOM 320 CB ILE A 41 7.367 3.850 4.962 1.00 8.64 C +ATOM 321 O ILE A 41 10.222 4.950 5.244 1.00 8.64 O +ATOM 322 CG1 ILE A 41 6.099 3.238 5.570 1.00 8.64 C +ATOM 323 CG2 ILE A 41 7.360 3.722 3.436 1.00 8.64 C +ATOM 324 CD1 ILE A 41 4.813 3.939 5.155 1.00 8.64 C +ATOM 325 N ASN A 42 10.941 3.063 4.710 1.00 8.64 N +ATOM 326 CA ASN A 42 11.934 3.552 3.759 1.00 8.64 C +ATOM 327 C ASN A 42 11.614 3.106 2.335 1.00 8.64 C +ATOM 328 CB ASN A 42 13.336 3.085 4.159 1.00 8.64 C +ATOM 329 O ASN A 42 10.806 2.198 2.131 1.00 8.64 O +ATOM 330 CG ASN A 42 13.525 1.591 3.986 1.00 8.64 C +ATOM 331 ND2 ASN A 42 14.569 1.051 4.604 1.00 8.64 N +ATOM 332 OD1 ASN A 42 12.741 0.928 3.303 1.00 8.64 O +ATOM 333 N ASP A 43 11.701 4.001 1.461 1.00 8.64 N +ATOM 334 CA ASP A 43 11.331 4.253 0.072 1.00 8.64 C +ATOM 335 C ASP A 43 11.757 3.096 -0.829 1.00 8.64 C +ATOM 336 CB ASP A 43 11.956 5.561 -0.421 1.00 8.64 C +ATOM 337 O ASP A 43 11.082 2.788 -1.813 1.00 8.64 O +ATOM 338 CG ASP A 43 11.313 6.793 0.191 1.00 8.64 C +ATOM 339 OD1 ASP A 43 11.900 7.893 0.108 1.00 8.64 O +ATOM 340 OD2 ASP A 43 10.210 6.661 0.764 1.00 8.64 O +ATOM 341 N LYS A 44 12.353 2.046 -0.514 1.00 8.64 N +ATOM 342 CA LYS A 44 12.727 0.989 -1.449 1.00 8.64 C +ATOM 343 C LYS A 44 11.978 -0.305 -1.142 1.00 8.64 C +ATOM 344 CB LYS A 44 14.236 0.744 -1.407 1.00 8.64 C +ATOM 345 O LYS A 44 11.810 -1.154 -2.021 1.00 8.64 O +ATOM 346 CG LYS A 44 15.059 1.832 -2.080 1.00 8.64 C +ATOM 347 CD LYS A 44 16.537 1.468 -2.124 1.00 8.64 C +ATOM 348 CE LYS A 44 17.370 2.584 -2.741 1.00 8.64 C +ATOM 349 NZ LYS A 44 18.820 2.229 -2.793 1.00 8.64 N +ATOM 350 N THR A 45 11.103 -0.291 -0.142 1.00 8.64 N +ATOM 351 CA THR A 45 10.454 -1.552 0.197 1.00 8.64 C +ATOM 352 C THR A 45 8.939 -1.437 0.054 1.00 8.64 C +ATOM 353 CB THR A 45 10.805 -1.993 1.630 
1.00 8.64 C +ATOM 354 O THR A 45 8.223 -2.436 0.149 1.00 8.64 O +ATOM 355 CG2 THR A 45 12.095 -2.806 1.656 1.00 8.64 C +ATOM 356 OG1 THR A 45 10.968 -0.832 2.454 1.00 8.64 O +ATOM 357 N LYS A 46 8.412 -0.546 -0.642 1.00 8.64 N +ATOM 358 CA LYS A 46 6.961 -0.392 -0.700 1.00 8.64 C +ATOM 359 C LYS A 46 6.386 -1.066 -1.942 1.00 8.64 C +ATOM 360 CB LYS A 46 6.578 1.089 -0.682 1.00 8.64 C +ATOM 361 O LYS A 46 6.699 -0.675 -3.068 1.00 8.64 O +ATOM 362 CG LYS A 46 6.727 1.750 0.680 1.00 8.64 C +ATOM 363 CD LYS A 46 6.243 3.195 0.656 1.00 8.64 C +ATOM 364 CE LYS A 46 6.459 3.880 1.998 1.00 8.64 C +ATOM 365 NZ LYS A 46 6.003 5.302 1.974 1.00 8.64 N +ATOM 366 N ARG A 47 6.500 -2.491 -2.290 1.00 8.64 N +ATOM 367 CA ARG A 47 5.304 -2.979 -2.968 1.00 8.64 C +ATOM 368 C ARG A 47 5.112 -4.474 -2.731 1.00 8.64 C +ATOM 369 CB ARG A 47 5.382 -2.692 -4.469 1.00 8.64 C +ATOM 370 O ARG A 47 5.990 -5.276 -3.055 1.00 8.64 O +ATOM 371 CG ARG A 47 4.430 -1.603 -4.938 1.00 8.64 C +ATOM 372 CD ARG A 47 4.526 -1.379 -6.440 1.00 8.64 C +ATOM 373 NE ARG A 47 4.838 0.011 -6.759 1.00 8.64 N +ATOM 374 NH1 ARG A 47 4.576 -0.237 -9.041 1.00 8.64 N +ATOM 375 NH2 ARG A 47 5.152 1.801 -8.164 1.00 8.64 N +ATOM 376 CZ ARG A 47 4.854 0.522 -7.987 1.00 8.64 C +ATOM 377 N TYR A 48 4.776 -5.036 -1.672 1.00 8.64 N +ATOM 378 CA TYR A 48 4.245 -6.394 -1.657 1.00 8.64 C +ATOM 379 C TYR A 48 2.731 -6.390 -1.828 1.00 8.64 C +ATOM 380 CB TYR A 48 4.621 -7.104 -0.353 1.00 8.64 C +ATOM 381 O TYR A 48 2.027 -5.622 -1.168 1.00 8.64 O +ATOM 382 CG TYR A 48 6.098 -7.385 -0.219 1.00 8.64 C +ATOM 383 CD1 TYR A 48 6.931 -6.518 0.484 1.00 8.64 C +ATOM 384 CD2 TYR A 48 6.663 -8.518 -0.795 1.00 8.64 C +ATOM 385 CE1 TYR A 48 8.293 -6.772 0.609 1.00 8.64 C +ATOM 386 CE2 TYR A 48 8.024 -8.782 -0.677 1.00 8.64 C +ATOM 387 OH TYR A 48 10.177 -8.161 0.146 1.00 8.64 O +ATOM 388 CZ TYR A 48 8.829 -7.905 0.026 1.00 8.64 C +ATOM 389 N ILE A 49 2.115 -6.489 -3.067 1.00 8.64 N +ATOM 390 CA ILE A 49 0.852 -6.880 -3.684 1.00 8.64 C +ATOM 
391 C ILE A 49 0.101 -7.837 -2.761 1.00 8.64 C +ATOM 392 CB ILE A 49 1.076 -7.534 -5.066 1.00 8.64 C +ATOM 393 O ILE A 49 0.588 -8.929 -2.459 1.00 8.64 O +ATOM 394 CG1 ILE A 49 1.810 -6.567 -6.002 1.00 8.64 C +ATOM 395 CG2 ILE A 49 -0.256 -7.981 -5.674 1.00 8.64 C +ATOM 396 CD1 ILE A 49 2.101 -7.142 -7.381 1.00 8.64 C +ATOM 397 N LEU A 50 -0.593 -7.592 -1.660 1.00 8.64 N +ATOM 398 CA LEU A 50 -1.573 -8.173 -0.750 1.00 8.64 C +ATOM 399 C LEU A 50 -2.949 -8.245 -1.405 1.00 8.64 C +ATOM 400 CB LEU A 50 -1.653 -7.358 0.544 1.00 8.64 C +ATOM 401 O LEU A 50 -3.384 -7.289 -2.050 1.00 8.64 O +ATOM 402 CG LEU A 50 -0.484 -7.515 1.517 1.00 8.64 C +ATOM 403 CD1 LEU A 50 -0.441 -6.339 2.488 1.00 8.64 C +ATOM 404 CD2 LEU A 50 -0.591 -8.835 2.273 1.00 8.64 C +ATOM 405 N GLY A 51 -3.407 -9.384 -2.202 1.00 8.64 N +ATOM 406 CA GLY A 51 -4.655 -10.100 -1.989 1.00 8.64 C +ATOM 407 C GLY A 51 -5.574 -10.070 -3.195 1.00 8.64 C +ATOM 408 O GLY A 51 -6.476 -9.233 -3.274 1.00 8.64 O +ATOM 409 N ALA A 52 -5.228 -10.625 -4.456 1.00 8.64 N +ATOM 410 CA ALA A 52 -6.350 -10.631 -5.391 1.00 8.64 C +ATOM 411 C ALA A 52 -6.952 -12.027 -5.518 1.00 8.64 C +ATOM 412 CB ALA A 52 -5.905 -10.121 -6.760 1.00 8.64 C +ATOM 413 O ALA A 52 -6.245 -12.989 -5.829 1.00 8.64 O +ATOM 414 N THR A 53 -7.821 -12.550 -4.658 1.00 8.64 N +ATOM 415 CA THR A 53 -8.645 -13.723 -4.929 1.00 8.64 C +ATOM 416 C THR A 53 -9.877 -13.343 -5.745 1.00 8.64 C +ATOM 417 CB THR A 53 -9.083 -14.411 -3.622 1.00 8.64 C +ATOM 418 O THR A 53 -10.285 -12.180 -5.756 1.00 8.64 O +ATOM 419 CG2 THR A 53 -7.877 -14.908 -2.831 1.00 8.64 C +ATOM 420 OG1 THR A 53 -9.815 -13.477 -2.820 1.00 8.64 O +ATOM 421 N GLU A 54 -10.028 -13.989 -6.922 1.00 8.64 N +ATOM 422 CA GLU A 54 -11.170 -13.968 -7.831 1.00 8.64 C +ATOM 423 C GLU A 54 -12.473 -13.725 -7.074 1.00 8.64 C +ATOM 424 CB GLU A 54 -11.256 -15.279 -8.618 1.00 8.64 C +ATOM 425 O GLU A 54 -13.399 -13.109 -7.604 1.00 8.64 O +ATOM 426 CG GLU A 54 -10.288 -15.357 -9.789 1.00 8.64 C +ATOM 427 CD GLU 
A 54 -10.430 -16.635 -10.601 1.00 8.64 C +ATOM 428 OE1 GLU A 54 -9.704 -16.798 -11.608 1.00 8.64 O +ATOM 429 OE2 GLU A 54 -11.276 -17.478 -10.228 1.00 8.64 O +ATOM 430 N THR A 55 -12.360 -13.799 -5.890 1.00 8.64 N +ATOM 431 CA THR A 55 -13.448 -13.571 -4.946 1.00 8.64 C +ATOM 432 C THR A 55 -13.101 -12.439 -3.983 1.00 8.64 C +ATOM 433 CB THR A 55 -13.771 -14.847 -4.146 1.00 8.64 C +ATOM 434 O THR A 55 -11.941 -12.282 -3.594 1.00 8.64 O +ATOM 435 CG2 THR A 55 -14.391 -15.915 -5.041 1.00 8.64 C +ATOM 436 OG1 THR A 55 -12.563 -15.363 -3.573 1.00 8.64 O +ATOM 437 N LYS A 56 -13.303 -11.292 -4.478 1.00 8.64 N +ATOM 438 CA LYS A 56 -13.121 -9.990 -3.842 1.00 8.64 C +ATOM 439 C LYS A 56 -12.513 -10.138 -2.450 1.00 8.64 C +ATOM 440 CB LYS A 56 -14.454 -9.244 -3.755 1.00 8.64 C +ATOM 441 O LYS A 56 -13.231 -10.124 -1.449 1.00 8.64 O +ATOM 442 CG LYS A 56 -15.022 -8.833 -5.105 1.00 8.64 C +ATOM 443 CD LYS A 56 -16.241 -7.932 -4.948 1.00 8.64 C +ATOM 444 CE LYS A 56 -16.839 -7.558 -6.298 1.00 8.64 C +ATOM 445 NZ LYS A 56 -18.009 -6.643 -6.150 1.00 8.64 N +ATOM 446 N GLU A 57 -11.444 -10.923 -2.365 1.00 8.64 N +ATOM 447 CA GLU A 57 -10.922 -10.869 -1.003 1.00 8.64 C +ATOM 448 C GLU A 57 -9.945 -9.710 -0.832 1.00 8.64 C +ATOM 449 CB GLU A 57 -10.241 -12.190 -0.635 1.00 8.64 C +ATOM 450 O GLU A 57 -9.129 -9.443 -1.717 1.00 8.64 O +ATOM 451 CG GLU A 57 -11.101 -13.108 0.221 1.00 8.64 C +ATOM 452 CD GLU A 57 -10.378 -14.373 0.656 1.00 8.64 C +ATOM 453 OE1 GLU A 57 -10.943 -15.149 1.461 1.00 8.64 O +ATOM 454 OE2 GLU A 57 -9.238 -14.590 0.190 1.00 8.64 O +ATOM 455 N GLU A 58 -10.407 -8.718 -0.201 1.00 8.64 N +ATOM 456 CA GLU A 58 -9.599 -7.616 0.312 1.00 8.64 C +ATOM 457 C GLU A 58 -8.294 -8.124 0.918 1.00 8.64 C +ATOM 458 CB GLU A 58 -10.384 -6.812 1.352 1.00 8.64 C +ATOM 459 O GLU A 58 -8.299 -9.054 1.728 1.00 8.64 O +ATOM 460 CG GLU A 58 -10.114 -5.315 1.303 1.00 8.64 C +ATOM 461 CD GLU A 58 -10.947 -4.523 2.298 1.00 8.64 C +ATOM 462 OE1 GLU A 58 -10.733 -3.296 2.428 1.00 8.64 O 
+ATOM 463 OE2 GLU A 58 -11.819 -5.134 2.955 1.00 8.64 O +ATOM 464 N VAL A 59 -7.183 -8.070 0.289 1.00 6.72 N +ATOM 465 CA VAL A 59 -5.863 -8.557 0.675 1.00 6.72 C +ATOM 466 C VAL A 59 -5.151 -7.505 1.523 1.00 6.72 C +ATOM 467 CB VAL A 59 -5.006 -8.915 -0.560 1.00 6.72 C +ATOM 468 O VAL A 59 -4.402 -7.842 2.442 1.00 6.72 O +ATOM 469 CG1 VAL A 59 -3.820 -9.790 -0.159 1.00 6.72 C +ATOM 470 CG2 VAL A 59 -5.860 -9.616 -1.615 1.00 6.72 C +ATOM 471 N LEU A 60 -5.923 -6.877 2.559 1.00 6.72 N +ATOM 472 CA LEU A 60 -5.509 -6.065 3.699 1.00 6.72 C +ATOM 473 C LEU A 60 -5.287 -4.615 3.281 1.00 6.72 C +ATOM 474 CB LEU A 60 -4.231 -6.631 4.324 1.00 6.72 C +ATOM 475 O LEU A 60 -4.724 -4.351 2.216 1.00 6.72 O +ATOM 476 CG LEU A 60 -4.398 -7.870 5.205 1.00 6.72 C +ATOM 477 CD1 LEU A 60 -3.064 -8.591 5.363 1.00 6.72 C +ATOM 478 CD2 LEU A 60 -4.969 -7.486 6.565 1.00 6.72 C +ATOM 479 N PRO A 61 -6.061 -3.706 3.745 1.00 6.72 N +ATOM 480 CA PRO A 61 -5.827 -2.294 3.434 1.00 6.72 C +ATOM 481 C PRO A 61 -4.391 -1.859 3.717 1.00 6.72 C +ATOM 482 CB PRO A 61 -6.810 -1.563 4.353 1.00 6.72 C +ATOM 483 O PRO A 61 -3.782 -2.317 4.687 1.00 6.72 O +ATOM 484 CG PRO A 61 -7.183 -2.571 5.392 1.00 6.72 C +ATOM 485 CD PRO A 61 -6.789 -3.933 4.898 1.00 6.72 C +ATOM 486 N ASN A 62 -3.671 -1.566 2.620 1.00 6.72 N +ATOM 487 CA ASN A 62 -2.420 -0.834 2.785 1.00 6.72 C +ATOM 488 C ASN A 62 -2.630 0.672 2.655 1.00 6.72 C +ATOM 489 CB ASN A 62 -1.379 -1.315 1.772 1.00 6.72 C +ATOM 490 O ASN A 62 -3.578 1.117 2.004 1.00 6.72 O +ATOM 491 CG ASN A 62 -0.729 -2.622 2.178 1.00 6.72 C +ATOM 492 ND2 ASN A 62 -0.250 -3.376 1.195 1.00 6.72 N +ATOM 493 OD1 ASN A 62 -0.659 -2.953 3.364 1.00 6.72 O +ATOM 494 N TYR A 63 -2.223 1.347 3.620 1.00 6.72 N +ATOM 495 CA TYR A 63 -2.253 2.806 3.627 1.00 6.72 C +ATOM 496 C TYR A 63 -0.969 3.381 3.043 1.00 6.72 C +ATOM 497 CB TYR A 63 -2.461 3.332 5.050 1.00 6.72 C +ATOM 498 O TYR A 63 0.099 2.774 3.157 1.00 6.72 O +ATOM 499 CG TYR A 63 -3.732 2.841 5.700 1.00 6.72 C +ATOM 500 CD1 
TYR A 63 -3.747 1.663 6.442 1.00 6.72 C +ATOM 501 CD2 TYR A 63 -4.920 3.553 5.571 1.00 6.72 C +ATOM 502 CE1 TYR A 63 -4.917 1.205 7.041 1.00 6.72 C +ATOM 503 CE2 TYR A 63 -6.095 3.105 6.166 1.00 6.72 C +ATOM 504 OH TYR A 63 -7.244 1.484 7.488 1.00 6.72 O +ATOM 505 CZ TYR A 63 -6.083 1.932 6.898 1.00 6.72 C +ATOM 506 N VAL A 64 -1.142 4.342 2.173 1.00 4.66 N +ATOM 507 CA VAL A 64 -0.000 5.090 1.658 1.00 4.66 C +ATOM 508 C VAL A 64 -0.129 6.561 2.046 1.00 4.66 C +ATOM 509 CB VAL A 64 0.125 4.951 0.125 1.00 4.66 C +ATOM 510 O VAL A 64 -1.219 7.133 1.982 1.00 4.66 O +ATOM 511 CG1 VAL A 64 -1.005 5.701 -0.579 1.00 4.66 C +ATOM 512 CG2 VAL A 64 1.485 5.462 -0.348 1.00 4.66 C +ATOM 513 N LYS A 65 0.916 7.003 2.673 1.00 6.72 N +ATOM 514 CA LYS A 65 0.982 8.432 2.966 1.00 6.72 C +ATOM 515 C LYS A 65 1.692 9.190 1.848 1.00 6.72 C +ATOM 516 CB LYS A 65 1.693 8.674 4.298 1.00 6.72 C +ATOM 517 O LYS A 65 2.804 8.830 1.455 1.00 6.72 O +ATOM 518 CG LYS A 65 1.604 10.110 4.795 1.00 6.72 C +ATOM 519 CD LYS A 65 2.207 10.258 6.186 1.00 6.72 C +ATOM 520 CE LYS A 65 2.153 11.701 6.669 1.00 6.72 C +ATOM 521 NZ LYS A 65 2.726 11.848 8.040 1.00 6.72 N +ATOM 522 N VAL A 66 1.057 10.179 1.205 1.00 6.72 N +ATOM 523 CA VAL A 66 1.588 11.120 0.224 1.00 6.72 C +ATOM 524 C VAL A 66 1.485 12.544 0.766 1.00 6.72 C +ATOM 525 CB VAL A 66 0.847 11.009 -1.127 1.00 6.72 C +ATOM 526 O VAL A 66 0.389 13.105 0.851 1.00 6.72 O +ATOM 527 CG1 VAL A 66 1.477 11.935 -2.166 1.00 6.72 C +ATOM 528 CG2 VAL A 66 0.854 9.564 -1.622 1.00 6.72 C +ATOM 529 N GLY A 67 2.645 13.103 1.210 1.00 6.72 N +ATOM 530 CA GLY A 67 2.556 14.375 1.909 1.00 6.72 C +ATOM 531 C GLY A 67 1.847 14.273 3.247 1.00 6.72 C +ATOM 532 O GLY A 67 2.248 13.488 4.109 1.00 6.72 O +ATOM 533 N SER A 68 0.772 15.228 3.461 1.00 6.72 N +ATOM 534 CA SER A 68 -0.022 15.225 4.685 1.00 6.72 C +ATOM 535 C SER A 68 -1.250 14.332 4.545 1.00 6.72 C +ATOM 536 CB SER A 68 -0.453 16.647 5.048 1.00 6.72 C +ATOM 537 O SER A 68 -2.061 14.234 5.469 1.00 6.72 O +ATOM 
538 OG SER A 68 -1.210 17.226 3.999 1.00 6.72 O +ATOM 539 N ASP A 69 -1.295 13.620 3.451 1.00 6.72 N +ATOM 540 CA ASP A 69 -2.507 12.844 3.203 1.00 6.72 C +ATOM 541 C ASP A 69 -2.237 11.347 3.332 1.00 6.72 C +ATOM 542 CB ASP A 69 -3.071 13.160 1.816 1.00 6.72 C +ATOM 543 O ASP A 69 -1.146 10.877 3.002 1.00 6.72 O +ATOM 544 CG ASP A 69 -3.522 14.603 1.673 1.00 6.72 C +ATOM 545 OD1 ASP A 69 -3.312 15.204 0.598 1.00 6.72 O +ATOM 546 OD2 ASP A 69 -4.090 15.144 2.646 1.00 6.72 O +ATOM 547 N LEU A 70 -3.211 10.550 3.934 1.00 4.66 N +ATOM 548 CA LEU A 70 -3.196 9.096 4.057 1.00 4.66 C +ATOM 549 C LEU A 70 -4.202 8.459 3.105 1.00 4.66 C +ATOM 550 CB LEU A 70 -3.502 8.677 5.498 1.00 4.66 C +ATOM 551 O LEU A 70 -5.357 8.886 3.039 1.00 4.66 O +ATOM 552 CG LEU A 70 -3.258 7.207 5.842 1.00 4.66 C +ATOM 553 CD1 LEU A 70 -1.767 6.890 5.788 1.00 4.66 C +ATOM 554 CD2 LEU A 70 -3.830 6.877 7.216 1.00 4.66 C +ATOM 555 N TYR A 71 -3.655 7.568 2.260 1.00 4.66 N +ATOM 556 CA TYR A 71 -4.497 6.862 1.300 1.00 4.66 C +ATOM 557 C TYR A 71 -4.630 5.390 1.671 1.00 4.66 C +ATOM 558 CB TYR A 71 -3.926 6.994 -0.115 1.00 4.66 C +ATOM 559 O TYR A 71 -3.674 4.774 2.148 1.00 4.66 O +ATOM 560 CG TYR A 71 -3.900 8.412 -0.629 1.00 4.66 C +ATOM 561 CD1 TYR A 71 -2.852 9.272 -0.307 1.00 4.66 C +ATOM 562 CD2 TYR A 71 -4.923 8.897 -1.437 1.00 4.66 C +ATOM 563 CE1 TYR A 71 -2.824 10.581 -0.777 1.00 4.66 C +ATOM 564 CE2 TYR A 71 -4.906 10.204 -1.913 1.00 4.66 C +ATOM 565 OH TYR A 71 -3.832 12.332 -2.046 1.00 4.66 O +ATOM 566 CZ TYR A 71 -3.854 11.037 -1.578 1.00 4.66 C +ATOM 567 N ARG A 72 -5.791 4.900 1.584 1.00 4.66 N +ATOM 568 CA ARG A 72 -6.044 3.465 1.655 1.00 4.66 C +ATOM 569 C ARG A 72 -5.981 2.829 0.270 1.00 4.66 C +ATOM 570 CB ARG A 72 -7.406 3.190 2.296 1.00 4.66 C +ATOM 571 O ARG A 72 -6.604 3.321 -0.674 1.00 4.66 O +ATOM 572 CG ARG A 72 -7.679 1.717 2.555 1.00 4.66 C +ATOM 573 CD ARG A 72 -9.040 1.500 3.201 1.00 4.66 C +ATOM 574 NE ARG A 72 -9.274 0.091 3.505 1.00 4.66 N +ATOM 575 NH1 ARG A 72 
-11.472 0.383 4.153 1.00 4.66 N +ATOM 576 NH2 ARG A 72 -10.528 -1.705 4.194 1.00 4.66 N +ATOM 577 CZ ARG A 72 -10.424 -0.407 3.950 1.00 4.66 C +ATOM 578 N LEU A 73 -5.092 1.875 0.180 1.00 4.66 N +ATOM 579 CA LEU A 73 -4.937 1.157 -1.080 1.00 4.66 C +ATOM 580 C LEU A 73 -5.727 -0.147 -1.064 1.00 4.66 C +ATOM 581 CB LEU A 73 -3.458 0.868 -1.354 1.00 4.66 C +ATOM 582 O LEU A 73 -5.700 -0.882 -0.074 1.00 4.66 O +ATOM 583 CG LEU A 73 -2.548 2.087 -1.518 1.00 4.66 C +ATOM 584 CD1 LEU A 73 -1.084 1.663 -1.481 1.00 4.66 C +ATOM 585 CD2 LEU A 73 -2.866 2.820 -2.817 1.00 4.66 C +ATOM 586 N LYS A 74 -6.458 -0.297 -2.040 1.00 4.66 N +ATOM 587 CA LYS A 74 -7.147 -1.567 -2.251 1.00 4.66 C +ATOM 588 C LYS A 74 -6.751 -2.192 -3.586 1.00 4.66 C +ATOM 589 CB LYS A 74 -8.662 -1.371 -2.193 1.00 4.66 C +ATOM 590 O LYS A 74 -6.697 -1.504 -4.608 1.00 4.66 O +ATOM 591 CG LYS A 74 -9.189 -1.022 -0.809 1.00 4.66 C +ATOM 592 CD LYS A 74 -10.710 -0.932 -0.796 1.00 4.66 C +ATOM 593 CE LYS A 74 -11.234 -0.515 0.572 1.00 4.66 C +ATOM 594 NZ LYS A 74 -12.725 -0.424 0.589 1.00 4.66 N +ATOM 595 N ALA A 75 -6.218 -3.399 -3.444 1.00 4.66 N +ATOM 596 CA ALA A 75 -5.851 -4.111 -4.666 1.00 4.66 C +ATOM 597 C ALA A 75 -6.823 -5.253 -4.948 1.00 4.66 C +ATOM 598 CB ALA A 75 -4.424 -4.644 -4.562 1.00 4.66 C +ATOM 599 O ALA A 75 -7.332 -5.887 -4.021 1.00 4.66 O +ATOM 600 N TYR A 76 -7.207 -5.295 -6.175 1.00 6.72 N +ATOM 601 CA TYR A 76 -8.028 -6.438 -6.558 1.00 6.72 C +ATOM 602 C TYR A 76 -7.576 -7.011 -7.896 1.00 6.72 C +ATOM 603 CB TYR A 76 -9.505 -6.039 -6.633 1.00 6.72 C +ATOM 604 O TYR A 76 -6.891 -6.335 -8.668 1.00 6.72 O +ATOM 605 CG TYR A 76 -9.757 -4.799 -7.457 1.00 6.72 C +ATOM 606 CD1 TYR A 76 -10.045 -4.889 -8.817 1.00 6.72 C +ATOM 607 CD2 TYR A 76 -9.707 -3.536 -6.878 1.00 6.72 C +ATOM 608 CE1 TYR A 76 -10.276 -3.750 -9.580 1.00 6.72 C +ATOM 609 CE2 TYR A 76 -9.937 -2.389 -7.631 1.00 6.72 C +ATOM 610 OH TYR A 76 -10.448 -1.375 -9.730 1.00 6.72 O +ATOM 611 CZ TYR A 76 -10.220 -2.506 -8.979 1.00 
6.72 C +ATOM 612 N ARG A 77 -7.735 -8.365 -8.035 1.00 6.72 N +ATOM 613 CA ARG A 77 -7.400 -9.083 -9.261 1.00 6.72 C +ATOM 614 C ARG A 77 -8.658 -9.466 -10.033 1.00 6.72 C +ATOM 615 CB ARG A 77 -6.578 -10.335 -8.944 1.00 6.72 C +ATOM 616 O ARG A 77 -9.624 -9.964 -9.449 1.00 6.72 O +ATOM 617 CG ARG A 77 -6.025 -11.036 -10.174 1.00 6.72 C +ATOM 618 CD ARG A 77 -5.161 -12.233 -9.800 1.00 6.72 C +ATOM 619 NE ARG A 77 -4.366 -12.698 -10.933 1.00 6.72 N +ATOM 620 NH1 ARG A 77 -3.944 -14.788 -10.043 1.00 6.72 N +ATOM 621 NH2 ARG A 77 -3.103 -14.221 -12.098 1.00 6.72 N +ATOM 622 CZ ARG A 77 -3.806 -13.901 -11.022 1.00 6.72 C +ATOM 623 N GLU A 78 -8.703 -8.968 -11.357 1.00 6.72 N +ATOM 624 CA GLU A 78 -9.710 -9.433 -12.306 1.00 6.72 C +ATOM 625 C GLU A 78 -9.072 -10.231 -13.440 1.00 6.72 C +ATOM 626 CB GLU A 78 -10.500 -8.252 -12.875 1.00 6.72 C +ATOM 627 O GLU A 78 -7.847 -10.344 -13.513 1.00 6.72 O +ATOM 628 CG GLU A 78 -11.352 -7.528 -11.842 1.00 6.72 C +ATOM 629 CD GLU A 78 -12.234 -6.446 -12.444 1.00 6.72 C +ATOM 630 OE1 GLU A 78 -12.957 -5.760 -11.686 1.00 6.72 O +ATOM 631 OE2 GLU A 78 -12.203 -6.283 -13.684 1.00 6.72 O +ATOM 632 N LYS A 79 -9.991 -10.926 -14.239 1.00 6.72 N +ATOM 633 CA LYS A 79 -9.539 -11.713 -15.383 1.00 6.72 C +ATOM 634 C LYS A 79 -8.611 -10.898 -16.279 1.00 6.72 C +ATOM 635 CB LYS A 79 -10.735 -12.221 -16.191 1.00 6.72 C +ATOM 636 O LYS A 79 -7.610 -11.415 -16.778 1.00 6.72 O +ATOM 637 CG LYS A 79 -10.419 -13.415 -17.079 1.00 6.72 C +ATOM 638 CD LYS A 79 -11.665 -13.928 -17.790 1.00 6.72 C +ATOM 639 CE LYS A 79 -11.329 -15.033 -18.782 1.00 6.72 C +ATOM 640 NZ LYS A 79 -12.556 -15.601 -19.415 1.00 6.72 N +ATOM 641 N SER A 80 -8.775 -9.596 -16.232 1.00 6.72 N +ATOM 642 CA SER A 80 -8.111 -8.740 -17.209 1.00 6.72 C +ATOM 643 C SER A 80 -6.887 -8.057 -16.606 1.00 6.72 C +ATOM 644 CB SER A 80 -9.080 -7.685 -17.745 1.00 6.72 C +ATOM 645 O SER A 80 -6.072 -7.482 -17.329 1.00 6.72 O +ATOM 646 OG SER A 80 -9.634 -6.926 -16.684 1.00 6.72 O +ATOM 647 N GLY A 
81 -6.705 -8.234 -15.324 1.00 6.72 N +ATOM 648 CA GLY A 81 -5.576 -7.527 -14.740 1.00 6.72 C +ATOM 649 C GLY A 81 -5.757 -7.232 -13.263 1.00 6.72 C +ATOM 650 O GLY A 81 -6.659 -7.775 -12.622 1.00 6.72 O +ATOM 651 N VAL A 82 -4.698 -6.651 -12.655 1.00 6.72 N +ATOM 652 CA VAL A 82 -4.661 -6.238 -11.256 1.00 6.72 C +ATOM 653 C VAL A 82 -4.972 -4.747 -11.149 1.00 6.72 C +ATOM 654 CB VAL A 82 -3.292 -6.545 -10.609 1.00 6.72 C +ATOM 655 O VAL A 82 -4.465 -3.943 -11.935 1.00 6.72 O +ATOM 656 CG1 VAL A 82 -3.263 -6.076 -9.156 1.00 6.72 C +ATOM 657 CG2 VAL A 82 -2.983 -8.039 -10.698 1.00 6.72 C +ATOM 658 N TYR A 83 -5.928 -4.428 -10.292 1.00 6.72 N +ATOM 659 CA TYR A 83 -6.336 -3.045 -10.073 1.00 6.72 C +ATOM 660 C TYR A 83 -5.965 -2.583 -8.668 1.00 6.72 C +ATOM 661 CB TYR A 83 -7.843 -2.888 -10.293 1.00 6.72 C +ATOM 662 O TYR A 83 -5.976 -3.376 -7.724 1.00 6.72 O +ATOM 663 CG TYR A 83 -8.298 -3.276 -11.679 1.00 6.72 C +ATOM 664 CD1 TYR A 83 -8.526 -4.610 -12.011 1.00 6.72 C +ATOM 665 CD2 TYR A 83 -8.499 -2.311 -12.660 1.00 6.72 C +ATOM 666 CE1 TYR A 83 -8.942 -4.973 -13.287 1.00 6.72 C +ATOM 667 CE2 TYR A 83 -8.916 -2.662 -13.940 1.00 6.72 C +ATOM 668 OH TYR A 83 -9.548 -4.347 -15.509 1.00 6.72 O +ATOM 669 CZ TYR A 83 -9.135 -3.993 -14.243 1.00 6.72 C +ATOM 670 N VAL A 84 -5.503 -1.346 -8.569 1.00 4.66 N +ATOM 671 CA VAL A 84 -5.269 -0.727 -7.268 1.00 4.66 C +ATOM 672 C VAL A 84 -6.170 0.496 -7.109 1.00 4.66 C +ATOM 673 CB VAL A 84 -3.788 -0.325 -7.089 1.00 4.66 C +ATOM 674 O VAL A 84 -6.240 1.343 -8.002 1.00 4.66 O +ATOM 675 CG1 VAL A 84 -3.573 0.352 -5.737 1.00 4.66 C +ATOM 676 CG2 VAL A 84 -2.884 -1.549 -7.227 1.00 4.66 C +ATOM 677 N ARG A 85 -6.913 0.488 -6.095 1.00 6.72 N +ATOM 678 CA ARG A 85 -7.754 1.633 -5.763 1.00 6.72 C +ATOM 679 C ARG A 85 -7.206 2.383 -4.553 1.00 6.72 C +ATOM 680 CB ARG A 85 -9.192 1.183 -5.492 1.00 6.72 C +ATOM 681 O ARG A 85 -6.730 1.766 -3.597 1.00 6.72 O +ATOM 682 CG ARG A 85 -10.184 2.329 -5.379 1.00 6.72 C +ATOM 683 CD ARG A 85 -11.596 
1.828 -5.108 1.00 6.72 C +ATOM 684 NE ARG A 85 -12.400 1.796 -6.326 1.00 6.72 N +ATOM 685 NH1 ARG A 85 -14.250 0.839 -5.327 1.00 6.72 N +ATOM 686 NH2 ARG A 85 -14.283 1.347 -7.562 1.00 6.72 N +ATOM 687 CZ ARG A 85 -13.643 1.327 -6.402 1.00 6.72 C +ATOM 688 N THR A 86 -7.071 3.732 -4.691 1.00 6.72 N +ATOM 689 CA THR A 86 -6.621 4.528 -3.555 1.00 6.72 C +ATOM 690 C THR A 86 -7.733 5.453 -3.068 1.00 6.72 C +ATOM 691 CB THR A 86 -5.376 5.361 -3.914 1.00 6.72 C +ATOM 692 O THR A 86 -8.527 5.952 -3.868 1.00 6.72 O +ATOM 693 CG2 THR A 86 -4.246 4.471 -4.420 1.00 6.72 C +ATOM 694 OG1 THR A 86 -5.719 6.306 -4.935 1.00 6.72 O +ATOM 695 N ASN A 87 -7.936 5.475 -1.707 1.00 6.72 N +ATOM 696 CA ASN A 87 -8.831 6.444 -1.084 1.00 6.72 C +ATOM 697 C ASN A 87 -8.087 7.347 -0.105 1.00 6.72 C +ATOM 698 CB ASN A 87 -9.985 5.730 -0.376 1.00 6.72 C +ATOM 699 O ASN A 87 -7.297 6.868 0.711 1.00 6.72 O +ATOM 700 CG ASN A 87 -10.918 5.027 -1.342 1.00 6.72 C +ATOM 701 ND2 ASN A 87 -11.709 4.093 -0.828 1.00 6.72 N +ATOM 702 OD1 ASN A 87 -10.927 5.321 -2.540 1.00 6.72 O +ATOM 703 N LYS A 88 -8.092 8.633 -0.265 1.00 6.72 N +ATOM 704 CA LYS A 88 -7.558 9.565 0.724 1.00 6.72 C +ATOM 705 C LYS A 88 -8.305 9.448 2.049 1.00 6.72 C +ATOM 706 CB LYS A 88 -7.634 11.001 0.204 1.00 6.72 C +ATOM 707 O LYS A 88 -9.536 9.505 2.081 1.00 6.72 O +ATOM 708 CG LYS A 88 -6.902 12.014 1.072 1.00 6.72 C +ATOM 709 CD LYS A 88 -6.972 13.415 0.478 1.00 6.72 C +ATOM 710 CE LYS A 88 -6.293 14.439 1.377 1.00 6.72 C +ATOM 711 NZ LYS A 88 -6.372 15.816 0.806 1.00 6.72 N +ATOM 712 N LEU A 89 -7.493 9.040 3.089 1.00 6.72 N +ATOM 713 CA LEU A 89 -8.056 8.925 4.430 1.00 6.72 C +ATOM 714 C LEU A 89 -8.211 10.298 5.074 1.00 6.72 C +ATOM 715 CB LEU A 89 -7.174 8.033 5.307 1.00 6.72 C +ATOM 716 O LEU A 89 -7.367 11.177 4.884 1.00 6.72 O +ATOM 717 CG LEU A 89 -7.197 6.537 4.993 1.00 6.72 C +ATOM 718 CD1 LEU A 89 -6.028 5.834 5.676 1.00 6.72 C +ATOM 719 CD2 LEU A 89 -8.523 5.920 5.422 1.00 6.72 C +ATOM 720 N GLY A 90 -9.340 10.708 
5.578 1.00 6.72 N +ATOM 721 CA GLY A 90 -9.715 11.894 6.333 1.00 6.72 C +ATOM 722 C GLY A 90 -10.588 12.851 5.544 1.00 6.72 C +ATOM 723 O GLY A 90 -11.001 13.892 6.061 1.00 6.72 O +ATOM 724 N PHE A 91 -10.821 12.471 4.312 1.00 8.64 N +ATOM 725 CA PHE A 91 -11.769 13.293 3.569 1.00 8.64 C +ATOM 726 C PHE A 91 -12.999 12.482 3.179 1.00 8.64 C +ATOM 727 CB PHE A 91 -11.109 13.881 2.318 1.00 8.64 C +ATOM 728 O PHE A 91 -12.889 11.479 2.471 1.00 8.64 O +ATOM 729 CG PHE A 91 -10.275 15.103 2.589 1.00 8.64 C +ATOM 730 CD1 PHE A 91 -8.918 14.989 2.865 1.00 8.64 C +ATOM 731 CD2 PHE A 91 -10.848 16.368 2.568 1.00 8.64 C +ATOM 732 CE1 PHE A 91 -8.143 16.119 3.117 1.00 8.64 C +ATOM 733 CE2 PHE A 91 -10.081 17.502 2.818 1.00 8.64 C +ATOM 734 CZ PHE A 91 -8.729 17.376 3.093 1.00 8.64 C +ATOM 735 N GLU A 92 -13.879 12.165 4.125 1.00 8.64 N +ATOM 736 CA GLU A 92 -15.112 11.423 3.881 1.00 8.64 C +ATOM 737 C GLU A 92 -16.103 12.251 3.067 1.00 8.64 C +ATOM 738 CB GLU A 92 -15.750 10.990 5.203 1.00 8.64 C +ATOM 739 O GLU A 92 -16.591 13.281 3.535 1.00 8.64 O +ATOM 740 CG GLU A 92 -15.244 9.651 5.721 1.00 8.64 C +ATOM 741 CD GLU A 92 -15.990 9.164 6.954 1.00 8.64 C +ATOM 742 OE1 GLU A 92 -15.708 8.041 7.429 1.00 8.64 O +ATOM 743 OE2 GLU A 92 -16.864 9.911 7.447 1.00 8.64 O +ATOM 744 N ASP A 93 -15.801 12.461 1.827 1.00 8.64 N +ATOM 745 CA ASP A 93 -17.029 12.844 1.137 1.00 8.64 C +ATOM 746 C ASP A 93 -17.790 11.614 0.648 1.00 8.64 C +ATOM 747 CB ASP A 93 -16.717 13.772 -0.039 1.00 8.64 C +ATOM 748 O ASP A 93 -17.320 10.899 -0.240 1.00 8.64 O +ATOM 749 CG ASP A 93 -17.958 14.419 -0.628 1.00 8.64 C +ATOM 750 OD1 ASP A 93 -17.828 15.359 -1.442 1.00 8.64 O +ATOM 751 OD2 ASP A 93 -19.076 13.986 -0.274 1.00 8.64 O +ATOM 752 N PRO A 94 -18.711 11.191 1.508 1.00 8.64 N +ATOM 753 CA PRO A 94 -19.501 10.003 1.175 1.00 8.64 C +ATOM 754 C PRO A 94 -20.029 10.028 -0.257 1.00 8.64 C +ATOM 755 CB PRO A 94 -20.652 10.050 2.183 1.00 8.64 C +ATOM 756 O PRO A 94 -20.234 8.972 -0.862 1.00 8.64 O +ATOM 757 
CG PRO A 94 -20.438 11.316 2.948 1.00 8.64 C +ATOM 758 CD PRO A 94 -19.181 11.969 2.449 1.00 8.64 C +ATOM 759 N LYS A 95 -19.825 11.145 -1.002 1.00 8.64 N +ATOM 760 CA LYS A 95 -20.410 11.170 -2.340 1.00 8.64 C +ATOM 761 C LYS A 95 -19.342 11.416 -3.402 1.00 8.64 C +ATOM 762 CB LYS A 95 -21.496 12.243 -2.431 1.00 8.64 C +ATOM 763 O LYS A 95 -19.636 11.407 -4.599 1.00 8.64 O +ATOM 764 CG LYS A 95 -22.740 11.936 -1.610 1.00 8.64 C +ATOM 765 CD LYS A 95 -23.832 12.972 -1.841 1.00 8.64 C +ATOM 766 CE LYS A 95 -25.054 12.701 -0.973 1.00 8.64 C +ATOM 767 NZ LYS A 95 -26.134 13.705 -1.208 1.00 8.64 N +ATOM 768 N SER A 96 -18.077 11.513 -2.983 1.00 8.64 N +ATOM 769 CA SER A 96 -17.157 11.939 -4.032 1.00 8.64 C +ATOM 770 C SER A 96 -16.418 10.749 -4.635 1.00 8.64 C +ATOM 771 CB SER A 96 -16.149 12.950 -3.483 1.00 8.64 C +ATOM 772 O SER A 96 -15.813 9.955 -3.912 1.00 8.64 O +ATOM 773 OG SER A 96 -14.940 12.907 -4.222 1.00 8.64 O +ATOM 774 N PHE A 97 -16.938 10.059 -5.545 1.00 8.64 N +ATOM 775 CA PHE A 97 -16.265 9.188 -6.501 1.00 8.64 C +ATOM 776 C PHE A 97 -14.975 9.827 -7.000 1.00 8.64 C +ATOM 777 CB PHE A 97 -17.186 8.871 -7.683 1.00 8.64 C +ATOM 778 O PHE A 97 -14.111 9.144 -7.554 1.00 8.64 O +ATOM 779 CG PHE A 97 -18.358 7.998 -7.322 1.00 8.64 C +ATOM 780 CD1 PHE A 97 -19.593 8.558 -7.018 1.00 8.64 C +ATOM 781 CD2 PHE A 97 -18.224 6.616 -7.288 1.00 8.64 C +ATOM 782 CE1 PHE A 97 -20.679 7.752 -6.684 1.00 8.64 C +ATOM 783 CE2 PHE A 97 -19.305 5.804 -6.955 1.00 8.64 C +ATOM 784 CZ PHE A 97 -20.531 6.375 -6.652 1.00 8.64 C +ATOM 785 N LEU A 98 -14.602 11.045 -6.442 1.00 8.64 N +ATOM 786 CA LEU A 98 -13.538 11.768 -7.129 1.00 8.64 C +ATOM 787 C LEU A 98 -12.193 11.530 -6.451 1.00 8.64 C +ATOM 788 CB LEU A 98 -13.845 13.267 -7.166 1.00 8.64 C +ATOM 789 O LEU A 98 -11.141 11.781 -7.043 1.00 8.64 O +ATOM 790 CG LEU A 98 -14.980 13.704 -8.094 1.00 8.64 C +ATOM 791 CD1 LEU A 98 -15.347 15.161 -7.833 1.00 8.64 C +ATOM 792 CD2 LEU A 98 -14.586 13.500 -9.553 1.00 8.64 C +ATOM 793 N 
SER A 99 -12.192 10.556 -5.446 1.00 8.64 N +ATOM 794 CA SER A 99 -10.815 10.410 -4.984 1.00 8.64 C +ATOM 795 C SER A 99 -10.321 8.979 -5.164 1.00 8.64 C +ATOM 796 CB SER A 99 -10.696 10.819 -3.515 1.00 8.64 C +ATOM 797 O SER A 99 -9.273 8.607 -4.630 1.00 8.64 O +ATOM 798 OG SER A 99 -11.600 10.080 -2.712 1.00 8.64 O +ATOM 799 N ILE A 100 -11.121 8.337 -6.116 1.00 6.72 N +ATOM 800 CA ILE A 100 -10.638 6.974 -6.304 1.00 6.72 C +ATOM 801 C ILE A 100 -9.891 6.870 -7.632 1.00 6.72 C +ATOM 802 CB ILE A 100 -11.796 5.953 -6.259 1.00 6.72 C +ATOM 803 O ILE A 100 -10.389 7.318 -8.668 1.00 6.72 O +ATOM 804 CG1 ILE A 100 -12.507 6.010 -4.902 1.00 6.72 C +ATOM 805 CG2 ILE A 100 -11.282 4.539 -6.549 1.00 6.72 C +ATOM 806 CD1 ILE A 100 -13.793 5.197 -4.841 1.00 6.72 C +ATOM 807 N LYS A 101 -8.620 6.701 -7.581 1.00 6.72 N +ATOM 808 CA LYS A 101 -7.845 6.403 -8.782 1.00 6.72 C +ATOM 809 C LYS A 101 -7.608 4.902 -8.924 1.00 6.72 C +ATOM 810 CB LYS A 101 -6.507 7.144 -8.756 1.00 6.72 C +ATOM 811 O LYS A 101 -7.289 4.223 -7.946 1.00 6.72 O +ATOM 812 CG LYS A 101 -6.629 8.647 -8.958 1.00 6.72 C +ATOM 813 CD LYS A 101 -5.263 9.304 -9.110 1.00 6.72 C +ATOM 814 CE LYS A 101 -5.380 10.815 -9.257 1.00 6.72 C +ATOM 815 NZ LYS A 101 -4.046 11.459 -9.443 1.00 6.72 N +ATOM 816 N GLU A 102 -8.161 4.357 -9.913 1.00 8.64 N +ATOM 817 CA GLU A 102 -7.946 2.952 -10.245 1.00 8.64 C +ATOM 818 C GLU A 102 -6.847 2.794 -11.292 1.00 8.64 C +ATOM 819 CB GLU A 102 -9.244 2.312 -10.744 1.00 8.64 C +ATOM 820 O GLU A 102 -6.808 3.537 -12.275 1.00 8.64 O +ATOM 821 CG GLU A 102 -9.192 0.793 -10.818 1.00 8.64 C +ATOM 822 CD GLU A 102 -10.515 0.167 -11.229 1.00 8.64 C +ATOM 823 OE1 GLU A 102 -10.588 -0.435 -12.325 1.00 8.64 O +ATOM 824 OE2 GLU A 102 -11.488 0.280 -10.449 1.00 8.64 O +ATOM 825 N TYR A 103 -5.920 1.999 -10.999 1.00 6.72 N +ATOM 826 CA TYR A 103 -4.857 1.671 -11.943 1.00 6.72 C +ATOM 827 C TYR A 103 -5.001 0.241 -12.451 1.00 6.72 C +ATOM 828 CB TYR A 103 -3.483 1.857 -11.292 1.00 6.72 C +ATOM 829 O 
TYR A 103 -5.256 -0.679 -11.670 1.00 6.72 O +ATOM 830 CG TYR A 103 -3.244 3.253 -10.767 1.00 6.72 C +ATOM 831 CD1 TYR A 103 -3.589 3.595 -9.462 1.00 6.72 C +ATOM 832 CD2 TYR A 103 -2.676 4.231 -11.576 1.00 6.72 C +ATOM 833 CE1 TYR A 103 -3.372 4.880 -8.975 1.00 6.72 C +ATOM 834 CE2 TYR A 103 -2.455 5.519 -11.100 1.00 6.72 C +ATOM 835 OH TYR A 103 -2.589 7.107 -9.323 1.00 6.72 O +ATOM 836 CZ TYR A 103 -2.805 5.833 -9.800 1.00 6.72 C +ATOM 837 N LYS A 104 -5.083 -0.004 -13.802 1.00 8.64 N +ATOM 838 CA LYS A 104 -5.140 -1.339 -14.393 1.00 8.64 C +ATOM 839 C LYS A 104 -3.759 -1.796 -14.852 1.00 8.64 C +ATOM 840 CB LYS A 104 -6.119 -1.364 -15.568 1.00 8.64 C +ATOM 841 O LYS A 104 -3.042 -1.048 -15.520 1.00 8.64 O +ATOM 842 CG LYS A 104 -6.341 -2.749 -16.158 1.00 8.64 C +ATOM 843 CD LYS A 104 -7.313 -2.708 -17.331 1.00 8.64 C +ATOM 844 CE LYS A 104 -7.429 -4.065 -18.010 1.00 8.64 C +ATOM 845 NZ LYS A 104 -8.354 -4.022 -19.181 1.00 8.64 N +ATOM 846 N PHE A 105 -3.346 -2.868 -14.276 1.00 8.64 N +ATOM 847 CA PHE A 105 -2.144 -3.522 -14.782 1.00 8.64 C +ATOM 848 C PHE A 105 -2.504 -4.749 -15.611 1.00 8.64 C +ATOM 849 CB PHE A 105 -1.221 -3.921 -13.626 1.00 8.64 C +ATOM 850 O PHE A 105 -3.367 -5.537 -15.220 1.00 8.64 O +ATOM 851 CG PHE A 105 -0.739 -2.756 -12.804 1.00 8.64 C +ATOM 852 CD1 PHE A 105 -1.429 -2.357 -11.666 1.00 8.64 C +ATOM 853 CD2 PHE A 105 0.405 -2.059 -13.171 1.00 8.64 C +ATOM 854 CE1 PHE A 105 -0.985 -1.279 -10.904 1.00 8.64 C +ATOM 855 CE2 PHE A 105 0.855 -0.981 -12.414 1.00 8.64 C +ATOM 856 CZ PHE A 105 0.158 -0.592 -11.282 1.00 8.64 C +ATOM 857 N GLY A 106 -2.172 -4.885 -16.945 1.00 8.64 N +ATOM 858 CA GLY A 106 -2.470 -5.978 -17.858 1.00 8.64 C +ATOM 859 C GLY A 106 -1.496 -7.136 -17.740 1.00 8.64 C +ATOM 860 O GLY A 106 -0.364 -6.957 -17.287 1.00 8.64 O +ATOM 861 N THR A 107 -1.964 -8.404 -17.592 1.00 8.64 N +ATOM 862 CA THR A 107 -1.518 -9.786 -17.454 1.00 8.64 C +ATOM 863 C THR A 107 -0.972 -10.313 -18.777 1.00 8.64 C +ATOM 864 CB THR A 107 -2.662 
-10.696 -16.969 1.00 8.64 C +ATOM 865 O THR A 107 -0.155 -11.237 -18.794 1.00 8.64 O +ATOM 866 CG2 THR A 107 -2.988 -10.435 -15.502 1.00 8.64 C +ATOM 867 OG1 THR A 107 -3.831 -10.446 -17.758 1.00 8.64 O +ATOM 868 N ARG A 108 -0.421 -9.500 -19.707 1.00 8.64 N +ATOM 869 CA ARG A 108 0.415 -10.048 -20.770 1.00 8.64 C +ATOM 870 C ARG A 108 0.370 -9.165 -22.012 1.00 8.64 C +ATOM 871 CB ARG A 108 -0.025 -11.471 -21.121 1.00 8.64 C +ATOM 872 O ARG A 108 1.162 -9.347 -22.939 1.00 8.64 O +ATOM 873 CG ARG A 108 0.888 -12.553 -20.567 1.00 8.64 C +ATOM 874 CD ARG A 108 0.437 -13.943 -20.992 1.00 8.64 C +ATOM 875 NE ARG A 108 1.440 -14.956 -20.674 1.00 8.64 N +ATOM 876 NH1 ARG A 108 0.214 -16.728 -21.506 1.00 8.64 N +ATOM 877 NH2 ARG A 108 2.286 -17.089 -20.593 1.00 8.64 N +ATOM 878 CZ ARG A 108 1.311 -16.255 -20.925 1.00 8.64 C +ATOM 879 N THR A 109 0.007 -7.906 -21.921 1.00 8.64 N +ATOM 880 CA THR A 109 0.199 -7.193 -23.179 1.00 8.64 C +ATOM 881 C THR A 109 0.557 -5.731 -22.923 1.00 8.64 C +ATOM 882 CB THR A 109 -1.060 -7.269 -24.063 1.00 8.64 C +ATOM 883 O THR A 109 -0.113 -5.050 -22.143 1.00 8.64 O +ATOM 884 CG2 THR A 109 -1.194 -8.643 -24.711 1.00 8.64 C +ATOM 885 OG1 THR A 109 -2.218 -7.018 -23.257 1.00 8.64 O +ATOM 886 N GLY A 110 1.725 -5.448 -22.409 1.00 8.64 N +ATOM 887 CA GLY A 110 2.620 -4.305 -22.494 1.00 8.64 C +ATOM 888 C GLY A 110 1.933 -3.044 -22.984 1.00 8.64 C +ATOM 889 O GLY A 110 1.499 -2.976 -24.136 1.00 8.64 O +ATOM 890 N GLY A 111 0.749 -2.551 -22.421 1.00 8.64 N +ATOM 891 CA GLY A 111 0.179 -1.258 -22.766 1.00 8.64 C +ATOM 892 C GLY A 111 1.093 -0.094 -22.431 1.00 8.64 C +ATOM 893 O GLY A 111 1.993 -0.225 -21.598 1.00 8.64 O +ATOM 894 N ASN A 112 1.646 0.666 -23.583 1.00 8.64 N +ATOM 895 CA ASN A 112 2.351 1.904 -23.896 1.00 8.64 C +ATOM 896 C ASN A 112 2.500 2.792 -22.664 1.00 8.64 C +ATOM 897 CB ASN A 112 1.633 2.664 -25.014 1.00 8.64 C +ATOM 898 O ASN A 112 1.528 3.028 -21.943 1.00 8.64 O +ATOM 899 CG ASN A 112 1.795 2.002 -26.368 1.00 8.64 C +ATOM 
900 ND2 ASN A 112 0.868 2.277 -27.278 1.00 8.64 N +ATOM 901 OD1 ASN A 112 2.746 1.248 -26.594 1.00 8.64 O +ATOM 902 N PHE A 113 3.510 2.618 -21.947 1.00 8.64 N +ATOM 903 CA PHE A 113 4.082 3.644 -21.083 1.00 8.64 C +ATOM 904 C PHE A 113 4.194 4.972 -21.821 1.00 8.64 C +ATOM 905 CB PHE A 113 5.459 3.211 -20.570 1.00 8.64 C +ATOM 906 O PHE A 113 4.651 5.016 -22.966 1.00 8.64 O +ATOM 907 CG PHE A 113 5.526 3.047 -19.075 1.00 8.64 C +ATOM 908 CD1 PHE A 113 5.224 1.827 -18.482 1.00 8.64 C +ATOM 909 CD2 PHE A 113 5.890 4.113 -18.264 1.00 8.64 C +ATOM 910 CE1 PHE A 113 5.285 1.672 -17.099 1.00 8.64 C +ATOM 911 CE2 PHE A 113 5.953 3.966 -16.881 1.00 8.64 C +ATOM 912 CZ PHE A 113 5.649 2.745 -16.300 1.00 8.64 C +ATOM 913 N THR A 114 3.305 5.791 -21.704 1.00 8.64 N +ATOM 914 CA THR A 114 3.524 7.113 -22.280 1.00 8.64 C +ATOM 915 C THR A 114 4.409 7.959 -21.369 1.00 8.64 C +ATOM 916 CB THR A 114 2.190 7.843 -22.526 1.00 8.64 C +ATOM 917 O THR A 114 4.850 9.044 -21.756 1.00 8.64 O +ATOM 918 CG2 THR A 114 1.348 7.113 -23.568 1.00 8.64 C +ATOM 919 OG1 THR A 114 1.456 7.913 -21.298 1.00 8.64 O +ATOM 920 N GLY A 115 5.268 7.248 -20.677 1.00 8.64 N +ATOM 921 CA GLY A 115 6.263 8.086 -20.027 1.00 8.64 C +ATOM 922 C GLY A 115 7.507 7.322 -19.614 1.00 8.64 C +ATOM 923 O GLY A 115 7.525 6.090 -19.647 1.00 8.64 O +ATOM 924 N GLU A 116 8.641 7.657 -20.084 1.00 8.64 N +ATOM 925 CA GLU A 116 9.970 7.216 -19.672 1.00 8.64 C +ATOM 926 C GLU A 116 10.120 7.260 -18.154 1.00 8.64 C +ATOM 927 CB GLU A 116 11.051 8.077 -20.332 1.00 8.64 C +ATOM 928 O GLU A 116 9.646 8.194 -17.505 1.00 8.64 O +ATOM 929 CG GLU A 116 11.400 7.645 -21.749 1.00 8.64 C +ATOM 930 CD GLU A 116 12.569 8.417 -22.341 1.00 8.64 C +ATOM 931 OE1 GLU A 116 12.995 8.099 -23.474 1.00 8.64 O +ATOM 932 OE2 GLU A 116 13.062 9.347 -21.665 1.00 8.64 O +ATOM 933 N LEU A 117 10.088 5.989 -17.564 1.00 8.64 N +ATOM 934 CA LEU A 117 10.578 5.973 -16.190 1.00 8.64 C +ATOM 935 C LEU A 117 12.036 6.415 -16.127 1.00 8.64 C +ATOM 936 CB LEU A 
117 10.430 4.575 -15.584 1.00 8.64 C +ATOM 937 O LEU A 117 12.809 6.155 -17.053 1.00 8.64 O +ATOM 938 CG LEU A 117 9.001 4.097 -15.323 1.00 8.64 C +ATOM 939 CD1 LEU A 117 8.989 2.601 -15.025 1.00 8.64 C +ATOM 940 CD2 LEU A 117 8.376 4.881 -14.175 1.00 8.64 C +ATOM 941 N THR A 118 12.196 7.278 -15.327 1.00 8.64 N +ATOM 942 CA THR A 118 13.590 7.626 -15.076 1.00 8.64 C +ATOM 943 C THR A 118 14.353 6.432 -14.511 1.00 8.64 C +ATOM 944 CB THR A 118 13.702 8.816 -14.105 1.00 8.64 C +ATOM 945 O THR A 118 13.747 5.457 -14.061 1.00 8.64 O +ATOM 946 CG2 THR A 118 12.896 10.011 -14.604 1.00 8.64 C +ATOM 947 OG1 THR A 118 13.207 8.424 -12.818 1.00 8.64 O +ATOM 948 N LYS A 119 15.698 6.199 -14.938 1.00 8.64 N +ATOM 949 CA LYS A 119 16.574 5.167 -14.391 1.00 8.64 C +ATOM 950 C LYS A 119 16.371 5.015 -12.886 1.00 8.64 C +ATOM 951 CB LYS A 119 18.038 5.489 -14.693 1.00 8.64 C +ATOM 952 O LYS A 119 16.303 3.897 -12.373 1.00 8.64 O +ATOM 953 CG LYS A 119 18.973 4.296 -14.566 1.00 8.64 C +ATOM 954 CD LYS A 119 20.389 4.646 -15.006 1.00 8.64 C +ATOM 955 CE LYS A 119 21.344 3.477 -14.803 1.00 8.64 C +ATOM 956 NZ LYS A 119 22.724 3.802 -15.272 1.00 8.64 N +ATOM 957 N GLN A 120 16.211 6.129 -12.176 1.00 8.64 N +ATOM 958 CA GLN A 120 16.025 6.126 -10.729 1.00 8.64 C +ATOM 959 C GLN A 120 14.683 5.507 -10.349 1.00 8.64 C +ATOM 960 CB GLN A 120 16.124 7.547 -10.171 1.00 8.64 C +ATOM 961 O GLN A 120 14.600 4.729 -9.396 1.00 8.64 O +ATOM 962 CG GLN A 120 17.540 7.960 -9.792 1.00 8.64 C +ATOM 963 CD GLN A 120 17.891 9.357 -10.270 1.00 8.64 C +ATOM 964 NE2 GLN A 120 19.145 9.751 -10.077 1.00 8.64 N +ATOM 965 OE1 GLN A 120 17.043 10.074 -10.809 1.00 8.64 O +ATOM 966 N GLU A 121 13.628 5.733 -11.076 1.00 8.64 N +ATOM 967 CA GLU A 121 12.298 5.200 -10.795 1.00 8.64 C +ATOM 968 C GLU A 121 12.253 3.689 -11.006 1.00 8.64 C +ATOM 969 CB GLU A 121 11.248 5.885 -11.674 1.00 8.64 C +ATOM 970 O GLU A 121 11.648 2.965 -10.213 1.00 8.64 O +ATOM 971 CG GLU A 121 10.953 7.323 -11.272 1.00 8.64 C +ATOM 972 CD 
GLU A 121 10.094 8.064 -12.284 1.00 8.64 C +ATOM 973 OE1 GLU A 121 9.364 9.002 -11.889 1.00 8.64 O +ATOM 974 OE2 GLU A 121 10.149 7.704 -13.481 1.00 8.64 O +ATOM 975 N LEU A 122 12.979 3.340 -12.063 1.00 8.64 N +ATOM 976 CA LEU A 122 13.046 1.913 -12.359 1.00 8.64 C +ATOM 977 C LEU A 122 13.813 1.168 -11.272 1.00 8.64 C +ATOM 978 CB LEU A 122 13.709 1.678 -13.719 1.00 8.64 C +ATOM 979 O LEU A 122 13.397 0.090 -10.840 1.00 8.64 O +ATOM 980 CG LEU A 122 12.936 0.805 -14.709 1.00 8.64 C +ATOM 981 CD1 LEU A 122 12.751 1.540 -16.032 1.00 8.64 C +ATOM 982 CD2 LEU A 122 13.653 -0.523 -14.925 1.00 8.64 C +ATOM 983 N VAL A 123 14.840 1.780 -10.710 1.00 8.64 N +ATOM 984 CA VAL A 123 15.653 1.191 -9.651 1.00 8.64 C +ATOM 985 C VAL A 123 14.832 1.085 -8.368 1.00 8.64 C +ATOM 986 CB VAL A 123 16.936 2.013 -9.397 1.00 8.64 C +ATOM 987 O VAL A 123 14.826 0.040 -7.712 1.00 8.64 O +ATOM 988 CG1 VAL A 123 17.646 1.529 -8.134 1.00 8.64 C +ATOM 989 CG2 VAL A 123 17.870 1.931 -10.603 1.00 8.64 C +ATOM 990 N TYR A 124 14.059 2.039 -8.098 1.00 8.64 N +ATOM 991 CA TYR A 124 13.254 2.069 -6.881 1.00 8.64 C +ATOM 992 C TYR A 124 12.121 1.051 -6.952 1.00 8.64 C +ATOM 993 CB TYR A 124 12.683 3.471 -6.649 1.00 8.64 C +ATOM 994 O TYR A 124 11.855 0.342 -5.979 1.00 8.64 O +ATOM 995 CG TYR A 124 13.673 4.438 -6.047 1.00 8.64 C +ATOM 996 CD1 TYR A 124 13.975 5.640 -6.682 1.00 8.64 C +ATOM 997 CD2 TYR A 124 14.309 4.151 -4.844 1.00 8.64 C +ATOM 998 CE1 TYR A 124 14.889 6.534 -6.133 1.00 8.64 C +ATOM 999 CE2 TYR A 124 15.225 5.037 -4.286 1.00 8.64 C +ATOM 1000 OH TYR A 124 16.413 7.105 -4.387 1.00 8.64 O +ATOM 1001 CZ TYR A 124 15.507 6.224 -4.936 1.00 8.64 C +ATOM 1002 N THR A 125 11.428 1.001 -8.096 1.00 6.72 N +ATOM 1003 CA THR A 125 10.313 0.078 -8.276 1.00 6.72 C +ATOM 1004 C THR A 125 10.789 -1.369 -8.188 1.00 6.72 C +ATOM 1005 CB THR A 125 9.609 0.308 -9.626 1.00 6.72 C +ATOM 1006 O THR A 125 10.169 -2.192 -7.510 1.00 6.72 O +ATOM 1007 CG2 THR A 125 8.382 -0.585 -9.767 1.00 6.72 C +ATOM 1008 
OG1 THR A 125 9.202 1.679 -9.718 1.00 6.72 O +ATOM 1009 N ASN A 126 11.985 -1.576 -8.827 1.00 8.64 N +ATOM 1010 CA ASN A 126 12.563 -2.916 -8.803 1.00 8.64 C +ATOM 1011 C ASN A 126 13.002 -3.311 -7.396 1.00 8.64 C +ATOM 1012 CB ASN A 126 13.742 -3.009 -9.773 1.00 8.64 C +ATOM 1013 O ASN A 126 12.771 -4.442 -6.965 1.00 8.64 O +ATOM 1014 CG ASN A 126 13.310 -3.334 -11.189 1.00 8.64 C +ATOM 1015 ND2 ASN A 126 14.170 -3.035 -12.156 1.00 8.64 N +ATOM 1016 OD1 ASN A 126 12.211 -3.848 -11.413 1.00 8.64 O +ATOM 1017 N GLN A 127 13.590 -2.342 -6.627 1.00 8.64 N +ATOM 1018 CA GLN A 127 14.004 -2.576 -5.247 1.00 8.64 C +ATOM 1019 C GLN A 127 12.798 -2.827 -4.346 1.00 8.64 C +ATOM 1020 CB GLN A 127 14.814 -1.390 -4.721 1.00 8.64 C +ATOM 1021 O GLN A 127 12.824 -3.725 -3.501 1.00 8.64 O +ATOM 1022 CG GLN A 127 16.303 -1.478 -5.029 1.00 8.64 C +ATOM 1023 CD GLN A 127 17.083 -0.291 -4.496 1.00 8.64 C +ATOM 1024 NE2 GLN A 127 18.405 -0.421 -4.454 1.00 8.64 N +ATOM 1025 OE1 GLN A 127 16.503 0.734 -4.125 1.00 8.64 O +ATOM 1026 N TRP A 128 11.824 -2.167 -4.535 1.00 6.72 N +ATOM 1027 CA TRP A 128 10.602 -2.286 -3.746 1.00 6.72 C +ATOM 1028 C TRP A 128 9.926 -3.632 -3.989 1.00 6.72 C +ATOM 1029 CB TRP A 128 9.634 -1.147 -4.077 1.00 6.72 C +ATOM 1030 O TRP A 128 9.562 -4.331 -3.040 1.00 6.72 O +ATOM 1031 CG TRP A 128 8.375 -1.162 -3.264 1.00 6.72 C +ATOM 1032 CD1 TRP A 128 8.207 -0.675 -1.997 1.00 6.72 C +ATOM 1033 CD2 TRP A 128 7.108 -1.696 -3.661 1.00 6.72 C +ATOM 1034 CE2 TRP A 128 6.216 -1.497 -2.584 1.00 6.72 C +ATOM 1035 CE3 TRP A 128 6.641 -2.322 -4.824 1.00 6.72 C +ATOM 1036 NE1 TRP A 128 6.910 -0.874 -1.583 1.00 6.72 N +ATOM 1037 CH2 TRP A 128 4.449 -2.515 -3.786 1.00 6.72 C +ATOM 1038 CZ2 TRP A 128 4.880 -1.904 -2.637 1.00 6.72 C +ATOM 1039 CZ3 TRP A 128 5.311 -2.726 -4.874 1.00 6.72 C +ATOM 1040 N VAL A 129 9.837 -4.041 -5.286 1.00 6.72 N +ATOM 1041 CA VAL A 129 9.170 -5.280 -5.670 1.00 6.72 C +ATOM 1042 C VAL A 129 9.963 -6.477 -5.149 1.00 6.72 C +ATOM 1043 CB VAL A 129 
8.999 -5.381 -7.203 1.00 6.72 C +ATOM 1044 O VAL A 129 9.389 -7.411 -4.585 1.00 6.72 O +ATOM 1045 CG1 VAL A 129 8.505 -6.771 -7.600 1.00 6.72 C +ATOM 1046 CG2 VAL A 129 8.036 -4.306 -7.703 1.00 6.72 C +ATOM 1047 N ASN A 130 11.351 -6.333 -5.277 1.00 8.64 N +ATOM 1048 CA ASN A 130 12.247 -7.407 -4.861 1.00 8.64 C +ATOM 1049 C ASN A 130 12.259 -7.571 -3.344 1.00 8.64 C +ATOM 1050 CB ASN A 130 13.664 -7.155 -5.379 1.00 8.64 C +ATOM 1051 O ASN A 130 12.263 -8.695 -2.837 1.00 8.64 O +ATOM 1052 CG ASN A 130 13.854 -7.617 -6.810 1.00 8.64 C +ATOM 1053 ND2 ASN A 130 14.863 -7.073 -7.481 1.00 8.64 N +ATOM 1054 OD1 ASN A 130 13.100 -8.457 -7.309 1.00 8.64 O +ATOM 1055 N GLU A 131 12.069 -6.443 -2.521 1.00 8.64 N +ATOM 1056 CA GLU A 131 12.094 -6.449 -1.061 1.00 8.64 C +ATOM 1057 C GLU A 131 10.759 -6.913 -0.488 1.00 8.64 C +ATOM 1058 CB GLU A 131 12.442 -5.058 -0.524 1.00 8.64 C +ATOM 1059 O GLU A 131 10.721 -7.591 0.541 1.00 8.64 O +ATOM 1060 CG GLU A 131 13.913 -4.694 -0.665 1.00 8.64 C +ATOM 1061 CD GLU A 131 14.234 -3.292 -0.171 1.00 8.64 C +ATOM 1062 OE1 GLU A 131 15.414 -2.879 -0.244 1.00 8.64 O +ATOM 1063 OE2 GLU A 131 13.299 -2.603 0.293 1.00 8.64 O +ATOM 1064 N ASN A 132 9.736 -6.720 -1.184 1.00 8.64 N +ATOM 1065 CA ASN A 132 8.429 -6.916 -0.566 1.00 8.64 C +ATOM 1066 C ASN A 132 7.786 -8.224 -1.016 1.00 8.64 C +ATOM 1067 CB ASN A 132 7.506 -5.736 -0.876 1.00 8.64 C +ATOM 1068 O ASN A 132 7.067 -8.865 -0.247 1.00 8.64 O +ATOM 1069 CG ASN A 132 7.833 -4.505 -0.054 1.00 8.64 C +ATOM 1070 ND2 ASN A 132 8.463 -3.521 -0.684 1.00 8.64 N +ATOM 1071 OD1 ASN A 132 7.524 -4.440 1.139 1.00 8.64 O +ATOM 1072 N ILE A 133 8.282 -8.675 -2.231 1.00 8.64 N +ATOM 1073 CA ILE A 133 7.755 -9.943 -2.723 1.00 8.64 C +ATOM 1074 C ILE A 133 8.504 -11.102 -2.068 1.00 8.64 C +ATOM 1075 CB ILE A 133 7.858 -10.039 -4.261 1.00 8.64 C +ATOM 1076 O ILE A 133 7.902 -12.121 -1.721 1.00 8.64 O +ATOM 1077 CG1 ILE A 133 6.915 -9.029 -4.924 1.00 8.64 C +ATOM 1078 CG2 ILE A 133 7.555 -11.464 -4.735 1.00 
8.64 C +ATOM 1079 CD1 ILE A 133 7.015 -8.993 -6.443 1.00 8.64 C +ATOM 1080 N THR A 134 9.730 -10.791 -1.686 1.00 8.64 N +ATOM 1081 CA THR A 134 10.543 -11.821 -1.050 1.00 8.64 C +ATOM 1082 C THR A 134 10.104 -12.041 0.394 1.00 8.64 C +ATOM 1083 CB THR A 134 12.038 -11.453 -1.085 1.00 8.64 C +ATOM 1084 O THR A 134 10.048 -13.179 0.865 1.00 8.64 O +ATOM 1085 CG2 THR A 134 12.910 -12.668 -0.785 1.00 8.64 C +ATOM 1086 OG1 THR A 134 12.370 -10.951 -2.386 1.00 8.64 O +ATOM 1087 N LEU A 135 9.481 -11.013 1.084 1.00 8.64 N +ATOM 1088 CA LEU A 135 9.061 -11.103 2.479 1.00 8.64 C +ATOM 1089 C LEU A 135 7.653 -11.678 2.586 1.00 8.64 C +ATOM 1090 CB LEU A 135 9.114 -9.726 3.145 1.00 8.64 C +ATOM 1091 O LEU A 135 7.369 -12.474 3.484 1.00 8.64 O +ATOM 1092 CG LEU A 135 10.480 -9.278 3.668 1.00 8.64 C +ATOM 1093 CD1 LEU A 135 10.532 -7.758 3.780 1.00 8.64 C +ATOM 1094 CD2 LEU A 135 10.777 -9.929 5.014 1.00 8.64 C +ATOM 1095 N ALA A 136 6.896 -11.485 1.503 1.00 8.64 N +ATOM 1096 CA ALA A 136 5.482 -11.826 1.637 1.00 8.64 C +ATOM 1097 C ALA A 136 5.208 -13.240 1.133 1.00 8.64 C +ATOM 1098 CB ALA A 136 4.618 -10.818 0.882 1.00 8.64 C +ATOM 1099 O ALA A 136 4.289 -13.909 1.611 1.00 8.64 O +ATOM 1100 N ASN A 137 6.194 -13.758 0.415 1.00 8.64 N +ATOM 1101 CA ASN A 137 6.057 -15.090 -0.164 1.00 8.64 C +ATOM 1102 C ASN A 137 6.849 -16.128 0.626 1.00 8.64 C +ATOM 1103 CB ASN A 137 6.499 -15.087 -1.629 1.00 8.64 C +ATOM 1104 O ASN A 137 8.055 -15.975 0.826 1.00 8.64 O +ATOM 1105 CG ASN A 137 5.464 -14.473 -2.550 1.00 8.64 C +ATOM 1106 ND2 ASN A 137 5.908 -13.993 -3.705 1.00 8.64 N +ATOM 1107 OD1 ASN A 137 4.274 -14.431 -2.226 1.00 8.64 O +ATOM 1108 N GLY A 138 6.587 -16.377 1.995 1.00 8.64 N +ATOM 1109 CA GLY A 138 6.876 -17.550 2.803 1.00 8.64 C +ATOM 1110 C GLY A 138 7.966 -18.427 2.215 1.00 8.64 C +ATOM 1111 O GLY A 138 8.031 -19.623 2.504 1.00 8.64 O +ATOM 1112 N TYR A 139 9.137 -17.824 1.543 1.00 8.64 N +ATOM 1113 CA TYR A 139 10.214 -18.693 1.080 1.00 8.64 C +ATOM 1114 C TYR A 139 
11.083 -19.153 2.245 1.00 8.64 C +ATOM 1115 CB TYR A 139 11.076 -17.973 0.039 1.00 8.64 C +ATOM 1116 O TYR A 139 11.385 -18.370 3.149 1.00 8.64 O +ATOM 1117 CG TYR A 139 10.546 -18.087 -1.370 1.00 8.64 C +ATOM 1118 CD1 TYR A 139 9.749 -17.085 -1.919 1.00 8.64 C +ATOM 1119 CD2 TYR A 139 10.841 -19.197 -2.155 1.00 8.64 C +ATOM 1120 CE1 TYR A 139 9.257 -17.187 -3.216 1.00 8.64 C +ATOM 1121 CE2 TYR A 139 10.355 -19.309 -3.454 1.00 8.64 C +ATOM 1122 OH TYR A 139 9.082 -18.406 -5.260 1.00 8.64 O +ATOM 1123 CZ TYR A 139 9.566 -18.300 -3.975 1.00 8.64 C +ATOM 1124 N ILE A 140 10.744 -20.317 2.913 1.00 8.64 N +ATOM 1125 CA ILE A 140 11.618 -21.208 3.668 1.00 8.64 C +ATOM 1126 C ILE A 140 13.062 -21.031 3.204 1.00 8.64 C +ATOM 1127 CB ILE A 140 11.185 -22.684 3.518 1.00 8.64 C +ATOM 1128 O ILE A 140 13.364 -21.194 2.020 1.00 8.64 O +ATOM 1129 CG1 ILE A 140 9.748 -22.872 4.019 1.00 8.64 C +ATOM 1130 CG2 ILE A 140 12.150 -23.609 4.265 1.00 8.64 C +ATOM 1131 CD1 ILE A 140 9.252 -24.310 3.951 1.00 8.64 C +ATOM 1132 N SER A 141 13.713 -20.010 3.556 1.00 8.64 N +ATOM 1133 CA SER A 141 15.171 -20.025 3.497 1.00 8.64 C +ATOM 1134 C SER A 141 15.734 -21.319 4.073 1.00 8.64 C +ATOM 1135 CB SER A 141 15.750 -18.827 4.252 1.00 8.64 C +ATOM 1136 O SER A 141 15.479 -21.652 5.233 1.00 8.64 O +ATOM 1137 OG SER A 141 17.145 -18.982 4.447 1.00 8.64 O +ATOM 1138 N ALA A 142 15.693 -22.473 3.364 1.00 8.64 N +ATOM 1139 CA ALA A 142 16.460 -23.706 3.208 1.00 8.64 C +ATOM 1140 C ALA A 142 17.958 -23.439 3.331 1.00 8.64 C +ATOM 1141 CB ALA A 142 16.148 -24.360 1.864 1.00 8.64 C +ATOM 1142 O ALA A 142 18.474 -22.483 2.749 1.00 8.64 O +ATOM 1143 N ASP A 143 18.576 -22.902 4.585 1.00 8.64 N +ATOM 1144 CA ASP A 143 19.977 -23.243 4.809 1.00 8.64 C +ATOM 1145 C ASP A 143 20.646 -22.233 5.738 1.00 8.64 C +ATOM 1146 CB ASP A 143 20.732 -23.315 3.480 1.00 8.64 C +ATOM 1147 O ASP A 143 20.988 -21.126 5.317 1.00 8.64 O +ATOM 1148 CG ASP A 143 21.865 -24.326 3.495 1.00 8.64 C +ATOM 1149 OD1 ASP A 143 22.444 
-24.609 2.424 1.00 8.64 O +ATOM 1150 OD2 ASP A 143 22.180 -24.846 4.587 1.00 8.64 O +ATOM 1151 N SER A 144 20.228 -21.957 7.002 1.00 8.64 N +ATOM 1152 CA SER A 144 21.341 -21.510 7.832 1.00 8.64 C +ATOM 1153 C SER A 144 21.253 -22.095 9.238 1.00 8.64 C +ATOM 1154 CB SER A 144 21.373 -19.983 7.908 1.00 8.64 C +ATOM 1155 O SER A 144 21.817 -21.541 10.183 1.00 8.64 O +ATOM 1156 OG SER A 144 20.155 -19.479 8.427 1.00 8.64 O +ATOM 1157 N ARG A 145 20.767 -23.335 9.468 1.00 8.64 N +ATOM 1158 CA ARG A 145 20.907 -23.823 10.836 1.00 8.64 C +ATOM 1159 C ARG A 145 22.363 -24.145 11.156 1.00 8.64 C +ATOM 1160 CB ARG A 145 20.036 -25.062 11.058 1.00 8.64 C +ATOM 1161 O ARG A 145 23.035 -24.835 10.387 1.00 8.64 O +ATOM 1162 CG ARG A 145 18.542 -24.781 11.007 1.00 8.64 C +ATOM 1163 CD ARG A 145 17.724 -26.058 11.140 1.00 8.64 C +ATOM 1164 NE ARG A 145 17.859 -26.910 9.963 1.00 8.64 N +ATOM 1165 NH1 ARG A 145 16.657 -28.674 10.846 1.00 8.64 N +ATOM 1166 NH2 ARG A 145 17.532 -28.821 8.732 1.00 8.64 N +ATOM 1167 CZ ARG A 145 17.349 -28.133 9.850 1.00 8.64 C +ATOM 1168 N THR A 146 23.240 -23.194 11.377 1.00 8.64 N +ATOM 1169 CA THR A 146 24.353 -23.445 12.287 1.00 8.64 C +ATOM 1170 C THR A 146 23.848 -23.974 13.626 1.00 8.64 C +ATOM 1171 CB THR A 146 25.186 -22.170 12.516 1.00 8.64 C +ATOM 1172 O THR A 146 22.915 -23.417 14.208 1.00 8.64 O +ATOM 1173 CG2 THR A 146 26.113 -21.899 11.336 1.00 8.64 C +ATOM 1174 OG1 THR A 146 24.303 -21.054 12.683 1.00 8.64 O +ATOM 1175 N VAL A 147 23.601 -25.297 13.669 1.00 8.64 N +ATOM 1176 CA VAL A 147 23.604 -26.015 14.940 1.00 8.64 C +ATOM 1177 C VAL A 147 25.042 -26.295 15.372 1.00 8.64 C +ATOM 1178 CB VAL A 147 22.809 -27.337 14.846 1.00 8.64 C +ATOM 1179 O VAL A 147 25.856 -26.768 14.575 1.00 8.64 O +ATOM 1180 CG1 VAL A 147 22.708 -28.004 16.217 1.00 8.64 C +ATOM 1181 CG2 VAL A 147 21.418 -27.082 14.269 1.00 8.64 C +ATOM 1182 N ASP A 148 25.801 -25.404 15.970 1.00 8.64 N +ATOM 1183 CA ASP A 148 26.582 -25.450 17.202 1.00 8.64 C +ATOM 1184 C ASP 
A 148 27.024 -24.051 17.625 1.00 8.64 C +ATOM 1185 CB ASP A 148 27.802 -26.358 17.031 1.00 8.64 C +ATOM 1186 O ASP A 148 27.467 -23.256 16.793 1.00 8.64 O +ATOM 1187 CG ASP A 148 27.486 -27.826 17.259 1.00 8.64 C +ATOM 1188 OD1 ASP A 148 28.258 -28.694 16.799 1.00 8.64 O +ATOM 1189 OD2 ASP A 148 26.454 -28.116 17.901 1.00 8.64 O +TER 1190 ASP A 148 +ENDMDL +END diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/relax/utils.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/relax/utils.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,80 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Utils for minimization.""" +import io +from alphafold.common import residue_constants +from Bio import PDB +import numpy as np +from simtk.openmm import app as openmm_app +from simtk.openmm.app.internal.pdbstructure import PdbStructure + + +def overwrite_pdb_coordinates(pdb_str: str, pos) -> str: + pdb_file = io.StringIO(pdb_str) + structure = PdbStructure(pdb_file) + topology = openmm_app.PDBFile(structure).getTopology() + with io.StringIO() as f: + openmm_app.PDBFile.writeFile(topology, pos, f) + return f.getvalue() + + +def overwrite_b_factors(pdb_str: str, bfactors: np.ndarray) -> str: + """Overwrites the B-factors in pdb_str with contents of bfactors array. + + Args: + pdb_str: An input PDB string. + bfactors: A numpy array with shape [1, n_residues, 37]. 
We assume that the + B-factors are per residue; i.e. that the nonzero entries are identical in + [0, i, :]. + + Returns: + A new PDB string with the B-factors replaced. + """ + if bfactors.shape[-1] != residue_constants.atom_type_num: + raise ValueError( + f'Invalid final dimension size for bfactors: {bfactors.shape[-1]}.') + + parser = PDB.PDBParser(QUIET=True) + handle = io.StringIO(pdb_str) + structure = parser.get_structure('', handle) + + curr_resid = ('', '', '') + idx = -1 + for atom in structure.get_atoms(): + atom_resid = atom.parent.get_id() + if atom_resid != curr_resid: + idx += 1 + if idx >= bfactors.shape[0]: + raise ValueError('Index into bfactors exceeds number of residues. ' + 'B-factors shape: {shape}, idx: {idx}.') + curr_resid = atom_resid + atom.bfactor = bfactors[idx, residue_constants.atom_order['CA']] + + new_pdb = io.StringIO() + pdb_io = PDB.PDBIO() + pdb_io.set_structure(structure) + pdb_io.save(new_pdb) + return new_pdb.getvalue() + + +def assert_equal_nonterminal_atom_types( + atom_mask: np.ndarray, ref_atom_mask: np.ndarray): + """Checks that pre- and post-minimized proteins have same atom set.""" + # Ignore any terminal OXT atoms which may have been added by minimization. + oxt = residue_constants.atom_order['OXT'] + no_oxt_mask = np.ones(shape=atom_mask.shape, dtype=np.bool) + no_oxt_mask[..., oxt] = False + np.testing.assert_almost_equal(ref_atom_mask[no_oxt_mask], + atom_mask[no_oxt_mask]) diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/alphafold/relax/utils_test.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/alphafold/relax/utils_test.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,55 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for utils.""" + +import os + +from absl.testing import absltest +from alphafold.common import protein +from alphafold.relax import utils +import numpy as np +# Internal import (7716). + + +class UtilsTest(absltest.TestCase): + + def test_overwrite_b_factors(self): + testdir = os.path.join( + absltest.get_default_test_srcdir(), + 'alphafold/relax/testdata/' + 'multiple_disulfides_target.pdb') + with open(testdir) as f: + test_pdb = f.read() + n_residues = 191 + bfactors = np.stack([np.arange(0, n_residues)] * 37, axis=-1) + + output_pdb = utils.overwrite_b_factors(test_pdb, bfactors) + + # Check that the atom lines are unchanged apart from the B-factors. + atom_lines_original = [l for l in test_pdb.split('\n') if l[:4] == ('ATOM')] + atom_lines_new = [l for l in output_pdb.split('\n') if l[:4] == ('ATOM')] + for line_original, line_new in zip(atom_lines_original, atom_lines_new): + self.assertEqual(line_original[:60].strip(), line_new[:60].strip()) + self.assertEqual(line_original[66:].strip(), line_new[66:].strip()) + + # Check B-factors are correctly set for all atoms present. 
+ as_protein = protein.from_pdb_string(output_pdb) + np.testing.assert_almost_equal( + np.where(as_protein.atom_mask > 0, as_protein.b_factors, 0), + np.where(as_protein.atom_mask > 0, bfactors, 0)) + + +if __name__ == '__main__': + absltest.main() diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/docker/Dockerfile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/docker/Dockerfile Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,85 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +ARG CUDA=11.0 +FROM nvidia/cuda:${CUDA}-cudnn8-runtime-ubuntu18.04 +# FROM directive resets ARGS, so we specify again (the value is retained if +# previously set). +ARG CUDA + +# Use bash to support string substitution. +SHELL ["/bin/bash", "-c"] + +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \ + build-essential \ + cmake \ + cuda-command-line-tools-${CUDA/./-} \ + git \ + hmmer \ + kalign \ + tzdata \ + wget \ + && rm -rf /var/lib/apt/lists/* + +# Compile HHsuite from source. +RUN git clone --branch v3.3.0 https://github.com/soedinglab/hh-suite.git /tmp/hh-suite \ + && mkdir /tmp/hh-suite/build \ + && pushd /tmp/hh-suite/build \ + && cmake -DCMAKE_INSTALL_PREFIX=/opt/hhsuite .. \ + && make -j 4 && make install \ + && ln -s /opt/hhsuite/bin/* /usr/bin \ + && popd \ + && rm -rf /tmp/hh-suite + +# Install Miniconda package manager. 
+RUN wget -q -P /tmp \ + https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \ + && bash /tmp/Miniconda3-latest-Linux-x86_64.sh -b -p /opt/conda \ + && rm /tmp/Miniconda3-latest-Linux-x86_64.sh + +# Install conda packages. +ENV PATH="/opt/conda/bin:$PATH" +RUN conda update -qy conda \ + && conda install -y -c conda-forge \ + openmm=7.5.1 \ + cudatoolkit==${CUDA_VERSION} \ + pdbfixer \ + pip \ + python=3.7 + +COPY . /app/alphafold +RUN wget -q -P /app/alphafold/alphafold/common/ \ + https://git.scicore.unibas.ch/schwede/openstructure/-/raw/7102c63615b64735c4941278d92b554ec94415f8/modules/mol/alg/src/stereo_chemical_props.txt + +# Install pip packages. +RUN pip3 install --upgrade pip \ + && pip3 install -r /app/alphafold/requirements.txt \ + && pip3 install --upgrade jax jaxlib==0.1.69+cuda${CUDA/./} -f \ + https://storage.googleapis.com/jax-releases/jax_releases.html + +# Apply OpenMM patch. +WORKDIR /opt/conda/lib/python3.7/site-packages +RUN patch -p0 < /app/alphafold/docker/openmm.patch + +# We need to run `ldconfig` first to ensure GPUs are visible, due to some quirk +# with Debian. See https://github.com/NVIDIA/nvidia-docker/issues/1399 for +# details. +# ENTRYPOINT does not support easily running multiple commands, so instead we +# write a shell script to wrap them up. 
+WORKDIR /app/alphafold +RUN echo $'#!/bin/bash\n\ +ldconfig\n\ +python /app/alphafold/run_alphafold.py "$@"' > /app/run_alphafold.sh \ + && chmod +x /app/run_alphafold.sh +ENTRYPOINT ["/app/run_alphafold.sh"] diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/docker/openmm.patch --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/docker/openmm.patch Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,42 @@ +Index: simtk/openmm/app/topology.py +=================================================================== +--- simtk.orig/openmm/app/topology.py ++++ simtk/openmm/app/topology.py +@@ -356,19 +356,35 @@ + def isCyx(res): + names = [atom.name for atom in res._atoms] + return 'SG' in names and 'HG' not in names ++ # This function is used to prevent multiple di-sulfide bonds from being ++ # assigned to a given atom. This is a DeepMind modification. ++ def isDisulfideBonded(atom): ++ for b in self._bonds: ++ if (atom in b and b[0].name == 'SG' and ++ b[1].name == 'SG'): ++ return True ++ ++ return False + + cyx = [res for res in self.residues() if res.name == 'CYS' and isCyx(res)] + atomNames = [[atom.name for atom in res._atoms] for res in cyx] + for i in range(len(cyx)): + sg1 = cyx[i]._atoms[atomNames[i].index('SG')] + pos1 = positions[sg1.index] ++ candidate_distance, candidate_atom = 0.3*nanometers, None + for j in range(i): + sg2 = cyx[j]._atoms[atomNames[j].index('SG')] + pos2 = positions[sg2.index] + delta = [x-y for (x,y) in zip(pos1, pos2)] + distance = sqrt(delta[0]*delta[0] + delta[1]*delta[1] + delta[2]*delta[2]) +- if distance < 0.3*nanometers: +- self.addBond(sg1, sg2) ++ if distance < candidate_distance and not isDisulfideBonded(sg2): ++ candidate_distance = distance ++ candidate_atom = sg2 ++ # Assign bond to closest pair. 
++ if candidate_atom: ++ self.addBond(sg1, candidate_atom) ++ ++ + + class Chain(object): + """A Chain object represents a chain within a Topology.""" diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/docker/requirements.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/docker/requirements.txt Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,3 @@ +# Dependencies necessary to execute run_docker.py +absl-py==0.13.0 +docker==5.0.0 diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/docker/run_docker.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/docker/run_docker.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,231 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Docker launch script for Alphafold docker image.""" + +import os +import pathlib +import signal +from typing import Tuple + +from absl import app +from absl import flags +from absl import logging +import docker +from docker import types + + +flags.DEFINE_bool( + 'use_gpu', True, 'Enable NVIDIA runtime to run with GPUs.') +flags.DEFINE_string( + 'gpu_devices', 'all', + 'Comma separated list of devices to pass to NVIDIA_VISIBLE_DEVICES.') +flags.DEFINE_list( + 'fasta_paths', None, 'Paths to FASTA files, each containing a prediction ' + 'target that will be folded one after another. If a FASTA file contains ' + 'multiple sequences, then it will be folded as a multimer. Paths should be ' + 'separated by commas. 
All FASTA paths must have a unique basename as the ' + 'basename is used to name the output directories for each prediction.') +flags.DEFINE_list( + 'is_prokaryote_list', None, 'Optional for multimer system, not used by the ' + 'single chain system. This list should contain a boolean for each fasta ' + 'specifying true where the target complex is from a prokaryote, and false ' + 'where it is not, or where the origin is unknown. These values determine ' + 'the pairing method for the MSA.') +flags.DEFINE_string( + 'output_dir', '/tmp/alphafold', + 'Path to a directory that will store the results.') +flags.DEFINE_string( + 'data_dir', None, + 'Path to directory with supporting data: AlphaFold parameters and genetic ' + 'and template databases. Set to the target of download_all_databases.sh.') +flags.DEFINE_string( + 'docker_image_name', 'alphafold', 'Name of the AlphaFold Docker image.') +flags.DEFINE_string( + 'max_template_date', None, + 'Maximum template release date to consider (ISO-8601 format: YYYY-MM-DD). ' + 'Important if folding historical test sets.') +flags.DEFINE_enum( + 'db_preset', 'full_dbs', ['full_dbs', 'reduced_dbs'], + 'Choose preset MSA database configuration - smaller genetic database ' + 'config (reduced_dbs) or full genetic database config (full_dbs)') +flags.DEFINE_enum( + 'model_preset', 'monomer', + ['monomer', 'monomer_casp14', 'monomer_ptm', 'multimer'], + 'Choose preset model configuration - the monomer model, the monomer model ' + 'with extra ensembling, monomer model with pTM head, or multimer model') +flags.DEFINE_boolean( + 'benchmark', False, + 'Run multiple JAX model evaluations to obtain a timing that excludes the ' + 'compilation time, which should be more indicative of the time required ' + 'for inferencing many proteins.') +flags.DEFINE_boolean( + 'use_precomputed_msas', False, + 'Whether to read MSAs that have been written to disk. 
WARNING: This will ' + 'not check if the sequence, database or configuration have changed.') + +FLAGS = flags.FLAGS + +_ROOT_MOUNT_DIRECTORY = '/mnt/' + + +def _create_mount(mount_name: str, path: str) -> Tuple[types.Mount, str]: + path = os.path.abspath(path) + source_path = os.path.dirname(path) + target_path = os.path.join(_ROOT_MOUNT_DIRECTORY, mount_name) + logging.info('Mounting %s -> %s', source_path, target_path) + mount = types.Mount(target_path, source_path, type='bind', read_only=True) + return mount, os.path.join(target_path, os.path.basename(path)) + + +def main(argv): + if len(argv) > 1: + raise app.UsageError('Too many command-line arguments.') + + # You can individually override the following paths if you have placed the + # data in locations other than the FLAGS.data_dir. + + # Path to the Uniref90 database for use by JackHMMER. + uniref90_database_path = os.path.join( + FLAGS.data_dir, 'uniref90', 'uniref90.fasta') + + # Path to the Uniprot database for use by JackHMMER. + uniprot_database_path = os.path.join( + FLAGS.data_dir, 'uniprot', 'uniprot.fasta') + + # Path to the MGnify database for use by JackHMMER. + mgnify_database_path = os.path.join( + FLAGS.data_dir, 'mgnify', 'mgy_clusters_2018_12.fa') + + # Path to the BFD database for use by HHblits. + bfd_database_path = os.path.join( + FLAGS.data_dir, 'bfd', + 'bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt') + + # Path to the Small BFD database for use by JackHMMER. + small_bfd_database_path = os.path.join( + FLAGS.data_dir, 'small_bfd', 'bfd-first_non_consensus_sequences.fasta') + + # Path to the Uniclust30 database for use by HHblits. + uniclust30_database_path = os.path.join( + FLAGS.data_dir, 'uniclust30', 'uniclust30_2018_08', 'uniclust30_2018_08') + + # Path to the PDB70 database for use by HHsearch. + pdb70_database_path = os.path.join(FLAGS.data_dir, 'pdb70', 'pdb70') + + # Path to the PDB seqres database for use by hmmsearch. 
+ pdb_seqres_database_path = os.path.join( + FLAGS.data_dir, 'pdb_seqres', 'pdb_seqres.txt') + + # Path to a directory with template mmCIF structures, each named .cif. + template_mmcif_dir = os.path.join(FLAGS.data_dir, 'pdb_mmcif', 'mmcif_files') + + # Path to a file mapping obsolete PDB IDs to their replacements. + obsolete_pdbs_path = os.path.join(FLAGS.data_dir, 'pdb_mmcif', 'obsolete.dat') + + alphafold_path = pathlib.Path(__file__).parent.parent + data_dir_path = pathlib.Path(FLAGS.data_dir) + if alphafold_path == data_dir_path or alphafold_path in data_dir_path.parents: + raise app.UsageError( + f'The download directory {FLAGS.data_dir} should not be a subdirectory ' + f'in the AlphaFold repository directory. If it is, the Docker build is ' + f'slow since the large databases are copied during the image creation.') + + mounts = [] + command_args = [] + + # Mount each fasta path as a unique target directory. + target_fasta_paths = [] + for i, fasta_path in enumerate(FLAGS.fasta_paths): + mount, target_path = _create_mount(f'fasta_path_{i}', fasta_path) + mounts.append(mount) + target_fasta_paths.append(target_path) + command_args.append(f'--fasta_paths={",".join(target_fasta_paths)}') + + database_paths = [ + ('uniref90_database_path', uniref90_database_path), + ('mgnify_database_path', mgnify_database_path), + ('data_dir', FLAGS.data_dir), + ('template_mmcif_dir', template_mmcif_dir), + ('obsolete_pdbs_path', obsolete_pdbs_path), + ] + + if FLAGS.model_preset == 'multimer': + database_paths.append(('uniprot_database_path', uniprot_database_path)) + database_paths.append(('pdb_seqres_database_path', + pdb_seqres_database_path)) + else: + database_paths.append(('pdb70_database_path', pdb70_database_path)) + + if FLAGS.db_preset == 'reduced_dbs': + database_paths.append(('small_bfd_database_path', small_bfd_database_path)) + else: + database_paths.extend([ + ('uniclust30_database_path', uniclust30_database_path), + ('bfd_database_path', bfd_database_path), + ]) 
+ for name, path in database_paths: + if path: + mount, target_path = _create_mount(name, path) + mounts.append(mount) + command_args.append(f'--{name}={target_path}') + + output_target_path = os.path.join(_ROOT_MOUNT_DIRECTORY, 'output') + mounts.append(types.Mount(output_target_path, FLAGS.output_dir, type='bind')) + + command_args.extend([ + f'--output_dir={output_target_path}', + f'--max_template_date={FLAGS.max_template_date}', + f'--db_preset={FLAGS.db_preset}', + f'--model_preset={FLAGS.model_preset}', + f'--benchmark={FLAGS.benchmark}', + f'--use_precomputed_msas={FLAGS.use_precomputed_msas}', + '--logtostderr', + ]) + + if FLAGS.is_prokaryote_list: + command_args.append( + f'--is_prokaryote_list={",".join(FLAGS.is_prokaryote_list)}') + + client = docker.from_env() + container = client.containers.run( + image=FLAGS.docker_image_name, + command=command_args, + runtime='nvidia' if FLAGS.use_gpu else None, + remove=True, + detach=True, + mounts=mounts, + environment={ + 'NVIDIA_VISIBLE_DEVICES': FLAGS.gpu_devices, + # The following flags allow us to make predictions on proteins that + # would typically be too long to fit into GPU memory. + 'TF_FORCE_UNIFIED_MEMORY': '1', + 'XLA_PYTHON_CLIENT_MEM_FRACTION': '4.0', + }) + + # Add signal handler to ensure CTRL+C also stops the running container. 
+ signal.signal(signal.SIGINT, + lambda unused_sig, unused_frame: container.kill()) + + for line in container.logs(stream=True): + logging.info(line.strip().decode('utf-8')) + + +if __name__ == '__main__': + flags.mark_flags_as_required([ + 'data_dir', + 'fasta_paths', + 'max_template_date', + ]) + app.run(main) diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/imgs/casp14_predictions.gif Binary file docker/alphafold/imgs/casp14_predictions.gif has changed diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/imgs/header.jpg Binary file docker/alphafold/imgs/header.jpg has changed diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/notebooks/AlphaFold.ipynb --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/notebooks/AlphaFold.ipynb Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,795 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "pc5-mbsX9PZC" + }, + "source": [ + "# AlphaFold Colab\n", + "\n", + "This Colab notebook allows you to easily predict the structure of a protein using a slightly simplified version of [AlphaFold v2.1.0](https://doi.org/10.1038/s41586-021-03819-2). \n", + "\n", + "**Differences to AlphaFold v2.1.0**\n", + "\n", + "In comparison to AlphaFold v2.1.0, this Colab notebook uses **no templates (homologous structures)** and a selected portion of the [BFD database](https://bfd.mmseqs.com/). We have validated these changes on several thousand recent PDB structures. While accuracy will be near-identical to the full AlphaFold system on many targets, a small fraction have a large drop in accuracy due to the smaller MSA and lack of templates. 
For best reliability, we recommend instead using the [full open source AlphaFold](https://github.com/deepmind/alphafold/), or the [AlphaFold Protein Structure Database](https://alphafold.ebi.ac.uk/).\n", + "\n", + "**This Colab has a small drop in average accuracy for multimers compared to a local AlphaFold installation; for full multimer accuracy, it is highly recommended to run [AlphaFold locally](https://github.com/deepmind/alphafold#running-alphafold).** Moreover, AlphaFold-Multimer requires searching for an MSA for every unique sequence in the complex, hence it is substantially slower. If your notebook times out due to slow multimer MSA search, we recommend either using Colab Pro or running AlphaFold locally.\n", + "\n", + "Please note that this Colab notebook is provided as an early-access prototype and is not a finished product. It is provided for theoretical modelling only and caution should be exercised in its use. \n", + "\n", + "**Citing this work**\n", + "\n", + "Any publication that discloses findings arising from using this notebook should [cite](https://github.com/deepmind/alphafold/#citing-this-work) the [AlphaFold paper](https://doi.org/10.1038/s41586-021-03819-2).\n", + "\n", + "**Licenses**\n", + "\n", + "This Colab uses the [AlphaFold model parameters](https://github.com/deepmind/alphafold/#model-parameters-license) and its outputs are thus for non-commercial use only, under the Creative Commons Attribution-NonCommercial 4.0 International ([CC BY-NC 4.0](https://creativecommons.org/licenses/by-nc/4.0/legalcode)) license. The Colab itself is provided under the [Apache 2.0 license](https://www.apache.org/licenses/LICENSE-2.0). 
See the full license statement below.\n", + "\n", + "**More information**\n", + "\n", + "You can find more information about how AlphaFold works in the following papers:\n", + "\n", + "* [AlphaFold methods paper](https://www.nature.com/articles/s41586-021-03819-2)\n", + "* [AlphaFold predictions of the human proteome paper](https://www.nature.com/articles/s41586-021-03828-1)\n", + "* [AlphaFold-Multimer paper](https://www.biorxiv.org/content/10.1101/2021.10.04.463034v1)\n", + "\n", + "FAQ on how to interpret AlphaFold predictions are [here](https://alphafold.ebi.ac.uk/faq)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "woIxeCPygt7K" + }, + "outputs": [], + "source": [ + "#@title Install third-party software\n", + "\n", + "#@markdown Please execute this cell by pressing the _Play_ button \n", + "#@markdown on the left to download and import third-party software \n", + "#@markdown in this Colab notebook. (See the [acknowledgements](https://github.com/deepmind/alphafold/#acknowledgements) in our readme.)\n", + "\n", + "#@markdown **Note**: This installs the software on the Colab \n", + "#@markdown notebook in the cloud and not on your computer.\n", + "\n", + "from IPython.utils import io\n", + "import os\n", + "import subprocess\n", + "import tqdm.notebook\n", + "\n", + "TQDM_BAR_FORMAT = '{l_bar}{bar}| {n_fmt}/{total_fmt} [elapsed: {elapsed} remaining: {remaining}]'\n", + "\n", + "try:\n", + " with tqdm.notebook.tqdm(total=100, bar_format=TQDM_BAR_FORMAT) as pbar:\n", + " with io.capture_output() as captured:\n", + " # Uninstall default Colab version of TF.\n", + " %shell pip uninstall -y tensorflow\n", + "\n", + " %shell sudo apt install --quiet --yes hmmer\n", + " pbar.update(6)\n", + "\n", + " # Install py3dmol.\n", + " %shell pip install py3dmol\n", + " pbar.update(2)\n", + "\n", + " # Install OpenMM and pdbfixer.\n", + " %shell rm -rf /opt/conda\n", + " %shell wget -q -P /tmp \\\n", + " 
https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh \\\n", + " \u0026\u0026 bash /tmp/Miniconda3-latest-Linux-x86_64.sh -b -p /opt/conda \\\n", + " \u0026\u0026 rm /tmp/Miniconda3-latest-Linux-x86_64.sh\n", + " pbar.update(9)\n", + "\n", + " PATH=%env PATH\n", + " %env PATH=/opt/conda/bin:{PATH}\n", + " %shell conda update -qy conda \\\n", + " \u0026\u0026 conda install -qy -c conda-forge \\\n", + " python=3.7 \\\n", + " openmm=7.5.1 \\\n", + " pdbfixer\n", + " pbar.update(80)\n", + "\n", + " # Create a ramdisk to store a database chunk to make Jackhmmer run fast.\n", + " %shell sudo mkdir -m 777 --parents /tmp/ramdisk\n", + " %shell sudo mount -t tmpfs -o size=9G ramdisk /tmp/ramdisk\n", + " pbar.update(2)\n", + "\n", + " %shell wget -q -P /content \\\n", + " https://git.scicore.unibas.ch/schwede/openstructure/-/raw/7102c63615b64735c4941278d92b554ec94415f8/modules/mol/alg/src/stereo_chemical_props.txt\n", + " pbar.update(1)\n", + "except subprocess.CalledProcessError:\n", + " print(captured)\n", + " raise" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "VzJ5iMjTtoZw" + }, + "outputs": [], + "source": [ + "#@title Download AlphaFold\n", + "\n", + "#@markdown Please execute this cell by pressing the *Play* button on \n", + "#@markdown the left.\n", + "\n", + "GIT_REPO = 'https://github.com/deepmind/alphafold'\n", + "\n", + "SOURCE_URL = 'https://storage.googleapis.com/alphafold/alphafold_params_colab_2021-10-27.tar'\n", + "PARAMS_DIR = './alphafold/data/params'\n", + "PARAMS_PATH = os.path.join(PARAMS_DIR, os.path.basename(SOURCE_URL))\n", + "\n", + "try:\n", + " with tqdm.notebook.tqdm(total=100, bar_format=TQDM_BAR_FORMAT) as pbar:\n", + " with io.capture_output() as captured:\n", + " %shell rm -rf alphafold\n", + " %shell git clone --branch main {GIT_REPO} alphafold\n", + " pbar.update(8)\n", + " # Install the required versions of all dependencies.\n", + " %shell pip3 install -r 
./alphafold/requirements.txt\n", + " # Run setup.py to install only AlphaFold.\n", + " %shell pip3 install --no-dependencies ./alphafold\n", + " pbar.update(10)\n", + "\n", + " # Apply OpenMM patch.\n", + " %shell pushd /opt/conda/lib/python3.7/site-packages/ \u0026\u0026 \\\n", + " patch -p0 \u003c /content/alphafold/docker/openmm.patch \u0026\u0026 \\\n", + " popd\n", + "\n", + " # Make sure stereo_chemical_props.txt is in all locations where it could be searched for.\n", + " %shell mkdir -p /content/alphafold/alphafold/common\n", + " %shell cp -f /content/stereo_chemical_props.txt /content/alphafold/alphafold/common\n", + " %shell mkdir -p /opt/conda/lib/python3.7/site-packages/alphafold/common/\n", + " %shell cp -f /content/stereo_chemical_props.txt /opt/conda/lib/python3.7/site-packages/alphafold/common/\n", + "\n", + " %shell mkdir --parents \"{PARAMS_DIR}\"\n", + " %shell wget -O \"{PARAMS_PATH}\" \"{SOURCE_URL}\"\n", + " pbar.update(27)\n", + "\n", + " %shell tar --extract --verbose --file=\"{PARAMS_PATH}\" \\\n", + " --directory=\"{PARAMS_DIR}\" --preserve-permissions\n", + " %shell rm \"{PARAMS_PATH}\"\n", + " pbar.update(55)\n", + "except subprocess.CalledProcessError:\n", + " print(captured)\n", + " raise\n", + "\n", + "import jax\n", + "if jax.local_devices()[0].platform == 'tpu':\n", + " raise RuntimeError('Colab TPU runtime not supported. Change it to GPU via Runtime -\u003e Change Runtime Type -\u003e Hardware accelerator -\u003e GPU.')\n", + "elif jax.local_devices()[0].platform == 'cpu':\n", + " raise RuntimeError('Colab CPU runtime not supported. 
Change it to GPU via Runtime -\u003e Change Runtime Type -\u003e Hardware accelerator -\u003e GPU.')\n", + "else:\n", + " print(f'Running with {jax.local_devices()[0].device_kind} GPU')\n", + "\n", + "# Make sure everything we need is on the path.\n", + "import sys\n", + "sys.path.append('/opt/conda/lib/python3.7/site-packages')\n", + "sys.path.append('/content/alphafold')\n", + "\n", + "# Make sure all necessary environment variables are set.\n", + "import os\n", + "os.environ['TF_FORCE_UNIFIED_MEMORY'] = '1'\n", + "os.environ['XLA_PYTHON_CLIENT_MEM_FRACTION'] = '2.0'" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "W4JpOs6oA-QS" + }, + "source": [ + "## Making a prediction\n", + "\n", + "Please paste the sequence of your protein in the text box below, then run the remaining cells via _Runtime_ \u003e _Run after_. You can also run the cells individually by pressing the _Play_ button on the left.\n", + "\n", + "Note that the search against databases and the actual prediction can take some time, from minutes to hours, depending on the length of the protein and what type of GPU you are allocated by Colab (see FAQ below)." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "rowN0bVYLe9n" + }, + "outputs": [], + "source": [ + "#@title Enter the amino acid sequence(s) to fold ⬇️\n", + "#@markdown Enter the amino acid sequence(s) to fold:\n", + "#@markdown * If you enter only a single sequence, the monomer model will be used.\n", + "#@markdown * If you enter multiple sequences, the multimer model will be used.\n", + "\n", + "from alphafold.notebooks import notebook_utils\n", + "\n", + "sequence_1 = 'MAAHKGAEHHHKAAEHHEQAAKHHHAAAEHHEKGEHEQAAHHADTAYAHHKHAEEHAAQAAKHDAEHHAPKPH' #@param {type:\"string\"}\n", + "sequence_2 = '' #@param {type:\"string\"}\n", + "sequence_3 = '' #@param {type:\"string\"}\n", + "sequence_4 = '' #@param {type:\"string\"}\n", + "sequence_5 = '' #@param {type:\"string\"}\n", + "sequence_6 = '' #@param {type:\"string\"}\n", + "sequence_7 = '' #@param {type:\"string\"}\n", + "sequence_8 = '' #@param {type:\"string\"}\n", + "\n", + "input_sequences = (sequence_1, sequence_2, sequence_3, sequence_4,\n", + " sequence_5, sequence_6, sequence_7, sequence_8)\n", + "\n", + "#@markdown If folding a complex target and all the input sequences are\n", + "#@markdown prokaryotic then set `is_prokaryote` to `True`. 
Set to `False`\n", + "#@markdown otherwise or if the origin is unknown.\n", + "\n", + "is_prokaryote = False #@param {type:\"boolean\"}\n", + "\n", + "MIN_SINGLE_SEQUENCE_LENGTH = 16\n", + "MAX_SINGLE_SEQUENCE_LENGTH = 2500\n", + "MAX_MULTIMER_LENGTH = 2500\n", + "\n", + "# Validate the input.\n", + "sequences, model_type_to_use = notebook_utils.validate_input(\n", + " input_sequences=input_sequences,\n", + " min_length=MIN_SINGLE_SEQUENCE_LENGTH,\n", + " max_length=MAX_SINGLE_SEQUENCE_LENGTH,\n", + " max_multimer_length=MAX_MULTIMER_LENGTH)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "2tTeTTsLKPjB" + }, + "outputs": [], + "source": [ + "#@title Search against genetic databases\n", + "\n", + "#@markdown Once this cell has been executed, you will see\n", + "#@markdown statistics about the multiple sequence alignment \n", + "#@markdown (MSA) that will be used by AlphaFold. In particular, \n", + "#@markdown you’ll see how well each residue is covered by similar \n", + "#@markdown sequences in the MSA.\n", + "\n", + "# --- Python imports ---\n", + "import collections\n", + "import copy\n", + "from concurrent import futures\n", + "import json\n", + "import random\n", + "\n", + "from urllib import request\n", + "from google.colab import files\n", + "from matplotlib import gridspec\n", + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import py3Dmol\n", + "\n", + "from alphafold.model import model\n", + "from alphafold.model import config\n", + "from alphafold.model import data\n", + "\n", + "from alphafold.data import feature_processing\n", + "from alphafold.data import msa_pairing\n", + "from alphafold.data import parsers\n", + "from alphafold.data import pipeline\n", + "from alphafold.data import pipeline_multimer\n", + "from alphafold.data.tools import jackhmmer\n", + "\n", + "from alphafold.common import protein\n", + "\n", + "from alphafold.relax import relax\n", + "from 
alphafold.relax import utils\n", + "\n", + "from IPython import display\n", + "from ipywidgets import GridspecLayout\n", + "from ipywidgets import Output\n", + "\n", + "# Color bands for visualizing plddt\n", + "PLDDT_BANDS = [(0, 50, '#FF7D45'),\n", + " (50, 70, '#FFDB13'),\n", + " (70, 90, '#65CBF3'),\n", + " (90, 100, '#0053D6')]\n", + "\n", + "# --- Find the closest source ---\n", + "test_url_pattern = 'https://storage.googleapis.com/alphafold-colab{:s}/latest/uniref90_2021_03.fasta.1'\n", + "ex = futures.ThreadPoolExecutor(3)\n", + "def fetch(source):\n", + " request.urlretrieve(test_url_pattern.format(source))\n", + " return source\n", + "fs = [ex.submit(fetch, source) for source in ['', '-europe', '-asia']]\n", + "source = None\n", + "for f in futures.as_completed(fs):\n", + " source = f.result()\n", + " ex.shutdown()\n", + " break\n", + "\n", + "JACKHMMER_BINARY_PATH = '/usr/bin/jackhmmer'\n", + "DB_ROOT_PATH = f'https://storage.googleapis.com/alphafold-colab{source}/latest/'\n", + "# The z_value is the number of sequences in a database.\n", + "MSA_DATABASES = [\n", + " {'db_name': 'uniref90',\n", + " 'db_path': f'{DB_ROOT_PATH}uniref90_2021_03.fasta',\n", + " 'num_streamed_chunks': 59,\n", + " 'z_value': 135_301_051},\n", + " {'db_name': 'smallbfd',\n", + " 'db_path': f'{DB_ROOT_PATH}bfd-first_non_consensus_sequences.fasta',\n", + " 'num_streamed_chunks': 17,\n", + " 'z_value': 65_984_053},\n", + " {'db_name': 'mgnify',\n", + " 'db_path': f'{DB_ROOT_PATH}mgy_clusters_2019_05.fasta',\n", + " 'num_streamed_chunks': 71,\n", + " 'z_value': 304_820_129},\n", + "]\n", + "\n", + "# Search UniProt and construct the all_seq features only for heteromers, not homomers.\n", + "if model_type_to_use == notebook_utils.ModelType.MULTIMER and len(set(sequences)) \u003e 1:\n", + " MSA_DATABASES.extend([\n", + " # Swiss-Prot and TrEMBL are concatenated together as UniProt.\n", + " {'db_name': 'uniprot',\n", + " 'db_path': f'{DB_ROOT_PATH}uniprot_2021_03.fasta',\n", + " 
'num_streamed_chunks': 98,\n", + " 'z_value': 219_174_961 + 565_254},\n", + " ])\n", + "\n", + "TOTAL_JACKHMMER_CHUNKS = sum([cfg['num_streamed_chunks'] for cfg in MSA_DATABASES])\n", + "\n", + "MAX_HITS = {\n", + " 'uniref90': 10_000,\n", + " 'smallbfd': 5_000,\n", + " 'mgnify': 501,\n", + " 'uniprot': 50_000,\n", + "}\n", + "\n", + "\n", + "def get_msa(fasta_path):\n", + " \"\"\"Searches for MSA for the given sequence using chunked Jackhmmer search.\"\"\"\n", + "\n", + " # Run the search against chunks of genetic databases (since the genetic\n", + " # databases don't fit in Colab disk).\n", + " raw_msa_results = collections.defaultdict(list)\n", + " with tqdm.notebook.tqdm(total=TOTAL_JACKHMMER_CHUNKS, bar_format=TQDM_BAR_FORMAT) as pbar:\n", + " def jackhmmer_chunk_callback(i):\n", + " pbar.update(n=1)\n", + "\n", + " for db_config in MSA_DATABASES:\n", + " db_name = db_config['db_name']\n", + " pbar.set_description(f'Searching {db_name}')\n", + " jackhmmer_runner = jackhmmer.Jackhmmer(\n", + " binary_path=JACKHMMER_BINARY_PATH,\n", + " database_path=db_config['db_path'],\n", + " get_tblout=True,\n", + " num_streamed_chunks=db_config['num_streamed_chunks'],\n", + " streaming_callback=jackhmmer_chunk_callback,\n", + " z_value=db_config['z_value'])\n", + " # Group the results by database name.\n", + " raw_msa_results[db_name].extend(jackhmmer_runner.query(fasta_path))\n", + "\n", + " return raw_msa_results\n", + "\n", + "\n", + "features_for_chain = {}\n", + "raw_msa_results_for_sequence = {}\n", + "for sequence_index, sequence in enumerate(sequences, start=1):\n", + " print(f'\\nGetting MSA for sequence {sequence_index}')\n", + "\n", + " fasta_path = f'target_{sequence_index}.fasta'\n", + " with open(fasta_path, 'wt') as f:\n", + " f.write(f'\u003equery\\n{sequence}')\n", + "\n", + " # Don't do redundant work for multiple copies of the same chain in the multimer.\n", + " if sequence not in raw_msa_results_for_sequence:\n", + " raw_msa_results = 
get_msa(fasta_path=fasta_path)\n", + " raw_msa_results_for_sequence[sequence] = raw_msa_results\n", + " else:\n", + " raw_msa_results = copy.deepcopy(raw_msa_results_for_sequence[sequence])\n", + "\n", + " # Extract the MSAs from the Stockholm files.\n", + " # NB: deduplication happens later in pipeline.make_msa_features.\n", + " single_chain_msas = []\n", + " uniprot_msa = None\n", + " for db_name, db_results in raw_msa_results.items():\n", + " merged_msa = notebook_utils.merge_chunked_msa(\n", + " results=db_results, max_hits=MAX_HITS.get(db_name))\n", + " if merged_msa.sequences and db_name != 'uniprot':\n", + " single_chain_msas.append(merged_msa)\n", + " msa_size = len(set(merged_msa.sequences))\n", + " print(f'{msa_size} unique sequences found in {db_name} for sequence {sequence_index}')\n", + " elif merged_msa.sequences and db_name == 'uniprot':\n", + " uniprot_msa = merged_msa\n", + "\n", + " notebook_utils.show_msa_info(single_chain_msas=single_chain_msas, sequence_index=sequence_index)\n", + "\n", + " # Turn the raw data into model features.\n", + " feature_dict = {}\n", + " feature_dict.update(pipeline.make_sequence_features(\n", + " sequence=sequence, description='query', num_res=len(sequence)))\n", + " feature_dict.update(pipeline.make_msa_features(msas=single_chain_msas))\n", + " # We don't use templates in AlphaFold Colab notebook, add only empty placeholder features.\n", + " feature_dict.update(notebook_utils.empty_placeholder_template_features(\n", + " num_templates=0, num_res=len(sequence)))\n", + "\n", + " # Construct the all_seq features only for heteromers, not homomers.\n", + " if model_type_to_use == notebook_utils.ModelType.MULTIMER and len(set(sequences)) \u003e 1:\n", + " valid_feats = msa_pairing.MSA_FEATURES + (\n", + " 'msa_uniprot_accession_identifiers',\n", + " 'msa_species_identifiers',\n", + " )\n", + " all_seq_features = {\n", + " f'{k}_all_seq': v for k, v in pipeline.make_msa_features([uniprot_msa]).items()\n", + " if k in 
valid_feats}\n", + " feature_dict.update(all_seq_features)\n", + "\n", + " features_for_chain[protein.PDB_CHAIN_IDS[sequence_index - 1]] = feature_dict\n", + "\n", + "\n", + "# Do further feature post-processing depending on the model type.\n", + "if model_type_to_use == notebook_utils.ModelType.MONOMER:\n", + " np_example = features_for_chain[protein.PDB_CHAIN_IDS[0]]\n", + "\n", + "elif model_type_to_use == notebook_utils.ModelType.MULTIMER:\n", + " all_chain_features = {}\n", + " for chain_id, chain_features in features_for_chain.items():\n", + " all_chain_features[chain_id] = pipeline_multimer.convert_monomer_features(\n", + " chain_features, chain_id)\n", + "\n", + " all_chain_features = pipeline_multimer.add_assembly_features(all_chain_features)\n", + "\n", + " np_example = feature_processing.pair_and_merge(\n", + " all_chain_features=all_chain_features, is_prokaryote=is_prokaryote)\n", + "\n", + " # Pad MSA to avoid zero-sized extra_msa.\n", + " np_example = pipeline_multimer.pad_msa(np_example, min_num_seq=512)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "cellView": "form", + "id": "XUo6foMQxwS2" + }, + "outputs": [], + "source": [ + "#@title Run AlphaFold and download prediction\n", + "\n", + "#@markdown Once this cell has been executed, a zip-archive with\n", + "#@markdown the obtained prediction will be automatically downloaded\n", + "#@markdown to your computer.\n", + "\n", + "#@markdown In case you are having issues with the relaxation stage, you can disable it below.\n", + "#@markdown Warning: This means that the prediction might have distracting\n", + "#@markdown small stereochemical violations.\n", + "\n", + "run_relax = True #@param {type:\"boolean\"}\n", + "\n", + "# --- Run the model ---\n", + "if model_type_to_use == notebook_utils.ModelType.MONOMER:\n", + " model_names = config.MODEL_PRESETS['monomer'] + ('model_2_ptm',)\n", + "elif model_type_to_use == notebook_utils.ModelType.MULTIMER:\n", + " model_names 
= config.MODEL_PRESETS['multimer']\n", + "\n", + "output_dir = 'prediction'\n", + "os.makedirs(output_dir, exist_ok=True)\n", + "\n", + "plddts = {}\n", + "ranking_confidences = {}\n", + "pae_outputs = {}\n", + "unrelaxed_proteins = {}\n", + "\n", + "with tqdm.notebook.tqdm(total=len(model_names) + 1, bar_format=TQDM_BAR_FORMAT) as pbar:\n", + " for model_name in model_names:\n", + " pbar.set_description(f'Running {model_name}')\n", + "\n", + " cfg = config.model_config(model_name)\n", + " if model_type_to_use == notebook_utils.ModelType.MONOMER:\n", + " cfg.data.eval.num_ensemble = 1\n", + " elif model_type_to_use == notebook_utils.ModelType.MULTIMER:\n", + " cfg.model.num_ensemble_eval = 1\n", + " params = data.get_model_haiku_params(model_name, './alphafold/data')\n", + " model_runner = model.RunModel(cfg, params)\n", + " processed_feature_dict = model_runner.process_features(np_example, random_seed=0)\n", + " prediction = model_runner.predict(processed_feature_dict, random_seed=random.randrange(sys.maxsize))\n", + "\n", + " mean_plddt = prediction['plddt'].mean()\n", + "\n", + " if model_type_to_use == notebook_utils.ModelType.MONOMER:\n", + " if 'predicted_aligned_error' in prediction:\n", + " pae_outputs[model_name] = (prediction['predicted_aligned_error'],\n", + " prediction['max_predicted_aligned_error'])\n", + " else:\n", + " # Monomer models are sorted by mean pLDDT. 
Do not put monomer pTM models here as they\n", + " # should never get selected.\n", + " ranking_confidences[model_name] = prediction['ranking_confidence']\n", + " plddts[model_name] = prediction['plddt']\n", + " elif model_type_to_use == notebook_utils.ModelType.MULTIMER:\n", + " # Multimer models are sorted by pTM+ipTM.\n", + " ranking_confidences[model_name] = prediction['ranking_confidence']\n", + " plddts[model_name] = prediction['plddt']\n", + " pae_outputs[model_name] = (prediction['predicted_aligned_error'],\n", + " prediction['max_predicted_aligned_error'])\n", + "\n", + " # Set the b-factors to the per-residue plddt.\n", + " final_atom_mask = prediction['structure_module']['final_atom_mask']\n", + " b_factors = prediction['plddt'][:, None] * final_atom_mask\n", + " unrelaxed_protein = protein.from_prediction(\n", + " processed_feature_dict,\n", + " prediction,\n", + " b_factors=b_factors,\n", + " remove_leading_feature_dimension=(\n", + " model_type_to_use == notebook_utils.ModelType.MONOMER))\n", + " unrelaxed_proteins[model_name] = unrelaxed_protein\n", + "\n", + " # Delete unused outputs to save memory.\n", + " del model_runner\n", + " del params\n", + " del prediction\n", + " pbar.update(n=1)\n", + "\n", + " # --- AMBER relax the best model ---\n", + "\n", + " # Find the best model according to the mean pLDDT.\n", + " best_model_name = max(ranking_confidences.keys(), key=lambda x: ranking_confidences[x])\n", + "\n", + " if run_relax:\n", + " pbar.set_description(f'AMBER relaxation')\n", + " amber_relaxer = relax.AmberRelaxation(\n", + " max_iterations=0,\n", + " tolerance=2.39,\n", + " stiffness=10.0,\n", + " exclude_residues=[],\n", + " max_outer_iterations=3)\n", + " relaxed_pdb, _, _ = amber_relaxer.process(prot=unrelaxed_proteins[best_model_name])\n", + " else:\n", + " print('Warning: Running without the relaxation stage.')\n", + " relaxed_pdb = protein.to_pdb(unrelaxed_proteins[best_model_name])\n", + " pbar.update(n=1) # Finished AMBER relax.\n", 
+ "\n", + "# Construct multiclass b-factors to indicate confidence bands\n", + "# 0=very low, 1=low, 2=confident, 3=very high\n", + "banded_b_factors = []\n", + "for plddt in plddts[best_model_name]:\n", + " for idx, (min_val, max_val, _) in enumerate(PLDDT_BANDS):\n", + " if plddt \u003e= min_val and plddt \u003c= max_val:\n", + " banded_b_factors.append(idx)\n", + " break\n", + "banded_b_factors = np.array(banded_b_factors)[:, None] * final_atom_mask\n", + "to_visualize_pdb = utils.overwrite_b_factors(relaxed_pdb, banded_b_factors)\n", + "\n", + "\n", + "# Write out the prediction\n", + "pred_output_path = os.path.join(output_dir, 'selected_prediction.pdb')\n", + "with open(pred_output_path, 'w') as f:\n", + " f.write(relaxed_pdb)\n", + "\n", + "\n", + "# --- Visualise the prediction \u0026 confidence ---\n", + "show_sidechains = True\n", + "def plot_plddt_legend():\n", + " \"\"\"Plots the legend for pLDDT.\"\"\"\n", + " thresh = ['Very low (pLDDT \u003c 50)',\n", + " 'Low (70 \u003e pLDDT \u003e 50)',\n", + " 'Confident (90 \u003e pLDDT \u003e 70)',\n", + " 'Very high (pLDDT \u003e 90)']\n", + "\n", + " colors = [x[2] for x in PLDDT_BANDS]\n", + "\n", + " plt.figure(figsize=(2, 2))\n", + " for c in colors:\n", + " plt.bar(0, 0, color=c)\n", + " plt.legend(thresh, frameon=False, loc='center', fontsize=20)\n", + " plt.xticks([])\n", + " plt.yticks([])\n", + " ax = plt.gca()\n", + " ax.spines['right'].set_visible(False)\n", + " ax.spines['top'].set_visible(False)\n", + " ax.spines['left'].set_visible(False)\n", + " ax.spines['bottom'].set_visible(False)\n", + " plt.title('Model Confidence', fontsize=20, pad=20)\n", + " return plt\n", + "\n", + "# Show the structure coloured by chain if the multimer model has been used.\n", + "if model_type_to_use == notebook_utils.ModelType.MULTIMER:\n", + " multichain_view = py3Dmol.view(width=800, height=600)\n", + " multichain_view.addModelsAsFrames(to_visualize_pdb)\n", + " multichain_style = {'cartoon': {'colorscheme': 
'chain'}}\n", + " multichain_view.setStyle({'model': -1}, multichain_style)\n", + " multichain_view.zoomTo()\n", + " multichain_view.show()\n", + "\n", + "# Color the structure by per-residue pLDDT\n", + "color_map = {i: bands[2] for i, bands in enumerate(PLDDT_BANDS)}\n", + "view = py3Dmol.view(width=800, height=600)\n", + "view.addModelsAsFrames(to_visualize_pdb)\n", + "style = {'cartoon': {'colorscheme': {'prop': 'b', 'map': color_map}}}\n", + "if show_sidechains:\n", + " style['stick'] = {}\n", + "view.setStyle({'model': -1}, style)\n", + "view.zoomTo()\n", + "\n", + "grid = GridspecLayout(1, 2)\n", + "out = Output()\n", + "with out:\n", + " view.show()\n", + "grid[0, 0] = out\n", + "\n", + "out = Output()\n", + "with out:\n", + " plot_plddt_legend().show()\n", + "grid[0, 1] = out\n", + "\n", + "display.display(grid)\n", + "\n", + "# Display pLDDT and predicted aligned error (if output by the model).\n", + "if pae_outputs:\n", + " num_plots = 2\n", + "else:\n", + " num_plots = 1\n", + "\n", + "plt.figure(figsize=[8 * num_plots, 6])\n", + "plt.subplot(1, num_plots, 1)\n", + "plt.plot(plddts[best_model_name])\n", + "plt.title('Predicted LDDT')\n", + "plt.xlabel('Residue')\n", + "plt.ylabel('pLDDT')\n", + "\n", + "if num_plots == 2:\n", + " plt.subplot(1, 2, 2)\n", + " pae, max_pae = list(pae_outputs.values())[0]\n", + " plt.imshow(pae, vmin=0., vmax=max_pae, cmap='Greens_r')\n", + " plt.colorbar(fraction=0.046, pad=0.04)\n", + "\n", + " # Display lines at chain boundaries.\n", + " best_unrelaxed_prot = unrelaxed_proteins[best_model_name]\n", + " total_num_res = best_unrelaxed_prot.residue_index.shape[-1]\n", + " chain_ids = best_unrelaxed_prot.chain_index\n", + " for chain_boundary in np.nonzero(chain_ids[:-1] - chain_ids[1:]):\n", + " if chain_boundary.size:\n", + " plt.plot([0, total_num_res], [chain_boundary, chain_boundary], color='red')\n", + " plt.plot([chain_boundary, chain_boundary], [0, total_num_res], color='red')\n", + "\n", + " plt.title('Predicted 
Aligned Error')\n", + " plt.xlabel('Scored residue')\n", + " plt.ylabel('Aligned residue')\n", + "\n", + "# Save the predicted aligned error (if it exists).\n", + "pae_output_path = os.path.join(output_dir, 'predicted_aligned_error.json')\n", + "if pae_outputs:\n", + " # Save predicted aligned error in the same format as the AF EMBL DB.\n", + " pae_data = notebook_utils.get_pae_json(pae=pae, max_pae=max_pae.item())\n", + " with open(pae_output_path, 'w') as f:\n", + " f.write(pae_data)\n", + "\n", + "# --- Download the predictions ---\n", + "!zip -q -r {output_dir}.zip {output_dir}\n", + "files.download(f'{output_dir}.zip')" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "lUQAn5LYC5n4" + }, + "source": [ + "### Interpreting the prediction\n", + "\n", + "In general predicted LDDT (pLDDT) is best used for intra-domain confidence, whereas Predicted Aligned Error (PAE) is best used for determining between domain or between chain confidence.\n", + "\n", + "Please see the [AlphaFold methods paper](https://www.nature.com/articles/s41586-021-03819-2), the [AlphaFold predictions of the human proteome paper](https://www.nature.com/articles/s41586-021-03828-1), and the [AlphaFold-Multimer paper](https://www.biorxiv.org/content/10.1101/2021.10.04.463034v1) as well as [our FAQ](https://alphafold.ebi.ac.uk/faq) on how to interpret AlphaFold predictions." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "jeb2z8DIA4om" + }, + "source": [ + "## FAQ \u0026 Troubleshooting\n", + "\n", + "\n", + "* How do I get a predicted protein structure for my protein?\n", + " * Click on the _Connect_ button on the top right to get started.\n", + " * Paste the amino acid sequence of your protein (without any headers) into the “Enter the amino acid sequence to fold”.\n", + " * Run all cells in the Colab, either by running them individually (with the play button on the left side) or via _Runtime_ \u003e _Run all._\n", + " * The predicted protein structure will be downloaded once all cells have been executed. Note: This can take minutes to hours - see below.\n", + "* How long will this take?\n", + " * Downloading the AlphaFold source code can take up to a few minutes.\n", + " * Downloading and installing the third-party software can take up to a few minutes.\n", + " * The search against genetic databases can take minutes to hours.\n", + " * Running AlphaFold and generating the prediction can take minutes to hours, depending on the length of your protein and on which GPU-type Colab has assigned you.\n", + "* My Colab no longer seems to be doing anything, what should I do?\n", + " * Some steps may take minutes to hours to complete.\n", + " * If nothing happens or if you receive an error message, try restarting your Colab runtime via _Runtime_ \u003e _Restart runtime_.\n", + " * If this doesn’t help, try resetting your Colab runtime via _Runtime_ \u003e _Factory reset runtime_.\n", + "* How does this compare to the open-source version of AlphaFold?\n", + " * This Colab version of AlphaFold searches a selected portion of the BFD dataset and currently doesn’t use templates, so its accuracy is reduced in comparison to the full version of AlphaFold that is described in the [AlphaFold paper](https://doi.org/10.1038/s41586-021-03819-2) and [Github repo](https://github.com/deepmind/alphafold/) (the full version is available via the 
inference script).\n", + "* What is a Colab?\n", + " * See the [Colab FAQ](https://research.google.com/colaboratory/faq.html).\n", + "* I received a warning “Notebook requires high RAM”, what do I do?\n", + " * The resources allocated to your Colab vary. See the [Colab FAQ](https://research.google.com/colaboratory/faq.html) for more details.\n", + " * You can execute the Colab nonetheless.\n", + "* I received an error “Colab CPU runtime not supported” or “No GPU/TPU found”, what do I do?\n", + " * Colab CPU runtime is not supported. Try changing your runtime via _Runtime_ \u003e _Change runtime type_ \u003e _Hardware accelerator_ \u003e _GPU_.\n", + " * The type of GPU allocated to your Colab varies. See the [Colab FAQ](https://research.google.com/colaboratory/faq.html) for more details.\n", + " * If you receive “Cannot connect to GPU backend”, you can try again later to see if Colab allocates you a GPU.\n", + " * [Colab Pro](https://colab.research.google.com/signup) offers priority access to GPUs.\n", + "* I received an error “ModuleNotFoundError: No module named ...”, even though I ran the cell that imports it, what do I do?\n", + " * Colab notebooks on the free tier time out after a certain amount of time. See the [Colab FAQ](https://research.google.com/colaboratory/faq.html#idle-timeouts). 
Try rerunning the whole notebook from the beginning.\n", + "* Does this tool install anything on my computer?\n", + " * No, everything happens in the cloud on Google Colab.\n", + " * At the end of the Colab execution a zip-archive with the obtained prediction will be automatically downloaded to your computer.\n", + "* How should I share feedback and bug reports?\n", + " * Please share any feedback and bug reports as an [issue](https://github.com/deepmind/alphafold/issues) on Github.\n", + "\n", + "\n", + "## Related work\n", + "\n", + "Take a look at these Colab notebooks provided by the community (please note that these notebooks may vary from our validated AlphaFold system and we cannot guarantee their accuracy):\n", + "\n", + "* The [ColabFold AlphaFold2 notebook](https://colab.research.google.com/github/sokrypton/ColabFold/blob/main/AlphaFold2.ipynb) by Sergey Ovchinnikov, Milot Mirdita and Martin Steinegger, which uses an API hosted at the Södinglab based on the MMseqs2 server ([Mirdita et al. 2019, Bioinformatics](https://academic.oup.com/bioinformatics/article/35/16/2856/5280135)) for the multiple sequence alignment creation.\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "YfPhvYgKC81B" + }, + "source": [ + "# License and Disclaimer\n", + "\n", + "This is not an officially-supported Google product.\n", + "\n", + "This Colab notebook and other information provided is for theoretical modelling only, caution should be exercised in its use. It is provided ‘as-is’ without any warranty of any kind, whether expressed or implied. Information is not intended to be a substitute for professional medical advice, diagnosis, or treatment, and does not constitute medical or other professional advice.\n", + "\n", + "Copyright 2021 DeepMind Technologies Limited.\n", + "\n", + "\n", + "## AlphaFold Code License\n", + "\n", + "Licensed under the Apache License, Version 2.0 (the \"License\"); you may not use this file except in compliance with the License. 
You may obtain a copy of the License at https://www.apache.org/licenses/LICENSE-2.0.\n", + "\n", + "Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an \"AS IS\" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.\n", + "\n", + "## Model Parameters License\n", + "\n", + "The AlphaFold parameters are made available for non-commercial use only, under the terms of the Creative Commons Attribution-NonCommercial 4.0 International (CC BY-NC 4.0) license. You can find details at: https://creativecommons.org/licenses/by-nc/4.0/legalcode\n", + "\n", + "\n", + "## Third-party software\n", + "\n", + "Use of the third-party software, libraries or code referred to in the [Acknowledgements section](https://github.com/deepmind/alphafold/#acknowledgements) in the AlphaFold README may be governed by separate terms and conditions or license provisions. 
Your use of the third-party software, libraries or code is subject to any such terms and you should check that you can comply with any applicable restrictions or terms and conditions before use.\n", + "\n", + "\n", + "## Mirrored Databases\n", + "\n", + "The following databases have been mirrored by DeepMind, and are available with reference to the following:\n", + "* UniProt: v2021\\_03 (unmodified), by The UniProt Consortium, available under a [Creative Commons Attribution-NoDerivatives 4.0 International License](http://creativecommons.org/licenses/by-nd/4.0/).\n", + "* UniRef90: v2021\\_03 (unmodified), by The UniProt Consortium, available under a [Creative Commons Attribution-NoDerivatives 4.0 International License](http://creativecommons.org/licenses/by-nd/4.0/).\n", + "* MGnify: v2019\\_05 (unmodified), by Mitchell AL et al., available free of all copyright restrictions and made fully and freely available for both non-commercial and commercial use under [CC0 1.0 Universal (CC0 1.0) Public Domain Dedication](https://creativecommons.org/publicdomain/zero/1.0/).\n", + "* BFD: (modified), by Steinegger M. and Söding J., modified by DeepMind, available under a [Creative Commons Attribution-ShareAlike 4.0 International License](https://creativecommons.org/licenses/by/4.0/). See the Methods section of the [AlphaFold proteome paper](https://www.nature.com/articles/s41586-021-03828-1) for details." 
+ ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "collapsed_sections": [], + "name": "AlphaFold.ipynb", + "private_outputs": true, + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "name": "python" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/requirements.txt --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/requirements.txt Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,13 @@ +absl-py==0.13.0 +biopython==1.79 +chex==0.0.7 +dm-haiku==0.0.4 +dm-tree==0.1.6 +docker==5.0.0 +immutabledict==2.0.0 +jax==0.2.14 +ml-collections==0.1.0 +numpy==1.19.5 +pandas==1.3.4 +scipy==1.7.0 +tensorflow-cpu==2.5.0 diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/run_alphafold.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/run_alphafold.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,427 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +"""Full AlphaFold protein structure prediction script.""" +import json +import os +import pathlib +import pickle +import random +import shutil +import sys +import time +from typing import Dict, Union, Optional + +from absl import app +from absl import flags +from absl import logging +from alphafold.common import protein +from alphafold.common import residue_constants +from alphafold.data import pipeline +from alphafold.data import pipeline_multimer +from alphafold.data import templates +from alphafold.data.tools import hhsearch +from alphafold.data.tools import hmmsearch +from alphafold.model import config +from alphafold.model import model +from alphafold.relax import relax +import numpy as np + +from alphafold.model import data +# Internal import (7716). + +logging.set_verbosity(logging.INFO) + +flags.DEFINE_list( + 'fasta_paths', None, 'Paths to FASTA files, each containing a prediction ' + 'target that will be folded one after another. If a FASTA file contains ' + 'multiple sequences, then it will be folded as a multimer. Paths should be ' + 'separated by commas. All FASTA paths must have a unique basename as the ' + 'basename is used to name the output directories for each prediction.') +flags.DEFINE_list( + 'is_prokaryote_list', None, 'Optional for multimer system, not used by the ' + 'single chain system. This list should contain a boolean for each fasta ' + 'specifying true where the target complex is from a prokaryote, and false ' + 'where it is not, or where the origin is unknown. 
These values determine ' + 'the pairing method for the MSA.') + +flags.DEFINE_string('data_dir', None, 'Path to directory of supporting data.') +flags.DEFINE_string('output_dir', None, 'Path to a directory that will ' + 'store the results.') +flags.DEFINE_string('jackhmmer_binary_path', shutil.which('jackhmmer'), + 'Path to the JackHMMER executable.') +flags.DEFINE_string('hhblits_binary_path', shutil.which('hhblits'), + 'Path to the HHblits executable.') +flags.DEFINE_string('hhsearch_binary_path', shutil.which('hhsearch'), + 'Path to the HHsearch executable.') +flags.DEFINE_string('hmmsearch_binary_path', shutil.which('hmmsearch'), + 'Path to the hmmsearch executable.') +flags.DEFINE_string('hmmbuild_binary_path', shutil.which('hmmbuild'), + 'Path to the hmmbuild executable.') +flags.DEFINE_string('kalign_binary_path', shutil.which('kalign'), + 'Path to the Kalign executable.') +flags.DEFINE_string('uniref90_database_path', None, 'Path to the Uniref90 ' + 'database for use by JackHMMER.') +flags.DEFINE_string('mgnify_database_path', None, 'Path to the MGnify ' + 'database for use by JackHMMER.') +flags.DEFINE_string('bfd_database_path', None, 'Path to the BFD ' + 'database for use by HHblits.') +flags.DEFINE_string('small_bfd_database_path', None, 'Path to the small ' + 'version of BFD used with the "reduced_dbs" preset.') +flags.DEFINE_string('uniclust30_database_path', None, 'Path to the Uniclust30 ' + 'database for use by HHblits.') +flags.DEFINE_string('uniprot_database_path', None, 'Path to the Uniprot ' + 'database for use by JackHMMer.') +flags.DEFINE_string('pdb70_database_path', None, 'Path to the PDB70 ' + 'database for use by HHsearch.') +flags.DEFINE_string('pdb_seqres_database_path', None, 'Path to the PDB ' + 'seqres database for use by hmmsearch.') +flags.DEFINE_string('template_mmcif_dir', None, 'Path to a directory with ' + 'template mmCIF structures, each named .cif') +flags.DEFINE_string('max_template_date', None, 'Maximum template release date 
' + 'to consider. Important if folding historical test sets.') +flags.DEFINE_string('obsolete_pdbs_path', None, 'Path to file containing a ' + 'mapping from obsolete PDB IDs to the PDB IDs of their ' + 'replacements.') +flags.DEFINE_enum('db_preset', 'full_dbs', + ['full_dbs', 'reduced_dbs'], + 'Choose preset MSA database configuration - ' + 'smaller genetic database config (reduced_dbs) or ' + 'full genetic database config (full_dbs)') +flags.DEFINE_enum('model_preset', 'monomer', + ['monomer', 'monomer_casp14', 'monomer_ptm', 'multimer'], + 'Choose preset model configuration - the monomer model, ' + 'the monomer model with extra ensembling, monomer model with ' + 'pTM head, or multimer model') +flags.DEFINE_boolean('benchmark', False, 'Run multiple JAX model evaluations ' + 'to obtain a timing that excludes the compilation time, ' + 'which should be more indicative of the time required for ' + 'inferencing many proteins.') +flags.DEFINE_integer('random_seed', None, 'The random seed for the data ' + 'pipeline. By default, this is randomly generated. Note ' + 'that even if this is set, Alphafold may still not be ' + 'deterministic, because processes like GPU inference are ' + 'nondeterministic.') +flags.DEFINE_boolean('use_precomputed_msas', False, 'Whether to read MSAs that ' + 'have been written to disk. 
WARNING: This will not check ' + 'if the sequence, database or configuration have changed.') + +FLAGS = flags.FLAGS + +MAX_TEMPLATE_HITS = 20 +RELAX_MAX_ITERATIONS = 0 +RELAX_ENERGY_TOLERANCE = 2.39 +RELAX_STIFFNESS = 10.0 +RELAX_EXCLUDE_RESIDUES = [] +RELAX_MAX_OUTER_ITERATIONS = 3 + + +def _check_flag(flag_name: str, + other_flag_name: str, + should_be_set: bool): + if should_be_set != bool(FLAGS[flag_name].value): + verb = 'be' if should_be_set else 'not be' + raise ValueError(f'{flag_name} must {verb} set when running with ' + f'"--{other_flag_name}={FLAGS[other_flag_name].value}".') + + +def predict_structure( + fasta_path: str, + fasta_name: str, + output_dir_base: str, + data_pipeline: Union[pipeline.DataPipeline, pipeline_multimer.DataPipeline], + model_runners: Dict[str, model.RunModel], + amber_relaxer: relax.AmberRelaxation, + benchmark: bool, + random_seed: int, + is_prokaryote: Optional[bool] = None): + """Predicts structure using AlphaFold for the given sequence.""" + logging.info('Predicting %s', fasta_name) + timings = {} + output_dir = os.path.join(output_dir_base, fasta_name) + if not os.path.exists(output_dir): + os.makedirs(output_dir) + msa_output_dir = os.path.join(output_dir, 'msas') + if not os.path.exists(msa_output_dir): + os.makedirs(msa_output_dir) + + # Get features. + t_0 = time.time() + if is_prokaryote is None: + feature_dict = data_pipeline.process( + input_fasta_path=fasta_path, + msa_output_dir=msa_output_dir) + else: + feature_dict = data_pipeline.process( + input_fasta_path=fasta_path, + msa_output_dir=msa_output_dir, + is_prokaryote=is_prokaryote) + timings['features'] = time.time() - t_0 + + # Write out features as a pickled dictionary. + features_output_path = os.path.join(output_dir, 'features.pkl') + with open(features_output_path, 'wb') as f: + pickle.dump(feature_dict, f, protocol=4) + + unrelaxed_pdbs = {} + relaxed_pdbs = {} + ranking_confidences = {} + + # Run the models. 
+ num_models = len(model_runners) + for model_index, (model_name, model_runner) in enumerate( + model_runners.items()): + logging.info('Running model %s on %s', model_name, fasta_name) + t_0 = time.time() + model_random_seed = model_index + random_seed * num_models + processed_feature_dict = model_runner.process_features( + feature_dict, random_seed=model_random_seed) + timings[f'process_features_{model_name}'] = time.time() - t_0 + + t_0 = time.time() + prediction_result = model_runner.predict(processed_feature_dict, + random_seed=model_random_seed) + t_diff = time.time() - t_0 + timings[f'predict_and_compile_{model_name}'] = t_diff + logging.info( + 'Total JAX model %s on %s predict time (includes compilation time, see --benchmark): %.1fs', + model_name, fasta_name, t_diff) + + if benchmark: + t_0 = time.time() + model_runner.predict(processed_feature_dict, + random_seed=model_random_seed) + t_diff = time.time() - t_0 + timings[f'predict_benchmark_{model_name}'] = t_diff + logging.info( + 'Total JAX model %s on %s predict time (excludes compilation time): %.1fs', + model_name, fasta_name, t_diff) + + plddt = prediction_result['plddt'] + ranking_confidences[model_name] = prediction_result['ranking_confidence'] + + # Save the model outputs. + result_output_path = os.path.join(output_dir, f'result_{model_name}.pkl') + with open(result_output_path, 'wb') as f: + pickle.dump(prediction_result, f, protocol=4) + + # Add the predicted LDDT in the b-factor column. + # Note that higher predicted LDDT value means higher model confidence. 
+ plddt_b_factors = np.repeat( + plddt[:, None], residue_constants.atom_type_num, axis=-1) + unrelaxed_protein = protein.from_prediction( + features=processed_feature_dict, + result=prediction_result, + b_factors=plddt_b_factors, + remove_leading_feature_dimension=not model_runner.multimer_mode) + + unrelaxed_pdbs[model_name] = protein.to_pdb(unrelaxed_protein) + unrelaxed_pdb_path = os.path.join(output_dir, f'unrelaxed_{model_name}.pdb') + with open(unrelaxed_pdb_path, 'w') as f: + f.write(unrelaxed_pdbs[model_name]) + + if amber_relaxer: + # Relax the prediction. + t_0 = time.time() + relaxed_pdb_str, _, _ = amber_relaxer.process(prot=unrelaxed_protein) + timings[f'relax_{model_name}'] = time.time() - t_0 + + relaxed_pdbs[model_name] = relaxed_pdb_str + + # Save the relaxed PDB. + relaxed_output_path = os.path.join( + output_dir, f'relaxed_{model_name}.pdb') + with open(relaxed_output_path, 'w') as f: + f.write(relaxed_pdb_str) + + # Rank by model confidence and write out relaxed PDBs in rank order. 
+ ranked_order = [] + for idx, (model_name, _) in enumerate( + sorted(ranking_confidences.items(), key=lambda x: x[1], reverse=True)): + ranked_order.append(model_name) + ranked_output_path = os.path.join(output_dir, f'ranked_{idx}.pdb') + with open(ranked_output_path, 'w') as f: + if amber_relaxer: + f.write(relaxed_pdbs[model_name]) + else: + f.write(unrelaxed_pdbs[model_name]) + + ranking_output_path = os.path.join(output_dir, 'ranking_debug.json') + with open(ranking_output_path, 'w') as f: + label = 'iptm+ptm' if 'iptm' in prediction_result else 'plddts' + f.write(json.dumps( + {label: ranking_confidences, 'order': ranked_order}, indent=4)) + + logging.info('Final timings for %s: %s', fasta_name, timings) + + timings_output_path = os.path.join(output_dir, 'timings.json') + with open(timings_output_path, 'w') as f: + f.write(json.dumps(timings, indent=4)) + + +def main(argv): + if len(argv) > 1: + raise app.UsageError('Too many command-line arguments.') + + for tool_name in ( + 'jackhmmer', 'hhblits', 'hhsearch', 'hmmsearch', 'hmmbuild', 'kalign'): + if not FLAGS[f'{tool_name}_binary_path'].value: + raise ValueError(f'Could not find path to the "{tool_name}" binary. 
Make ' + 'sure it is installed on your system.') + + use_small_bfd = FLAGS.db_preset == 'reduced_dbs' + _check_flag('small_bfd_database_path', 'db_preset', + should_be_set=use_small_bfd) + _check_flag('bfd_database_path', 'db_preset', + should_be_set=not use_small_bfd) + _check_flag('uniclust30_database_path', 'db_preset', + should_be_set=not use_small_bfd) + + run_multimer_system = 'multimer' in FLAGS.model_preset + _check_flag('pdb70_database_path', 'model_preset', + should_be_set=not run_multimer_system) + _check_flag('pdb_seqres_database_path', 'model_preset', + should_be_set=run_multimer_system) + _check_flag('uniprot_database_path', 'model_preset', + should_be_set=run_multimer_system) + + if FLAGS.model_preset == 'monomer_casp14': + num_ensemble = 8 + else: + num_ensemble = 1 + + # Check for duplicate FASTA file names. + fasta_names = [pathlib.Path(p).stem for p in FLAGS.fasta_paths] + if len(fasta_names) != len(set(fasta_names)): + raise ValueError('All FASTA paths must have a unique basename.') + + # Check that is_prokaryote_list has same number of elements as fasta_paths, + # and convert to bool. + if FLAGS.is_prokaryote_list: + if len(FLAGS.is_prokaryote_list) != len(FLAGS.fasta_paths): + raise ValueError('--is_prokaryote_list must either be omitted or match ' + 'length of --fasta_paths.') + is_prokaryote_list = [] + for s in FLAGS.is_prokaryote_list: + if s in ('true', 'false'): + is_prokaryote_list.append(s == 'true') + else: + raise ValueError('--is_prokaryote_list must contain comma separated ' + 'true or false values.') + else: # Default is_prokaryote to False. 
+ is_prokaryote_list = [False] * len(fasta_names) + + if run_multimer_system: + template_searcher = hmmsearch.Hmmsearch( + binary_path=FLAGS.hmmsearch_binary_path, + hmmbuild_binary_path=FLAGS.hmmbuild_binary_path, + database_path=FLAGS.pdb_seqres_database_path) + template_featurizer = templates.HmmsearchHitFeaturizer( + mmcif_dir=FLAGS.template_mmcif_dir, + max_template_date=FLAGS.max_template_date, + max_hits=MAX_TEMPLATE_HITS, + kalign_binary_path=FLAGS.kalign_binary_path, + release_dates_path=None, + obsolete_pdbs_path=FLAGS.obsolete_pdbs_path) + else: + template_searcher = hhsearch.HHSearch( + binary_path=FLAGS.hhsearch_binary_path, + databases=[FLAGS.pdb70_database_path]) + template_featurizer = templates.HhsearchHitFeaturizer( + mmcif_dir=FLAGS.template_mmcif_dir, + max_template_date=FLAGS.max_template_date, + max_hits=MAX_TEMPLATE_HITS, + kalign_binary_path=FLAGS.kalign_binary_path, + release_dates_path=None, + obsolete_pdbs_path=FLAGS.obsolete_pdbs_path) + + monomer_data_pipeline = pipeline.DataPipeline( + jackhmmer_binary_path=FLAGS.jackhmmer_binary_path, + hhblits_binary_path=FLAGS.hhblits_binary_path, + uniref90_database_path=FLAGS.uniref90_database_path, + mgnify_database_path=FLAGS.mgnify_database_path, + bfd_database_path=FLAGS.bfd_database_path, + uniclust30_database_path=FLAGS.uniclust30_database_path, + small_bfd_database_path=FLAGS.small_bfd_database_path, + template_searcher=template_searcher, + template_featurizer=template_featurizer, + use_small_bfd=use_small_bfd, + use_precomputed_msas=FLAGS.use_precomputed_msas) + + if run_multimer_system: + data_pipeline = pipeline_multimer.DataPipeline( + monomer_data_pipeline=monomer_data_pipeline, + jackhmmer_binary_path=FLAGS.jackhmmer_binary_path, + uniprot_database_path=FLAGS.uniprot_database_path, + use_precomputed_msas=FLAGS.use_precomputed_msas) + else: + data_pipeline = monomer_data_pipeline + + model_runners = {} + model_names = config.MODEL_PRESETS[FLAGS.model_preset] + for model_name in 
model_names: + model_config = config.model_config(model_name) + if run_multimer_system: + model_config.model.num_ensemble_eval = num_ensemble + else: + model_config.data.eval.num_ensemble = num_ensemble + model_params = data.get_model_haiku_params( + model_name=model_name, data_dir=FLAGS.data_dir) + model_runner = model.RunModel(model_config, model_params) + model_runners[model_name] = model_runner + + logging.info('Have %d models: %s', len(model_runners), + list(model_runners.keys())) + + amber_relaxer = relax.AmberRelaxation( + max_iterations=RELAX_MAX_ITERATIONS, + tolerance=RELAX_ENERGY_TOLERANCE, + stiffness=RELAX_STIFFNESS, + exclude_residues=RELAX_EXCLUDE_RESIDUES, + max_outer_iterations=RELAX_MAX_OUTER_ITERATIONS) + + random_seed = FLAGS.random_seed + if random_seed is None: + random_seed = random.randrange(sys.maxsize // len(model_names)) + logging.info('Using random seed %d for the data pipeline', random_seed) + + # Predict structure for each of the sequences. + for i, fasta_path in enumerate(FLAGS.fasta_paths): + is_prokaryote = is_prokaryote_list[i] if run_multimer_system else None + fasta_name = fasta_names[i] + predict_structure( + fasta_path=fasta_path, + fasta_name=fasta_name, + output_dir_base=FLAGS.output_dir, + data_pipeline=data_pipeline, + model_runners=model_runners, + amber_relaxer=amber_relaxer, + benchmark=FLAGS.benchmark, + random_seed=random_seed, + is_prokaryote=is_prokaryote) + + +if __name__ == '__main__': + flags.mark_flags_as_required([ + 'fasta_paths', + 'output_dir', + 'data_dir', + 'uniref90_database_path', + 'mgnify_database_path', + 'template_mmcif_dir', + 'max_template_date', + 'obsolete_pdbs_path', + ]) + + app.run(main) diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/run_alphafold_test.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/run_alphafold_test.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,101 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 
2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +"""Tests for run_alphafold.""" + +import os + +from absl.testing import absltest +from absl.testing import parameterized +import run_alphafold +import mock +import numpy as np +# Internal import (7716). + + +class RunAlphafoldTest(parameterized.TestCase): + + @parameterized.named_parameters( + ('relax', True), + ('no_relax', False), + ) + def test_end_to_end(self, do_relax): + + data_pipeline_mock = mock.Mock() + model_runner_mock = mock.Mock() + amber_relaxer_mock = mock.Mock() + + data_pipeline_mock.process.return_value = {} + model_runner_mock.process_features.return_value = { + 'aatype': np.zeros((12, 10), dtype=np.int32), + 'residue_index': np.tile(np.arange(10, dtype=np.int32)[None], (12, 1)), + } + model_runner_mock.predict.return_value = { + 'structure_module': { + 'final_atom_positions': np.zeros((10, 37, 3)), + 'final_atom_mask': np.ones((10, 37)), + }, + 'predicted_lddt': { + 'logits': np.ones((10, 50)), + }, + 'plddt': np.ones(10) * 42, + 'ranking_confidence': 90, + 'ptm': np.array(0.), + 'aligned_confidence_probs': np.zeros((10, 10, 50)), + 'predicted_aligned_error': np.zeros((10, 10)), + 'max_predicted_aligned_error': np.array(0.), + } + model_runner_mock.multimer_mode = False + amber_relaxer_mock.process.return_value = ('RELAXED', None, None) + + fasta_path = os.path.join(absltest.get_default_test_tmpdir(), + 'target.fasta') + with open(fasta_path, 'wt') as f: + f.write('>A\nAAAAAAAAAAAAA') + fasta_name = 'test' + + out_dir = 
absltest.get_default_test_tmpdir() + + run_alphafold.predict_structure( + fasta_path=fasta_path, + fasta_name=fasta_name, + output_dir_base=out_dir, + data_pipeline=data_pipeline_mock, + model_runners={'model1': model_runner_mock}, + amber_relaxer=amber_relaxer_mock if do_relax else None, + benchmark=False, + random_seed=0) + + base_output_files = os.listdir(out_dir) + self.assertIn('target.fasta', base_output_files) + self.assertIn('test', base_output_files) + + target_output_files = os.listdir(os.path.join(out_dir, 'test')) + expected_files = [ + 'features.pkl', 'msas', 'ranked_0.pdb', 'ranking_debug.json', + 'result_model1.pkl', 'timings.json', 'unrelaxed_model1.pdb', + ] + if do_relax: + expected_files.append('relaxed_model1.pdb') + self.assertCountEqual(expected_files, target_output_files) + + # Check that pLDDT is set in the B-factor column. + with open(os.path.join(out_dir, 'test', 'unrelaxed_model1.pdb')) as f: + for line in f: + if line.startswith('ATOM'): + self.assertEqual(line[61:66], '42.00') + + +if __name__ == '__main__': + absltest.main() diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/scripts/download_all_data.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/scripts/download_all_data.sh Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,74 @@ +#!/bin/bash +# +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Downloads and unzips all required data for AlphaFold. 
+# +# Usage: bash download_all_data.sh /path/to/download/directory +set -e + +if [[ $# -eq 0 ]]; then + echo "Error: download directory must be provided as an input argument." + exit 1 +fi + +if ! command -v aria2c &> /dev/null ; then + echo "Error: aria2c could not be found. Please install aria2c (sudo apt install aria2)." + exit 1 +fi + +DOWNLOAD_DIR="$1" +DOWNLOAD_MODE="${2:-full_dbs}" # Default mode to full_dbs. +if [[ "${DOWNLOAD_MODE}" != full_dbs && "${DOWNLOAD_MODE}" != reduced_dbs ]] +then + echo "DOWNLOAD_MODE ${DOWNLOAD_MODE} not recognized." + exit 1 +fi + +SCRIPT_DIR="$(dirname "$(realpath "$0")")" + +echo "Downloading AlphaFold parameters..." +bash "${SCRIPT_DIR}/download_alphafold_params.sh" "${DOWNLOAD_DIR}" + +if [[ "${DOWNLOAD_MODE}" = reduced_dbs ]] ; then + echo "Downloading Small BFD..." + bash "${SCRIPT_DIR}/download_small_bfd.sh" "${DOWNLOAD_DIR}" +else + echo "Downloading BFD..." + bash "${SCRIPT_DIR}/download_bfd.sh" "${DOWNLOAD_DIR}" +fi + +echo "Downloading MGnify..." +bash "${SCRIPT_DIR}/download_mgnify.sh" "${DOWNLOAD_DIR}" + +echo "Downloading PDB70..." +bash "${SCRIPT_DIR}/download_pdb70.sh" "${DOWNLOAD_DIR}" + +echo "Downloading PDB mmCIF files..." +bash "${SCRIPT_DIR}/download_pdb_mmcif.sh" "${DOWNLOAD_DIR}" + +echo "Downloading Uniclust30..." +bash "${SCRIPT_DIR}/download_uniclust30.sh" "${DOWNLOAD_DIR}" + +echo "Downloading Uniref90..." +bash "${SCRIPT_DIR}/download_uniref90.sh" "${DOWNLOAD_DIR}" + +echo "Downloading UniProt..." +bash "${SCRIPT_DIR}/download_uniprot.sh" "${DOWNLOAD_DIR}" + +echo "Downloading PDB SeqRes..." +bash "${SCRIPT_DIR}/download_pdb_seqres.sh" "${DOWNLOAD_DIR}" + +echo "All data downloaded." 
diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/scripts/download_alphafold_params.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/scripts/download_alphafold_params.sh Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,41 @@ +#!/bin/bash +# +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Downloads and unzips the AlphaFold parameters. +# +# Usage: bash download_alphafold_params.sh /path/to/download/directory +set -e + +if [[ $# -eq 0 ]]; then + echo "Error: download directory must be provided as an input argument." + exit 1 +fi + +if ! command -v aria2c &> /dev/null ; then + echo "Error: aria2c could not be found. Please install aria2c (sudo apt install aria2)." 
+ exit 1 +fi + +DOWNLOAD_DIR="$1" +ROOT_DIR="${DOWNLOAD_DIR}/params" +SOURCE_URL="https://storage.googleapis.com/alphafold/alphafold_params_2021-10-27.tar" +BASENAME=$(basename "${SOURCE_URL}") + +mkdir --parents "${ROOT_DIR}" +aria2c "${SOURCE_URL}" --dir="${ROOT_DIR}" +tar --extract --verbose --file="${ROOT_DIR}/${BASENAME}" \ + --directory="${ROOT_DIR}" --preserve-permissions +rm "${ROOT_DIR}/${BASENAME}" diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/scripts/download_bfd.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/scripts/download_bfd.sh Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,43 @@ +#!/bin/bash +# +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Downloads and unzips the BFD database for AlphaFold. +# +# Usage: bash download_bfd.sh /path/to/download/directory +set -e + +if [[ $# -eq 0 ]]; then + echo "Error: download directory must be provided as an input argument." + exit 1 +fi + +if ! command -v aria2c &> /dev/null ; then + echo "Error: aria2c could not be found. Please install aria2c (sudo apt install aria2)." + exit 1 +fi + +DOWNLOAD_DIR="$1" +ROOT_DIR="${DOWNLOAD_DIR}/bfd" +# Mirror of: +# https://bfd.mmseqs.com/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt.tar.gz. 
+SOURCE_URL="https://storage.googleapis.com/alphafold-databases/casp14_versions/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt.tar.gz" +BASENAME=$(basename "${SOURCE_URL}") + +mkdir --parents "${ROOT_DIR}" +aria2c "${SOURCE_URL}" --dir="${ROOT_DIR}" +tar --extract --verbose --file="${ROOT_DIR}/${BASENAME}" \ + --directory="${ROOT_DIR}" +rm "${ROOT_DIR}/${BASENAME}" diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/scripts/download_mgnify.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/scripts/download_mgnify.sh Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,43 @@ +#!/bin/bash +# +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Downloads and unzips the MGnify database for AlphaFold. +# +# Usage: bash download_mgnify.sh /path/to/download/directory +set -e + +if [[ $# -eq 0 ]]; then + echo "Error: download directory must be provided as an input argument." + exit 1 +fi + +if ! command -v aria2c &> /dev/null ; then + echo "Error: aria2c could not be found. Please install aria2c (sudo apt install aria2)." 
+ exit 1 +fi + +DOWNLOAD_DIR="$1" +ROOT_DIR="${DOWNLOAD_DIR}/mgnify" +# Mirror of: +# ftp://ftp.ebi.ac.uk/pub/databases/metagenomics/peptide_database/2018_12/mgy_clusters.fa.gz +SOURCE_URL="https://storage.googleapis.com/alphafold-databases/casp14_versions/mgy_clusters_2018_12.fa.gz" +BASENAME=$(basename "${SOURCE_URL}") + +mkdir --parents "${ROOT_DIR}" +aria2c "${SOURCE_URL}" --dir="${ROOT_DIR}" +pushd "${ROOT_DIR}" +gunzip "${ROOT_DIR}/${BASENAME}" +popd diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/scripts/download_pdb70.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/scripts/download_pdb70.sh Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,41 @@ +#!/bin/bash +# +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Downloads and unzips the PDB70 database for AlphaFold. +# +# Usage: bash download_pdb70.sh /path/to/download/directory +set -e + +if [[ $# -eq 0 ]]; then + echo "Error: download directory must be provided as an input argument." + exit 1 +fi + +if ! command -v aria2c &> /dev/null ; then + echo "Error: aria2c could not be found. Please install aria2c (sudo apt install aria2)." 
+ exit 1 +fi + +DOWNLOAD_DIR="$1" +ROOT_DIR="${DOWNLOAD_DIR}/pdb70" +SOURCE_URL="http://wwwuser.gwdg.de/~compbiol/data/hhsuite/databases/hhsuite_dbs/old-releases/pdb70_from_mmcif_200401.tar.gz" +BASENAME=$(basename "${SOURCE_URL}") + +mkdir --parents "${ROOT_DIR}" +aria2c "${SOURCE_URL}" --dir="${ROOT_DIR}" +tar --extract --verbose --file="${ROOT_DIR}/${BASENAME}" \ + --directory="${ROOT_DIR}" +rm "${ROOT_DIR}/${BASENAME}" diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/scripts/download_pdb_mmcif.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/scripts/download_pdb_mmcif.sh Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,61 @@ +#!/bin/bash +# +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Downloads, unzips and flattens the PDB database for AlphaFold. +# +# Usage: bash download_pdb_mmcif.sh /path/to/download/directory +set -e + +if [[ $# -eq 0 ]]; then + echo "Error: download directory must be provided as an input argument." + exit 1 +fi + +if ! command -v aria2c &> /dev/null ; then + echo "Error: aria2c could not be found. Please install aria2c (sudo apt install aria2)." + exit 1 +fi + +if ! command -v rsync &> /dev/null ; then + echo "Error: rsync could not be found. Please install rsync." 
+ exit 1 +fi + +DOWNLOAD_DIR="$1" +ROOT_DIR="${DOWNLOAD_DIR}/pdb_mmcif" +RAW_DIR="${ROOT_DIR}/raw" +MMCIF_DIR="${ROOT_DIR}/mmcif_files" + +echo "Running rsync to fetch all mmCIF files (note that the rsync progress estimate might be inaccurate)..." +mkdir --parents "${RAW_DIR}" +rsync --recursive --links --perms --times --compress --info=progress2 --delete --port=33444 \ + rsync.rcsb.org::ftp_data/structures/divided/mmCIF/ \ + "${RAW_DIR}" + +echo "Unzipping all mmCIF files..." +find "${RAW_DIR}/" -type f -iname "*.gz" -exec gunzip {} + + +echo "Flattening all mmCIF files..." +mkdir --parents "${MMCIF_DIR}" +find "${RAW_DIR}" -type d -empty -delete # Delete empty directories. +for subdir in "${RAW_DIR}"/*; do + mv "${subdir}/"*.cif "${MMCIF_DIR}" +done + +# Delete empty download directory structure. +find "${RAW_DIR}" -type d -empty -delete + +aria2c "ftp://ftp.wwpdb.org/pub/pdb/data/status/obsolete.dat" --dir="${ROOT_DIR}" diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/scripts/download_pdb_seqres.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/scripts/download_pdb_seqres.sh Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,38 @@ +#!/bin/bash +# +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Downloads and unzips the PDB SeqRes database for AlphaFold. 
+# +# Usage: bash download_pdb_seqres.sh /path/to/download/directory +set -e + +if [[ $# -eq 0 ]]; then + echo "Error: download directory must be provided as an input argument." + exit 1 +fi + +if ! command -v aria2c &> /dev/null ; then + echo "Error: aria2c could not be found. Please install aria2c (sudo apt install aria2)." + exit 1 +fi + +DOWNLOAD_DIR="$1" +ROOT_DIR="${DOWNLOAD_DIR}/pdb_seqres" +SOURCE_URL="ftp://ftp.wwpdb.org/pub/pdb/derived_data/pdb_seqres.txt" +BASENAME=$(basename "${SOURCE_URL}") + +mkdir --parents "${ROOT_DIR}" +aria2c "${SOURCE_URL}" --dir="${ROOT_DIR}" diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/scripts/download_small_bfd.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/scripts/download_small_bfd.sh Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,41 @@ +#!/bin/bash +# +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Downloads and unzips the Small BFD database for AlphaFold. +# +# Usage: bash download_small_bfd.sh /path/to/download/directory +set -e + +if [[ $# -eq 0 ]]; then + echo "Error: download directory must be provided as an input argument." + exit 1 +fi + +if ! command -v aria2c &> /dev/null ; then + echo "Error: aria2c could not be found. Please install aria2c (sudo apt install aria2)." 
+ exit 1 +fi + +DOWNLOAD_DIR="$1" +ROOT_DIR="${DOWNLOAD_DIR}/small_bfd" +SOURCE_URL="https://storage.googleapis.com/alphafold-databases/reduced_dbs/bfd-first_non_consensus_sequences.fasta.gz" +BASENAME=$(basename "${SOURCE_URL}") + +mkdir --parents "${ROOT_DIR}" +aria2c "${SOURCE_URL}" --dir="${ROOT_DIR}" +pushd "${ROOT_DIR}" +gunzip "${ROOT_DIR}/${BASENAME}" +popd diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/scripts/download_uniclust30.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/scripts/download_uniclust30.sh Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,43 @@ +#!/bin/bash +# +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Downloads and unzips the Uniclust30 database for AlphaFold. +# +# Usage: bash download_uniclust30.sh /path/to/download/directory +set -e + +if [[ $# -eq 0 ]]; then + echo "Error: download directory must be provided as an input argument." + exit 1 +fi + +if ! command -v aria2c &> /dev/null ; then + echo "Error: aria2c could not be found. Please install aria2c (sudo apt install aria2)." 
+ exit 1 +fi + +DOWNLOAD_DIR="$1" +ROOT_DIR="${DOWNLOAD_DIR}/uniclust30" +# Mirror of: +# http://wwwuser.gwdg.de/~compbiol/uniclust/2018_08/uniclust30_2018_08_hhsuite.tar.gz +SOURCE_URL="https://storage.googleapis.com/alphafold-databases/casp14_versions/uniclust30_2018_08_hhsuite.tar.gz" +BASENAME=$(basename "${SOURCE_URL}") + +mkdir --parents "${ROOT_DIR}" +aria2c "${SOURCE_URL}" --dir="${ROOT_DIR}" +tar --extract --verbose --file="${ROOT_DIR}/${BASENAME}" \ + --directory="${ROOT_DIR}" +rm "${ROOT_DIR}/${BASENAME}" diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/scripts/download_uniprot.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/scripts/download_uniprot.sh Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,55 @@ +#!/bin/bash +# +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Downloads, unzips and merges the SwissProt and TrEMBL databases for +# AlphaFold-Multimer. +# +# Usage: bash download_uniprot.sh /path/to/download/directory +set -e + +if [[ $# -eq 0 ]]; then + echo "Error: download directory must be provided as an input argument." + exit 1 +fi + +if ! command -v aria2c &> /dev/null ; then + echo "Error: aria2c could not be found. Please install aria2c (sudo apt install aria2)." 
+ exit 1 +fi + +DOWNLOAD_DIR="$1" +ROOT_DIR="${DOWNLOAD_DIR}/uniprot" + +TREMBL_SOURCE_URL="ftp://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_trembl.fasta.gz" +TREMBL_BASENAME=$(basename "${TREMBL_SOURCE_URL}") +TREMBL_UNZIPPED_BASENAME="${TREMBL_BASENAME%.gz}" + +SPROT_SOURCE_URL="ftp://ftp.ebi.ac.uk/pub/databases/uniprot/current_release/knowledgebase/complete/uniprot_sprot.fasta.gz" +SPROT_BASENAME=$(basename "${SPROT_SOURCE_URL}") +SPROT_UNZIPPED_BASENAME="${SPROT_BASENAME%.gz}" + +mkdir --parents "${ROOT_DIR}" +aria2c "${TREMBL_SOURCE_URL}" --dir="${ROOT_DIR}" +aria2c "${SPROT_SOURCE_URL}" --dir="${ROOT_DIR}" +pushd "${ROOT_DIR}" +gunzip "${ROOT_DIR}/${TREMBL_BASENAME}" +gunzip "${ROOT_DIR}/${SPROT_BASENAME}" + +# Concatenate TrEMBL and SwissProt, rename to uniprot and clean up. +cat "${ROOT_DIR}/${SPROT_UNZIPPED_BASENAME}" >> "${ROOT_DIR}/${TREMBL_UNZIPPED_BASENAME}" +mv "${ROOT_DIR}/${TREMBL_UNZIPPED_BASENAME}" "${ROOT_DIR}/uniprot.fasta" +rm "${ROOT_DIR}/${SPROT_UNZIPPED_BASENAME}" +popd diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/scripts/download_uniref90.sh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/scripts/download_uniref90.sh Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,41 @@ +#!/bin/bash +# +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# +# Downloads and unzips the UniRef90 database for AlphaFold. 
+# +# Usage: bash download_uniref90.sh /path/to/download/directory +set -e + +if [[ $# -eq 0 ]]; then + echo "Error: download directory must be provided as an input argument." + exit 1 +fi + +if ! command -v aria2c &> /dev/null ; then + echo "Error: aria2c could not be found. Please install aria2c (sudo apt install aria2)." + exit 1 +fi + +DOWNLOAD_DIR="$1" +ROOT_DIR="${DOWNLOAD_DIR}/uniref90" +SOURCE_URL="ftp://ftp.uniprot.org/pub/databases/uniprot/uniref/uniref90/uniref90.fasta.gz" +BASENAME=$(basename "${SOURCE_URL}") + +mkdir --parents "${ROOT_DIR}" +aria2c "${SOURCE_URL}" --dir="${ROOT_DIR}" +pushd "${ROOT_DIR}" +gunzip "${ROOT_DIR}/${BASENAME}" +popd diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/alphafold/setup.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/alphafold/setup.py Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,58 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Install script for setuptools.""" + +from setuptools import find_packages +from setuptools import setup + +setup( + name='alphafold', + version='2.1.0', + description='An implementation of the inference pipeline of AlphaFold v2.0.' 
+ 'This is a completely new model that was entered as AlphaFold2 in CASP14 ' + 'and published in Nature.', + author='DeepMind', + author_email='alphafold@deepmind.com', + license='Apache License, Version 2.0', + url='https://github.com/deepmind/alphafold', + packages=find_packages(), + install_requires=[ + 'absl-py', + 'biopython', + 'chex', + 'dm-haiku', + 'dm-tree', + 'docker', + 'immutabledict', + 'jax', + 'ml-collections', + 'numpy', + 'pandas', + 'scipy', + 'tensorflow-cpu', + ], + tests_require=['mock'], + classifiers=[ + 'Development Status :: 5 - Production/Stable', + 'Intended Audience :: Science/Research', + 'License :: OSI Approved :: Apache Software License', + 'Operating System :: POSIX :: Linux', + 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3.9', + 'Programming Language :: Python :: 3.10', + 'Topic :: Scientific/Engineering :: Artificial Intelligence', + ], +) diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/claremcwhite/Dockerfile --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/claremcwhite/Dockerfile Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,87 @@ +# Copyright 2021 DeepMind Technologies Limited +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +ARG CUDA_FULL=11.2.2 +FROM nvidia/cuda:${CUDA_FULL}-cudnn8-runtime-ubuntu20.04 +# FROM directive resets ARGS, so we specify again (the value is retained if +# previously set). 
+ARG CUDA_FULL +ARG CUDA=11.2 +# JAXLIB no longer built for all minor CUDA versions: +# https://github.com/google/jax/blob/main/CHANGELOG.md#jaxlib-0166-may-11-2021 +ARG CUDA_JAXLIB=11.1 + +# Use bash to support string substitution. +SHELL ["/bin/bash", "-c"] + +RUN apt-get update && DEBIAN_FRONTEND=noninteractive apt-get install -y \ + build-essential \ + cmake \ + cuda-command-line-tools-${CUDA/./-} \ + git \ + hmmer \ + kalign \ + tzdata \ + wget \ + && rm -rf /var/lib/apt/lists/* + +# Compile HHsuite from source. +RUN git clone --branch v3.3.0 https://github.com/soedinglab/hh-suite.git /tmp/hh-suite \ + && mkdir /tmp/hh-suite/build +WORKDIR /tmp/hh-suite/build +RUN cmake -DCMAKE_INSTALL_PREFIX=/opt/hhsuite .. \ + && make -j 4 && make install \ + && ln -s /opt/hhsuite/bin/* /usr/bin \ + && rm -rf /tmp/hh-suite + +# Install Miniconda package manger. +RUN wget -q -P /tmp \ + https://repo.anaconda.com/miniconda/Miniconda3-py38_4.9.2-Linux-x86_64.sh \ + && bash /tmp/Miniconda3-py38_4.9.2-Linux-x86_64.sh -b -p /opt/conda \ + && rm /tmp/Miniconda3-py38_4.9.2-Linux-x86_64.sh + +# Install conda packages. +ENV PATH="/opt/conda/bin:$PATH" +RUN conda update -qy conda \ + && conda install -y -c conda-forge \ + openmm=7.5.1 \ + cudatoolkit==${CUDA_FULL} \ + pdbfixer \ + pip + +COPY . /app/alphafold +RUN wget -q -P /app/alphafold/alphafold/common/ \ + https://git.scicore.unibas.ch/schwede/openstructure/-/raw/7102c63615b64735c4941278d92b554ec94415f8/modules/mol/alg/src/stereo_chemical_props.txt + +# Install pip packages. +RUN pip3 install --upgrade pip \ + && pip3 install -r /app/alphafold/requirements.txt \ + && pip3 install --upgrade jax jaxlib==0.1.69+cuda${CUDA_JAXLIB/./} -f \ + https://storage.googleapis.com/jax-releases/jax_releases.html + +# Apply OpenMM patch. +WORKDIR /opt/conda/lib/python3.8/site-packages +RUN patch -p0 < /app/alphafold/docker/openmm.patch + +# We need to run `ldconfig` first to ensure GPUs are visible, due to some quirk +# with Debian. 
See https://github.com/NVIDIA/nvidia-docker/issues/1399 for +# details. +# ENTRYPOINT does not support easily running multiple commands, so instead we +# write a shell script to wrap them up. +WORKDIR /app/alphafold +RUN echo $'#!/bin/bash\n\ +ldconfig\n\ +python /app/alphafold/run_alphafold.py "$@"' > /app/run_alphafold.sh \ + && chmod +x /app/run_alphafold.sh +ENTRYPOINT ["/app/run_alphafold.sh"] diff -r 7ae9d78b06f5 -r 6c92e000d684 docker/claremcwhite/README.md --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/docker/claremcwhite/README.md Tue Mar 01 02:53:05 2022 +0000 @@ -0,0 +1,1 @@ +Cam: I think this is the source of the claremcwhite alphafold container but not sure diff -r 7ae9d78b06f5 -r 6c92e000d684 scripts/download_all_data.sh diff -r 7ae9d78b06f5 -r 6c92e000d684 scripts/download_alphafold_params.sh diff -r 7ae9d78b06f5 -r 6c92e000d684 scripts/download_bfd.sh diff -r 7ae9d78b06f5 -r 6c92e000d684 scripts/download_mgnify.sh diff -r 7ae9d78b06f5 -r 6c92e000d684 scripts/download_pdb70.sh diff -r 7ae9d78b06f5 -r 6c92e000d684 scripts/download_pdb_mmcif.sh diff -r 7ae9d78b06f5 -r 6c92e000d684 scripts/download_pdb_seqres.sh diff -r 7ae9d78b06f5 -r 6c92e000d684 scripts/download_small_bfd.sh diff -r 7ae9d78b06f5 -r 6c92e000d684 scripts/download_uniclust30.sh diff -r 7ae9d78b06f5 -r 6c92e000d684 scripts/download_uniprot.sh diff -r 7ae9d78b06f5 -r 6c92e000d684 scripts/download_uniref90.sh diff -r 7ae9d78b06f5 -r 6c92e000d684 validate_fasta.py --- a/validate_fasta.py Fri Jan 28 04:56:29 2022 +0000 +++ b/validate_fasta.py Tue Mar 01 02:53:05 2022 +0000 @@ -1,5 +1,6 @@ +"""Validate input FASTA sequence.""" - +import re import argparse from typing import List, TextIO @@ -11,51 +12,67 @@ class FastaLoader: - def __init__(self): - """creates a Fasta() from a file""" - self.fastas: List[Fasta] = [] + def __init__(self, fasta_path: str): + """Initialize from FASTA file.""" + self.fastas = [] + self.load(fasta_path) + print("Loaded FASTA sequences:") + for f in self.fastas: 
+ print(f.header) + print(f.aa_seq) def load(self, fasta_path: str): - """ - load function has to be very flexible. - file may be normal fasta format (header, seq) or can just be a bare sequence. - """ - with open(fasta_path, 'r') as fp: - header, sequence = self.interpret_first_line(fp) - line = fp.readline().rstrip('\n') - - while line: - if line.startswith('>'): - self.update_fastas(header, sequence) - header = line - sequence = '' - else: - sequence += line - line = fp.readline().rstrip('\n') + """Load bare or FASTA formatted sequence.""" + with open(fasta_path, 'r') as f: + self.content = f.read() + + if "__cn__" in self.content: + # Pasted content with escaped characters + self.newline = '__cn__' + self.caret = '__gt__' + else: + # Uploaded file with normal content + self.newline = '\n' + self.caret = '>' + + self.lines = self.content.split(self.newline) + header, sequence = self.interpret_first_line() + + i = 0 + while i < len(self.lines): + line = self.lines[i] + if line.startswith(self.caret): + self.update_fastas(header, sequence) + header = '>' + self.strip_header(line) + sequence = '' + else: + sequence += line.strip('\n ') + i += 1 # after reading whole file, header & sequence buffers might be full self.update_fastas(header, sequence) - return self.fastas - def interpret_first_line(self, fp: TextIO): - header = '' - sequence = '' - line = fp.readline().rstrip('\n') - if line.startswith('>'): - header = line + def interpret_first_line(self): + line = self.lines[0] + if line.startswith(self.caret): + header = '>' + self.strip_header(line) + return header, '' else: - sequence += line - return header, sequence - + return '', line + + def strip_header(self, line): + """Strip characters escaped with underscores from pasted text.""" + return re.sub(r'\_\_.{2}\_\_', '', line).strip('>') + def update_fastas(self, header: str, sequence: str): # if we have a sequence - if not sequence == '': + if sequence: # create generic header if not exists - if header == '': 
+ if not header: fasta_count = len(self.fastas) header = f'>sequence_{fasta_count}' - # create new Fasta + # Create new Fasta self.fastas.append(Fasta(header, sequence)) @@ -65,9 +82,9 @@ self.min_length = 30 self.max_length = 2000 self.iupac_characters = { - 'A', 'B', 'C', 'D', 'E', 'F', 'G', - 'H', 'I', 'K', 'L', 'M', 'N', 'P', - 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', + 'A', 'B', 'C', 'D', 'E', 'F', 'G', + 'H', 'I', 'K', 'L', 'M', 'N', 'P', + 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z', '-' } @@ -76,9 +93,9 @@ self.validate_num_seqs() self.validate_length() self.validate_alphabet() - # not checking for 'X' nucleotides at the moment. - # alphafold can throw an error if it doesn't like it. - #self.validate_x() + # not checking for 'X' nucleotides at the moment. + # alphafold can throw an error if it doesn't like it. + #self.validate_x() def validate_num_seqs(self) -> None: if len(self.fasta_list) > 1: @@ -93,19 +110,19 @@ raise Exception(f'Error encountered validating fasta: Sequence too short ({len(fasta.aa_seq)}aa). Must be > 30aa') if len(fasta.aa_seq) > self.max_length: raise Exception(f'Error encountered validating fasta: Sequence too long ({len(fasta.aa_seq)}aa). Must be < 2000aa') - + def validate_alphabet(self): """ - Confirms whether the sequence conforms to IUPAC codes. - If not, reports the offending character and its position. - """ + Confirms whether the sequence conforms to IUPAC codes. + If not, reports the offending character and its position. + """ fasta = self.fasta_list[0] for i, char in enumerate(fasta.aa_seq.upper()): if char not in self.iupac_characters: - raise Exception(f'Error encountered validating fasta: Invalid amino acid found at pos {i}: {char}') + raise Exception(f'Error encountered validating fasta: Invalid amino acid found at pos {i}: "{char}"') def validate_x(self): - """checks if any bases are X. TODO check whether alphafold accepts X bases. """ + """checks if any bases are X. TODO check whether alphafold accepts X bases. 
""" fasta = self.fasta_list[0] for i, char in enumerate(fasta.aa_seq.upper()): if char == 'X': @@ -134,28 +151,27 @@ def main(): # load fasta file args = parse_args() - fl = FastaLoader() - fastas = fl.load(args.input_fasta) + fas = FastaLoader(args.input_fasta) # validate - fv = FastaValidator(fastas) + fv = FastaValidator(fas.fastas) fv.validate() # write cleaned version fw = FastaWriter() - fw.write(fastas[0]) + fw.write(fas.fastas[0]) - + def parse_args() -> argparse.Namespace: parser = argparse.ArgumentParser() parser.add_argument( - "input_fasta", - help="input fasta file", + "input_fasta", + help="input fasta file", type=str - ) + ) return parser.parse_args() if __name__ == '__main__': - main() \ No newline at end of file + main()