Mercurial > repos > galaxy-australia > alphafold2
comparison alphafold.xml @ 14:d00e15139065 draft
planemo upload for repository https://github.com/usegalaxy-au/tools-au commit d490defa32d9c318137d2d781243b392cb14110d-dirty
author | galaxy-australia |
---|---|
date | Tue, 28 Feb 2023 01:15:42 +0000 |
parents | c0e71cb2bd1b |
children | a58f7eb0df2c |
comparison
equal
deleted
inserted
replaced
13:c0e71cb2bd1b | 14:d00e15139065 |
---|---|
1 <tool id="alphafold" name="Alphafold 2" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01"> | 1 <tool id="alphafold" name="Alphafold 2" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01"> |
2 <description> - AI-guided 3D structural prediction of proteins</description> | 2 <description> - AI-guided 3D structural prediction of proteins</description> |
3 <macros> | 3 <macros> |
4 <token name="@TOOL_VERSION@">2.1.2</token> | 4 <token name="@TOOL_VERSION@">2.3.1</token> |
5 <token name="@VERSION_SUFFIX@">4</token> | 5 <token name="@VERSION_SUFFIX@">0</token> |
6 </macros> | 6 </macros> |
7 <edam_topics> | 7 <edam_topics> |
8 <edam_topic>topic_0082</edam_topic> | 8 <edam_topic>topic_0082</edam_topic> |
9 </edam_topics> | 9 </edam_topics> |
10 <edam_operations> | 10 <edam_operations> |
11 <edam_operation>operation_0474</edam_operation> | 11 <edam_operation>operation_0474</edam_operation> |
12 </edam_operations> | 12 </edam_operations> |
13 <xrefs> | 13 <xrefs> |
14 <xref type="bio.tools">alphafold_2.0</xref> | 14 <xref type="bio.tools">alphafold_2</xref> |
15 </xrefs> | 15 </xrefs> |
16 <requirements> | 16 <requirements> |
17 <container type="docker">neoformit/alphafold:v2.1.2_0</container> | 17 <container type="docker">neoformit/alphafold:v2.3.1_1</container> |
18 </requirements> | 18 </requirements> |
19 <command detect_errors="exit_code"><![CDATA[ | 19 <command detect_errors="exit_code"><![CDATA[ |
20 | 20 |
21 ## $ALPHAFOLD_DB variable should point to the location of the AlphaFold | 21 ## $ALPHAFOLD_DB variable should point to the location of the AlphaFold |
22 ## databases - defaults to /data | 22 ## databases - defaults to /data |
23 | 23 |
24 ## fasta setup ---------------------------- | 24 ## Read FASTA input ---------------------------- |
25 #if $fasta_or_text.input_mode == 'history': | 25 #if $fasta_or_text.input_mode == 'history': |
26 cp '$fasta_or_text.fasta_file' input.fasta && | 26 cp '$fasta_or_text.fasta_file' input.fasta |
27 | 27 |
28 #elif $fasta_or_text.input_mode == 'textbox': | 28 #elif $fasta_or_text.input_mode == 'textbox': |
29 echo '$fasta_or_text.fasta_text' > input.fasta && | 29 echo '$fasta_or_text.fasta_text' > input.fasta |
30 #end if | 30 #end if |
31 | 31 |
32 python3 '$__tool_directory__/validate_fasta.py' input.fasta | 32 && python3 '$__tool_directory__/validate_fasta.py' input.fasta |
33 --min_length \${ALPHAFOLD_AA_LENGTH_MIN:-0} | 33 --min_length \${ALPHAFOLD_AA_LENGTH_MIN:-0} |
34 --max_length \${ALPHAFOLD_AA_LENGTH_MAX:-0} | 34 --max_length \${ALPHAFOLD_AA_LENGTH_MAX:-0} |
35 #if $multimer: | 35 #if $multimer: |
36 --multimer | 36 --multimer |
37 #end if | 37 #end if |
38 > alphafold.fasta && | 38 > alphafold.fasta |
39 | 39 |
40 ## env vars ------------------------------- | 40 ## Env vars ------------------------------- |
41 export TF_FORCE_UNIFIED_MEMORY=1 && | 41 && export TF_FORCE_UNIFIED_MEMORY=1 |
42 export XLA_PYTHON_CLIENT_MEM_FRACTION=4.0 && | 42 && export XLA_PYTHON_CLIENT_MEM_FRACTION=4.0 |
43 export DATE=`date +"%Y-%m-%d"` && | 43 && export TODAY=`date +"%Y-%m-%d"` |
44 | 44 |
45 ## run alphafold ------------------------- | 45 ## Run alphafold ------------------------- |
46 python /app/alphafold/run_alphafold.py | 46 && python /app/alphafold/run_alphafold.py |
47 --fasta_paths alphafold.fasta | 47 --fasta_paths alphafold.fasta |
48 --output_dir output | 48 --output_dir output |
49 --data_dir \${ALPHAFOLD_DB:-/data} | 49 --data_dir \${ALPHAFOLD_DB:-/data} |
50 --max_template_date=\$DATE | 50 |
51 | 51 ## Set reference database paths |
52 ## Set reference data explicitly | 52 --uniref90_database_path \${ALPHAFOLD_DB:-/data}/uniref90/uniref90.fasta |
53 --uniref90_database_path \${ALPHAFOLD_DB:-/data}/uniref90/uniref90.fasta | 53 --mgnify_database_path \${ALPHAFOLD_DB:-/data}/mgnify/mgy_clusters_2022_05.fa |
54 --mgnify_database_path \${ALPHAFOLD_DB:-/data}/mgnify/mgy_clusters_2018_12.fa | 54 --template_mmcif_dir \${ALPHAFOLD_DB:-/data}/pdb_mmcif/mmcif_files |
55 --template_mmcif_dir \${ALPHAFOLD_DB:-/data}/pdb_mmcif/mmcif_files | 55 --obsolete_pdbs_path \${ALPHAFOLD_DB:-/data}/pdb_mmcif/obsolete.dat |
56 --obsolete_pdbs_path \${ALPHAFOLD_DB:-/data}/pdb_mmcif/obsolete.dat | 56 #if $dbs == 'full': |
57 #if $dbs == 'full': | 57 --bfd_database_path \${ALPHAFOLD_DB:-/data}/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt |
58 --bfd_database_path \${ALPHAFOLD_DB:-/data}/bfd/bfd_metaclust_clu_complete_id30_c90_final_seq.sorted_opt | 58 --uniref30_database_path \${ALPHAFOLD_DB:-/data}/uniref30/UniRef30_2021_03 |
59 --uniclust30_database_path \${ALPHAFOLD_DB:-/data}/uniclust30/uniclust30_2018_08/uniclust30_2018_08 | 59 #else |
60 #else | 60 --db_preset=reduced_dbs |
61 --db_preset=reduced_dbs | 61 --small_bfd_database_path \${ALPHAFOLD_DB:-/data}/small_bfd/bfd-first_non_consensus_sequences.fasta |
62 --small_bfd_database_path \${ALPHAFOLD_DB:-/data}/small_bfd/bfd-first_non_consensus_sequences.fasta | 62 #end if |
63 #end if | 63 |
64 | 64 #if $max_template_date: |
65 ## Param introduced in AlphaFold v2.1.2: | 65 --max_template_date=$max_template_date |
66 --use_gpu_relax=\${ALPHAFOLD_USE_GPU:-True} | 66 #else |
67 | 67 --max_template_date=\$TODAY |
68 #if $multimer: | 68 #end if |
69 --model_preset=multimer | 69 |
70 --pdb_seqres_database_path=\${ALPHAFOLD_DB:-/data}/pdb_seqres/pdb_seqres.txt | 70 --use_gpu_relax=\${ALPHAFOLD_USE_GPU:-True} ## introduced in v2.1.2 |
71 --uniprot_database_path=\${ALPHAFOLD_DB:-/data}/uniprot/uniprot.fasta | 71 |
72 ##--num_multimer_predictions_per_model=1 ## introduced alphafold>=2.2.0 | 72 #if $multimer: |
73 | 73 --model_preset=multimer |
74 #else | 74 --pdb_seqres_database_path=\${ALPHAFOLD_DB:-/data}/pdb_seqres/pdb_seqres.txt |
75 --pdb70_database_path \${ALPHAFOLD_DB:-/data}/pdb70/pdb70 | 75 --uniprot_database_path=\${ALPHAFOLD_DB:-/data}/uniprot/uniprot.fasta |
76 #end if | 76 --num_multimer_predictions_per_model=1 ## introduced in v2.2.0 |
77 && | 77 #else |
78 --pdb70_database_path \${ALPHAFOLD_DB:-/data}/pdb70/pdb70 | |
79 #end if | |
78 | 80 |
79 ## Generate additional outputs ------------ | 81 ## Generate additional outputs ------------ |
80 python3 '$__tool_directory__/gen_extra_outputs.py' output/alphafold $output_plddts | 82 && python3 '$__tool_directory__/outputs.py' output/alphafold $outputs.plddts |
81 #if $multimer: | 83 #if $multimer: |
82 --multimer | 84 --multimer |
83 #end if | 85 #end if |
84 && | |
85 | 86 |
86 ## HTML output | 87 ## HTML output |
87 mkdir -p '${ html.files_path }' && | 88 && mkdir -p '${ html.files_path }' |
88 cp '$__tool_directory__/alphafold.html' '${html}' && | 89 && cp '$__tool_directory__/alphafold.html' '${html}' |
89 cp output/alphafold/ranked_*.pdb '${html.files_path}' && | 90 && cp output/alphafold/ranked_*.pdb '${html.files_path}' |
90 | 91 |
91 ## This is a (hacky) fix for a bug that has appeared in multiple Pulsar servers. | 92 ## This is a (hacky) fix for a bug that has appeared in multiple Pulsar servers. |
92 ## The working directory ends up two levels deep and the visualization html page | 93 ## The working directory ends up two levels deep and the visualization html page |
93 ## fails to load the PDB files as static assets. | 94 ## fails to load the PDB files as static assets. |
94 (([ -d working ] && cp -r working/* .) || true) | 95 && (([ -d working ] && cp -r working/* .) || true) |
95 | 96 |
96 ]]></command> | 97 ]]></command> |
97 <inputs> | 98 <inputs> |
98 <conditional name="fasta_or_text"> | 99 <conditional name="fasta_or_text"> |
99 <param name="input_mode" type="select" label="Fasta Input" help="Protein sequence(s) to fold. Input can be fasta file from history, or text. Sequence must be valid IUPAC amino acid characters. If multiple sequences FASTA file provided, multimer mode must be selected."> | 100 <param name="input_mode" type="select" label="Fasta Input" help="Protein sequence(s) to fold. Input can be fasta file from history, or text. Sequence must be valid IUPAC amino acid characters. If multiple-sequence FASTA file provided, multimer mode must be selected."> |
100 <option value="history">Use fasta from history</option> | 101 <option value="history">Use fasta from history</option> |
101 <option value="textbox">Paste sequence into textbox</option> | 102 <option value="textbox">Paste sequence into textbox</option> |
102 </param> | 103 </param> |
103 <when value="history"> | 104 <when value="history"> |
104 <param name="fasta_file" type="data" multiple="false" format="fasta" label="Fasta file from history" help="Select single FASTA protein file from your history. If you wish to fold multiple proteins, submit an individual job for each protein. If you wish to run AlphaFold multimer, please supply multiple sequences in this file." /> | 105 <param name="fasta_file" type="data" multiple="false" format="fasta" label="Fasta file from history" help="Select single FASTA protein file from your history. If you wish to fold multiple proteins, submit an individual job for each protein. If you wish to run AlphaFold multimer, please supply multiple sequences in this file." /> |
107 <param name="fasta_text" type="text" area="true" value="" label="Paste sequence" help="Paste single protein sequence into the textbox. If you wish to fold multiple proteins, submit individual jobs for each protein. If you wish to run AlphaFold multimer, please supply multiple sequences in FASTA format." /> | 108 <param name="fasta_text" type="text" area="true" value="" label="Paste sequence" help="Paste single protein sequence into the textbox. If you wish to fold multiple proteins, submit individual jobs for each protein. If you wish to run AlphaFold multimer, please supply multiple sequences in FASTA format." /> |
108 </when> | 109 </when> |
109 </conditional> | 110 </conditional> |
110 | 111 |
111 <param | 112 <param |
113 name="max_template_date" | |
114 type="text" | |
115 label="Max template date (yyyy-mm-dd) (optional)" | |
116 help="The model will reference PDB structures deposited before this date only. Defaults to today's date." | |
117 optional="true" | |
118 > | |
119 <sanitizer> | |
120 <valid initial="string.digits"> | |
121 <add value="-" /> | |
122 </valid> | |
123 </sanitizer> | |
124 <validator type="regex">[0-9]{4}-[0-9]{2}-[0-9]{2}</validator> | |
125 </param> | |
126 | |
127 <param | |
112 name="dbs" | 128 name="dbs" |
113 type="select" | 129 type="select" |
114 display="radio" | 130 display="radio" |
115 label="Select database" | 131 label="Select database" |
116 help="The reduced database allows significantly faster run time in | 132 help="The reduced database allows significantly faster run time in |
123 <param | 139 <param |
124 name="multimer" | 140 name="multimer" |
125 type="boolean" | 141 type="boolean" |
126 checked="false" | 142 checked="false" |
127 label="Multimer mode" | 143 label="Multimer mode" |
128 help="Fold a protein multimer from multiple input sequences. You must input multiple sequences to run this mode." | 144 help="Fold a protein multimer from multiple input sequences. You must input multiple sequences in FASTA to run this mode." |
129 /> | 145 /> |
130 | 146 |
131 <param name="output_plddts" type="boolean" checked="false" label="Output per-residue confidence scores" truevalue="--plddts" falsevalue="" help="Alphafold produces a pLDDT score between 0-100 for each residue in the folded models. High scores represent high confidence in placement for the residue, while low scoring residues have lower confidence. Sections of low confidence often occur in disordered regions. " /> | 147 <section name="outputs" title="Optional outputs" expanded="false"> |
148 <param | |
149 name="confidence_scores" | |
150 type="boolean" | |
151 checked="false" | |
152 label="Per-model confidence scores" | |
153 help="A tabular file showing average confidence score for each model (predicted template modelling (PTM) score; interface PTM is incorporated into this score for multimer predictions)." | |
154 /> | |
155 <param | |
156 name="plddts" | |
157 type="boolean" | |
158 checked="false" | |
159 label="Per-residue confidence scores" | |
160 truevalue="--plddts" | |
161 falsevalue="" | |
162 help="Alphafold produces a pLDDT score between 0-100 for each residue in the folded models. High scores represent high confidence in placement for the residue, while low scoring residues have lower confidence. This output is a tabular file with five rows (one for each output PDB model), with each column providing a pLDDT score for a single residue. These data have been parsed from the model pickle files (below)." | |
163 /> | |
164 <param | |
165 name="model_pkls" | |
166 type="boolean" | |
167 checked="false" | |
168 label="ranked_*.pkl" | |
169 help="A pickle file containing metrics used for the assessment of the model's accuracy. These include per-residue pLDDT scores (see above), predicted TM (Template Modelling) score, which is a global superposition metric and predicted aligned error (a matrix size (number of residues) x (number of residues) where each position describes the confidence of the residue's 3D position relative to another residue in the model; can be used for the interpretation of relative positions of domains). Pickle files can be read and processed using the Python 'pickle' library. Outputs are named respectively to PDB outputs." | |
170 /> | |
171 <param | |
172 name="relax_json" | |
173 type="boolean" | |
174 checked="false" | |
175 label="relax_metrics.json" | |
176 help="A JSON-formatted text file containing relax metrics (mostly remaining violations)." | |
177 /> | |
178 </section> | |
132 </inputs> | 179 </inputs> |
133 | 180 |
134 <outputs> | 181 <outputs> |
135 <data name="model5" format="pdb" from_work_dir="output/alphafold/ranked_4.pdb" label="${tool.name} on ${on_string}: Model 5"/> | 182 <data name="model5" format="pdb" from_work_dir="output/alphafold/ranked_4.pdb" label="${tool.name} on ${on_string}: PDB ranked 4"/> |
136 <data name="model4" format="pdb" from_work_dir="output/alphafold/ranked_3.pdb" label="${tool.name} on ${on_string}: Model 4"/> | 183 <data name="model4" format="pdb" from_work_dir="output/alphafold/ranked_3.pdb" label="${tool.name} on ${on_string}: PDB ranked 3"/> |
137 <data name="model3" format="pdb" from_work_dir="output/alphafold/ranked_2.pdb" label="${tool.name} on ${on_string}: Model 3"/> | 184 <data name="model3" format="pdb" from_work_dir="output/alphafold/ranked_2.pdb" label="${tool.name} on ${on_string}: PDB ranked 2"/> |
138 <data name="model2" format="pdb" from_work_dir="output/alphafold/ranked_1.pdb" label="${tool.name} on ${on_string}: Model 2"/> | 185 <data name="model2" format="pdb" from_work_dir="output/alphafold/ranked_1.pdb" label="${tool.name} on ${on_string}: PDB ranked 1"/> |
139 <data name="model1" format="pdb" from_work_dir="output/alphafold/ranked_0.pdb" label="${tool.name} on ${on_string}: Model 1"/> | 186 <data name="model1" format="pdb" from_work_dir="output/alphafold/ranked_0.pdb" label="${tool.name} on ${on_string}: PDB ranked 0"/> |
140 <data name="confidence_scores" format="tsv" from_work_dir="output/alphafold/model_confidence_scores.tsv" label="${tool.name} on ${on_string}: Model confidence scores"/> | |
141 <data name="plddts" format="tsv" from_work_dir="output/alphafold/plddts.tsv" label="${tool.name} on ${on_string}: Per-residue confidence scores (plddts)"> | |
142 <filter>(output_plddts)</filter> | |
143 </data> | |
144 <data name="html" format="html" label="${tool.name} on ${on_string}: Visualization" /> | 187 <data name="html" format="html" label="${tool.name} on ${on_string}: Visualization" /> |
188 | |
189 <!-- Optional outputs --> | |
190 <data | |
191 name="output_confidence_scores" | |
192 format="tabular" | |
193 from_work_dir="output/alphafold/extra/model_confidence_scores.tsv" | |
194 label="${tool.name} on ${on_string}: Model confidence scores" | |
195 > | |
196 <filter>outputs['confidence_scores']</filter> | |
197 </data> | |
198 | |
199 <data | |
200 name="output_plddts" | |
201 format="tabular" | |
202 from_work_dir="output/alphafold/extra/plddts.tsv" | |
203 label="${tool.name} on ${on_string}: Per-residue confidence scores (plddts)" | |
204 > | |
205 <filter>outputs['plddts']</filter> | |
206 </data> | |
207 | |
208 <data | |
209 name="output_ranked_4_pkl" | |
210 format="binary" | |
211 from_work_dir="output/alphafold/extra/ranked_4.pkl" | |
212 label="${tool.name} on ${on_string}: ranked_4.pkl" | |
213 > | |
214 <filter>outputs['model_pkls']</filter> | |
215 </data> | |
216 <data | |
217 name="output_ranked_3_pkl" | |
218 format="binary" | |
219 from_work_dir="output/alphafold/extra/ranked_3.pkl" | |
220 label="${tool.name} on ${on_string}: ranked_3.pkl" | |
221 > | |
222 <filter>outputs['model_pkls']</filter> | |
223 </data> | |
224 <data | |
225 name="output_ranked_2_pkl" | |
226 format="binary" | |
227 from_work_dir="output/alphafold/extra/ranked_2.pkl" | |
228 label="${tool.name} on ${on_string}: ranked_2.pkl" | |
229 > | |
230 <filter>outputs['model_pkls']</filter> | |
231 </data> | |
232 <data | |
233 name="output_ranked_1_pkl" | |
234 format="binary" | |
235 from_work_dir="output/alphafold/extra/ranked_1.pkl" | |
236 label="${tool.name} on ${on_string}: ranked_1.pkl" | |
237 > | |
238 <filter>outputs['model_pkls']</filter> | |
239 </data> | |
240 <data | |
241 name="output_ranked_0_pkl" | |
242 format="binary" | |
243 from_work_dir="output/alphafold/extra/ranked_0.pkl" | |
244 label="${tool.name} on ${on_string}: ranked_0.pkl" | |
245 > | |
246 <filter>outputs['model_pkls']</filter> | |
247 </data> | |
248 <data | |
249 name="output_relax_json" | |
250 format="json" | |
251 from_work_dir="output/alphafold/extra/relax_metrics_ranked.json" | |
252 label="${tool.name} on ${on_string}: relax_metrics_ranked.json" | |
253 > | |
254 <filter>outputs['relax_json']</filter> | |
255 </data> | |
145 </outputs> | 256 </outputs> |
257 | |
146 <tests> | 258 <tests> |
147 <test expect_num_outputs="8"> | 259 <test expect_num_outputs="8"> |
148 <conditional name="fasta_or_text"> | 260 <conditional name="fasta_or_text"> |
149 <param name="input_mode" value="history"/> | 261 <param name="input_mode" value="history"/> |
150 <param name="fasta_file" value="test1.fasta"/> | 262 <param name="fasta_file" value="test1.fasta"/> |
151 </conditional> | 263 </conditional> |
152 <param name="output_plddts" value="true"/> | 264 <param name="plddts" value="true"/> |
153 <output name="plddts"> | 265 <output name="output_plddts"> |
154 <assert_contents> | 266 <assert_contents> |
155 <has_n_columns n="2"/> | 267 <has_n_columns n="2"/> |
156 <has_n_lines n="6"/> | 268 <has_n_lines n="6"/> |
157 <has_size value="2900" delta="300"/> | 269 <has_size value="2900" delta="300"/> |
158 </assert_contents> | 270 </assert_contents> |
159 </output> | 271 </output> |
160 <output name="confidence_scores"> | 272 <output name="output_confidence_scores"> |
161 <assert_contents> | 273 <assert_contents> |
162 <has_n_columns n="2"/> | 274 <has_n_columns n="2"/> |
163 <has_n_lines n="6"/> | 275 <has_n_lines n="6"/> |
164 <has_size value="70" delta="50"/> | 276 <has_size value="70" delta="50"/> |
165 </assert_contents> | 277 </assert_contents> |
203 </tests> | 315 </tests> |
204 <help><![CDATA[ | 316 <help><![CDATA[ |
205 | 317 |
206 .. class:: infomark | 318 .. class:: infomark |
207 | 319 |
320 | AlphaFold v2: AI-guided 3D structural prediction of proteins | |
321 | | |
322 | **NOTE: this tool packages AlphaFold v2.3.1.** | |
323 | | |
324 | This means that the neural network has been trained on PDBs with a release | |
325 | date before 2021-09-30 (the training cutoff was 2018-04-30 until ``v2.3.0``). | |
326 | | |
327 | Find out more in the technical and release notes: | |
328 | | |
329 | |
330 - `Release notes for v2.3.1 <https://github.com/deepmind/alphafold/releases/tag/v2.3.1>`_ | |
331 - `Technical notes for v2.3 <https://github.com/deepmind/alphafold/blob/main/docs/technical_note_v2.3.0.md>`_ | |
332 | |
333 | If you want to use AlphaFold trained against an older cutoff date, switch to Galaxy version ``2.1.2`` (which was trained on data up to 2018-04-30). | |
334 | | |
335 | |
208 **What it does** | 336 **What it does** |
209 | 337 |
210 | AlphaFold v2.1: AI-guided 3D structure prediction of proteins | |
211 | | |
212 | |
213 *What is AlphaFold?* | 338 *What is AlphaFold?* |
214 | 339 |
215 | AlphaFold is a program which uses neural networks to predict the tertiary (3D) structure of proteins. AlphaFold accepts an amino acid sequence (in Fasta format), then will 'fold' that sequence into a 3D model. | 340 | AlphaFold is a program which uses neural networks to predict the tertiary (3D) structure of proteins. AlphaFold accepts an amino acid sequence in Fasta format, which will be "folded" into a 3D model. |
216 | | |
217 | **NOTE: AlphaFold has numerous versions - this tool uses AlphaFold v2.1.2.** | |
218 | | 341 | |
219 | 342 |
220 *What makes AlphaFold different?* | 343 *What makes AlphaFold different?* |
221 | 344 |
222 | The ability to use computers to predict 3D protein structures with high accuracy is desirable because it removes the time-consuming and costly process of determining structures experimentally. | 345 | The ability to use computers to predict 3D protein structures with high accuracy is desirable because it removes the time-consuming and costly process of determining structures experimentally. |
223 | In-silico protein folding has been an active field of research for decades, but existing tools ran more slowly and with less reliability than AlphaFold. | 346 | In-silico protein folding has been an active field of research for decades, but existing tools were slower and far less reliable than AlphaFold. |
224 | AlphaFold represents a leap forward by regularly predicting structures to atomic-level accuracy, even when no similar structures are known. | 347 | AlphaFold represents a leap forward by regularly predicting structures to atomic-level accuracy, even when no similar structures are known. |
225 | | 348 | |
349 | |
350 | |
351 **Input** | |
352 | |
353 *Amino acid sequence* | |
354 | |
355 | AlphaFold monomer (default) accepts a **single amino acid sequence** in FASTA format. | |
356 | You can choose to input either a file from your Galaxy history or paste a sequence into a text box. | |
357 | If you choose the ``multimer`` option, you can supply a FASTA file containing **multiple sequences** to be folded concurrently into a multimer. | |
358 | | |
359 | | |
360 | |
361 **Outputs** | |
362 | |
363 *Visualization* | |
364 | |
365 An interactive 3D graphic of the best predicted molecular structures. | |
366 This output can be opened in Galaxy to give a visual impression of the results, with different structural representations to choose from. | |
367 Open the "Visualization" history output by clicking on the "view data" icon: | |
368 | |
369 .. image:: https://github.com/usegalaxy-au/galaxy-local-tools/blob/1a8d3e8daa7ccc5a345ca377697735ab95ed0666/tools/alphafold/static/img/alphafold-visualization.png?raw=true | |
370 :height: 520 | |
371 :alt: Result visualization | |
372 | |
373 | | |
374 | |
375 *PDB files* | |
376 | |
377 | Five PDB (Protein Data Bank) files are created, ordered by rank, as predicted by AlphaFold. | |
378 | These files describe the molecular structures and can be used for downstream analysis. e.g. *in silico* molecular docking. | |
379 | **PLEASE NOTE** that all outputs have been renamed to their respective rank order, including model and model.pkl files. | |
380 | | |
381 | |
382 *Model confidence scores (optional)* | |
383 | |
384 | This optional output produces a file which describes the confidence scores for each model (based on `pLDDTs <https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3799472/>`_, or the ``iptm+ptm`` score if run in multimer mode) which may be useful for downstream analysis. | |
385 | Model confidence scores are also included as a column (replacing ``bFactor``) in the default PDB output. | |
386 | | |
387 | | |
388 | |
389 *Model data files (ranked_n.pkl)* | |
390 | |
391 | Per-model data stored in pickle files (a Python binary data format). These files can be used as inputs to downstream analysis software (such as Chimera X) for visualizing structures and computing kinetics between protein multimers and domains. | |
392 | The tool will produce one ``.pkl`` output for each of the PDB models. | |
393 | | |
394 | | |
395 | |
396 *relax_metrics.json (optional)* | |
397 | |
398 | A JSON-formatted text file containing relax metrics (mostly remaining violations). | |
399 | | |
400 | |
401 **AlphaFold configuration** | |
402 | |
403 | We have configured AlphaFold to run with the parameters suggested by default on `AlphaFold's GitHub <https://github.com/deepmind/alphafold>`_. | |
404 | This means that it runs with Amber relaxation enabled, with relaxed PDB models collected as output datasets. If there are additional parameters that you would like to interact with, please `send a support request to Galaxy AU <https://site.usegalaxy.org.au/request/support>`_, or open an issue on `our GitHub <https://github.com/usegalaxy-au/tools-au>`_. | |
405 | | |
406 | | |
407 | |
408 **External Resources** | |
409 | |
410 We highly recommend checking out the | |
411 `Alphafold Protein Structure Database <https://alphafold.ebi.ac.uk/>`_, | |
412 which contains pre-computed structures for over 200 million known proteins. | |
413 See also: | |
414 | |
415 - `Google Deepmind's article on AlphaFold <https://deepmind.com/blog/article/alphafold-a-solution-to-a-50-year-old-grand-challenge-in-biology>`_ | |
416 - `AlphaFold source code on GitHub <https://github.com/deepmind/alphafold>`_ | |
226 | 417 |
227 *Downstream analysis* | 418 *Downstream analysis* |
228 | 419 |
229 | Obtaining a protein structure prediction is the first step in many analyses. | 420 | Obtaining a protein structure prediction is the first step in many analyses. |
230 | The 3D models created by AlphaFold can be used in downstream analysis, including the following: | 421 | The 3D models created by AlphaFold can be used in downstream analysis, including the following: |
235 - Molecular docking | 426 - Molecular docking |
236 3D structures can be used to predict the binding affinity of different compounds. | 427 3D structures can be used to predict the binding affinity of different compounds. |
237 This is especially useful in screening drug candidates. | 428 This is especially useful in screening drug candidates. |
238 - Protein-protein interactions | 429 - Protein-protein interactions |
239 Proteins associate in many biological processes, including intracellular signalling pathways and protein complex formation. | 430 Proteins associate in many biological processes, including intracellular signalling pathways and protein complex formation. |
240 To predict these interactions, other programs may ingest 3D models predicted by AlphaFold. Proprietary softwares include `GOLD <https://www.ccdc.cam.ac.uk/solutions/csd-discovery/components/gold/>`_ and `SeeSAR <https://www.biosolveit.de/SeeSAR>`_, but many `free and open-source options <https://en.wikipedia.org/wiki/List_of_protein-ligand_docking_software>`_ are available such as `AutoDock <https://autodock.scripps.edu/>`_ and `SwissDock <http://www.swissdock.ch/>`_. | 431 To predict these interactions, other programs may ingest 3D models predicted by AlphaFold. Proprietary softwares include `GOLD <https://www.ccdc.cam.ac.uk/solutions/csd-discovery/components/gold/>`_ and `SeeSAR <https://www.biosolveit.de/SeeSAR>`_, but many `free and open-source options <https://en.wikipedia.org/wiki/List_of_protein-ligand_docking_software>`_ are available such as `AutoDock <https://autodock.scripps.edu/>`_, `SwissDock <http://www.swissdock.ch/>`_, `DockQ <https://github.com/bjornwallner/DockQ>`_, `MM-Align <https://zhanggroup.org/MM-align/>`_ and `TM-Align <https://zhanggroup.org/TM-align/>`_. Protein-protein interactions are often inferred from AlphaFold-Multimer predictions, which provide a level of confidence in binding affinity between homomer/heteromer subunits. |
241 | |
242 | Protein complex interactions are also commonly observed with AlphaFold's multimer prediction mode. | |
243 | | |
244 | | |
245 | |
246 **Input** | |
247 | |
248 *Amino acid sequence* | |
249 | |
250 | AlphaFold monomer (default) accepts a **single amino acid sequence** in FASTA format. | |
251 | You can choose to input either a file from your Galaxy history or paste a sequence into a text box. | |
252 | If you choose the ``multimer`` option, you can supply a FASTA file containing **multiple sequences** to be folded concurrently into a multimer. | |
253 | | |
254 | | |
255 | |
256 **Outputs** | |
257 | |
258 *Visualization* | |
259 | |
260 An interactive 3D graphic of the best predicted molecular structures. | |
261 This output can be opened in Galaxy to give a visual impression of the results, with different structural representations to choose from. | |
262 Open the "Visualization" history output by clicking on the "view data" icon: | |
263 | |
264 .. image:: https://github.com/usegalaxy-au/galaxy-local-tools/blob/1a8d3e8daa7ccc5a345ca377697735ab95ed0666/tools/alphafold/static/img/alphafold-visualization.png?raw=true | |
265 :height: 520 | |
266 :alt: Result visualization | |
267 | |
268 | | |
269 | |
270 *PDB files* | |
271 | |
272 | Five PDB (Protein Data Bank) files will be created for the best ranking models predicted by AlphaFold. | |
273 | These files describe the molecular structures and can be used for downstream analysis. e.g. *in silico* molecular docking. | |
274 | | |
275 | |
276 *Model confidence scores (optional)* | |
277 | |
278 | This optional output produces a file which describes the confidence scores for each model (based on `pLDDTs <https://www.ncbi.nlm.nih.gov/pmc/articles/PMC3799472/>`_, or the ``iptm+ptm`` score if run in multimer mode) which may be useful for downstream analysis. | |
279 | Model confidence scores are also included as a column (replacing ``bFactor``) in the default PDB output. | |
280 | | |
281 | | |
282 | |
283 **AlphaFold configuration** | |
284 | |
285 | We have configured AlphaFold to run with the parameters suggested by default on `AlphaFold's GitHub <https://github.com/deepmind/alphafold>`_. | |
286 | This means that it runs against the full database with Amber relaxation, with ``max_template_date`` set to today's date. If there are additional parameters that you would like to interact with, please `send a support request to Galaxy AU <https://site.usegalaxy.org.au/request/support>`_, or open an issue on `our GitHub <https://github.com/usegalaxy-au/tools-au>`_. | |
287 | | |
288 | | |
289 | |
290 **External Resources** | |
291 | |
292 We HIGHLY recommend checking out the | |
293 `Alphafold Protein Structure Database <https://alphafold.ebi.ac.uk/>`_, | |
294 which contains pre-computed structures for over 200 million known proteins. | |
295 See also: | |
296 | |
297 - `Google Deepmind's article on AlphaFold <https://deepmind.com/blog/article/alphafold-a-solution-to-a-50-year-old-grand-challenge-in-biology>`_ | |
298 - `AlphaFold source code on GitHub <https://github.com/deepmind/alphafold>`_ | |
299 | 432 |
300 ]]></help> | 433 ]]></help> |
301 <citations> | 434 <citations> |
302 <citation type="doi">https://doi.org/10.1038/s41586-021-03819-2</citation> | 435 <citation type="doi">https://doi.org/10.1038/s41586-021-03819-2</citation> |
303 <citation type="doi">https://doi.org/10.1101/2021.10.04.463034</citation> | 436 <citation type="doi">https://doi.org/10.1101/2021.10.04.463034</citation> |