alphafold2: alphafold.xml comparison

comparison alphafold.xml @ 20:6ab1a261520a draft

planemo upload for repository https://github.com/usegalaxy-au/tools-au commit c3a90eb12ada44d477541baa4dd6182be29cd554-dirty

author	galaxy-australia
date	Sun, 28 Jul 2024 20:09:55 +0000
parents	2f7702fd0a4c
children	e7f1b552a695

comparison

equal deleted inserted replaced

-:2f7702fd0a4c
+:6ab1a261520a
-<tool id="alphafold" name="Alphafold 2" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01">
+<tool id="alphafold" name="Alphafold 2" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.05">
 <description> - AI-guided 3D structural prediction of proteins</description>
 <macros>
-<token name="@TOOL_VERSION@">2.3.1</token>
+<token name="@TOOL_VERSION@">2.3.2</token>
 <token name="@TOOL_MINOR_VERSION@">2.3</token>
-<token name="@VERSION_SUFFIX@">5</token>
+<token name="@VERSION_SUFFIX@">0</token>
 <import>macro_output.xml</import>
 <import>macro_test_output.xml</import>
 </macros>
 <edam_topics>
 <edam_topic>topic_0082</edam_topic>
 </edam_operations>
 <xrefs>
 <xref type="bio.tools">alphafold_2</xref>
 </xrefs>
 <requirements>
-<container type="docker">neoformit/alphafold:v2.3.1_2</container>
+<container type="docker">neoformit/alphafold:v2.3.2_0</container>
 </requirements>
 <required_files>
 <include path="scripts/outputs.py" />
 <include path="scripts/validate_fasta.py" />
-<include path="alphafold.html" />
+<include path="scripts/alphafold.html" />
 </required_files>
 <command detect_errors="exit_code"><![CDATA[
 ## Developers: to test with mock alphafold run, set `export PLANEMO_TESTING=1`
 ## in planemo's gx_venv_n/bin/activate script. AlphaFold outputs will be copied
 #end if
 && python3 '$__tool_directory__/scripts/validate_fasta.py' input.fasta
 --min_length \${ALPHAFOLD_AA_LENGTH_MIN:-0}
 --max_length \${ALPHAFOLD_AA_LENGTH_MAX:-0}
-#if $model_preset == 'multimer':
+#if $model_preset.selection == 'multimer':
 --multimer
 --max-sequences \${ALPHAFOLD_MAX_SEQUENCES:-10}
 #end if
 > alphafold.fasta
 && export TODAY=`date +"%Y-%m-%d"`
 ## Run AlphaFold  -------------------------------------------------------------
 #if os.environ.get('PLANEMO_TESTING'):
 ## Run in testing mode (mocks a successful AlphaFold run by copying outputs)
-&& echo "Creating dummy outputs for model_preset=$model_preset..."
+&& echo "Creating dummy outputs for model_preset=$model_preset.selection..."
 ## model_preset is a conditional; pass the chosen preset value to the mock script
 && bash '$__tool_directory__/scripts/mock_alphafold.sh' $model_preset.selection
 #else:
 ## Run AlphaFold
 && python /app/alphafold/run_alphafold.py
 --fasta_paths alphafold.fasta
 --output_dir output
 --data_dir \${ALPHAFOLD_DB:-/data}/@TOOL_MINOR_VERSION@/
---model_preset=$model_preset
+--model_preset=$model_preset.selection
 ## Set reference database paths
 --uniref90_database_path   \${ALPHAFOLD_DB:-/data}/@TOOL_MINOR_VERSION@/uniref90/uniref90.fasta
 --mgnify_database_path     \${ALPHAFOLD_DB:-/data}/@TOOL_MINOR_VERSION@/mgnify/mgy_clusters_2022_05.fa
 --template_mmcif_dir       \${ALPHAFOLD_DB:-/data}/@TOOL_MINOR_VERSION@/pdb_mmcif/mmcif_files
 #else
 --db_preset=reduced_dbs
 --small_bfd_database_path  \${ALPHAFOLD_DB:-/data}/@TOOL_MINOR_VERSION@/small_bfd/bfd-first_non_consensus_sequences.fasta
 #end if
-#if $max_template_date:
+#if $advanced.max_template_date:
---max_template_date=$max_template_date
+--max_template_date=$advanced.max_template_date
 #else
 --max_template_date=\$TODAY
 #end if
---use_gpu_relax=\${ALPHAFOLD_USE_GPU:-True}  ## introduced in v2.1.2
+--use_gpu_relax=\${ALPHAFOLD_USE_GPU:-True}
-#if $model_preset == 'multimer':
+#if $model_preset.selection == 'multimer':
 --pdb_seqres_database_path=\${ALPHAFOLD_DB:-/data}/@TOOL_MINOR_VERSION@/pdb_seqres/pdb_seqres.txt
 --uniprot_database_path=\${ALPHAFOLD_DB:-/data}/@TOOL_MINOR_VERSION@/uniprot/uniprot.fasta
---num_multimer_predictions_per_model=1  ## introduced in v2.2.0
+--num_multimer_predictions_per_model=$model_preset.num_multimer_predictions_per_model
 #else
 --pdb70_database_path \${ALPHAFOLD_DB:-/data}/@TOOL_MINOR_VERSION@/pdb70/pdb70
 #end if
+## Galaxy-specific options --------------------------------------------
+## See https://github.com/neoformit/alphafold/tree/release_2.3.2_galaxy
+#if $advanced.disable_amber_relax:
+--disable_amber_relax
+#end if
+## limit_model_outputs is defined inside the "advanced" section, so both the
+## guard and the emitted value must be referenced through $advanced
+#if $advanced.limit_model_outputs:
+--output_models=$advanced.limit_model_outputs
+#end if
+## End Galaxy-specific options ----------------------------------------
 #end if
 ## Generate additional outputs ------------------------------------------------
 && python3 '$__tool_directory__/scripts/outputs.py' output/alphafold
 $outputs.plddts
 $outputs.model_pkls
 $outputs.pae_csv
 $outputs.plots
-#if $model_preset == 'multimer':
+#if $model_preset.selection == 'multimer':
 --multimer
 #end if
 ## HTML output
 && mkdir -p '${ html.files_path }'
-&& cp '$__tool_directory__/alphafold.html' '${html}'
+&& cp output/alphafold/extra/alphafold.html '${html}'
 && cp output/alphafold/ranked_*.pdb '${html.files_path}'
 ## This is a (hacky) fix for a bug that has appeared in multiple Pulsar servers.
 ## The working directory ends up two levels deep and the visualization html page
 ## fails to load the PDB files as static assets.
 <param name="fasta_text" type="text" area="true" value="" label="Paste sequence" help="Paste single protein sequence into the textbox. If you wish to fold multiple proteins, submit individual jobs for each protein. If you wish to run AlphaFold multimer, please supply multiple sequences in FASTA format." />
 </when>
 </conditional>
 <param
-name="max_template_date"
-type="text"
-label="Max template date (yyyy-mm-dd) (optional)"
-help="The model will reference PDB structures deposited before this date only. Defaults to today's date."
-optional="true"
->
-<sanitizer>
-<valid initial="string.digits">
-<add value="-" />
-</valid>
-</sanitizer>
-<validator type="regex">[0-9]{4}-[0-9]{2}-[0-9]{2}</validator>
-</param>
-<param
 name="dbs"
 type="select"
 display="radio"
 label="Select database"
 help="The reduced database allows significantly faster run time in
 >
 <option value="reduced" selected="true">Reduced database</option>
 <option value="full">Full database</option>
 </param>
+<conditional name="model_preset">
 <param
-name="model_preset"
+name="selection"
 type="select"
 label="Model preset"
 help="Select which prediction model to run. The monomer model is the most accurate for single protein prediction. The multimer model allows prediction of protein complexes."
 >
 <option value="monomer" selected="true">monomer - default prediction model</option>
 </option>
 <option value="multimer">
 multimer - model a protein complex (requires multi-sequence FASTA input)
 </option>
 </param>
+<when value="monomer"></when>
+<when value="monomer_ptm"></when>
+<when value="multimer">
+<param
+name="num_multimer_predictions_per_model"
+type="integer"
+value="5"
+label="Multimer predictions per model"
+help="How many predictions (each with a different random seed) will be generated per model. E.g. if this is 2 and there are 5 models then there will be 10 predictions per input. For a small drop in accuracy you may wish to run a single seed per model (default 5, max 10)."
+min="1"
+max="10"
+/>
+</when>
+</conditional>
+<section name="advanced" title="Advanced options" expanded="false">
+<!-- Optional cut-off date for PDB templates; when left empty the command falls back to today's date. -->
+<param
+name="max_template_date"
+type="text"
+label="Max template date (yyyy-mm-dd) (optional)"
+help="The model will reference PDB structures deposited before this date only. Defaults to today's date."
+optional="true"
+>
+<sanitizer>
+<valid initial="string.digits">
+<add value="-" />
+</valid>
+</sanitizer>
+<!-- Anchored at both ends: an unanchored pattern only has to match the start of the value, so trailing characters would pass. -->
+<validator type="regex">^[0-9]{4}-[0-9]{2}-[0-9]{2}$</validator>
+</param>
+<!-- Switch that skips Amber relaxation; the default is set with "checked", as for the other boolean params of this tool. -->
+<param
+name="disable_amber_relax"
+type="boolean"
+label="Disable Amber relaxation"
+checked="false"
+optional="true"
+help="Amber relaxation can be disabled to speed up processing time. Amber relaxation is used to refine predicted structures by removing stereochemical violations, resulting in more accurate prediction of side-chain geometry. Disabling relaxation for large proteins may lead to artefacts in the predicted structure. Disabling amber relax will result in the unrelaxed models being collected as PDB outputs."
+/>
+<param
+name="limit_model_outputs"
+type="integer"
+label="Limit model outputs"
+value="5"
+help="Limit the number of models to output. The top N models will be output, where N is the value entered here (default 5). Please note that the top-ranking model is not always the correct one, and it is usually recommended to inspect multiple models. Reducing the number of models will result in a slight reduction in run time."
+min="1"
+max="5"
+/>
+</section>
 <section name="outputs" title="Optional outputs" expanded="false">
 <param
 name="plots"
 type="boolean"
 name="relax_json"
 type="boolean"
 checked="false"
 label="relax_metrics.json"
 help="A JSON-formatted text file containing relax metrics (mostly remaining violations)."
+/>
+<param
+name="timings_json"
+type="boolean"
+checked="false"
+label="timings.json"
+help="A JSON file with timings reported for each phase of the AlphaFold run."
 />
 </section>
 </inputs>
 <outputs>
 <expand macro="output_confidence_scores" />
 <expand macro="output_pickles" />
 <expand macro="output_pae_csv" />
 <expand macro="output_plots" />
 <expand macro="output_relax_json" />
+<expand macro="output_timings_json" />
 </outputs>
 <tests>
 <!-- Test monomer with default outputs -->
 <test expect_num_outputs="6">
 <conditional name="fasta_or_text">
 <param name="input_mode" value="history"/>
 <param name="fasta_file" value="test1.fasta"/>
 </conditional>
-<param name="model_preset" value="monomer"/>
+<param name="model_preset|selection" value="monomer"/>
 <expand macro="test_output_pdb_models" />
 </test>
 <!-- Test monomer with all outputs -->
 <test expect_num_outputs="19">
 <conditional name="fasta_or_text">
 <param name="input_mode" value="history"/>
 <param name="fasta_file" value="test1.fasta"/>
 </conditional>
-<param name="model_preset" value="monomer"/>
+<param name="model_preset|selection" value="monomer"/>
 <param name="outputs|plots" value="true"/>
 <param name="outputs|confidence_scores" value="true"/>
 <param name="outputs|plddts" value="true"/>
 <param name="outputs|pae_csv" value="true"/>
 <param name="outputs|model_pkls" value="true"/>
 <test expect_num_outputs="24">
 <conditional name="fasta_or_text">
 <param name="input_mode" value="history"/>
 <param name="fasta_file" value="test1.fasta"/>
 </conditional>
-<param name="model_preset" value="monomer_ptm"/>
+<param name="model_preset|selection" value="monomer_ptm"/>
 <param name="outputs|plots" value="true"/>
 <param name="outputs|confidence_scores" value="true"/>
 <param name="outputs|plddts" value="true"/>
 <param name="outputs|pae_csv" value="true"/>
 <param name="outputs|model_pkls" value="true"/>
 <test expect_num_outputs="24">
 <conditional name="fasta_or_text">
 <param name="input_mode" value="history"/>
 <param name="fasta_file" value="multimer.fasta"/>
 </conditional>
-<param name="model_preset" value="multimer"/>
+<param name="model_preset|selection" value="multimer"/>
 <param name="outputs|plots" value="true"/>
 <param name="outputs|confidence_scores" value="true"/>
 <param name="outputs|plddts" value="true"/>
 <param name="outputs|pae_csv" value="true"/>
 <param name="outputs|model_pkls" value="true"/>
 <param name="outputs|relax_json" value="true"/>
+<param name="outputs|timings_json" value="true"/>
 <expand macro="test_output_plots_3" />
 <expand macro="test_output_confidence_scores" />
 <expand macro="test_output_plddts" />
 <expand macro="test_output_pdb_models" />
 <expand macro="test_output_pickles" />
 <expand macro="test_output_relax_json" />
+<expand macro="test_output_timings_json" />
 <expand macro="test_output_pae_csv" />
 </test>
 </tests>
 <help><![CDATA[
 .. class:: infomark
 | AlphaFold v2: AI-guided 3D structural prediction of proteins
 |
-| **NOTE: this tool packages AlphaFold v2.3.1.**
+| **NOTE: this tool packages** `a modified branch of AlphaFold v2.3.2 <https://github.com/neoformit/alphafold/tree/release_2.3.2_galaxy>`_.
 |
 | This means that the neural network has been trained on PDBs with a release
 | date before 2021-09-30 (the training cutoff was 2018-04-30 until ``v2.3.0``).
 |
 | Find out more in the technical and release notes:
 |
-- `Release notes for v2.3.1 <https://github.com/deepmind/alphafold/releases/tag/v2.3.1>`_
+- `Release notes for v2.3.2 <https://github.com/deepmind/alphafold/releases/tag/v2.3.2>`_
 - `Technical notes for v2.3 <https://github.com/deepmind/alphafold/blob/main/docs/technical_note_v2.3.0.md>`_
-| If you want to use AlphaFold trained against an older cutoff date, switch to Galaxy version ``2.1.2`` (which was trained to data up to 2018-04-30).
-|
 **What it does**
 *What is AlphaFold?*
 | AlphaFold monomer (default) accepts a **single amino acid sequence** in FASTA format.
 | You can choose to input either a file from your Galaxy history or paste a sequence into a text box.
 | If you choose the ``multimer`` option, you can supply a FASTA file containing **multiple sequences** to be folded concurrently into a multimer.
 |
+| For pairwise screening of target-candidate pairs with multimer, you can submit a list of paired protein sequences in batch mode (i.e. two protein sequences in each FASTA file).
 |
 **Outputs**
 *Visualization*
 |
 *PDB files*
-| Five PDB (Protein Data Bank) files are be created, ordered by rank, as predicted by AlphaFold.
+| PDB (Protein Data Bank) files are created, ordered by rank, as predicted by AlphaFold. The tool produces 5 models by default, but this can be reduced with the "Limit model outputs" option for a reduced run time.
 | These files describe the molecular structures and can be used for downstream analysis. e.g. *in silico* molecular docking.
 | **PLEASE NOTE** that all outputs have been renamed to their respective rank order, including model and model.pkl files.
 |
 *Model confidence scores (optional)*
 |
 *relax_metrics.json (optional)*
 | A JSON-formatted text file containing relax metrics (primarily remaining violations).
+|
+|
+*timings.json (optional)*
+| A JSON-formatted text file containing the timings for each phase of the prediction.
 |
 |
 **AlphaFold configuration**

Mercurial > repos > galaxy-australia > alphafold2

comparison alphafold.xml @ 20:6ab1a261520a draft