Mercurial > repos > galaxy-australia > alphafold2
diff alphafold.xml @ 20:6ab1a261520a draft default tip
planemo upload for repository https://github.com/usegalaxy-au/tools-au commit c3a90eb12ada44d477541baa4dd6182be29cd554-dirty
author | galaxy-australia |
---|---|
date | Sun, 28 Jul 2024 20:09:55 +0000 |
parents | 2f7702fd0a4c |
children |
line wrap: on
line diff
--- a/alphafold.xml Wed May 08 06:26:55 2024 +0000 +++ b/alphafold.xml Sun Jul 28 20:09:55 2024 +0000 @@ -1,9 +1,9 @@ -<tool id="alphafold" name="Alphafold 2" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="20.01"> +<tool id="alphafold" name="Alphafold 2" version="@TOOL_VERSION@+galaxy@VERSION_SUFFIX@" profile="22.05"> <description> - AI-guided 3D structural prediction of proteins</description> <macros> - <token name="@TOOL_VERSION@">2.3.1</token> + <token name="@TOOL_VERSION@">2.3.2</token> <token name="@TOOL_MINOR_VERSION@">2.3</token> - <token name="@VERSION_SUFFIX@">5</token> + <token name="@VERSION_SUFFIX@">0</token> <import>macro_output.xml</import> <import>macro_test_output.xml</import> </macros> @@ -17,12 +17,12 @@ <xref type="bio.tools">alphafold_2</xref> </xrefs> <requirements> - <container type="docker">neoformit/alphafold:v2.3.1_2</container> + <container type="docker">neoformit/alphafold:v2.3.2_0</container> </requirements> <required_files> <include path="scripts/outputs.py" /> <include path="scripts/validate_fasta.py" /> - <include path="alphafold.html" /> + <include path="scripts/alphafold.html" /> </required_files> <command detect_errors="exit_code"><![CDATA[ @@ -46,7 +46,7 @@ && python3 '$__tool_directory__/scripts/validate_fasta.py' input.fasta --min_length \${ALPHAFOLD_AA_LENGTH_MIN:-0} --max_length \${ALPHAFOLD_AA_LENGTH_MAX:-0} -#if $model_preset == 'multimer': +#if $model_preset.selection == 'multimer': --multimer --max-sequences \${ALPHAFOLD_MAX_SEQUENCES:-10} #end if @@ -60,7 +60,7 @@ ## Run AlphaFold ------------------------------------------------------------- #if os.environ.get('PLANEMO_TESTING'): ## Run in testing mode (mocks a successful AlphaFold run by copying outputs) - && echo "Creating dummy outputs for model_preset=$model_preset..." + && echo "Creating dummy outputs for model_preset=$model_preset.selection..." 
&& bash '$__tool_directory__/scripts/mock_alphafold.sh' $model_preset #else: ## Run AlphaFold @@ -68,7 +68,7 @@ --fasta_paths alphafold.fasta --output_dir output --data_dir \${ALPHAFOLD_DB:-/data}/@TOOL_MINOR_VERSION@/ - --model_preset=$model_preset + --model_preset=$model_preset.selection ## Set reference database paths --uniref90_database_path \${ALPHAFOLD_DB:-/data}/@TOOL_MINOR_VERSION@/uniref90/uniref90.fasta @@ -83,21 +83,33 @@ --small_bfd_database_path \${ALPHAFOLD_DB:-/data}/@TOOL_MINOR_VERSION@/small_bfd/bfd-first_non_consensus_sequences.fasta #end if - #if $max_template_date: - --max_template_date=$max_template_date + #if $advanced.max_template_date: + --max_template_date=$advanced.max_template_date #else --max_template_date=\$TODAY #end if - --use_gpu_relax=\${ALPHAFOLD_USE_GPU:-True} ## introduced in v2.1.2 + --use_gpu_relax=\${ALPHAFOLD_USE_GPU:-True} - #if $model_preset == 'multimer': + #if $model_preset.selection == 'multimer': --pdb_seqres_database_path=\${ALPHAFOLD_DB:-/data}/@TOOL_MINOR_VERSION@/pdb_seqres/pdb_seqres.txt --uniprot_database_path=\${ALPHAFOLD_DB:-/data}/@TOOL_MINOR_VERSION@/uniprot/uniprot.fasta - --num_multimer_predictions_per_model=1 ## introduced in v2.2.0 + --num_multimer_predictions_per_model=$model_preset.num_multimer_predictions_per_model #else --pdb70_database_path \${ALPHAFOLD_DB:-/data}/@TOOL_MINOR_VERSION@/pdb70/pdb70 #end if + + ## Galaxy-specific options -------------------------------------------- + ## See https://github.com/neoformit/alphafold/tree/release_2.3.2_galaxy + #if $advanced.disable_amber_relax: + --disable_amber_relax + #end if + + #if $advanced.limit_model_outputs: + --output_models=$limit_model_outputs + #end if + ## End Galaxy-specific options ---------------------------------------- + #end if ## Generate additional outputs ------------------------------------------------ @@ -106,13 +118,13 @@ $outputs.model_pkls $outputs.pae_csv $outputs.plots -#if $model_preset == 'multimer': +#if 
$model_preset.selection == 'multimer': --multimer #end if ## HTML output && mkdir -p '${ html.files_path }' -&& cp '$__tool_directory__/alphafold.html' '${html}' +&& cp output/alphafold/extra/alphafold.html '${html}' && cp output/alphafold/ranked_*.pdb '${html.files_path}' ## This is a (hacky) fix for a bug that has appeared in multiple Pulsar servers. @@ -136,21 +148,6 @@ </conditional> <param - name="max_template_date" - type="text" - label="Max template date (yyyy-mm-dd) (optional)" - help="The model will reference PDB structures deposited before this date only. Defaults to today's date." - optional="true" - > - <sanitizer> - <valid initial="string.digits"> - <add value="-" /> - </valid> - </sanitizer> - <validator type="regex">[0-9]{4}-[0-9]{2}-[0-9]{2}</validator> - </param> - - <param name="dbs" type="select" display="radio" @@ -162,8 +159,9 @@ <option value="full">Full database</option> </param> + <conditional name="model_preset"> <param - name="model_preset" + name="selection" type="select" label="Model preset" help="Select which prediction model to run. The monomer model is the most accurate for single protein prediction. The multimer model allows prediction of protein complexes." @@ -176,6 +174,56 @@ multimer - model a protein complex (requires multi-sequence FASTA input) </option> </param> + <when value="monomer"></when> + <when value="monomer_ptm"></when> + <when value="multimer"> + <param + name="num_multimer_predictions_per_model" + type="integer" + value="5" + label="Multimer predictions per model" + help="How many predictions (each with a different random seed) will be generated per model. E.g. if this is 2 and there are 5 models then there will be 10 predictions per input. For a small drop in accuracy you may wish to run a single seed per model (default 5, max 10)." 
+ min="1" + max="10" + /> + </when> + </conditional> + + <section name="advanced" title="Advanced options" expanded="false"> + <param + name="max_template_date" + type="text" + label="Max template date (yyyy-mm-dd) (optional)" + help="The model will reference PDB structures deposited before this date only. Defaults to today's date." + optional="true" + > + <sanitizer> + <valid initial="string.digits"> + <add value="-" /> + </valid> + </sanitizer> + <validator type="regex">[0-9]{4}-[0-9]{2}-[0-9]{2}</validator> + </param> + + <param + name="disable_amber_relax" + type="boolean" + label="Disable Amber relaxation" + value="false" + optional="true" + help="Amber relaxation can be disabled to speed up processing time. Amber relaxation is used to refine predicted structures by removing stereochemical violations, resulting in more accurate prediction of side-chain geometry. Disabling this option with large proteins may lead to artefacts in the predicted structure. Disabling amber relax will result in the unrelaxed models being collected as PDB outputs." + /> + + <param + name="limit_model_outputs" + type="integer" + label="Limit model outputs" + value="5" + help="Limit the number of models to output. The top N models will be output, where N is the value entered here (default 5). Please note that the top-ranking model is not always the correct one, and it is usually recommended to inspect multiple models. Reducing the number of models will result in a slight reduction in run time." + min="1" + max="5" + /> + </section> <section name="outputs" title="Optional outputs" expanded="false"> <param @@ -228,6 +276,13 @@ label="relax_metrics.json" help="A JSON-formatted text file containing relax metrics (mostly remaining violations)." /> + <param + name="timings_json" + type="boolean" + checked="false" + label="timings.json" + help="A JSON file with timings reported for each phase of the AlphaFold run." 
+ /> </section> </inputs> @@ -241,6 +296,7 @@ <expand macro="output_pae_csv" /> <expand macro="output_plots" /> <expand macro="output_relax_json" /> + <expand macro="output_timings_json" /> </outputs> <tests> @@ -250,7 +306,7 @@ <param name="input_mode" value="history"/> <param name="fasta_file" value="test1.fasta"/> </conditional> - <param name="model_preset" value="monomer"/> + <param name="model_preset|selection" value="monomer"/> <expand macro="test_output_pdb_models" /> </test> @@ -260,7 +316,7 @@ <param name="input_mode" value="history"/> <param name="fasta_file" value="test1.fasta"/> </conditional> - <param name="model_preset" value="monomer"/> + <param name="model_preset|selection" value="monomer"/> <param name="outputs|plots" value="true"/> <param name="outputs|confidence_scores" value="true"/> <param name="outputs|plddts" value="true"/> @@ -281,7 +337,7 @@ <param name="input_mode" value="history"/> <param name="fasta_file" value="test1.fasta"/> </conditional> - <param name="model_preset" value="monomer_ptm"/> + <param name="model_preset|selection" value="monomer_ptm"/> <param name="outputs|plots" value="true"/> <param name="outputs|confidence_scores" value="true"/> <param name="outputs|plddts" value="true"/> @@ -303,19 +359,21 @@ <param name="input_mode" value="history"/> <param name="fasta_file" value="multimer.fasta"/> </conditional> - <param name="model_preset" value="multimer"/> + <param name="model_preset|selection" value="multimer"/> <param name="outputs|plots" value="true"/> <param name="outputs|confidence_scores" value="true"/> <param name="outputs|plddts" value="true"/> <param name="outputs|pae_csv" value="true"/> <param name="outputs|model_pkls" value="true"/> <param name="outputs|relax_json" value="true"/> + <param name="outputs|timings_json" value="true"/> <expand macro="test_output_plots_3" /> <expand macro="test_output_confidence_scores" /> <expand macro="test_output_plddts" /> <expand macro="test_output_pdb_models" /> <expand 
macro="test_output_pickles" /> <expand macro="test_output_relax_json" /> + <expand macro="test_output_timings_json" /> <expand macro="test_output_pae_csv" /> </test> </tests> @@ -325,7 +383,7 @@ | AlphaFold v2: AI-guided 3D structural prediction of proteins | - | **NOTE: this tool packages AlphaFold v2.3.1.** + | **NOTE: this tool packages** `a modified branch of AlphaFold v2.3.2. <https://github.com/neoformit/alphafold/tree/release_2.3.2_galaxy>`_ | | This means that the neural network has been trained on PDBs with a release | date before 2021-09-30 (the training cutoff was 2018-04-30 until ``v2.3.0``). @@ -333,12 +391,9 @@ | Find out more in the technical and release notes: | - - `Release notes for v2.3.1 <https://github.com/deepmind/alphafold/releases/tag/v2.3.1>`_ + - `Release notes for v2.3.2 <https://github.com/deepmind/alphafold/releases/tag/v2.3.2>`_ - `Technical notes for v2.3 <https://github.com/deepmind/alphafold/blob/main/docs/technical_note_v2.3.0.md>`_ - | If you want to use AlphaFold trained against an older cutoff date, switch to Galaxy version ``2.1.2`` (which was trained to data up to 2018-04-30). - | - **What it does** *What is AlphaFold?* @@ -362,6 +417,7 @@ | You can choose to input either a file from your Galaxy history or paste a sequence into a text box. | If you choose the ``multimer`` option, you can supply a FASTA file containing **multiple sequences** to be folded concurrently into a multimer. | + | For pairwise screening of target-candidate with multimer, you can submit a list of paired protein sequences in batch mode (i.e. two protein sequences in each FASTA file). | **Outputs** @@ -380,7 +436,7 @@ *PDB files* - | Five PDB (Protein Data Bank) files are be created, ordered by rank, as predicted by AlphaFold. + | PDB (Protein Data Bank) files (5 by default) are created, ordered by rank, as predicted by AlphaFold. The tool produces 5 models by default, but this can be reduced with the "Limit model outputs" option for a reduced run time. 
| These files describe the molecular structures and can be used for downstream analysis, e.g. *in silico* molecular docking. | **PLEASE NOTE** that all outputs have been renamed to their respective rank order, including model and model.pkl files. | @@ -421,6 +477,12 @@ | | + *timings.json (optional)* + + | A JSON-formatted text file containing the timings for each phase of the prediction. + | + | + **AlphaFold configuration** | We have configured AlphaFold to run with the parameters suggested by default on `AlphaFold's GitHub <https://github.com/deepmind/alphafold>`_.